Add link metadata fetching package
This commit is contained in:
parent
a4a4588ae6
commit
d22758227d
22 changed files with 27836 additions and 0 deletions
463
vendor/github.com/golang-commonmark/markdown/linkify/linkify.go
generated
vendored
Normal file
463
vendor/github.com/golang-commonmark/markdown/linkify/linkify.go
generated
vendored
Normal file
|
@ -0,0 +1,463 @@
|
|||
// Copyright 2015 The Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package linkify provides a way to find links in plain text.
|
||||
package linkify
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/golang-commonmark/markdown/byteutil"
|
||||
)
|
||||
|
||||
// Link describes a single link located by Links: the scheme that was
// recognized for it and the span of the input string it occupies.
type Link struct {
	// Scheme is the detected scheme. Links assigns "http:", "https:", "ftp:",
	// "mailto:", "//" for scheme-less "//host" links, or "" when the link is a
	// bare host name or IP address.
	Scheme string

	// Start is the byte offset in the input at which the link begins.
	Start int

	// End is the byte offset in the input just past the last byte of the link.
	End int
}
|
||||
|
||||
// max returns the larger of the two integers a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
|
||||
|
||||
// Links returns links found in s.
|
||||
func Links(s string) (links []Link) {
|
||||
for i := 0; i < len(s)-2; i++ {
|
||||
switch s[i] {
|
||||
case '.': // IP address or domain name
|
||||
if i == 0 {
|
||||
continue // . at the start of a line
|
||||
}
|
||||
if length := match(s[i+1:]); length > 0 {
|
||||
pos := i + 1 + length
|
||||
switch s[pos-1] {
|
||||
case '.': // IP address
|
||||
if pos >= len(s) {
|
||||
continue // . at the end of line
|
||||
}
|
||||
if !byteutil.IsDigit(s[i-1]) {
|
||||
i = pos
|
||||
continue // . should be preceded by a digit
|
||||
}
|
||||
if !byteutil.IsDigit(s[pos]) {
|
||||
i = pos
|
||||
continue // . should be followed by a digit
|
||||
}
|
||||
|
||||
// find the start of the IP address
|
||||
j := i - 2
|
||||
m := max(0, j-3)
|
||||
for j >= m && byteutil.IsDigit(s[j]) {
|
||||
j--
|
||||
}
|
||||
if i-2-j > 2 {
|
||||
i = pos + 1
|
||||
continue // at most 3 digits
|
||||
}
|
||||
start := 0
|
||||
if j >= 0 {
|
||||
r, rlen := utf8.DecodeLastRuneInString(s[:j+1])
|
||||
if !isPunctOrSpaceOrControl(r) {
|
||||
i = pos + 1
|
||||
continue
|
||||
}
|
||||
switch r {
|
||||
case '.', ':', '/', '\\', '-', '_':
|
||||
i = pos + 1
|
||||
continue
|
||||
}
|
||||
start = j + 2 - rlen
|
||||
}
|
||||
|
||||
length, ok := skipIPv4(s[start:])
|
||||
if !ok {
|
||||
i = pos + 1
|
||||
continue
|
||||
}
|
||||
end := start + length
|
||||
if end == len(s) {
|
||||
links = append(links, Link{
|
||||
Scheme: "",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) {
|
||||
continue
|
||||
}
|
||||
|
||||
end = skipPort(s, end)
|
||||
end = skipPath(s, end)
|
||||
end = skipQuery(s, end)
|
||||
end = skipFragment(s, end)
|
||||
end = unskipPunct(s, end)
|
||||
|
||||
if end < len(s) {
|
||||
r, _ = utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
|
||||
default: // domain name
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||||
if !isLetterOrDigit(r) {
|
||||
continue // should be preceded by a letter or a digit
|
||||
}
|
||||
|
||||
if pos == len(s) {
|
||||
start, ok := findHostnameStart(s, i)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
links = append(links, Link{
|
||||
Scheme: "",
|
||||
Start: start,
|
||||
End: pos,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if s[i+1:pos] != "xn--" {
|
||||
r, _ = utf8.DecodeRuneInString(s[pos:])
|
||||
if isLetterOrDigit(r) {
|
||||
continue // should not be followed by a letter or a digit
|
||||
}
|
||||
}
|
||||
|
||||
end, dot, ok := findHostnameEnd(s, pos)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
dot = max(dot, i)
|
||||
|
||||
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||||
if length := match(s[dot+1:]); dot+length+1 != end {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
start, ok := findHostnameStart(s, i)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
end = skipPort(s, end)
|
||||
end = skipPath(s, end)
|
||||
end = skipQuery(s, end)
|
||||
end = skipFragment(s, end)
|
||||
end = unskipPunct(s, end)
|
||||
|
||||
if end < len(s) {
|
||||
r, _ = utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||||
continue // should be followed by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
}
|
||||
}
|
||||
|
||||
case '/': // schema-less link
|
||||
if s[i+1] != '/' {
|
||||
continue
|
||||
}
|
||||
|
||||
if i > 0 {
|
||||
if s[i-1] == ':' {
|
||||
i++
|
||||
continue // should not be preceded by a colon
|
||||
}
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||||
if !isPunctOrSpaceOrControl(r) {
|
||||
i++
|
||||
continue // should be preceded by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeRuneInString(s[i+2:])
|
||||
if !isLetterOrDigit(r) {
|
||||
i++
|
||||
continue // should be followed by a letter or a digit
|
||||
}
|
||||
|
||||
start := i
|
||||
end, dot, ok := findHostnameEnd(s, i+2)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if s[i+2:end] != "localhost" {
|
||||
if dot == -1 {
|
||||
continue // no dot
|
||||
}
|
||||
if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end {
|
||||
if length := match(s[dot+1:]); dot+length+1 != end {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end = skipPort(s, end)
|
||||
end = skipPath(s, end)
|
||||
end = skipQuery(s, end)
|
||||
end = skipFragment(s, end)
|
||||
end = unskipPunct(s, end)
|
||||
|
||||
if end < len(s) {
|
||||
r, _ = utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||||
continue // should be followed by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "//",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
|
||||
case ':': // http, https, ftp, mailto or localhost
|
||||
if i < 3 { // at least ftp:
|
||||
continue
|
||||
}
|
||||
|
||||
if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" {
|
||||
j := i - 9
|
||||
if !byteutil.IsDigit(s[j+10]) {
|
||||
continue
|
||||
}
|
||||
if j > 0 {
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:j])
|
||||
if !isPunctOrSpaceOrControl(r) {
|
||||
i++
|
||||
continue // should be preceded by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
start := j
|
||||
pos := j + 9
|
||||
end := skipPort(s, pos)
|
||||
if end == pos {
|
||||
continue // invalid port
|
||||
}
|
||||
end = skipPath(s, end)
|
||||
end = skipQuery(s, end)
|
||||
end = skipFragment(s, end)
|
||||
end = unskipPunct(s, end)
|
||||
|
||||
if end < len(s) {
|
||||
r, _ := utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||||
i++
|
||||
continue // should be followed by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
j := i - 1
|
||||
var start int
|
||||
var schema string
|
||||
|
||||
switch byteutil.ByteToLower(s[j]) {
|
||||
case 'o': // mailto
|
||||
if j < 5 {
|
||||
continue // too short for mailto
|
||||
}
|
||||
if len(s)-j < 8 {
|
||||
continue // insufficient length after
|
||||
}
|
||||
if byteutil.ToLower(s[j-5:j+2]) != "mailto:" {
|
||||
continue
|
||||
}
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:j-5])
|
||||
if isLetterOrDigit(r) {
|
||||
continue // should not be preceded by a letter or a digit
|
||||
}
|
||||
r, _ = utf8.DecodeRuneInString(s[j+2:])
|
||||
if !isAllowedInEmail(r) {
|
||||
continue // should be followed by a valid e-mail character
|
||||
}
|
||||
|
||||
start = j - 5
|
||||
end, ok := findEmailEnd(s, j+2)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "mailto:",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
continue // continue processing
|
||||
|
||||
case 'p': // http or ftp
|
||||
if len(s)-j < 8 {
|
||||
continue // insufficient length after
|
||||
}
|
||||
switch byteutil.ByteToLower(s[j-2]) {
|
||||
case 'f':
|
||||
if byteutil.ToLower(s[j-2:j+4]) != "ftp://" {
|
||||
continue
|
||||
}
|
||||
start = j - 2
|
||||
schema = "ftp:"
|
||||
case 't':
|
||||
if j < 3 {
|
||||
continue
|
||||
}
|
||||
if byteutil.ToLower(s[j-3:j+4]) != "http://" {
|
||||
continue
|
||||
}
|
||||
start = j - 3
|
||||
schema = "http:"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
case 's': // https
|
||||
if j < 4 {
|
||||
continue // too short for https
|
||||
}
|
||||
if len(s)-j < 8 {
|
||||
continue // insufficient length after
|
||||
}
|
||||
start = j - 4
|
||||
if byteutil.ToLower(s[start:j+4]) != "https://" {
|
||||
continue
|
||||
}
|
||||
schema = "https:"
|
||||
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
// http, https or ftp
|
||||
|
||||
if start > 0 {
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:start])
|
||||
if !isPunctOrSpaceOrControl(r) {
|
||||
continue // should be preceded by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeRuneInString(s[j+4:])
|
||||
if !isLetterOrDigit(r) {
|
||||
continue // should be followed by a letter or a digit
|
||||
}
|
||||
|
||||
end, dot, ok := findHostnameEnd(s, j+4)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if s[j+4:end] != "localhost" {
|
||||
if dot == -1 {
|
||||
continue // no dot
|
||||
}
|
||||
if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end {
|
||||
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||||
if length := match(s[dot+1:]); dot+length+1 != end {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end = skipPort(s, end)
|
||||
end = skipPath(s, end)
|
||||
end = skipQuery(s, end)
|
||||
end = skipFragment(s, end)
|
||||
end = unskipPunct(s, end)
|
||||
|
||||
if end < len(s) {
|
||||
r, _ = utf8.DecodeRuneInString(s[end:])
|
||||
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||||
continue // should be followed by punctuation or space
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: schema,
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
|
||||
case '@': // schema-less e-mail
|
||||
if i == 0 {
|
||||
continue // @ at the start of a line
|
||||
}
|
||||
|
||||
if len(s)-i < 5 {
|
||||
continue // insufficient length after
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||||
if !isAllowedInEmail(r) {
|
||||
continue // should be preceded by a valid e-mail character
|
||||
}
|
||||
|
||||
r, _ = utf8.DecodeRuneInString(s[i+1:])
|
||||
if !isLetterOrDigit(r) {
|
||||
continue // should be followed by a letter or a digit
|
||||
}
|
||||
|
||||
start, ok := findEmailStart(s, i-1)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
end, dot, ok := findHostnameEnd(s, i+1)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if dot == -1 {
|
||||
continue // no dot
|
||||
}
|
||||
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||||
if length := match(s[dot+1:]); dot+length+1 != end {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
links = append(links, Link{
|
||||
Scheme: "mailto:",
|
||||
Start: start,
|
||||
End: end,
|
||||
})
|
||||
i = end
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue