Add link metadata fetching package

This commit is contained in:
Ken-Håvard Lieng 2017-07-17 23:11:36 +02:00
parent a4a4588ae6
commit d22758227d
22 changed files with 27836 additions and 0 deletions

132
links/links.go Normal file
View File

@ -0,0 +1,132 @@
package links
import (
"errors"
"io"
"net/http"
"strings"
"time"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
var (
	// Client is the HTTP client Fetch uses for its requests. It has a
	// 15 second timeout; callers may replace it to customize transport
	// settings.
	Client = &http.Client{
		Timeout: 15 * time.Second,
	}

	// ErrContentType is returned by Fetch when the response's Content-Type
	// is not text/html.
	ErrContentType = errors.New("Unsupported Content-Type")
)

// Meta holds the link metadata that ExtractMeta collects from a page.
type Meta struct {
	// URL is the url argument passed to ExtractMeta.
	URL string `json:"URL"`
	// SiteName is taken from the og:site_name meta tag.
	SiteName string `json:"siteName,omitempty"`
	// Color is taken from the theme-color or msapplication-TileColor meta tag.
	Color string `json:"color,omitempty"`
	// Title is taken from the og:title, twitter:title or title meta tag,
	// or from the text of the <title> element.
	Title string `json:"title"`
	// Description is taken from the og:description, twitter:description
	// or description meta tag.
	Description string `json:"description"`
	// ImageURL is taken from og:image, og:image:secure_url or twitter:image.
	ImageURL string `json:"imageURL,omitempty"`
	// VideoURL is taken from og:video:url, og:video:secure_url or twitter:player.
	VideoURL string `json:"videoURL,omitempty"`
}

// Fetch downloads the page at url and extracts its link metadata.
//
// The request goes through the package-level Client so that its timeout
// applies. Fetch returns ErrContentType when the response's Content-Type
// does not start with text/html (compared case-insensitively), and otherwise
// returns whatever ExtractMeta yields for the response body.
func Fetch(url string) (*Meta, error) {
	resp, err := Client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// TODO: Image links
	// Media types are case-insensitive, so normalize before comparing.
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
	if !strings.HasPrefix(contentType, "text/html") {
		return nil, ErrContentType
	}

	return ExtractMeta(resp.Body, url)
}
// ExtractMeta tokenizes the HTML read from body and collects link metadata
// from the text of <title> and from <meta> tags. url is stored unchanged in
// the URL field of the result.
//
// Scanning stops at the closing </head> tag, at any <body> tag, or at EOF,
// whichever comes first, and the metadata gathered so far is returned. A
// tokenizer error other than io.EOF is returned together with a nil Meta.
func ExtractMeta(body io.Reader, url string) (*Meta, error) {
meta := Meta{URL: url}
// currentNode is the tag of the last start tag seen, and is reset to 0 by
// end and self-closing tags. <meta> tags with attributes do not touch it.
var currentNode atom.Atom
z := html.NewTokenizer(body)
for {
tt := z.Next()
switch tt {
case html.ErrorToken:
// EOF just means the document ended early; keep what was collected.
if z.Err() == io.EOF {
return &meta, nil
}
return nil, z.Err()
case html.TextToken:
// Text right after <title> is the fallback title; a title already set
// (for example by an earlier meta tag) is not overwritten.
if currentNode == atom.Title && meta.Title == "" {
meta.Title = string(z.Text())
}
case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
name, hasAttr := z.TagName()
node := atom.Lookup(name)
if node == atom.Meta && hasAttr {
var key, val []byte
// name shadows the tag name above: here it is the value of the tag's
// name/property attribute.
var name, content string
for hasAttr {
key, val, hasAttr = z.TagAttr()
switch atom.String(key) {
case "name":
name = string(val)
case "property":
name = string(val)
case "content":
content = string(val)
}
}
if content != "" {
switch name {
case "og:site_name":
meta.SiteName = content
case "theme-color", "msapplication-TileColor":
meta.Color = content
case "og:title", "twitter:title", "title":
meta.Title = content
case "og:description", "twitter:description":
meta.Description = content
case "description":
// The plain description never replaces one that is already set.
if meta.Description == "" {
meta.Description = content
}
case "og:image", "og:image:secure_url", "twitter:image":
// Keep an https URL once one has been stored.
if !strings.HasPrefix(meta.ImageURL, "https:") {
meta.ImageURL = content
}
case "og:video:url", "og:video:secure_url", "twitter:player":
// Keep an https URL once one has been stored.
if !strings.HasPrefix(meta.VideoURL, "https:") {
meta.VideoURL = content
}
}
}
continue
}
if tt == html.StartTagToken {
currentNode = node
} else {
currentNode = 0
}
// Nothing past the end of <head> or the start of <body> is inspected.
if (node == atom.Head && tt == html.EndTagToken) || node == atom.Body {
return &meta, nil
}
}
}
}

10
vendor/github.com/golang-commonmark/markdown/LICENSE generated vendored Normal file
View File

@ -0,0 +1,10 @@
Copyright (c) 2015, The Authors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,160 @@
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package byteutil provides various operations on bytes and byte strings.
package byteutil
// Lookup tables indexed by byte value. They are filled in by init.
var (
// digit marks the bytes '0'-'9'.
digit [256]bool
// hexdigit marks the bytes '0'-'9', 'a'-'f' and 'A'-'F'.
hexdigit [256]bool
// letter marks the bytes 'a'-'z' and 'A'-'Z'.
letter [256]bool
// uppercase marks the bytes 'A'-'Z'.
uppercase [256]bool
// lowercase marks the bytes 'a'-'z'.
lowercase [256]bool
// alphanum marks the bytes '0'-'9', 'a'-'z' and 'A'-'Z'.
alphanum [256]bool
// tolower maps 'A'-'Z' to 'a'-'z' and every other byte to itself.
tolower [256]byte
// toupper maps 'a'-'z' to 'A'-'Z' and every other byte to itself.
toupper [256]byte
)
// init populates the byte classification tables and the two case-mapping
// tables used by the rest of the package.
func init() {
	// mark sets tbl[c] for every byte c of chars.
	mark := func(tbl *[256]bool, chars string) {
		for i := 0; i < len(chars); i++ {
			tbl[chars[i]] = true
		}
	}
	mark(&digit, "0123456789")
	mark(&hexdigit, "0123456789abcdefABCDEF")
	mark(&letter, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
	mark(&lowercase, "abcdefghijklmnopqrstuvwxyz")
	mark(&uppercase, "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
	mark(&alphanum, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

	// Both mappings start out as the identity; only ASCII letters are remapped.
	for i := range tolower {
		tolower[i] = byte(i)
		toupper[i] = byte(i)
	}
	for _, c := range "ABCDEFGHIJKLMNOPQRSTUVWXYZ" {
		tolower[c] = byte(c) - 'A' + 'a'
	}
	for _, c := range "abcdefghijklmnopqrstuvwxyz" {
		toupper[c] = byte(c) - 'a' + 'A'
	}
}
// IsDigit reports whether b is an ASCII decimal digit ('0'-'9').
func IsDigit(b byte) bool {
return digit[b]
}
// IsHexDigit reports whether b is an ASCII hexadecimal digit
// ('0'-'9', 'a'-'f' or 'A'-'F').
func IsHexDigit(b byte) bool {
return hexdigit[b]
}
// IsLetter reports whether b is an ASCII letter ('a'-'z' or 'A'-'Z').
func IsLetter(b byte) bool {
return letter[b]
}
// IsLowercaseLetter reports whether b is an ASCII lowercase letter ('a'-'z').
func IsLowercaseLetter(b byte) bool {
return lowercase[b]
}
// IsUppercaseLetter reports whether b is an ASCII uppercase letter ('A'-'Z').
func IsUppercaseLetter(b byte) bool {
return uppercase[b]
}
// IsAlphaNum reports whether b is an ASCII letter or decimal digit.
func IsAlphaNum(b byte) bool {
return alphanum[b]
}
// ToLower returns s with every ASCII uppercase letter replaced by its
// lowercase counterpart; all other bytes are left untouched. When s has no
// ASCII uppercase letter, s itself is returned.
func ToLower(s string) string {
	// Locate the first byte that actually needs converting.
	first := -1
	for i := 0; i < len(s); i++ {
		if uppercase[s[i]] {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	// Everything before first is already lowercase and can be kept as copied.
	out := []byte(s)
	for i := first; i < len(out); i++ {
		out[i] = tolower[out[i]]
	}
	return string(out)
}
// ToUpper returns s with every ASCII lowercase letter replaced by its
// uppercase counterpart; all other bytes are left untouched. When s has no
// ASCII lowercase letter, s itself is returned.
func ToUpper(s string) string {
	// Locate the first byte that actually needs converting.
	first := -1
	for i := 0; i < len(s); i++ {
		if lowercase[s[i]] {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	// Everything before first is already uppercase and can be kept as copied.
	out := []byte(s)
	for i := first; i < len(out); i++ {
		out[i] = toupper[out[i]]
	}
	return string(out)
}
// ByteToLower returns the lowercase form of b if b is an ASCII uppercase
// letter, and b itself otherwise.
func ByteToLower(b byte) byte {
return tolower[b]
}
// ByteToUpper returns the uppercase form of b if b is an ASCII lowercase
// letter, and b itself otherwise.
func ByteToUpper(b byte) byte {
return toupper[b]
}
// IndexAny returns the index of the first byte of s that also occurs in
// chars, or -1 if there is none. Both strings are compared byte by byte,
// not rune by rune.
func IndexAny(s, chars string) int {
	var wanted [256]bool
	for _, c := range []byte(chars) {
		wanted[c] = true
	}
	for i, c := range []byte(s) {
		if wanted[c] {
			return i
		}
	}
	return -1
}
// IndexAnyTable returns the index of the first byte c of s for which t[c]
// is true, or -1 if there is none.
func IndexAnyTable(s string, t *[256]bool) int {
	for i, c := range []byte(s) {
		if t[c] {
			return i
		}
	}
	return -1
}
// Unhex returns the numeric value (0-15) of the hexadecimal digit d.
//
// It panics if d is not one of '0'-'9', 'a'-'f' or 'A'-'F'. The ranges are
// checked explicitly so that letters beyond 'f'/'F' are rejected instead of
// being converted to values above 15.
func Unhex(d byte) byte {
	switch {
	case '0' <= d && d <= '9':
		return d - '0'
	case 'A' <= d && d <= 'F':
		return d - 'A' + 10
	case 'a' <= d && d <= 'f':
		return d - 'a' + 10
	}
	panic("unhex: not hex digit")
}

View File

@ -0,0 +1,50 @@
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package linkify
import "unicode"
// Character-class tables indexed by byte value.
var (
// trigger is not filled in by this file's init.
// NOTE(review): no use of it is visible here; confirm whether it is still needed.
trigger [256]bool
// unreserved marks the bytes '-', '.', '_' and '~'.
unreserved [256]bool
// subdelims marks the bytes "!$&'()*+,;=".
subdelims [256]bool
// emailcs marks ASCII letters, digits and the punctuation that
// isAllowedInEmail accepts.
emailcs [256]bool
// basicPunct marks the bytes ".,?!;:".
basicPunct [256]bool
)
// init fills the unreserved, subdelims, emailcs and basicPunct tables.
func init() {
	// fill sets tbl[c] for every byte c of chars.
	fill := func(tbl *[256]bool, chars string) {
		for i := 0; i < len(chars); i++ {
			tbl[chars[i]] = true
		}
	}
	fill(&unreserved, "-._~")
	fill(&subdelims, "!$&'()*+,;=")
	fill(&emailcs, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#$%&'*+/=?^_`{|}~-")
	fill(&basicPunct, ".,?!;:")
}
// isAllowedInEmail reports whether r is an ASCII character listed in the
// emailcs table.
func isAllowedInEmail(r rune) bool {
	if r >= 0x7f {
		return false
	}
	return emailcs[r]
}
// isLetterOrDigit reports whether r is a Unicode letter (category L) or a
// Unicode decimal digit (category Nd).
func isLetterOrDigit(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
// isPunctOrSpaceOrControl reports whether r is '<', '>', or a member of the
// Unicode Punct, Space or Cc range tables.
func isPunctOrSpaceOrControl(r rune) bool {
	switch r {
	case '<', '>':
		return true
	}
	return unicode.In(r, unicode.Punct, unicode.Space, unicode.Cc)
}
// isUnreserved reports whether r is an ASCII character listed in the
// unreserved table, or a letter or digit according to isLetterOrDigit.
func isUnreserved(r rune) bool {
	if r < 0x7f && unreserved[r] {
		return true
	}
	return isLetterOrDigit(r)
}
// isSubDelimiter reports whether r is an ASCII character listed in the
// subdelims table.
func isSubDelimiter(r rune) bool {
	if r >= 0x7f {
		return false
	}
	return subdelims[r]
}

View File

@ -0,0 +1,93 @@
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package linkify
import (
"unicode"
"unicode/utf8"
)
// findEmailStart scans s backwards from index start to find where the local
// part of an e-mail address begins.
//
// It returns the index of the first byte of the local part and true. The
// bare returns yield (0, false), which signals that no valid local part ends
// at start: this happens for two dots in a row, a leading dot, or a local
// part that is preceded by something other than whitespace.
func findEmailStart(s string, start int) (_ int, _ bool) {
end := start
// allowDot is true only right after an e-mail character was seen, so that
// consecutive dots are rejected.
allowDot := false
for end >= 0 {
b := s[end]
switch {
case emailcs[b]:
allowDot = true
case b == '.':
if !allowDot {
return
}
allowDot = false
default:
// s[end] is not part of the address, so the local part would begin at end+1.
if end == start {
return
}
// The local part must not begin with a dot.
if s[end+1] == '.' {
return
}
// The rune just before the local part has to be valid whitespace.
r, _ := utf8.DecodeLastRuneInString(s[:end+1])
if r == utf8.RuneError {
return
}
if !unicode.IsSpace(r) {
return
}
return end + 1, true
}
end--
}
// The scan reached the beginning of s; reject a local part that begins with a dot.
if end < start && s[end+1] == '.' {
return
}
return end + 1, true
}
// findEmailEnd scans s forwards from index start over the local part of an
// e-mail address up to the '@', then validates the host name that follows.
//
// It returns the index just past the address and true. The bare returns
// yield (0, false) and signal that no valid address starts at start.
func findEmailEnd(s string, start int) (_ int, _ bool) {
end := start
// allowDot is true only right after an e-mail character was seen, so that
// consecutive dots are rejected.
allowDot := false
loop:
for end < len(s) {
b := s[end]
switch {
case emailcs[b]:
allowDot = true
case b == '.':
if !allowDot {
return
}
allowDot = false
case b == '@':
break loop
default:
return
}
end++
}
// Require at least five bytes after position end (the '@', if one was found).
if end >= len(s)-5 {
return
}
// The local part must not end with a dot.
if end > start && s[end-1] == '.' {
return
}
var dot int
var ok bool
end, dot, ok = findHostnameEnd(s, end+1)
// A host name without any dot is rejected.
if !ok || dot == -1 {
return
}
// A last label beginning with "xn--" is accepted without consulting match.
if dot+5 <= len(s) && s[dot+1:dot+5] == "xn--" {
return end, true
}
// match is defined outside this file; presumably it returns the length of a
// known top-level domain at the start of its argument — TODO confirm.
// Whatever it matches has to extend exactly to the end of the host name.
if length := match(s[dot+1:]); dot+length+1 != end {
return
}
return end, true
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,463 @@
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package linkify provides a way to find links in plain text.
package linkify
import (
"unicode/utf8"
"github.com/golang-commonmark/markdown/byteutil"
)
// Link represents a link found in a string with a scheme and a position in the string.
type Link struct {
// Scheme is the scheme assigned by Links: "" for bare host names, IP
// addresses and localhost:port, "//" for scheme-less links, or one of
// "mailto:", "http:", "https:" and "ftp:".
Scheme string
// Start and End are byte offsets into the scanned string; the link text
// is s[Start:End].
Start, End int
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// Links returns links found in s.
//
// s is scanned byte by byte for the trigger characters '.', '/', ':' and
// '@'. Around each trigger the function tries to grow a complete link: an
// IPv4 address or domain name, a scheme-less "//" link, a localhost:port,
// mailto:, http://, https:// or ftp:// link, or a bare e-mail address. Each
// match is appended as a Link whose Start and End are byte offsets into s,
// and scanning resumes after the match.
//
// The loop stops two bytes before the end of s, so a trigger in the last two
// bytes does not start a match.
func Links(s string) (links []Link) {
for i := 0; i < len(s)-2; i++ {
switch s[i] {
case '.': // IP address or domain name
if i == 0 {
continue // . at the start of a line
}
// match is defined outside this file; presumably it returns the length of
// what it recognizes at the start of its argument — TODO confirm.
if length := match(s[i+1:]); length > 0 {
pos := i + 1 + length
switch s[pos-1] {
case '.': // IP address
if pos >= len(s) {
continue // . at the end of line
}
if !byteutil.IsDigit(s[i-1]) {
i = pos
continue // . should be preceded by a digit
}
if !byteutil.IsDigit(s[pos]) {
i = pos
continue // . should be followed by a digit
}
// find the start of the IP address
j := i - 2
m := max(0, j-3)
for j >= m && byteutil.IsDigit(s[j]) {
j--
}
if i-2-j > 2 {
i = pos + 1
continue // at most 3 digits
}
start := 0
if j >= 0 {
r, rlen := utf8.DecodeLastRuneInString(s[:j+1])
if !isPunctOrSpaceOrControl(r) {
i = pos + 1
continue
}
switch r {
case '.', ':', '/', '\\', '-', '_':
i = pos + 1
continue
}
start = j + 2 - rlen
}
length, ok := skipIPv4(s[start:])
if !ok {
i = pos + 1
continue
}
end := start + length
// An address that runs to the end of s is the last possible link.
if end == len(s) {
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
return
}
r, _ := utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) {
continue
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
default: // domain name
r, _ := utf8.DecodeLastRuneInString(s[:i])
if !isLetterOrDigit(r) {
continue // should be preceded by a letter or a digit
}
// A domain that runs to the end of s is the last possible link.
if pos == len(s) {
start, ok := findHostnameStart(s, i)
if !ok {
continue
}
links = append(links, Link{
Scheme: "",
Start: start,
End: pos,
})
return
}
if s[i+1:pos] != "xn--" {
r, _ = utf8.DecodeRuneInString(s[pos:])
if isLetterOrDigit(r) {
continue // should not be followed by a letter or a digit
}
}
end, dot, ok := findHostnameEnd(s, pos)
if !ok {
continue
}
dot = max(dot, i)
// Unless the last label begins with "xn--", the match after the last dot
// has to reach exactly to the end of the host name.
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
start, ok := findHostnameStart(s, i)
if !ok {
continue
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
}
}
case '/': // schema-less link
if s[i+1] != '/' {
continue
}
if i > 0 {
if s[i-1] == ':' {
i++
continue // should not be preceded by a colon
}
r, _ := utf8.DecodeLastRuneInString(s[:i])
if !isPunctOrSpaceOrControl(r) {
i++
continue // should be preceded by punctuation or space
}
}
r, _ := utf8.DecodeRuneInString(s[i+2:])
if !isLetterOrDigit(r) {
i++
continue // should be followed by a letter or a digit
}
start := i
end, dot, ok := findHostnameEnd(s, i+2)
if !ok {
continue
}
// The host must be "localhost", a full IPv4 address, or end in something
// that match recognizes.
if s[i+2:end] != "localhost" {
if dot == -1 {
continue // no dot
}
if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "//",
Start: start,
End: end,
})
i = end
case ':': // http, https, ftp, mailto or localhost
if i < 3 { // at least ftp:
continue
}
if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" {
j := i - 9
// The colon has to be followed directly by a port digit.
if !byteutil.IsDigit(s[j+10]) {
continue
}
if j > 0 {
r, _ := utf8.DecodeLastRuneInString(s[:j])
if !isPunctOrSpaceOrControl(r) {
i++
continue // should be preceded by punctuation or space
}
}
start := j
pos := j + 9
end := skipPort(s, pos)
if end == pos {
continue // invalid port
}
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ := utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
i++
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
break
}
// The byte before the colon selects which scheme to test for.
j := i - 1
var start int
var schema string
switch byteutil.ByteToLower(s[j]) {
case 'o': // mailto
if j < 5 {
continue // too short for mailto
}
if len(s)-j < 8 {
continue // insufficient length after
}
if byteutil.ToLower(s[j-5:j+2]) != "mailto:" {
continue
}
r, _ := utf8.DecodeLastRuneInString(s[:j-5])
if isLetterOrDigit(r) {
continue // should not be preceded by a letter or a digit
}
r, _ = utf8.DecodeRuneInString(s[j+2:])
if !isAllowedInEmail(r) {
continue // should be followed by a valid e-mail character
}
start = j - 5
end, ok := findEmailEnd(s, j+2)
if !ok {
continue
}
links = append(links, Link{
Scheme: "mailto:",
Start: start,
End: end,
})
i = end
continue // continue processing
case 'p': // http or ftp
if len(s)-j < 8 {
continue // insufficient length after
}
switch byteutil.ByteToLower(s[j-2]) {
case 'f':
if byteutil.ToLower(s[j-2:j+4]) != "ftp://" {
continue
}
start = j - 2
schema = "ftp:"
case 't':
if j < 3 {
continue
}
if byteutil.ToLower(s[j-3:j+4]) != "http://" {
continue
}
start = j - 3
schema = "http:"
default:
continue
}
case 's': // https
if j < 4 {
continue // too short for https
}
if len(s)-j < 8 {
continue // insufficient length after
}
start = j - 4
if byteutil.ToLower(s[start:j+4]) != "https://" {
continue
}
schema = "https:"
default:
continue
}
// http, https or ftp
if start > 0 {
r, _ := utf8.DecodeLastRuneInString(s[:start])
if !isPunctOrSpaceOrControl(r) {
continue // should be preceded by punctuation or space
}
}
r, _ := utf8.DecodeRuneInString(s[j+4:])
if !isLetterOrDigit(r) {
continue // should be followed by a letter or a digit
}
end, dot, ok := findHostnameEnd(s, j+4)
if !ok {
continue
}
// The host must be "localhost", a full IPv4 address, have a last label
// beginning with "xn--", or end in something that match recognizes.
if s[j+4:end] != "localhost" {
if dot == -1 {
continue // no dot
}
if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end {
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
}
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: schema,
Start: start,
End: end,
})
i = end
case '@': // schema-less e-mail
if i == 0 {
continue // @ at the start of a line
}
if len(s)-i < 5 {
continue // insufficient length after
}
r, _ := utf8.DecodeLastRuneInString(s[:i])
if !isAllowedInEmail(r) {
continue // should be preceded by a valid e-mail character
}
r, _ = utf8.DecodeRuneInString(s[i+1:])
if !isLetterOrDigit(r) {
continue // should be followed by a letter or a digit
}
start, ok := findEmailStart(s, i-1)
if !ok {
continue
}
end, dot, ok := findHostnameEnd(s, i+1)
if !ok {
continue
}
if dot == -1 {
continue // no dot
}
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
links = append(links, Link{
Scheme: "mailto:",
Start: start,
End: end,
})
i = end
}
}
return
}

View File

@ -0,0 +1,412 @@
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package linkify
import (
"unicode/utf8"
"github.com/golang-commonmark/markdown/byteutil"
)
// atoi3 scans the run of decimal digits of s that begins at index start and
// checks that its value does not exceed 255.
//
// On success it returns the index just past the last digit (not the parsed
// value) and true. It returns (0, false) if there is no digit at start or
// the value grows beyond 255.
func atoi3(s string, start int) (int, bool) {
	pos, val := start, 0
	for pos < len(s) && byteutil.IsDigit(s[pos]) {
		val = val*10 + int(s[pos]-'0')
		if val > 255 {
			return 0, false
		}
		pos++
	}
	if pos == start {
		return 0, false
	}
	return pos, true
}
// skipIPv4 checks that s begins with four dot-separated decimal numbers,
// each accepted by atoi3. It returns the length of that prefix and true, or
// (0, false) if s does not begin with such an address.
func skipIPv4(s string) (_ int, _ bool) {
	pos := 0
	for octet := 0; octet < 4; octet++ {
		if pos >= len(s) {
			return 0, false
		}
		// Every number after the first must be introduced by a dot.
		if octet > 0 {
			if s[pos] != '.' {
				return 0, false
			}
			pos++
		}
		next, ok := atoi3(s, pos)
		if !ok {
			return 0, false
		}
		pos = next
	}
	return pos, true
}
// atoi5 scans the run of decimal digits of s that begins at index start and
// checks that its value lies in 1..65535.
//
// On success it returns the index just past the last digit (not the parsed
// value) and true. It returns (0, false) if there is no digit at start, the
// value is zero, or the value grows beyond 65535.
func atoi5(s string, start int) (int, bool) {
	pos, val := start, 0
	for pos < len(s) && byteutil.IsDigit(s[pos]) {
		val = val*10 + int(s[pos]-'0')
		if val > 65535 {
			return 0, false
		}
		pos++
	}
	if pos == start || val == 0 {
		return 0, false
	}
	return pos, true
}
// skipPort returns the index just past a ":port" suffix that begins at
// s[start]. If there is no colon at start, or atoi5 rejects what follows it,
// start is returned unchanged.
func skipPort(s string, start int) int {
	if start < len(s) && s[start] == ':' {
		if end, ok := atoi5(s, start+1); ok {
			return end
		}
	}
	return start
}
// skipPath returns the index just past the URL path that begins at s[start].
// If s[start] is not '/', the path is empty and start is returned unchanged.
// Inside a path, '?' ends the scan.
func skipPath(s string, start int) int {
	return skipURLPart(s, start, '/', false)
}

// skipQuery returns the index just past the URL query that begins at
// s[start]. If s[start] is not '?', start is returned unchanged.
func skipQuery(s string, start int) int {
	return skipURLPart(s, start, '?', true)
}

// skipFragment returns the index just past the URL fragment that begins at
// s[start]. If s[start] is not '#', start is returned unchanged.
func skipFragment(s string, start int) int {
	return skipURLPart(s, start, '#', true)
}

// skipURLPart is the scanner shared by skipPath, skipQuery and skipFragment.
//
// The part must begin with the byte lead at s[start]; otherwise start is
// returned. From there the scan accepts:
//   - unreserved characters, stopping at the second of two hyphens in a row;
//   - sub-delimiters and square brackets, where '(' and '[' must be closed
//     by the matching ')' or ']' and a mismatched closer stops the scan;
//   - '/', ':' and '@', plus '?' when allowQuestion is true;
//   - '%' followed by two hexadecimal digits;
//   - a space, but only while a bracket is open.
//
// Anything else, including invalid UTF-8, stops the scan. If a bracket is
// still open at that point the result is the index of the outermost open
// bracket, so the unbalanced tail is dropped. A double hyphen that stopped
// the scan is excluded from the result.
func skipURLPart(s string, start int, lead byte, allowQuestion bool) int {
	if start >= len(s) || s[start] != lead {
		return start
	}

	var stack []rune       // brackets opened and not yet closed
	var notClosedIndex int // index of the bracket at the bottom of stack
	var nHyphen int        // length of the current run of hyphens

	end := start + 1

loop:
	for end < len(s) {
		r, rlen := utf8.DecodeRuneInString(s[end:])
		if r == utf8.RuneError {
			nHyphen = 0
			break
		}

		switch {
		case isUnreserved(r):
			if r == '-' {
				nHyphen++
				if nHyphen > 1 {
					break loop
				}
			} else {
				nHyphen = 0
			}

		case isSubDelimiter(r) || r == '[' || r == ']':
			nHyphen = 0
			switch r {
			case '[', '(':
				if len(stack) == 0 {
					notClosedIndex = end
				}
				stack = append(stack, r)

			case ']', ')':
				opening := '['
				if r == ')' {
					opening = '('
				}
				if len(stack) == 0 || stack[len(stack)-1] != opening {
					break loop
				}
				stack = stack[:len(stack)-1]
			}

		case r == '/' || r == ':' || r == '@' || (allowQuestion && r == '?'):
			nHyphen = 0

		case r == '%':
			nHyphen = 0
			if end+2 >= len(s) {
				break loop
			}
			if !(byteutil.IsHexDigit(s[end+1]) &&
				byteutil.IsHexDigit(s[end+2])) {
				break loop
			}
			// Step over the two hex digits; the '%' itself is consumed below.
			end += 2

		default:
			nHyphen = 0
			if r != ' ' || len(stack) == 0 {
				break loop
			}
		}

		end += rlen
	}

	if len(stack) > 0 {
		return notClosedIndex
	}

	// With a double hyphen, end still points at the second hyphen, so this
	// yields the index of the first one; a single trailing hyphen is kept.
	if nHyphen > 0 {
		return end - nHyphen + 1
	}

	return end
}
// unskipPunct steps back over one trailing punctuation byte: if the byte
// just before index start is listed in basicPunct, its index is returned,
// otherwise start is returned unchanged.
func unskipPunct(s string, start int) int {
	last := start - 1
	if last >= 0 && last < len(s) && basicPunct[s[last]] {
		return last
	}
	return start
}
// findHostnameStart scans s backwards from index start (exclusive) to find
// where a host name begins.
//
// It returns the index of the first byte of the host name and true. The bare
// returns yield (0, false) and signal that the text before start is not a
// valid host name.
func findHostnameStart(s string, start int) (_ int, _ bool) {
end := start
// lastDot reports that the rune to the right of the current position is a
// dot. It starts out true, which also rejects a hyphen directly before start.
lastDot := true
// nHyphen counts the hyphens in the current run.
nHyphen := 0
loop:
for end > 0 {
r, rlen := utf8.DecodeLastRuneInString(s[:end])
if r == utf8.RuneError {
return
}
switch {
case isLetterOrDigit(r):
lastDot = false
nHyphen = 0
case r == '.':
// A dot directly followed by a hyphen is rejected.
if nHyphen > 0 {
return
}
lastDot = true
case r == '-':
if end == start {
return
}
// A hyphen directly followed by a dot is rejected.
if lastDot {
return
}
nHyphen++
// Three hyphens in a row are rejected.
if nHyphen == 3 {
return
}
case r == ':' || r == '/' || r == '\\' || r == '_':
return
case isPunctOrSpaceOrControl(r):
// Any other punctuation, space or control character ends the host name.
break loop
default:
return
}
end -= rlen
}
// The host name must not begin with a dot or a hyphen.
if lastDot || nHyphen > 0 {
return
}
return end, true
}
// findHostnameEnd scans s forwards from index start to find where a host
// name ends.
//
// It returns the index just past the host name, the index of the last dot in
// it (-1 if none was recorded), and true. The bare returns yield
// (0, 0, false) and signal that no valid host name begins at start.
func findHostnameEnd(s string, start int) (_ int, _ int, _ bool) {
end := start
// lastDot reports that the previous rune was a dot.
lastDot := false
lastDotPos := -1
// nHyphen counts the hyphens in the current run.
nHyphen := 0
loop:
for end < len(s) {
r, rlen := utf8.DecodeRuneInString(s[end:])
if r == utf8.RuneError {
return
}
switch {
case isLetterOrDigit(r):
lastDot = false
nHyphen = 0
case r == '.':
// A hyphen directly followed by a dot is rejected.
if nHyphen > 0 {
return
}
// A second dot in a row ends the host name.
if lastDot {
break loop
}
lastDot = true
lastDotPos = end
nHyphen = 0
case r == '-':
lastDot = false
if end == start {
return
}
// NOTE(review): lastDot was cleared two statements above, so this check can
// never fire and a hyphen right after a dot is accepted. Confirm whether
// the assignment was meant to come after this check.
if lastDot {
return
}
nHyphen++
// A third hyphen in a row ends the host name.
if nHyphen == 3 {
break loop
}
case r == '\\' || r == '_':
return
case isPunctOrSpaceOrControl(r):
// Any other punctuation, space or control character ends the host name.
break loop
default:
return
}
end += rlen
}
if nHyphen > 0 {
// Trailing hyphens do not belong to the host name.
end -= nHyphen
} else if lastDot {
// The scan stopped right after a dot: drop that dot and look backwards for
// the previous one, which becomes the last dot of the host name.
if s[end-1] == '.' {
end--
}
lastDotPos = end - 1
for lastDotPos >= start && s[lastDotPos] != '.' {
lastDotPos--
}
if lastDotPos < start {
lastDotPos = -1
}
}
return end, lastDotPos, true
}

78
vendor/golang.org/x/net/html/atom/atom.go generated vendored Normal file
View File

@ -0,0 +1,78 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package atom provides integer codes (also known as atoms) for a fixed set of
// frequently occurring HTML strings: tag names and attribute keys such as "p"
// and "id".
//
// Sharing an atom's name between all elements with the same tag can result in
// fewer string allocations when tokenizing and parsing HTML. Integer
// comparisons are also generally faster than string comparisons.
//
// The value of an atom's particular code is not guaranteed to stay the same
// between versions of this package. Neither is any ordering guaranteed:
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
// be dense. The only guarantees are that e.g. looking up "div" will yield
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
package atom // import "golang.org/x/net/html/atom"
// Atom is an integer code for a string. The zero value maps to "".
//
// The low 8 bits hold the length of the string and the remaining high bits
// hold its offset within atomText.
type Atom uint32
// String returns the atom's name, or "" if the atom's offset and length do
// not fit inside atomText.
func (a Atom) String() string {
	offset, length := uint32(a>>8), uint32(a&0xff)
	if end := offset + length; end <= uint32(len(atomText)) {
		return atomText[offset:end]
	}
	return ""
}
// string returns the atom's name like String, but without the bounds check:
// it slices atomText directly using the offset and length packed into a.
func (a Atom) string() string {
return atomText[a>>8 : a>>8+a&0xff]
}
// fnv computes the FNV hash of s, starting from the arbitrary value h.
// Each byte is XORed into the hash, which is then multiplied by the FNV
// prime 16777619.
func fnv(h uint32, s []byte) uint32 {
	for _, b := range s {
		h = (h ^ uint32(b)) * 16777619
	}
	return h
}
// match reports whether s begins with the bytes of t. The caller must make
// sure that s is at least as long as t.
func match(s string, t []byte) bool {
	for i := 0; i < len(t); i++ {
		if t[i] != s[i] {
			return false
		}
	}
	return true
}
// Lookup returns the atom whose name is s. It returns zero if there is no
// such atom. The lookup is case sensitive.
func Lookup(s []byte) Atom {
	if n := len(s); n == 0 || n > maxAtomLen {
		return 0
	}
	h := fnv(hash0, s)
	mask := uint32(len(table) - 1)
	// Every name can live in one of two slots, taken from the low and the
	// high half of the hash.
	for _, slot := range [2]uint32{h & mask, (h >> 16) & mask} {
		if a := table[slot]; int(a&0xff) == len(s) && match(a.string(), s) {
			return a
		}
	}
	return 0
}
// String returns a string whose contents are equal to s. In that sense, it is
// equivalent to string(s) but may be more efficient: when s names a known
// atom, the atom's name is returned instead of a fresh copy of s.
func String(s []byte) string {
	a := Lookup(s)
	if a == 0 {
		return string(s)
	}
	return a.String()
}

648
vendor/golang.org/x/net/html/atom/gen.go generated vendored Normal file
View File

@ -0,0 +1,648 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates table.go and table_test.go.
// Invoke as
//
// go run gen.go |gofmt >table.go
// go run gen.go -test |gofmt >table_test.go
import (
"flag"
"fmt"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset":
// hyphens are dropped, and a lowercase ASCII letter at the start or right
// after a hyphen is uppercased.
func identifier(s string) string {
	out := make([]byte, 0, len(s))
	upperNext := true
	for _, r := range s {
		switch {
		case r == '-':
			upperNext = true
			continue
		case upperNext && r >= 'a' && r <= 'z':
			r -= 'a' - 'A'
		}
		upperNext = false
		out = append(out, byte(r))
	}
	return string(out)
}
// test is the -test command-line flag. When it is set, main prints the list
// of atom names for table_test.go instead of generating the lookup table.
var test = flag.Bool("test", false, "generate table_test.go")
// main generates the Go source of the atom table and prints it to standard
// output.
//
// It merges and sorts all name lists. With -test it only prints them as
// testAtomList. Otherwise it removes duplicates, searches for a hash seed
// that gives a small table, packs all names into one overlapping text
// string, and prints the Atom constants, hash0, maxAtomLen, the table and
// atomText. A size summary goes to standard error.
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
if *test {
fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\n")
fmt.Printf("var testAtomList = []string{\n")
for _, s := range all {
fmt.Printf("\t%q,\n", s)
}
fmt.Printf("}\n")
return
}
// uniq - lists have dups
// compute max len too
maxLen := 0
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
if maxLen < len(s) {
maxLen = len(s)
}
all[w] = s
w++
}
}
all = all[:w]
// Find hash that minimizes table size.
var best *table
for i := 0; i < 1000000; i++ {
// Stop once a table half the size of the best one could not hold all names.
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
// Try table sizes 1<<k in increasing order; only sizes smaller than the
// current best are worth trying.
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
// Merge the best pair, keeping the shared part only once.
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
// Encode each name as its offset in text (high bits) and its length (low 8 bits).
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
// Generate the Go code.
fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\nconst (\n")
for _, s := range all {
fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Printf(")\n\n")
fmt.Printf("const hash0 = %#x\n\n", best.h0)
fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Printf("}\n")
// Every table entry is a 4-byte Atom.
datasize := (1 << best.k) * 4
fmt.Printf("const atomText =\n")
textsize := len(text)
// Print the text as concatenated string literals of at most 60 bytes each.
for len(text) > 60 {
fmt.Printf("\t%q +\n", text[:60])
text = text[60:]
}
fmt.Printf("\t%q\n\n", text)
fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
// byLen is a string slice with Len, Less and Swap methods that order it by
// decreasing string length.
type byLen []string

// Len returns the number of strings in x.
func (x byLen) Len() int {
	return len(x)
}

// Less reports whether the string at index i is longer than the one at j.
func (x byLen) Less(i, j int) bool {
	return len(x[j]) < len(x[i])
}

// Swap exchanges the strings at indexes i and j.
func (x byLen) Swap(i, j int) {
	tmp := x[i]
	x[i] = x[j]
	x[j] = tmp
}
// fnv computes the FNV hash of s, starting from the arbitrary value h.
// Each byte is XORed into the hash, which is then multiplied by the FNV
// prime 16777619.
func fnv(h uint32, s string) uint32 {
	for _, b := range []byte(s) {
		h = (h ^ uint32(b)) * 16777619
	}
	return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
// h0 is the starting value passed to fnv.
h0 uint32
// k is the number of hash bits used; the table has 1<<k slots.
k uint
// mask is 1<<k - 1 and reduces a hash to a slot index.
mask uint32
// tab holds the strings by slot; "" marks an empty slot.
tab []string
}
// hash returns the two hashes for s: the candidate slots taken from the low
// bits and from bits 16 and up of its FNV hash, each reduced by t.mask.
func (t *table) hash(s string) (h1, h2 uint32) {
	h := fnv(t.h0, s)
	return h & t.mask, (h >> 16) & t.mask
}
// init initializes the table with the given parameters.
// h0 is the initial hash value,
// k is the number of bits of hash value to use, and
// x is the list of strings to store in the table.
// init returns false if the table cannot be constructed.
func (t *table) init(h0 uint32, k uint, x []string) bool {
	*t = table{
		h0:   h0,
		k:    k,
		mask: 1<<k - 1,
		tab:  make([]string, 1<<k),
	}
	for i := range x {
		if ok := t.insert(x[i]); !ok {
			return false
		}
	}
	return true
}
// insert inserts s in the table. It reports false if neither of the two
// slots of s is free and neither occupant could be pushed aside.
func (t *table) insert(s string) bool {
	h1, h2 := t.hash(s)
	slots := [2]uint32{h1, h2}

	// Prefer a slot that is already free.
	for _, h := range slots {
		if t.tab[h] == "" {
			t.tab[h] = s
			return true
		}
	}

	// Otherwise try to move one of the current occupants to its other slot.
	for _, h := range slots {
		if t.push(h, 0) {
			t.tab[h] = s
			return true
		}
	}
	return false
}
// push attempts to push aside the entry in slot i by moving it to its other
// slot, recursively pushing aside whatever occupies that slot.
// depth is the current recursion depth; push gives up and returns false once
// it exceeds the number of slots. On success the entry has been copied to
// its other slot; slot i still holds it and is for the caller to overwrite.
func (t *table) push(i uint32, depth int) bool {
if depth > len(t.tab) {
return false
}
s := t.tab[i]
h1, h2 := t.hash(s)
// i is one of h1 and h2, so this yields the other one.
j := h1 + h2 - i
if t.tab[j] != "" && !t.push(j, depth+1) {
return false
}
t.tab[j] = s
return true
}
// elements lists the HTML element names for which main generates atoms.
//
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 21 February 2015" version.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"alt",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"poster",
"preload",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"span",
"src",
"srcdoc",
"srclang",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"usemap",
"value",
"width",
"wrap",
}
// eventHandlers lists the "on…" event handler attribute keys.
// NOTE(review): presumably merged with elements, attributes and extra when
// the atom table is generated — confirm against the generator's main function.
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

713
vendor/golang.org/x/net/html/atom/table.go generated vendored Normal file
View File

@ -0,0 +1,713 @@
// generated by go run gen.go; DO NOT EDIT
package atom
const (
A Atom = 0x1
Abbr Atom = 0x4
Accept Atom = 0x2106
AcceptCharset Atom = 0x210e
Accesskey Atom = 0x3309
Action Atom = 0x1f606
Address Atom = 0x4f307
Align Atom = 0x1105
Alt Atom = 0x4503
Annotation Atom = 0x1670a
AnnotationXml Atom = 0x1670e
Applet Atom = 0x2b306
Area Atom = 0x2fa04
Article Atom = 0x38807
Aside Atom = 0x8305
Async Atom = 0x7b05
Audio Atom = 0xa605
Autocomplete Atom = 0x1fc0c
Autofocus Atom = 0xb309
Autoplay Atom = 0xce08
B Atom = 0x101
Base Atom = 0xd604
Basefont Atom = 0xd608
Bdi Atom = 0x1a03
Bdo Atom = 0xe703
Bgsound Atom = 0x11807
Big Atom = 0x12403
Blink Atom = 0x12705
Blockquote Atom = 0x12c0a
Body Atom = 0x2f04
Br Atom = 0x202
Button Atom = 0x13606
Canvas Atom = 0x7f06
Caption Atom = 0x1bb07
Center Atom = 0x5b506
Challenge Atom = 0x21f09
Charset Atom = 0x2807
Checked Atom = 0x32807
Cite Atom = 0x3c804
Class Atom = 0x4de05
Code Atom = 0x14904
Col Atom = 0x15003
Colgroup Atom = 0x15008
Color Atom = 0x15d05
Cols Atom = 0x16204
Colspan Atom = 0x16207
Command Atom = 0x17507
Content Atom = 0x42307
Contenteditable Atom = 0x4230f
Contextmenu Atom = 0x3310b
Controls Atom = 0x18808
Coords Atom = 0x19406
Crossorigin Atom = 0x19f0b
Data Atom = 0x44a04
Datalist Atom = 0x44a08
Datetime Atom = 0x23c08
Dd Atom = 0x26702
Default Atom = 0x8607
Defer Atom = 0x14b05
Del Atom = 0x3ef03
Desc Atom = 0x4db04
Details Atom = 0x4807
Dfn Atom = 0x6103
Dialog Atom = 0x1b06
Dir Atom = 0x6903
Dirname Atom = 0x6907
Disabled Atom = 0x10c08
Div Atom = 0x11303
Dl Atom = 0x11e02
Download Atom = 0x40008
Draggable Atom = 0x17b09
Dropzone Atom = 0x39108
Dt Atom = 0x50902
Em Atom = 0x6502
Embed Atom = 0x6505
Enctype Atom = 0x21107
Face Atom = 0x5b304
Fieldset Atom = 0x1b008
Figcaption Atom = 0x1b80a
Figure Atom = 0x1cc06
Font Atom = 0xda04
Footer Atom = 0x8d06
For Atom = 0x1d803
ForeignObject Atom = 0x1d80d
Foreignobject Atom = 0x1e50d
Form Atom = 0x1f204
Formaction Atom = 0x1f20a
Formenctype Atom = 0x20d0b
Formmethod Atom = 0x2280a
Formnovalidate Atom = 0x2320e
Formtarget Atom = 0x2470a
Frame Atom = 0x9a05
Frameset Atom = 0x9a08
H1 Atom = 0x26e02
H2 Atom = 0x29402
H3 Atom = 0x2a702
H4 Atom = 0x2e902
H5 Atom = 0x2f302
H6 Atom = 0x50b02
Head Atom = 0x2d504
Header Atom = 0x2d506
Headers Atom = 0x2d507
Height Atom = 0x25106
Hgroup Atom = 0x25906
Hidden Atom = 0x26506
High Atom = 0x26b04
Hr Atom = 0x27002
Href Atom = 0x27004
Hreflang Atom = 0x27008
Html Atom = 0x25504
HttpEquiv Atom = 0x2780a
I Atom = 0x601
Icon Atom = 0x42204
Id Atom = 0x8502
Iframe Atom = 0x29606
Image Atom = 0x29c05
Img Atom = 0x2a103
Input Atom = 0x3e805
Inputmode Atom = 0x3e809
Ins Atom = 0x1a803
Isindex Atom = 0x2a907
Ismap Atom = 0x2b005
Itemid Atom = 0x33c06
Itemprop Atom = 0x3c908
Itemref Atom = 0x5ad07
Itemscope Atom = 0x2b909
Itemtype Atom = 0x2c308
Kbd Atom = 0x1903
Keygen Atom = 0x3906
Keytype Atom = 0x53707
Kind Atom = 0x10904
Label Atom = 0xf005
Lang Atom = 0x27404
Legend Atom = 0x18206
Li Atom = 0x1202
Link Atom = 0x12804
List Atom = 0x44e04
Listing Atom = 0x44e07
Loop Atom = 0xf404
Low Atom = 0x11f03
Malignmark Atom = 0x100a
Manifest Atom = 0x5f108
Map Atom = 0x2b203
Mark Atom = 0x1604
Marquee Atom = 0x2cb07
Math Atom = 0x2d204
Max Atom = 0x2e103
Maxlength Atom = 0x2e109
Media Atom = 0x6e05
Mediagroup Atom = 0x6e0a
Menu Atom = 0x33804
Menuitem Atom = 0x33808
Meta Atom = 0x45d04
Meter Atom = 0x24205
Method Atom = 0x22c06
Mglyph Atom = 0x2a206
Mi Atom = 0x2eb02
Min Atom = 0x2eb03
Minlength Atom = 0x2eb09
Mn Atom = 0x23502
Mo Atom = 0x3ed02
Ms Atom = 0x2bc02
Mtext Atom = 0x2f505
Multiple Atom = 0x30308
Muted Atom = 0x30b05
Name Atom = 0x6c04
Nav Atom = 0x3e03
Nobr Atom = 0x5704
Noembed Atom = 0x6307
Noframes Atom = 0x9808
Noscript Atom = 0x3d208
Novalidate Atom = 0x2360a
Object Atom = 0x1ec06
Ol Atom = 0xc902
Onabort Atom = 0x13a07
Onafterprint Atom = 0x1c00c
Onautocomplete Atom = 0x1fa0e
Onautocompleteerror Atom = 0x1fa13
Onbeforeprint Atom = 0x6040d
Onbeforeunload Atom = 0x4e70e
Onblur Atom = 0xaa06
Oncancel Atom = 0xe908
Oncanplay Atom = 0x28509
Oncanplaythrough Atom = 0x28510
Onchange Atom = 0x3a708
Onclick Atom = 0x31007
Onclose Atom = 0x31707
Oncontextmenu Atom = 0x32f0d
Oncuechange Atom = 0x3420b
Ondblclick Atom = 0x34d0a
Ondrag Atom = 0x35706
Ondragend Atom = 0x35709
Ondragenter Atom = 0x3600b
Ondragleave Atom = 0x36b0b
Ondragover Atom = 0x3760a
Ondragstart Atom = 0x3800b
Ondrop Atom = 0x38f06
Ondurationchange Atom = 0x39f10
Onemptied Atom = 0x39609
Onended Atom = 0x3af07
Onerror Atom = 0x3b607
Onfocus Atom = 0x3bd07
Onhashchange Atom = 0x3da0c
Oninput Atom = 0x3e607
Oninvalid Atom = 0x3f209
Onkeydown Atom = 0x3fb09
Onkeypress Atom = 0x4080a
Onkeyup Atom = 0x41807
Onlanguagechange Atom = 0x43210
Onload Atom = 0x44206
Onloadeddata Atom = 0x4420c
Onloadedmetadata Atom = 0x45510
Onloadstart Atom = 0x46b0b
Onmessage Atom = 0x47609
Onmousedown Atom = 0x47f0b
Onmousemove Atom = 0x48a0b
Onmouseout Atom = 0x4950a
Onmouseover Atom = 0x4a20b
Onmouseup Atom = 0x4ad09
Onmousewheel Atom = 0x4b60c
Onoffline Atom = 0x4c209
Ononline Atom = 0x4cb08
Onpagehide Atom = 0x4d30a
Onpageshow Atom = 0x4fe0a
Onpause Atom = 0x50d07
Onplay Atom = 0x51706
Onplaying Atom = 0x51709
Onpopstate Atom = 0x5200a
Onprogress Atom = 0x52a0a
Onratechange Atom = 0x53e0c
Onreset Atom = 0x54a07
Onresize Atom = 0x55108
Onscroll Atom = 0x55f08
Onseeked Atom = 0x56708
Onseeking Atom = 0x56f09
Onselect Atom = 0x57808
Onshow Atom = 0x58206
Onsort Atom = 0x58b06
Onstalled Atom = 0x59509
Onstorage Atom = 0x59e09
Onsubmit Atom = 0x5a708
Onsuspend Atom = 0x5bb09
Ontimeupdate Atom = 0xdb0c
Ontoggle Atom = 0x5c408
Onunload Atom = 0x5cc08
Onvolumechange Atom = 0x5d40e
Onwaiting Atom = 0x5e209
Open Atom = 0x3cf04
Optgroup Atom = 0xf608
Optimum Atom = 0x5eb07
Option Atom = 0x60006
Output Atom = 0x49c06
P Atom = 0xc01
Param Atom = 0xc05
Pattern Atom = 0x5107
Ping Atom = 0x7704
Placeholder Atom = 0xc30b
Plaintext Atom = 0xfd09
Poster Atom = 0x15706
Pre Atom = 0x25e03
Preload Atom = 0x25e07
Progress Atom = 0x52c08
Prompt Atom = 0x5fa06
Public Atom = 0x41e06
Q Atom = 0x13101
Radiogroup Atom = 0x30a
Readonly Atom = 0x2fb08
Rel Atom = 0x25f03
Required Atom = 0x1d008
Reversed Atom = 0x5a08
Rows Atom = 0x9204
Rowspan Atom = 0x9207
Rp Atom = 0x1c602
Rt Atom = 0x13f02
Ruby Atom = 0xaf04
S Atom = 0x2c01
Samp Atom = 0x4e04
Sandbox Atom = 0xbb07
Scope Atom = 0x2bd05
Scoped Atom = 0x2bd06
Script Atom = 0x3d406
Seamless Atom = 0x31c08
Section Atom = 0x4e207
Select Atom = 0x57a06
Selected Atom = 0x57a08
Shape Atom = 0x4f905
Size Atom = 0x55504
Sizes Atom = 0x55505
Small Atom = 0x18f05
Sortable Atom = 0x58d08
Sorted Atom = 0x19906
Source Atom = 0x1aa06
Spacer Atom = 0x2db06
Span Atom = 0x9504
Spellcheck Atom = 0x3230a
Src Atom = 0x3c303
Srcdoc Atom = 0x3c306
Srclang Atom = 0x41107
Start Atom = 0x38605
Step Atom = 0x5f704
Strike Atom = 0x53306
Strong Atom = 0x55906
Style Atom = 0x61105
Sub Atom = 0x5a903
Summary Atom = 0x61607
Sup Atom = 0x61d03
Svg Atom = 0x62003
System Atom = 0x62306
Tabindex Atom = 0x46308
Table Atom = 0x42d05
Target Atom = 0x24b06
Tbody Atom = 0x2e05
Td Atom = 0x4702
Template Atom = 0x62608
Textarea Atom = 0x2f608
Tfoot Atom = 0x8c05
Th Atom = 0x22e02
Thead Atom = 0x2d405
Time Atom = 0xdd04
Title Atom = 0xa105
Tr Atom = 0x10502
Track Atom = 0x10505
Translate Atom = 0x14009
Tt Atom = 0x5302
Type Atom = 0x21404
Typemustmatch Atom = 0x2140d
U Atom = 0xb01
Ul Atom = 0x8a02
Usemap Atom = 0x51106
Value Atom = 0x4005
Var Atom = 0x11503
Video Atom = 0x28105
Wbr Atom = 0x12103
Width Atom = 0x50705
Wrap Atom = 0x58704
Xmp Atom = 0xc103
)
const hash0 = 0xc17da63e
const maxAtomLen = 19
var table = [1 << 9]Atom{
0x1: 0x48a0b, // onmousemove
0x2: 0x5e209, // onwaiting
0x3: 0x1fa13, // onautocompleteerror
0x4: 0x5fa06, // prompt
0x7: 0x5eb07, // optimum
0x8: 0x1604, // mark
0xa: 0x5ad07, // itemref
0xb: 0x4fe0a, // onpageshow
0xc: 0x57a06, // select
0xd: 0x17b09, // draggable
0xe: 0x3e03, // nav
0xf: 0x17507, // command
0x11: 0xb01, // u
0x14: 0x2d507, // headers
0x15: 0x44a08, // datalist
0x17: 0x4e04, // samp
0x1a: 0x3fb09, // onkeydown
0x1b: 0x55f08, // onscroll
0x1c: 0x15003, // col
0x20: 0x3c908, // itemprop
0x21: 0x2780a, // http-equiv
0x22: 0x61d03, // sup
0x24: 0x1d008, // required
0x2b: 0x25e07, // preload
0x2c: 0x6040d, // onbeforeprint
0x2d: 0x3600b, // ondragenter
0x2e: 0x50902, // dt
0x2f: 0x5a708, // onsubmit
0x30: 0x27002, // hr
0x31: 0x32f0d, // oncontextmenu
0x33: 0x29c05, // image
0x34: 0x50d07, // onpause
0x35: 0x25906, // hgroup
0x36: 0x7704, // ping
0x37: 0x57808, // onselect
0x3a: 0x11303, // div
0x3b: 0x1fa0e, // onautocomplete
0x40: 0x2eb02, // mi
0x41: 0x31c08, // seamless
0x42: 0x2807, // charset
0x43: 0x8502, // id
0x44: 0x5200a, // onpopstate
0x45: 0x3ef03, // del
0x46: 0x2cb07, // marquee
0x47: 0x3309, // accesskey
0x49: 0x8d06, // footer
0x4a: 0x44e04, // list
0x4b: 0x2b005, // ismap
0x51: 0x33804, // menu
0x52: 0x2f04, // body
0x55: 0x9a08, // frameset
0x56: 0x54a07, // onreset
0x57: 0x12705, // blink
0x58: 0xa105, // title
0x59: 0x38807, // article
0x5b: 0x22e02, // th
0x5d: 0x13101, // q
0x5e: 0x3cf04, // open
0x5f: 0x2fa04, // area
0x61: 0x44206, // onload
0x62: 0xda04, // font
0x63: 0xd604, // base
0x64: 0x16207, // colspan
0x65: 0x53707, // keytype
0x66: 0x11e02, // dl
0x68: 0x1b008, // fieldset
0x6a: 0x2eb03, // min
0x6b: 0x11503, // var
0x6f: 0x2d506, // header
0x70: 0x13f02, // rt
0x71: 0x15008, // colgroup
0x72: 0x23502, // mn
0x74: 0x13a07, // onabort
0x75: 0x3906, // keygen
0x76: 0x4c209, // onoffline
0x77: 0x21f09, // challenge
0x78: 0x2b203, // map
0x7a: 0x2e902, // h4
0x7b: 0x3b607, // onerror
0x7c: 0x2e109, // maxlength
0x7d: 0x2f505, // mtext
0x7e: 0xbb07, // sandbox
0x7f: 0x58b06, // onsort
0x80: 0x100a, // malignmark
0x81: 0x45d04, // meta
0x82: 0x7b05, // async
0x83: 0x2a702, // h3
0x84: 0x26702, // dd
0x85: 0x27004, // href
0x86: 0x6e0a, // mediagroup
0x87: 0x19406, // coords
0x88: 0x41107, // srclang
0x89: 0x34d0a, // ondblclick
0x8a: 0x4005, // value
0x8c: 0xe908, // oncancel
0x8e: 0x3230a, // spellcheck
0x8f: 0x9a05, // frame
0x91: 0x12403, // big
0x94: 0x1f606, // action
0x95: 0x6903, // dir
0x97: 0x2fb08, // readonly
0x99: 0x42d05, // table
0x9a: 0x61607, // summary
0x9b: 0x12103, // wbr
0x9c: 0x30a, // radiogroup
0x9d: 0x6c04, // name
0x9f: 0x62306, // system
0xa1: 0x15d05, // color
0xa2: 0x7f06, // canvas
0xa3: 0x25504, // html
0xa5: 0x56f09, // onseeking
0xac: 0x4f905, // shape
0xad: 0x25f03, // rel
0xae: 0x28510, // oncanplaythrough
0xaf: 0x3760a, // ondragover
0xb0: 0x62608, // template
0xb1: 0x1d80d, // foreignObject
0xb3: 0x9204, // rows
0xb6: 0x44e07, // listing
0xb7: 0x49c06, // output
0xb9: 0x3310b, // contextmenu
0xbb: 0x11f03, // low
0xbc: 0x1c602, // rp
0xbd: 0x5bb09, // onsuspend
0xbe: 0x13606, // button
0xbf: 0x4db04, // desc
0xc1: 0x4e207, // section
0xc2: 0x52a0a, // onprogress
0xc3: 0x59e09, // onstorage
0xc4: 0x2d204, // math
0xc5: 0x4503, // alt
0xc7: 0x8a02, // ul
0xc8: 0x5107, // pattern
0xc9: 0x4b60c, // onmousewheel
0xca: 0x35709, // ondragend
0xcb: 0xaf04, // ruby
0xcc: 0xc01, // p
0xcd: 0x31707, // onclose
0xce: 0x24205, // meter
0xcf: 0x11807, // bgsound
0xd2: 0x25106, // height
0xd4: 0x101, // b
0xd5: 0x2c308, // itemtype
0xd8: 0x1bb07, // caption
0xd9: 0x10c08, // disabled
0xdb: 0x33808, // menuitem
0xdc: 0x62003, // svg
0xdd: 0x18f05, // small
0xde: 0x44a04, // data
0xe0: 0x4cb08, // ononline
0xe1: 0x2a206, // mglyph
0xe3: 0x6505, // embed
0xe4: 0x10502, // tr
0xe5: 0x46b0b, // onloadstart
0xe7: 0x3c306, // srcdoc
0xeb: 0x5c408, // ontoggle
0xed: 0xe703, // bdo
0xee: 0x4702, // td
0xef: 0x8305, // aside
0xf0: 0x29402, // h2
0xf1: 0x52c08, // progress
0xf2: 0x12c0a, // blockquote
0xf4: 0xf005, // label
0xf5: 0x601, // i
0xf7: 0x9207, // rowspan
0xfb: 0x51709, // onplaying
0xfd: 0x2a103, // img
0xfe: 0xf608, // optgroup
0xff: 0x42307, // content
0x101: 0x53e0c, // onratechange
0x103: 0x3da0c, // onhashchange
0x104: 0x4807, // details
0x106: 0x40008, // download
0x109: 0x14009, // translate
0x10b: 0x4230f, // contenteditable
0x10d: 0x36b0b, // ondragleave
0x10e: 0x2106, // accept
0x10f: 0x57a08, // selected
0x112: 0x1f20a, // formaction
0x113: 0x5b506, // center
0x115: 0x45510, // onloadedmetadata
0x116: 0x12804, // link
0x117: 0xdd04, // time
0x118: 0x19f0b, // crossorigin
0x119: 0x3bd07, // onfocus
0x11a: 0x58704, // wrap
0x11b: 0x42204, // icon
0x11d: 0x28105, // video
0x11e: 0x4de05, // class
0x121: 0x5d40e, // onvolumechange
0x122: 0xaa06, // onblur
0x123: 0x2b909, // itemscope
0x124: 0x61105, // style
0x127: 0x41e06, // public
0x129: 0x2320e, // formnovalidate
0x12a: 0x58206, // onshow
0x12c: 0x51706, // onplay
0x12d: 0x3c804, // cite
0x12e: 0x2bc02, // ms
0x12f: 0xdb0c, // ontimeupdate
0x130: 0x10904, // kind
0x131: 0x2470a, // formtarget
0x135: 0x3af07, // onended
0x136: 0x26506, // hidden
0x137: 0x2c01, // s
0x139: 0x2280a, // formmethod
0x13a: 0x3e805, // input
0x13c: 0x50b02, // h6
0x13d: 0xc902, // ol
0x13e: 0x3420b, // oncuechange
0x13f: 0x1e50d, // foreignobject
0x143: 0x4e70e, // onbeforeunload
0x144: 0x2bd05, // scope
0x145: 0x39609, // onemptied
0x146: 0x14b05, // defer
0x147: 0xc103, // xmp
0x148: 0x39f10, // ondurationchange
0x149: 0x1903, // kbd
0x14c: 0x47609, // onmessage
0x14d: 0x60006, // option
0x14e: 0x2eb09, // minlength
0x14f: 0x32807, // checked
0x150: 0xce08, // autoplay
0x152: 0x202, // br
0x153: 0x2360a, // novalidate
0x156: 0x6307, // noembed
0x159: 0x31007, // onclick
0x15a: 0x47f0b, // onmousedown
0x15b: 0x3a708, // onchange
0x15e: 0x3f209, // oninvalid
0x15f: 0x2bd06, // scoped
0x160: 0x18808, // controls
0x161: 0x30b05, // muted
0x162: 0x58d08, // sortable
0x163: 0x51106, // usemap
0x164: 0x1b80a, // figcaption
0x165: 0x35706, // ondrag
0x166: 0x26b04, // high
0x168: 0x3c303, // src
0x169: 0x15706, // poster
0x16b: 0x1670e, // annotation-xml
0x16c: 0x5f704, // step
0x16d: 0x4, // abbr
0x16e: 0x1b06, // dialog
0x170: 0x1202, // li
0x172: 0x3ed02, // mo
0x175: 0x1d803, // for
0x176: 0x1a803, // ins
0x178: 0x55504, // size
0x179: 0x43210, // onlanguagechange
0x17a: 0x8607, // default
0x17b: 0x1a03, // bdi
0x17c: 0x4d30a, // onpagehide
0x17d: 0x6907, // dirname
0x17e: 0x21404, // type
0x17f: 0x1f204, // form
0x181: 0x28509, // oncanplay
0x182: 0x6103, // dfn
0x183: 0x46308, // tabindex
0x186: 0x6502, // em
0x187: 0x27404, // lang
0x189: 0x39108, // dropzone
0x18a: 0x4080a, // onkeypress
0x18b: 0x23c08, // datetime
0x18c: 0x16204, // cols
0x18d: 0x1, // a
0x18e: 0x4420c, // onloadeddata
0x190: 0xa605, // audio
0x192: 0x2e05, // tbody
0x193: 0x22c06, // method
0x195: 0xf404, // loop
0x196: 0x29606, // iframe
0x198: 0x2d504, // head
0x19e: 0x5f108, // manifest
0x19f: 0xb309, // autofocus
0x1a0: 0x14904, // code
0x1a1: 0x55906, // strong
0x1a2: 0x30308, // multiple
0x1a3: 0xc05, // param
0x1a6: 0x21107, // enctype
0x1a7: 0x5b304, // face
0x1a8: 0xfd09, // plaintext
0x1a9: 0x26e02, // h1
0x1aa: 0x59509, // onstalled
0x1ad: 0x3d406, // script
0x1ae: 0x2db06, // spacer
0x1af: 0x55108, // onresize
0x1b0: 0x4a20b, // onmouseover
0x1b1: 0x5cc08, // onunload
0x1b2: 0x56708, // onseeked
0x1b4: 0x2140d, // typemustmatch
0x1b5: 0x1cc06, // figure
0x1b6: 0x4950a, // onmouseout
0x1b7: 0x25e03, // pre
0x1b8: 0x50705, // width
0x1b9: 0x19906, // sorted
0x1bb: 0x5704, // nobr
0x1be: 0x5302, // tt
0x1bf: 0x1105, // align
0x1c0: 0x3e607, // oninput
0x1c3: 0x41807, // onkeyup
0x1c6: 0x1c00c, // onafterprint
0x1c7: 0x210e, // accept-charset
0x1c8: 0x33c06, // itemid
0x1c9: 0x3e809, // inputmode
0x1cb: 0x53306, // strike
0x1cc: 0x5a903, // sub
0x1cd: 0x10505, // track
0x1ce: 0x38605, // start
0x1d0: 0xd608, // basefont
0x1d6: 0x1aa06, // source
0x1d7: 0x18206, // legend
0x1d8: 0x2d405, // thead
0x1da: 0x8c05, // tfoot
0x1dd: 0x1ec06, // object
0x1de: 0x6e05, // media
0x1df: 0x1670a, // annotation
0x1e0: 0x20d0b, // formenctype
0x1e2: 0x3d208, // noscript
0x1e4: 0x55505, // sizes
0x1e5: 0x1fc0c, // autocomplete
0x1e6: 0x9504, // span
0x1e7: 0x9808, // noframes
0x1e8: 0x24b06, // target
0x1e9: 0x38f06, // ondrop
0x1ea: 0x2b306, // applet
0x1ec: 0x5a08, // reversed
0x1f0: 0x2a907, // isindex
0x1f3: 0x27008, // hreflang
0x1f5: 0x2f302, // h5
0x1f6: 0x4f307, // address
0x1fa: 0x2e103, // max
0x1fb: 0xc30b, // placeholder
0x1fc: 0x2f608, // textarea
0x1fe: 0x4ad09, // onmouseup
0x1ff: 0x3800b, // ondragstart
}
const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
"genavaluealtdetailsampatternobreversedfnoembedirnamediagroup" +
"ingasyncanvasidefaultfooterowspanoframesetitleaudionblurubya" +
"utofocusandboxmplaceholderautoplaybasefontimeupdatebdoncance" +
"labelooptgrouplaintextrackindisabledivarbgsoundlowbrbigblink" +
"blockquotebuttonabortranslatecodefercolgroupostercolorcolspa" +
"nnotation-xmlcommandraggablegendcontrolsmallcoordsortedcross" +
"originsourcefieldsetfigcaptionafterprintfigurequiredforeignO" +
"bjectforeignobjectformactionautocompleteerrorformenctypemust" +
"matchallengeformmethodformnovalidatetimeterformtargetheightm" +
"lhgroupreloadhiddenhigh1hreflanghttp-equivideoncanplaythroug" +
"h2iframeimageimglyph3isindexismappletitemscopeditemtypemarqu" +
"eematheaderspacermaxlength4minlength5mtextareadonlymultiplem" +
"utedonclickoncloseamlesspellcheckedoncontextmenuitemidoncuec" +
"hangeondblclickondragendondragenterondragleaveondragoverondr" +
"agstarticleondropzonemptiedondurationchangeonendedonerroronf" +
"ocusrcdocitempropenoscriptonhashchangeoninputmodeloninvalido" +
"nkeydownloadonkeypressrclangonkeyupublicontenteditableonlang" +
"uagechangeonloadeddatalistingonloadedmetadatabindexonloadsta" +
"rtonmessageonmousedownonmousemoveonmouseoutputonmouseoveronm" +
"ouseuponmousewheelonofflineononlineonpagehidesclassectionbef" +
"oreunloaddresshapeonpageshowidth6onpausemaponplayingonpopsta" +
"teonprogresstrikeytypeonratechangeonresetonresizestrongonscr" +
"ollonseekedonseekingonselectedonshowraponsortableonstalledon" +
"storageonsubmitemrefacenteronsuspendontoggleonunloadonvolume" +
"changeonwaitingoptimumanifestepromptoptionbeforeprintstylesu" +
"mmarysupsvgsystemplate"

102
vendor/golang.org/x/net/html/const.go generated vendored Normal file
View File

@ -0,0 +1,102 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
"address": true,
"applet": true,
"area": true,
"article": true,
"aside": true,
"base": true,
"basefont": true,
"bgsound": true,
"blockquote": true,
"body": true,
"br": true,
"button": true,
"caption": true,
"center": true,
"col": true,
"colgroup": true,
"dd": true,
"details": true,
"dir": true,
"div": true,
"dl": true,
"dt": true,
"embed": true,
"fieldset": true,
"figcaption": true,
"figure": true,
"footer": true,
"form": true,
"frame": true,
"frameset": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"header": true,
"hgroup": true,
"hr": true,
"html": true,
"iframe": true,
"img": true,
"input": true,
"isindex": true,
"li": true,
"link": true,
"listing": true,
"marquee": true,
"menu": true,
"meta": true,
"nav": true,
"noembed": true,
"noframes": true,
"noscript": true,
"object": true,
"ol": true,
"p": true,
"param": true,
"plaintext": true,
"pre": true,
"script": true,
"section": true,
"select": true,
"source": true,
"style": true,
"summary": true,
"table": true,
"tbody": true,
"td": true,
"template": true,
"textarea": true,
"tfoot": true,
"th": true,
"thead": true,
"title": true,
"tr": true,
"track": true,
"ul": true,
"wbr": true,
"xmp": true,
}
// isSpecialElement reports whether element belongs to the "special" category
// of the HTML parsing algorithm (see the "stack of open elements" section of
// the HTML Standard).
//
// HTML elements (empty or "html" namespace) are looked up in
// isSpecialElementMap. In addition the category contains the MathML elements
// mi, mo, mn, ms, mtext and annotation-xml, and the SVG elements
// foreignObject, desc and title. Element names in foreign namespaces are
// compared case-sensitively.
func isSpecialElement(element *Node) bool {
	switch element.Namespace {
	case "", "html":
		return isSpecialElementMap[element.Data]
	case "math":
		switch element.Data {
		case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
			return true
		}
	case "svg":
		switch element.Data {
		case "foreignObject", "desc", "title":
			return true
		}
	}
	return false
}

106
vendor/golang.org/x/net/html/doc.go generated vendored Normal file
View File

@ -0,0 +1,106 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package html implements an HTML5-compliant tokenizer and parser.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
z := html.NewTokenizer(r)
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
which parses the next token and returns its type, or an error:
for {
tt := z.Next()
if tt == html.ErrorToken {
// ...
return ...
}
// Process the current token.
}
There are two APIs for retrieving the current token. The high-level API is to
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
allow optionally calling Raw after Next but before Token, Text, TagName, or
TagAttr. In EBNF notation, the valid call sequence per token is:
Next {Raw} [ Token | Text | TagName {TagAttr} ]
Token returns an independent data structure that completely describes a token.
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
if z.Next() == html.ErrorToken {
// Returning io.EOF indicates success.
return z.Err()
}
emitToken(z.Token())
}
The low-level API performs fewer allocations and copies, but the contents of
the []byte values returned by Text, TagName and TagAttr may change on the next
call to Next. For example, to extract an HTML page's anchor text:
depth := 0
for {
tt := z.Next()
switch tt {
case ErrorToken:
return z.Err()
case TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
Parsing is done by calling Parse with an io.Reader, which returns the root of
the parse tree (the document element) as a *Node. It is the caller's
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
example, to process each anchor node in depth-first order:
doc, err := html.Parse(r)
if err != nil {
// ...
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
*/
package html // import "golang.org/x/net/html"
// The tokenization algorithm implemented by this package is not a line-by-line
// transliteration of the relatively verbose state-machine in the WHATWG
// specification. A more direct approach is used instead, where the program
// counter implies the state, such as whether it is tokenizing a tag or a text
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?

156
vendor/golang.org/x/net/html/doctype.go generated vendored Normal file
View File

@ -0,0 +1,156 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// parseDoctype parses the data from a DoctypeToken into a name,
// public identifier, and system identifier. It returns a Node whose Type
// is DoctypeNode, whose Data is the lower-cased name, and which has attributes
// named "public" and "system" for the two identifiers if they were present.
// quirks is whether the document should be parsed in "quirks mode".
func parseDoctype(s string) (n *Node, quirks bool) {
	n = &Node{Type: DoctypeNode}

	// Find the name.
	space := strings.IndexAny(s, whitespace)
	if space == -1 {
		space = len(s)
	}
	n.Data = s[:space]
	// The comparison to "html" is case-sensitive.
	if n.Data != "html" {
		quirks = true
	}
	n.Data = strings.ToLower(n.Data)
	s = strings.TrimLeft(s[space:], whitespace)

	if len(s) < 6 {
		// It can't start with "PUBLIC" or "SYSTEM".
		// Ignore the rest of the string.
		return n, quirks || s != ""
	}

	// Consume an optional PUBLIC identifier followed by an optional SYSTEM
	// identifier. key names the identifier expected next; it ends up empty
	// only after a system identifier has been read.
	key := strings.ToLower(s[:6])
	s = s[6:]
	for key == "public" || key == "system" {
		s = strings.TrimLeft(s, whitespace)
		if s == "" {
			break
		}
		quote := s[0]
		if quote != '"' && quote != '\'' {
			break
		}
		s = s[1:]
		q := strings.IndexRune(s, rune(quote))
		var id string
		if q == -1 {
			// Unterminated quote: the identifier is the rest of the data.
			id = s
			s = ""
		} else {
			id = s[:q]
			s = s[q+1:]
		}
		n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
		if key == "public" {
			key = "system"
		} else {
			key = ""
		}
	}

	if key != "" || s != "" {
		// An expected identifier was missing or unparsed data remains.
		quirks = true
	} else if len(n.Attr) > 0 {
		if n.Attr[0].Key == "public" {
			public := strings.ToLower(n.Attr[0].Val)
			switch public {
			// Public identifiers that trigger quirks mode on an exact
			// (case-insensitive) match. The HTML Standard spells the second
			// one "-/W3C/DTD HTML 4.0 Transitional/EN".
			case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 transitional/en", "html":
				quirks = true
			default:
				for _, q := range quirkyIDs {
					if strings.HasPrefix(public, q) {
						quirks = true
						break
					}
				}
			}
			// The following two public IDs only cause quirks mode if there is no system ID.
			// NOTE(review): this branch is only reached after a system
			// identifier has been read, so len(n.Attr) == 1 looks
			// unreachable when Attr[0] is "public" — confirm.
			if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
				strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
				quirks = true
			}
		}
		if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
			strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
			quirks = true
		}
	}

	return n, quirks
}
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. The identifiers should be in lower case.
var quirkyIDs = []string{
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//",
}

2253
vendor/golang.org/x/net/html/entity.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

258
vendor/golang.org/x/net/html/escape.go generated vendored Normal file
View File

@ -0,0 +1,258 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"strings"
"unicode/utf8"
)
// replacementTable lists, for the numeric character references 0x80 through
// 0x9F in order, the rune each one is replaced with. unescapeEntity indexes
// it with (x - 0x80).
//
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
'\u0192',
'\u201E',
'\u2026',
'\u2020',
'\u2021',
'\u02C6',
'\u2030',
'\u0160',
'\u2039',
'\u0152',
'\u008D',
'\u017D',
'\u008F',
'\u0090',
'\u2018',
'\u2019',
'\u201C',
'\u201D',
'\u2022',
'\u2013',
'\u2014',
'\u02DC',
'\u2122',
'\u0161',
'\u203A',
'\u0153',
'\u009D',
'\u017E',
'\u0178', // Last entry is 0x9F.
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
// attribute should be true if parsing an attribute value.
//
// Numeric references ("&#65;", "&#x41;") are decoded arithmetically; values in
// 0x80-0x9F are mapped through replacementTable, and zero, surrogates and
// values above 0x10FFFF become U+FFFD. Named references are looked up in the
// entity and entity2 tables. Input that is not recognized as a reference is
// copied from src to dst unchanged.
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
	// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference

	// i starts at 1 because we already know that s[0] == '&'.
	i, s := 1, b[src:]

	if len(s) <= 1 {
		// A lone trailing '&' is copied through as-is.
		b[dst] = b[src]
		return dst + 1, src + 1
	}

	if s[i] == '#' {
		if len(s) <= 3 { // We need to have at least "&#.".
			b[dst] = b[src]
			return dst + 1, src + 1
		}
		i++
		c := s[i]
		hex := false
		base := rune(10)
		if c == 'x' || c == 'X' {
			hex = true
			base = 16
			i++
		}

		x := '\x00'
		for i < len(s) {
			c = s[i]
			i++
			// digit stays negative when c is not a digit in the current base.
			var digit rune = -1
			switch {
			case '0' <= c && c <= '9':
				digit = rune(c) - '0'
			case hex && 'a' <= c && c <= 'f':
				digit = rune(c) - 'a' + 10
			case hex && 'A' <= c && c <= 'F':
				digit = rune(c) - 'A' + 10
			}
			if digit >= 0 {
				x = base*x + digit
				// Saturate just above the largest valid code point. Without
				// this, a long run of digits overflows the 32-bit rune and can
				// wrap around to a valid code point (e.g. "&#4294967361;"
				// would decode to "A") instead of being rejected below.
				if x > utf8.MaxRune {
					x = utf8.MaxRune + 1
				}
				continue
			}
			// A terminating ';' is consumed; any other byte is left for the caller.
			if c != ';' {
				i--
			}
			break
		}

		if i <= 3 { // No characters matched.
			b[dst] = b[src]
			return dst + 1, src + 1
		}

		if 0x80 <= x && x <= 0x9F {
			// Replace characters from Windows-1252 with UTF-8 equivalents.
			x = replacementTable[x-0x80]
		} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
			// Replace invalid characters with the replacement character.
			x = '\uFFFD'
		}

		return dst + utf8.EncodeRune(b[dst:], x), src + i
	}

	// Consume the maximum number of characters possible, with the
	// consumed characters matching one of the named references.

	for i < len(s) {
		c := s[i]
		i++
		// Lower-cased characters are more common in entities, so we check for them first.
		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
			continue
		}
		if c != ';' {
			i--
		}
		break
	}

	// entityName is everything after the '&', including the ';' if present.
	entityName := string(s[1:i])
	if entityName == "" {
		// No-op.
	} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
		// No-op: inside an attribute value, a name without a trailing ';'
		// that is immediately followed by '=' is left untouched.
	} else if x := entity[entityName]; x != 0 {
		return dst + utf8.EncodeRune(b[dst:], x), src + i
	} else if x := entity2[entityName]; x[0] != 0 {
		// entity2 entries expand to two runes.
		dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
		return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
	} else if !attribute {
		// Outside attributes, try progressively shorter prefixes of the name,
		// starting no longer than longestEntityWithoutSemicolon.
		maxLen := len(entityName) - 1
		if maxLen > longestEntityWithoutSemicolon {
			maxLen = longestEntityWithoutSemicolon
		}
		for j := maxLen; j > 1; j-- {
			if x := entity[entityName[:j]]; x != 0 {
				return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
			}
		}
	}

	// Nothing matched: copy the consumed bytes through verbatim.
	dst1, src1 = dst+i, src+i
	copy(b[dst:dst1], b[src:src1])
	return dst1, src1
}
// unescape rewrites the entities in b in place and returns the (possibly
// shorter) prefix of b holding the result, so that "a&lt;b" becomes "a<b".
// If b contains no '&' it is returned untouched.
// attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
	first := bytes.IndexByte(b, '&')
	if first < 0 {
		return b
	}

	// Everything before the first '&' is already in place; from there on,
	// w is the write cursor and r the read cursor.
	w, r := unescapeEntity(b, first, first, attribute)
	for r < len(b) {
		if b[r] == '&' {
			w, r = unescapeEntity(b, w, r, attribute)
			continue
		}
		b[w] = b[r]
		w++
		r++
	}
	return b[:w]
}
// lower converts every ASCII upper-case letter (A-Z) in b to lower case,
// modifying b in place, and returns b. For example "aBc" becomes "abc".
// All other bytes are left unchanged.
func lower(b []byte) []byte {
	for i := 0; i < len(b); i++ {
		if ch := b[i]; ch >= 'A' && ch <= 'Z' {
			b[i] = ch + ('a' - 'A')
		}
	}
	return b
}
const escapedChars = "&'<>\"\r"

// escape writes s to w, substituting a character reference for each
// occurrence of a byte listed in escapedChars. It stops at, and returns, the
// first error reported by w.
func escape(w writer, s string) error {
	for {
		pos := strings.IndexAny(s, escapedChars)
		if pos == -1 {
			break
		}
		// Flush the run of ordinary text that precedes the special byte.
		if _, err := w.WriteString(s[:pos]); err != nil {
			return err
		}
		var ref string
		switch s[pos] {
		case '&':
			ref = "&amp;"
		case '\'':
			// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
			ref = "&#39;"
		case '<':
			ref = "&lt;"
		case '>':
			ref = "&gt;"
		case '"':
			// "&#34;" is shorter than "&quot;".
			ref = "&#34;"
		case '\r':
			ref = "&#13;"
		default:
			panic("unrecognized escape character")
		}
		s = s[pos+1:]
		if _, err := w.WriteString(ref); err != nil {
			return err
		}
	}
	// Whatever remains contains no special bytes.
	_, err := w.WriteString(s)
	return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only the characters listed in escapedChars: <, >, &, ', " and
// carriage return. A string containing none of them is returned as-is.
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
	if !strings.ContainsAny(s, escapedChars) {
		return s
	}
	var out bytes.Buffer
	// The error is ignored: writes to a bytes.Buffer always return a nil error.
	escape(&out, s)
	return out.String()
}
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&#xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
//
// A string without any '&' is returned as-is, without copying.
func UnescapeString(s string) string {
	if strings.IndexByte(s, '&') < 0 {
		return s
	}
	return string(unescape([]byte(s), false))
}

226
vendor/golang.org/x/net/html/foreign.go generated vendored Normal file
View File

@ -0,0 +1,226 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// adjustAttributeNames replaces, in place, the Key of every attribute in aa
// that appears as a key in nameMap with the mapped name. Attributes whose Key
// is not in nameMap are left unchanged.
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
	for idx := 0; idx < len(aa); idx++ {
		attr := &aa[idx]
		adjusted, found := nameMap[attr.Key]
		if !found {
			continue
		}
		attr.Key = adjusted
	}
}
// adjustForeignAttributes splits, in place, the listed "xlink:", "xml:" and
// "xmlns:xlink" attribute keys at their colon: the part before the colon is
// stored in Namespace and the part after it becomes the new Key. All other
// attributes are left unchanged.
func adjustForeignAttributes(aa []Attribute) {
	for idx := range aa {
		key := aa[idx].Key
		// Every key of interest starts with 'x'; skip the rest cheaply.
		if len(key) == 0 || key[0] != 'x' {
			continue
		}
		switch key {
		case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
			"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
			colon := strings.Index(key, ":")
			aa[idx].Namespace = key[:colon]
			aa[idx].Key = key[colon+1:]
		}
	}
}
// htmlIntegrationPoint reports whether n is an element node that is either
// an SVG <desc>, <foreignObject> or <title>, or a MathML <annotation-xml>
// carrying an "encoding" attribute equal (ignoring case) to "text/html" or
// "application/xhtml+xml".
func htmlIntegrationPoint(n *Node) bool {
	if n.Type != ElementNode {
		return false
	}
	if n.Namespace == "svg" {
		return n.Data == "desc" || n.Data == "foreignObject" || n.Data == "title"
	}
	if n.Namespace != "math" || n.Data != "annotation-xml" {
		return false
	}
	for _, attr := range n.Attr {
		if attr.Key != "encoding" {
			continue
		}
		switch strings.ToLower(attr.Val) {
		case "text/html", "application/xhtml+xml":
			return true
		}
	}
	return false
}
// mathMLTextIntegrationPoint reports whether n is in the "math" namespace and
// is one of the elements mi, mo, mn, ms or mtext.
func mathMLTextIntegrationPoint(n *Node) bool {
	if n.Namespace == "math" {
		switch n.Data {
		case "mi", "mo", "mn", "ms", "mtext":
			return true
		}
	}
	return false
}
// breakout is a set of lower-case tag names: every listed key maps to true,
// so looking up any other name yields false.
// NOTE(review): the lookup site is not visible here; presumably these are the
// tags that end foreign (SVG/MathML) content per the section cited below —
// confirm against the parser.
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
"big": true,
"blockquote": true,
"body": true,
"br": true,
"center": true,
"code": true,
"dd": true,
"div": true,
"dl": true,
"dt": true,
"em": true,
"embed": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"hr": true,
"i": true,
"img": true,
"li": true,
"listing": true,
"menu": true,
"meta": true,
"nobr": true,
"ol": true,
"p": true,
"pre": true,
"ruby": true,
"s": true,
"small": true,
"span": true,
"strong": true,
"strike": true,
"sub": true,
"sup": true,
"table": true,
"tt": true,
"u": true,
"ul": true,
"var": true,
}
// svgTagNameAdjustments maps the all-lower-case spelling of an SVG tag name
// to its mixed-case spelling (e.g. "clippath" to "clipPath").
// NOTE(review): the code that applies this table is not visible here;
// presumably the parser uses it to restore case on SVG elements — confirm.
// Section 12.2.5.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// mathMLAttributeAdjustments maps the all-lower-case spelling of a MathML
// attribute name to its mixed-case spelling. It has the map[string]string
// shape accepted by adjustAttributeNames' nameMap parameter.
// Section 12.2.5.1
var mathMLAttributeAdjustments = map[string]string{
"definitionurl": "definitionURL",
}
// svgAttributeAdjustments maps the all-lower-case spelling of an SVG
// attribute name to its mixed-case spelling (e.g. "viewbox" to "viewBox").
// It has the map[string]string shape accepted by adjustAttributeNames'
// nameMap parameter.
var svgAttributeAdjustments = map[string]string{
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan",
}

193
vendor/golang.org/x/net/html/node.go generated vendored Normal file
View File

@ -0,0 +1,193 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"golang.org/x/net/html/atom"
)
// A NodeType is the type of a Node.
type NodeType uint32
// The possible NodeType values, numbered consecutively from zero.
// scopeMarkerNode is unexported; it is the Type of the scopeMarker sentinel.
const (
ErrorNode NodeType = iota
TextNode
DocumentNode
ElementNode
CommentNode
DoctypeNode
scopeMarkerNode
)
// scopeMarker is a sentinel Node whose Type is scopeMarkerNode.
//
// Section 12.2.3.3 says "scope markers are inserted when entering applet
// elements, buttons, object elements, marquees, table cells, and table
// captions, and are used to prevent formatting from 'leaking'".
var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
// Tree links. They are maintained by InsertBefore, AppendChild and
// RemoveChild; a detached node has a nil Parent and nil siblings.
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
// Type is the kind of node.
Type NodeType
// DataAtom is the atom for Data, or zero if Data is not a known tag name.
DataAtom atom.Atom
// Data is the tag name for element nodes and the content for text nodes.
Data string
// Namespace is "" for HTML, or the short form "math" or "svg".
Namespace string
// Attr holds the node's attributes.
Attr []Attribute
}
// InsertBefore links newChild into n's list of children directly ahead of
// oldChild. A nil oldChild means "insert at the end", making newChild the
// last child of n.
//
// It will panic if newChild already has a parent or siblings.
func (n *Node) InsertBefore(newChild, oldChild *Node) {
	attached := newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil
	if attached {
		panic("html: InsertBefore called for an attached child Node")
	}

	// Work out the two neighbours newChild will sit between.
	var before *Node
	after := oldChild
	if after == nil {
		before = n.LastChild
	} else {
		before = after.PrevSibling
	}

	// Hook up the left side, falling back to n's head pointer.
	if before == nil {
		n.FirstChild = newChild
	} else {
		before.NextSibling = newChild
	}

	// Hook up the right side, falling back to n's tail pointer.
	if after == nil {
		n.LastChild = newChild
	} else {
		after.PrevSibling = newChild
	}

	newChild.Parent, newChild.PrevSibling, newChild.NextSibling = n, before, after
}
// AppendChild makes c the last child of n.
//
// It will panic if c already has a parent or siblings.
func (n *Node) AppendChild(c *Node) {
	switch {
	case c.Parent != nil, c.PrevSibling != nil, c.NextSibling != nil:
		panic("html: AppendChild called for an attached child Node")
	}

	tail := n.LastChild
	if tail == nil {
		// n had no children, so c is also the first one.
		n.FirstChild = c
	} else {
		tail.NextSibling = c
	}
	n.LastChild = c
	c.Parent, c.PrevSibling = n, tail
}
// RemoveChild detaches c from n's list of children. Once it returns, c has
// neither a parent nor siblings, and c's former neighbours are linked to
// each other.
//
// It will panic if c's parent is not n.
func (n *Node) RemoveChild(c *Node) {
	if c.Parent != n {
		panic("html: RemoveChild called for a non-child Node")
	}

	prev, next := c.PrevSibling, c.NextSibling

	if n.FirstChild == c {
		n.FirstChild = next
	}
	if next != nil {
		next.PrevSibling = prev
	}
	if n.LastChild == c {
		n.LastChild = prev
	}
	if prev != nil {
		prev.NextSibling = next
	}

	c.Parent, c.PrevSibling, c.NextSibling = nil, nil, nil
}
// reparentChildren moves every child of src, in order, to the end of dst's
// children, leaving src childless.
func reparentChildren(dst, src *Node) {
	for child := src.FirstChild; child != nil; child = src.FirstChild {
		src.RemoveChild(child)
		dst.AppendChild(child)
	}
}
// clone returns a detached copy of n: the copy shares n's Type, DataAtom and
// Data and gets its own copy of the attribute slice, but has no parent, no
// siblings and no children. Namespace is not carried over; it is left at its
// zero value.
func (n *Node) clone() *Node {
	attrs := make([]Attribute, len(n.Attr))
	copy(attrs, n.Attr)
	return &Node{
		Type:     n.Type,
		DataAtom: n.DataAtom,
		Data:     n.Data,
		Attr:     attrs,
	}
}
// nodeStack is a stack of nodes. The last element of the slice is the top of
// the stack.
type nodeStack []*Node
// pop removes the top node from the stack and returns it. It will panic if
// s is empty.
func (s *nodeStack) pop() *Node {
	last := len(*s) - 1
	n := (*s)[last]
	*s = (*s)[:last]
	return n
}
// top returns the node on top of the stack (the most recently pushed one)
// without removing it, or nil if s is empty.
func (s *nodeStack) top() *Node {
	stack := *s
	if len(stack) == 0 {
		return nil
	}
	return stack[len(stack)-1]
}
// index searches the stack from the top down and returns the position of the
// first node identical to n, or -1 if n is not on the stack.
func (s *nodeStack) index(n *Node) int {
	stack := *s
	i := len(stack) - 1
	// Walk downwards until n is found or i runs off the bottom (-1).
	for i >= 0 && stack[i] != n {
		i--
	}
	return i
}
// insert places n at position i, shifting the nodes at i and above up by one.
func (s *nodeStack) insert(i int, n *Node) {
	*s = append(*s, nil)
	stack := *s
	copy(stack[i+1:], stack[i:])
	stack[i] = n
}
// remove deletes the top-most occurrence of n from the stack, closing the
// gap. It is a no-op if n is not present.
func (s *nodeStack) remove(n *Node) {
	pos := s.index(n)
	if pos < 0 {
		return
	}
	stack := *s
	last := len(stack) - 1
	copy(stack[pos:], stack[pos+1:])
	// Clear the vacated slot so the slice no longer references the node.
	stack[last] = nil
	*s = stack[:last]
}

2094
vendor/golang.org/x/net/html/parse.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

271
vendor/golang.org/x/net/html/render.go generated vendored Normal file
View File

@ -0,0 +1,271 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
// writer is the destination the rendering functions write to: an io.Writer
// that can additionally write single bytes and whole strings. Render uses
// its argument directly when it satisfies this interface and otherwise wraps
// it in a bufio.Writer.
type writer interface {
io.Writer
io.ByteWriter
WriteString(string) (int, error)
}
// Render writes the HTML serialization of the parse tree rooted at n to w.
// If w does not already provide byte and string writes, output goes through
// a buffered writer that is flushed before Render returns.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error {
	direct, ok := w.(writer)
	if !ok {
		buffered := bufio.NewWriter(w)
		if err := render(buffered, n); err != nil {
			return err
		}
		return buffered.Flush()
	}
	return render(direct, n)
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
// It is an internal sentinel: render converts it to a nil error, so it is
// not returned to callers of Render.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
// render serializes n to w via render1. The plaintextAbort sentinel that
// render1 uses to stop early is treated as success; any other error is
// returned unchanged.
func render(w writer, n *Node) error {
	if err := render1(w, n); err != nil && err != plaintextAbort {
		return err
	}
	return nil
}
// render1 writes the HTML serialization of n and, recursively, of its
// children to w.
//
// It returns the first error reported by w, an error for an ErrorNode or an
// unknown node type, an error if a void element has child nodes, or
// plaintextAbort once the contents of a <plaintext> element have been
// written (in which case no closing tag is emitted).
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
return errors.New("html: cannot render an ErrorNode node")
case TextNode:
return escape(w, n.Data)
case DocumentNode:
// A document has no markup of its own; render its children in order.
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
return nil
case ElementNode:
// No-op.
case CommentNode:
// The comment text is written as-is, without escaping.
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
return err
}
return nil
case DoctypeNode:
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if n.Attr != nil {
// The public and system identifiers are stored as attributes keyed
// "public" and "system".
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" {
// PUBLIC identifier, optionally followed by the system identifier.
if _, err := w.WriteString(" PUBLIC "); err != nil {
return err
}
if err := writeQuoted(w, p); err != nil {
return err
}
if s != "" {
if err := w.WriteByte(' '); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
} else if s != "" {
// System identifier only.
if _, err := w.WriteString(" SYSTEM "); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
}
return w.WriteByte('>')
default:
return errors.New("html: unknown node type")
}
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
// Each attribute is written as ` key="value"`, prefixed with "namespace:"
// when it has one; only the value is escaped.
for _, a := range n.Attr {
if err := w.WriteByte(' '); err != nil {
return err
}
if a.Namespace != "" {
if _, err := w.WriteString(a.Namespace); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
return err
}
}
if _, err := w.WriteString(a.Key); err != nil {
return err
}
if _, err := w.WriteString(`="`); err != nil {
return err
}
if err := escape(w, a.Val); err != nil {
return err
}
if err := w.WriteByte('"'); err != nil {
return err
}
}
// Void elements are closed with "/>" and get neither children nor an end tag.
if voidElements[n.Data] {
if n.FirstChild != nil {
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
}
_, err := w.WriteString("/>")
return err
}
if err := w.WriteByte('>'); err != nil {
return err
}
// Add initial newline where there is danger of a newline being ignored.
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
switch n.Data {
case "pre", "listing", "textarea":
if err := w.WriteByte('\n'); err != nil {
return err
}
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
// Text children of these elements are written verbatim rather than
// through escape; non-text children are rendered normally.
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
return err
}
} else {
if err := render1(w, c); err != nil {
return err
}
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
}
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
return w.WriteByte('>')
}
// writeQuoted writes s to w wrapped in a pair of quote characters. Double
// quotes are used unless s itself contains a double quote, in which case
// single quotes are used instead. s is written as-is, without escaping.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error {
	quote := byte('"')
	if strings.IndexByte(s, '"') >= 0 {
		quote = '\''
	}
	if err := w.WriteByte(quote); err != nil {
		return err
	}
	if _, err := w.WriteString(s); err != nil {
		return err
	}
	return w.WriteByte(quote)
}
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
//
// voidElements is a set keyed by tag name: every listed name maps to true,
// so looking up any other name yields false. render1 closes these elements
// with "/>" and reports an error if one of them has child nodes.
var voidElements = map[string]bool{
"area": true,
"base": true,
"br": true,
"col": true,
"command": true,
"embed": true,
"hr": true,
"img": true,
"input": true,
"keygen": true,
"link": true,
"meta": true,
"param": true,
"source": true,
"track": true,
"wbr": true,
}

1219
vendor/golang.org/x/net/html/token.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

24
vendor/vendor.json vendored
View File

@ -242,6 +242,18 @@
"revision": "4da3e2cfbabc9f751898f250b49f2439785783a1",
"revisionTime": "2017-03-29T04:21:07Z"
},
{
"checksumSHA1": "62c090psZxeg45bfq85D/+Xbczg=",
"path": "github.com/golang-commonmark/markdown/byteutil",
"revision": "9e199b881116ec8dec80c69b927b43f9482a669d",
"revisionTime": "2015-10-17T03:50:26Z"
},
{
"checksumSHA1": "MwPblsw+AoBHOsWsXgmv4qQNrqY=",
"path": "github.com/golang-commonmark/markdown/linkify",
"revision": "9e199b881116ec8dec80c69b927b43f9482a669d",
"revisionTime": "2015-10-17T03:50:26Z"
},
{
"checksumSHA1": "kBeNcaKk56FguvPSUCEaH6AxpRc=",
"path": "github.com/golang/protobuf/proto",
@ -440,6 +452,18 @@
"revision": "5602c733f70afc6dcec6766be0d5034d4c4f14de",
"revisionTime": "2017-04-13T17:15:43Z"
},
{
"checksumSHA1": "vqc3a+oTUGX8PmD0TS+qQ7gmN8I=",
"path": "golang.org/x/net/html",
"revision": "054b33e6527139ad5b1ec2f6232c3b175bd9a30c",
"revisionTime": "2017-07-05T22:25:54Z"
},
{
"checksumSHA1": "00eQaGynDYrv3tL+C7l9xH0IDZg=",
"path": "golang.org/x/net/html/atom",
"revision": "054b33e6527139ad5b1ec2f6232c3b175bd9a30c",
"revisionTime": "2017-07-05T22:25:54Z"
},
{
"checksumSHA1": "kEO69MIsqWDxBOIs3ez1faofNSg=",
"path": "golang.org/x/net/publicsuffix",