Prerender index page

This commit is contained in:
Ken-Håvard Lieng 2018-12-17 14:41:24 +01:00
parent fc643483be
commit 6fedb23363
41 changed files with 5442 additions and 118 deletions

98
vendor/github.com/tdewolff/parse/v2/html/README.md generated vendored Normal file
View file

@ -0,0 +1,98 @@
# HTML [![GoDoc](http://godoc.org/github.com/tdewolff/parse/html?status.svg)](http://godoc.org/github.com/tdewolff/parse/html) [![GoCover](http://gocover.io/_badge/github.com/tdewolff/parse/html)](http://gocover.io/github.com/tdewolff/parse/html)
This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
## Installation
Run the following command
go get github.com/tdewolff/parse/v2/html
or add the following import and run project with `go get`
import "github.com/tdewolff/parse/v2/html"
## Lexer
### Usage
The following initializes a new Lexer with io.Reader `r`:
``` go
l := html.NewLexer(r)
```
To tokenize until EOF or an error, use:
``` go
for {
tt, data := l.Next()
switch tt {
case html.ErrorToken:
// error or EOF set in l.Err()
return
case html.StartTagToken:
// ...
for {
ttAttr, dataAttr := l.Next()
if ttAttr != html.AttributeToken {
break
}
// ...
}
// ...
}
}
```
All tokens:
``` go
ErrorToken TokenType = iota // extra token when errors occur
CommentToken
DoctypeToken
StartTagToken
StartTagCloseToken
StartTagVoidToken
EndTagToken
AttributeToken
TextToken
SvgToken
MathToken
```
### Examples
``` go
package main
import (
"fmt"
"io"
"os"
"github.com/tdewolff/parse/v2/html"
)
// Tokenize HTML from stdin.
func main() {
l := html.NewLexer(os.Stdin)
for {
tt, data := l.Next()
switch tt {
case html.ErrorToken:
if l.Err() != io.EOF {
fmt.Println("Error on line", l.Line(), ":", l.Err())
}
return
case html.StartTagToken:
fmt.Println("Tag", string(data))
for {
ttAttr, dataAttr := l.Next()
if ttAttr != html.AttributeToken {
break
}
key := dataAttr
val := l.AttrVal()
fmt.Println("Attribute", string(key), "=", string(val))
}
// ...
}
}
}
```
## License
Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
[1]: http://golang.org/ "Go Language"

831
vendor/github.com/tdewolff/parse/v2/html/hash.go generated vendored Normal file
View file

@ -0,0 +1,831 @@
package html
// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
// uses github.com/tdewolff/hasher
//go:generate hasher -type=Hash -file=hash.go
// Hash defines perfect hashes for a predefined list of strings.
// A value packs two fields, as decoded by String: the bits above the low byte
// (i >> 8) are the offset of the name inside _Hash_text and the low byte
// (i & 0xff) is the length of the name.
type Hash uint32
// Unique hash definitions to be used instead of strings
const (
A Hash = 0x1 // a
Abbr Hash = 0x4 // abbr
Accept Hash = 0x3206 // accept
Accept_Charset Hash = 0x320e // accept-charset
Accesskey Hash = 0x4409 // accesskey
Acronym Hash = 0xbb07 // acronym
Action Hash = 0x2ba06 // action
Address Hash = 0x67e07 // address
Align Hash = 0x1605 // align
Alink Hash = 0xd205 // alink
Allowfullscreen Hash = 0x23d0f // allowfullscreen
Alt Hash = 0xee03 // alt
Annotation Hash = 0x2070a // annotation
AnnotationXml Hash = 0x2070d // annotationXml
Applet Hash = 0x14506 // applet
Area Hash = 0x38d04 // area
Article Hash = 0x40e07 // article
Aside Hash = 0x8305 // aside
Async Hash = 0xfa05 // async
Audio Hash = 0x11605 // audio
Autocomplete Hash = 0x12e0c // autocomplete
Autofocus Hash = 0x13a09 // autofocus
Autoplay Hash = 0x14f08 // autoplay
Axis Hash = 0x15704 // axis
B Hash = 0x101 // b
Background Hash = 0x1e0a // background
Base Hash = 0x45404 // base
Basefont Hash = 0x45408 // basefont
Bdi Hash = 0xcb03 // bdi
Bdo Hash = 0x18403 // bdo
Bgcolor Hash = 0x19707 // bgcolor
Bgsound Hash = 0x19e07 // bgsound
Big Hash = 0x1a603 // big
Blink Hash = 0x1a905 // blink
Blockquote Hash = 0x1ae0a // blockquote
Body Hash = 0x4004 // body
Border Hash = 0x33806 // border
Br Hash = 0x202 // br
Button Hash = 0x1b806 // button
Canvas Hash = 0x7f06 // canvas
Caption Hash = 0x27f07 // caption
Center Hash = 0x62a06 // center
Challenge Hash = 0x1e509 // challenge
Charset Hash = 0x3907 // charset
Checked Hash = 0x3b407 // checked
Cite Hash = 0xfe04 // cite
Class Hash = 0x1c305 // class
Classid Hash = 0x1c307 // classid
Clear Hash = 0x41205 // clear
Code Hash = 0x1d604 // code
Codebase Hash = 0x45008 // codebase
Codetype Hash = 0x1d608 // codetype
Col Hash = 0x19903 // col
Colgroup Hash = 0x1ee08 // colgroup
Color Hash = 0x19905 // color
Cols Hash = 0x20204 // cols
Colspan Hash = 0x20207 // colspan
Command Hash = 0x21407 // command
Compact Hash = 0x21b07 // compact
Content Hash = 0x4a907 // content
Contenteditable Hash = 0x4a90f // contenteditable
Contextmenu Hash = 0x3bd0b // contextmenu
Controls Hash = 0x22a08 // controls
Coords Hash = 0x23606 // coords
Crossorigin Hash = 0x25b0b // crossorigin
Data Hash = 0x4c004 // data
Datalist Hash = 0x4c008 // datalist
Datetime Hash = 0x2ea08 // datetime
Dd Hash = 0x31602 // dd
Declare Hash = 0x8607 // declare
Default Hash = 0x5407 // default
DefaultChecked Hash = 0x5040e // defaultChecked
DefaultMuted Hash = 0x5650c // defaultMuted
DefaultSelected Hash = 0x540f // defaultSelected
Defer Hash = 0x6205 // defer
Del Hash = 0x7203 // del
Desc Hash = 0x7c04 // desc
Details Hash = 0x9207 // details
Dfn Hash = 0xab03 // dfn
Dialog Hash = 0xcc06 // dialog
Dir Hash = 0xd903 // dir
Dirname Hash = 0xd907 // dirname
Disabled Hash = 0x10408 // disabled
Div Hash = 0x10b03 // div
Dl Hash = 0x1a402 // dl
Download Hash = 0x48608 // download
Draggable Hash = 0x1c909 // draggable
Dropzone Hash = 0x41908 // dropzone
Dt Hash = 0x60602 // dt
Em Hash = 0x6e02 // em
Embed Hash = 0x6e05 // embed
Enabled Hash = 0x4e07 // enabled
Enctype Hash = 0x2cf07 // enctype
Face Hash = 0x62804 // face
Fieldset Hash = 0x26c08 // fieldset
Figcaption Hash = 0x27c0a // figcaption
Figure Hash = 0x29006 // figure
Font Hash = 0x45804 // font
Footer Hash = 0xf106 // footer
For Hash = 0x29c03 // for
ForeignObject Hash = 0x29c0d // foreignObject
Foreignobject Hash = 0x2a90d // foreignobject
Form Hash = 0x2b604 // form
Formaction Hash = 0x2b60a // formaction
Formenctype Hash = 0x2cb0b // formenctype
Formmethod Hash = 0x2d60a // formmethod
Formnovalidate Hash = 0x2e00e // formnovalidate
Formtarget Hash = 0x2f50a // formtarget
Frame Hash = 0xa305 // frame
Frameborder Hash = 0x3330b // frameborder
Frameset Hash = 0xa308 // frameset
H1 Hash = 0x19502 // h1
H2 Hash = 0x32402 // h2
H3 Hash = 0x34902 // h3
H4 Hash = 0x38602 // h4
H5 Hash = 0x60802 // h5
H6 Hash = 0x2ff02 // h6
Head Hash = 0x37204 // head
Header Hash = 0x37206 // header
Headers Hash = 0x37207 // headers
Height Hash = 0x30106 // height
Hgroup Hash = 0x30906 // hgroup
Hidden Hash = 0x31406 // hidden
High Hash = 0x32104 // high
Hr Hash = 0xaf02 // hr
Href Hash = 0xaf04 // href
Hreflang Hash = 0xaf08 // hreflang
Html Hash = 0x30504 // html
Http_Equiv Hash = 0x3260a // http-equiv
I Hash = 0x601 // i
Icon Hash = 0x4a804 // icon
Id Hash = 0x8502 // id
Iframe Hash = 0x33206 // iframe
Image Hash = 0x33e05 // image
Img Hash = 0x34303 // img
Inert Hash = 0x55005 // inert
Input Hash = 0x47305 // input
Ins Hash = 0x26403 // ins
Isindex Hash = 0x15907 // isindex
Ismap Hash = 0x34b05 // ismap
Itemid Hash = 0xff06 // itemid
Itemprop Hash = 0x58808 // itemprop
Itemref Hash = 0x62207 // itemref
Itemscope Hash = 0x35609 // itemscope
Itemtype Hash = 0x36008 // itemtype
Kbd Hash = 0xca03 // kbd
Keygen Hash = 0x4a06 // keygen
Keytype Hash = 0x68807 // keytype
Kind Hash = 0xd604 // kind
Label Hash = 0x7405 // label
Lang Hash = 0xb304 // lang
Language Hash = 0xb308 // language
Legend Hash = 0x1d006 // legend
Li Hash = 0x1702 // li
Link Hash = 0xd304 // link
List Hash = 0x4c404 // list
Listing Hash = 0x4c407 // listing
Longdesc Hash = 0x7808 // longdesc
Loop Hash = 0x12104 // loop
Low Hash = 0x23f03 // low
Main Hash = 0x1004 // main
Malignmark Hash = 0xc10a // malignmark
Manifest Hash = 0x65e08 // manifest
Map Hash = 0x14403 // map
Mark Hash = 0xc704 // mark
Marquee Hash = 0x36807 // marquee
Math Hash = 0x36f04 // math
Max Hash = 0x37e03 // max
Maxlength Hash = 0x37e09 // maxlength
Media Hash = 0xde05 // media
Mediagroup Hash = 0xde0a // mediagroup
Menu Hash = 0x3c404 // menu
Meta Hash = 0x4d304 // meta
Meter Hash = 0x2f005 // meter
Method Hash = 0x2da06 // method
Mglyph Hash = 0x34406 // mglyph
Mi Hash = 0x2c02 // mi
Min Hash = 0x2c03 // min
Mn Hash = 0x2e302 // mn
Mo Hash = 0x4f702 // mo
Ms Hash = 0x35902 // ms
Mtext Hash = 0x38805 // mtext
Multiple Hash = 0x39608 // multiple
Muted Hash = 0x39e05 // muted
Name Hash = 0xdc04 // name
Nav Hash = 0x1303 // nav
Nobr Hash = 0x1a04 // nobr
Noembed Hash = 0x6c07 // noembed
Noframes Hash = 0xa108 // noframes
Nohref Hash = 0xad06 // nohref
Noresize Hash = 0x24b08 // noresize
Noscript Hash = 0x31908 // noscript
Noshade Hash = 0x4ff07 // noshade
Novalidate Hash = 0x2e40a // novalidate
Nowrap Hash = 0x59106 // nowrap
Object Hash = 0x2b006 // object
Ol Hash = 0x17102 // ol
Onabort Hash = 0x1bc07 // onabort
Onafterprint Hash = 0x2840c // onafterprint
Onbeforeprint Hash = 0x2be0d // onbeforeprint
Onbeforeunload Hash = 0x6720e // onbeforeunload
Onblur Hash = 0x17e06 // onblur
Oncancel Hash = 0x11a08 // oncancel
Oncanplay Hash = 0x18609 // oncanplay
Oncanplaythrough Hash = 0x18610 // oncanplaythrough
Onchange Hash = 0x42f08 // onchange
Onclick Hash = 0x6b607 // onclick
Onclose Hash = 0x3a307 // onclose
Oncontextmenu Hash = 0x3bb0d // oncontextmenu
Oncuechange Hash = 0x3c80b // oncuechange
Ondblclick Hash = 0x3d30a // ondblclick
Ondrag Hash = 0x3dd06 // ondrag
Ondragend Hash = 0x3dd09 // ondragend
Ondragenter Hash = 0x3e60b // ondragenter
Ondragleave Hash = 0x3f10b // ondragleave
Ondragover Hash = 0x3fc0a // ondragover
Ondragstart Hash = 0x4060b // ondragstart
Ondrop Hash = 0x41706 // ondrop
Ondurationchange Hash = 0x42710 // ondurationchange
Onemptied Hash = 0x41e09 // onemptied
Onended Hash = 0x43707 // onended
Onerror Hash = 0x43e07 // onerror
Onfocus Hash = 0x44507 // onfocus
Onhashchange Hash = 0x4650c // onhashchange
Oninput Hash = 0x47107 // oninput
Oninvalid Hash = 0x47809 // oninvalid
Onkeydown Hash = 0x48109 // onkeydown
Onkeypress Hash = 0x48e0a // onkeypress
Onkeyup Hash = 0x49e07 // onkeyup
Onload Hash = 0x4b806 // onload
Onloadeddata Hash = 0x4b80c // onloadeddata
Onloadedmetadata Hash = 0x4cb10 // onloadedmetadata
Onloadstart Hash = 0x4e10b // onloadstart
Onmessage Hash = 0x4ec09 // onmessage
Onmousedown Hash = 0x4f50b // onmousedown
Onmousemove Hash = 0x5120b // onmousemove
Onmouseout Hash = 0x51d0a // onmouseout
Onmouseover Hash = 0x52a0b // onmouseover
Onmouseup Hash = 0x53509 // onmouseup
Onmousewheel Hash = 0x53e0c // onmousewheel
Onoffline Hash = 0x54a09 // onoffline
Ononline Hash = 0x55508 // ononline
Onpagehide Hash = 0x55d0a // onpagehide
Onpageshow Hash = 0x5710a // onpageshow
Onpause Hash = 0x57d07 // onpause
Onplay Hash = 0x59c06 // onplay
Onplaying Hash = 0x59c09 // onplaying
Onpopstate Hash = 0x5a50a // onpopstate
Onprogress Hash = 0x5af0a // onprogress
Onratechange Hash = 0x5be0c // onratechange
Onreset Hash = 0x5ca07 // onreset
Onresize Hash = 0x5d108 // onresize
Onscroll Hash = 0x5d908 // onscroll
Onseeked Hash = 0x5e408 // onseeked
Onseeking Hash = 0x5ec09 // onseeking
Onselect Hash = 0x5f508 // onselect
Onshow Hash = 0x5ff06 // onshow
Onstalled Hash = 0x60a09 // onstalled
Onstorage Hash = 0x61309 // onstorage
Onsubmit Hash = 0x61c08 // onsubmit
Onsuspend Hash = 0x63009 // onsuspend
Ontimeupdate Hash = 0x4590c // ontimeupdate
Onunload Hash = 0x63908 // onunload
Onvolumechange Hash = 0x6410e // onvolumechange
Onwaiting Hash = 0x64f09 // onwaiting
Open Hash = 0x58e04 // open
Optgroup Hash = 0x12308 // optgroup
Optimum Hash = 0x65807 // optimum
Option Hash = 0x66e06 // option
Output Hash = 0x52406 // output
P Hash = 0xc01 // p
Param Hash = 0xc05 // param
Pattern Hash = 0x9b07 // pattern
Pauseonexit Hash = 0x57f0b // pauseonexit
Picture Hash = 0xe707 // picture
Ping Hash = 0x12a04 // ping
Placeholder Hash = 0x16b0b // placeholder
Plaintext Hash = 0x1f509 // plaintext
Poster Hash = 0x30e06 // poster
Pre Hash = 0x34f03 // pre
Preload Hash = 0x34f07 // preload
Profile Hash = 0x66707 // profile
Progress Hash = 0x5b108 // progress
Prompt Hash = 0x59606 // prompt
Public Hash = 0x4a406 // public
Q Hash = 0x8d01 // q
Radiogroup Hash = 0x30a // radiogroup
Rb Hash = 0x1d02 // rb
Readonly Hash = 0x38e08 // readonly
Rel Hash = 0x35003 // rel
Required Hash = 0x8b08 // required
Rev Hash = 0x29403 // rev
Reversed Hash = 0x29408 // reversed
Rows Hash = 0x6604 // rows
Rowspan Hash = 0x6607 // rowspan
Rp Hash = 0x28a02 // rp
Rt Hash = 0x1c102 // rt
Rtc Hash = 0x1c103 // rtc
Ruby Hash = 0xf604 // ruby
Rules Hash = 0x17505 // rules
S Hash = 0x3d01 // s
Samp Hash = 0x9804 // samp
Sandbox Hash = 0x16307 // sandbox
Scope Hash = 0x35a05 // scope
Scoped Hash = 0x35a06 // scoped
Script Hash = 0x31b06 // script
Scrolling Hash = 0x5db09 // scrolling
Seamless Hash = 0x3a808 // seamless
Section Hash = 0x17907 // section
Select Hash = 0x5f706 // select
Selected Hash = 0x5f708 // selected
Shape Hash = 0x23105 // shape
Size Hash = 0x24f04 // size
Sizes Hash = 0x24f05 // sizes
Small Hash = 0x23b05 // small
Sortable Hash = 0x25308 // sortable
Source Hash = 0x26606 // source
Spacer Hash = 0x37806 // spacer
Span Hash = 0x6904 // span
Spellcheck Hash = 0x3af0a // spellcheck
Src Hash = 0x44b03 // src
Srcdoc Hash = 0x44b06 // srcdoc
Srclang Hash = 0x49707 // srclang
Srcset Hash = 0x5b806 // srcset
Start Hash = 0x40c05 // start
Step Hash = 0x66404 // step
Strike Hash = 0x68406 // strike
Strong Hash = 0x68f06 // strong
Style Hash = 0x69505 // style
Sub Hash = 0x61e03 // sub
Summary Hash = 0x69a07 // summary
Sup Hash = 0x6a103 // sup
Svg Hash = 0x6a403 // svg
System Hash = 0x6a706 // system
Tabindex Hash = 0x4d908 // tabindex
Table Hash = 0x25605 // table
Target Hash = 0x2f906 // target
Tbody Hash = 0x3f05 // tbody
Td Hash = 0xaa02 // td
Template Hash = 0x6aa08 // template
Text Hash = 0x1fa04 // text
Textarea Hash = 0x38908 // textarea
Tfoot Hash = 0xf005 // tfoot
Th Hash = 0x18f02 // th
Thead Hash = 0x37105 // thead
Time Hash = 0x2ee04 // time
Title Hash = 0x14a05 // title
Tr Hash = 0x1fd02 // tr
Track Hash = 0x1fd05 // track
Translate Hash = 0x22109 // translate
Truespeed Hash = 0x27309 // truespeed
Tt Hash = 0x9d02 // tt
Type Hash = 0x11204 // type
Typemustmatch Hash = 0x1da0d // typemustmatch
U Hash = 0xb01 // u
Ul Hash = 0x5802 // ul
Undeterminate Hash = 0x250d // undeterminate
Usemap Hash = 0x14106 // usemap
Valign Hash = 0x1506 // valign
Value Hash = 0x10d05 // value
Valuetype Hash = 0x10d09 // valuetype
Var Hash = 0x32f03 // var
Video Hash = 0x6b205 // video
Visible Hash = 0x6bd07 // visible
Vlink Hash = 0x6c405 // vlink
Wbr Hash = 0x57a03 // wbr
Width Hash = 0x60405 // width
Wrap Hash = 0x59304 // wrap
Xmlns Hash = 0x15f05 // xmlns
Xmp Hash = 0x16903 // xmp
)
// String returns the name the hash stands for. The name is the slice of
// _Hash_text starting at offset i>>8 with length i&0xff; when that range would
// run past the end of _Hash_text the empty string is returned.
func (i Hash) String() string {
	offset := uint32(i >> 8)
	end := offset + uint32(i&0xff)
	if end <= uint32(len(_Hash_text)) {
		return _Hash_text[offset:end]
	}
	return ""
}
// ToHash returns the hash whose name is s, or zero when s is not one of the
// predefined names. The comparison is case sensitive.
//
// Inputs that are empty or longer than _Hash_maxLen are rejected up front.
// Otherwise a 32-bit value is computed over s (xor each byte, then multiply by
// 16777619, starting from _Hash_hash0) and two slots of _Hash_table are probed
// in order: one selected by the low bits of that value, one by the bits above
// bit 16. A slot matches when its stored length equals len(s) and the bytes it
// points to in _Hash_text equal s.
func ToHash(s []byte) Hash {
	n := len(s)
	if n == 0 || n > _Hash_maxLen {
		return 0
	}

	sum := uint32(_Hash_hash0)
	for _, b := range s {
		sum = (sum ^ uint32(b)) * 16777619
	}

	mask := uint32(len(_Hash_table) - 1)
	for _, slot := range [2]uint32{sum & mask, (sum >> 16) & mask} {
		candidate := _Hash_table[slot]
		if int(candidate&0xff) != n {
			// Empty slot or a name of a different length.
			continue
		}
		from := candidate >> 8
		if _Hash_text[from:from+(candidate&0xff)] == string(s) {
			return candidate
		}
	}
	return 0
}
// _Hash_hash0 is the initial value of the running hash computed in ToHash.
const _Hash_hash0 = 0x5334b67c
// _Hash_maxLen is the longest input ToHash will look up; longer inputs make it return zero immediately.
const _Hash_maxLen = 16
const _Hash_text = "abbradiogrouparamainavalignobrbackgroundeterminateaccept-cha" +
"rsetbodyaccesskeygenabledefaultSelectedeferowspanoembedelabe" +
"longdescanvasideclarequiredetailsampatternoframesetdfnohrefl" +
"anguageacronymalignmarkbdialogalinkindirnamediagroupictureal" +
"tfooterubyasyncitemidisabledivaluetypeaudioncancelooptgroupi" +
"ngautocompleteautofocusemappletitleautoplayaxisindexmlnsandb" +
"oxmplaceholderulesectionblurbdoncanplaythrough1bgcolorbgsoun" +
"dlbigblinkblockquotebuttonabortclassidraggablegendcodetypemu" +
"stmatchallengecolgrouplaintextrackcolspannotationXmlcommandc" +
"ompactranslatecontrolshapecoordsmallowfullscreenoresizesorta" +
"blecrossoriginsourcefieldsetruespeedfigcaptionafterprintfigu" +
"reversedforeignObjectforeignobjectformactionbeforeprintforme" +
"nctypeformmethodformnovalidatetimeterformtargeth6heightmlhgr" +
"ouposterhiddenoscripthigh2http-equivariframeborderimageimgly" +
"ph3ismapreloaditemscopeditemtypemarqueematheaderspacermaxlen" +
"gth4mtextareadonlymultiplemutedoncloseamlesspellcheckedoncon" +
"textmenuoncuechangeondblclickondragendondragenterondragleave" +
"ondragoverondragstarticlearondropzonemptiedondurationchangeo" +
"nendedonerroronfocusrcdocodebasefontimeupdateonhashchangeoni" +
"nputoninvalidonkeydownloadonkeypressrclangonkeyupublicontent" +
"editableonloadeddatalistingonloadedmetadatabindexonloadstart" +
"onmessageonmousedownoshadefaultCheckedonmousemoveonmouseoutp" +
"utonmouseoveronmouseuponmousewheelonofflinertononlineonpageh" +
"idefaultMutedonpageshowbronpauseonexitempropenowrapromptonpl" +
"ayingonpopstateonprogressrcsetonratechangeonresetonresizeons" +
"crollingonseekedonseekingonselectedonshowidth5onstalledonsto" +
"rageonsubmitemrefacenteronsuspendonunloadonvolumechangeonwai" +
"tingoptimumanifesteprofileoptionbeforeunloaddresstrikeytypes" +
"trongstylesummarysupsvgsystemplatevideonclickvisiblevlink"
var _Hash_table = [1 << 9]Hash{
0x0: 0x2cb0b, // formenctype
0x1: 0x2d60a, // formmethod
0x2: 0x3c80b, // oncuechange
0x3: 0x3dd06, // ondrag
0x6: 0x68406, // strike
0x7: 0x6b205, // video
0x9: 0x4a907, // content
0xa: 0x4e07, // enabled
0xb: 0x59106, // nowrap
0xc: 0xd304, // link
0xe: 0x28a02, // rp
0xf: 0x2840c, // onafterprint
0x10: 0x14506, // applet
0x11: 0xf005, // tfoot
0x12: 0x5040e, // defaultChecked
0x13: 0x3330b, // frameborder
0x14: 0xf106, // footer
0x15: 0x5f708, // selected
0x16: 0x49707, // srclang
0x18: 0x52a0b, // onmouseover
0x19: 0x1d604, // code
0x1b: 0x47809, // oninvalid
0x1c: 0x62804, // face
0x1e: 0x3bd0b, // contextmenu
0x1f: 0xa308, // frameset
0x21: 0x5650c, // defaultMuted
0x22: 0x19905, // color
0x23: 0x59c06, // onplay
0x25: 0x2f005, // meter
0x26: 0x61309, // onstorage
0x27: 0x38e08, // readonly
0x29: 0x66707, // profile
0x2a: 0x8607, // declare
0x2b: 0xb01, // u
0x2c: 0x31908, // noscript
0x2d: 0x65e08, // manifest
0x2e: 0x1b806, // button
0x2f: 0x2ea08, // datetime
0x30: 0x47305, // input
0x31: 0x5407, // default
0x32: 0x1d608, // codetype
0x33: 0x2a90d, // foreignobject
0x34: 0x36807, // marquee
0x36: 0x19707, // bgcolor
0x37: 0x19502, // h1
0x39: 0x1e0a, // background
0x3b: 0x2f50a, // formtarget
0x41: 0x2f906, // target
0x43: 0x23b05, // small
0x44: 0x45008, // codebase
0x45: 0x55005, // inert
0x47: 0x38805, // mtext
0x48: 0x6607, // rowspan
0x49: 0x2be0d, // onbeforeprint
0x4a: 0x55508, // ononline
0x4c: 0x29006, // figure
0x4d: 0x4cb10, // onloadedmetadata
0x4e: 0xbb07, // acronym
0x50: 0x39608, // multiple
0x51: 0x320e, // accept-charset
0x52: 0x24f05, // sizes
0x53: 0x29c0d, // foreignObject
0x55: 0x2e40a, // novalidate
0x56: 0x55d0a, // onpagehide
0x57: 0x2e302, // mn
0x58: 0x38602, // h4
0x5a: 0x1c102, // rt
0x5b: 0xd205, // alink
0x5e: 0x59606, // prompt
0x5f: 0x17102, // ol
0x61: 0x5d108, // onresize
0x64: 0x69a07, // summary
0x65: 0x5a50a, // onpopstate
0x66: 0x38d04, // area
0x68: 0x64f09, // onwaiting
0x6b: 0xdc04, // name
0x6c: 0x23606, // coords
0x6d: 0x34303, // img
0x6e: 0x66404, // step
0x6f: 0x5ec09, // onseeking
0x70: 0x32104, // high
0x71: 0x49e07, // onkeyup
0x72: 0x5f706, // select
0x73: 0x1fd05, // track
0x74: 0x34b05, // ismap
0x76: 0x47107, // oninput
0x77: 0x8d01, // q
0x78: 0x48109, // onkeydown
0x79: 0x33e05, // image
0x7a: 0x2b604, // form
0x7b: 0x60a09, // onstalled
0x7c: 0xe707, // picture
0x7d: 0x42f08, // onchange
0x7e: 0x1a905, // blink
0x7f: 0xee03, // alt
0x80: 0xfa05, // async
0x82: 0x1702, // li
0x84: 0x2c02, // mi
0x85: 0xff06, // itemid
0x86: 0x11605, // audio
0x87: 0x31b06, // script
0x8b: 0x44b06, // srcdoc
0x8e: 0xc704, // mark
0x8f: 0x18403, // bdo
0x91: 0x5120b, // onmousemove
0x93: 0x3c404, // menu
0x94: 0x45804, // font
0x95: 0x14f08, // autoplay
0x96: 0x6c405, // vlink
0x98: 0x6e02, // em
0x9a: 0x5b806, // srcset
0x9b: 0x1ee08, // colgroup
0x9c: 0x58e04, // open
0x9d: 0x1d006, // legend
0x9e: 0x4e10b, // onloadstart
0xa2: 0x22109, // translate
0xa3: 0x6e05, // embed
0xa4: 0x1c305, // class
0xa6: 0x6aa08, // template
0xa7: 0x37206, // header
0xa9: 0x4b806, // onload
0xaa: 0x37105, // thead
0xab: 0x5db09, // scrolling
0xac: 0xc05, // param
0xae: 0x9b07, // pattern
0xaf: 0x9207, // details
0xb1: 0x4a406, // public
0xb3: 0x4f50b, // onmousedown
0xb4: 0x14403, // map
0xb6: 0x25b0b, // crossorigin
0xb7: 0x1506, // valign
0xb9: 0x1bc07, // onabort
0xba: 0x66e06, // option
0xbb: 0x26606, // source
0xbc: 0x6205, // defer
0xbd: 0x1e509, // challenge
0xbf: 0x10d05, // value
0xc0: 0x23d0f, // allowfullscreen
0xc1: 0xca03, // kbd
0xc2: 0x2070d, // annotationXml
0xc3: 0x5be0c, // onratechange
0xc4: 0x4f702, // mo
0xc6: 0x3af0a, // spellcheck
0xc7: 0x2c03, // min
0xc8: 0x4b80c, // onloadeddata
0xc9: 0x41205, // clear
0xca: 0x42710, // ondurationchange
0xcb: 0x1a04, // nobr
0xcd: 0x27309, // truespeed
0xcf: 0x30906, // hgroup
0xd0: 0x40c05, // start
0xd3: 0x41908, // dropzone
0xd5: 0x7405, // label
0xd8: 0xde0a, // mediagroup
0xd9: 0x17e06, // onblur
0xdb: 0x27f07, // caption
0xdd: 0x7c04, // desc
0xde: 0x15f05, // xmlns
0xdf: 0x30106, // height
0xe0: 0x21407, // command
0xe2: 0x57f0b, // pauseonexit
0xe3: 0x68f06, // strong
0xe4: 0x43e07, // onerror
0xe5: 0x61c08, // onsubmit
0xe6: 0xb308, // language
0xe7: 0x48608, // download
0xe9: 0x53509, // onmouseup
0xec: 0x2cf07, // enctype
0xed: 0x5f508, // onselect
0xee: 0x2b006, // object
0xef: 0x1f509, // plaintext
0xf0: 0x3d30a, // ondblclick
0xf1: 0x18610, // oncanplaythrough
0xf2: 0xd903, // dir
0xf3: 0x38908, // textarea
0xf4: 0x12a04, // ping
0xf5: 0x2da06, // method
0xf6: 0x22a08, // controls
0xf7: 0x37806, // spacer
0xf8: 0x6a403, // svg
0xf9: 0x30504, // html
0xfa: 0x3d01, // s
0xfc: 0xcc06, // dialog
0xfe: 0x1da0d, // typemustmatch
0xff: 0x3b407, // checked
0x101: 0x30e06, // poster
0x102: 0x3260a, // http-equiv
0x103: 0x44b03, // src
0x104: 0x10408, // disabled
0x105: 0x37207, // headers
0x106: 0x5af0a, // onprogress
0x107: 0x26c08, // fieldset
0x108: 0x32f03, // var
0x10a: 0xa305, // frame
0x10b: 0x36008, // itemtype
0x10c: 0x3fc0a, // ondragover
0x10d: 0x13a09, // autofocus
0x10f: 0x601, // i
0x110: 0x35902, // ms
0x111: 0x45404, // base
0x113: 0x35a05, // scope
0x114: 0x3206, // accept
0x115: 0x58808, // itemprop
0x117: 0xfe04, // cite
0x118: 0x3907, // charset
0x119: 0x14a05, // title
0x11a: 0x68807, // keytype
0x11b: 0x1fa04, // text
0x11c: 0x65807, // optimum
0x11e: 0x37204, // head
0x121: 0x21b07, // compact
0x123: 0x63009, // onsuspend
0x124: 0x4c404, // list
0x125: 0x4590c, // ontimeupdate
0x126: 0x62a06, // center
0x127: 0x31406, // hidden
0x129: 0x35609, // itemscope
0x12c: 0x1a402, // dl
0x12d: 0x17907, // section
0x12e: 0x11a08, // oncancel
0x12f: 0x6b607, // onclick
0x130: 0xde05, // media
0x131: 0x52406, // output
0x132: 0x4c008, // datalist
0x133: 0x53e0c, // onmousewheel
0x134: 0x45408, // basefont
0x135: 0x37e09, // maxlength
0x136: 0x6bd07, // visible
0x137: 0x2e00e, // formnovalidate
0x139: 0x16903, // xmp
0x13a: 0x101, // b
0x13b: 0x5710a, // onpageshow
0x13c: 0xf604, // ruby
0x13d: 0x16b0b, // placeholder
0x13e: 0x4c407, // listing
0x140: 0x26403, // ins
0x141: 0x62207, // itemref
0x144: 0x540f, // defaultSelected
0x146: 0x3f10b, // ondragleave
0x147: 0x1ae0a, // blockquote
0x148: 0x59304, // wrap
0x14a: 0x1a603, // big
0x14b: 0x35003, // rel
0x14c: 0x41706, // ondrop
0x14e: 0x6a706, // system
0x14f: 0x30a, // radiogroup
0x150: 0x25605, // table
0x152: 0x57a03, // wbr
0x153: 0x3bb0d, // oncontextmenu
0x155: 0x250d, // undeterminate
0x157: 0x20204, // cols
0x158: 0x16307, // sandbox
0x159: 0x1303, // nav
0x15a: 0x37e03, // max
0x15b: 0x7808, // longdesc
0x15c: 0x60405, // width
0x15d: 0x34902, // h3
0x15e: 0x19e07, // bgsound
0x161: 0x10d09, // valuetype
0x162: 0x69505, // style
0x164: 0x3f05, // tbody
0x165: 0x40e07, // article
0x169: 0xcb03, // bdi
0x16a: 0x67e07, // address
0x16b: 0x23105, // shape
0x16c: 0x2ba06, // action
0x16e: 0x1fd02, // tr
0x16f: 0xaa02, // td
0x170: 0x3dd09, // ondragend
0x171: 0x5802, // ul
0x172: 0x33806, // border
0x174: 0x4a06, // keygen
0x175: 0x4004, // body
0x177: 0x1c909, // draggable
0x178: 0x2b60a, // formaction
0x17b: 0x34406, // mglyph
0x17d: 0x1d02, // rb
0x17e: 0x2ff02, // h6
0x17f: 0x41e09, // onemptied
0x180: 0x5ca07, // onreset
0x181: 0x1004, // main
0x182: 0x12104, // loop
0x183: 0x48e0a, // onkeypress
0x184: 0x9d02, // tt
0x186: 0x20207, // colspan
0x188: 0x36f04, // math
0x189: 0x1605, // align
0x18a: 0xa108, // noframes
0x18b: 0xaf02, // hr
0x18c: 0xc10a, // malignmark
0x18e: 0x23f03, // low
0x18f: 0x8502, // id
0x190: 0x6604, // rows
0x191: 0x29403, // rev
0x192: 0x63908, // onunload
0x193: 0x39e05, // muted
0x194: 0x35a06, // scoped
0x195: 0x31602, // dd
0x196: 0x60602, // dt
0x197: 0x6720e, // onbeforeunload
0x199: 0x2070a, // annotation
0x19a: 0x29408, // reversed
0x19c: 0x11204, // type
0x19d: 0x57d07, // onpause
0x19e: 0xd604, // kind
0x19f: 0x4c004, // data
0x1a0: 0x4ff07, // noshade
0x1a3: 0x17505, // rules
0x1a4: 0x12308, // optgroup
0x1a5: 0x202, // br
0x1a7: 0x1, // a
0x1a8: 0x51d0a, // onmouseout
0x1aa: 0x54a09, // onoffline
0x1ab: 0x6410e, // onvolumechange
0x1ae: 0x61e03, // sub
0x1b3: 0x29c03, // for
0x1b5: 0x8b08, // required
0x1b6: 0x5b108, // progress
0x1b7: 0x14106, // usemap
0x1b8: 0x7f06, // canvas
0x1b9: 0x4a804, // icon
0x1bb: 0x1c103, // rtc
0x1bc: 0x8305, // aside
0x1bd: 0x2ee04, // time
0x1be: 0x4060b, // ondragstart
0x1c0: 0x27c0a, // figcaption
0x1c1: 0xaf04, // href
0x1c2: 0x33206, // iframe
0x1c3: 0x18609, // oncanplay
0x1c4: 0x6904, // span
0x1c5: 0x34f03, // pre
0x1c6: 0x6c07, // noembed
0x1c8: 0x5e408, // onseeked
0x1c9: 0x4d304, // meta
0x1ca: 0x32402, // h2
0x1cb: 0x3a808, // seamless
0x1cc: 0xab03, // dfn
0x1cd: 0x15704, // axis
0x1cf: 0x3e60b, // ondragenter
0x1d0: 0x18f02, // th
0x1d1: 0x4650c, // onhashchange
0x1d2: 0xb304, // lang
0x1d3: 0x44507, // onfocus
0x1d5: 0x24f04, // size
0x1d8: 0x12e0c, // autocomplete
0x1d9: 0xaf08, // hreflang
0x1da: 0x9804, // samp
0x1de: 0x19903, // col
0x1df: 0x10b03, // div
0x1e0: 0x25308, // sortable
0x1e1: 0x7203, // del
0x1e3: 0x3a307, // onclose
0x1e6: 0xd907, // dirname
0x1e8: 0x1c307, // classid
0x1e9: 0x34f07, // preload
0x1ea: 0x4d908, // tabindex
0x1eb: 0x60802, // h5
0x1ec: 0x5d908, // onscroll
0x1ed: 0x4a90f, // contenteditable
0x1ee: 0x4ec09, // onmessage
0x1ef: 0x4, // abbr
0x1f0: 0x15907, // isindex
0x1f1: 0x6a103, // sup
0x1f3: 0x24b08, // noresize
0x1f5: 0x59c09, // onplaying
0x1f6: 0x4409, // accesskey
0x1fa: 0xc01, // p
0x1fb: 0x43707, // onended
0x1fc: 0x5ff06, // onshow
0x1fe: 0xad06, // nohref
}

498
vendor/github.com/tdewolff/parse/v2/html/lex.go generated vendored Normal file
View file

@ -0,0 +1,498 @@
// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
package html // import "github.com/tdewolff/parse/html"
import (
"io"
"strconv"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/buffer"
)
// TokenType identifies the kind of token returned by the lexer.
type TokenType uint32

// The token kinds, numbered consecutively from zero.
const (
	ErrorToken TokenType = iota // extra token when errors occur
	CommentToken
	DoctypeToken
	StartTagToken
	StartTagCloseToken
	StartTagVoidToken
	EndTagToken
	AttributeToken
	TextToken
	SvgToken
	MathToken
)

// String returns the human-readable name of the token type. Values outside the
// declared constants are rendered as "Invalid(N)" with N in decimal.
func (tt TokenType) String() string {
	// Indexed by the constants themselves so the table stays aligned with
	// the declaration order above.
	names := [...]string{
		ErrorToken:         "Error",
		CommentToken:       "Comment",
		DoctypeToken:       "Doctype",
		StartTagToken:      "StartTag",
		StartTagCloseToken: "StartTagClose",
		StartTagVoidToken:  "StartTagVoid",
		EndTagToken:        "EndTag",
		AttributeToken:     "Attribute",
		TextToken:          "Text",
		SvgToken:           "Svg",
		MathToken:          "Math",
	}
	if tt < TokenType(len(names)) {
		return names[tt]
	}
	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
}
////////////////////////////////////////////////////////////////
// Lexer is the state for the lexer.
type Lexer struct {
r *buffer.Lexer // underlying buffered input that Next peeks into and shifts tokens out of
err error // lexer-level error; when non-nil, Err returns it in preference to r.Err()
rawTag Hash // when non-zero, the next call to Next first consumes raw text for this tag via shiftRawText
inTag bool // set when a start tag has been opened; cleared once Next returns the closing ">" or "/>"
text []byte // value reported by Text(); reset to nil at the start of every Next call
attrVal []byte // value reported by AttrVal(); reset to nil whenever Next runs inside a tag
}
// NewLexer creates a Lexer that tokenizes the data read from r. The reader is
// wrapped with buffer.NewLexer; all other state starts at its zero value.
func NewLexer(r io.Reader) *Lexer {
	lexer := new(Lexer)
	lexer.r = buffer.NewLexer(r)
	return lexer
}
// Err returns the error encountered during lexing. This is often io.EOF, but
// other errors can be returned as well. An error recorded on the Lexer itself
// takes precedence; otherwise the underlying buffer's error is reported.
func (l *Lexer) Err() error {
	if l.err == nil {
		return l.r.Err()
	}
	return l.err
}
// Restore restores the NULL byte at the end of the buffer.
// It simply delegates to the Restore method of the underlying buffer.Lexer.
func (l *Lexer) Restore() {
l.r.Restore()
}
// Next returns the next token type together with the bytes shifted out of the buffer for it.
// It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
//
// The call operates in one of three modes, checked in this order:
//   - inside a start tag (l.inTag): whitespace is skipped, then either an AttributeToken is returned
//     or the tag is closed with StartTagCloseToken (">") or StartTagVoidToken ("/>");
//   - a raw tag is pending (l.rawTag != 0): its content is consumed by shiftRawText and returned as a
//     TextToken when non-empty;
//   - otherwise: input is scanned byte by byte; accumulated text is returned as a TextToken before
//     any tag, comment or markup declaration that follows it is tokenized.
func (l *Lexer) Next() (TokenType, []byte) {
// Text() only ever describes the token returned by this call.
l.text = nil
var c byte
if l.inTag {
l.attrVal = nil
for { // before attribute name state
if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
l.r.Move(1)
continue
}
break
}
// A zero byte only counts as end of input when the reader also reports an error.
if c == 0 && l.r.Err() != nil {
return ErrorToken, nil
} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
// Anything other than ">" or "/>" starts an attribute.
return AttributeToken, l.shiftAttribute()
}
// Remember where the closing delimiter starts so that l.text can be set to the
// part of the lexeme from that position onward (presumably just ">" or "/>").
start := l.r.Pos()
l.inTag = false
if c == '/' {
l.r.Move(2)
l.text = l.r.Lexeme()[start:]
return StartTagVoidToken, l.r.Shift()
}
l.r.Move(1)
l.text = l.r.Lexeme()[start:]
return StartTagCloseToken, l.r.Shift()
}
if l.rawTag != 0 {
// rawTag is cleared in both cases; empty raw text just falls through to normal scanning.
if rawText := l.shiftRawText(); len(rawText) > 0 {
l.rawTag = 0
return TextToken, rawText
}
l.rawTag = 0
}
for {
c = l.r.Peek(0)
if c == '<' {
c = l.r.Peek(1)
// "</" only counts as an end tag when it is not directly followed by '>' or by the end of input.
isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
if l.r.Pos() > 0 {
if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
// return currently buffered texttoken so that we can return tag next iteration
return TextToken, l.r.Shift()
}
} else if isEndTag {
l.r.Move(2)
// only endtags that are not followed by > or EOF arrive here
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
return CommentToken, l.shiftBogusComment()
}
return EndTagToken, l.shiftEndTag()
} else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
// "<" followed by a letter opens a start tag; subsequent calls run in the inTag mode above.
l.r.Move(1)
l.inTag = true
return l.shiftStartTag()
} else if c == '!' {
l.r.Move(2)
return l.readMarkup()
} else if c == '?' {
l.r.Move(1)
return CommentToken, l.shiftBogusComment()
}
// Any other '<' is not markup: fall through and accumulate it as text.
} else if c == 0 && l.r.Err() != nil {
// End of input: flush pending text first, report the error on the following call.
if l.r.Pos() > 0 {
return TextToken, l.r.Shift()
}
return ErrorToken, nil
}
l.r.Move(1)
}
}
// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
// The value is reset to nil at the start of every call to Next, so it is nil for tokens that do not set it.
func (l *Lexer) Text() []byte {
return l.text
}
// AttrVal returns the attribute value when an AttributeToken was returned from Next.
// The value is reset to nil each time Next is called while inside a tag; it is presumably
// populated by shiftAttribute (not shown here; confirm against that method).
func (l *Lexer) AttrVal() []byte {
return l.attrVal
}
////////////////////////////////////////////////////////////////
// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html
// shiftRawText consumes the content that follows a raw text start tag (recorded in l.rawTag) and returns it as one slice.
// For Plaintext everything up to the end of the input is consumed. For the other raw tags the content ends right before
// the first end tag whose lowercased name hashes to l.rawTag, or at the end of the input. For Script, "<!--" additionally
// opens an escaped section in which a nested <script>...</script> pair does not terminate the content.
func (l *Lexer) shiftRawText() []byte {
if l.rawTag == Plaintext {
// plaintext cannot be closed: consume until the reader reports an error (such as EOF)
for {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
return l.r.Shift()
}
l.r.Move(1)
}
} else { // RCDATA, RAWTEXT and SCRIPT
for {
c := l.r.Peek(0)
if c == '<' {
if l.r.Peek(1) == '/' {
// candidate end tag: remember where '<' is so that we can rewind to it
mark := l.r.Pos()
l.r.Move(2)
// consume the ASCII letters that form the tag name
for {
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
break
}
l.r.Move(1)
}
if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
// matching end tag: return only the content before it, the end tag itself is lexed by the next call
l.r.Rewind(mark)
return l.r.Shift()
}
// any other end tag is part of the raw text, scanning continues after its name
} else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
// "<!--" inside a script: scan the escaped section up to "-->"
l.r.Move(4)
// inScript is true while inside a nested <script> that was opened within the escaped section
inScript := false
for {
c := l.r.Peek(0)
if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
l.r.Move(3)
break
} else if c == '<' {
isEnd := l.r.Peek(1) == '/'
if isEnd {
l.r.Move(2)
} else {
l.r.Move(1)
}
// mark points at the first byte of the tag name, i.e. past "<" or "</"
mark := l.r.Pos()
for {
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
break
}
l.r.Move(1)
}
if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
if !isEnd {
inScript = true
} else {
if !inScript {
// </script> that does not close a nested script ends the raw text: rewind to its '<'
l.r.Rewind(mark - 2)
return l.r.Shift()
}
inScript = false
}
}
} else if c == 0 && l.r.Err() != nil {
return l.r.Shift()
} else {
l.r.Move(1)
}
}
} else {
l.r.Move(1)
}
} else if c == 0 && l.r.Err() != nil {
// end of input (or read error): everything consumed so far is the raw text
return l.r.Shift()
} else {
l.r.Move(1)
}
}
}
}
// readMarkup lexes a markup declaration; the caller has already consumed "<!".
// Three forms are recognised:
//   - comments, "<!--" up to "-->" or "--!>", returned as CommentToken with
//     the comment content in l.text,
//   - CDATA sections, "<![CDATA[" up to "]]>", returned as TextToken,
//   - doctypes, "<!doctype" in any letter case up to ">", returned as
//     DoctypeToken with everything following the keyword in l.text.
//
// Anything else is handed to shiftBogusComment. Every form also ends at the
// end of the input, in which case the unterminated remainder is returned.
func (l *Lexer) readMarkup() (TokenType, []byte) {
	switch {
	case l.at('-', '-'):
		l.r.Move(2)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				// Unterminated comment: expose its content through Text() as
				// well, just like the doctype, bogus comment and end tag
				// paths do when they hit the end of the input. The lexeme
				// starts with "<!--", so slicing at 4 is always in range.
				l.text = l.r.Lexeme()[4:]
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(3)
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '!', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(4)
				return CommentToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	case l.at('[', 'C', 'D', 'A', 'T', 'A', '['):
		l.r.Move(7)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				return TextToken, l.r.Shift()
			} else if l.at(']', ']', '>') {
				l.r.Move(3)
				return TextToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	case l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e'):
		l.r.Move(7)
		if l.r.Peek(0) == ' ' {
			l.r.Move(1)
		}
		for {
			if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
				// 9 == len("<!doctype"); the closing '>' is not part of the text
				l.text = l.r.Lexeme()[9:]
				if c == '>' {
					l.r.Move(1)
				}
				return DoctypeToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	}
	return CommentToken, l.shiftBogusComment()
}
// shiftBogusComment consumes input up to and including the next '>', or up to
// the end of the input when there is none, and returns the consumed bytes.
// l.text is set to the lexeme without its first two bytes and without the
// closing '>'.
func (l *Lexer) shiftBogusComment() []byte {
	for {
		c := l.r.Peek(0)
		if c != '>' && (c != 0 || l.r.Err() == nil) {
			l.r.Move(1)
			continue
		}
		// Take the text before stepping over '>' so the delimiter is excluded.
		l.text = l.r.Lexeme()[2:]
		if c == '>' {
			l.r.Move(1)
		}
		return l.r.Shift()
	}
}
// shiftStartTag lexes the name of a start tag; only the leading '<' has been
// consumed so far. The name ends at whitespace, '>', "/>" or the end of the
// input, is lowercased in place and stored in l.text.
//
// For svg and math the whole element is consumed through shiftXml and returned
// as a single SvgToken or MathToken (ErrorToken if shiftXml recorded an error).
// For the raw text elements l.rawTag is set to the tag's hash. In all
// remaining cases the result is a StartTagToken holding the bytes consumed so
// far.
func (l *Lexer) shiftStartTag() (TokenType, []byte) {
nameLoop:
	for {
		switch c := l.r.Peek(0); c {
		case ' ', '>', '\t', '\n', '\r', '\f':
			break nameLoop
		case '/':
			if l.r.Peek(1) == '>' {
				break nameLoop
			}
		case 0:
			if l.r.Err() != nil {
				break nameLoop
			}
		}
		l.r.Move(1)
	}

	l.text = parse.ToLower(l.r.Lexeme()[1:])
	switch h := ToHash(l.text); h {
	case Svg, Math:
		data := l.shiftXml(h)
		if l.err != nil {
			return ErrorToken, nil
		}
		// the complete element was consumed, so no attributes follow
		l.inTag = false
		if h == Svg {
			return SvgToken, data
		}
		return MathToken, data
	case Textarea, Title, Style, Xmp, Iframe, Script, Plaintext:
		l.rawTag = h
	}
	return StartTagToken, l.r.Shift()
}
// shiftAttribute lexes one attribute and returns everything consumed since
// the previous shift. The lowercased attribute name is stored in l.text. When
// the name is followed by '=' (optionally surrounded by whitespace), the
// value, including its quotes if any, is stored in l.attrVal. Otherwise
// l.attrVal is nil and whitespace consumed after the name is given back.
func (l *Lexer) shiftAttribute() []byte {
	nameStart := l.r.Pos()
	var c byte

	// attribute name state
nameLoop:
	for {
		switch c = l.r.Peek(0); c {
		case ' ', '=', '>', '\t', '\n', '\r', '\f':
			break nameLoop
		case '/':
			if l.r.Peek(1) == '>' {
				break nameLoop
			}
		case 0:
			if l.r.Err() != nil {
				break nameLoop
			}
		}
		l.r.Move(1)
	}
	nameEnd := l.r.Pos()

	// after attribute name state
	for c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; c = l.r.Peek(0) {
		l.r.Move(1)
	}

	if c != '=' {
		// no value: undo the whitespace skipped after the name
		l.r.Rewind(nameEnd)
		l.attrVal = nil
		l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
		return l.r.Shift()
	}
	l.r.Move(1)

	// before attribute value state
	for c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; c = l.r.Peek(0) {
		l.r.Move(1)
	}

	valueStart := l.r.Pos()
	if c == '"' || c == '\'' {
		// attribute value single- and double-quoted state
		quote := c
		l.r.Move(1)
		for {
			ch := l.r.Peek(0)
			if ch == quote {
				l.r.Move(1)
				break
			}
			if ch == 0 && l.r.Err() != nil {
				break
			}
			l.r.Move(1)
		}
	} else {
		// attribute value unquoted state
	valueLoop:
		for {
			switch ch := l.r.Peek(0); ch {
			case ' ', '>', '\t', '\n', '\r', '\f':
				break valueLoop
			case 0:
				if l.r.Err() != nil {
					break valueLoop
				}
			}
			l.r.Move(1)
		}
	}
	l.attrVal = l.r.Lexeme()[valueStart:]
	l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
	return l.r.Shift()
}
// shiftEndTag lexes the remainder of an end tag; the caller has already
// consumed "</". Input is consumed up to and including the next '>', or up to
// the end of the input when there is none. l.text is set to what lies between
// "</" and '>' with trailing whitespace removed. The whole consumed end tag is
// passed through parse.ToLower and returned.
func (l *Lexer) shiftEndTag() []byte {
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.text = l.r.Lexeme()[2:]
			l.r.Move(1)
			break
		} else if c == 0 && l.r.Err() != nil {
			l.text = l.r.Lexeme()[2:]
			break
		}
		l.r.Move(1)
	}

	// Trim trailing whitespace. Form feed is included so that the set matches
	// the whitespace recognised everywhere else in this lexer (space, tab,
	// line feed, carriage return and form feed); without it "</div\f>" would
	// yield the text "div\f".
	end := len(l.text)
	for end > 0 {
		if c := l.text[end-1]; c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f' {
			break
		}
		end--
	}
	l.text = l.text[:end]
	return parse.ToLower(l.r.Shift())
}
// shiftXml consumes a complete svg or math element, start tag included, and
// returns it as one slice. On entry `<svg` or `<math` has already been
// consumed. Scanning runs until an end tag whose lowercased name hashes to
// rawTag is found outside of double quotes, and then on to that tag's '>'.
// Only '"' toggles the quoted state. A zero byte ends the scan immediately;
// if the reader reports no error at that point, l.err is set to an
// "unexpected null character" error.
func (l *Lexer) shiftXml(rawTag Hash) []byte {
	inQuote := false

content:
	for {
		c := l.r.Peek(0)
		switch {
		case c == '"':
			inQuote = !inQuote
			l.r.Move(1)
		case c == '<' && !inQuote && l.r.Peek(1) == '/':
			mark := l.r.Pos()
			l.r.Move(2)
			for c = l.r.Peek(0); 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'; c = l.r.Peek(0) {
				l.r.Move(1)
			}
			// copy so that ToLower doesn't change the case of the underlying slice
			name := parse.Copy(l.r.Lexeme()[mark+2:])
			if ToHash(parse.ToLower(name)) == rawTag {
				break content
			}
		case c == 0:
			if l.r.Err() == nil {
				l.err = parse.NewErrorLexer("unexpected null character", l.r)
			}
			return l.r.Shift()
		default:
			l.r.Move(1)
		}
	}

	// consume the remainder of the closing tag, up to and including '>'
	for {
		c := l.r.Peek(0)
		if c == 0 {
			if l.r.Err() == nil {
				l.err = parse.NewErrorLexer("unexpected null character", l.r)
			}
			return l.r.Shift()
		}
		l.r.Move(1)
		if c == '>' {
			return l.r.Shift()
		}
	}
}
////////////////////////////////////////////////////////////////
// at reports whether the upcoming input matches b byte for byte, starting at
// the current position. It only peeks; the position is not advanced.
func (l *Lexer) at(b ...byte) bool {
	for i := 0; i < len(b); i++ {
		if l.r.Peek(i) != b[i] {
			return false
		}
	}
	return true
}
// atCaseInsensitive reports whether the upcoming input matches b, accepting
// for every position either the given byte or the byte that lies 'a'-'A'
// below it. The pattern therefore has to be given in lowercase letters for
// the comparison to be a case-insensitive one. It only peeks; the position is
// not advanced.
func (l *Lexer) atCaseInsensitive(b ...byte) bool {
	const caseOffset = 'a' - 'A'
	for i, want := range b {
		got := l.r.Peek(i)
		if got == want || got+caseOffset == want {
			continue
		}
		return false
	}
	return true
}

129
vendor/github.com/tdewolff/parse/v2/html/util.go generated vendored Normal file
View file

@ -0,0 +1,129 @@
package html // import "github.com/tdewolff/parse/html"
import "github.com/tdewolff/parse/v2"
// Numeric character references that EscapeAttrVal writes in place of a quote character that equals the chosen delimiter.
var (
singleQuoteEntityBytes = []byte("&#39;") // replacement for '
doubleQuoteEntityBytes = []byte("&#34;") // replacement for "
)
// charTable flags the bytes that EscapeAttrVal has to look at: whitespace,
// both quote characters, the ampersand, and the characters <, =, > and `.
// Every byte that is not listed, including all non-ASCII bytes, is false.
var charTable = [256]bool{
	'\t': true,
	'\n': true,
	'\v': true,
	'\f': true,
	'\r': true,
	' ':  true,

	'"':  true,
	'&':  true,
	'\'': true,

	'<': true,
	'=': true,
	'>': true,
	'`': true,
}
// EscapeAttrVal returns a serialisation of the attribute value b.
//
// b is returned unchanged when it contains none of the bytes flagged in
// charTable other than ampersands that do not start a quote entity. orig is
// returned unchanged when b contains no ampersand at all, orig is exactly two
// bytes longer than b, and orig starts with a quote character that does not
// occur in b. In every other case b is wrapped in quotes: single quotes when
// b holds more double than single quotes, double quotes otherwise. Quote
// entities of the other kind are written as the literal quote character,
// while occurrences of the chosen quote (literal or entity) are written as
// its numeric entity.
//
// buf is scratch space: it is reused when its capacity suffices and replaced
// by a larger slice otherwise. A quoted result aliases *buf.
func EscapeAttrVal(buf *[]byte, orig, b []byte) []byte {
	var singles, doubles int
	needsQuotes, hasAmp := false, false
	for i, c := range b {
		if !charTable[c] {
			continue
		}
		switch c {
		case '&':
			hasAmp = true
			if q, size := parse.QuoteEntity(b[i:]); size > 0 {
				needsQuotes = true
				if q == '"' {
					doubles++
				} else {
					singles++
				}
			}
		case '"':
			needsQuotes = true
			doubles++
		case '\'':
			needsQuotes = true
			singles++
		default:
			needsQuotes = true
		}
	}

	if !needsQuotes {
		return b
	}
	if !hasAmp && len(orig) == len(b)+2 {
		if singles == 0 && orig[0] == '\'' || doubles == 0 && orig[0] == '"' {
			return orig
		}
	}

	// Use the quote character that needs the fewest replacements.
	quote, escapedQuote, replacements := byte('"'), doubleQuoteEntityBytes, doubles
	if doubles > singles {
		quote, escapedQuote, replacements = '\'', singleQuoteEntityBytes, singles
	}

	// Two delimiters, plus four extra bytes for every quote that becomes a
	// five byte entity. This is the maximum size, not the actual size.
	maxLen := len(b) + 2 + replacements*4
	if maxLen > cap(*buf) {
		*buf = make([]byte, 0, maxLen)
	}
	out := (*buf)[:maxLen]

	out[0] = quote
	w := 1    // next write position in out
	done := 0 // bytes of b before this index are already written
	for i, c := range b {
		switch {
		case c == '&':
			if q, size := parse.QuoteEntity(b[i:]); size > 0 {
				w += copy(out[w:], b[done:i])
				if q != quote {
					out[w] = q
					w++
				} else {
					w += copy(out[w:], escapedQuote)
				}
				done = i + size
			}
		case c == quote:
			w += copy(out[w:], b[done:i])
			w += copy(out[w:], escapedQuote)
			done = i + 1
		}
	}
	w += copy(out[w:], b[done:])
	out[w] = quote
	return out[:w+1]
}