Update server dependencies

This commit is contained in:
Ken-Håvard Lieng 2020-04-29 04:23:32 +02:00
parent c704ebb042
commit 1794e2680a
369 changed files with 23554 additions and 6306 deletions

View file

@ -1,6 +1,10 @@
language: go
go:
- 1.13.x
env:
- GO111MODULE=on
before_install:
- go get github.com/mattn/goveralls
script:
- go test -v -covermode=count -coverprofile=profile.cov . ./buffer ./css ./html ./js ./json ./strconv ./svg ./xml
- goveralls -v -coverprofile=profile.cov -service travis-ci -repotoken $COVERALLS_TOKEN
- go test -covermode=count -coverprofile=profile.cov . ./buffer ./css ./html ./js ./json ./strconv ./svg ./xml
- goveralls -coverprofile=profile.cov -service travis-ci

View file

@ -5,7 +5,7 @@ The `Reader` and `Writer` types implement the `io.Reader` and `io.Writer` respec
The `Lexer` type is useful for building lexers because it keeps track of the start and end position of a byte selection, and shifts the bytes whenever a valid token is found.
The `StreamLexer` does the same, but keeps a buffer pool so that it reads a limited amount at a time, allowing to parse from streaming sources.
*/
package buffer // import "github.com/tdewolff/parse/buffer"
package buffer
// defaultBufSize specifies the default initial length of internal buffers.
var defaultBufSize = 4096

View file

@ -1,4 +1,4 @@
package buffer // import "github.com/tdewolff/parse/buffer"
package buffer
import (
"io"
@ -52,7 +52,7 @@ func NewLexerBytes(b []byte) *Lexer {
n := len(b)
if n == 0 {
z.buf = nullBuffer
} else if b[n-1] != 0 {
} else {
// Append NULL to buffer, but try to avoid reallocation
if cap(b) > n {
// Overwrite next byte but restore when done

View file

@ -1,4 +1,4 @@
package buffer // import "github.com/tdewolff/parse/buffer"
package buffer
import "io"

View file

@ -1,4 +1,4 @@
package buffer // import "github.com/tdewolff/parse/buffer"
package buffer
import (
"io"

View file

@ -1,4 +1,4 @@
package buffer // import "github.com/tdewolff/parse/buffer"
package buffer
// Writer implements an io.Writer over a byte slice.
type Writer struct {

View file

@ -1,11 +1,10 @@
// Package parse contains a collection of parsers for various formats in its subpackages.
package parse // import "github.com/tdewolff/parse"
package parse
import (
"bytes"
"encoding/base64"
"errors"
"net/url"
)
// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
@ -178,8 +177,8 @@ func DataURI(dataURI []byte) ([]byte, []byte, error) {
return nil, nil, err
}
data = decoded[:n]
} else if unescaped, err := url.QueryUnescape(string(data)); err == nil {
data = []byte(unescaped)
} else {
data = DecodeURL(data)
}
return mediatype, data, nil
}
@ -190,6 +189,7 @@ func DataURI(dataURI []byte) ([]byte, []byte, error) {
}
// QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
if len(b) < 5 || b[0] != '&' {
return 0, 0
@ -221,9 +221,9 @@ func QuoteEntity(b []byte) (quote byte, n int) {
}
}
} else if len(b) >= 6 && b[5] == ';' {
if EqualFold(b[1:5], []byte{'q', 'u', 'o', 't'}) {
if bytes.Equal(b[1:5], []byte{'q', 'u', 'o', 't'}) {
return '"', 6 // &quot;
} else if EqualFold(b[1:5], []byte{'a', 'p', 'o', 's'}) {
} else if bytes.Equal(b[1:5], []byte{'a', 'p', 'o', 's'}) {
return '\'', 6 // &apos;
}
}

View file

@ -10,40 +10,39 @@ import (
// Error is a parsing error returned by parser. It contains a message and an offset at which the error occurred.
type Error struct {
Message string
r io.Reader
Offset int
line int
column int
context string
Line int
Column int
Context string
}
// NewError creates a new error
func NewError(msg string, r io.Reader, offset int) *Error {
func NewError(r io.Reader, offset int, message string, a ...interface{}) *Error {
line, column, context := Position(r, offset)
if 0 < len(a) {
message = fmt.Sprintf(message, a...)
}
return &Error{
Message: msg,
r: r,
Offset: offset,
Message: message,
Line: line,
Column: column,
Context: context,
}
}
// NewErrorLexer creates a new error from an active Lexer.
func NewErrorLexer(msg string, l *buffer.Lexer) *Error {
func NewErrorLexer(l *buffer.Lexer, message string, a ...interface{}) *Error {
r := buffer.NewReader(l.Bytes())
offset := l.Offset()
return NewError(msg, r, offset)
return NewError(r, offset, message, a...)
}
// Positions re-parses the file to determine the line, column, and context of the error.
// Positions returns the line, column, and context of the error.
// Context is the entire line at which the error occurred.
func (e *Error) Position() (int, int, string) {
if e.line == 0 {
e.line, e.column, e.context = Position(e.r, e.Offset)
}
return e.line, e.column, e.context
return e.Line, e.Column, e.Context
}
// Error returns the error string, containing the context and line + column number.
func (e *Error) Error() string {
line, column, context := e.Position()
return fmt.Sprintf("parse error:%d:%d: %s\n%s", line, column, e.Message, context)
return fmt.Sprintf("%s on line %d and column %d\n%s", e.Message, e.Line, e.Column, e.Context)
}

View file

@ -1,3 +1,5 @@
module github.com/tdewolff/parse/v2
require github.com/tdewolff/test v1.0.0
go 1.13
require github.com/tdewolff/test v1.0.6

View file

@ -1,2 +1,2 @@
github.com/tdewolff/test v1.0.0 h1:jOwzqCXr5ePXEPGJaq2ivoR6HOCi+D5TPfpoyg8yvmU=
github.com/tdewolff/test v1.0.0/go.mod h1:DiQUlutnqlEvdvhSn2LPGy4TFwRauAaYDsL+683RNX4=
github.com/tdewolff/test v1.0.6 h1:76mzYJQ83Op284kMT+63iCNCI7NEERsIN8dLM+RiKr4=
github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
package html // import "github.com/tdewolff/parse/html"
package html
import (
"io"
@ -90,6 +90,21 @@ func (l *Lexer) Restore() {
l.r.Restore()
}
// Offset returns the current position in the input stream.
func (l *Lexer) Offset() int {
return l.r.Offset()
}
// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
func (l *Lexer) Text() []byte {
return l.text
}
// AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (l *Lexer) AttrVal() []byte {
return l.attrVal
}
// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (l *Lexer) Next() (TokenType, []byte) {
l.text = nil
@ -108,15 +123,13 @@ func (l *Lexer) Next() (TokenType, []byte) {
} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
return AttributeToken, l.shiftAttribute()
}
start := l.r.Pos()
l.r.Skip()
l.inTag = false
if c == '/' {
l.r.Move(2)
l.text = l.r.Lexeme()[start:]
return StartTagVoidToken, l.r.Shift()
}
l.r.Move(1)
l.text = l.r.Lexeme()[start:]
return StartTagCloseToken, l.r.Shift()
}
@ -136,7 +149,8 @@ func (l *Lexer) Next() (TokenType, []byte) {
if l.r.Pos() > 0 {
if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
// return currently buffered texttoken so that we can return tag next iteration
return TextToken, l.r.Shift()
l.text = l.r.Shift()
return TextToken, l.text
}
} else if isEndTag {
l.r.Move(2)
@ -158,7 +172,8 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
} else if c == 0 && l.r.Err() != nil {
if l.r.Pos() > 0 {
return TextToken, l.r.Shift()
l.text = l.r.Shift()
return TextToken, l.text
}
return ErrorToken, nil
}
@ -166,19 +181,9 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
}
// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
func (l *Lexer) Text() []byte {
return l.text
}
// AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (l *Lexer) AttrVal() []byte {
return l.attrVal
}
////////////////////////////////////////////////////////////////
// The following functions follow the specifications at http://www.w3.org/html/wg/drafts/html/master/syntax.html
// The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html
func (l *Lexer) shiftRawText() []byte {
if l.rawTag == Plaintext {
@ -261,6 +266,7 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
l.r.Move(2)
for {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[4:]
return CommentToken, l.r.Shift()
} else if l.at('-', '-', '>') {
l.text = l.r.Lexeme()[4:]
@ -277,8 +283,10 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
l.r.Move(7)
for {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[9:]
return TextToken, l.r.Shift()
} else if l.at(']', ']', '>') {
l.text = l.r.Lexeme()[9:]
l.r.Move(3)
return TextToken, l.r.Shift()
}
@ -453,7 +461,7 @@ func (l *Lexer) shiftXml(rawTag Hash) []byte {
}
} else if c == 0 {
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
}
return l.r.Shift()
} else {
@ -468,7 +476,7 @@ func (l *Lexer) shiftXml(rawTag Hash) []byte {
break
} else if c == 0 {
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
}
return l.r.Shift()
}

File diff suppressed because it is too large Load diff

View file

@ -22,30 +22,32 @@ func Position(r io.Reader, offset int) (line, col int, context string) {
return
}
nNewline := 0
n := 1
newline := false
if c == '\n' {
nNewline = 1
newline = true
} else if c == '\r' {
if l.Peek(1) == '\n' {
nNewline = 2
newline = true
n = 2
} else {
nNewline = 1
newline = true
}
} else if c >= 0xC0 {
if r, n := l.PeekRune(0); r == '\u2028' || r == '\u2029' {
nNewline = n
var r rune
if r, n = l.PeekRune(0); r == '\u2028' || r == '\u2029' {
newline = true
}
} else {
l.Move(1)
}
if nNewline > 0 {
if offset < l.Pos()+nNewline {
// move onto offset position, let next iteration handle it
l.Move(offset - l.Pos())
continue
}
l.Move(nNewline)
if 1 < n && offset < l.Pos()+n {
// move onto offset position, let next iteration handle it
l.Move(offset - l.Pos())
continue
}
l.Move(n)
if newline {
line++
offset -= l.Pos()
l.Skip()

View file

@ -1,6 +1,8 @@
package strconv // import "github.com/tdewolff/parse/strconv"
package strconv
import "math"
import (
"math"
)
var float64pow10 = []float64{
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@ -83,8 +85,7 @@ func ParseFloat(b []byte) (float64, int) {
return f * math.Pow10(int(expExp)), i
}
const log2 = 0.301029995
const int64maxlen = 18
const log2 = 0.3010299956639812
func float64exp(f float64) int {
exp2 := 0
@ -100,11 +101,10 @@ func float64exp(f float64) int {
return int(exp10)
}
// AppendFloat appends a float to `b` with precision `prec`. It returns the new slice and whether successful or not. Precision is the number of decimals to display, thus prec + 1 == number of significant digits.
func AppendFloat(b []byte, f float64, prec int) ([]byte, bool) {
if math.IsNaN(f) || math.IsInf(f, 0) {
return b, false
} else if prec >= int64maxlen {
return b, false
}
neg := false
@ -112,8 +112,8 @@ func AppendFloat(b []byte, f float64, prec int) ([]byte, bool) {
f = -f
neg = true
}
if prec == -1 {
prec = int64maxlen - 1
if prec < 0 || 17 < prec {
prec = 17 // maximum number of significant digits in double
}
prec -= float64exp(f) // number of digits in front of the dot
f *= math.Pow10(prec)

View file

@ -1,4 +1,4 @@
package strconv // import "github.com/tdewolff/parse/strconv"
package strconv
import (
"math"

View file

@ -1,4 +1,9 @@
package parse // import "github.com/tdewolff/parse"
package parse
import (
"bytes"
"strconv"
)
// Copy returns a copy of the given byte slice.
func Copy(src []byte) (dst []byte) {
@ -161,37 +166,260 @@ func TrimWhitespace(b []byte) []byte {
// ReplaceMultipleWhitespace replaces character series of space, \n, \t, \f, \r into a single space or newline (when the serie contained a \n or \r).
func ReplaceMultipleWhitespace(b []byte) []byte {
j := 0
prevWS := false
hasNewline := false
for i, c := range b {
if IsWhitespace(c) {
prevWS = true
if IsNewline(c) {
hasNewline = true
}
} else {
if prevWS {
prevWS = false
if hasNewline {
hasNewline = false
b[j] = '\n'
} else {
b[j] = ' '
j, k := 0, 0 // j is write position, k is start of next text section
for i := 0; i < len(b); i++ {
if IsWhitespace(b[i]) {
start := i
newline := IsNewline(b[i])
i++
for ; i < len(b) && IsWhitespace(b[i]); i++ {
if IsNewline(b[i]) {
newline = true
}
j++
}
b[j] = b[i]
j++
if newline {
b[start] = '\n'
} else {
b[start] = ' '
}
if 1 < i-start { // more than one whitespace
if j == 0 {
j = start + 1
} else {
j += copy(b[j:], b[k:start+1])
}
k = i
}
}
}
if prevWS {
if hasNewline {
b[j] = '\n'
} else {
b[j] = ' '
}
j++
if j == 0 {
return b
} else if j == 1 { // only if starts with whitespace
b[k-1] = b[0]
return b[k-1:]
} else if k < len(b) {
j += copy(b[j:], b[k:])
}
return b[:j]
}
// replaceEntities will replace in b at index i, assuming that b[i] == '&' and that i+3<len(b). The returned int will be the last character of the entity, so that the next iteration can safely do i++ to continue and not miss any entities.
func replaceEntities(b []byte, i int, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) ([]byte, int) {
	const MaxEntityLength = 31 // longest HTML entity: CounterClockwiseContourIntegral

	var r []byte // replacement bytes for the entity, nil until a valid entity is recognized
	j := i + 1
	if b[j] == '#' {
		// Numeric character reference: &#DDD; or &#xHHH;
		j++
		if b[j] == 'x' {
			// Hexadecimal form: accumulate hex digits into c.
			j++
			c := 0
			for ; j < len(b) && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
				if b[j] <= '9' {
					c = c<<4 + int(b[j]-'0')
				} else if b[j] <= 'F' {
					c = c<<4 + int(b[j]-'A') + 10
				} else if b[j] <= 'f' {
					c = c<<4 + int(b[j]-'a') + 10
				}
			}
			// No hex digits at all, or the code point is too large (>= 10000): leave input untouched.
			if j <= i+3 || 10000 <= c {
				return b, j - 1
			}
			if c < 128 {
				// ASCII code point: replace with the raw byte itself.
				r = []byte{byte(c)}
			} else {
				// Non-ASCII: normalize the hex reference to its decimal form &#N;
				r = append(r, '&', '#')
				r = strconv.AppendInt(r, int64(c), 10)
				r = append(r, ';')
			}
		} else {
			// Decimal form: accumulate digits; the c < 128 loop guard bounds the value.
			c := 0
			for ; j < len(b) && c < 128 && b[j] >= '0' && b[j] <= '9'; j++ {
				c = c*10 + int(b[j]-'0')
			}
			// No digits, or value out of ASCII range: leave input untouched.
			if j <= i+2 || 128 <= c {
				return b, j - 1
			}
			r = []byte{byte(c)}
		}
	} else {
		// Named entity: scan up to the terminating ';' (bounded by MaxEntityLength).
		for ; j < len(b) && j-i-1 <= MaxEntityLength && b[j] != ';'; j++ {
		}
		// Empty name or unterminated entity: leave input untouched.
		if j <= i+1 || len(b) <= j {
			return b, j - 1
		}
		var ok bool
		// Look up the replacement for the name between '&' and ';'.
		r, ok = entitiesMap[string(b[i+1:j])]
		if !ok {
			return b, j
		}
	}
	// j is at semicolon
	n := j + 1 - i // total length of the matched entity, including '&' and ';'
	if j < len(b) && b[j] == ';' && 2 < n {
		if len(r) == 1 {
			// Single-byte replacement: check the reverse map for a canonical entity form of that byte.
			if q, ok := revEntitiesMap[r[0]]; ok {
				if len(q) == len(b[i:j+1]) && bytes.Equal(q, b[i:j+1]) {
					// Input already is the canonical form; keep it as-is.
					return b, j
				}
				// Rewrite to the canonical entity form instead of the raw byte.
				r = q
			} else if r[0] == '&' {
				// check if for example &amp; is followed by something that could potentially be an entity
				k := j + 1
				if k < len(b) && b[k] == '#' {
					k++
				}
				for ; k < len(b) && k-j <= MaxEntityLength && (b[k] >= '0' && b[k] <= '9' || b[k] >= 'a' && b[k] <= 'z' || b[k] >= 'A' && b[k] <= 'Z'); k++ {
				}
				if k < len(b) && b[k] == ';' {
					// Unescaping would create a new (ambiguous) entity; skip past it unchanged.
					return b, k
				}
			}
		}
		// Splice r over the entity in-place and shrink b accordingly.
		copy(b[i:], r)
		copy(b[i+len(r):], b[j+1:])
		b = b[:len(b)-n+len(r)]
		return b, i + len(r) - 1
	}
	return b, i
}
// ReplaceEntities replaces all occurrences of entities (such as &quot;) to their respective unencoded bytes.
func ReplaceEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
	i := 0
	for i < len(b) {
		// Only attempt a parse when an ampersand starts with at least four bytes remaining.
		if b[i] == '&' && i+3 < len(b) {
			b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
		}
		i++
	}
	return b
}
// ReplaceMultipleWhitespaceAndEntities is a combination of ReplaceMultipleWhitespace and ReplaceEntities. It is faster than executing both sequentially.
func ReplaceMultipleWhitespaceAndEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
	j, k := 0, 0 // j is write position, k is start of next text section
	for i := 0; i < len(b); i++ {
		if IsWhitespace(b[i]) {
			start := i
			newline := IsNewline(b[i])
			i++
			// Consume the entire whitespace run, remembering whether it contains a newline.
			for ; i < len(b) && IsWhitespace(b[i]); i++ {
				if IsNewline(b[i]) {
					newline = true
				}
			}
			// Collapse the run into a single '\n' (if it contained one) or a single ' '.
			if newline {
				b[start] = '\n'
			} else {
				b[start] = ' '
			}
			if 1 < i-start { // more than one whitespace
				if j == 0 {
					// First collapsed run: everything before it is already in place.
					j = start + 1
				} else {
					// Shift the preceding text section left over the gap left by earlier runs.
					j += copy(b[j:], b[k:start+1])
				}
				k = i
			}
		}
		if i+3 < len(b) && b[i] == '&' {
			// Entity replacement may shrink b and advances i past the replacement.
			b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
		}
	}
	if j == 0 {
		// No multi-whitespace run was collapsed; b was only modified in-place.
		return b
	} else if j == 1 { // only if starts with whitespace
		b[k-1] = b[0]
		return b[k-1:]
	} else if k < len(b) {
		// Move the final text section into place.
		j += copy(b[j:], b[k:])
	}
	return b[:j]
}
// DecodeURL decodes percent-encoded escapes (%XX with XX two hexadecimal digits)
// in b in-place and replaces '+' by a space. Only escapes that decode to ASCII
// (values below 128) are unescaped; malformed or non-ASCII escapes are kept verbatim.
func DecodeURL(b []byte) []byte {
	for i := 0; i < len(b); i++ {
		if b[i] == '%' && i+2 < len(b) {
			j := i + 1
			c := 0
			// Parse exactly two hexadecimal digits; stop early on any non-hex byte
			// (previously this accepted any letter a-z/A-Z, mangling escapes like "%4z").
			for ; j < i+3 && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
				if b[j] <= '9' {
					c = c<<4 + int(b[j]-'0')
				} else if b[j] <= 'F' {
					c = c<<4 + int(b[j]-'A') + 10
				} else {
					c = c<<4 + int(b[j]-'a') + 10
				}
			}
			if j == i+3 && c < 128 {
				// Both digits valid and the result is ASCII: collapse %XX into one byte.
				b[i] = byte(c)
				b = append(b[:i+1], b[i+3:]...)
			}
		} else if b[i] == '+' {
			b[i] = ' '
		}
	}
	return b
}
// URLEncodingTable is a per-byte character map for EncodeURL: entries set to
// true mark bytes that must be percent-encoded; bytes mapped to false pass
// through unchanged. The row comments name the bytes covered by each row.
var URLEncodingTable = [256]bool{
	// ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	false, false, true, true, true, true, true, false, // space, !, '
	false, false, false, true, true, false, false, true, // (, ), *, -, .
	false, false, false, false, false, false, false, false, // 0, 1, 2, 3, 4, 5, 6, 7
	false, false, true, true, true, true, true, true, // 8, 9
	true, false, false, false, false, false, false, false, // A, B, C, D, E, F, G
	false, false, false, false, false, false, false, false, // H, I, J, K, L, M, N, O
	false, false, false, false, false, false, false, false, // P, Q, R, S, T, U, V, W
	false, false, false, true, true, true, true, false, // X, Y, Z, _
	true, false, false, false, false, false, false, false, // a, b, c, d, e, f, g
	false, false, false, false, false, false, false, false, // h, i, j, k, l, m, n, o
	false, false, false, false, false, false, false, false, // p, q, r, s, t, u, v, w
	false, false, false, true, true, true, false, true, // x, y, z, ~
	// non-ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
}
// EncodeURL percent-encodes, in-place, every byte of b marked true in table,
// and replaces each remaining space by '+'. It returns the (possibly grown) slice.
func EncodeURL(b []byte, table [256]bool) []byte {
	const hexDigits = "0123456789ABCDEF"
	for i := 0; i < len(b); i++ {
		switch c := b[i]; {
		case table[c]:
			// Grow by two bytes and shift the tail right to make room for %XX.
			b = append(b, 0, 0)
			copy(b[i+3:], b[i+1:])
			b[i] = '%'
			b[i+1] = hexDigits[c>>4]
			b[i+2] = hexDigits[c&0x0F]
		case c == ' ':
			b[i] = '+'
		}
	}
	return b
}