242 lines
6.5 KiB
Go
242 lines
6.5 KiB
Go
|
// Copyright (c) 2014 Couchbase, Inc.
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||
|
// except in compliance with the License. You may obtain a copy of the License at
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||
|
// either express or implied. See the License for the specific language governing permissions
|
||
|
// and limitations under the License.
|
||
|
|
||
|
package segment
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"bytes"
|
||
|
"errors"
|
||
|
"io"
|
||
|
"strings"
|
||
|
"testing"
|
||
|
)
|
||
|
|
||
|
// The tests below are adapted from the bufio.Scanner tests so that they exercise Segmenter.
|
||
|
|
||
|
// slowReader wraps an io.Reader and hands back at most max bytes per Read
// call, so that a consumer has to assemble its input from many small reads.
type slowReader struct {
	max int       // upper bound on the bytes requested from buf per Read
	buf io.Reader // underlying data source
}

// Read trims p to at most sr.max bytes and delegates to the wrapped reader,
// returning whatever that reader returns.
func (sr *slowReader) Read(p []byte) (n int, err error) {
	chunk := p
	if sr.max < len(chunk) {
		chunk = chunk[:sr.max]
	}
	return sr.buf.Read(chunk)
}
|
||
|
|
||
|
// genLine resets buf and fills it with a deterministic line of text derived
// from lineNum. The requested length n counts the line terminator: one byte
// is always reserved for '\n', and every fifth line (lineNum%5 == 0) reserves
// one more for a preceding '\r'. The terminator bytes are only written when
// addNewline is true; otherwise just the payload is produced.
func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
	buf.Reset()

	withCR := lineNum%5 == 0
	payload := n - 1 // leave room for '\n'
	if withCR {
		payload-- // and for '\r'
	}

	for off := 0; off < payload; off++ {
		switch ch := 'a' + byte(lineNum+off); ch {
		case '\n', '\r':
			// Never emit a line terminator inside the payload.
			buf.WriteByte('N')
		default:
			buf.WriteByte(ch)
		}
	}

	if !addNewline {
		return
	}
	if withCR {
		buf.WriteByte('\r')
	}
	buf.WriteByte('\n')
}
|
||
|
|
||
|
func wrapSplitFuncAsSegmentFuncForTesting(splitFunc bufio.SplitFunc) SegmentFunc {
|
||
|
return func(data []byte, atEOF bool) (advance int, token []byte, typ int, err error) {
|
||
|
typ = 0
|
||
|
advance, token, err = splitFunc(data, atEOF)
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Test that the line segmenter errors out on a long line.
|
||
|
func TestSegmentTooLong(t *testing.T) {
|
||
|
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
|
||
|
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
|
||
|
tmp := new(bytes.Buffer)
|
||
|
buf := new(bytes.Buffer)
|
||
|
lineNum := 0
|
||
|
j := 0
|
||
|
for i := 0; i < 2*smallMaxTokenSize; i++ {
|
||
|
genLine(tmp, lineNum, j, true)
|
||
|
j++
|
||
|
buf.Write(tmp.Bytes())
|
||
|
lineNum++
|
||
|
}
|
||
|
s := NewSegmenter(&slowReader{3, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||
|
s.MaxTokenSize(smallMaxTokenSize)
|
||
|
j = 0
|
||
|
for lineNum := 0; s.Segment(); lineNum++ {
|
||
|
genLine(tmp, lineNum, j, false)
|
||
|
if j < smallMaxTokenSize {
|
||
|
j++
|
||
|
} else {
|
||
|
j--
|
||
|
}
|
||
|
line := tmp.Bytes()
|
||
|
if !bytes.Equal(s.Bytes(), line) {
|
||
|
t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
|
||
|
}
|
||
|
}
|
||
|
err := s.Err()
|
||
|
if err != ErrTooLong {
|
||
|
t.Fatalf("expected ErrTooLong; got %s", err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// testError is the sentinel error returned by the failing split function in
// TestSegmentError, which then checks that the same value comes back from Err.
var testError = errors.New("testError")
|
||
|
|
||
|
// Test the correct error is returned when the split function errors out.
|
||
|
func TestSegmentError(t *testing.T) {
|
||
|
// Create a split function that delivers a little data, then a predictable error.
|
||
|
numSplits := 0
|
||
|
const okCount = 7
|
||
|
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||
|
if atEOF {
|
||
|
panic("didn't get enough data")
|
||
|
}
|
||
|
if numSplits >= okCount {
|
||
|
return 0, nil, testError
|
||
|
}
|
||
|
numSplits++
|
||
|
return 1, data[0:1], nil
|
||
|
}
|
||
|
// Read the data.
|
||
|
const text = "abcdefghijklmnopqrstuvwxyz"
|
||
|
buf := strings.NewReader(text)
|
||
|
s := NewSegmenter(&slowReader{1, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||
|
var i int
|
||
|
for i = 0; s.Segment(); i++ {
|
||
|
if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
|
||
|
t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
|
||
|
}
|
||
|
}
|
||
|
// Check correct termination location and error.
|
||
|
if i != okCount {
|
||
|
t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
|
||
|
}
|
||
|
err := s.Err()
|
||
|
if err != testError {
|
||
|
t.Fatalf("expected %q got %v", testError, err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// endlessZeros is a reader that never makes progress: every Read reports
// zero bytes read and no error.
type endlessZeros struct{}

// Read ignores its buffer and always returns (0, nil).
func (endlessZeros) Read([]byte) (n int, err error) {
	return 0, nil
}
|
||
|
|
||
|
func TestBadReader(t *testing.T) {
|
||
|
scanner := NewSegmenter(endlessZeros{})
|
||
|
for scanner.Segment() {
|
||
|
t.Fatal("read should fail")
|
||
|
}
|
||
|
err := scanner.Err()
|
||
|
if err != io.ErrNoProgress {
|
||
|
t.Errorf("unexpected error: %v", err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestSegmentAdvanceNegativeError(t *testing.T) {
|
||
|
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||
|
if atEOF {
|
||
|
panic("didn't get enough data")
|
||
|
}
|
||
|
return -1, data[0:1], nil
|
||
|
}
|
||
|
// Read the data.
|
||
|
const text = "abcdefghijklmnopqrstuvwxyz"
|
||
|
buf := strings.NewReader(text)
|
||
|
s := NewSegmenter(&slowReader{1, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||
|
s.Segment()
|
||
|
err := s.Err()
|
||
|
if err != ErrNegativeAdvance {
|
||
|
t.Fatalf("expected %q got %v", testError, err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestSegmentAdvanceTooFarError(t *testing.T) {
|
||
|
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||
|
if atEOF {
|
||
|
panic("didn't get enough data")
|
||
|
}
|
||
|
return len(data) + 10, data[0:1], nil
|
||
|
}
|
||
|
// Read the data.
|
||
|
const text = "abcdefghijklmnopqrstuvwxyz"
|
||
|
buf := strings.NewReader(text)
|
||
|
s := NewSegmenter(&slowReader{1, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||
|
s.Segment()
|
||
|
err := s.Err()
|
||
|
if err != ErrAdvanceTooFar {
|
||
|
t.Fatalf("expected %q got %v", testError, err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestSegmentLongTokens(t *testing.T) {
|
||
|
// Read the data.
|
||
|
text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
|
||
|
buf := strings.NewReader(string(text))
|
||
|
s := NewSegmenter(&slowReader{1, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||
|
for s.Segment() {
|
||
|
line := s.Bytes()
|
||
|
if !bytes.Equal(text, line) {
|
||
|
t.Errorf("expected %s, got %s", text, line)
|
||
|
}
|
||
|
}
|
||
|
err := s.Err()
|
||
|
if err != nil {
|
||
|
t.Fatalf("unexpected error; got %s", err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestSegmentLongTokensDontDouble(t *testing.T) {
|
||
|
// Read the data.
|
||
|
text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
|
||
|
buf := strings.NewReader(string(text))
|
||
|
s := NewSegmenter(&slowReader{1, buf})
|
||
|
// change to line segmenter for testing
|
||
|
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||
|
s.MaxTokenSize(6144)
|
||
|
for s.Segment() {
|
||
|
line := s.Bytes()
|
||
|
if !bytes.Equal(text, line) {
|
||
|
t.Errorf("expected %s, got %s", text, line)
|
||
|
}
|
||
|
}
|
||
|
err := s.Err()
|
||
|
if err != nil {
|
||
|
t.Fatalf("unexpected error; got %s", err)
|
||
|
}
|
||
|
}
|