dispatch/vendor/github.com/blevesearch/segment/segment_test.go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package segment

import (
    "bufio"
    "bytes"
    "errors"
    "io"
    "strings"
    "testing"
)

// Tests borrowed from bufio.Scanner to test the Segmenter.

// slowReader is a reader that returns only a few bytes at a time, to test the incremental
// reads in Segmenter.Segment.
type slowReader struct {
    max int
    buf io.Reader
}

func (sr *slowReader) Read(p []byte) (n int, err error) {
    if len(p) > sr.max {
        p = p[0:sr.max]
    }
    return sr.buf.Read(p)
}

// genLine writes to buf a predictable but non-trivial line of text of length
// n, including the terminal newline and an occasional carriage return.
// If addNewline is false, the \r and \n are not emitted.
func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
    buf.Reset()
    doCR := lineNum%5 == 0
    if doCR {
        n--
    }
    for i := 0; i < n-1; i++ { // Stop early for \n.
        c := 'a' + byte(lineNum+i)
        if c == '\n' || c == '\r' { // Don't confuse us.
            c = 'N'
        }
        buf.WriteByte(c)
    }
    if addNewline {
        if doCR {
            buf.WriteByte('\r')
        }
        buf.WriteByte('\n')
    }
    return
}
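
// wrapSplitFuncAsSegmentFuncForTesting adapts a bufio.SplitFunc into a
// SegmentFunc so the standard bufio split functions can drive the Segmenter
// in these tests; the segment type is always reported as 0.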
func wrapSplitFuncAsSegmentFuncForTesting(splitFunc bufio.SplitFunc) SegmentFunc {
    return func(data []byte, atEOF bool) (advance int, token []byte, typ int, err error) {
        typ = 0
        advance, token, err = splitFunc(data, atEOF)
        return
    }
}

// Test that the line segmenter errors out on a long line.
func TestSegmentTooLong(t *testing.T) {
    const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
    // Build a buffer of lots of line lengths, growing well past smallMaxTokenSize
    // so that segmentation must eventually fail.
    tmp := new(bytes.Buffer)
    buf := new(bytes.Buffer)
    lineNum := 0
    j := 0
    for i := 0; i < 2*smallMaxTokenSize; i++ {
        genLine(tmp, lineNum, j, true)
        j++
        buf.Write(tmp.Bytes())
        lineNum++
    }
    s := NewSegmenter(&slowReader{3, buf})
    // change to line segmenter for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
    s.MaxTokenSize(smallMaxTokenSize)
    j = 0
    for lineNum := 0; s.Segment(); lineNum++ {
        genLine(tmp, lineNum, j, false)
        if j < smallMaxTokenSize {
            j++
        } else {
            j--
        }
        line := tmp.Bytes()
        if !bytes.Equal(s.Bytes(), line) {
            t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
        }
    }
    err := s.Err()
    if err != ErrTooLong {
        t.Fatalf("expected ErrTooLong; got %s", err)
    }
}

var testError = errors.New("testError")

// Test the correct error is returned when the split function errors out.
func TestSegmentError(t *testing.T) {
    // Create a split function that delivers a little data, then a predictable error.
    numSplits := 0
    const okCount = 7
    errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
        if atEOF {
            panic("didn't get enough data")
        }
        if numSplits >= okCount {
            return 0, nil, testError
        }
        numSplits++
        return 1, data[0:1], nil
    }
    // Read the data.
    const text = "abcdefghijklmnopqrstuvwxyz"
    buf := strings.NewReader(text)
    s := NewSegmenter(&slowReader{1, buf})
    // use the erroring split function for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
    var i int
    for i = 0; s.Segment(); i++ {
        if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
            t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
        }
    }
    // Check correct termination location and error.
    if i != okCount {
        t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
    }
    err := s.Err()
    if err != testError {
        t.Fatalf("expected %q got %v", testError, err)
    }
}

// Test that Segment finishes if we have endless empty reads.
type endlessZeros struct{}

func (endlessZeros) Read(p []byte) (int, error) {
    return 0, nil
}

func TestBadReader(t *testing.T) {
    scanner := NewSegmenter(endlessZeros{})
    for scanner.Segment() {
        t.Fatal("read should fail")
    }
    err := scanner.Err()
    if err != io.ErrNoProgress {
        t.Errorf("unexpected error: %v", err)
    }
}
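
// Test that a split function returning a negative advance count surfaces
// ErrNegativeAdvance from the Segmenter.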
func TestSegmentAdvanceNegativeError(t *testing.T) {
    errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
        if atEOF {
            panic("didn't get enough data")
        }
        return -1, data[0:1], nil
    }
    // Read the data.
    const text = "abcdefghijklmnopqrstuvwxyz"
    buf := strings.NewReader(text)
    s := NewSegmenter(&slowReader{1, buf})
    // use the misbehaving split function for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
    s.Segment()
    err := s.Err()
    if err != ErrNegativeAdvance {
        t.Fatalf("expected %q got %v", ErrNegativeAdvance, err)
    }
}
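
// Test that a split function advancing past the end of the available data
// surfaces ErrAdvanceTooFar from the Segmenter.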
func TestSegmentAdvanceTooFarError(t *testing.T) {
    errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
        if atEOF {
            panic("didn't get enough data")
        }
        return len(data) + 10, data[0:1], nil
    }
    // Read the data.
    const text = "abcdefghijklmnopqrstuvwxyz"
    buf := strings.NewReader(text)
    s := NewSegmenter(&slowReader{1, buf})
    // use the misbehaving split function for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
    s.Segment()
    err := s.Err()
    if err != ErrAdvanceTooFar {
        t.Fatalf("expected %q got %v", ErrAdvanceTooFar, err)
    }
}
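
// Test that a single token larger than the default starting buffer is still
// returned whole; the internal buffer has to grow to hold it.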
func TestSegmentLongTokens(t *testing.T) {
    // Read the data.
    text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
    buf := strings.NewReader(string(text))
    s := NewSegmenter(&slowReader{1, buf})
    // change to line segmenter for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
    for s.Segment() {
        line := s.Bytes()
        if !bytes.Equal(text, line) {
            t.Errorf("expected %s, got %s", text, line)
        }
    }
    err := s.Err()
    if err != nil {
        t.Fatalf("unexpected error; got %s", err)
    }
}
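
// Like TestSegmentLongTokens, but with an explicit MaxTokenSize of 6144; the
// long token must still be returned intact even though the buffer cannot keep
// doubling up to that limit.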
func TestSegmentLongTokensDontDouble(t *testing.T) {
    // Read the data.
    text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
    buf := strings.NewReader(string(text))
    s := NewSegmenter(&slowReader{1, buf})
    // change to line segmenter for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
    s.MaxTokenSize(6144)
    for s.Segment() {
        line := s.Bytes()
        if !bytes.Equal(text, line) {
            t.Errorf("expected %s, got %s", text, line)
        }
    }
    err := s.Err()
    if err != nil {
        t.Fatalf("unexpected error; got %s", err)
    }
}
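
// Illustrative usage sketch (not from the upstream bufio tests): the same
// wrapper as above plugs bufio.ScanWords into the Segmenter, relying only on
// methods already exercised in this file (SetSegmenter, Segment, Bytes, Err).
func TestSegmentWrappedScanWordsExample(t *testing.T) {
    const text = "alpha beta gamma"
    s := NewSegmenter(strings.NewReader(text))
    // change to word segmenter for testing
    s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanWords))
    var words []string
    for s.Segment() {
        words = append(words, string(s.Bytes()))
    }
    if err := s.Err(); err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    expected := []string{"alpha", "beta", "gamma"}
    if len(words) != len(expected) {
        t.Fatalf("expected %d words, got %d", len(expected), len(words))
    }
    for i, want := range expected {
        if words[i] != want {
            t.Errorf("#%d: expected %q got %q", i, want, words[i])
        }
    }
}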