Switch from Godep to go vendoring

This commit is contained in:
Ken-Håvard Lieng 2016-03-01 01:51:26 +01:00
parent 6b37713bc0
commit cd317761c5
1504 changed files with 263076 additions and 34441 deletions

View file

@ -0,0 +1,24 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"time"
)
type Collector interface {
Collect(searcher Searcher) error
Results() DocumentMatchCollection
Total() uint64
MaxScore() float64
Took() time.Duration
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
FacetResults() FacetResults
}
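
A minimal usage sketch of the interface above (the concrete Collector and Searcher values are placeholders, not defined in this file): drive Collect once, then read the accumulated state through the accessor methods.

// Hypothetical helper showing the intended call order; assumes a concrete
// Collector registered elsewhere and an open Searcher.
func runSearch(c Collector, s Searcher) (DocumentMatchCollection, error) {
	if err := c.Collect(s); err != nil { // drains the searcher into the collector
		return nil, err
	}
	// Total, MaxScore and Took are also valid once Collect has returned.
	return c.Results(), nil
}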

View file

@ -0,0 +1,135 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"time"
"github.com/blevesearch/bleve/search"
)
type TopScoreCollector struct {
k int
skip int
results *list.List
took time.Duration
maxScore float64
total uint64
facetsBuilder *search.FacetsBuilder
}
func NewTopScorerCollector(k int) *TopScoreCollector {
return &TopScoreCollector{
k: k,
skip: 0,
results: list.New(),
}
}
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
return &TopScoreCollector{
k: k,
skip: skip,
results: list.New(),
}
}
func (tksc *TopScoreCollector) Total() uint64 {
return tksc.total
}
func (tksc *TopScoreCollector) MaxScore() float64 {
return tksc.maxScore
}
func (tksc *TopScoreCollector) Took() time.Duration {
return tksc.took
}
func (tksc *TopScoreCollector) Collect(searcher search.Searcher) error {
startTime := time.Now()
next, err := searcher.Next()
for err == nil && next != nil {
tksc.collectSingle(next)
if tksc.facetsBuilder != nil {
err = tksc.facetsBuilder.Update(next)
if err != nil {
break
}
}
next, err = searcher.Next()
}
// compute search duration
tksc.took = time.Since(startTime)
if err != nil {
return err
}
return nil
}
func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
// increment total hits
tksc.total++
// update max score
if dm.Score > tksc.maxScore {
tksc.maxScore = dm.Score
}
for e := tksc.results.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DocumentMatch)
if dm.Score < curr.Score {
tksc.results.InsertBefore(dm, e)
// if we just made the list too long
if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head
tksc.results.Remove(tksc.results.Front())
}
return
}
}
// if we got to the end, we still have to add it
tksc.results.PushBack(dm)
if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head
tksc.results.Remove(tksc.results.Front())
}
}
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
if tksc.results.Len()-tksc.skip > 0 {
rv := make(search.DocumentMatchCollection, tksc.results.Len()-tksc.skip)
i := 0
skipped := 0
for e := tksc.results.Back(); e != nil; e = e.Prev() {
if skipped < tksc.skip {
skipped++
continue
}
rv[i] = e.Value.(*search.DocumentMatch)
i++
}
return rv
}
return search.DocumentMatchCollection{}
}
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
tksc.facetsBuilder = facetsBuilder
}
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
if tksc.facetsBuilder != nil {
return tksc.facetsBuilder.Results()
}
return search.FacetResults{}
}
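
A hedged pagination sketch for the collector above: k and skip together behave like page size and offset, so the third page of ten hits uses skip 20 (the searcher argument is a placeholder for any open searcher).

// Hypothetical: fetch page 3 with 10 hits per page from an open searcher.
func topHitsPage3(searcher search.Searcher) (search.DocumentMatchCollection, error) {
	collector := NewTopScorerSkipCollector(10, 20)
	if err := collector.Collect(searcher); err != nil {
		return nil, err
	}
	return collector.Results(), nil // at most 10 matches, best score first
}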

View file

@ -0,0 +1,249 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"math/rand"
"strconv"
"testing"
"github.com/blevesearch/bleve/search"
)
func TestTop10Scores(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 11,
},
&search.DocumentMatch{
ID: "b",
Score: 9,
},
&search.DocumentMatch{
ID: "c",
Score: 11,
},
&search.DocumentMatch{
ID: "d",
Score: 9,
},
&search.DocumentMatch{
ID: "e",
Score: 11,
},
&search.DocumentMatch{
ID: "f",
Score: 9,
},
&search.DocumentMatch{
ID: "g",
Score: 11,
},
&search.DocumentMatch{
ID: "h",
Score: 9,
},
&search.DocumentMatch{
ID: "i",
Score: 11,
},
&search.DocumentMatch{
ID: "j",
Score: 11,
},
&search.DocumentMatch{
ID: "k",
Score: 11,
},
&search.DocumentMatch{
ID: "l",
Score: 99,
},
&search.DocumentMatch{
ID: "m",
Score: 11,
},
&search.DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerCollector(10)
err := collector.Collect(searcher)
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 10 {
t.Fatalf("expected 10 results, got %d", len(results))
}
if results[0].ID != "l" {
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
}
if results[0].Score != 99.0 {
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
}
minScore := 1000.0
for _, result := range results {
if result.Score < minScore {
minScore = result.Score
}
}
if minScore < 10 {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}
func TestTop10ScoresSkip10(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 11,
},
&search.DocumentMatch{
ID: "b",
Score: 9.5,
},
&search.DocumentMatch{
ID: "c",
Score: 11,
},
&search.DocumentMatch{
ID: "d",
Score: 9,
},
&search.DocumentMatch{
ID: "e",
Score: 11,
},
&search.DocumentMatch{
ID: "f",
Score: 9,
},
&search.DocumentMatch{
ID: "g",
Score: 11,
},
&search.DocumentMatch{
ID: "h",
Score: 9,
},
&search.DocumentMatch{
ID: "i",
Score: 11,
},
&search.DocumentMatch{
ID: "j",
Score: 11,
},
&search.DocumentMatch{
ID: "k",
Score: 11,
},
&search.DocumentMatch{
ID: "l",
Score: 99,
},
&search.DocumentMatch{
ID: "m",
Score: 11,
},
&search.DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerSkipCollector(10, 10)
err := collector.Collect(searcher)
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 4 {
t.Fatalf("expected 4 results, got %d", len(results))
}
if results[0].ID != "b" {
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
}
}
func BenchmarkTop10of100000Scores(b *testing.B) {
matches := make(search.DocumentMatchCollection, 0, 100000)
for i := 0; i < 100000; i++ {
matches = append(matches, &search.DocumentMatch{
ID: strconv.Itoa(i),
Score: rand.Float64(),
})
}
searcher := &stubSearcher{
matches: matches,
}
collector := NewTopScorerCollector(10)
b.ResetTimer()
err := collector.Collect(searcher)
if err != nil {
b.Fatal(err)
}
res := collector.Results()
for _, dm := range res {
b.Logf("%s - %f\n", dm.ID, dm.Score)
}
}

View file

@ -0,0 +1,60 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"github.com/blevesearch/bleve/search"
)
type stubSearcher struct {
index int
matches search.DocumentMatchCollection
}
func (ss *stubSearcher) Next() (*search.DocumentMatch, error) {
if ss.index < len(ss.matches) {
rv := ss.matches[ss.index]
ss.index++
return rv, nil
}
return nil, nil
}
func (ss *stubSearcher) Advance(ID string) (*search.DocumentMatch, error) {
for ss.index < len(ss.matches) && ss.matches[ss.index].ID < ID {
ss.index++
}
if ss.index < len(ss.matches) {
rv := ss.matches[ss.index]
ss.index++
return rv, nil
}
return nil, nil
}
func (ss *stubSearcher) Close() error {
return nil
}
func (ss *stubSearcher) Weight() float64 {
return 0.0
}
func (ss *stubSearcher) SetQueryNorm(float64) {
}
func (ss *stubSearcher) Count() uint64 {
return uint64(len(ss.matches))
}
func (ss *stubSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,29 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"encoding/json"
"fmt"
)
type Explanation struct {
Value float64 `json:"value"`
Message string `json:"message"`
Children []*Explanation `json:"children,omitempty"`
}
func (expl *Explanation) String() string {
js, err := json.MarshalIndent(expl, "", " ")
if err != nil {
return fmt.Sprintf("error serializing explanation to json: %v", err)
}
return string(js)
}
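
A small sketch of how the struct above nests and serializes; the values are made up for illustration.

// Hypothetical explanation tree; fmt.Println uses the String() method above,
// so this prints the tree as indented JSON.
func printExplanationExample() {
	expl := &Explanation{
		Value:   1.5,
		Message: "product of:",
		Children: []*Explanation{
			{Value: 0.5, Message: "term frequency"},
			{Value: 3.0, Message: "field boost"},
		},
	}
	fmt.Println(expl)
}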

File diff suppressed because it is too large

View file

@ -0,0 +1,147 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package facets
import (
"container/list"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
type dateTimeRange struct {
start time.Time
end time.Time
}
type DateTimeFacetBuilder struct {
size int
field string
termsCount map[string]int
total int
missing int
ranges map[string]*dateTimeRange
}
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
return &DateTimeFacetBuilder{
size: size,
field: field,
termsCount: make(map[string]int),
ranges: make(map[string]*dateTimeRange, 0),
}
}
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
r := dateTimeRange{
start: start,
end: end,
}
fb.ranges[name] = &r
}
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric_util.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
}
}
}
}
} else {
fb.missing++
}
}
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
rv := search.FacetResult{
Field: fb.field,
Total: fb.total,
Missing: fb.missing,
}
// FIXME better implementation needed here this is quick and dirty
topN := list.New()
// walk entries and find top N
OUTER:
for term, count := range fb.termsCount {
dateRange := fb.ranges[term]
tf := &search.DateRangeFacet{
Name: term,
Count: count,
}
if !dateRange.start.IsZero() {
start := dateRange.start.Format(time.RFC3339Nano)
tf.Start = &start
}
if !dateRange.end.IsZero() {
end := dateRange.end.Format(time.RFC3339Nano)
tf.End = &end
}
for e := topN.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DateRangeFacet)
if tf.Count < curr.Count {
topN.InsertBefore(tf, e)
// if we just made the list too long
if topN.Len() > fb.size {
// remove the head
topN.Remove(topN.Front())
}
continue OUTER
}
}
// if we got to the end, we still have to add it
topN.PushBack(tf)
if topN.Len() > fb.size {
// remove the head
topN.Remove(topN.Front())
}
}
// we now have the list of the top N facets
rv.DateRanges = make([]*search.DateRangeFacet, topN.Len())
i := 0
notOther := 0
for e := topN.Back(); e != nil; e = e.Prev() {
rv.DateRanges[i] = e.Value.(*search.DateRangeFacet)
i++
notOther += e.Value.(*search.DateRangeFacet).Count
}
rv.Other = fb.total - notOther
return &rv
}
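
A hedged example of defining ranges for the builder above; per the zero-value checks in Update and Result, a zero time.Time leaves that side of the range open. The field name and dates are illustrative only.

// Hypothetical setup: one open-ended range on each side of 2015-01-01.
func exampleDateFacet() *DateTimeFacetBuilder {
	cutoff := time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)
	fb := NewDateTimeFacetBuilder("updated", 10)
	fb.AddRange("before_2015", time.Time{}, cutoff) // zero start = unbounded below
	fb.AddRange("since_2015", cutoff, time.Time{})  // zero end = unbounded above
	return fb
}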

View file

@ -0,0 +1,120 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package facets
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
type numericRange struct {
min *float64
max *float64
}
type NumericFacetBuilder struct {
size int
field string
termsCount map[string]int
total int
missing int
ranges map[string]*numericRange
}
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
return &NumericFacetBuilder{
size: size,
field: field,
termsCount: make(map[string]int),
ranges: make(map[string]*numericRange, 0),
}
}
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
r := numericRange{
min: min,
max: max,
}
fb.ranges[name] = &r
}
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric_util.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric_util.Int64ToFloat64(i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
}
}
}
}
} else {
fb.missing++
}
}
func (fb *NumericFacetBuilder) Result() *search.FacetResult {
rv := search.FacetResult{
Field: fb.field,
Total: fb.total,
Missing: fb.missing,
}
rv.NumericRanges = make([]*search.NumericRangeFacet, 0, len(fb.termsCount))
for term, count := range fb.termsCount {
numericRange := fb.ranges[term]
tf := &search.NumericRangeFacet{
Name: term,
Count: count,
Min: numericRange.min,
Max: numericRange.max,
}
rv.NumericRanges = append(rv.NumericRanges, tf)
}
sort.Sort(rv.NumericRanges)
// we now have the list of the top N facets
if fb.size < len(rv.NumericRanges) {
rv.NumericRanges = rv.NumericRanges[:fb.size]
}
notOther := 0
for _, nr := range rv.NumericRanges {
notOther += nr.Count
}
rv.Other = fb.total - notOther
return &rv
}

View file

@ -0,0 +1,49 @@
package facets
import (
"strconv"
"testing"
"github.com/blevesearch/bleve/index"
nu "github.com/blevesearch/bleve/numeric_util"
)
var pcodedvalues []nu.PrefixCoded
func init() {
pcodedvalues = []nu.PrefixCoded{
	nu.PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
	nu.PrefixCoded{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
	nu.PrefixCoded{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7a, 0x1d, 0xa},
	nu.PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x16, 0x9, 0x4a, 0x7b},
}
}
func BenchmarkNumericFacet10(b *testing.B) {
numericFacetN(b, 10)
}
func BenchmarkNumericFacet100(b *testing.B) {
numericFacetN(b, 100)
}
func BenchmarkNumericFacet1000(b *testing.B) {
numericFacetN(b, 1000)
}
func numericFacetN(b *testing.B, numTerms int) {
field := "test"
nfb := NewNumericFacetBuilder(field, numTerms)
min, max := 0.0, 9999999998.0
for i := 0; i <= numTerms; i++ {
max++
min--
nfb.AddRange("rangename"+strconv.Itoa(i), &min, &max)
for _, pv := range pcodedvalues {
nfb.Update(index.FieldTerms{field: []string{string(pv)}})
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
nfb.Result()
}
}

View file

@ -0,0 +1,86 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package facets
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type TermsFacetBuilder struct {
size int
field string
termsCount map[string]int
total int
missing int
}
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
return &TermsFacetBuilder{
size: size,
field: field,
termsCount: make(map[string]int),
}
}
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
existingCount, existed := fb.termsCount[term]
if existed {
fb.termsCount[term] = existingCount + 1
} else {
fb.termsCount[term] = 1
}
fb.total++
}
} else {
fb.missing++
}
}
func (fb *TermsFacetBuilder) Result() *search.FacetResult {
rv := search.FacetResult{
Field: fb.field,
Total: fb.total,
Missing: fb.missing,
}
rv.Terms = make([]*search.TermFacet, 0, len(fb.termsCount))
for term, count := range fb.termsCount {
tf := &search.TermFacet{
Term: term,
Count: count,
}
rv.Terms = append(rv.Terms, tf)
}
sort.Sort(rv.Terms)
// we now have the list of the top N facets
trimTopN := fb.size
if trimTopN > len(rv.Terms) {
trimTopN = len(rv.Terms)
}
rv.Terms = rv.Terms[:trimTopN]
notOther := 0
for _, tf := range rv.Terms {
notOther += tf.Count
}
rv.Other = fb.total - notOther
return &rv
}
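
A minimal sketch of feeding the builder above, mirroring the benchmark that follows: Update takes index.FieldTerms keyed by field name, and Result trims to the configured size. The field name and terms here are illustrative.

// Hypothetical: count terms for the "tags" field across a few documents.
func exampleTermsFacet() *search.FacetResult {
	tfb := NewTermsFacetBuilder("tags", 2) // keep only the top 2 terms
	for _, terms := range [][]string{{"go"}, {"go", "search"}, {"bleve"}} {
		tfb.Update(index.FieldTerms{"tags": terms})
	}
	return tfb.Result() // Other picks up counts trimmed off the top-N
}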

View file

@ -0,0 +1,58 @@
package facets
import (
"io/ioutil"
"regexp"
"testing"
"github.com/blevesearch/bleve/index"
)
var terms []string
func init() {
wsRegexp := regexp.MustCompile(`\W+`)
input, err := ioutil.ReadFile("benchmark_data.txt")
if err != nil {
panic(err)
}
terms = wsRegexp.Split(string(input), -1)
}
func BenchmarkTermsFacet10(b *testing.B) {
termsFacetN(b, 10)
}
func BenchmarkTermsFacet100(b *testing.B) {
termsFacetN(b, 100)
}
func BenchmarkTermsFacet1000(b *testing.B) {
termsFacetN(b, 1000)
}
func BenchmarkTermsFacet10000(b *testing.B) {
termsFacetN(b, 10000)
}
// func BenchmarkTermsFacet100000(b *testing.B) {
// termsFacetN(b, 100000)
// }
func termsFacetN(b *testing.B, numTerms int) {
field := "test"
termsLen := len(terms)
tfb := NewTermsFacetBuilder(field, 3)
i := 0
for len(tfb.termsCount) < numTerms && i <= termsLen {
j := i % termsLen
term := terms[j]
tfb.Update(index.FieldTerms{field: []string{term}})
i++
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
tfb.Result()
}
}

View file

@ -0,0 +1,212 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"sort"
"github.com/blevesearch/bleve/index"
)
type FacetBuilder interface {
Update(index.FieldTerms)
Result() *FacetResult
}
type FacetsBuilder struct {
indexReader index.IndexReader
facets map[string]FacetBuilder
}
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
return &FacetsBuilder{
indexReader: indexReader,
facets: make(map[string]FacetBuilder, 0),
}
}
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
}
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.ID)
if err != nil {
return err
}
for _, facetBuilder := range fb.facets {
facetBuilder.Update(fieldTerms)
}
return nil
}
type TermFacet struct {
Term string `json:"term"`
Count int `json:"count"`
}
type TermFacets []*TermFacet
func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
for _, existingTerm := range tf {
if termFacet.Term == existingTerm.Term {
existingTerm.Count += termFacet.Count
return tf
}
}
// if we got here it wasn't already in the existing terms
tf = append(tf, termFacet)
return tf
}
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf TermFacets) Less(i, j int) bool { return tf[i].Count > tf[j].Count }
type NumericRangeFacet struct {
Name string `json:"name"`
Min *float64 `json:"min,omitempty"`
Max *float64 `json:"max,omitempty"`
Count int `json:"count"`
}
type NumericRangeFacets []*NumericRangeFacet
func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericRangeFacets {
for _, existingNr := range nrf {
if numericRangeFacet.Min == existingNr.Min && numericRangeFacet.Max == existingNr.Max {
existingNr.Count += numericRangeFacet.Count
return nrf
}
}
// if we got here it wasn't already in the existing terms
nrf = append(nrf, numericRangeFacet)
return nrf
}
func (nrf NumericRangeFacets) Len() int { return len(nrf) }
func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] }
func (nrf NumericRangeFacets) Less(i, j int) bool { return nrf[i].Count > nrf[j].Count }
type DateRangeFacet struct {
Name string `json:"name"`
Start *string `json:"start,omitempty"`
End *string `json:"end,omitempty"`
Count int `json:"count"`
}
type DateRangeFacets []*DateRangeFacet
func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
for _, existingDr := range drf {
if dateRangeFacet.Start == existingDr.Start && dateRangeFacet.End == existingDr.End {
existingDr.Count += dateRangeFacet.Count
return drf
}
}
// if we got here it wasn't already in the existing terms
drf = append(drf, dateRangeFacet)
return drf
}
func (drf DateRangeFacets) Len() int { return len(drf) }
func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] }
func (drf DateRangeFacets) Less(i, j int) bool { return drf[i].Count > drf[j].Count }
type FacetResult struct {
Field string `json:"field"`
Total int `json:"total"`
Missing int `json:"missing"`
Other int `json:"other"`
Terms TermFacets `json:"terms,omitempty"`
NumericRanges NumericRangeFacets `json:"numeric_ranges,omitempty"`
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
func (fr *FacetResult) Merge(other *FacetResult) {
fr.Total += other.Total
fr.Missing += other.Missing
fr.Other += other.Other
if fr.Terms != nil && other.Terms != nil {
for _, term := range other.Terms {
fr.Terms = fr.Terms.Add(term)
}
}
if fr.NumericRanges != nil && other.NumericRanges != nil {
for _, nr := range other.NumericRanges {
fr.NumericRanges = fr.NumericRanges.Add(nr)
}
}
if fr.DateRanges != nil && other.DateRanges != nil {
for _, dr := range other.DateRanges {
fr.DateRanges = fr.DateRanges.Add(dr)
}
}
}
func (fr *FacetResult) Fixup(size int) {
if fr.Terms != nil {
sort.Sort(fr.Terms)
if len(fr.Terms) > size {
moveToOther := fr.Terms[size:]
for _, mto := range moveToOther {
fr.Other += mto.Count
}
fr.Terms = fr.Terms[0:size]
}
} else if fr.NumericRanges != nil {
sort.Sort(fr.NumericRanges)
if len(fr.NumericRanges) > size {
moveToOther := fr.NumericRanges[size:]
for _, mto := range moveToOther {
fr.Other += mto.Count
}
fr.NumericRanges = fr.NumericRanges[0:size]
}
} else if fr.DateRanges != nil {
sort.Sort(fr.DateRanges)
if len(fr.DateRanges) > size {
moveToOther := fr.DateRanges[size:]
for _, mto := range moveToOther {
fr.Other += mto.Count
}
fr.DateRanges = fr.DateRanges[0:size]
}
}
}
type FacetResults map[string]*FacetResult
func (fr FacetResults) Merge(other FacetResults) {
for name, oFacetResult := range other {
facetResult, ok := fr[name]
if ok {
facetResult.Merge(oFacetResult)
} else {
fr[name] = oFacetResult
}
}
}
func (fr FacetResults) Fixup(name string, size int) {
facetResult, ok := fr[name]
if ok {
facetResult.Fixup(size)
}
}
func (fb *FacetsBuilder) Results() FacetResults {
fr := make(FacetResults)
for facetName, facetBuilder := range fb.facets {
facetResult := facetBuilder.Result()
fr[facetName] = facetResult
}
return fr
}

View file

@ -0,0 +1,301 @@
package search
import (
"reflect"
"testing"
)
func TestTermFacetResultsMerge(t *testing.T) {
fr1 := &FacetResult{
Field: "type",
Total: 100,
Missing: 25,
Other: 25,
Terms: []*TermFacet{
&TermFacet{
Term: "blog",
Count: 25,
},
&TermFacet{
Term: "comment",
Count: 24,
},
&TermFacet{
Term: "feedback",
Count: 1,
},
},
}
fr1Only := &FacetResult{
Field: "category",
Total: 97,
Missing: 22,
Other: 15,
Terms: []*TermFacet{
&TermFacet{
Term: "clothing",
Count: 35,
},
&TermFacet{
Term: "electronics",
Count: 25,
},
},
}
frs1 := FacetResults{
"types": fr1,
"categories": fr1Only,
}
fr2 := &FacetResult{
Field: "type",
Total: 100,
Missing: 25,
Other: 25,
Terms: []*TermFacet{
&TermFacet{
Term: "blog",
Count: 25,
},
&TermFacet{
Term: "comment",
Count: 22,
},
&TermFacet{
Term: "flag",
Count: 3,
},
},
}
frs2 := FacetResults{
"types": fr2,
}
expectedFr := &FacetResult{
Field: "type",
Total: 200,
Missing: 50,
Other: 51,
Terms: []*TermFacet{
&TermFacet{
Term: "blog",
Count: 50,
},
&TermFacet{
Term: "comment",
Count: 46,
},
&TermFacet{
Term: "flag",
Count: 3,
},
},
}
expectedFrs := FacetResults{
"types": expectedFr,
"categories": fr1Only,
}
frs1.Merge(frs2)
frs1.Fixup("types", 3)
if !reflect.DeepEqual(frs1, expectedFrs) {
t.Errorf("expected %v, got %v", expectedFrs, frs1)
}
}
func TestNumericFacetResultsMerge(t *testing.T) {
lowmed := 3.0
medhi := 6.0
hihigher := 9.0
fr1 := &FacetResult{
Field: "rating",
Total: 100,
Missing: 25,
Other: 25,
NumericRanges: []*NumericRangeFacet{
&NumericRangeFacet{
Name: "low",
Max: &lowmed,
Count: 25,
},
&NumericRangeFacet{
Name: "med",
Count: 24,
Max: &lowmed,
Min: &medhi,
},
&NumericRangeFacet{
Name: "hi",
Count: 1,
Min: &medhi,
Max: &hihigher,
},
},
}
frs1 := FacetResults{
"ratings": fr1,
}
fr2 := &FacetResult{
Field: "rating",
Total: 100,
Missing: 25,
Other: 25,
NumericRanges: []*NumericRangeFacet{
&NumericRangeFacet{
Name: "low",
Max: &lowmed,
Count: 25,
},
&NumericRangeFacet{
Name: "med",
Max: &lowmed,
Min: &medhi,
Count: 22,
},
&NumericRangeFacet{
Name: "highest",
Min: &hihigher,
Count: 3,
},
},
}
frs2 := FacetResults{
"ratings": fr2,
}
expectedFr := &FacetResult{
Field: "rating",
Total: 200,
Missing: 50,
Other: 51,
NumericRanges: []*NumericRangeFacet{
&NumericRangeFacet{
Name: "low",
Count: 50,
Max: &lowmed,
},
&NumericRangeFacet{
Name: "med",
Max: &lowmed,
Min: &medhi,
Count: 46,
},
&NumericRangeFacet{
Name: "highest",
Min: &hihigher,
Count: 3,
},
},
}
expectedFrs := FacetResults{
"ratings": expectedFr,
}
frs1.Merge(frs2)
frs1.Fixup("ratings", 3)
if !reflect.DeepEqual(frs1, expectedFrs) {
t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}
}
func TestDateFacetResultsMerge(t *testing.T) {
lowmed := "2010-01-01"
medhi := "2011-01-01"
hihigher := "2012-01-01"
fr1 := &FacetResult{
Field: "birthday",
Total: 100,
Missing: 25,
Other: 25,
DateRanges: []*DateRangeFacet{
&DateRangeFacet{
Name: "low",
End: &lowmed,
Count: 25,
},
&DateRangeFacet{
Name: "med",
Count: 24,
Start: &lowmed,
End: &medhi,
},
&DateRangeFacet{
Name: "hi",
Count: 1,
Start: &medhi,
End: &hihigher,
},
},
}
frs1 := FacetResults{
"birthdays": fr1,
}
fr2 := &FacetResult{
Field: "birthday",
Total: 100,
Missing: 25,
Other: 25,
DateRanges: []*DateRangeFacet{
&DateRangeFacet{
Name: "low",
End: &lowmed,
Count: 25,
},
&DateRangeFacet{
Name: "med",
Start: &lowmed,
End: &medhi,
Count: 22,
},
&DateRangeFacet{
Name: "highest",
Start: &hihigher,
Count: 3,
},
},
}
frs2 := FacetResults{
"birthdays": fr2,
}
expectedFr := &FacetResult{
Field: "birthday",
Total: 200,
Missing: 50,
Other: 51,
DateRanges: []*DateRangeFacet{
&DateRangeFacet{
Name: "low",
Count: 50,
End: &lowmed,
},
&DateRangeFacet{
Name: "med",
Start: &lowmed,
End: &medhi,
Count: 46,
},
&DateRangeFacet{
Name: "highest",
Start: &hihigher,
Count: 3,
},
},
}
expectedFrs := FacetResults{
"birthdays": expectedFr,
}
frs1.Merge(frs2)
frs1.Fixup("birthdays", 3)
if !reflect.DeepEqual(frs1, expectedFrs) {
t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}
}

View file

@ -0,0 +1,100 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ansi
import (
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search/highlight"
)
const Name = "ansi"
const DefaultAnsiHighlight = BgYellow
type FragmentFormatter struct {
color string
}
func NewFragmentFormatter(color string) *FragmentFormatter {
return &FragmentFormatter{
color: color,
}
}
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
rv := ""
curr := f.Start
for _, termLocation := range orderedTermLocations {
if termLocation == nil {
continue
}
if termLocation.Start < curr {
continue
}
if termLocation.End > f.End {
break
}
// add the stuff before this location
rv += string(f.Orig[curr:termLocation.Start])
// add the color
rv += a.color
// add the term itself
rv += string(f.Orig[termLocation.Start:termLocation.End])
// reset the color
rv += Reset
// update current
curr = termLocation.End
}
// add any remaining text after the last token
rv += string(f.Orig[curr:f.End])
return rv
}
// ANSI color control escape sequences.
// Shamelessly copied from https://github.com/sqp/godock/blob/master/libs/log/colors.go
const (
Reset = "\x1b[0m"
Bright = "\x1b[1m"
Dim = "\x1b[2m"
Underscore = "\x1b[4m"
Blink = "\x1b[5m"
Reverse = "\x1b[7m"
Hidden = "\x1b[8m"
FgBlack = "\x1b[30m"
FgRed = "\x1b[31m"
FgGreen = "\x1b[32m"
FgYellow = "\x1b[33m"
FgBlue = "\x1b[34m"
FgMagenta = "\x1b[35m"
FgCyan = "\x1b[36m"
FgWhite = "\x1b[37m"
BgBlack = "\x1b[40m"
BgRed = "\x1b[41m"
BgGreen = "\x1b[42m"
BgYellow = "\x1b[43m"
BgBlue = "\x1b[44m"
BgMagenta = "\x1b[45m"
BgCyan = "\x1b[46m"
BgWhite = "\x1b[47m"
)
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
color := DefaultAnsiHighlight
colorVal, ok := config["color"].(string)
if ok {
color = colorVal
}
return NewFragmentFormatter(color), nil
}
func init() {
registry.RegisterFragmentFormatter(Name, Constructor)
}
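
A hedged sketch of the two ways the formatter above can be built: directly, or through Constructor with the "color" config key it reads. Error handling is elided and the colors are arbitrary.

// Hypothetical construction; BgGreen and FgRed are escape constants defined above.
func exampleAnsiFormatters() {
	direct := NewFragmentFormatter(BgGreen)
	viaConfig, _ := Constructor(map[string]interface{}{"color": FgRed}, nil)
	_, _ = direct, viaConfig
}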

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html
import (
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search/highlight"
)
const Name = "html"
const defaultHTMLHighlightBefore = "<b>"
const defaultHTMLHighlightAfter = "</b>"
type FragmentFormatter struct {
before string
after string
}
func NewFragmentFormatter(before, after string) *FragmentFormatter {
return &FragmentFormatter{
before: before,
after: after,
}
}
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
rv := ""
curr := f.Start
for _, termLocation := range orderedTermLocations {
if termLocation == nil {
continue
}
if termLocation.Start < curr {
continue
}
if termLocation.End > f.End {
break
}
// add the stuff before this location
rv += string(f.Orig[curr:termLocation.Start])
// add the opening markup
rv += a.before
// add the term itself
rv += string(f.Orig[termLocation.Start:termLocation.End])
// add the closing markup
rv += a.after
// update current
curr = termLocation.End
}
// add any remaining text after the last token
rv += string(f.Orig[curr:f.End])
return rv
}
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
before := defaultHTMLHighlightBefore
beforeVal, ok := config["before"].(string)
if ok {
before = beforeVal
}
after := defaultHTMLHighlightAfter
afterVal, ok := config["after"].(string)
if ok {
after = afterVal
}
return NewFragmentFormatter(before, after), nil
}
func init() {
registry.RegisterFragmentFormatter(Name, Constructor)
}

View file

@ -0,0 +1,87 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html
import (
"testing"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
func TestHTMLFragmentFormatter1(t *testing.T) {
tests := []struct {
fragment *highlight.Fragment
tlm search.TermLocationMap
output string
}{
{
fragment: &highlight.Fragment{
Orig: []byte("the quick brown fox"),
Start: 0,
End: 19,
},
tlm: search.TermLocationMap{
"quick": search.Locations{
&search.Location{
Pos: 2,
Start: 4,
End: 9,
},
},
},
output: "the <b>quick</b> brown fox",
},
}
emHTMLFormatter := NewFragmentFormatter("<b>", "</b>")
for _, test := range tests {
otl := highlight.OrderTermLocations(test.tlm)
result := emHTMLFormatter.Format(test.fragment, otl)
if result != test.output {
t.Errorf("expected `%s`, got `%s`", test.output, result)
}
}
}
func TestHTMLFragmentFormatter2(t *testing.T) {
tests := []struct {
fragment *highlight.Fragment
tlm search.TermLocationMap
output string
}{
{
fragment: &highlight.Fragment{
Orig: []byte("the quick brown fox"),
Start: 0,
End: 19,
},
tlm: search.TermLocationMap{
"quick": search.Locations{
&search.Location{
Pos: 2,
Start: 4,
End: 9,
},
},
},
output: "the <em>quick</em> brown fox",
},
}
emHTMLFormatter := NewFragmentFormatter("<em>", "</em>")
for _, test := range tests {
otl := highlight.OrderTermLocations(test.tlm)
result := emHTMLFormatter.Format(test.fragment, otl)
if result != test.output {
t.Errorf("expected `%s`, got `%s`", test.output, result)
}
}
}

View file

@ -0,0 +1,138 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"unicode/utf8"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search/highlight"
)
const Name = "simple"
const defaultFragmentSize = 200
type Fragmenter struct {
fragmentSize int
}
func NewFragmenter(fragmentSize int) *Fragmenter {
return &Fragmenter{
fragmentSize: fragmentSize,
}
}
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
rv := make([]*highlight.Fragment, 0)
maxbegin := 0
OUTER:
for currTermIndex, termLocation := range ot {
// start with this
// it should be the highest scoring fragment with this term first
start := termLocation.Start
end := start
used := 0
for end < len(orig) && used < s.fragmentSize {
r, size := utf8.DecodeRune(orig[end:])
if r == utf8.RuneError {
continue OUTER // bail
}
end += size
used += 1
}
// if we still have more characters available to us
// push back towards the beginning
// without crossing maxbegin
for start > 0 && used < s.fragmentSize {
r, size := utf8.DecodeLastRune(orig[0:start])
if r == utf8.RuneError {
continue OUTER // bail
}
if start-size >= maxbegin {
start -= size
used += 1
} else {
break
}
}
// however, we'd rather have the tokens centered more in the frag
// lets try to do that as best we can, without affecting the score
// find the end of the last term in this fragment
minend := end
for _, innerTermLocation := range ot[currTermIndex:] {
if innerTermLocation.End > end {
break
}
minend = innerTermLocation.End
}
// find the smaller of the two rooms to move
roomToMove := utf8.RuneCount(orig[minend:end])
roomToMoveStart := 0
if start >= maxbegin {
roomToMoveStart = utf8.RuneCount(orig[maxbegin:start])
}
if roomToMoveStart < roomToMove {
roomToMove = roomToMoveStart
}
offset := roomToMove / 2
for offset > 0 {
r, size := utf8.DecodeLastRune(orig[0:start])
if r == utf8.RuneError {
continue OUTER // bail
}
start -= size
r, size = utf8.DecodeLastRune(orig[0:end])
if r == utf8.RuneError {
continue OUTER // bail
}
end -= size
offset--
}
rv = append(rv, &highlight.Fragment{Orig: orig, Start: start - offset, End: end - offset})
// set maxbegin to the end of the current term location
// so that the next one won't back up to include it
maxbegin = termLocation.End
}
if len(ot) == 0 {
// if there were no terms to highlight
// produce a single fragment from the beginning
start := 0
end := start + s.fragmentSize
if end > len(orig) {
end = len(orig)
}
rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
}
return rv
}
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Fragmenter, error) {
size := defaultFragmentSize
sizeVal, ok := config["size"].(float64)
if ok {
size = int(sizeVal)
}
return NewFragmenter(size), nil
}
func init() {
registry.RegisterFragmenter(Name, Constructor)
}
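
A small sketch of configuring the fragmenter above. Note that Constructor reads "size" as a float64 (the usual type when the config has been decoded from JSON), so a literal must be written accordingly.

// Hypothetical: a 120-rune fragment size, passed the way Constructor expects it.
func exampleFragmenter() (highlight.Fragmenter, error) {
	return Constructor(map[string]interface{}{"size": 120.0}, nil)
}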

View file

@ -0,0 +1,295 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/search/highlight"
)
func TestSimpleFragmenter(t *testing.T) {
tests := []struct {
orig []byte
fragments []*highlight.Fragment
ot highlight.TermLocations
size int
}{
{
orig: []byte("this is a test"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("this is a test"),
Start: 0,
End: 14,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "test",
Pos: 4,
Start: 10,
End: 14,
},
},
size: 100,
},
{
orig: []byte("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"),
Start: 0,
End: 100,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
Pos: 1,
Start: 0,
End: 100,
},
},
size: 100,
},
{
orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 0,
End: 100,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 10,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 20,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 30,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 40,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 50,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 60,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 70,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 80,
End: 101,
},
&highlight.Fragment{
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
Start: 90,
End: 101,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "0123456789",
Pos: 1,
Start: 0,
End: 10,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 2,
Start: 10,
End: 20,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 3,
Start: 20,
End: 30,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 4,
Start: 30,
End: 40,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 5,
Start: 40,
End: 50,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 6,
Start: 50,
End: 60,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 7,
Start: 60,
End: 70,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 8,
Start: 70,
End: 80,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 9,
Start: 80,
End: 90,
},
&highlight.TermLocation{
Term: "0123456789",
Pos: 10,
Start: 90,
End: 100,
},
},
size: 100,
},
{
orig: []byte("[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}"),
Start: 0,
End: 411,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "पानी",
Pos: 1,
Start: 2,
End: 14,
},
},
size: 200,
},
{
orig: []byte("交换机"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("交换机"),
Start: 0,
End: 9,
},
&highlight.Fragment{
Orig: []byte("交换机"),
Start: 3,
End: 9,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "交换",
Pos: 1,
Start: 0,
End: 6,
},
&highlight.TermLocation{
Term: "换机",
Pos: 2,
Start: 3,
End: 9,
},
},
size: 200,
},
}
for _, test := range tests {
fragmenter := NewFragmenter(test.size)
fragments := fragmenter.Fragment(test.orig, test.ot)
if !reflect.DeepEqual(fragments, test.fragments) {
t.Errorf("expected %#v, got %#v", test.fragments, fragments)
for _, fragment := range fragments {
t.Logf("frag: %s", fragment.Orig[fragment.Start:fragment.End])
t.Logf("frag: %d - %d", fragment.Start, fragment.End)
}
}
}
}
func TestSimpleFragmenterWithSize(t *testing.T) {
tests := []struct {
orig []byte
fragments []*highlight.Fragment
ot highlight.TermLocations
}{
{
orig: []byte("this is a test"),
fragments: []*highlight.Fragment{
&highlight.Fragment{
Orig: []byte("this is a test"),
Start: 0,
End: 5,
},
&highlight.Fragment{
Orig: []byte("this is a test"),
Start: 9,
End: 14,
},
},
ot: highlight.TermLocations{
&highlight.TermLocation{
Term: "this",
Pos: 1,
Start: 0,
End: 5,
},
&highlight.TermLocation{
Term: "test",
Pos: 4,
Start: 10,
End: 14,
},
},
},
}
fragmenter := NewFragmenter(5)
for _, test := range tests {
fragments := fragmenter.Fragment(test.orig, test.ot)
if !reflect.DeepEqual(fragments, test.fragments) {
t.Errorf("expected %#v, got %#v", test.fragments, fragments)
for _, fragment := range fragments {
t.Logf("frag: %#v", fragment)
}
}
}
}

View file

@ -0,0 +1,58 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package highlight
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/search"
)
type Fragment struct {
Orig []byte
Start int
End int
Score float64
Index int // used by heap
}
func (f *Fragment) Overlaps(other *Fragment) bool {
if other.Start >= f.Start && other.Start < f.End {
return true
} else if f.Start >= other.Start && f.Start < other.End {
return true
}
return false
}
type Fragmenter interface {
Fragment([]byte, TermLocations) []*Fragment
}
type FragmentFormatter interface {
Format(f *Fragment, orderedTermLocations TermLocations) string
}
type FragmentScorer interface {
Score(f *Fragment) float64
}
type Highlighter interface {
Fragmenter() Fragmenter
SetFragmenter(Fragmenter)
FragmentFormatter() FragmentFormatter
SetFragmentFormatter(FragmentFormatter)
Separator() string
SetSeparator(string)
BestFragmentInField(*search.DocumentMatch, *document.Document, string) string
BestFragmentsInField(*search.DocumentMatch, *document.Document, string, int) []string
}

View file

@ -0,0 +1,44 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
// FragmentScorer will score fragments by how many
// unique terms occur in the fragment with no regard for
// any boost values used in the original query
type FragmentScorer struct {
tlm search.TermLocationMap
}
func NewFragmentScorer(tlm search.TermLocationMap) *FragmentScorer {
return &FragmentScorer{
tlm: tlm,
}
}
func (s *FragmentScorer) Score(f *highlight.Fragment) {
score := 0.0
OUTER:
for _, locations := range s.tlm {
for _, location := range locations {
if int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0
// once we find a term in the fragment
// don't care about additional matches
continue OUTER
}
}
}
f.Score = score
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"testing"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
func TestSimpleFragmentScorer(t *testing.T) {
tests := []struct {
fragment *highlight.Fragment
tlm search.TermLocationMap
score float64
}{
{
fragment: &highlight.Fragment{
Orig: []byte("cat in the hat"),
Start: 0,
End: 14,
},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 0,
Start: 0,
End: 3,
},
},
},
score: 1,
},
{
fragment: &highlight.Fragment{
Orig: []byte("cat in the hat"),
Start: 0,
End: 14,
},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
Start: 0,
End: 3,
},
},
"hat": search.Locations{
&search.Location{
Pos: 4,
Start: 11,
End: 14,
},
},
},
score: 2,
},
}
for _, test := range tests {
scorer := NewFragmentScorer(test.tlm)
scorer.Score(test.fragment)
if test.fragment.Score != test.score {
t.Errorf("expected score %f, got %f", test.score, test.fragment.Score)
}
}
}

View file

@ -0,0 +1,208 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"container/heap"
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
const Name = "simple"
const defaultSeparator = "…"
type Highlighter struct {
fragmenter highlight.Fragmenter
formatter highlight.FragmentFormatter
sep string
}
func NewHighlighter(fragmenter highlight.Fragmenter, formatter highlight.FragmentFormatter, separator string) *Highlighter {
return &Highlighter{
fragmenter: fragmenter,
formatter: formatter,
sep: separator,
}
}
func (s *Highlighter) Fragmenter() highlight.Fragmenter {
return s.fragmenter
}
func (s *Highlighter) SetFragmenter(f highlight.Fragmenter) {
s.fragmenter = f
}
func (s *Highlighter) FragmentFormatter() highlight.FragmentFormatter {
return s.formatter
}
func (s *Highlighter) SetFragmentFormatter(f highlight.FragmentFormatter) {
s.formatter = f
}
func (s *Highlighter) Separator() string {
return s.sep
}
func (s *Highlighter) SetSeparator(sep string) {
s.sep = sep
}
func (s *Highlighter) BestFragmentInField(dm *search.DocumentMatch, doc *document.Document, field string) string {
fragments := s.BestFragmentsInField(dm, doc, field, 1)
if len(fragments) > 0 {
return fragments[0]
}
return ""
}
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string {
tlm := dm.Locations[field]
orderedTermLocations := highlight.OrderTermLocations(tlm)
scorer := NewFragmentScorer(tlm)
// score the fragments and put them into a priority queue ordered by score
fq := make(FragmentQueue, 0)
heap.Init(&fq)
for _, f := range doc.Fields {
if f.Name() == field {
_, ok := f.(*document.TextField)
if ok {
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
for _, fragment := range fragments {
scorer.Score(fragment)
heap.Push(&fq, fragment)
}
}
}
}
// now find the N best non-overlapping fragments
bestFragments := make([]*highlight.Fragment, 0)
if len(fq) > 0 {
candidate := heap.Pop(&fq)
OUTER:
for candidate != nil && len(bestFragments) < num {
// see if this overlaps with any of the best already identified
if len(bestFragments) > 0 {
for _, frag := range bestFragments {
if candidate.(*highlight.Fragment).Overlaps(frag) {
if len(fq) < 1 {
break OUTER
}
candidate = heap.Pop(&fq)
continue OUTER
}
}
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
} else {
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
}
if len(fq) < 1 {
break
}
candidate = heap.Pop(&fq)
}
}
// now that we have the best fragments, we can format them
orderedTermLocations.MergeOverlapping()
formattedFragments := make([]string, len(bestFragments))
for i, fragment := range bestFragments {
formattedFragments[i] = ""
if fragment.Start != 0 {
formattedFragments[i] += s.sep
}
formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
if fragment.End != len(fragment.Orig) {
formattedFragments[i] += s.sep
}
}
if dm.Fragments == nil {
dm.Fragments = make(search.FieldFragmentMap, 0)
}
if len(formattedFragments) > 0 {
dm.Fragments[field] = formattedFragments
}
return formattedFragments
}
// FragmentQueue implements heap.Interface and holds Items.
type FragmentQueue []*highlight.Fragment
func (fq FragmentQueue) Len() int { return len(fq) }
func (fq FragmentQueue) Less(i, j int) bool {
// We want Pop to give us the highest, not lowest, priority so we use greater-than here.
return fq[i].Score > fq[j].Score
}
func (fq FragmentQueue) Swap(i, j int) {
fq[i], fq[j] = fq[j], fq[i]
fq[i].Index = i
fq[j].Index = j
}
func (fq *FragmentQueue) Push(x interface{}) {
n := len(*fq)
item := x.(*highlight.Fragment)
item.Index = n
*fq = append(*fq, item)
}
func (fq *FragmentQueue) Pop() interface{} {
old := *fq
n := len(old)
item := old[n-1]
item.Index = -1 // for safety
*fq = old[0 : n-1]
return item
}
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
separator := defaultSeparator
separatorVal, ok := config["separator"].(string)
if ok {
separator = separatorVal
}
fragmenterName, ok := config["fragmenter"].(string)
if !ok {
return nil, fmt.Errorf("must specify fragmenter")
}
fragmenter, err := cache.FragmenterNamed(fragmenterName)
if err != nil {
return nil, fmt.Errorf("error building fragmenter: %v", err)
}
formatterName, ok := config["formatter"].(string)
if !ok {
return nil, fmt.Errorf("must specify formatter")
}
formatter, err := cache.FragmentFormatterNamed(formatterName)
if err != nil {
return nil, fmt.Errorf("error building fragment formatter: %v", err)
}
return NewHighlighter(fragmenter, formatter, separator), nil
}
func init() {
registry.RegisterHighlighter(Name, Constructor)
}
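// Illustrative sketch of the config shape consumed by Constructor above.
// The "simple" fragmenter and "ansi" formatter names are assumptions about
// what has been registered with the cache; any registered names work.
func exampleHighlighterConfig() map[string]interface{} {
    return map[string]interface{}{
        "separator":  "…",
        "fragmenter": "simple",
        "formatter":  "ansi",
    }
}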

View file

@ -0,0 +1,164 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi"
sfrag "github.com/blevesearch/bleve/search/highlight/fragmenters/simple"
)
const (
reset = "\x1b[0m"
DefaultAnsiHighlight = "\x1b[43m"
)
func TestSimpleHighlighter(t *testing.T) {
fragmenter := sfrag.NewFragmenter(100)
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator)
docMatch := search.DocumentMatch{
ID: "a",
Score: 1.0,
Locations: search.FieldTermLocationMap{
"desc": search.TermLocationMap{
"quick": search.Locations{
&search.Location{
Pos: 2,
Start: 4,
End: 9,
},
},
"fox": search.Locations{
&search.Location{
Pos: 4,
Start: 16,
End: 19,
},
},
},
},
}
expectedFragment := "the " + DefaultAnsiHighlight + "quick" + reset + " brown " + DefaultAnsiHighlight + "fox" + reset + " jumps over the lazy dog"
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []uint64{}, []byte("the quick brown fox jumps over the lazy dog")))
fragment := highlighter.BestFragmentInField(&docMatch, doc, "desc")
if fragment != expectedFragment {
t.Errorf("expected `%s`, got `%s`", expectedFragment, fragment)
}
}
func TestSimpleHighlighterLonger(t *testing.T) {
fieldBytes := []byte(`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sed semper nulla, sed pellentesque urna. Suspendisse potenti. Aliquam dignissim pulvinar erat vel ullamcorper. Nullam sed diam at dolor dapibus varius. Vestibulum at semper nunc. Integer ullamcorper enim ut nisi condimentum lacinia. Nulla ipsum ipsum, dictum in dapibus non, bibendum eget neque. Vestibulum malesuada erat quis malesuada dictum. Mauris luctus viverra lorem, nec hendrerit lacus lacinia ut. Donec suscipit sit amet nisi et dictum. Maecenas ultrices mollis diam, vel commodo libero lobortis nec. Nunc non dignissim dolor. Nulla non tempus risus, eget porttitor lectus. Suspendisse vitae gravida magna, a sagittis urna. Curabitur nec dui volutpat, hendrerit nisi non, adipiscing erat. Maecenas aliquet sem sit amet nibh ultrices accumsan.
Mauris lobortis sem sed blandit bibendum. In scelerisque eros sed metus aliquet convallis ac eget metus. Donec eget feugiat sem. Quisque venenatis, augue et blandit vulputate, velit odio viverra dolor, eu iaculis eros urna ut nunc. Duis faucibus mattis enim ut ultricies. Donec scelerisque volutpat elit, vel varius ante porttitor vel. Duis neque nulla, ultrices vel est id, molestie semper odio. Maecenas condimentum felis vitae nibh venenatis, ut feugiat risus vehicula. Suspendisse non sapien neque. Etiam et lorem consequat lorem aliquam ullamcorper. Pellentesque id vestibulum neque, at aliquam turpis. Aenean ultrices nec erat sit amet aliquam. Morbi eu sem in augue cursus ullamcorper a sed dolor. Integer et lobortis nulla, sit amet laoreet elit. In elementum, nibh nec volutpat pretium, lectus est pulvinar arcu, vehicula lobortis tellus sem id mauris. Maecenas ac blandit purus, sit amet scelerisque magna.
In hac habitasse platea dictumst. In lacinia elit non risus venenatis viverra. Nulla vestibulum laoreet turpis ac accumsan. Vivamus eros felis, rhoncus vel interdum bibendum, imperdiet nec diam. Etiam sed eros sed orci pellentesque sagittis. Praesent a fermentum leo. Vivamus ipsum risus, faucibus a dignissim ut, ullamcorper nec risus. Etiam quis adipiscing velit. Nam ac cursus arcu. Sed bibendum lectus quis massa dapibus dapibus. Vestibulum fermentum eros vitae hendrerit condimentum.
Fusce viverra eleifend iaculis. Maecenas tempor dictum cursus. Mauris faucibus, tortor in bibendum ornare, nibh lorem sollicitudin est, sed consectetur nulla dui imperdiet urna. Fusce aliquet odio fermentum massa mollis, id feugiat lacus egestas. Integer et eleifend metus. Duis neque tellus, vulputate nec dui eu, euismod sodales orci. Vivamus turpis erat, consectetur et pulvinar nec, ornare a quam. Maecenas fermentum, ligula vitae consectetur lobortis, mi lacus fermentum ante, ut semper lacus lectus porta orci. Nulla vehicula sodales eros, in iaculis ante laoreet at. Sed venenatis interdum metus, egestas scelerisque orci laoreet ut. Donec fermentum enim eget nibh blandit laoreet. Proin lacinia adipiscing lorem vel ornare. Donec ullamcorper massa elementum urna varius viverra. Proin pharetra, erat at feugiat rhoncus, velit eros condimentum mi, ac mattis sapien dolor non elit. Aenean viverra purus id tincidunt vulputate.
Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttitor et aliquet sed, vulputate in odio. Aenean feugiat est quis neque vehicula, eget vulputate nunc tempor. Donec quis nulla ut quam feugiat consectetur ut et justo. Nulla congue, metus auctor facilisis scelerisque, nunc risus vulputate urna, in blandit urna nibh et neque. Etiam quis tortor ut nulla dignissim dictum non sed ligula. Vivamus accumsan ligula eget ipsum ultrices, a tincidunt urna blandit. In hac habitasse platea dictumst.`)
doc := document.NewDocument("a").AddField(document.NewTextField("full", []uint64{}, fieldBytes))
docMatch := search.DocumentMatch{
ID: "a",
Score: 1.0,
Locations: search.FieldTermLocationMap{
"full": search.TermLocationMap{
"metus": search.Locations{
&search.Location{
Pos: 0,
Start: 883,
End: 888,
},
&search.Location{
Pos: 0,
Start: 915,
End: 920,
},
&search.Location{
Pos: 0,
Start: 2492,
End: 2497,
},
&search.Location{
Pos: 0,
Start: 2822,
End: 2827,
},
&search.Location{
Pos: 0,
Start: 3417,
End: 3422,
},
},
"interdum": search.Locations{
&search.Location{
Pos: 0,
Start: 1891,
End: 1899,
},
&search.Location{
Pos: 0,
Start: 2813,
End: 2821,
},
},
"venenatis": search.Locations{
&search.Location{
Pos: 0,
Start: 954,
End: 963,
},
&search.Location{
Pos: 0,
Start: 1252,
End: 1261,
},
&search.Location{
Pos: 0,
Start: 1795,
End: 1804,
},
&search.Location{
Pos: 0,
Start: 2803,
End: 2812,
},
},
},
},
}
expectedFragments := []string{
"…eros, in iaculis ante laoreet at. Sed " + DefaultAnsiHighlight + "venenatis" + reset + " " + DefaultAnsiHighlight + "interdum" + reset + " " + DefaultAnsiHighlight + "metus" + reset + ", egestas scelerisque orci laoreet ut.…",
"… eros sed " + DefaultAnsiHighlight + "metus" + reset + " aliquet convallis ac eget " + DefaultAnsiHighlight + "metus" + reset + ". Donec eget feugiat sem. Quisque " + DefaultAnsiHighlight + "venenatis" + reset + ", augue et…",
"… odio. Maecenas condimentum felis vitae nibh " + DefaultAnsiHighlight + "venenatis" + reset + ", ut feugiat risus vehicula. Suspendisse non s…",
"… id feugiat lacus egestas. Integer et eleifend " + DefaultAnsiHighlight + "metus" + reset + ". Duis neque tellus, vulputate nec dui eu, euism…",
"… accumsan. Vivamus eros felis, rhoncus vel " + DefaultAnsiHighlight + "interdum" + reset + " bibendum, imperdiet nec diam. Etiam sed eros sed…",
}
fragmenter := sfrag.NewFragmenter(100)
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator)
fragments := highlighter.BestFragmentsInField(&docMatch, doc, "full", 5)
if !reflect.DeepEqual(fragments, expectedFragments) {
t.Errorf("expected %#v, got %#v", expectedFragments, fragments)
}
}

View file

@ -0,0 +1,70 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package highlight
import (
"sort"
"github.com/blevesearch/bleve/search"
)
type TermLocation struct {
Term string
Pos int
Start int
End int
}
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
if other.Start >= tl.Start && other.Start < tl.End {
return true
} else if tl.Start >= other.Start && tl.Start < other.End {
return true
}
return false
}
type TermLocations []*TermLocation
func (t TermLocations) Len() int { return len(t) }
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t TermLocations) Less(i, j int) bool { return t[i].Start < t[j].Start }
func (t TermLocations) MergeOverlapping() {
var lastTl *TermLocation
for i, tl := range t {
if lastTl == nil && tl != nil {
lastTl = tl
} else if lastTl != nil && tl != nil {
if lastTl.Overlaps(tl) {
// ok merge this with previous
lastTl.End = tl.End
t[i] = nil
}
}
}
}
func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
rv := make(TermLocations, 0)
for term, locations := range tlm {
for _, location := range locations {
tl := TermLocation{
Term: term,
Pos: int(location.Pos),
Start: int(location.Start),
End: int(location.End),
}
rv = append(rv, &tl)
}
}
sort.Sort(rv)
return rv
}
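// Illustrative sketch: callers order locations by Start first; overlapping
// entries are then folded into the earlier one and the absorbed entries are
// set to nil, as exercised by the tests that follow.
func exampleMergeOverlapping() TermLocations {
    tls := TermLocations{
        &TermLocation{Start: 0, End: 5},
        &TermLocation{Start: 4, End: 11},
        &TermLocation{Start: 15, End: 21},
    }
    sort.Sort(tls)
    tls.MergeOverlapping() // -> {0 11}, nil, {15 21}
    return tls
}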

View file

@ -0,0 +1,173 @@
package highlight
import (
"reflect"
"testing"
)
func TestTermLocationOverlaps(t *testing.T) {
tests := []struct {
left *TermLocation
right *TermLocation
expected bool
}{
{
left: &TermLocation{
Start: 0,
End: 5,
},
right: &TermLocation{
Start: 3,
End: 7,
},
expected: true,
},
{
left: &TermLocation{
Start: 0,
End: 5,
},
right: &TermLocation{
Start: 5,
End: 7,
},
expected: false,
},
{
left: &TermLocation{
Start: 0,
End: 5,
},
right: &TermLocation{
Start: 7,
End: 11,
},
expected: false,
},
}
for _, test := range tests {
actual := test.left.Overlaps(test.right)
if actual != test.expected {
t.Errorf("expected %t got %t for %#v", test.expected, actual, test)
}
}
}
func TestTermLocationsMergeOverlapping(t *testing.T) {
tests := []struct {
input TermLocations
output TermLocations
}{
{
input: TermLocations{},
output: TermLocations{},
},
{
input: TermLocations{
&TermLocation{
Start: 0,
End: 5,
},
&TermLocation{
Start: 7,
End: 11,
},
},
output: TermLocations{
&TermLocation{
Start: 0,
End: 5,
},
&TermLocation{
Start: 7,
End: 11,
},
},
},
{
input: TermLocations{
&TermLocation{
Start: 0,
End: 5,
},
&TermLocation{
Start: 4,
End: 11,
},
},
output: TermLocations{
&TermLocation{
Start: 0,
End: 11,
},
nil,
},
},
{
input: TermLocations{
&TermLocation{
Start: 0,
End: 5,
},
&TermLocation{
Start: 4,
End: 11,
},
&TermLocation{
Start: 9,
End: 13,
},
},
output: TermLocations{
&TermLocation{
Start: 0,
End: 13,
},
nil,
nil,
},
},
{
input: TermLocations{
&TermLocation{
Start: 0,
End: 5,
},
&TermLocation{
Start: 4,
End: 11,
},
&TermLocation{
Start: 9,
End: 13,
},
&TermLocation{
Start: 15,
End: 21,
},
},
output: TermLocations{
&TermLocation{
Start: 0,
End: 13,
},
nil,
nil,
&TermLocation{
Start: 15,
End: 21,
},
},
},
}
for _, test := range tests {
test.input.MergeOverlapping()
if !reflect.DeepEqual(test.input, test.output) {
t.Errorf("expected: %#v got %#v", test.output, test.input)
}
}
}

View file

@ -0,0 +1,91 @@
package search
import (
"math"
)
func LevenshteinDistance(a, b *string) int {
la := len(*a)
lb := len(*b)
d := make([]int, la+1)
var lastdiag, olddiag, temp int
for i := 1; i <= la; i++ {
d[i] = i
}
for i := 1; i <= lb; i++ {
d[0] = i
lastdiag = i - 1
for j := 1; j <= la; j++ {
olddiag = d[j]
min := d[j] + 1
if (d[j-1] + 1) < min {
min = d[j-1] + 1
}
if (*a)[j-1] == (*b)[i-1] {
temp = 0
} else {
temp = 1
}
if (lastdiag + temp) < min {
min = lastdiag + temp
}
d[j] = min
lastdiag = olddiag
}
}
return d[la]
}
// LevenshteinDistanceMax is the same as LevenshteinDistance, but it
// attempts to bail out early once we know the distance
// will be greater than max,
// in which case the first return value will be max
// and the second will be true, indicating max was exceeded.
func LevenshteinDistanceMax(a, b *string, max int) (int, bool) {
la := len(*a)
lb := len(*b)
ld := int(math.Abs(float64(la - lb)))
if ld > max {
return max, true
}
d := make([]int, la+1)
var lastdiag, olddiag, temp int
for i := 1; i <= la; i++ {
d[i] = i
}
for i := 1; i <= lb; i++ {
d[0] = i
lastdiag = i - 1
rowmin := max + 1
for j := 1; j <= la; j++ {
olddiag = d[j]
min := d[j] + 1
if (d[j-1] + 1) < min {
min = d[j-1] + 1
}
if (*a)[j-1] == (*b)[i-1] {
temp = 0
} else {
temp = 1
}
if (lastdiag + temp) < min {
min = lastdiag + temp
}
if min < rowmin {
rowmin = min
}
d[j] = min
lastdiag = olddiag
}
// after each row, stop early if rowmin already exceeds max
if rowmin > max {
return max, true
}
}
return d[la], false
}
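// Illustrative sketch of the two functions above: "water" -> "atec" takes
// two edits (drop 'w', substitute 'r' with 'c'), and with max=1 the
// early-exit variant reports that the limit was exceeded.
func exampleLevenshtein() (int, int, bool) {
    a, b := "water", "atec"
    d := LevenshteinDistance(&a, &b)                  // 2
    dm, exceeded := LevenshteinDistanceMax(&a, &b, 1) // 1, true
    return d, dm, exceeded
}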

View file

@ -0,0 +1,114 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"testing"
)
func TestLevenshteinDistance(t *testing.T) {
tests := []struct {
a string
b string
dist int
}{
{
"water",
"atec",
2,
},
{
"water",
"aphex",
4,
},
}
for _, test := range tests {
actual := LevenshteinDistance(&test.a, &test.b)
if actual != test.dist {
t.Errorf("expected %d, got %d for %s and %s", test.dist, actual, test.a, test.b)
}
}
}
func TestLevenshteinDistanceMax(t *testing.T) {
tests := []struct {
a string
b string
max int
dist int
exceeded bool
}{
{
a: "water",
b: "atec",
max: 1,
dist: 1,
exceeded: true,
},
{
a: "water",
b: "christmas",
max: 3,
dist: 3,
exceeded: true,
},
{
a: "water",
b: "water",
max: 1,
dist: 0,
exceeded: false,
},
}
for _, test := range tests {
actual, exceeded := LevenshteinDistanceMax(&test.a, &test.b, test.max)
if actual != test.dist || exceeded != test.exceeded {
t.Errorf("expected %d %t, got %d %t for %s and %s", test.dist, test.exceeded, actual, exceeded, test.a, test.b)
}
}
}
// 5 terms within edit distance 2 of "water"
// 5 terms with edit distance greater than 2
var benchmarkTerms = []string{
"watex",
"aters",
"wayer",
"wbter",
"yater",
"christmas",
"waterwaterwater",
"watcatdogfish",
"q",
"couchbase",
}
func BenchmarkLevenshteinDistance(b *testing.B) {
a := "water"
for i := 0; i < b.N; i++ {
for _, t := range benchmarkTerms {
LevenshteinDistance(&a, &t)
}
}
}
func BenchmarkLevenshteinDistanceMax(b *testing.B) {
a := "water"
for i := 0; i < b.N; i++ {
for _, t := range benchmarkTerms {
LevenshteinDistanceMax(&a, &t, 2)
}
}
}

View file

@ -0,0 +1,59 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"github.com/blevesearch/bleve/search"
)
type ConjunctionQueryScorer struct {
explain bool
}
func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{
explain: explain,
}
}
func (s *ConjunctionQueryScorer) Score(constituents []*search.DocumentMatch) *search.DocumentMatch {
rv := search.DocumentMatch{
ID: constituents[0].ID,
}
var sum float64
var childrenExplanations []*search.Explanation
if s.explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}
locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents {
sum += docMatch.Score
if s.explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
locations = append(locations, docMatch.Locations)
}
}
rv.Score = sum
if s.explain {
rv.Expl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
}
if len(locations) == 1 {
rv.Locations = locations[0]
} else if len(locations) > 1 {
rv.Locations = search.MergeLocations(locations)
}
return &rv
}
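// Illustrative sketch: the conjunction score for a document is simply the
// sum of its constituents' scores (0.5 + 0.25 = 0.75 here); locations, when
// present, are merged.
func exampleConjunctionScore() *search.DocumentMatch {
    s := NewConjunctionQueryScorer(false)
    return s.Score([]*search.DocumentMatch{
        {ID: "a", Score: 0.5},
        {ID: "a", Score: 0.25},
    }) // Score == 0.75
}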

View file

@ -0,0 +1,103 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"fmt"
"github.com/blevesearch/bleve/search"
)
type ConstantScorer struct {
constant float64
boost float64
explain bool
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer {
rv := ConstantScorer{
explain: explain,
queryWeight: 1.0,
constant: constant,
boost: boost,
}
return &rv
}
func (s *ConstantScorer) Weight() float64 {
sum := s.boost
return sum * sum
}
func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
s.queryNorm = qnorm
// update the query weight
s.queryWeight = s.boost * s.queryNorm
if s.explain {
childrenExplanations := make([]*search.Explanation, 2)
childrenExplanations[0] = &search.Explanation{
Value: s.boost,
Message: "boost",
}
childrenExplanations[1] = &search.Explanation{
Value: s.queryNorm,
Message: "queryNorm",
}
s.queryWeightExplanation = &search.Explanation{
Value: s.queryWeight,
Message: fmt.Sprintf("ConstantScore()^%f, product of:", s.boost),
Children: childrenExplanations,
}
}
}
func (s *ConstantScorer) Score(id string) *search.DocumentMatch {
var scoreExplanation *search.Explanation
score := s.constant
if s.explain {
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("ConstantScore()"),
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(^%f), product of:", s.boost),
Children: childExplanations,
}
}
}
rv := search.DocumentMatch{
ID: id,
Score: score,
}
if s.explain {
rv.Expl = scoreExplanation
}
return &rv
}
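// Illustrative sketch: with boost 3.0 and a query norm of 0.5 the query
// weight becomes 1.5, so a constant of 1.0 yields a final score of 1.5.
// The document ID "doc-1" is a placeholder.
func exampleConstantScore() *search.DocumentMatch {
    s := NewConstantScorer(1.0, 3.0, false)
    s.SetQueryNorm(0.5)
    return s.Score("doc-1") // Score == 1.5
}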

View file

@ -0,0 +1,118 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func TestConstantScorer(t *testing.T) {
scorer := NewConstantScorer(1, 1, true)
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
// test some simple math
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 1,
Norm: 1.0,
Vectors: []*index.TermFieldVector{
&index.TermFieldVector{
Field: "desc",
Pos: 1,
Start: 0,
End: 4,
},
},
},
result: &search.DocumentMatch{
ID: "one",
Score: 1.0,
Expl: &search.Explanation{
Value: 1.0,
Message: "ConstantScore()",
},
},
},
}
for _, test := range tests {
actual := scorer.Score(test.termMatch.ID)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
func TestConstantScorerWithQueryNorm(t *testing.T) {
scorer := NewConstantScorer(1, 1, true)
scorer.SetQueryNorm(2.0)
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
ID: "one",
Score: 2.0,
Expl: &search.Explanation{
Value: 2.0,
Message: "weight(^1.000000), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 2.0,
Message: "ConstantScore()^1.000000, product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 1,
Message: "boost",
},
&search.Explanation{
Value: 2,
Message: "queryNorm",
},
},
},
&search.Explanation{
Value: 1.0,
Message: "ConstantScore()",
},
},
},
},
},
}
for _, test := range tests {
actual := scorer.Score(test.termMatch.ID)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}

View file

@ -0,0 +1,71 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"fmt"
"github.com/blevesearch/bleve/search"
)
type DisjunctionQueryScorer struct {
explain bool
}
func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{
explain: explain,
}
}
func (s *DisjunctionQueryScorer) Score(constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
rv := search.DocumentMatch{
ID: constituents[0].ID,
}
var sum float64
var childrenExplanations []*search.Explanation
if s.explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}
locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents {
sum += docMatch.Score
if s.explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
locations = append(locations, docMatch.Locations)
}
}
var rawExpl *search.Explanation
if s.explain {
rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
}
coord := float64(countMatch) / float64(countTotal)
rv.Score = sum * coord
if s.explain {
ce := make([]*search.Explanation, 2)
ce[0] = rawExpl
ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}
rv.Expl = &search.Explanation{Value: rv.Score, Message: "product of:", Children: ce}
}
if len(locations) == 1 {
rv.Locations = locations[0]
} else if len(locations) > 1 {
rv.Locations = search.MergeLocations(locations)
}
return &rv
}
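// Illustrative sketch: with one of two optional clauses matching, the summed
// score is scaled by the coord factor 1/2 (0.8 * 0.5 = 0.4 here).
func exampleDisjunctionScore() *search.DocumentMatch {
    s := NewDisjunctionQueryScorer(false)
    return s.Score([]*search.DocumentMatch{
        {ID: "a", Score: 0.8},
    }, 1, 2) // Score == 0.4
}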

View file

@ -0,0 +1,169 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"fmt"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type TermQueryScorer struct {
queryTerm string
queryField string
queryBoost float64
docTerm uint64
docTotal uint64
idf float64
explain bool
idfExplanation *search.Explanation
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
explain: explain,
queryWeight: 1.0,
}
if explain {
rv.idfExplanation = &search.Explanation{
Value: rv.idf,
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
}
}
return &rv
}
func (s *TermQueryScorer) Weight() float64 {
sum := s.queryBoost * s.idf
return sum * sum
}
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
s.queryNorm = qnorm
// update the query weight
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
if s.explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: s.queryBoost,
Message: "boost",
}
childrenExplanations[1] = s.idfExplanation
childrenExplanations[2] = &search.Explanation{
Value: s.queryNorm,
Message: "queryNorm",
}
s.queryWeightExplanation = &search.Explanation{
Value: s.queryWeight,
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost),
Children: childrenExplanations,
}
}
}
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentMatch {
var scoreExplanation *search.Explanation
// need to compute score
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score := tf * termMatch.Norm * s.idf
if s.explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
Children: childrenExplanations,
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
}
rv := search.DocumentMatch{
ID: termMatch.ID,
Score: score,
}
if s.explain {
rv.Expl = scoreExplanation
}
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field]
if tlm == nil {
tlm = make(search.TermLocationMap)
}
loc := search.Location{
Pos: float64(v.Pos),
Start: float64(v.Start),
End: float64(v.End),
}
locations := tlm[s.queryTerm]
if locations == nil {
locations = make(search.Locations, 1)
locations[0] = &loc
} else {
locations = append(locations, &loc)
}
tlm[s.queryTerm] = locations
rv.Locations[v.Field] = tlm
}
}
return &rv
}
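// Illustrative sketch of the tf-idf arithmetic above: with 100 documents,
// 9 of which contain the term, idf = 1 + ln(100/10), roughly 3.30; a single
// occurrence with field norm 1.0 then scores sqrt(1) * 1.0 * idf. The term
// and field names are placeholders.
func exampleTermScore() *search.DocumentMatch {
    s := NewTermQueryScorer("beer", "desc", 1.0, 100, 9, false)
    return s.Score(&index.TermFieldDoc{ID: "one", Freq: 1, Norm: 1.0})
}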

View file

@ -0,0 +1,241 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"math"
"reflect"
"testing"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func TestTermScorer(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = "beer"
var queryField = "desc"
var queryBoost = 1.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true)
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
// test some simple math
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 1,
Norm: 1.0,
Vectors: []*index.TermFieldVector{
&index.TermFieldVector{
Field: "desc",
Pos: 1,
Start: 0,
End: 4,
},
},
},
result: &search.DocumentMatch{
ID: "one",
Score: math.Sqrt(1.0) * idf,
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
&search.Explanation{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
&search.Explanation{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
Locations: search.FieldTermLocationMap{
"desc": search.TermLocationMap{
"beer": search.Locations{
&search.Location{
Pos: 1,
Start: 0,
End: 4,
},
},
},
},
},
},
// test the same term match again, this time without vectors (so no locations in the result)
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
ID: "one",
Score: math.Sqrt(1.0) * idf,
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
&search.Explanation{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
&search.Explanation{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
// test a case where the sqrt isn't precalculated
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 65,
Norm: 1.0,
},
result: &search.DocumentMatch{
ID: "one",
Score: math.Sqrt(65) * idf,
Expl: &search.Explanation{
Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: math.Sqrt(65),
Message: "tf(termFreq(desc:beer)=65",
},
&search.Explanation{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
&search.Explanation{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
}
for _, test := range tests {
actual := scorer.Score(test.termMatch)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
func TestTermScorerWithQueryNorm(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = "beer"
var queryField = "desc"
var queryBoost = 3.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true)
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))
scorer.SetQueryNorm(2.0)
expectedQueryWeight := 3 * idf * 3 * idf
actualQueryWeight := scorer.Weight()
if expectedQueryWeight != actualQueryWeight {
t.Errorf("expected query weight %f, got %f", expectedQueryWeight, actualQueryWeight)
}
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
{
termMatch: &index.TermFieldDoc{
ID: "one",
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
ID: "one",
Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Message: "weight(desc:beer^3.000000 in one), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 2.0 * idf * 3.0,
Message: "queryWeight(desc:beer^3.000000), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 3,
Message: "boost",
},
&search.Explanation{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
&search.Explanation{
Value: 2,
Message: "queryNorm",
},
},
},
&search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Children: []*search.Explanation{
&search.Explanation{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
&search.Explanation{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
&search.Explanation{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
},
},
}
for _, test := range tests {
actual := scorer.Score(test.termMatch)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}

View file

@ -0,0 +1,25 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"math"
)
var SqrtCache map[int]float64
const MaxSqrtCache = 64
func init() {
SqrtCache = make(map[int]float64, MaxSqrtCache)
for i := 0; i < MaxSqrtCache; i++ {
SqrtCache[i] = math.Sqrt(float64(i))
}
}
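// Illustrative sketch mirroring how the term scorer above consumes this
// cache: small frequencies hit the precomputed table, larger ones fall back
// to math.Sqrt.
func sqrtForFreq(freq uint64) float64 {
    if freq < MaxSqrtCache {
        return SqrtCache[int(freq)]
    }
    return math.Sqrt(float64(freq))
}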

vendor/github.com/blevesearch/bleve/search/search.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
type Location struct {
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
}
type Locations []*Location
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
existingLocations, exists := t[term]
if exists {
existingLocations = append(existingLocations, location)
t[term] = existingLocations
} else {
locations := make(Locations, 1)
locations[0] = location
t[term] = locations
}
}
type FieldTermLocationMap map[string]TermLocationMap
type FieldFragmentMap map[string][]string
type DocumentMatch struct {
ID string `json:"id"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Fields map[string]interface{} `json:"fields,omitempty"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
if dm.Fields == nil {
dm.Fields = make(map[string]interface{})
}
existingVal, ok := dm.Fields[name]
if ok {
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
} else {
dm.Fields[name] = value
}
}
type DocumentMatchCollection []*DocumentMatch
func (c DocumentMatchCollection) Len() int { return len(c) }
func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
type Searcher interface {
Next() (*DocumentMatch, error)
Advance(ID string) (*DocumentMatch, error)
Close() error
Weight() float64
SetQueryNorm(float64)
Count() uint64
Min() int
}
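// Illustrative sketch of AddFieldValue above: the first value for a field is
// stored as a scalar, and subsequent values promote it to a slice.
func exampleAddFieldValue() *DocumentMatch {
    dm := &DocumentMatch{ID: "x"}
    dm.AddFieldValue("tag", "a") // Fields["tag"] == "a"
    dm.AddFieldValue("tag", "b") // Fields["tag"] == []interface{}{"a", "b"}
    return dm
}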

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"regexp"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/inmem"
"github.com/blevesearch/bleve/index/upside_down"
)
var twoDocIndex index.Index //= upside_down.NewUpsideDownCouch(inmem.MustOpen())
func init() {
inMemStore, _ := inmem.New()
analysisQueue := upside_down.NewAnalysisQueue(1)
twoDocIndex = upside_down.NewUpsideDownCouch(inMemStore, analysisQueue)
err := twoDocIndex.Open()
if err != nil {
panic(err)
}
for _, doc := range twoDocIndexDocs {
err := twoDocIndex.Update(doc)
if err != nil {
panic(err)
}
}
}
// create a simpler analyzer which will support these tests
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// sets up some mock data used in many tests in this package
var twoDocIndexDescIndexingOptions = document.DefaultTextIndexingOptions | document.IncludeTermVectors
var twoDocIndexDocs = []*document.Document{
// must have 4/4 beer
document.NewDocument("1").
AddField(document.NewTextField("name", []uint64{}, []byte("marty"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("2").
AddField(document.NewTextField("name", []uint64{}, []byte("steve"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("3").
AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 65/65 beer
document.NewDocument("4").
AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
// must have 0/x beer
document.NewDocument("5").
AddField(document.NewTextField("name", []uint64{}, []byte("bobert"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
}
func scoresCloseEnough(a, b float64) bool {
return math.Abs(a-b) < 0.001
}

View file

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/search"
)
type OrderedSearcherList []search.Searcher
// sort.Interface
func (otrl OrderedSearcherList) Len() int {
return len(otrl)
}
func (otrl OrderedSearcherList) Less(i, j int) bool {
return otrl[i].Count() < otrl[j].Count()
}
func (otrl OrderedSearcherList) Swap(i, j int) {
otrl[i], otrl[j] = otrl[j], otrl[i]
}
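// Illustrative sketch: this ordering is used with sort.Sort so that the
// rarest (lowest-Count) searcher comes first and drives iteration, e.g. in
// the conjunction searcher below:
//
//	searchers := OrderedSearcherList{termSearcherA, termSearcherB}
//	sort.Sort(searchers) // ascending by Count()
//
// termSearcherA and termSearcherB stand in for any search.Searcher values.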

View file

@ -0,0 +1,335 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
type BooleanSearcher struct {
initialized bool
indexReader index.IndexReader
mustSearcher search.Searcher
shouldSearcher search.Searcher
mustNotSearcher search.Searcher
queryNorm float64
currMust *search.DocumentMatch
currShould *search.DocumentMatch
currMustNot *search.DocumentMatch
currentID string
min uint64
scorer *scorers.ConjunctionQueryScorer
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) {
// build our searcher
rv := BooleanSearcher{
indexReader: indexReader,
mustSearcher: mustSearcher,
shouldSearcher: shouldSearcher,
mustNotSearcher: mustNotSearcher,
scorer: scorers.NewConjunctionQueryScorer(explain),
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *BooleanSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
if s.mustSearcher != nil {
sumOfSquaredWeights += s.mustSearcher.Weight()
}
if s.shouldSearcher != nil {
sumOfSquaredWeights += s.shouldSearcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
if s.mustSearcher != nil {
s.mustSearcher.SetQueryNorm(s.queryNorm)
}
if s.shouldSearcher != nil {
s.shouldSearcher.SetQueryNorm(s.queryNorm)
}
}
func (s *BooleanSearcher) initSearchers() error {
var err error
// get all searchers pointing at their first match
if s.mustSearcher != nil {
s.currMust, err = s.mustSearcher.Next()
if err != nil {
return err
}
}
if s.shouldSearcher != nil {
s.currShould, err = s.shouldSearcher.Next()
if err != nil {
return err
}
}
if s.mustNotSearcher != nil {
s.currMustNot, err = s.mustNotSearcher.Next()
if err != nil {
return err
}
}
if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.ID
} else if s.mustSearcher == nil && s.currShould != nil {
s.currentID = s.currShould.ID
} else {
s.currentID = ""
}
s.initialized = true
return nil
}
func (s *BooleanSearcher) advanceNextMust() error {
var err error
if s.mustSearcher != nil {
s.currMust, err = s.mustSearcher.Next()
if err != nil {
return err
}
} else if s.mustSearcher == nil {
s.currShould, err = s.shouldSearcher.Next()
if err != nil {
return err
}
}
if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.ID
} else if s.mustSearcher == nil && s.currShould != nil {
s.currentID = s.currShould.ID
} else {
s.currentID = ""
}
return nil
}
func (s *BooleanSearcher) Weight() float64 {
var rv float64
if s.mustSearcher != nil {
rv += s.mustSearcher.Weight()
}
if s.shouldSearcher != nil {
rv += s.shouldSearcher.Weight()
}
return rv
}
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
if s.mustSearcher != nil {
s.mustSearcher.SetQueryNorm(qnorm)
}
if s.shouldSearcher != nil {
s.shouldSearcher.SetQueryNorm(qnorm)
}
}
func (s *BooleanSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
for s.currentID != "" {
if s.currMustNot != nil && s.currMustNot.ID < s.currentID {
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
} else if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
if s.currShould != nil && s.currShould.ID < s.currentID {
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
} else if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
err = s.advanceNextMust()
if err != nil {
return nil, err
}
}
return rv, nil
}
func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
if s.mustSearcher != nil {
s.currMust, err = s.mustSearcher.Advance(ID)
if err != nil {
return nil, err
}
}
if s.shouldSearcher != nil {
s.currShould, err = s.shouldSearcher.Advance(ID)
if err != nil {
return nil, err
}
}
if s.mustNotSearcher != nil {
s.currMustNot, err = s.mustNotSearcher.Advance(ID)
if err != nil {
return nil, err
}
}
if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.ID
} else if s.mustSearcher == nil && s.currShould != nil {
s.currentID = s.currShould.ID
} else {
s.currentID = ""
}
return s.Next()
}
func (s *BooleanSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
if s.mustSearcher != nil {
sum += s.mustSearcher.Count()
}
if s.shouldSearcher != nil {
sum += s.shouldSearcher.Count()
}
return sum
}
func (s *BooleanSearcher) Close() error {
if s.mustSearcher != nil {
err := s.mustSearcher.Close()
if err != nil {
return err
}
}
if s.shouldSearcher != nil {
err := s.shouldSearcher.Close()
if err != nil {
return err
}
}
if s.mustNotSearcher != nil {
err := s.mustNotSearcher.Close()
if err != nil {
return err
}
}
return nil
}
func (s *BooleanSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,364 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestBooleanSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
// test 0
beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher}, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
shouldSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher, shouldSearcher, mustNotSearcher, true)
if err != nil {
t.Fatal(err)
}
// test 1
martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
shouldSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher2}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher2, err := NewBooleanSearcher(twoDocIndexReader, nil, shouldSearcher2, mustNotSearcher2, true)
if err != nil {
t.Fatal(err)
}
// test 2
steveTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher3, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher3}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher3, err := NewBooleanSearcher(twoDocIndexReader, nil, nil, mustNotSearcher3, true)
if err != nil {
t.Fatal(err)
}
// test 3
beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher4, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4}, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher4, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher4}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher4, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher4, nil, mustNotSearcher4, true)
if err != nil {
t.Fatal(err)
}
// test 4
beerTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher5, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5}, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher5, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher5, martyTermSearcher5}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher5, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher5, nil, mustNotSearcher5, true)
if err != nil {
t.Fatal(err)
}
// test 5
beerTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher6, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher6}, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
shouldSearcher6, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher6, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher6, shouldSearcher6, nil, true)
if err != nil {
t.Fatal(err)
}
// test 6
beerTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher7}, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher7, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher7, nil, nil, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, true)
if err != nil {
t.Fatal(err)
}
conjunctionSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher7, booleanSearcher7}, true)
if err != nil {
t.Fatal(err)
}
// test 7
beerTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher8}, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
shouldSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, true)
if err != nil {
t.Fatal(err)
}
steveTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustNotSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher8}, 0, true)
if err != nil {
t.Fatal(err)
}
booleanSearcher8, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher8, shouldSearcher8, mustNotSearcher8, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher8a, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 5.0, true)
if err != nil {
t.Fatal(err)
}
conjunctionSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{booleanSearcher8, dustinTermSearcher8a}, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: booleanSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 0.9818005051949021,
},
&search.DocumentMatch{
ID: "3",
Score: 0.808709699395535,
},
&search.DocumentMatch{
ID: "4",
Score: 0.34618161159873423,
},
},
},
{
searcher: booleanSearcher2,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 0.6775110856165737,
},
&search.DocumentMatch{
ID: "3",
Score: 0.6775110856165737,
},
},
},
// no MUST or SHOULD clauses yields no results
{
searcher: booleanSearcher3,
results: []*search.DocumentMatch{},
},
{
searcher: booleanSearcher4,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 1.0,
},
&search.DocumentMatch{
ID: "3",
Score: 0.5,
},
&search.DocumentMatch{
ID: "4",
Score: 1.0,
},
},
},
{
searcher: booleanSearcher5,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "3",
Score: 0.5,
},
&search.DocumentMatch{
ID: "4",
Score: 1.0,
},
},
},
{
searcher: booleanSearcher6,
results: []*search.DocumentMatch{},
},
// test a conjunction query with a nested boolean
{
searcher: conjunctionSearcher7,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 2.0097428702814377,
},
},
},
{
searcher: conjunctionSearcher8,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "3",
Score: 2.0681575785068107,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,197 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
type ConjunctionSearcher struct {
initialized bool
indexReader index.IndexReader
searchers OrderedSearcherList
explain bool
queryNorm float64
currs []*search.DocumentMatch
currentID string
scorer *scorers.ConjunctionQueryScorer
}
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) {
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
searchers[i] = searcher
}
// sort the searchers
sort.Sort(searchers)
// build our searcher
rv := ConjunctionSearcher{
indexReader: indexReader,
explain: explain,
searchers: searchers,
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorers.NewConjunctionQueryScorer(explain),
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *ConjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, termSearcher := range s.searchers {
sumOfSquaredWeights += termSearcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, termSearcher := range s.searchers {
termSearcher.SetQueryNorm(s.queryNorm)
}
}
func (s *ConjunctionSearcher) initSearchers() error {
var err error
// get all searchers pointing at their first match
for i, termSearcher := range s.searchers {
s.currs[i], err = termSearcher.Next()
if err != nil {
return err
}
}
if len(s.currs) > 0 {
if s.currs[0] != nil {
s.currentID = s.currs[0].ID
} else {
s.currentID = ""
}
}
s.initialized = true
return nil
}
func (s *ConjunctionSearcher) Weight() float64 {
var rv float64
for _, searcher := range s.searchers {
rv += searcher.Weight()
}
return rv
}
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
for _, searcher := range s.searchers {
searcher.SetQueryNorm(qnorm)
}
}
func (s *ConjunctionSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
var err error
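// Alignment loop: s.currentID holds the current candidate doc ID. Any
// sub-searcher not already positioned on that ID is advanced to it; if a
// searcher overshoots, the candidate is bumped to the larger ID and the scan
// restarts. A document is scored only once every searcher agrees on the ID.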
OUTER:
for s.currentID != "" {
for i, termSearcher := range s.searchers {
if s.currs[i] != nil && s.currs[i].ID != s.currentID {
if s.currentID < s.currs[i].ID {
s.currentID = s.currs[i].ID
continue OUTER
}
// this reader doesn't have the currentID, try to advance
s.currs[i], err = termSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currs[i] == nil {
s.currentID = ""
continue OUTER
}
if s.currs[i].ID != s.currentID {
// we just advanced, so it doesn't match, it must be greater
// no need to call next
s.currentID = s.currs[i].ID
continue OUTER
}
} else if s.currs[i] == nil {
s.currentID = ""
continue OUTER
}
}
// if we get here, a doc matched all readers, sum the score and add it
rv = s.scorer.Score(s.currs)
// prepare for next entry
s.currs[0], err = s.searchers[0].Next()
if err != nil {
return nil, err
}
if s.currs[0] == nil {
s.currentID = ""
} else {
s.currentID = s.currs[0].ID
}
// don't continue now, wait for the next call to Next()
break
}
return rv, nil
}
func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
for i, searcher := range s.searchers {
s.currs[i], err = searcher.Advance(ID)
if err != nil {
return nil, err
}
}
s.currentID = ID
return s.Next()
}
func (s *ConjunctionSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
for _, searcher := range s.searchers {
sum += searcher.Count()
}
return sum
}
func (s *ConjunctionSearcher) Close() error {
for _, searcher := range s.searchers {
err := searcher.Close()
if err != nil {
return err
}
}
return nil
}
func (s *ConjunctionSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,212 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestConjunctionSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
// test 0
beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, true)
if err != nil {
t.Fatal(err)
}
beerAndMartySearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher, martyTermSearcher}, true)
if err != nil {
t.Fatal(err)
}
// test 1
angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
beerTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
angstAndBeerSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher2}, true)
if err != nil {
t.Fatal(err)
}
// test 2
beerTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
jackTermSearcher, err := NewTermSearcher(twoDocIndexReader, "jack", "name", 5.0, true)
if err != nil {
t.Fatal(err)
}
beerAndJackSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher3, jackTermSearcher}, true)
if err != nil {
t.Fatal(err)
}
// test 3
beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
misterTermSearcher, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true)
if err != nil {
t.Fatal(err)
}
beerAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4, misterTermSearcher}, true)
if err != nil {
t.Fatal(err)
}
// test 4
couchbaseTermSearcher, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, true)
if err != nil {
t.Fatal(err)
}
misterTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true)
if err != nil {
t.Fatal(err)
}
couchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher, misterTermSearcher2}, true)
if err != nil {
t.Fatal(err)
}
// test 5
beerTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 5.0, true)
if err != nil {
t.Fatal(err)
}
couchbaseTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, true)
if err != nil {
t.Fatal(err)
}
misterTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true)
if err != nil {
t.Fatal(err)
}
couchbaseAndMisterSearcher2, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher2, misterTermSearcher3}, true)
if err != nil {
t.Fatal(err)
}
beerAndCouchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: beerAndMartySearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 2.0097428702814377,
},
},
},
{
searcher: angstAndBeerSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 1.0807601687084403,
},
},
},
{
searcher: beerAndJackSearcher,
results: []*search.DocumentMatch{},
},
{
searcher: beerAndMisterSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 1.2877980334016337,
},
&search.DocumentMatch{
ID: "3",
Score: 1.2877980334016337,
},
},
},
{
searcher: couchbaseAndMisterSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 1.4436599157093672,
},
},
},
{
searcher: beerAndCouchbaseAndMisterSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 1.441614953806971,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,189 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
type DisjunctionSearcher struct {
initialized bool
indexReader index.IndexReader
searchers OrderedSearcherList
queryNorm float64
currs []*search.DocumentMatch
currentID string
scorer *scorers.DisjunctionQueryScorer
min float64
}
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
searchers[i] = searcher
}
// sort the searchers
sort.Sort(sort.Reverse(searchers))
// build our searcher
rv := DisjunctionSearcher{
indexReader: indexReader,
searchers: searchers,
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorers.NewDisjunctionQueryScorer(explain),
min: min,
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *DisjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, termSearcher := range s.searchers {
sumOfSquaredWeights += termSearcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, termSearcher := range s.searchers {
termSearcher.SetQueryNorm(s.queryNorm)
}
}
func (s *DisjunctionSearcher) initSearchers() error {
var err error
// get all searchers pointing at their first match
for i, termSearcher := range s.searchers {
s.currs[i], err = termSearcher.Next()
if err != nil {
return err
}
}
s.currentID = s.nextSmallestID()
s.initialized = true
return nil
}
func (s *DisjunctionSearcher) nextSmallestID() string {
rv := ""
for _, curr := range s.currs {
if curr != nil && (curr.ID < rv || rv == "") {
rv = curr.ID
}
}
return rv
}
func (s *DisjunctionSearcher) Weight() float64 {
var rv float64
for _, searcher := range s.searchers {
rv += searcher.Weight()
}
return rv
}
func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
for _, searcher := range s.searchers {
searcher.SetQueryNorm(qnorm)
}
}
func (s *DisjunctionSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
matching := make([]*search.DocumentMatch, 0, len(s.searchers))
found := false
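// Walk candidate doc IDs in ascending order: s.currentID is always the
// smallest ID any sub-searcher is currently positioned on. Collect the
// sub-matches sharing that ID; if at least s.min of them matched, score the
// combination, then advance the searchers that were on this ID and move on
// to the next smallest ID.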
for !found && s.currentID != "" {
for _, curr := range s.currs {
if curr != nil && curr.ID == s.currentID {
matching = append(matching, curr)
}
}
if len(matching) >= int(s.min) {
found = true
// score this match
rv = s.scorer.Score(matching, len(matching), len(s.searchers))
}
// reset matching
matching = make([]*search.DocumentMatch, 0)
// invoke next on all the matching searchers
for i, curr := range s.currs {
if curr != nil && curr.ID == s.currentID {
searcher := s.searchers[i]
s.currs[i], err = searcher.Next()
if err != nil {
return nil, err
}
}
}
s.currentID = s.nextSmallestID()
}
return rv, nil
}
func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
// advance all searchers to the first candidate with an ID >= the requested ID
var err error
for i, termSearcher := range s.searchers {
s.currs[i], err = termSearcher.Advance(ID)
if err != nil {
return nil, err
}
}
s.currentID = s.nextSmallestID()
return s.Next()
}
func (s *DisjunctionSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
for _, searcher := range s.searchers {
sum += searcher.Count()
}
return sum
}
func (s *DisjunctionSearcher) Close() error {
for _, searcher := range s.searchers {
err := searcher.Close()
if err != nil {
return err
}
}
return nil
}
func (s *DisjunctionSearcher) Min() int {
return int(s.min) // FIXME just make this an int
}

View file

@ -0,0 +1,168 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestDisjunctionSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
if err != nil {
t.Fatal(err)
}
martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
martyOrDustinSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true)
if err != nil {
t.Fatal(err)
}
raviTermSearcher, err := NewTermSearcher(twoDocIndexReader, "ravi", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: martyOrDustinSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 0.6775110856165737,
},
&search.DocumentMatch{
ID: "3",
Score: 0.6775110856165737,
},
},
},
// test a nested disjunction
{
searcher: nestedRaviOrMartyOrDustinSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 0.2765927424732821,
},
&search.DocumentMatch{
ID: "3",
Score: 0.2765927424732821,
},
&search.DocumentMatch{
ID: "4",
Score: 0.5531854849465642,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}
func TestDisjunctionAdvance(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
if err != nil {
t.Fatal(err)
}
match, err := martyOrDustinSearcher.Advance("3")
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if match == nil {
t.Errorf("expected 3, got nil")
}
}

View file

@ -0,0 +1,112 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type FuzzySearcher struct {
indexReader index.IndexReader
term string
prefix int
fuzziness int
field string
explain bool
searcher *DisjunctionSearcher
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
prefixTerm := ""
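// keep only the first `prefix` runes of the term; this literal prefix is used
// to narrow the field dictionary scan before edit distances are computed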
for i, r := range term {
if i < prefix {
prefixTerm += string(r)
}
}
// find the terms with this prefix
var fieldDict index.FieldDict
var err error
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field)
}
if err != nil {
return nil, err
}
// enumerate terms and check levenshtein distance
candidateTerms := make([]string, 0)
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
if !exceeded && ld <= fuzziness {
candidateTerms = append(candidateTerms, tfd.Term)
}
tfd, err = fieldDict.Next()
}
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
if err != nil {
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
return nil, err
}
return &FuzzySearcher{
indexReader: indexReader,
term: term,
prefix: prefix,
fuzziness: fuzziness,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *FuzzySearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *FuzzySearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *FuzzySearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
func (s *FuzzySearcher) Advance(ID string) (*search.DocumentMatch, error) {
return s.searcher.Advance(ID)
}
func (s *FuzzySearcher) Close() error {
return s.searcher.Close()
}
func (s *FuzzySearcher) Min() int {
return 0
}

View file

@ -0,0 +1,89 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
type MatchAllSearcher struct {
indexReader index.IndexReader
reader index.DocIDReader
scorer *scorers.ConstantScorer
}
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
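// an empty start and end asks the DocIDReader to iterate over every document in the index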
reader, err := indexReader.DocIDReader("", "")
if err != nil {
return nil, err
}
scorer := scorers.NewConstantScorer(1.0, boost, explain)
return &MatchAllSearcher{
indexReader: indexReader,
reader: reader,
scorer: scorer,
}, nil
}
func (s *MatchAllSearcher) Count() uint64 {
return s.indexReader.DocCount()
}
func (s *MatchAllSearcher) Weight() float64 {
return s.scorer.Weight()
}
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
func (s *MatchAllSearcher) Next() (*search.DocumentMatch, error) {
id, err := s.reader.Next()
if err != nil {
return nil, err
}
if id == "" {
return nil, nil
}
// score match
docMatch := s.scorer.Score(id)
// return doc match
return docMatch, nil
}
func (s *MatchAllSearcher) Advance(ID string) (*search.DocumentMatch, error) {
id, err := s.reader.Advance(ID)
if err != nil {
return nil, err
}
if id == "" {
return nil, nil
}
// score match
docMatch := s.scorer.Score(id)
// return doc match
return docMatch, nil
}
func (s *MatchAllSearcher) Close() error {
return s.reader.Close()
}
func (s *MatchAllSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,134 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestMatchAllSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
allSearcher, err := NewMatchAllSearcher(twoDocIndexReader, 1.0, true)
if err != nil {
t.Fatal(err)
}
allSearcher2, err := NewMatchAllSearcher(twoDocIndexReader, 1.2, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
queryNorm float64
results []*search.DocumentMatch
}{
{
searcher: allSearcher,
queryNorm: 1.0,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 1.0,
},
&search.DocumentMatch{
ID: "2",
Score: 1.0,
},
&search.DocumentMatch{
ID: "3",
Score: 1.0,
},
&search.DocumentMatch{
ID: "4",
Score: 1.0,
},
&search.DocumentMatch{
ID: "5",
Score: 1.0,
},
},
},
{
searcher: allSearcher2,
queryNorm: 0.8333333,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 1.0,
},
&search.DocumentMatch{
ID: "2",
Score: 1.0,
},
&search.DocumentMatch{
ID: "3",
Score: 1.0,
},
&search.DocumentMatch{
ID: "4",
Score: 1.0,
},
&search.DocumentMatch{
ID: "5",
Score: 1.0,
},
},
},
}
for testIndex, test := range tests {
if test.queryNorm != 1.0 {
test.searcher.SetQueryNorm(test.queryNorm)
}
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,53 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type MatchNoneSearcher struct {
indexReader index.IndexReader
}
func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, error) {
return &MatchNoneSearcher{
indexReader: indexReader,
}, nil
}
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}
func (s *MatchNoneSearcher) Weight() float64 {
return 0.0
}
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {
}
func (s *MatchNoneSearcher) Next() (*search.DocumentMatch, error) {
return nil, nil
}
func (s *MatchNoneSearcher) Advance(ID string) (*search.DocumentMatch, error) {
return nil, nil
}
func (s *MatchNoneSearcher) Close() error {
return nil
}
func (s *MatchNoneSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,76 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestMatchNoneSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
noneSearcher, err := NewMatchNoneSearcher(twoDocIndexReader)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: noneSearcher,
results: []*search.DocumentMatch{},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,214 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"bytes"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
type NumericRangeSearcher struct {
indexReader index.IndexReader
min *float64
max *float64
field string
explain bool
searcher *DisjunctionSearcher
}
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
// account for unbounded edges
if min == nil {
negInf := math.Inf(-1)
min = &negInf
}
if max == nil {
Inf := math.Inf(1)
max = &Inf
}
if inclusiveMin == nil {
defaultInclusiveMin := true
inclusiveMin = &defaultInclusiveMin
}
if inclusiveMax == nil {
defaultInclusiveMax := false
inclusiveMax = &defaultInclusiveMax
}
// find all the ranges
minInt64 := numeric_util.Float64ToInt64(*min)
if !*inclusiveMin && minInt64 != math.MaxInt64 {
minInt64++
}
maxInt64 := numeric_util.Float64ToInt64(*max)
if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64--
}
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, len(terms))
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, 1.0, explain)
if err != nil {
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
return nil, err
}
return &NumericRangeSearcher{
indexReader: indexReader,
min: min,
max: max,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *NumericRangeSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
func (s *NumericRangeSearcher) Advance(ID string) (*search.DocumentMatch, error) {
return s.searcher.Advance(ID)
}
func (s *NumericRangeSearcher) Close() error {
return s.searcher.Close()
}
type termRange struct {
startTerm []byte
endTerm []byte
}
func (t *termRange) Enumerate() [][]byte {
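// materialize every prefix-coded term between startTerm and endTerm,
// inclusive, by repeatedly incrementing the byte string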
rv := make([][]byte, 0)
next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 {
rv = append(rv, next)
next = incrementBytes(next)
}
return rv
}
func incrementBytes(in []byte) []byte {
rv := make([]byte, len(in))
copy(rv, in)
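// add one to the big-endian byte string, carrying into higher-order bytes
// whenever a byte wraps around to zero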
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
// didn't overflow, so stop
break
}
}
return rv
}
type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte {
rv := make([][]byte, 0)
for _, tri := range tr {
trie := tri.Enumerate()
rv = append(rv, trie...)
}
return rv
}
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
rv := make(termRanges, 0)
if minBound > maxBound {
return rv
}
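// Split [minBound, maxBound] into prefix-coded term ranges: at each precision
// level (precisionStep bits at a time) emit the low and high edge buckets that
// the next, coarser level cannot represent, then shrink the bounds to the
// aligned interior and repeat until the range collapses or 64 bits are used.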
for shift := uint(0); ; shift += precisionStep {
diff := int64(1) << (shift + precisionStep)
mask := ((int64(1) << precisionStep) - int64(1)) << shift
hasLower := (minBound & mask) != int64(0)
hasUpper := (maxBound & mask) != mask
var nextMinBound int64
if hasLower {
nextMinBound = (minBound + diff) &^ mask
} else {
nextMinBound = minBound &^ mask
}
var nextMaxBound int64
if hasUpper {
nextMaxBound = (maxBound - diff) &^ mask
} else {
nextMaxBound = maxBound &^ mask
}
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
break
}
if hasLower {
rv = append(rv, newRange(minBound, minBound|mask, shift))
}
if hasUpper {
rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
}
// recurse to next precision
minBound = nextMinBound
maxBound = nextMaxBound
}
return rv
}
func newRange(minBound, maxBound int64, shift uint) *termRange {
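// fill the low `shift` bits of the upper bound so the encoded term covers the
// entire bucket at this precision level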
maxBound |= (int64(1) << shift) - int64(1)
minBytes := numeric_util.MustNewPrefixCodedInt64(minBound, shift)
maxBytes := numeric_util.MustNewPrefixCodedInt64(maxBound, shift)
return newRangeBytes(minBytes, maxBytes)
}
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
return &termRange{
startTerm: minBytes,
endTerm: maxBytes,
}
}
func (s *NumericRangeSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/numeric_util"
)
func TestSplitRange(t *testing.T) {
min := numeric_util.Float64ToInt64(1.0)
max := numeric_util.Float64ToInt64(5.0)
ranges := splitInt64Range(min, max, 4)
enumerated := ranges.Enumerate()
if len(enumerated) != 135 {
t.Errorf("expected 135 terms, got %d", len(enumerated))
}
}
func TestIncrementBytes(t *testing.T) {
tests := []struct {
in []byte
out []byte
}{
{
in: []byte{0},
out: []byte{1},
},
{
in: []byte{0, 0},
out: []byte{0, 1},
},
{
in: []byte{0, 255},
out: []byte{1, 0},
},
}
for _, test := range tests {
actual := incrementBytes(test.in)
if !reflect.DeepEqual(actual, test.out) {
t.Errorf("expected %#v, got %#v", test.out, actual)
}
}
}

View file

@ -0,0 +1,197 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type PhraseSearcher struct {
initialized bool
indexReader index.IndexReader
mustSearcher *ConjunctionSearcher
queryNorm float64
currMust *search.DocumentMatch
slop int
terms []string
}
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
// build our searcher
rv := PhraseSearcher{
indexReader: indexReader,
mustSearcher: mustSearcher,
terms: terms,
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *PhraseSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
if s.mustSearcher != nil {
sumOfSquaredWeights += s.mustSearcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
if s.mustSearcher != nil {
s.mustSearcher.SetQueryNorm(s.queryNorm)
}
}
func (s *PhraseSearcher) initSearchers() error {
var err error
// get all searchers pointing at their first match
if s.mustSearcher != nil {
s.currMust, err = s.mustSearcher.Next()
if err != nil {
return err
}
}
s.initialized = true
return nil
}
func (s *PhraseSearcher) advanceNextMust() error {
var err error
if s.mustSearcher != nil {
s.currMust, err = s.mustSearcher.Next()
if err != nil {
return err
}
}
return nil
}
func (s *PhraseSearcher) Weight() float64 {
var rv float64
rv += s.mustSearcher.Weight()
return rv
}
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
s.mustSearcher.SetQueryNorm(qnorm)
}
func (s *PhraseSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
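// For each document the underlying conjunction matched, inspect its recorded
// term locations: starting from every occurrence of the first phrase term,
// require each subsequent term to appear at the position offset by its index
// in the phrase. Each successful chain increments freq and has its locations
// merged into the result; documents with no complete chain are skipped.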
for s.currMust != nil {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
firstTerm := s.terms[0]
for field, termLocMap := range s.currMust.Locations {
rvtlm := make(search.TermLocationMap, 0)
locations, ok := termLocMap[firstTerm]
if ok {
OUTER:
for _, location := range locations {
crvtlm := make(search.TermLocationMap, 0)
INNER:
for i := 0; i < len(s.terms); i++ {
nextTerm := s.terms[i]
if nextTerm != "" {
// look through all these term locations
// to try and find the correct offsets
nextLocations, ok := termLocMap[nextTerm]
if ok {
for _, nextLocation := range nextLocations {
if nextLocation.Pos == location.Pos+float64(i) {
// found a location match for this term
crvtlm.AddLocation(nextTerm, nextLocation)
continue INNER
}
}
// if we got here we didn't find a location match for this term
continue OUTER
} else {
continue OUTER
}
}
}
// if we got here all the terms matched
freq++
search.MergeTermLocationMaps(rvtlm, crvtlm)
rvftlm[field] = rvtlm
}
}
}
if freq > 0 {
// return match
rv = s.currMust
rv.Locations = rvftlm
err := s.advanceNextMust()
if err != nil {
return nil, err
}
return rv, nil
}
err := s.advanceNextMust()
if err != nil {
return nil, err
}
}
return nil, nil
}
func (s *PhraseSearcher) Advance(ID string) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
s.currMust, err = s.mustSearcher.Advance(ID)
if err != nil {
return nil, err
}
return s.Next()
}
func (s *PhraseSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
sum += s.mustSearcher.Count()
return sum
}
func (s *PhraseSearcher) Close() error {
if s.mustSearcher != nil {
err := s.mustSearcher.Close()
if err != nil {
return err
}
}
return nil
}
func (s *PhraseSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,93 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
func TestPhraseSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, true)
if err != nil {
t.Fatal(err)
}
phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"})
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: phraseSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 1.0807601687084403,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if next.Score != test.results[i].Score {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,108 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"regexp"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type RegexpSearcher struct {
indexReader index.IndexReader
pattern *regexp.Regexp
field string
explain bool
searcher *DisjunctionSearcher
}
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
prefixTerm, complete := pattern.LiteralPrefix()
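// LiteralPrefix returns the literal leading portion of the pattern; when
// complete is true the pattern is a plain literal, so only that exact term
// can match and the dictionary scan is skipped entirely.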
candidateTerms := make([]string, 0)
if complete {
// there is no pattern
candidateTerms = append(candidateTerms, prefixTerm)
} else {
var fieldDict index.FieldDict
var err error
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field)
}
if err != nil {
return nil, err
}
// enumerate the terms and check against regexp
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
if pattern.MatchString(tfd.Term) {
candidateTerms = append(candidateTerms, tfd.Term)
}
tfd, err = fieldDict.Next()
}
if err != nil {
return nil, err
}
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
if err != nil {
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
return nil, err
}
return &RegexpSearcher{
indexReader: indexReader,
pattern: pattern,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *RegexpSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *RegexpSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *RegexpSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *RegexpSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
func (s *RegexpSearcher) Advance(ID string) (*search.DocumentMatch, error) {
return s.searcher.Advance(ID)
}
func (s *RegexpSearcher) Close() error {
return s.searcher.Close()
}
func (s *RegexpSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,110 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"regexp"
"testing"
"github.com/blevesearch/bleve/search"
)
func TestRegexpSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
pattern, err := regexp.Compile("ma.*")
if err != nil {
t.Fatal(err)
}
regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, true)
if err != nil {
t.Fatal(err)
}
patternCo, err := regexp.Compile("co.*")
if err != nil {
t.Fatal(err)
}
regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, true)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: regexpSearcher,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "1",
Score: 1.916290731874155,
},
},
},
{
searcher: regexpSearcherCo,
results: []*search.DocumentMatch{
&search.DocumentMatch{
ID: "2",
Score: 0.33875554280828685,
},
&search.DocumentMatch{
ID: "3",
Score: 0.33875554280828685,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
next, err := test.searcher.Next()
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if next.ID != test.results[i].ID {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
}
if next.Score != test.results[i].Score {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next()
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}

View file

@ -0,0 +1,95 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
type TermSearcher struct {
indexReader index.IndexReader
term string
field string
explain bool
reader index.TermFieldReader
scorer *scorers.TermQueryScorer
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field)
if err != nil {
return nil, err
}
scorer := scorers.NewTermQueryScorer(term, field, boost, indexReader.DocCount(), reader.Count(), explain)
return &TermSearcher{
indexReader: indexReader,
term: term,
field: field,
explain: explain,
reader: reader,
scorer: scorer,
}, nil
}
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}
func (s *TermSearcher) Weight() float64 {
return s.scorer.Weight()
}
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
func (s *TermSearcher) Next() (*search.DocumentMatch, error) {
termMatch, err := s.reader.Next()
if err != nil {
return nil, err
}
if termMatch == nil {
return nil, nil
}
// score match
docMatch := s.scorer.Score(termMatch)
// return doc match
return docMatch, nil
}
func (s *TermSearcher) Advance(ID string) (*search.DocumentMatch, error) {
termMatch, err := s.reader.Advance(ID)
if err != nil {
return nil, err
}
if termMatch == nil {
return nil, nil
}
// score match
docMatch := s.scorer.Score(termMatch)
// return doc match
return docMatch, nil
}
func (s *TermSearcher) Close() error {
return s.reader.Close()
}
func (s *TermSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,81 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type TermPrefixSearcher struct {
indexReader index.IndexReader
prefix string
field string
explain bool
searcher *DisjunctionSearcher
}
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
qsearcher, err := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
if err != nil {
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
tfd, err = fieldDict.Next()
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
return nil, err
}
return &TermPrefixSearcher{
indexReader: indexReader,
prefix: prefix,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *TermPrefixSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *TermPrefixSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *TermPrefixSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
func (s *TermPrefixSearcher) Advance(ID string) (*search.DocumentMatch, error) {
return s.searcher.Advance(ID)
}
func (s *TermPrefixSearcher) Close() error {
return s.searcher.Close()
}
func (s *TermPrefixSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,195 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store/inmem"
"github.com/blevesearch/bleve/index/upside_down"
)
func TestTermSearcher(t *testing.T) {
var queryTerm = "beer"
var queryField = "desc"
var queryBoost = 3.0
var queryExplain = true
inMemStore, _ := inmem.New()
analysisQueue := upside_down.NewAnalysisQueue(1)
i := upside_down.NewUpsideDownCouch(inMemStore, analysisQueue)
err := i.Open()
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "a",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "b",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "c",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "d",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "e",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "f",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "g",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "h",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "i",
Fields: []document.Field{
document.NewTextField("desc", []uint64{}, []byte("beer")),
},
})
if err != nil {
t.Fatal(err)
}
err = i.Update(&document.Document{
ID: "j",
Fields: []document.Field{
document.NewTextField("title", []uint64{}, []byte("cat")),
},
})
if err != nil {
t.Fatal(err)
}
indexReader, err := i.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := indexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
searcher, err := NewTermSearcher(indexReader, queryTerm, queryField, queryBoost, queryExplain)
if err != nil {
t.Fatal(err)
}
defer func() {
err := searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
searcher.SetQueryNorm(2.0)
docCount, err := i.DocCount()
if err != nil {
t.Fatal(err)
}
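
	// the searcher's weight is expected to be (boost * idf)^2, with boost = 3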
idf := 1.0 + math.Log(float64(docCount)/float64(searcher.Count()+1.0))
expectedQueryWeight := 3 * idf * 3 * idf
if expectedQueryWeight != searcher.Weight() {
t.Errorf("expected weight %v got %v", expectedQueryWeight, searcher.Weight())
}
if searcher.Count() != 9 {
t.Errorf("expected count of 9, got %d", searcher.Count())
}
docMatch, err := searcher.Next()
if err != nil {
t.Errorf("expected result, got %v", err)
}
if docMatch.ID != "a" {
t.Errorf("expected result ID to be 'a', got '%s", docMatch.ID)
}
docMatch, err = searcher.Advance("c")
if err != nil {
t.Errorf("expected result, got %v", err)
}
if docMatch.ID != "c" {
t.Errorf("expected result ID to be 'c' got '%s'", docMatch.ID)
}
// try advancing past end
docMatch, err = searcher.Advance("z")
if err != nil {
t.Fatal(err)
}
if docMatch != nil {
t.Errorf("expected nil, got %v", docMatch)
}
// try pushing next past end
docMatch, err = searcher.Next()
if err != nil {
t.Fatal(err)
}
if docMatch != nil {
t.Errorf("expected nil, got %v", docMatch)
}
}

37
vendor/github.com/blevesearch/bleve/search/util.go generated vendored Normal file
View file

@@ -0,0 +1,37 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
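
// MergeLocations merges a slice of FieldTermLocationMaps into a single map.
// The first map in the slice is reused as the result, and the remaining maps
// are folded into it field by field.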
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
rv := locations[0]
for i := 1; i < len(locations); i++ {
nextLocations := locations[i]
for field, termLocationMap := range nextLocations {
rvTermLocationMap, rvHasField := rv[field]
if rvHasField {
rv[field] = MergeTermLocationMaps(rvTermLocationMap, termLocationMap)
} else {
rv[field] = termLocationMap
}
}
}
return rv
}
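
// MergeTermLocationMaps copies each term's locations from other into rv,
// overwriting any existing entry for the same term, and returns rv.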
func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
for term, locationMap := range other {
// for a given term/document there cannot be different locations
// if they came back from different clauses, overwrite is ok
rv[term] = locationMap
}
return rv
}

View file

@@ -0,0 +1,86 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search

import (
	"reflect"
	"testing"
)
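
// TestMergeLocations merges three FieldTermLocationMaps and verifies that
// entries sharing a field key are combined while distinct fields are preserved.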
func TestMergeLocations(t *testing.T) {
flm1 := FieldTermLocationMap{
"marty": TermLocationMap{
"name": {
&Location{
Pos: 1,
Start: 0,
End: 5,
},
},
},
}
flm2 := FieldTermLocationMap{
"marty": TermLocationMap{
"description": {
&Location{
Pos: 5,
Start: 20,
End: 25,
},
},
},
}
flm3 := FieldTermLocationMap{
"josh": TermLocationMap{
"description": {
&Location{
Pos: 5,
Start: 20,
End: 25,
},
},
},
}
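
	// flm1 and flm2 share the field "marty" and should be merged into one
	// TermLocationMap; flm3 contributes the separate field "josh"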
expectedMerge := FieldTermLocationMap{
"marty": TermLocationMap{
"description": {
&Location{
Pos: 5,
Start: 20,
End: 25,
},
},
"name": {
&Location{
Pos: 1,
Start: 0,
End: 5,
},
},
},
"josh": TermLocationMap{
"description": {
&Location{
Pos: 5,
Start: 20,
End: 25,
},
},
},
}
mergedLocations := MergeLocations([]FieldTermLocationMap{flm1, flm2, flm3})
if !reflect.DeepEqual(expectedMerge, mergedLocations) {
t.Errorf("expected %v, got %v", expectedMerge, mergedLocations)
}
}