Switch from Godep to go vendoring

This commit is contained in:
Ken-Håvard Lieng 2016-03-01 01:51:26 +01:00
parent 6b37713bc0
commit cd317761c5
1504 changed files with 263076 additions and 34441 deletions

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"regexp"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/inmem"
"github.com/blevesearch/bleve/index/upside_down"
)
// twoDocIndex is the shared index fixture for this package's tests. It is
// created, opened and populated from twoDocIndexDocs by init.
var twoDocIndex index.Index
// init builds the shared in-memory index used by the tests in this package
// and loads every document in twoDocIndexDocs into it. Any failure panics,
// because none of the tests can run without the fixture.
func init() {
	inMemStore, err := inmem.New()
	if err != nil {
		// This error used to be discarded; fail loudly here instead of letting
		// a broken store surface later as an unrelated test failure.
		panic(err)
	}
	analysisQueue := upside_down.NewAnalysisQueue(1)
	twoDocIndex = upside_down.NewUpsideDownCouch(inMemStore, analysisQueue)
	if err := twoDocIndex.Open(); err != nil {
		panic(err)
	}
	for _, doc := range twoDocIndexDocs {
		if err := twoDocIndex.Update(doc); err != nil {
			panic(err)
		}
	}
}
// testAnalyzer is a simplified analyzer that supports these tests: it only
// configures a regexp tokenizer built from the pattern `\w+`.
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// sets up some mock data used in many tests in this package
var twoDocIndexDescIndexingOptions = document.DefaultTextIndexingOptions | document.IncludeTermVectors
var twoDocIndexDocs = []*document.Document{
// must have 4/4 beer
document.NewDocument("1").
AddField(document.NewTextField("name", []uint64{}, []byte("marty"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("2").
AddField(document.NewTextField("name", []uint64{}, []byte("steve"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("3").
AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 65/65 beer
document.NewDocument("4").
AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
// must have 0/x beer
document.NewDocument("5").
AddField(document.NewTextField("name", []uint64{}, []byte("bobert"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
}
// scoresCloseEnough reports whether two scores differ by strictly less than
// 0.001, which the tests treat as equal.
func scoresCloseEnough(a, b float64) bool {
	const tolerance = 0.001
	delta := math.Abs(a - b)
	return delta < tolerance
}

View file

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/search"
)
// OrderedSearcherList is a slice of searchers that implements sort.Interface,
// ordering its elements by ascending Count().
type OrderedSearcherList []search.Searcher

// Len returns the number of searchers in the list.
func (list OrderedSearcherList) Len() int {
	n := len(list)
	return n
}

// Less reports whether the searcher at index i has a smaller Count() than
// the searcher at index j.
func (list OrderedSearcherList) Less(i, j int) bool {
	left, right := list[i], list[j]
	return left.Count() < right.Count()
}

// Swap exchanges the searchers at indexes i and j.
func (list OrderedSearcherList) Swap(i, j int) {
	list[j], list[i] = list[i], list[j]
}

View file

@ -0,0 +1,335 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// BooleanSearcher combines up to three optional sub-searchers: candidates
// come from the must searcher (or from the should searcher when there is no
// must searcher), candidates matched by the must-not searcher are excluded,
// and should matches contribute to the score.
type BooleanSearcher struct {
// initialized is set by initSearchers once every sub-searcher has been
// positioned on its first match.
initialized bool
// indexReader is the reader handed to NewBooleanSearcher.
indexReader index.IndexReader
// mustSearcher, shouldSearcher and mustNotSearcher may each be nil.
mustSearcher search.Searcher
shouldSearcher search.Searcher
mustNotSearcher search.Searcher
// queryNorm is 1/sqrt of the summed Weight() of the must and should searchers.
queryNorm float64
// currMust, currShould and currMustNot hold the current match of the
// corresponding sub-searcher; nil once that searcher returned no match.
currMust *search.DocumentMatch
currShould *search.DocumentMatch
currMustNot *search.DocumentMatch
// currentID is the ID of the candidate being evaluated; "" means there are
// no more candidates.
currentID string
// NOTE(review): min is not read or written by any method visible here.
min uint64
// scorer produces the DocumentMatch returned by Next.
scorer *scorers.ConjunctionQueryScorer
}
// NewBooleanSearcher assembles a BooleanSearcher from the given sub-searchers
// (any of which may be nil), creates its conjunction scorer with the given
// explain flag, and computes the query norm, which is also pushed down to the
// must and should searchers. The returned error is always nil.
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) {
	searcher := &BooleanSearcher{
		indexReader:     indexReader,
		mustSearcher:    mustSearcher,
		shouldSearcher:  shouldSearcher,
		mustNotSearcher: mustNotSearcher,
		scorer:          scorers.NewConjunctionQueryScorer(explain),
	}
	searcher.computeQueryNorm()
	return searcher, nil
}
// computeQueryNorm sums the Weight() of the must and should searchers (the
// must-not searcher does not participate), stores 1/sqrt(sum) as the query
// norm, and hands that norm to the same searchers.
func (s *BooleanSearcher) computeQueryNorm() {
	// Only these two clauses take part in weighting; either may be nil.
	weighted := []search.Searcher{s.mustSearcher, s.shouldSearcher}

	var total float64
	for _, sub := range weighted {
		if sub != nil {
			total += sub.Weight()
		}
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	for _, sub := range weighted {
		if sub != nil {
			sub.SetQueryNorm(s.queryNorm)
		}
	}
}
// initSearchers positions every configured sub-searcher on its first match,
// derives the first candidate ID, and marks the searcher as initialized.
// It returns the first error reported by a sub-searcher, in which case the
// searcher is left uninitialized.
func (s *BooleanSearcher) initSearchers() error {
	var err error
	if s.mustSearcher != nil {
		if s.currMust, err = s.mustSearcher.Next(); err != nil {
			return err
		}
	}
	if s.shouldSearcher != nil {
		if s.currShould, err = s.shouldSearcher.Next(); err != nil {
			return err
		}
	}
	if s.mustNotSearcher != nil {
		if s.currMustNot, err = s.mustNotSearcher.Next(); err != nil {
			return err
		}
	}

	// The candidate comes from the must searcher when there is one, otherwise
	// from the should searcher; "" means nothing is left to evaluate.
	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.ID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.ID
	default:
		s.currentID = ""
	}

	s.initialized = true
	return nil
}
// advanceNextMust moves the candidate-producing searcher (the must searcher,
// or the should searcher when there is no must searcher) to its next match
// and updates currentID accordingly; currentID becomes "" when that searcher
// has no further match. On error currentID is left untouched.
func (s *BooleanSearcher) advanceNextMust() error {
	if s.mustSearcher != nil {
		match, err := s.mustSearcher.Next()
		s.currMust = match
		if err != nil {
			return err
		}
		if match == nil {
			s.currentID = ""
		} else {
			s.currentID = match.ID
		}
		return nil
	}

	// No must searcher: candidates are driven by the should searcher.
	match, err := s.shouldSearcher.Next()
	s.currShould = match
	if err != nil {
		return err
	}
	if match == nil {
		s.currentID = ""
	} else {
		s.currentID = match.ID
	}
	return nil
}
// Weight returns the sum of the Weight() of the must and should searchers;
// nil searchers contribute nothing and the must-not searcher is ignored.
func (s *BooleanSearcher) Weight() float64 {
	total := 0.0
	for _, sub := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if sub == nil {
			continue
		}
		total += sub.Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to the must and should searchers, when present.
// The must-not searcher is not updated.
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
	if must := s.mustSearcher; must != nil {
		must.SetQueryNorm(qnorm)
	}
	if should := s.shouldSearcher; should != nil {
		should.SetQueryNorm(qnorm)
	}
}
// Next returns the next document matching the boolean query, or (nil, nil)
// when no candidate is left. On first use it initializes the sub-searchers.
//
// For each candidate ID (see currentID) it:
//   - skips the candidate if the must-not searcher matches the same ID;
//   - scores the candidate together with the should match when the should
//     searcher matches the same ID;
//   - otherwise accepts the candidate on its own only if there is no should
//     searcher or the should searcher's Min() is 0; if not, the candidate is
//     dropped and the next one is tried.
//
// After a match is scored the candidate searcher is already advanced, so the
// following call continues from the next candidate.
func (s *BooleanSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
// currentID == "" means the candidate searcher is exhausted
for s.currentID != "" {
if s.currMustNot != nil && s.currMustNot.ID < s.currentID {
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
} else if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
// the candidate survived the must-not check; now look at the should clause
if s.currShould != nil && s.currShould.ID < s.currentID {
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
} else if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
// the should clause was required but did not match: drop this candidate
err = s.advanceNextMust()
if err != nil {
return nil, err
}
}
return rv, nil
}
// Advance moves every configured sub-searcher to ID, recomputes the current
// candidate from the must searcher (or the should searcher when there is no
// must searcher), and then returns the result of Next. The sub-searchers are
// initialized first if that has not happened yet.
func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	var err error
	if s.mustSearcher != nil {
		if s.currMust, err = s.mustSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}
	if s.shouldSearcher != nil {
		if s.currShould, err = s.shouldSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}
	if s.mustNotSearcher != nil {
		if s.currMustNot, err = s.mustNotSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}

	// Pick the new candidate; "" means there is nothing left to evaluate.
	s.currentID = ""
	if s.mustSearcher != nil {
		if s.currMust != nil {
			s.currentID = s.currMust.ID
		}
	} else if s.currShould != nil {
		s.currentID = s.currShould.ID
	}

	return s.Next()
}
// Count returns a worst-case estimate: the sum of Count() for the must and
// should searchers. The must-not searcher is not taken into account.
func (s *BooleanSearcher) Count() uint64 {
	var total uint64
	if must := s.mustSearcher; must != nil {
		total += must.Count()
	}
	if should := s.shouldSearcher; should != nil {
		total += should.Count()
	}
	return total
}
// Close closes the must, should and must-not searchers, skipping any that
// are nil. Every sub-searcher is closed even if an earlier one fails, so a
// single failure does not leave the others open; the first error
// encountered is returned.
func (s *BooleanSearcher) Close() error {
	var firstErr error
	for _, sub := range []search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher} {
		if sub == nil {
			continue
		}
		if err := sub.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a BooleanSearcher.
// NOTE(review): BooleanSearcher.Next treats a should searcher whose Min() is
// 0 as optional; presumably the same meaning applies when a BooleanSearcher
// is itself used as a should clause — confirm against search.Searcher.
func (s *BooleanSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,364 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestBooleanSearch runs a table of boolean queries (and conjunctions that
// nest a boolean query) against the shared twoDocIndex fixture and checks
// the IDs and scores of the returned documents, in order.
func TestBooleanSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// without a reader nothing below can work, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Local constructors that fail the test on error. Searchers are always
	// built with explain enabled.
	term := func(text, field string, boost float64) search.Searcher {
		s, err := NewTermSearcher(twoDocIndexReader, text, field, boost, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	conjunction := func(searchers ...search.Searcher) search.Searcher {
		s, err := NewConjunctionSearcher(twoDocIndexReader, searchers, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	disjunction := func(minMatch float64, searchers ...search.Searcher) search.Searcher {
		s, err := NewDisjunctionSearcher(twoDocIndexReader, searchers, minMatch, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	// must, should and mustNot may be passed as a literal nil to omit a clause.
	boolean := func(must, should, mustNot search.Searcher) search.Searcher {
		s, err := NewBooleanSearcher(twoDocIndexReader, must, should, mustNot, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}

	// test 0: must beer, should marty|dustin, must not steve
	booleanSearcher := boolean(
		conjunction(term("beer", "desc", 1.0)),
		disjunction(0, term("marty", "name", 1.0), term("dustin", "name", 1.0)),
		disjunction(0, term("steve", "name", 1.0)),
	)

	// test 1: should marty|dustin, must not steve
	booleanSearcher2 := boolean(
		nil,
		disjunction(0, term("marty", "name", 1.0), term("dustin", "name", 1.0)),
		disjunction(0, term("steve", "name", 1.0)),
	)

	// test 2: must not steve only
	booleanSearcher3 := boolean(
		nil,
		nil,
		disjunction(0, term("steve", "name", 1.0)),
	)

	// test 3: must beer, must not steve
	booleanSearcher4 := boolean(
		conjunction(term("beer", "desc", 1.0)),
		nil,
		disjunction(0, term("steve", "name", 1.0)),
	)

	// test 4: must beer, must not steve|marty
	booleanSearcher5 := boolean(
		conjunction(term("beer", "desc", 1.0)),
		nil,
		disjunction(0, term("steve", "name", 1.0), term("marty", "name", 1.0)),
	)

	// test 5: must beer, should marty|dustin with min 2
	booleanSearcher6 := boolean(
		conjunction(term("beer", "desc", 1.0)),
		disjunction(2, term("marty", "name", 1.0), term("dustin", "name", 1.0)),
		nil,
	)

	// test 6: conjunction of marty (boost 5) and a nested boolean (must beer).
	// The boolean is built first to keep the original construction order.
	booleanSearcher7 := boolean(conjunction(term("beer", "desc", 1.0)), nil, nil)
	conjunctionSearcher7 := conjunction(term("marty", "name", 5.0), booleanSearcher7)

	// test 7: conjunction of the test 0 boolean query and dustin (boost 5)
	conjunctionSearcher8 := conjunction(
		boolean(
			conjunction(term("beer", "desc", 1.0)),
			disjunction(0, term("marty", "name", 1.0), term("dustin", "name", 1.0)),
			disjunction(0, term("steve", "name", 1.0)),
		),
		term("dustin", "name", 5.0),
	)

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: booleanSearcher,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 0.9818005051949021},
				{ID: "3", Score: 0.808709699395535},
				{ID: "4", Score: 0.34618161159873423},
			},
		},
		{
			searcher: booleanSearcher2,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 0.6775110856165737},
				{ID: "3", Score: 0.6775110856165737},
			},
		},
		// no MUST or SHOULD clauses yields no results
		{
			searcher: booleanSearcher3,
			results:  []*search.DocumentMatch{},
		},
		{
			searcher: booleanSearcher4,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 1.0},
				{ID: "3", Score: 0.5},
				{ID: "4", Score: 1.0},
			},
		},
		{
			searcher: booleanSearcher5,
			results: []*search.DocumentMatch{
				{ID: "3", Score: 0.5},
				{ID: "4", Score: 1.0},
			},
		},
		{
			searcher: booleanSearcher6,
			results:  []*search.DocumentMatch{},
		},
		// test a conjunction query with a nested boolean
		{
			searcher: conjunctionSearcher7,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 2.0097428702814377},
			},
		},
		{
			searcher: conjunctionSearcher8,
			results: []*search.DocumentMatch{
				{ID: "3", Score: 2.0681575785068107},
			},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument: a closure over the loop variable
		// would close only the last searcher on Go versions before 1.22.
		defer func(s search.Searcher) {
			err := s.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -0,0 +1,197 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// ConjunctionSearcher returns only the documents matched by every one of its
// sub-searchers.
type ConjunctionSearcher struct {
// initialized is set by initSearchers once every sub-searcher has been
// positioned on its first match.
initialized bool
// indexReader is the reader handed to NewConjunctionSearcher.
indexReader index.IndexReader
// searchers holds the sub-searchers, sorted by ascending Count() at
// construction time.
searchers OrderedSearcherList
// explain is the flag that was also passed to the scorer's constructor.
explain bool
// queryNorm is 1/sqrt of the summed Weight() of all sub-searchers.
queryNorm float64
// currs[i] is the current match of searchers[i]; nil once that searcher
// returned no match.
currs []*search.DocumentMatch
// currentID is the candidate ID being aligned across all sub-searchers;
// "" means there are no more candidates.
currentID string
// scorer produces the DocumentMatch returned by Next.
scorer *scorers.ConjunctionQueryScorer
}
// NewConjunctionSearcher builds a ConjunctionSearcher over qsearchers. The
// searchers are copied into a new list (the caller's slice is not reordered),
// sorted by ascending Count(), and given the computed query norm. The
// returned error is always nil.
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) {
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)
	sort.Sort(ordered)

	searcher := &ConjunctionSearcher{
		indexReader: indexReader,
		explain:     explain,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorers.NewConjunctionQueryScorer(explain),
	}
	searcher.computeQueryNorm()
	return searcher, nil
}
// computeQueryNorm sums the Weight() of every sub-searcher, stores
// 1/sqrt(sum) as the query norm, and hands that norm to each sub-searcher.
func (s *ConjunctionSearcher) computeQueryNorm() {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions every sub-searcher on its first match, takes the
// first candidate ID from searchers[0] (or "" if it has no match), and marks
// the searcher as initialized. It returns the first error reported by a
// sub-searcher, in which case the searcher is left uninitialized.
func (s *ConjunctionSearcher) initSearchers() error {
	for i := range s.searchers {
		match, err := s.searchers[i].Next()
		s.currs[i] = match
		if err != nil {
			return err
		}
	}

	// With no sub-searchers at all, currentID keeps its existing value.
	if len(s.currs) > 0 {
		first := s.currs[0]
		if first == nil {
			s.currentID = ""
		} else {
			s.currentID = first.ID
		}
	}

	s.initialized = true
	return nil
}
// Weight returns the sum of the Weight() of every sub-searcher.
func (s *ConjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every sub-searcher.
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by every sub-searcher, or (nil, nil)
// when any sub-searcher is exhausted. On first use it initializes the
// sub-searchers.
//
// It repeatedly tries to align all sub-searchers on currentID: a searcher
// that is already past the candidate makes its own ID the new candidate,
// while a searcher that is behind is advanced to the candidate. Once every
// searcher sits on the same ID, the matches are scored together and
// searchers[0] is moved on so the following call starts from a new candidate.
func (s *ConjunctionSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
var err error
OUTER:
// currentID == "" means at least one sub-searcher has no more matches
for s.currentID != "" {
for i, termSearcher := range s.searchers {
if s.currs[i] != nil && s.currs[i].ID != s.currentID {
if s.currentID < s.currs[i].ID {
// this searcher is already past the candidate: retry with its ID
s.currentID = s.currs[i].ID
continue OUTER
}
// this reader doesn't have the currentID, try to advance
s.currs[i], err = termSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currs[i] == nil {
s.currentID = ""
continue OUTER
}
if s.currs[i].ID != s.currentID {
// we just advanced, so it doesn't match, it must be greater
// no need to call next
s.currentID = s.currs[i].ID
continue OUTER
}
} else if s.currs[i] == nil {
// an exhausted sub-searcher means no further document can match
s.currentID = ""
continue OUTER
}
}
// if we get here, a doc matched all readers, sum the score and add it
rv = s.scorer.Score(s.currs)
// prepare for next entry
s.currs[0], err = s.searchers[0].Next()
if err != nil {
return nil, err
}
if s.currs[0] == nil {
s.currentID = ""
} else {
s.currentID = s.currs[0].ID
}
// don't continue now, wait for the next call to Next()
break
}
return rv, nil
}
// Advance moves every sub-searcher to ID, makes ID the current candidate,
// and then returns the result of Next. The sub-searchers are initialized
// first if that has not happened yet.
func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	for i := range s.searchers {
		match, err := s.searchers[i].Advance(ID)
		s.currs[i] = match
		if err != nil {
			return nil, err
		}
	}

	s.currentID = ID
	return s.Next()
}
// Count returns a worst-case estimate: the sum of Count() over all
// sub-searchers.
func (s *ConjunctionSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every sub-searcher. All of them are closed even if an earlier
// one fails, so a single failure does not leave the others open; the first
// error encountered is returned.
func (s *ConjunctionSearcher) Close() error {
	var firstErr error
	for _, searcher := range s.searchers {
		if err := searcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a ConjunctionSearcher.
func (s *ConjunctionSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,212 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestConjunctionSearch runs a table of conjunction queries (including one
// nested conjunction) against the shared twoDocIndex fixture and checks the
// IDs and scores of the returned documents, in order.
func TestConjunctionSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// without a reader nothing below can work, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Local constructors that fail the test on error. Searchers are always
	// built with explain enabled.
	term := func(text, field string, boost float64) search.Searcher {
		s, err := NewTermSearcher(twoDocIndexReader, text, field, boost, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	conjunction := func(searchers ...search.Searcher) search.Searcher {
		s, err := NewConjunctionSearcher(twoDocIndexReader, searchers, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}

	// test 0
	beerAndMartySearcher := conjunction(
		term("beer", "desc", 1.0),
		term("marty", "name", 5.0),
	)

	// test 1
	angstAndBeerSearcher := conjunction(
		term("angst", "desc", 1.0),
		term("beer", "desc", 1.0),
	)

	// test 2
	beerAndJackSearcher := conjunction(
		term("beer", "desc", 1.0),
		term("jack", "name", 5.0),
	)

	// test 3
	beerAndMisterSearcher := conjunction(
		term("beer", "desc", 1.0),
		term("mister", "title", 5.0),
	)

	// test 4
	couchbaseAndMisterSearcher := conjunction(
		term("couchbase", "street", 1.0),
		term("mister", "title", 5.0),
	)

	// test 5: a conjunction nested inside another conjunction
	beerAndCouchbaseAndMisterSearcher := conjunction(
		term("beer", "desc", 5.0),
		conjunction(
			term("couchbase", "street", 1.0),
			term("mister", "title", 5.0),
		),
	)

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: beerAndMartySearcher,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 2.0097428702814377},
			},
		},
		{
			searcher: angstAndBeerSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.0807601687084403},
			},
		},
		{
			searcher: beerAndJackSearcher,
			results:  []*search.DocumentMatch{},
		},
		{
			searcher: beerAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.2877980334016337},
				{ID: "3", Score: 1.2877980334016337},
			},
		},
		{
			searcher: couchbaseAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.4436599157093672},
			},
		},
		{
			searcher: beerAndCouchbaseAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.441614953806971},
			},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument: a closure over the loop variable
		// would close only the last searcher on Go versions before 1.22.
		defer func(s search.Searcher) {
			err := s.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -0,0 +1,189 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// DisjunctionSearcher combines several child searchers and yields the
// documents matched by at least `min` of them, visiting document IDs in
// ascending string order.
type DisjunctionSearcher struct {
// initialized records whether initSearchers has already positioned the children.
initialized bool
indexReader index.IndexReader
// searchers holds the child searchers in the order produced by the
// reverse sort done in NewDisjunctionSearcher.
searchers OrderedSearcherList
// queryNorm is the normalization factor computed by computeQueryNorm.
queryNorm float64
// currs[i] is the pending (not yet consumed) match of searchers[i], or nil
// when that child currently has no match.
currs []*search.DocumentMatch
// currentID is the smallest document ID among currs; "" means no candidate is left.
currentID string
scorer *scorers.DisjunctionQueryScorer
// min is the minimum number of children that must match a document.
min float64
}
// NewDisjunctionSearcher builds a searcher over qsearchers that reports a
// document once at least min of the children match it. The children are
// copied into an OrderedSearcherList, reverse-sorted, and handed the query
// norm computed from their combined weights. explain is forwarded to the
// disjunction scorer. The returned error is always nil.
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
	// copy the caller's searchers so sorting does not reorder their slice
	ordered := make(OrderedSearcherList, len(qsearchers))
	for idx := range qsearchers {
		ordered[idx] = qsearchers[idx]
	}
	sort.Sort(sort.Reverse(ordered))

	ds := &DisjunctionSearcher{
		indexReader: indexReader,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorers.NewDisjunctionQueryScorer(explain),
		min:         min,
	}
	ds.computeQueryNorm()
	return ds, nil
}
// computeQueryNorm sums the weights reported by the child searchers, stores
// 1/sqrt(sum) as the query norm and pushes that norm down to every child.
func (s *DisjunctionSearcher) computeQueryNorm() {
	var total float64
	for idx := range s.searchers {
		total += s.searchers[idx].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	for idx := range s.searchers {
		s.searchers[idx].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers pulls the first match from every child searcher into currs,
// records the smallest pending document ID and marks the searcher as
// initialized. It stops at the first child error and returns it.
func (s *DisjunctionSearcher) initSearchers() error {
	for idx := range s.searchers {
		first, err := s.searchers[idx].Next()
		s.currs[idx] = first
		if err != nil {
			return err
		}
	}

	s.currentID = s.nextSmallestID()
	s.initialized = true
	return nil
}
// nextSmallestID returns the lexicographically smallest document ID among the
// pending child matches, or "" when no child has a pending match.
func (s *DisjunctionSearcher) nextSmallestID() string {
	smallest := ""
	for _, pending := range s.currs {
		if pending == nil {
			continue
		}
		if smallest == "" || pending.ID < smallest {
			smallest = pending.ID
		}
	}
	return smallest
}
// Weight returns the sum of the weights of all child searchers.
func (s *DisjunctionSearcher) Weight() float64 {
	total := 0.0
	for idx := range s.searchers {
		total += s.searchers[idx].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher.
func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
	for idx := range s.searchers {
		s.searchers[idx].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least min child searchers,
// scored by the disjunction scorer, or nil when the children are exhausted.
// Children are positioned lazily on the first call.
func (s *DisjunctionSearcher) Next() (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	var result *search.DocumentMatch
	candidates := make([]*search.DocumentMatch, 0, len(s.searchers))
	for s.currentID != "" {
		// gather every pending child match sitting on the current ID
		for _, pending := range s.currs {
			if pending != nil && pending.ID == s.currentID {
				candidates = append(candidates, pending)
			}
		}

		satisfied := len(candidates) >= int(s.min)
		if satisfied {
			result = s.scorer.Score(candidates, len(candidates), len(s.searchers))
		}

		// start the next round with a fresh slice
		candidates = make([]*search.DocumentMatch, 0)

		// step every child that was sitting on the current ID
		for idx, pending := range s.currs {
			if pending == nil || pending.ID != s.currentID {
				continue
			}
			following, err := s.searchers[idx].Next()
			s.currs[idx] = following
			if err != nil {
				return nil, err
			}
		}
		s.currentID = s.nextSmallestID()

		if satisfied {
			break
		}
	}
	return result, nil
}
// Advance moves every child searcher forward with Advance(ID), recomputes the
// smallest pending document ID and then returns the result of Next.
func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	// reposition each child at (or after) the requested ID
	for idx := range s.searchers {
		moved, err := s.searchers[idx].Advance(ID)
		s.currs[idx] = moved
		if err != nil {
			return nil, err
		}
	}

	s.currentID = s.nextSmallestID()
	return s.Next()
}
// Count returns an upper bound on the number of matches: the sum of the
// counts of all child searchers (documents matched by several children are
// counted more than once).
func (s *DisjunctionSearcher) Count() uint64 {
	var total uint64
	for idx := range s.searchers {
		total += s.searchers[idx].Count()
	}
	return total
}
// Close closes every child searcher and returns the first error encountered,
// or nil if all of them closed cleanly. A failing child does not prevent the
// remaining children from being closed, so no child is left open.
func (s *DisjunctionSearcher) Close() error {
	var firstErr error
	for _, searcher := range s.searchers {
		if err := searcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min returns the minimum number of child searchers that must match,
// truncated to an int.
func (s *DisjunctionSearcher) Min() int {
	required := int(s.min) // FIXME just make this an int
	return required
}

View file

@ -0,0 +1,168 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestDisjunctionSearch checks the document IDs and scores produced by a flat
// and by a nested disjunction over the shared twoDocIndex fixture.
func TestDisjunctionSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	raviTermSearcher, err := NewTermSearcher(twoDocIndexReader, "ravi", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: martyOrDustinSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 0.6775110856165737,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 0.6775110856165737,
				},
			},
		},
		// test a nested disjunction
		{
			searcher: nestedRaviOrMartyOrDustinSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 0.2765927424732821,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 0.2765927424732821,
				},
				&search.DocumentMatch{
					ID:    "4",
					Score: 0.5531854849465642,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument: a closure over the loop variable
		// would (before Go 1.22) see only the last test case, closing that
		// searcher repeatedly and never closing the others.
		defer func(searcher search.Searcher) {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
// TestDisjunctionAdvance verifies that advancing a marty-or-dustin disjunction
// to document "3" yields a match rather than nil.
func TestDisjunctionAdvance(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
	if err != nil {
		t.Fatal(err)
	}
	// release the searcher (and the term searchers it owns) when the test ends
	defer func() {
		err := martyOrDustinSearcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	match, err := martyOrDustinSearcher.Advance("3")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Errorf("expected 3, got nil")
	}
}

View file

@ -0,0 +1,112 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FuzzySearcher matches documents containing terms within a bounded
// Levenshtein distance of a query term. The candidate terms are resolved at
// construction time and searched through a DisjunctionSearcher.
type FuzzySearcher struct {
indexReader index.IndexReader
// term is the query term candidates are compared against.
term string
// prefix is the limit used when building the term prefix that narrows the
// field dictionary scan.
prefix int
// fuzziness is the maximum Levenshtein distance accepted for a candidate.
fuzziness int
field string
explain bool
// searcher is the disjunction over one term searcher per candidate term.
searcher *DisjunctionSearcher
}
// NewFuzzySearcher builds a searcher matching documents whose field contains
// a term within Levenshtein distance fuzziness of term.
//
// The field dictionary is enumerated (restricted to entries sharing the term
// prefix when prefix > 0), every entry within the allowed distance becomes a
// term searcher, and the term searchers are combined in a disjunction.
//
// An error from opening or iterating the field dictionary, or from building
// any of the searchers, is returned with a nil searcher.
//
// Note: boost is accepted but not used here; every term searcher is created
// with a boost of 1.0.
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
	// i is the byte offset of rune r within term, so runes are kept while
	// their starting offset is below prefix
	prefixTerm := ""
	for i, r := range term {
		if i < prefix {
			prefixTerm += string(r)
		}
	}

	// find the terms with this prefix
	var fieldDict index.FieldDict
	var err error
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		// without this check the error would be overwritten by the Next call
		// below and a nil dictionary could be dereferenced
		return nil, err
	}
	// NOTE(review): fieldDict is never released here; if index.FieldDict
	// exposes a Close method it should probably be called - confirm.

	// enumerate terms and check levenshtein distance
	candidateTerms := make([]string, 0)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	// build one term searcher per candidate term
	qsearchers := make([]search.Searcher, 0, 25)
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these terms
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &FuzzySearcher{
		indexReader: indexReader,
		term:        term,
		prefix:      prefix,
		fuzziness:   fuzziness,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *FuzzySearcher) Count() uint64 {
	total := s.searcher.Count()
	return total
}
// Weight returns the weight of the underlying disjunction searcher.
func (s *FuzzySearcher) Weight() float64 {
	weight := s.searcher.Weight()
	return weight
}
// SetQueryNorm forwards qnorm to the underlying disjunction searcher.
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
	inner := s.searcher
	inner.SetQueryNorm(qnorm)
}
// Next returns the next match of the underlying disjunction searcher.
func (s *FuzzySearcher) Next() (*search.DocumentMatch, error) {
	match, err := s.searcher.Next()
	return match, err
}
// Advance moves the underlying disjunction searcher forward to ID and returns
// the match it reports from there.
func (s *FuzzySearcher) Advance(ID string) (*search.DocumentMatch, error) {
	// delegate to Advance (not Next) so the requested ID is honoured
	return s.searcher.Advance(ID)
}
// Close closes the underlying disjunction searcher.
func (s *FuzzySearcher) Close() error {
	err := s.searcher.Close()
	return err
}
// Min always reports 0 for a fuzzy searcher.
func (s *FuzzySearcher) Min() int {
	const required = 0
	return required
}

View file

@ -0,0 +1,89 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// MatchAllSearcher yields the document IDs produced by a DocIDReader, each
// scored by a constant scorer.
type MatchAllSearcher struct {
indexReader index.IndexReader
// reader supplies the document IDs returned by Next and Advance.
reader index.DocIDReader
// scorer turns each document ID into a DocumentMatch.
scorer *scorers.ConstantScorer
}
// NewMatchAllSearcher opens a DocIDReader with empty start and end bounds and
// pairs it with a constant scorer built from boost and explain. An error from
// opening the reader is returned with a nil searcher.
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
	docIDs, err := indexReader.DocIDReader("", "")
	if err != nil {
		return nil, err
	}

	searcher := &MatchAllSearcher{
		indexReader: indexReader,
		reader:      docIDs,
		scorer:      scorers.NewConstantScorer(1.0, boost, explain),
	}
	return searcher, nil
}
// Count returns the document count reported by the index reader.
func (s *MatchAllSearcher) Count() uint64 {
	docs := s.indexReader.DocCount()
	return docs
}
// Weight returns the weight reported by the constant scorer.
func (s *MatchAllSearcher) Weight() float64 {
	weight := s.scorer.Weight()
	return weight
}
// SetQueryNorm forwards qnorm to the constant scorer.
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
	scorer := s.scorer
	scorer.SetQueryNorm(qnorm)
}
// Next reads the next document ID and returns it as a scored match. It
// returns nil, nil when the reader yields an empty ID, and nil plus the error
// when the reader fails.
func (s *MatchAllSearcher) Next() (*search.DocumentMatch, error) {
	docID, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case docID == "":
		// an empty ID means there is nothing left to return
		return nil, nil
	}
	return s.scorer.Score(docID), nil
}
// Advance asks the reader to advance to ID and returns the resulting document
// as a scored match. It returns nil, nil when the reader yields an empty ID,
// and nil plus the error when the reader fails.
func (s *MatchAllSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	docID, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case docID == "":
		// an empty ID means there is nothing left to return
		return nil, nil
	}
	return s.scorer.Score(docID), nil
}
// Close closes the document ID reader.
func (s *MatchAllSearcher) Close() error {
	err := s.reader.Close()
	return err
}
// Min always reports 0 for a match-all searcher.
func (s *MatchAllSearcher) Min() int {
	const required = 0
	return required
}

View file

@ -0,0 +1,134 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestMatchAllSearch checks that a match-all searcher returns every document
// of the twoDocIndex fixture with the expected score, both with the default
// boost and with a boost of 1.2 combined with an explicit query norm.
func TestMatchAllSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	allSearcher, err := NewMatchAllSearcher(twoDocIndexReader, 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	allSearcher2, err := NewMatchAllSearcher(twoDocIndexReader, 1.2, true)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher  search.Searcher
		queryNorm float64
		results   []*search.DocumentMatch
	}{
		{
			searcher:  allSearcher,
			queryNorm: 1.0,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "2",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "4",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "5",
					Score: 1.0,
				},
			},
		},
		{
			searcher:  allSearcher2,
			queryNorm: 0.8333333,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "2",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "4",
					Score: 1.0,
				},
				&search.DocumentMatch{
					ID:    "5",
					Score: 1.0,
				},
			},
		},
	}

	for testIndex, test := range tests {
		if test.queryNorm != 1.0 {
			test.searcher.SetQueryNorm(test.queryNorm)
		}
		// Pass the searcher as an argument: a closure over the loop variable
		// would (before Go 1.22) see only the last test case, closing that
		// searcher repeatedly and never closing the others.
		defer func(searcher search.Searcher) {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -0,0 +1,53 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// MatchNoneSearcher is a searcher that never produces a match.
type MatchNoneSearcher struct {
// indexReader is stored at construction; none of the methods visible here read it.
indexReader index.IndexReader
}
// NewMatchNoneSearcher returns a searcher that matches nothing. The returned
// error is always nil.
func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, error) {
	none := &MatchNoneSearcher{indexReader: indexReader}
	return none, nil
}
// Count always reports zero matches.
func (s *MatchNoneSearcher) Count() uint64 {
	var none uint64
	return none
}
// Weight always reports a weight of zero.
func (s *MatchNoneSearcher) Weight() float64 {
	var weight float64
	return weight
}
// SetQueryNorm is a no-op: the searcher produces no matches, so qnorm is
// ignored.
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {
}
// Next always returns a nil match and a nil error.
func (s *MatchNoneSearcher) Next() (*search.DocumentMatch, error) {
	var nothing *search.DocumentMatch
	return nothing, nil
}
// Advance ignores ID and always returns a nil match and a nil error.
func (s *MatchNoneSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	var nothing *search.DocumentMatch
	return nothing, nil
}
// Close does nothing and always returns nil.
func (s *MatchNoneSearcher) Close() error {
	var err error
	return err
}
// Min always reports 0 for a match-none searcher.
func (s *MatchNoneSearcher) Min() int {
	const required = 0
	return required
}

View file

@ -0,0 +1,76 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestMatchNoneSearch checks that a match-none searcher yields no results
// over the twoDocIndex fixture.
func TestMatchNoneSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	noneSearcher, err := NewMatchNoneSearcher(twoDocIndexReader)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: noneSearcher,
			results:  []*search.DocumentMatch{},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument so the deferred call is bound to
		// this iteration's searcher rather than to the shared loop variable.
		defer func(searcher search.Searcher) {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -0,0 +1,214 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"bytes"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
// NumericRangeSearcher matches documents whose numeric field value falls in a
// range. The range is expanded into terms at construction time and searched
// through a DisjunctionSearcher.
type NumericRangeSearcher struct {
indexReader index.IndexReader
// min and max are the range bounds after nil bounds have been replaced by
// -Inf / +Inf in NewNumericRangeSearcher.
min *float64
max *float64
field string
explain bool
// searcher is the disjunction over one term searcher per enumerated term.
searcher *DisjunctionSearcher
}
// NewNumericRangeSearcher builds a searcher for documents whose field value
// lies between min and max.
//
// A nil min or max means unbounded (-Inf / +Inf). A nil inclusiveMin defaults
// to true and a nil inclusiveMax defaults to false. The bounds are converted
// to sortable int64 values, split into prefix-coded term ranges with a
// hard-coded precision step of 4, and each enumerated term becomes a term
// searcher inside a disjunction.
//
// Note: boost is accepted but not used here; every term searcher is created
// with a boost of 1.0.
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
	// account for unbounded edges
	if min == nil {
		lowest := math.Inf(-1)
		min = &lowest
	}
	if max == nil {
		highest := math.Inf(1)
		max = &highest
	}

	// resolve inclusiveness, falling back to [min, max)
	includeMin := true
	if inclusiveMin != nil {
		includeMin = *inclusiveMin
	}
	includeMax := false
	if inclusiveMax != nil {
		includeMax = *inclusiveMax
	}

	// tighten exclusive bounds by one step, guarding against overflow
	lower := numeric_util.Float64ToInt64(*min)
	if !includeMin && lower != math.MaxInt64 {
		lower++
	}
	upper := numeric_util.Float64ToInt64(*max)
	if !includeMax && upper != math.MinInt64 {
		upper--
	}

	// FIXME hard-coded precision, should match field declaration
	terms := splitInt64Range(lower, upper, 4).Enumerate()

	// one term searcher per enumerated term
	children := make([]search.Searcher, len(terms))
	for idx := range terms {
		child, err := NewTermSearcher(indexReader, string(terms[idx]), field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		children[idx] = child
	}

	// build disjunction searcher of these ranges
	disjunction, err := NewDisjunctionSearcher(indexReader, children, 0, explain)
	if err != nil {
		return nil, err
	}

	rangeSearcher := &NumericRangeSearcher{
		indexReader: indexReader,
		min:         min,
		max:         max,
		field:       field,
		explain:     explain,
		searcher:    disjunction,
	}
	return rangeSearcher, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *NumericRangeSearcher) Count() uint64 {
	total := s.searcher.Count()
	return total
}
// Weight returns the weight of the underlying disjunction searcher.
func (s *NumericRangeSearcher) Weight() float64 {
	weight := s.searcher.Weight()
	return weight
}
// SetQueryNorm forwards qnorm to the underlying disjunction searcher.
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
	inner := s.searcher
	inner.SetQueryNorm(qnorm)
}
// Next returns the next match of the underlying disjunction searcher.
func (s *NumericRangeSearcher) Next() (*search.DocumentMatch, error) {
	match, err := s.searcher.Next()
	return match, err
}
// Advance moves the underlying disjunction searcher forward to ID and returns
// the match it reports.
func (s *NumericRangeSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	match, err := s.searcher.Advance(ID)
	return match, err
}
// Close closes the underlying disjunction searcher.
func (s *NumericRangeSearcher) Close() error {
	err := s.searcher.Close()
	return err
}
// termRange is an inclusive range of byte-encoded terms, from startTerm up to
// and including endTerm.
type termRange struct {
startTerm []byte
endTerm []byte
}
// Enumerate lists every term from startTerm through endTerm (inclusive),
// obtaining each successor with incrementBytes. The result is empty when
// startTerm already compares greater than endTerm.
func (t *termRange) Enumerate() [][]byte {
	terms := make([][]byte, 0)
	for current := t.startTerm; bytes.Compare(current, t.endTerm) <= 0; current = incrementBytes(current) {
		terms = append(terms, current)
	}
	return terms
}
// incrementBytes returns a copy of in with one added to it, treating the
// slice as a big-endian unsigned number. A carry that runs off the most
// significant byte is dropped, so all-0xFF input wraps to all zeros. The
// input slice is not modified.
func incrementBytes(in []byte) []byte {
	out := append(make([]byte, 0, len(in)), in...)
	for pos := len(out) - 1; pos >= 0; pos-- {
		out[pos]++
		if out[pos] != 0 {
			// no overflow in this byte, so the carry stops here
			break
		}
	}
	return out
}
// termRanges is a list of term ranges that can be enumerated as one flat list
// of terms.
type termRanges []*termRange
// Enumerate concatenates the enumerated terms of every range, in list order.
func (tr termRanges) Enumerate() [][]byte {
	all := make([][]byte, 0)
	for idx := range tr {
		all = append(all, tr[idx].Enumerate()...)
	}
	return all
}
// splitInt64Range breaks the inclusive range [minBound, maxBound] into a list
// of term ranges, starting at shift 0 and moving up precisionStep bits per
// iteration. At each level it emits ranges for the unaligned lower and upper
// edges and continues with the remaining inner range at the next shift.
// It returns an empty list when minBound > maxBound.
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
rv := make(termRanges, 0)
if minBound > maxBound {
return rv
}
for shift := uint(0); ; shift += precisionStep {
// diff is one unit at the next precision level; mask selects the
// precisionStep bits that belong to the current level
diff := int64(1) << (shift + precisionStep)
mask := ((int64(1) << precisionStep) - int64(1)) << shift
// hasLower: minBound has non-zero bits at this level
// hasUpper: maxBound's bits at this level are not all ones
hasLower := (minBound & mask) != int64(0)
hasUpper := (maxBound & mask) != mask
// bounds of the inner range handled at the next level
var nextMinBound int64
if hasLower {
nextMinBound = (minBound + diff) &^ mask
} else {
nextMinBound = minBound &^ mask
}
var nextMaxBound int64
if hasUpper {
nextMaxBound = (maxBound - diff) &^ mask
} else {
nextMaxBound = maxBound &^ mask
}
// a next bound on the wrong side of the current bound means the
// int64 arithmetic above wrapped around
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
break
}
// emit the edge ranges that cannot be expressed at the next level
if hasLower {
rv = append(rv, newRange(minBound, minBound|mask, shift))
}
if hasUpper {
rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
}
// recurse to next precision
minBound = nextMinBound
maxBound = nextMaxBound
}
return rv
}
// newRange builds the term range for [minBound, maxBound] at the given shift.
// The low shift bits of maxBound are set to ones before both bounds are
// prefix-coded.
func newRange(minBound, maxBound int64, shift uint) *termRange {
	lowBits := (int64(1) << shift) - int64(1)
	start := numeric_util.MustNewPrefixCodedInt64(minBound, shift)
	end := numeric_util.MustNewPrefixCodedInt64(maxBound|lowBits, shift)
	return newRangeBytes(start, end)
}
// newRangeBytes wraps an already encoded start and end term in a termRange.
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
	encoded := new(termRange)
	encoded.startTerm = minBytes
	encoded.endTerm = maxBytes
	return encoded
}
// Min always reports 0 for a numeric range searcher.
func (s *NumericRangeSearcher) Min() int {
	const required = 0
	return required
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/numeric_util"
)
// TestSplitRange checks that splitting the range 1.0..5.0 with a precision
// step of 4 enumerates exactly 135 terms.
func TestSplitRange(t *testing.T) {
	lower := numeric_util.Float64ToInt64(1.0)
	upper := numeric_util.Float64ToInt64(5.0)

	terms := splitInt64Range(lower, upper, 4).Enumerate()
	if got := len(terms); got != 135 {
		t.Errorf("expected 135 terms, got %d", got)
	}
}
// TestIncrementBytes checks incrementBytes on a single byte, on two bytes
// without carry and on two bytes with a carry into the high byte.
func TestIncrementBytes(t *testing.T) {
	cases := []struct {
		in  []byte
		out []byte
	}{
		{in: []byte{0}, out: []byte{1}},
		{in: []byte{0, 0}, out: []byte{0, 1}},
		{in: []byte{0, 255}, out: []byte{1, 0}},
	}

	for idx := range cases {
		got := incrementBytes(cases[idx].in)
		if reflect.DeepEqual(got, cases[idx].out) {
			continue
		}
		t.Errorf("expected %#v, got %#v", cases[idx].out, got)
	}
}

View file

@ -0,0 +1,197 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// PhraseSearcher filters the matches of a conjunction searcher down to the
// documents in which the phrase terms occur at consecutive positions.
type PhraseSearcher struct {
// initialized records whether initSearchers has already run.
initialized bool
indexReader index.IndexReader
// mustSearcher supplies the candidate documents; several methods guard
// against it being nil.
mustSearcher *ConjunctionSearcher
// queryNorm is the normalization factor computed by computeQueryNorm.
queryNorm float64
// currMust is the candidate currently under inspection, or nil when none is pending.
currMust *search.DocumentMatch
// slop is not read by any code visible in this file.
slop int
// terms holds the phrase terms in order; an empty string is skipped when
// positions are checked in Next.
terms []string
}
// NewPhraseSearcher wraps mustSearcher in a searcher that only returns
// documents containing terms as a phrase, and computes the query norm for it.
// The returned error is always nil.
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
	phrase := &PhraseSearcher{
		indexReader:  indexReader,
		mustSearcher: mustSearcher,
		terms:        terms,
	}
	phrase.computeQueryNorm()
	return phrase, nil
}
// computeQueryNorm stores 1/sqrt(weight of mustSearcher) as the query norm
// and passes it to mustSearcher. With a nil mustSearcher the weight is taken
// as zero and nothing is passed on.
func (s *PhraseSearcher) computeQueryNorm() {
	must := s.mustSearcher

	total := 0.0
	if must != nil {
		total += must.Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	if must != nil {
		must.SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions mustSearcher on its first match (when it is not
// nil) and marks the searcher as initialized. On error the initialized flag
// is left unset.
func (s *PhraseSearcher) initSearchers() error {
	// fetching the first candidate is the same step as fetching any later one
	if err := s.advanceNextMust(); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// advanceNextMust replaces currMust with the next match of mustSearcher. It
// does nothing when mustSearcher is nil.
func (s *PhraseSearcher) advanceNextMust() error {
	if s.mustSearcher == nil {
		return nil
	}
	following, err := s.mustSearcher.Next()
	s.currMust = following
	if err != nil {
		return err
	}
	return nil
}
// Weight returns the weight of mustSearcher, or 0 when there is no
// mustSearcher (the same nil handling computeQueryNorm applies).
func (s *PhraseSearcher) Weight() float64 {
	if s.mustSearcher == nil {
		return 0
	}
	return s.mustSearcher.Weight()
}
// SetQueryNorm forwards qnorm to mustSearcher. It is a no-op when there is no
// mustSearcher (the same nil handling computeQueryNorm applies).
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
	if s.mustSearcher == nil {
		return
	}
	s.mustSearcher.SetQueryNorm(qnorm)
}
// Next returns the next candidate from mustSearcher in which, within a single
// field, every non-empty phrase term i is found at the position of the first
// term plus i. The returned match has its Locations replaced by the locations
// that took part in a phrase occurrence. It returns nil, nil once the
// candidates are exhausted.
//
// NOTE(review): s.terms[0] is read unconditionally for each candidate, so an
// empty terms slice would panic as soon as a candidate exists.
func (s *PhraseSearcher) Next() (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
for s.currMust != nil {
// rvftlm collects, per field, the locations of all phrase occurrences;
// freq counts the occurrences found in this candidate
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
firstTerm := s.terms[0]
for field, termLocMap := range s.currMust.Locations {
rvtlm := make(search.TermLocationMap, 0)
locations, ok := termLocMap[firstTerm]
if ok {
// every location of the first term is a potential phrase start
OUTER:
for _, location := range locations {
crvtlm := make(search.TermLocationMap, 0)
INNER:
for i := 0; i < len(s.terms); i++ {
nextTerm := s.terms[i]
if nextTerm != "" {
// look through all these term locations
// to try and find the correct offsets
nextLocations, ok := termLocMap[nextTerm]
if ok {
for _, nextLocation := range nextLocations {
if nextLocation.Pos == location.Pos+float64(i) {
// found a location match for this term
crvtlm.AddLocation(nextTerm, nextLocation)
continue INNER
}
}
// if we got here we didn't find a location match for this term
continue OUTER
} else {
// the term does not occur in this field at all
continue OUTER
}
}
}
// if we got here all the terms matched
freq++
search.MergeTermLocationMaps(rvtlm, crvtlm)
rvftlm[field] = rvtlm
}
}
}
if freq > 0 {
// return match
rv = s.currMust
rv.Locations = rvftlm
// move on to the following candidate before returning this one
err := s.advanceNextMust()
if err != nil {
return nil, err
}
return rv, nil
}
// no phrase occurrence in this candidate, try the next one
err := s.advanceNextMust()
if err != nil {
return nil, err
}
}
return nil, nil
}
// Advance moves mustSearcher forward to ID and then returns the next phrase
// match found from there via Next. When there is no mustSearcher nothing is
// advanced (the same nil handling initSearchers and advanceNextMust apply)
// and the result is whatever Next reports.
func (s *PhraseSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		err := s.initSearchers()
		if err != nil {
			return nil, err
		}
	}
	if s.mustSearcher != nil {
		var err error
		s.currMust, err = s.mustSearcher.Advance(ID)
		if err != nil {
			return nil, err
		}
	}
	return s.Next()
}
// Count returns a worst-case estimate of the number of matches: the count of
// mustSearcher, since every phrase match is also a conjunction match. It
// returns 0 when there is no mustSearcher (the same nil handling Close
// applies).
func (s *PhraseSearcher) Count() uint64 {
	if s.mustSearcher == nil {
		return 0
	}
	// for now return a worst case
	return s.mustSearcher.Count()
}
// Close closes mustSearcher when there is one and returns its error.
func (s *PhraseSearcher) Close() error {
	if s.mustSearcher == nil {
		return nil
	}
	if err := s.mustSearcher.Close(); err != nil {
		return err
	}
	return nil
}
// Min always reports 0 for a phrase searcher.
func (s *PhraseSearcher) Min() int {
	const required = 0
	return required
}

View file

@ -0,0 +1,93 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestPhraseSearch runs a phrase search for the terms "angst" and "beer" in
// the "desc" field of the shared twoDocIndex and checks that exactly document
// "2" is returned with the expected score.
func TestPhraseSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, true)
	if err != nil {
		t.Fatal(err)
	}
	phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"})
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: phraseSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "2",
					Score: 1.0807601687084403,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Each case runs in its own function so that its deferred Close is
		// bound to this case's searcher (not the shared loop variable) and
		// fires as soon as the case finishes.
		func(testIndex int, searcher search.Searcher, expected []*search.DocumentMatch) {
			defer func() {
				if err := searcher.Close(); err != nil {
					t.Fatal(err)
				}
			}()

			next, err := searcher.Next()
			i := 0
			for err == nil && next != nil {
				if i < len(expected) {
					if next.ID != expected[i].ID {
						t.Errorf("expected result %d to have id %s got %s for test %d", i, expected[i].ID, next.ID, testIndex)
					}
					if next.Score != expected[i].Score {
						t.Errorf("expected result %d to have score %v got %v for test %d", i, expected[i].Score, next.Score, testIndex)
						t.Logf("scoring explanation: %s", next.Expl)
					}
				}
				next, err = searcher.Next()
				i++
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
			}
			if len(expected) != i {
				t.Errorf("expected %d results got %d for test %d", len(expected), i, testIndex)
			}
		}(testIndex, test.searcher, test.results)
	}
}

View file

@ -0,0 +1,108 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"regexp"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// RegexpSearcher holds the arguments it was created with by NewRegexpSearcher
// (index reader, compiled pattern, field name, explain flag) together with
// the DisjunctionSearcher that all of its methods delegate to.
type RegexpSearcher struct {
indexReader index.IndexReader
pattern *regexp.Regexp
field string
explain bool
// searcher combines one TermSearcher per term that matched the pattern.
searcher *DisjunctionSearcher
}
// NewRegexpSearcher collects the terms of field that match pattern and wraps
// one TermSearcher per term in a DisjunctionSearcher.
//
// When pattern is a complete literal (regexp.LiteralPrefix reports complete)
// that literal is the only candidate term. Otherwise the field's term
// dictionary is enumerated, restricted to the pattern's literal prefix when
// it has one, and every term matching pattern becomes a candidate.
//
// boost is accepted but not used here; every TermSearcher is created with a
// boost of 1.0.
//
// A nil searcher and the error are returned when the dictionary cannot be
// opened or iterated, or when any sub-searcher cannot be constructed.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
	prefixTerm, complete := pattern.LiteralPrefix()
	var candidateTerms []string
	if complete {
		// there is no pattern
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}
		// Without this check the error would be overwritten by the first
		// Next call below, which would also run on an unusable dictionary.
		if err != nil {
			return nil, err
		}

		// enumerate the terms and check against regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// build one term searcher per candidate term
	qsearchers := make([]search.Searcher, 0, 25)
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these term searchers
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *RegexpSearcher) Count() uint64 {
return s.searcher.Count()
}
// Weight returns the weight reported by the underlying disjunction searcher.
func (s *RegexpSearcher) Weight() float64 {
return s.searcher.Weight()
}
// SetQueryNorm forwards qnorm to the underlying disjunction searcher.
func (s *RegexpSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
// Next returns whatever the underlying disjunction searcher's Next returns.
func (s *RegexpSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
// Advance forwards ID to the underlying disjunction searcher's Advance and
// returns its result. Calling Next here instead would ignore ID and merely
// step to the following match.
func (s *RegexpSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	return s.searcher.Advance(ID)
}
// Close closes the underlying disjunction searcher and returns its error.
func (s *RegexpSearcher) Close() error {
return s.searcher.Close()
}
// Min always returns 0 for a RegexpSearcher.
// NOTE(review): presumably part of the search.Searcher interface — confirm
// what callers expect this value to mean.
func (s *RegexpSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,110 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"regexp"
"testing"
"github.com/blevesearch/bleve/search"
)
// TestRegexpSearch runs two regular-expression searches against the shared
// twoDocIndex ("ma.*" on the "name" field and "co.*" on the "desc" field) and
// checks the returned document IDs and scores, in order.
func TestRegexpSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	pattern, err := regexp.Compile("ma.*")
	if err != nil {
		t.Fatal(err)
	}
	regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}

	patternCo, err := regexp.Compile("co.*")
	if err != nil {
		t.Fatal(err)
	}
	regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: regexpSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 1.916290731874155,
				},
			},
		},
		{
			searcher: regexpSearcherCo,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "2",
					Score: 0.33875554280828685,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 0.33875554280828685,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Each case runs in its own function so that its deferred Close is
		// bound to this case's searcher. A defer placed directly in the loop
		// captures the shared loop variable (before Go 1.22), which would
		// close the last searcher twice and never close the first.
		func(testIndex int, searcher search.Searcher, expected []*search.DocumentMatch) {
			defer func() {
				if err := searcher.Close(); err != nil {
					t.Fatal(err)
				}
			}()

			next, err := searcher.Next()
			i := 0
			for err == nil && next != nil {
				if i < len(expected) {
					if next.ID != expected[i].ID {
						t.Errorf("expected result %d to have id %s got %s for test %d", i, expected[i].ID, next.ID, testIndex)
					}
					if next.Score != expected[i].Score {
						t.Errorf("expected result %d to have score %v got %v for test %d", i, expected[i].Score, next.Score, testIndex)
						t.Logf("scoring explanation: %s", next.Expl)
					}
				}
				next, err = searcher.Next()
				i++
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
			}
			if len(expected) != i {
				t.Errorf("expected %d results got %d for test %d", len(expected), i, testIndex)
			}
		}(testIndex, test.searcher, test.results)
	}
}

View file

@ -0,0 +1,95 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// TermSearcher holds the arguments it was created with by NewTermSearcher
// (index reader, term, field, explain flag) together with the term field
// reader it iterates and the scorer it applies to each match.
type TermSearcher struct {
indexReader index.IndexReader
term string
field string
explain bool
// reader is obtained from indexReader.TermFieldReader for term and field.
reader index.TermFieldReader
// scorer turns each match produced by reader into a document match.
scorer *scorers.TermQueryScorer
}
// NewTermSearcher opens a term field reader for term in field and pairs it
// with a TermQueryScorer built from boost, the index reader's document count
// and the reader's count. If the reader cannot be opened, a nil searcher and
// the error are returned.
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
	tfr, err := indexReader.TermFieldReader([]byte(term), field)
	if err != nil {
		return nil, err
	}

	ts := &TermSearcher{
		indexReader: indexReader,
		term:        term,
		field:       field,
		explain:     explain,
		reader:      tfr,
		scorer:      scorers.NewTermQueryScorer(term, field, boost, indexReader.DocCount(), tfr.Count(), explain),
	}
	return ts, nil
}
// Count returns the count reported by the term field reader.
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}
// Weight returns the weight reported by the scorer.
func (s *TermSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next reads the following match from the term field reader and returns it
// scored. It returns nil, nil when the reader has no further match, and
// nil plus the error when the reader fails.
func (s *TermSearcher) Next() (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case termMatch == nil:
		// reader is exhausted
		return nil, nil
	}
	return s.scorer.Score(termMatch), nil
}
// Advance asks the term field reader to advance to ID and returns the
// resulting match scored. It returns nil, nil when the reader yields no
// match, and nil plus the error when the reader fails.
func (s *TermSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case termMatch == nil:
		// nothing at or after ID
		return nil, nil
	}
	return s.scorer.Score(termMatch), nil
}
// Close closes the term field reader and returns its error.
func (s *TermSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for a TermSearcher.
// NOTE(review): presumably part of the search.Searcher interface — confirm
// what callers expect this value to mean.
func (s *TermSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,81 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// TermPrefixSearcher holds the arguments it was created with by
// NewTermPrefixSearcher (index reader, prefix, field name, explain flag)
// together with the DisjunctionSearcher that all of its methods delegate to.
type TermPrefixSearcher struct {
indexReader index.IndexReader
prefix string
field string
explain bool
// searcher combines one TermSearcher per dictionary term with the prefix.
searcher *DisjunctionSearcher
}
// NewTermPrefixSearcher enumerates the terms of field that start with prefix
// and wraps one TermSearcher per term in a DisjunctionSearcher.
//
// boost is accepted but not used here; every TermSearcher is created with a
// boost of 1.0.
//
// A nil searcher and the error are returned when the dictionary cannot be
// opened or iterated, or when any sub-searcher cannot be constructed.
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
	// find the terms with this prefix
	fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
	if err != nil {
		return nil, err
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, 25)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		// A distinct name keeps this error from shadowing err; otherwise the
		// Next call below would assign to the shadow and the loop condition
		// would never see an iteration failure.
		qsearcher, serr := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
		if serr != nil {
			return nil, serr
		}
		qsearchers = append(qsearchers, qsearcher)
		tfd, err = fieldDict.Next()
	}
	// report a dictionary iteration failure rather than a partial result
	if err != nil {
		return nil, err
	}

	// build disjunction searcher of these term searchers
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &TermPrefixSearcher{
		indexReader: indexReader,
		prefix:      prefix,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *TermPrefixSearcher) Count() uint64 {
return s.searcher.Count()
}
// Weight returns the weight reported by the underlying disjunction searcher.
func (s *TermPrefixSearcher) Weight() float64 {
return s.searcher.Weight()
}
// SetQueryNorm forwards qnorm to the underlying disjunction searcher.
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
// Next returns whatever the underlying disjunction searcher's Next returns.
func (s *TermPrefixSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
// Advance forwards ID to the underlying disjunction searcher's Advance and
// returns its result. Calling Next here instead would ignore ID and merely
// step to the following match.
func (s *TermPrefixSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	return s.searcher.Advance(ID)
}
// Close closes the underlying disjunction searcher and returns its error.
func (s *TermPrefixSearcher) Close() error {
return s.searcher.Close()
}
// Min always returns 0 for a TermPrefixSearcher.
// NOTE(review): presumably part of the search.Searcher interface — confirm
// what callers expect this value to mean.
func (s *TermPrefixSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,195 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store/inmem"
"github.com/blevesearch/bleve/index/upside_down"
)
// TestTermSearcher indexes nine documents ("a" through "i") whose "desc"
// field is "beer" plus one document ("j") with only a "title" field, then
// checks a TermSearcher for "beer" in "desc": its weight, its count, and the
// behaviour of Next and Advance, including moving past the last match.
func TestTermSearcher(t *testing.T) {
	var queryTerm = "beer"
	var queryField = "desc"
	var queryBoost = 3.0
	var queryExplain = true

	inMemStore, err := inmem.New()
	if err != nil {
		t.Fatal(err)
	}
	analysisQueue := upside_down.NewAnalysisQueue(1)
	i := upside_down.NewUpsideDownCouch(inMemStore, analysisQueue)
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}

	// nine documents that contain the query term in the query field
	for _, id := range []string{"a", "b", "c", "d", "e", "f", "g", "h", "i"} {
		err = i.Update(&document.Document{
			ID: id,
			Fields: []document.Field{
				document.NewTextField("desc", []uint64{}, []byte("beer")),
			},
		})
		if err != nil {
			t.Fatal(err)
		}
	}
	// one document that must not match: different field, different term
	err = i.Update(&document.Document{
		ID: "j",
		Fields: []document.Field{
			document.NewTextField("title", []uint64{}, []byte("cat")),
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can work without a reader
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	searcher, err := NewTermSearcher(indexReader, queryTerm, queryField, queryBoost, queryExplain)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := searcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	searcher.SetQueryNorm(2.0)
	docCount, err := i.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	idf := 1.0 + math.Log(float64(docCount)/float64(searcher.Count()+1.0))
	expectedQueryWeight := 3 * idf * 3 * idf
	if expectedQueryWeight != searcher.Weight() {
		t.Errorf("expected weight %v got %v", expectedQueryWeight, searcher.Weight())
	}

	if searcher.Count() != 9 {
		t.Errorf("expected count of 9, got %d", searcher.Count())
	}

	docMatch, err := searcher.Next()
	if err != nil {
		t.Fatalf("expected result, got %v", err)
	}
	// fail cleanly instead of dereferencing a nil match below
	if docMatch == nil {
		t.Fatal("expected result, got nil")
	}
	if docMatch.ID != "a" {
		t.Errorf("expected result ID to be 'a', got '%s'", docMatch.ID)
	}

	docMatch, err = searcher.Advance("c")
	if err != nil {
		t.Fatalf("expected result, got %v", err)
	}
	if docMatch == nil {
		t.Fatal("expected result, got nil")
	}
	if docMatch.ID != "c" {
		t.Errorf("expected result ID to be 'c' got '%s'", docMatch.ID)
	}

	// try advancing past end
	docMatch, err = searcher.Advance("z")
	if err != nil {
		t.Fatal(err)
	}
	if docMatch != nil {
		t.Errorf("expected nil, got %v", docMatch)
	}

	// try pushing next past end
	docMatch, err = searcher.Next()
	if err != nil {
		t.Fatal(err)
	}
	if docMatch != nil {
		t.Errorf("expected nil, got %v", docMatch)
	}
}