Upgrade server dependencies, manage them with govendor

This commit is contained in:
Ken-Håvard Lieng 2017-04-18 03:02:51 +02:00
parent ebee2746d6
commit 971278e7e5
1748 changed files with 196165 additions and 194500 deletions

View file

@ -1,20 +1,29 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"time"
"github.com/blevesearch/bleve/index"
"golang.org/x/net/context"
)
type Collector interface {
Collect(searcher Searcher) error
Collect(ctx context.Context, searcher Searcher, reader index.IndexReader) error
Results() DocumentMatchCollection
Total() uint64
MaxScore() float64

View file

@ -0,0 +1,91 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/heap"
"github.com/blevesearch/bleve/search"
)
// collectStoreHeap stores collected hits in a binary heap driven by
// container/heap. Less inverts the comparator, so the hit that compares
// greatest sits at the root and is the one returned by RemoveLast.
type collectStoreHeap struct {
// heap is the backing slice manipulated through the heap.Interface methods.
heap search.DocumentMatchCollection
// compare is the ordering function supplied to newStoreHeap.
compare collectorCompare
}
// newStoreHeap returns an empty heap-backed store. The backing slice is
// allocated with capacity cap and hits are ordered using compare.
func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
	store := new(collectStoreHeap)
	store.heap = make(search.DocumentMatchCollection, 0, cap)
	store.compare = compare
	heap.Init(store)
	return store
}
// Add pushes doc onto the heap.
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) {
heap.Push(c, doc)
}
// RemoveLast pops the heap root, which (given Less) is the hit that
// compares greatest among those stored, and returns it.
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch {
	popped := heap.Pop(c)
	return popped.(*search.DocumentMatch)
}
// Final drains hits from the heap into a slice in ascending compare order
// and runs fixup on each hit it returns. The skip hits that compare lowest
// are left in the heap and are not part of the result. If fixup reports an
// error, that error is returned together with a nil collection.
func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	total := c.Len()
	remaining := total - skip
	if remaining <= 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}

	out := make(search.DocumentMatchCollection, remaining)
	// Each pop yields the greatest remaining hit, so the output is filled
	// from its last slot towards the front. Popping stops as soon as only
	// the skipped hits are left.
	for n := total; n > skip && n > 0; n-- {
		remaining--
		hit := heap.Pop(c).(*search.DocumentMatch)
		out[remaining] = hit
		if err := fixup(hit); err != nil {
			return nil, err
		}
	}
	return out, nil
}
// heap interface implementation

// Len returns the number of hits currently held in the heap.
func (c *collectStoreHeap) Len() int {
return len(c.heap)
}
// Less implements heap.Interface. It reports whether the hit at index i
// compares greater than the hit at index j, which deliberately inverts the
// comparator so that the greatest-comparing hit ends up at the heap root.
//
// The comparator result is tested directly rather than negated first:
// negating the minimum int value overflows back to itself and would report
// the wrong ordering.
func (c *collectStoreHeap) Less(i, j int) bool {
	return c.compare(c.heap[i], c.heap[j]) > 0
}
// Swap implements heap.Interface by exchanging the hits at indexes i and j.
func (c *collectStoreHeap) Swap(i, j int) {
c.heap[i], c.heap[j] = c.heap[j], c.heap[i]
}
// Push implements heap.Interface by appending x, which must be a
// *search.DocumentMatch, to the backing slice. Callers should use Add.
func (c *collectStoreHeap) Push(x interface{}) {
c.heap = append(c.heap, x.(*search.DocumentMatch))
}
// Pop implements heap.Interface by detaching and returning the final
// element of the backing slice. Callers should use RemoveLast.
func (c *collectStoreHeap) Pop() interface{} {
	last := len(c.heap) - 1
	hit := c.heap[last]
	c.heap = c.heap[:last]
	return hit
}

View file

@ -0,0 +1,78 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/list"
"github.com/blevesearch/bleve/search"
)
// collectStoreList stores collected hits in a doubly linked list. Add keeps
// the list ordered so that the greatest-comparing hit is at the front.
type collectStoreList struct {
// results holds *search.DocumentMatch values.
results *list.List
// compare is the ordering function supplied to newStoreList.
compare collectorCompare
}
// newStoreList returns an empty list-backed store ordered by compare.
// The cap argument is accepted for signature parity with the other stores
// and is not used.
func newStoreList(cap int, compare collectorCompare) *collectStoreList {
	return &collectStoreList{
		results: list.New(),
		compare: compare,
	}
}
// Add inserts doc in front of the first element it compares greater than
// or equal to, or at the back when there is no such element.
func (c *collectStoreList) Add(doc *search.DocumentMatch) {
	at := c.results.Front()
	for at != nil && c.compare(doc, at.Value.(*search.DocumentMatch)) < 0 {
		at = at.Next()
	}
	if at == nil {
		// walked off the end: doc compares lower than everything stored
		c.results.PushBack(doc)
		return
	}
	c.results.InsertBefore(doc, at)
}
// RemoveLast unlinks the element at the front of the list, which is the
// greatest-comparing hit, and returns it.
func (c *collectStoreList) RemoveLast() *search.DocumentMatch {
	front := c.results.Front()
	removed := c.results.Remove(front)
	return removed.(*search.DocumentMatch)
}
// Final walks the list from back to front, passes over the first skip
// elements, and returns the rest after running fixup on each of them.
// An empty collection is returned when skip covers the whole list. If fixup
// reports an error, that error is returned with a nil collection.
func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	keep := c.results.Len() - skip
	if keep <= 0 {
		return search.DocumentMatchCollection{}, nil
	}

	out := make(search.DocumentMatchCollection, keep)
	pos := 0
	toSkip := skip
	for e := c.results.Back(); e != nil; e = e.Prev() {
		if toSkip > 0 {
			toSkip--
			continue
		}
		hit := e.Value.(*search.DocumentMatch)
		out[pos] = hit
		if err := fixup(hit); err != nil {
			return nil, err
		}
		pos++
	}
	return out, nil
}
// Len returns the number of hits currently held in the list.
func (c *collectStoreList) Len() int {
return c.results.Len()
}

View file

@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import "github.com/blevesearch/bleve/search"
// collectStoreSlice stores collected hits in a slice that Add keeps in
// ascending compare order, so the greatest-comparing hit is the last element.
type collectStoreSlice struct {
// slice holds the hits in sorted order.
slice search.DocumentMatchCollection
// compare is the ordering function supplied to newStoreSlice.
compare collectorCompare
}
// newStoreSlice returns an empty slice-backed store whose backing array has
// room for cap hits and whose ordering is given by compare.
func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
	return &collectStoreSlice{
		slice:   make(search.DocumentMatchCollection, 0, cap),
		compare: compare,
	}
}
// Add inserts doc into the slice at its sorted position. The search starts
// at the end and moves towards the front while doc compares lower than the
// element before the candidate position.
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
	pos := len(c.slice)
	for pos > 0 && c.compare(doc, c.slice[pos-1]) < 0 {
		pos--
	}

	// grow by one, shift the tail right, and drop doc into the gap
	c.slice = append(c.slice, nil)
	copy(c.slice[pos+1:], c.slice[pos:])
	c.slice[pos] = doc
}
// RemoveLast detaches and returns the final element of the slice, which is
// the greatest-comparing hit.
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch {
	last := len(c.slice) - 1
	hit := c.slice[last]
	c.slice = c.slice[:last]
	return hit
}
// Final runs fixup on every hit from index skip onwards and returns that
// tail of the slice. An empty collection is returned when skip is larger
// than the number of stored hits. If fixup reports an error, that error is
// returned with a nil collection.
func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	if skip > len(c.slice) {
		return search.DocumentMatchCollection{}, nil
	}
	for idx := skip; idx < len(c.slice); idx++ {
		if err := fixup(c.slice[idx]); err != nil {
			return nil, err
		}
	}
	return c.slice[skip:], nil
}
// Len returns the number of hits currently held in the slice.
func (c *collectStoreSlice) Len() int {
return len(c.slice)
}

View file

@ -0,0 +1,278 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
)
// PreAllocSizeSkipCap will cap preallocation to this amount when
// size+skip exceeds this value
var PreAllocSizeSkipCap = 1000
// collectorCompare orders two hits. The stores treat a negative result as
// "i sorts before j" and a positive result as "i sorts after j".
type collectorCompare func(i, j *search.DocumentMatch) int
// collectorFixup is applied by a store's Final method to every hit that is
// about to be returned; a non-nil error aborts finalization.
type collectorFixup func(d *search.DocumentMatch) error
// TopNCollector collects the top N hits, optionally skipping some results
type TopNCollector struct {
// size is the number of hits to return and skip the number of leading
// hits to leave out; the store retains at most size+skip hits.
size int
skip int
// total counts every hit passed to collectSingle.
total uint64
// maxScore is the highest Score seen on any hit.
maxScore float64
// took is the duration measured by Collect.
took time.Duration
// sort is the ordering used to compare hits.
sort search.SortOrder
// results is filled in by finalizeResults.
results search.DocumentMatchCollection
// facetsBuilder is optional; when set it is fed the visited field terms.
facetsBuilder *search.FacetsBuilder
// store holds the hits currently retained.
store *collectStoreHeap
// needDocIds is true when the sort order requires the external document ID.
needDocIds bool
// neededFields lists the fields whose terms must be visited per hit
// (required by the sort order and, if set, the facets builder).
neededFields []string
// cachedScoring and cachedDesc cache sort.CacheIsScore() and
// sort.CacheDescending() and are passed to every sort.Compare call.
cachedScoring []bool
cachedDesc []bool
// lowestMatchOutsideResults is the lowest-sorting hit that has been
// evicted from the store so far; hits that do not sort before it are
// rejected without touching the store.
lowestMatchOutsideResults *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline:
// Collect polls ctx.Done() whenever the running hit total is a multiple
// of this value.
const CheckDoneEvery = uint64(1024)
// NewTopNCollector returns a collector that keeps the best size hits after
// passing over the first skip hits, with hits ordered by sort.
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
	hc := &TopNCollector{size: size, skip: skip, sort: sort}

	// Pre-size the store for size+skip hits plus the one extra that is
	// briefly held before eviction. Very large requests are capped at
	// PreAllocSizeSkipCap; the store simply grows on demand beyond that.
	storeCap := size + skip
	if storeCap > PreAllocSizeSkipCap {
		storeCap = PreAllocSizeSkipCap
	}
	storeCap++

	compare := func(a, b *search.DocumentMatch) int {
		return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, a, b)
	}
	hc.store = newStoreHeap(storeCap, compare)

	// These answers come from interface calls, so ask once and cache them.
	hc.needDocIds = sort.RequiresDocID()
	hc.neededFields = sort.RequiredFields()
	hc.cachedScoring = sort.CacheIsScore()
	hc.cachedDesc = sort.CacheDescending()

	return hc
}
// Collect pulls every hit from searcher, feeds each one to collectSingle,
// and then finalizes the retained results. It returns ctx.Err() if the
// context is done before the first hit is requested or at one of the
// periodic checks made every CheckDoneEvery hits.
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	began := time.Now()

	// Size the DocumentMatchPool for size+skip+1 hits, capped in the same
	// way as the store; beyond the cap DocumentMatches are allocated on demand.
	poolSize := hc.size + hc.skip
	if poolSize > PreAllocSizeSkipCap {
		poolSize = PreAllocSizeSkipCap
	}
	poolSize++
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(poolSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
	}

	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	hit, err := searcher.Next(searchContext)
	for err == nil && hit != nil {
		if hc.total%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}

		if err = hc.collectSingle(searchContext, reader, hit); err != nil {
			break
		}
		hit, err = searcher.Next(searchContext)
	}

	// record how long the search loop took, whether or not it failed
	hc.took = time.Since(began)
	if err != nil {
		return err
	}

	return hc.finalizeResults(reader)
}
// sortByScoreOpt is the shared Sort value assigned to hits when the sort
// order is a single score-based entry.
var sortByScoreOpt = []string{"_score"}
// collectSingle processes one hit: it visits field terms when any are
// needed, updates the total and max score, loads the external ID when the
// sort requires it, computes the hit's sort value, and then either rejects
// the hit or adds it to the store, evicting one hit if the store now holds
// more than size+skip. Hits that are no longer needed are handed back to
// ctx.DocumentMatchPool.
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
var err error
// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
err = hc.visitFieldTerms(reader, d)
if err != nil {
return err
}
}
// increment total hits
hc.total++
d.HitNumber = hc.total
// update max score
if d.Score > hc.maxScore {
hc.maxScore = d.Score
}
// see if we need to load ID (at this early stage, for example to sort on it)
if hc.needDocIds {
d.ID, err = reader.ExternalID(d.IndexInternalID)
if err != nil {
return err
}
}
// compute this hit's sort value; a lone score-based sort uses the shared
// sortByScoreOpt slice instead of calling sort.Value
if len(hc.sort) == 1 && hc.cachedScoring[0] {
d.Sort = sortByScoreOpt
} else {
hc.sort.Value(d)
}
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed
if hc.lowestMatchOutsideResults != nil {
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
if cmp >= 0 {
// this hit can't possibly be in the result set, so avoid heap ops
ctx.DocumentMatchPool.Put(d)
return nil
}
}
hc.store.Add(d)
if hc.store.Len() > hc.size+hc.skip {
removed := hc.store.RemoveLast()
if hc.lowestMatchOutsideResults == nil {
hc.lowestMatchOutsideResults = removed
} else {
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
if cmp < 0 {
// the evicted hit becomes the new threshold; recycle the old one
tmp := hc.lowestMatchOutsideResults
hc.lowestMatchOutsideResults = removed
ctx.DocumentMatchPool.Put(tmp)
}
}
}
return nil
}
// visitFieldTerms walks the needed field terms of hit d and forwards each
// (field, term) pair to the sort order and, when one is registered, to the
// facets builder. The facets builder is also told when the document starts
// and ends. The error from the reader is returned.
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
	fb := hc.facetsBuilder
	if fb != nil {
		fb.StartDoc()
	}

	visit := func(field string, term []byte) {
		if fb != nil {
			fb.UpdateVisitor(field, term)
		}
		hc.sort.UpdateVisitor(field, term)
	}
	err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, visit)

	if fb != nil {
		fb.EndDoc()
	}
	return err
}
// SetFacetsBuilder attaches facetsBuilder to the collector and adds the
// fields it requires to the list of fields visited for every hit.
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder
	facetFields := facetsBuilder.RequiredFields()
	hc.neededFields = append(hc.neededFields, facetFields...)
}
// finalizeResults asks the store for its final hits, leaving out the first
// skip of them, and stores the outcome in hc.results. Every returned hit
// that has no ID yet gets its external ID looked up through r.
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	fillID := func(doc *search.DocumentMatch) error {
		if doc.ID != "" {
			return nil
		}
		// the ID was not loaded while collecting, so resolve it now
		var lookupErr error
		doc.ID, lookupErr = r.ExternalID(doc.IndexInternalID)
		return lookupErr
	}

	results, err := hc.store.Final(hc.skip, fillID)
	hc.results = results
	return err
}
// Results returns the collected hits, as stored by finalizeResults at the
// end of a successful Collect.
func (hc *TopNCollector) Results() search.DocumentMatchCollection {
return hc.results
}
// Total returns the total number of hits seen, including those that were
// not retained in the results.
func (hc *TopNCollector) Total() uint64 {
return hc.total
}
// MaxScore returns the maximum score seen across all the hits
func (hc *TopNCollector) MaxScore() float64 {
return hc.maxScore
}
// Took returns the time spent collecting hits, as measured by Collect.
func (hc *TopNCollector) Took() time.Duration {
return hc.took
}
// FacetResults returns the facet results computed by the registered facets
// builder, or an empty set when no builder was registered.
func (hc *TopNCollector) FacetResults() search.FacetResults {
	if hc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return hc.facetsBuilder.Results()
}

View file

@ -1,135 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"time"
"github.com/blevesearch/bleve/search"
)
// TopScoreCollector keeps the k highest-scoring hits that follow the first
// skip hits. Hits are held in a list ordered by ascending score.
type TopScoreCollector struct {
// k is the number of hits to return; skip is the number of top hits to drop.
k int
skip int
// results holds *search.DocumentMatch values, lowest score at the front.
results *list.List
// took is the duration measured by Collect.
took time.Duration
// maxScore is the highest score seen on any hit.
maxScore float64
// total counts every hit collected.
total uint64
// facetsBuilder is optional; when set it is updated with every hit.
facetsBuilder *search.FacetsBuilder
}
// NewTopScorerCollector returns a collector for the k highest-scoring hits
// with nothing skipped.
func NewTopScorerCollector(k int) *TopScoreCollector {
	return NewTopScorerSkipCollector(k, 0)
}

// NewTopScorerSkipCollector returns a collector for the k highest-scoring
// hits that follow the first skip hits.
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
	collector := new(TopScoreCollector)
	collector.k = k
	collector.skip = skip
	collector.results = list.New()
	return collector
}
// Total returns the number of hits collected, including those not retained.
func (tksc *TopScoreCollector) Total() uint64 {
return tksc.total
}
// MaxScore returns the highest score seen across all collected hits.
func (tksc *TopScoreCollector) MaxScore() float64 {
return tksc.maxScore
}
// Took returns the duration measured by Collect.
func (tksc *TopScoreCollector) Took() time.Duration {
return tksc.took
}
// Collect drains searcher, recording every hit and, when a facets builder
// is registered, updating it with each hit. It stops at the first error
// from the searcher or the facets builder and returns that error.
func (tksc *TopScoreCollector) Collect(searcher search.Searcher) error {
	began := time.Now()

	hit, err := searcher.Next()
	for err == nil && hit != nil {
		tksc.collectSingle(hit)
		if tksc.facetsBuilder != nil {
			if err = tksc.facetsBuilder.Update(hit); err != nil {
				break
			}
		}
		hit, err = searcher.Next()
	}

	// the duration is recorded whether or not the loop ended in an error
	tksc.took = time.Since(began)
	return err
}
// collectSingle counts dm, updates the max score, inserts dm into the
// score-ordered list, and drops the front (lowest-scoring) element when the
// list has grown beyond k+skip entries.
func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
	tksc.total++
	if dm.Score > tksc.maxScore {
		tksc.maxScore = dm.Score
	}

	// place dm before the first element with a strictly higher score
	placed := false
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		if dm.Score < e.Value.(*search.DocumentMatch).Score {
			tksc.results.InsertBefore(dm, e)
			placed = true
			break
		}
	}
	if !placed {
		// nothing scores higher, so dm goes at the back
		tksc.results.PushBack(dm)
	}

	// if the list is now too long, remove the head
	if tksc.results.Len() > tksc.k+tksc.skip {
		tksc.results.Remove(tksc.results.Front())
	}
}
// Results returns the retained hits from highest score to lowest, leaving
// out the first skip of them. It returns an empty collection when skip
// covers everything retained.
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
	keep := tksc.results.Len() - tksc.skip
	if keep <= 0 {
		return search.DocumentMatchCollection{}
	}

	out := make(search.DocumentMatchCollection, keep)
	pos := 0
	toSkip := tksc.skip
	for e := tksc.results.Back(); e != nil; e = e.Prev() {
		if toSkip > 0 {
			toSkip--
			continue
		}
		out[pos] = e.Value.(*search.DocumentMatch)
		pos++
	}
	return out
}
// SetFacetsBuilder registers the facets builder that Collect updates with
// every hit.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
tksc.facetsBuilder = facetsBuilder
}
// FacetResults returns the results of the registered facets builder, or an
// empty set when none was registered.
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
	if tksc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return tksc.facetsBuilder.Results()
}

View file

@ -1,249 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"math/rand"
"strconv"
"testing"
"github.com/blevesearch/bleve/search"
)
func TestTop10Scores(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 11,
},
&search.DocumentMatch{
ID: "b",
Score: 9,
},
&search.DocumentMatch{
ID: "c",
Score: 11,
},
&search.DocumentMatch{
ID: "d",
Score: 9,
},
&search.DocumentMatch{
ID: "e",
Score: 11,
},
&search.DocumentMatch{
ID: "f",
Score: 9,
},
&search.DocumentMatch{
ID: "g",
Score: 11,
},
&search.DocumentMatch{
ID: "h",
Score: 9,
},
&search.DocumentMatch{
ID: "i",
Score: 11,
},
&search.DocumentMatch{
ID: "j",
Score: 11,
},
&search.DocumentMatch{
ID: "k",
Score: 11,
},
&search.DocumentMatch{
ID: "l",
Score: 99,
},
&search.DocumentMatch{
ID: "m",
Score: 11,
},
&search.DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerCollector(10)
err := collector.Collect(searcher)
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 10 {
t.Fatalf("expected 10 results, got %d", len(results))
}
if results[0].ID != "l" {
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
}
if results[0].Score != 99.0 {
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
}
minScore := 1000.0
for _, result := range results {
if result.Score < minScore {
minScore = result.Score
}
}
if minScore < 10 {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}
func TestTop10ScoresSkip10(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 11,
},
&search.DocumentMatch{
ID: "b",
Score: 9.5,
},
&search.DocumentMatch{
ID: "c",
Score: 11,
},
&search.DocumentMatch{
ID: "d",
Score: 9,
},
&search.DocumentMatch{
ID: "e",
Score: 11,
},
&search.DocumentMatch{
ID: "f",
Score: 9,
},
&search.DocumentMatch{
ID: "g",
Score: 11,
},
&search.DocumentMatch{
ID: "h",
Score: 9,
},
&search.DocumentMatch{
ID: "i",
Score: 11,
},
&search.DocumentMatch{
ID: "j",
Score: 11,
},
&search.DocumentMatch{
ID: "k",
Score: 11,
},
&search.DocumentMatch{
ID: "l",
Score: 99,
},
&search.DocumentMatch{
ID: "m",
Score: 11,
},
&search.DocumentMatch{
ID: "n",
Score: 11,
},
},
}
collector := NewTopScorerSkipCollector(10, 10)
err := collector.Collect(searcher)
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 4 {
t.Fatalf("expected 4 results, got %d", len(results))
}
if results[0].ID != "b" {
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
}
}
// BenchmarkTop10of100000Scores measures collecting the top 10 hits out of
// 100000 randomly scored stub hits.
//
// The collection is repeated b.N times, as the testing package requires for
// a meaningful measurement. Each iteration gets a fresh searcher and
// collector because both are stateful. Fixture construction and result
// logging are kept outside the timed region.
func BenchmarkTop10of100000Scores(b *testing.B) {
	matches := make(search.DocumentMatchCollection, 0, 100000)
	for i := 0; i < 100000; i++ {
		matches = append(matches, &search.DocumentMatch{
			ID:    strconv.Itoa(i),
			Score: rand.Float64(),
		})
	}

	var res search.DocumentMatchCollection
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		searcher := &stubSearcher{
			matches: matches,
		}
		collector := NewTopScorerCollector(10)
		if err := collector.Collect(searcher); err != nil {
			b.Fatal(err)
		}
		res = collector.Results()
	}
	b.StopTimer()

	// log the hits of the last run only
	for _, dm := range res {
		b.Logf("%s - %f\n", dm.ID, dm.Score)
	}
}

View file

@ -1,60 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"github.com/blevesearch/bleve/search"
)
// stubSearcher is a test double that replays a fixed list of matches.
type stubSearcher struct {
// index is the position of the next match to hand out.
index int
// matches is the fixed sequence of hits returned by Next and Advance.
matches search.DocumentMatchCollection
}
// Next returns the match at the current position and moves past it.
// It returns (nil, nil) once every match has been handed out.
func (ss *stubSearcher) Next() (*search.DocumentMatch, error) {
	if ss.index >= len(ss.matches) {
		return nil, nil
	}
	match := ss.matches[ss.index]
	ss.index++
	return match, nil
}
// Advance moves past every match whose ID is less than ID, then returns the
// match it stopped on and steps past it. It returns (nil, nil) when no
// match remains.
func (ss *stubSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	for ; ss.index < len(ss.matches); ss.index++ {
		if ss.matches[ss.index].ID >= ID {
			break
		}
	}
	if ss.index >= len(ss.matches) {
		return nil, nil
	}
	found := ss.matches[ss.index]
	ss.index++
	return found, nil
}
// Close does nothing and always returns nil.
func (ss *stubSearcher) Close() error {
return nil
}
// Weight always returns 0.
func (ss *stubSearcher) Weight() float64 {
return 0.0
}
// SetQueryNorm ignores its argument.
func (ss *stubSearcher) SetQueryNorm(float64) {
}
// Count returns the number of matches the stub was configured with.
func (ss *stubSearcher) Count() uint64 {
return uint64(len(ss.matches))
}
// Min always returns 0.
func (ss *stubSearcher) Min() int {
return 0
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search

View file

@ -0,0 +1,134 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"sort"
"time"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
// dateTimeRange is a named range's bounds. A zero start or end is treated
// as unbounded on that side.
type dateTimeRange struct {
start time.Time
end time.Time
}
// DateTimeFacetBuilder counts, per named date range, how many visited terms
// of one field fall inside that range.
type DateTimeFacetBuilder struct {
// size is the maximum number of ranges reported by Result.
size int
// field is the only field whose terms are counted.
field string
// termsCount maps range name to the number of matching terms.
termsCount map[string]int
// total is the number of (term, range) matches counted.
total int
// missing counts documents for which no term of field was visited.
missing int
// ranges maps range name to its bounds.
ranges map[string]*dateTimeRange
// sawValue records whether the current document had any term for field.
sawValue bool
}
// NewDateTimeFacetBuilder returns a builder for field that reports at most
// size ranges. Ranges are registered afterwards with AddRange.
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
	fb := new(DateTimeFacetBuilder)
	fb.size = size
	fb.field = field
	fb.termsCount = make(map[string]int)
	fb.ranges = make(map[string]*dateTimeRange)
	return fb
}
// AddRange registers the range [start, end) under name, replacing any range
// already registered with that name. A zero start or end leaves that side
// unbounded.
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	fb.ranges[name] = &dateTimeRange{start: start, end: end}
}
// Field returns the name of the field this builder counts.
func (fb *DateTimeFacetBuilder) Field() string {
return fb.field
}
// UpdateVisitor is called for each visited (field, term) pair. Terms of
// other fields are ignored. For the builder's field the document is marked
// as having a value, and a term that decodes as a prefix-coded int64 with
// shift 0 is interpreted as nanoseconds since the Unix epoch and counted
// against every registered range that contains it.
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
	if field != fb.field {
		return
	}
	fb.sawValue = true

	// only consider the values which are shifted 0
	coded := numeric.PrefixCoded(term)
	shift, err := coded.Shift()
	if err != nil || shift != 0 {
		return
	}
	nanos, err := coded.Int64()
	if err != nil {
		return
	}
	t := time.Unix(0, nanos)

	// a term may fall into several ranges; count it for each one
	for name, r := range fb.ranges {
		if !r.start.IsZero() && t.Before(r.start) {
			continue
		}
		if !r.end.IsZero() && !t.Before(r.end) {
			continue
		}
		fb.termsCount[name]++
		fb.total++
	}
}
// StartDoc resets the per-document "saw a value" flag.
func (fb *DateTimeFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc counts the document as missing when no term of the builder's field
// was visited since StartDoc.
func (fb *DateTimeFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}
// Result builds the facet result: one entry per range that matched at least
// one term, sorted and cut down to at most size entries. Range bounds are
// formatted as RFC 3339 with nanoseconds and left nil when unbounded. Other
// is the number of counted matches not covered by the reported entries.
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
	result := &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	result.DateRanges = make([]*search.DateRangeFacet, 0, len(fb.termsCount))
	for name, count := range fb.termsCount {
		bounds := fb.ranges[name]
		facet := &search.DateRangeFacet{
			Name:  name,
			Count: count,
		}
		if !bounds.start.IsZero() {
			formatted := bounds.start.Format(time.RFC3339Nano)
			facet.Start = &formatted
		}
		if !bounds.end.IsZero() {
			formatted := bounds.end.Format(time.RFC3339Nano)
			facet.End = &formatted
		}
		result.DateRanges = append(result.DateRanges, facet)
	}

	sort.Sort(result.DateRanges)

	// keep only the top N facets
	if fb.size < len(result.DateRanges) {
		result.DateRanges = result.DateRanges[:fb.size]
	}

	reported := 0
	for i := range result.DateRanges {
		reported += result.DateRanges[i].Count
	}
	result.Other = fb.total - reported

	return result
}

View file

@ -1,19 +1,23 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facets
package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
@ -29,6 +33,7 @@ type NumericFacetBuilder struct {
total int
missing int
ranges map[string]*numericRange
sawValue bool
}
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
@ -48,36 +53,39 @@ func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
fb.ranges[name] = &r
}
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric_util.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric_util.Int64ToFloat64(i64)
// Field returns the name of the field this builder counts.
func (fb *NumericFacetBuilder) Field() string {
return fb.field
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}
}
} else {
}
}
// StartDoc resets the per-document "saw a value" flag.
func (fb *NumericFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc counts the document as missing when no term of the builder's field
// was visited since StartDoc.
func (fb *NumericFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View file

@ -1,18 +1,22 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facets
package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
@ -22,6 +26,7 @@ type TermsFacetBuilder struct {
termsCount map[string]int
total int
missing int
sawValue bool
}
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@ -32,19 +37,24 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
}
}
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
existingCount, existed := fb.termsCount[term]
if existed {
fb.termsCount[term] = existingCount + 1
} else {
fb.termsCount[term] = 1
}
fb.total++
}
} else {
// Field returns the name of the field this builder counts terms for.
func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
// UpdateVisitor counts one occurrence of term when field is the field this
// builder facets on. Terms reported for any other field are ignored.
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
	if field != fb.field {
		return
	}
	fb.sawValue = true
	fb.termsCount[string(term)]++
	fb.total++
}
// StartDoc clears the sawValue flag. EndDoc counts a document as missing
// when the flag is still false at that point.
func (fb *TermsFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc increments the missing counter when UpdateVisitor did not see a
// term for this builder's field since StartDoc cleared the flag.
func (fb *TermsFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}

View file

@ -1,147 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package facets
import (
"container/list"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
// dateTimeRange holds the bounds of one date range bucket. Update treats a
// zero start or end as "no bound on that side", includes start and excludes
// end; Result leaves a zero bound out of the reported facet.
type dateTimeRange struct {
start time.Time
end time.Time
}
// DateTimeFacetBuilder counts how many terms of one field fall into each of
// a set of named date ranges.
type DateTimeFacetBuilder struct {
size int // maximum number of ranges Result reports
field string // field whose terms Update examines
termsCount map[string]int // range name -> number of terms counted in that range
total int // number of (term, range) matches counted so far
missing int // number of Update calls whose FieldTerms had no entry for field
ranges map[string]*dateTimeRange // range name -> bounds, filled by AddRange
}
// NewDateTimeFacetBuilder returns a builder for field whose Result reports at
// most size ranges. It starts with no ranges; add them with AddRange.
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
	fb := new(DateTimeFacetBuilder)
	fb.size = size
	fb.field = field
	fb.termsCount = map[string]int{}
	fb.ranges = map[string]*dateTimeRange{}
	return fb
}
// AddRange registers the range called name with the given bounds, replacing
// any range previously stored under the same name.
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	fb.ranges[name] = &dateTimeRange{start: start, end: end}
}
// Update folds one document's field terms into the facet counts.
//
// When ft has no entry for the builder's field the document is counted as
// missing. Otherwise every term that decodes as a prefix-coded value with
// shift 0 is interpreted as nanoseconds since the Unix epoch and counted once
// for each range that contains it (start inclusive, end exclusive, a zero
// bound meaning unbounded). Terms that fail to decode are skipped silently.
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
	terms, found := ft[fb.field]
	if !found {
		fb.missing++
		return
	}

	for _, term := range terms {
		// only consider the values which are shifted 0
		coded := numeric_util.PrefixCoded(term)
		if shift, err := coded.Shift(); err != nil || shift != 0 {
			continue
		}
		nanos, err := coded.Int64()
		if err != nil {
			continue
		}
		t := time.Unix(0, nanos)

		// look at each of the ranges for a match
		for rangeName, r := range fb.ranges {
			startsInRange := r.start.IsZero() || t.After(r.start) || t.Equal(r.start)
			endsInRange := r.end.IsZero() || t.Before(r.end)
			if startsInRange && endsInRange {
				fb.termsCount[rangeName]++
				fb.total++
			}
		}
	}
}
// Result builds the facet result from the counts gathered so far.
//
// It reports at most fb.size ranges, highest count first, with start and end
// bounds formatted as RFC 3339 timestamps with nanoseconds (a zero bound is
// left nil). Other is the total minus the counts of the reported ranges. The
// order among ranges with equal counts follows map iteration order and is
// therefore not stable between calls.
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
	result := &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	// FIXME better implementation needed here this is quick and dirty
	// ranked is kept in ascending count order, so the front is always the
	// weakest candidate and is the one dropped when the list grows past size.
	ranked := list.New()
	for name, count := range fb.termsCount {
		bounds := fb.ranges[name]
		facet := &search.DateRangeFacet{Name: name, Count: count}
		if !bounds.start.IsZero() {
			formatted := bounds.start.Format(time.RFC3339Nano)
			facet.Start = &formatted
		}
		if !bounds.end.IsZero() {
			formatted := bounds.end.Format(time.RFC3339Nano)
			facet.End = &formatted
		}

		// find the first entry with a strictly greater count
		pos := ranked.Front()
		for pos != nil && facet.Count >= pos.Value.(*search.DateRangeFacet).Count {
			pos = pos.Next()
		}
		if pos != nil {
			ranked.InsertBefore(facet, pos)
		} else {
			// nothing larger: the new facet becomes the tail
			ranked.PushBack(facet)
		}

		// if we just made the list too long, remove the head
		if ranked.Len() > fb.size {
			ranked.Remove(ranked.Front())
		}
	}

	// walk backwards so the output is ordered by descending count
	result.DateRanges = make([]*search.DateRangeFacet, ranked.Len())
	reported := 0
	idx := 0
	for e := ranked.Back(); e != nil; e = e.Prev() {
		facet := e.Value.(*search.DateRangeFacet)
		result.DateRanges[idx] = facet
		reported += facet.Count
		idx++
	}
	result.Other = fb.total - reported

	return result
}

View file

@ -1,49 +0,0 @@
package facets
import (
"strconv"
"testing"
"github.com/blevesearch/bleve/index"
nu "github.com/blevesearch/bleve/numeric_util"
)
// pcodedvalues holds the prefix-coded terms that numericFacetN feeds to the
// facet builder.
var pcodedvalues = []nu.PrefixCoded{
	{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
	{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
	{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7a, 0x1d, 0xa},
	{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x16, 0x9, 0x4a, 0x7b},
}
// BenchmarkNumericFacet10 runs numericFacetN with numTerms = 10.
func BenchmarkNumericFacet10(b *testing.B) {
numericFacetN(b, 10)
}
// BenchmarkNumericFacet100 runs numericFacetN with numTerms = 100.
func BenchmarkNumericFacet100(b *testing.B) {
numericFacetN(b, 100)
}
// BenchmarkNumericFacet1000 runs numericFacetN with numTerms = 1000.
func BenchmarkNumericFacet1000(b *testing.B) {
numericFacetN(b, 1000)
}
// numericFacetN benchmarks NumericFacetBuilder.Result. Before the timer is
// reset it registers numTerms+1 ranges and, after each registration, feeds
// every value in pcodedvalues to the builder; only Result is timed.
func numericFacetN(b *testing.B, numTerms int) {
	field := "test"
	builder := NewNumericFacetBuilder(field, numTerms)

	lo, hi := 0.0, 9999999998.0
	for n := 0; n <= numTerms; n++ {
		hi++
		lo--
		// NOTE(review): every range is handed the address of the same two
		// variables, so if AddRange stores the pointers all ranges end up
		// sharing the latest bounds — confirm this is intended.
		builder.AddRange("rangename"+strconv.Itoa(n), &lo, &hi)
		for _, coded := range pcodedvalues {
			doc := index.FieldTerms{field: []string{string(coded)}}
			builder.Update(doc)
		}
	}

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		builder.Result()
	}
}

View file

@ -1,58 +0,0 @@
package facets
import (
"io/ioutil"
"regexp"
"testing"
"github.com/blevesearch/bleve/index"
)
// terms is the vocabulary used by the terms facet benchmarks: the contents of
// benchmark_data.txt split on runs of non-word characters.
var terms []string

// init loads terms and panics when benchmark_data.txt cannot be read.
func init() {
	data, err := ioutil.ReadFile("benchmark_data.txt")
	if err != nil {
		panic(err)
	}
	terms = regexp.MustCompile(`\W+`).Split(string(data), -1)
}
// BenchmarkTermsFacet10 runs termsFacetN with numTerms = 10.
func BenchmarkTermsFacet10(b *testing.B) {
termsFacetN(b, 10)
}
// BenchmarkTermsFacet100 runs termsFacetN with numTerms = 100.
func BenchmarkTermsFacet100(b *testing.B) {
termsFacetN(b, 100)
}
// BenchmarkTermsFacet1000 runs termsFacetN with numTerms = 1000.
func BenchmarkTermsFacet1000(b *testing.B) {
termsFacetN(b, 1000)
}
// BenchmarkTermsFacet10000 runs termsFacetN with numTerms = 10000.
func BenchmarkTermsFacet10000(b *testing.B) {
termsFacetN(b, 10000)
}
// func BenchmarkTermsFacet100000(b *testing.B) {
// termsFacetN(b, 100000)
// }
func termsFacetN(b *testing.B, numTerms int) {
field := "test"
termsLen := len(terms)
tfb := NewTermsFacetBuilder(field, 3)
i := 0
for len(tfb.termsCount) < numTerms && i <= termsLen {
j := i % termsLen
term := terms[j]
tfb.Update(index.FieldTerms{field: []string{term}})
i++
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
tfb.Result()
}
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
@ -16,13 +21,18 @@ import (
)
type FacetBuilder interface {
Update(index.FieldTerms)
StartDoc()
UpdateVisitor(field string, term []byte)
EndDoc()
Result() *FacetResult
Field() string
}
// FacetsBuilder holds a set of named FacetBuilders and forwards per-document
// events to each of them.
type FacetsBuilder struct {
indexReader index.IndexReader
facets map[string]FacetBuilder // facet name -> builder, filled by Add
fields []string // Field() of every builder passed to Add, in call order
}
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
@ -34,17 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
// Add registers facetBuilder under name, replacing any builder already stored
// under that name, and appends the builder's field to the list of fields.
// The field list is only ever appended to, so adding several builders for the
// same field (or re-adding a name) leaves duplicate entries in it.
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
}
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.ID)
if err != nil {
return err
}
// RequiredFields returns the fields of all builders registered so far. The
// returned slice is the builder's own slice, not a copy.
func (fb *FacetsBuilder) RequiredFields() []string {
return fb.fields
}
func (fb *FacetsBuilder) StartDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.Update(fieldTerms)
facetBuilder.StartDoc()
}
}
// EndDoc calls EndDoc on every registered facet builder.
func (fb *FacetsBuilder) EndDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.EndDoc()
}
}
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
for _, facetBuilder := range fb.facets {
facetBuilder.UpdateVisitor(field, term)
}
return nil
}
type TermFacet struct {
@ -66,9 +88,14 @@ func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
return tf
}
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf TermFacets) Less(i, j int) bool { return tf[i].Count > tf[j].Count }
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
// Less orders facets by descending Count, breaking ties by ascending Term.
func (tf TermFacets) Less(i, j int) bool {
	left, right := tf[i], tf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Term < right.Term
}
type NumericRangeFacet struct {
Name string `json:"name"`
@ -77,11 +104,34 @@ type NumericRangeFacet struct {
Count int `json:"count"`
}
// Same reports whether nrf and other describe the same numeric range: for
// both Min and Max the two sides must either both be nil or both point to
// equal values. Name and Count are not compared.
func (nrf *NumericRangeFacet) Same(other *NumericRangeFacet) bool {
	// exactly one side has a lower bound
	if (nrf.Min == nil) != (other.Min == nil) {
		return false
	}
	// both have one (nil-ness matches at this point) but the values differ
	if nrf.Min != nil && *nrf.Min != *other.Min {
		return false
	}
	// same two checks for the upper bound
	if (nrf.Max == nil) != (other.Max == nil) {
		return false
	}
	if nrf.Max != nil && *nrf.Max != *other.Max {
		return false
	}
	return true
}
type NumericRangeFacets []*NumericRangeFacet
func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericRangeFacets {
for _, existingNr := range nrf {
if numericRangeFacet.Min == existingNr.Min && numericRangeFacet.Max == existingNr.Max {
if numericRangeFacet.Same(existingNr) {
existingNr.Count += numericRangeFacet.Count
return nrf
}
@ -91,9 +141,14 @@ func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericR
return nrf
}
func (nrf NumericRangeFacets) Len() int { return len(nrf) }
func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] }
func (nrf NumericRangeFacets) Less(i, j int) bool { return nrf[i].Count > nrf[j].Count }
func (nrf NumericRangeFacets) Len() int { return len(nrf) }
func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] }
// Less orders facets by descending Count, breaking ties by ascending Name.
func (nrf NumericRangeFacets) Less(i, j int) bool {
	left, right := nrf[i], nrf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
type DateRangeFacet struct {
Name string `json:"name"`
@ -102,11 +157,34 @@ type DateRangeFacet struct {
Count int `json:"count"`
}
// Same reports whether drf and other describe the same date range: for both
// Start and End the two sides must either both be nil or both point to equal
// values. Name and Count are not compared.
func (drf *DateRangeFacet) Same(other *DateRangeFacet) bool {
	// exactly one side has a start bound
	if (drf.Start == nil) != (other.Start == nil) {
		return false
	}
	// both have one (nil-ness matches at this point) but the values differ
	if drf.Start != nil && *drf.Start != *other.Start {
		return false
	}
	// same two checks for the end bound
	if (drf.End == nil) != (other.End == nil) {
		return false
	}
	if drf.End != nil && *drf.End != *other.End {
		return false
	}
	return true
}
type DateRangeFacets []*DateRangeFacet
func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
for _, existingDr := range drf {
if dateRangeFacet.Start == existingDr.Start && dateRangeFacet.End == existingDr.End {
if dateRangeFacet.Same(existingDr) {
existingDr.Count += dateRangeFacet.Count
return drf
}
@ -116,9 +194,14 @@ func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
return drf
}
func (drf DateRangeFacets) Len() int { return len(drf) }
func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] }
func (drf DateRangeFacets) Less(i, j int) bool { return drf[i].Count > drf[j].Count }
func (drf DateRangeFacets) Len() int { return len(drf) }
func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] }
// Less orders facets by descending Count, breaking ties by ascending Name.
func (drf DateRangeFacets) Less(i, j int) bool {
	left, right := drf[i], drf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
type FacetResult struct {
Field string `json:"field"`

View file

@ -1,301 +0,0 @@
package search
import (
"reflect"
"testing"
)
// TestTermFacetResultsMerge merges two sets of term facet results, trims the
// "types" facet to three terms with Fixup, and compares against the expected
// totals. The "categories" facet exists only on the receiving side and must
// come through unchanged.
func TestTermFacetResultsMerge(t *testing.T) {
	fr1 := &FacetResult{
		Field:   "type",
		Total:   100,
		Missing: 25,
		Other:   25,
		Terms: []*TermFacet{
			{Term: "blog", Count: 25},
			{Term: "comment", Count: 24},
			{Term: "feedback", Count: 1},
		},
	}
	fr1Only := &FacetResult{
		Field:   "category",
		Total:   97,
		Missing: 22,
		Other:   15,
		Terms: []*TermFacet{
			{Term: "clothing", Count: 35},
			{Term: "electronics", Count: 25},
		},
	}
	frs1 := FacetResults{
		"types":      fr1,
		"categories": fr1Only,
	}

	fr2 := &FacetResult{
		Field:   "type",
		Total:   100,
		Missing: 25,
		Other:   25,
		Terms: []*TermFacet{
			{Term: "blog", Count: 25},
			{Term: "comment", Count: 22},
			{Term: "flag", Count: 3},
		},
	}
	frs2 := FacetResults{
		"types": fr2,
	}

	expectedFr := &FacetResult{
		Field:   "type",
		Total:   200,
		Missing: 50,
		Other:   51,
		Terms: []*TermFacet{
			{Term: "blog", Count: 50},
			{Term: "comment", Count: 46},
			{Term: "flag", Count: 3},
		},
	}
	expectedFrs := FacetResults{
		"types":      expectedFr,
		"categories": fr1Only,
	}

	frs1.Merge(frs2)
	frs1.Fixup("types", 3)
	if reflect.DeepEqual(frs1, expectedFrs) {
		return
	}
	t.Errorf("expected %v, got %v", expectedFrs, frs1)
}
// TestNumericFacetResultsMerge merges two sets of numeric range facet
// results, trims the "ratings" facet to three ranges with Fixup, and compares
// against the expected totals.
func TestNumericFacetResultsMerge(t *testing.T) {
	lowmed := 3.0
	medhi := 6.0
	hihigher := 9.0

	// NOTE(review): the "med" range below uses Max=&lowmed and Min=&medhi
	// (i.e. min 6, max 3) in every fixture — kept as in the original, but it
	// looks like the two bounds may have been swapped; confirm.
	fr1 := &FacetResult{
		Field:   "rating",
		Total:   100,
		Missing: 25,
		Other:   25,
		NumericRanges: []*NumericRangeFacet{
			{Name: "low", Max: &lowmed, Count: 25},
			{Name: "med", Count: 24, Max: &lowmed, Min: &medhi},
			{Name: "hi", Count: 1, Min: &medhi, Max: &hihigher},
		},
	}
	frs1 := FacetResults{
		"ratings": fr1,
	}

	fr2 := &FacetResult{
		Field:   "rating",
		Total:   100,
		Missing: 25,
		Other:   25,
		NumericRanges: []*NumericRangeFacet{
			{Name: "low", Max: &lowmed, Count: 25},
			{Name: "med", Max: &lowmed, Min: &medhi, Count: 22},
			{Name: "highest", Min: &hihigher, Count: 3},
		},
	}
	frs2 := FacetResults{
		"ratings": fr2,
	}

	expectedFr := &FacetResult{
		Field:   "rating",
		Total:   200,
		Missing: 50,
		Other:   51,
		NumericRanges: []*NumericRangeFacet{
			{Name: "low", Count: 50, Max: &lowmed},
			{Name: "med", Max: &lowmed, Min: &medhi, Count: 46},
			{Name: "highest", Min: &hihigher, Count: 3},
		},
	}
	expectedFrs := FacetResults{
		"ratings": expectedFr,
	}

	frs1.Merge(frs2)
	frs1.Fixup("ratings", 3)
	if reflect.DeepEqual(frs1, expectedFrs) {
		return
	}
	t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}
// TestDateFacetResultsMerge merges two sets of date range facet results,
// trims the "birthdays" facet to three ranges with Fixup, and compares
// against the expected totals.
func TestDateFacetResultsMerge(t *testing.T) {
	lowmed := "2010-01-01"
	medhi := "2011-01-01"
	hihigher := "2012-01-01"

	fr1 := &FacetResult{
		Field:   "birthday",
		Total:   100,
		Missing: 25,
		Other:   25,
		DateRanges: []*DateRangeFacet{
			{Name: "low", End: &lowmed, Count: 25},
			{Name: "med", Count: 24, Start: &lowmed, End: &medhi},
			{Name: "hi", Count: 1, Start: &medhi, End: &hihigher},
		},
	}
	frs1 := FacetResults{
		"birthdays": fr1,
	}

	fr2 := &FacetResult{
		Field:   "birthday",
		Total:   100,
		Missing: 25,
		Other:   25,
		DateRanges: []*DateRangeFacet{
			{Name: "low", End: &lowmed, Count: 25},
			{Name: "med", Start: &lowmed, End: &medhi, Count: 22},
			{Name: "highest", Start: &hihigher, Count: 3},
		},
	}
	frs2 := FacetResults{
		"birthdays": fr2,
	}

	expectedFr := &FacetResult{
		Field:   "birthday",
		Total:   200,
		Missing: 50,
		Other:   51,
		DateRanges: []*DateRangeFacet{
			{Name: "low", Count: 50, End: &lowmed},
			{Name: "med", Start: &lowmed, End: &medhi, Count: 46},
			{Name: "highest", Start: &hihigher, Count: 3},
		},
	}
	expectedFrs := FacetResults{
		"birthdays": expectedFr,
	}

	frs1.Merge(frs2)
	frs1.Fixup("birthdays", 3)
	if reflect.DeepEqual(frs1, expectedFrs) {
		return
	}
	t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
@ -16,8 +21,8 @@ import (
const Name = "html"
const defaultHTMLHighlightBefore = "<b>"
const defaultHTMLHighlightAfter = "</b>"
const defaultHTMLHighlightBefore = "<mark>"
const defaultHTMLHighlightAfter = "</mark>"
type FragmentFormatter struct {
before string
@ -38,6 +43,10 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
if termLocation == nil {
continue
}
// make sure the array positions match
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
continue
}
if termLocation.Start < curr {
continue
}

View file

@ -1,100 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package ansi
import (
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search/highlight"
)
// Name is the identifier under which init registers this fragment formatter.
const Name = "ansi"
// DefaultAnsiHighlight is the color Constructor uses when the config has no
// string value under "color".
const DefaultAnsiHighlight = BgYellow
// FragmentFormatter highlights terms by surrounding them with an ANSI escape
// sequence and the Reset sequence.
type FragmentFormatter struct {
color string // escape sequence written before each highlighted term
}
// NewFragmentFormatter returns a formatter that writes color before each
// highlighted term. color is used verbatim and is not validated.
func NewFragmentFormatter(color string) *FragmentFormatter {
return &FragmentFormatter{
color: color,
}
}
// Format renders the bytes f.Orig[f.Start:f.End] as a string in which each
// accepted term location is preceded by the formatter's color sequence and
// followed by Reset.
//
// orderedTermLocations is walked in the order given. nil entries and entries
// that start before the current output position (i.e. overlap text already
// emitted) are skipped; the walk stops at the first entry that ends beyond
// f.End. Text between and after the highlighted terms is copied unchanged.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	// Accumulate into one byte slice and convert once at the end: repeated
	// string concatenation re-copies everything written so far on each step.
	var out []byte
	curr := f.Start
	for _, termLocation := range orderedTermLocations {
		if termLocation == nil || termLocation.Start < curr {
			continue
		}
		if termLocation.End > f.End {
			break
		}
		// add the stuff before this location
		out = append(out, f.Orig[curr:termLocation.Start]...)
		// add the color
		out = append(out, a.color...)
		// add the term itself
		out = append(out, f.Orig[termLocation.Start:termLocation.End]...)
		// reset the color
		out = append(out, Reset...)
		// update current
		curr = termLocation.End
	}
	// add any remaining text after the last token
	out = append(out, f.Orig[curr:f.End]...)

	return string(out)
}
// ANSI color control escape sequences.
// Shamelessly copied from https://github.com/sqp/godock/blob/master/libs/log/colors.go
const (
// Reset turns off all attributes and colors set by the sequences below.
Reset = "\x1b[0m"
// Text attributes.
Bright = "\x1b[1m"
Dim = "\x1b[2m"
Underscore = "\x1b[4m"
Blink = "\x1b[5m"
Reverse = "\x1b[7m"
Hidden = "\x1b[8m"
// Foreground colors (SGR codes 30-37).
FgBlack = "\x1b[30m"
FgRed = "\x1b[31m"
FgGreen = "\x1b[32m"
FgYellow = "\x1b[33m"
FgBlue = "\x1b[34m"
FgMagenta = "\x1b[35m"
FgCyan = "\x1b[36m"
FgWhite = "\x1b[37m"
// Background colors (SGR codes 40-47).
BgBlack = "\x1b[40m"
BgRed = "\x1b[41m"
BgGreen = "\x1b[42m"
BgYellow = "\x1b[43m"
BgBlue = "\x1b[44m"
BgMagenta = "\x1b[45m"
BgCyan = "\x1b[46m"
BgWhite = "\x1b[47m"
)
// Constructor builds an ANSI fragment formatter from config. A string value
// under the "color" key is used as the highlight sequence; when the key is
// absent or not a string, DefaultAnsiHighlight is used. cache is not used and
// the returned error is always nil.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	if custom, ok := config["color"].(string); ok {
		return NewFragmentFormatter(custom), nil
	}
	return NewFragmentFormatter(DefaultAnsiHighlight), nil
}
// init registers Constructor with the registry under Name.
func init() {
registry.RegisterFragmentFormatter(Name, Constructor)
}

View file

@ -1,87 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package html
import (
"testing"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
// TestHTMLFragmentFormatter1 checks that a formatter built with <b>/</b>
// wraps the located term in those tags and leaves the rest of the fragment
// untouched.
func TestHTMLFragmentFormatter1(t *testing.T) {
	cases := []struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		output   string
	}{
		{
			fragment: &highlight.Fragment{Orig: []byte("the quick brown fox"), Start: 0, End: 19},
			tlm: search.TermLocationMap{
				"quick": search.Locations{
					&search.Location{Pos: 2, Start: 4, End: 9},
				},
			},
			output: "the <b>quick</b> brown fox",
		},
	}

	formatter := NewFragmentFormatter("<b>", "</b>")
	for _, tc := range cases {
		ordered := highlight.OrderTermLocations(tc.tlm)
		got := formatter.Format(tc.fragment, ordered)
		if got == tc.output {
			continue
		}
		t.Errorf("expected `%s`, got `%s`", tc.output, got)
	}
}
// TestHTMLFragmentFormatter2 checks that a formatter built with <em>/</em>
// wraps the located term in those tags and leaves the rest of the fragment
// untouched.
func TestHTMLFragmentFormatter2(t *testing.T) {
	cases := []struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		output   string
	}{
		{
			fragment: &highlight.Fragment{Orig: []byte("the quick brown fox"), Start: 0, End: 19},
			tlm: search.TermLocationMap{
				"quick": search.Locations{
					&search.Location{Pos: 2, Start: 4, End: 9},
				},
			},
			output: "the <em>quick</em> brown fox",
		},
	}

	formatter := NewFragmentFormatter("<em>", "</em>")
	for _, tc := range cases {
		ordered := highlight.OrderTermLocations(tc.tlm)
		got := formatter.Format(tc.fragment, ordered)
		if got == tc.output {
			continue
		}
		t.Errorf("expected `%s`, got `%s`", tc.output, got)
	}
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
@ -31,8 +36,7 @@ func NewFragmenter(fragmentSize int) *Fragmenter {
}
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
rv := make([]*highlight.Fragment, 0)
var rv []*highlight.Fragment
maxbegin := 0
OUTER:
for currTermIndex, termLocation := range ot {
@ -47,11 +51,11 @@ OUTER:
continue OUTER // bail
}
end += size
used += 1
used++
}
// if we still have more characters available to us
// push back towards begining
// push back towards beginning
// without cross maxbegin
for start > 0 && used < s.fragmentSize {
r, size := utf8.DecodeLastRune(orig[0:start])
@ -60,7 +64,7 @@ OUTER:
}
if start-size >= maxbegin {
start -= size
used += 1
used++
} else {
break
}

View file

@ -1,295 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/search/highlight"
)
// TestSimpleFragmenter runs the fragmenter over a table of inputs, each with
// its own fragment size, and compares the produced fragments with the
// expected ones. The cases cover short ASCII text, text exactly as long as
// the fragment size, text one byte longer than the fragment size with ten
// adjacent terms, and multi-byte (Devanagari and CJK) text.
func TestSimpleFragmenter(t *testing.T) {
	// The inputs are kept as string constants and converted with []byte(...)
	// at every use, so each orig/Orig still gets its own backing array.
	const (
		ten       = "0123456789"
		digits100 = ten + ten + ten + ten + ten + ten + ten + ten + ten + ten
		digits101 = digits100 + "0"
		hindi     = "[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}"
		cjk       = "交换机"
	)

	tests := []struct {
		orig      []byte
		fragments []*highlight.Fragment
		ot        highlight.TermLocations
		size      int
	}{
		{
			orig: []byte("this is a test"),
			fragments: []*highlight.Fragment{
				{Orig: []byte("this is a test"), Start: 0, End: 14},
			},
			ot: highlight.TermLocations{
				{Term: "test", Pos: 4, Start: 10, End: 14},
			},
			size: 100,
		},
		{
			orig: []byte(digits100),
			fragments: []*highlight.Fragment{
				{Orig: []byte(digits100), Start: 0, End: 100},
			},
			ot: highlight.TermLocations{
				{Term: digits100, Pos: 1, Start: 0, End: 100},
			},
			size: 100,
		},
		{
			orig: []byte(digits101),
			fragments: []*highlight.Fragment{
				{Orig: []byte(digits101), Start: 0, End: 100},
				{Orig: []byte(digits101), Start: 10, End: 101},
				{Orig: []byte(digits101), Start: 20, End: 101},
				{Orig: []byte(digits101), Start: 30, End: 101},
				{Orig: []byte(digits101), Start: 40, End: 101},
				{Orig: []byte(digits101), Start: 50, End: 101},
				{Orig: []byte(digits101), Start: 60, End: 101},
				{Orig: []byte(digits101), Start: 70, End: 101},
				{Orig: []byte(digits101), Start: 80, End: 101},
				{Orig: []byte(digits101), Start: 90, End: 101},
			},
			ot: highlight.TermLocations{
				{Term: ten, Pos: 1, Start: 0, End: 10},
				{Term: ten, Pos: 2, Start: 10, End: 20},
				{Term: ten, Pos: 3, Start: 20, End: 30},
				{Term: ten, Pos: 4, Start: 30, End: 40},
				{Term: ten, Pos: 5, Start: 40, End: 50},
				{Term: ten, Pos: 6, Start: 50, End: 60},
				{Term: ten, Pos: 7, Start: 60, End: 70},
				{Term: ten, Pos: 8, Start: 70, End: 80},
				{Term: ten, Pos: 9, Start: 80, End: 90},
				{Term: ten, Pos: 10, Start: 90, End: 100},
			},
			size: 100,
		},
		{
			orig: []byte(hindi),
			fragments: []*highlight.Fragment{
				{Orig: []byte(hindi), Start: 0, End: 411},
			},
			ot: highlight.TermLocations{
				{Term: "पानी", Pos: 1, Start: 2, End: 14},
			},
			size: 200,
		},
		{
			orig: []byte(cjk),
			fragments: []*highlight.Fragment{
				{Orig: []byte(cjk), Start: 0, End: 9},
				{Orig: []byte(cjk), Start: 3, End: 9},
			},
			ot: highlight.TermLocations{
				{Term: "交换", Pos: 1, Start: 0, End: 6},
				{Term: "换机", Pos: 2, Start: 3, End: 9},
			},
			size: 200,
		},
	}

	for _, tc := range tests {
		got := NewFragmenter(tc.size).Fragment(tc.orig, tc.ot)
		if reflect.DeepEqual(got, tc.fragments) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.fragments, got)
		for _, frag := range got {
			t.Logf("frag: %s", frag.Orig[frag.Start:frag.End])
			t.Logf("frag: %d - %d", frag.Start, frag.End)
		}
	}
}
// TestSimpleFragmenterWithSize runs a fragmenter with a fragment size of 5
// over a short sentence with two terms and compares the produced fragments
// with the expected ones.
func TestSimpleFragmenterWithSize(t *testing.T) {
	tests := []struct {
		orig      []byte
		fragments []*highlight.Fragment
		ot        highlight.TermLocations
	}{
		{
			orig: []byte("this is a test"),
			fragments: []*highlight.Fragment{
				{Orig: []byte("this is a test"), Start: 0, End: 5},
				{Orig: []byte("this is a test"), Start: 9, End: 14},
			},
			ot: highlight.TermLocations{
				{Term: "this", Pos: 1, Start: 0, End: 5},
				{Term: "test", Pos: 4, Start: 10, End: 14},
			},
		},
	}

	fragmenter := NewFragmenter(5)
	for _, tc := range tests {
		got := fragmenter.Fragment(tc.orig, tc.ot)
		if reflect.DeepEqual(got, tc.fragments) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.fragments, got)
		for _, frag := range got {
			t.Logf("frag: %#v", frag)
		}
	}
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
@ -15,11 +20,12 @@ import (
)
type Fragment struct {
Orig []byte
Start int
End int
Score float64
Index int // used by heap
Orig []byte
ArrayPositions []uint64
Start int
End int
Score float64
Index int // used by heap
}
func (f *Fragment) Overlaps(other *Fragment) bool {

View file

@ -0,0 +1,50 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"fmt"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search/highlight"
htmlFormatter "github.com/blevesearch/bleve/search/highlight/format/html"
simpleFragmenter "github.com/blevesearch/bleve/search/highlight/fragmenter/simple"
simpleHighlighter "github.com/blevesearch/bleve/search/highlight/highlighter/simple"
)
const Name = "html"
// Constructor builds the highlighter registered under Name. It looks up the
// simple fragmenter and the HTML fragment formatter by name in cache and hands
// both, together with the simple highlighter's DefaultSeparator, to
// simpleHighlighter.NewHighlighter. The config argument is not consulted.
//
// A lookup failure is returned as an error describing which component could
// not be built.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	fragmenter, fragErr := cache.FragmenterNamed(simpleFragmenter.Name)
	if fragErr != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", fragErr)
	}

	formatter, formatErr := cache.FragmentFormatterNamed(htmlFormatter.Name)
	if formatErr != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", formatErr)
	}

	return simpleHighlighter.NewHighlighter(fragmenter, formatter, simpleHighlighter.DefaultSeparator), nil
}
// init registers Constructor with the registry under Name so the highlighter
// can be looked up by that name.
func init() {
	registry.RegisterHighlighter(Name, Constructor)
}

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
@ -32,7 +37,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
OUTER:
for _, locations := range s.tlm {
for _, location := range locations {
if int(location.Start) >= f.Start && int(location.End) <= f.End {
if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0
// once we find a term in the fragment
// don't care about additional matches

View file

@ -1,11 +1,16 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
@ -20,7 +25,7 @@ import (
)
const Name = "simple"
const defaultSeparator = "…"
const DefaultSeparator = "…"
type Highlighter struct {
fragmenter highlight.Fragmenter
@ -80,9 +85,17 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
if f.Name() == field {
_, ok := f.(*document.TextField)
if ok {
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations {
if otl.ArrayPositions.Equals(f.ArrayPositions()) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
}
}
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
for _, fragment := range fragments {
fragment.ArrayPositions = f.ArrayPositions()
scorer.Score(fragment)
heap.Push(&fq, fragment)
}
@ -91,7 +104,7 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
}
// now find the N best non-overlapping fragments
bestFragments := make([]*highlight.Fragment, 0)
var bestFragments []*highlight.Fragment
if len(fq) > 0 {
candidate := heap.Pop(&fq)
OUTER:
@ -176,7 +189,7 @@ func (fq *FragmentQueue) Pop() interface{} {
}
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
separator := defaultSeparator
separator := DefaultSeparator
separatorVal, ok := config["separator"].(string)
if ok {
separator = separatorVal

View file

@ -1,77 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"testing"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight"
)
// TestSimpleFragmentScorer scores a fragment against a term location map and
// checks the resulting Fragment.Score: one matching term is expected to yield
// 1, two distinct matching terms 2.
func TestSimpleFragmentScorer(t *testing.T) {
	type scoreCase struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		score    float64
	}

	cases := []scoreCase{
		{
			fragment: &highlight.Fragment{Orig: []byte("cat in the hat"), Start: 0, End: 14},
			tlm: search.TermLocationMap{
				"cat": search.Locations{
					&search.Location{Pos: 0, Start: 0, End: 3},
				},
			},
			score: 1,
		},
		{
			fragment: &highlight.Fragment{Orig: []byte("cat in the hat"), Start: 0, End: 14},
			tlm: search.TermLocationMap{
				"cat": search.Locations{
					&search.Location{Pos: 1, Start: 0, End: 3},
				},
				"hat": search.Locations{
					&search.Location{Pos: 4, Start: 11, End: 14},
				},
			},
			score: 2,
		},
	}

	for _, tc := range cases {
		scorer := NewFragmentScorer(tc.tlm)
		scorer.Score(tc.fragment)
		if got := tc.fragment.Score; got != tc.score {
			t.Errorf("expected score %f, got %f", tc.score, got)
		}
	}
}

View file

@ -1,164 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi"
sfrag "github.com/blevesearch/bleve/search/highlight/fragmenters/simple"
)
// ANSI escape sequences used by the tests in this file to spell out the
// expected highlighted output.
const (
	// reset is the escape sequence ESC[0m, written after each highlighted term.
	reset = "\x1b[0m"
	// DefaultAnsiHighlight is the escape sequence ESC[43m (SGR 43, yellow
	// background), written before each highlighted term.
	DefaultAnsiHighlight = "\x1b[43m"
)
func TestSimpleHighlighter(t *testing.T) {
fragmenter := sfrag.NewFragmenter(100)
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator)
docMatch := search.DocumentMatch{
ID: "a",
Score: 1.0,
Locations: search.FieldTermLocationMap{
"desc": search.TermLocationMap{
"quick": search.Locations{
&search.Location{
Pos: 2,
Start: 4,
End: 9,
},
},
"fox": search.Locations{
&search.Location{
Pos: 4,
Start: 16,
End: 19,
},
},
},
},
}
expectedFragment := "the " + DefaultAnsiHighlight + "quick" + reset + " brown " + DefaultAnsiHighlight + "fox" + reset + " jumps over the lazy dog"
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []uint64{}, []byte("the quick brown fox jumps over the lazy dog")))
fragment := highlighter.BestFragmentInField(&docMatch, doc, "desc")
if fragment != expectedFragment {
t.Errorf("expected `%s`, got `%s`", expectedFragment, fragment)
}
}
// TestSimpleHighlighterLonger asks for the five best fragments of a long,
// multi-paragraph text field with many term matches, and compares them (in
// order) with the expected highlighted, separator-delimited snippets.
func TestSimpleHighlighterLonger(t *testing.T) {
	loremText := []byte(`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sed semper nulla, sed pellentesque urna. Suspendisse potenti. Aliquam dignissim pulvinar erat vel ullamcorper. Nullam sed diam at dolor dapibus varius. Vestibulum at semper nunc. Integer ullamcorper enim ut nisi condimentum lacinia. Nulla ipsum ipsum, dictum in dapibus non, bibendum eget neque. Vestibulum malesuada erat quis malesuada dictum. Mauris luctus viverra lorem, nec hendrerit lacus lacinia ut. Donec suscipit sit amet nisi et dictum. Maecenas ultrices mollis diam, vel commodo libero lobortis nec. Nunc non dignissim dolor. Nulla non tempus risus, eget porttitor lectus. Suspendisse vitae gravida magna, a sagittis urna. Curabitur nec dui volutpat, hendrerit nisi non, adipiscing erat. Maecenas aliquet sem sit amet nibh ultrices accumsan.
Mauris lobortis sem sed blandit bibendum. In scelerisque eros sed metus aliquet convallis ac eget metus. Donec eget feugiat sem. Quisque venenatis, augue et blandit vulputate, velit odio viverra dolor, eu iaculis eros urna ut nunc. Duis faucibus mattis enim ut ultricies. Donec scelerisque volutpat elit, vel varius ante porttitor vel. Duis neque nulla, ultrices vel est id, molestie semper odio. Maecenas condimentum felis vitae nibh venenatis, ut feugiat risus vehicula. Suspendisse non sapien neque. Etiam et lorem consequat lorem aliquam ullamcorper. Pellentesque id vestibulum neque, at aliquam turpis. Aenean ultrices nec erat sit amet aliquam. Morbi eu sem in augue cursus ullamcorper a sed dolor. Integer et lobortis nulla, sit amet laoreet elit. In elementum, nibh nec volutpat pretium, lectus est pulvinar arcu, vehicula lobortis tellus sem id mauris. Maecenas ac blandit purus, sit amet scelerisque magna.
In hac habitasse platea dictumst. In lacinia elit non risus venenatis viverra. Nulla vestibulum laoreet turpis ac accumsan. Vivamus eros felis, rhoncus vel interdum bibendum, imperdiet nec diam. Etiam sed eros sed orci pellentesque sagittis. Praesent a fermentum leo. Vivamus ipsum risus, faucibus a dignissim ut, ullamcorper nec risus. Etiam quis adipiscing velit. Nam ac cursus arcu. Sed bibendum lectus quis massa dapibus dapibus. Vestibulum fermentum eros vitae hendrerit condimentum.
Fusce viverra eleifend iaculis. Maecenas tempor dictum cursus. Mauris faucibus, tortor in bibendum ornare, nibh lorem sollicitudin est, sed consectetur nulla dui imperdiet urna. Fusce aliquet odio fermentum massa mollis, id feugiat lacus egestas. Integer et eleifend metus. Duis neque tellus, vulputate nec dui eu, euismod sodales orci. Vivamus turpis erat, consectetur et pulvinar nec, ornare a quam. Maecenas fermentum, ligula vitae consectetur lobortis, mi lacus fermentum ante, ut semper lacus lectus porta orci. Nulla vehicula sodales eros, in iaculis ante laoreet at. Sed venenatis interdum metus, egestas scelerisque orci laoreet ut. Donec fermentum enim eget nibh blandit laoreet. Proin lacinia adipiscing lorem vel ornare. Donec ullamcorper massa elementum urna varius viverra. Proin pharetra, erat at feugiat rhoncus, velit eros condimentum mi, ac mattis sapien dolor non elit. Aenean viverra purus id tincidunt vulputate.
Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttitor et aliquet sed, vulputate in odio. Aenean feugiat est quis neque vehicula, eget vulputate nunc tempor. Donec quis nulla ut quam feugiat consectetur ut et justo. Nulla congue, metus auctor facilisis scelerisque, nunc risus vulputate urna, in blandit urna nibh et neque. Etiam quis tortor ut nulla dignissim dictum non sed ligula. Vivamus accumsan ligula eget ipsum ultrices, a tincidunt urna blandit. In hac habitasse platea dictumst.`)

	doc := document.NewDocument("a").AddField(document.NewTextField("full", []uint64{}, loremText))

	// Byte offsets of every matched term inside the "full" field.
	docMatch := search.DocumentMatch{
		ID:    "a",
		Score: 1.0,
		Locations: search.FieldTermLocationMap{
			"full": search.TermLocationMap{
				"metus": search.Locations{
					&search.Location{Pos: 0, Start: 883, End: 888},
					&search.Location{Pos: 0, Start: 915, End: 920},
					&search.Location{Pos: 0, Start: 2492, End: 2497},
					&search.Location{Pos: 0, Start: 2822, End: 2827},
					&search.Location{Pos: 0, Start: 3417, End: 3422},
				},
				"interdum": search.Locations{
					&search.Location{Pos: 0, Start: 1891, End: 1899},
					&search.Location{Pos: 0, Start: 2813, End: 2821},
				},
				"venenatis": search.Locations{
					&search.Location{Pos: 0, Start: 954, End: 963},
					&search.Location{Pos: 0, Start: 1252, End: 1261},
					&search.Location{Pos: 0, Start: 1795, End: 1804},
					&search.Location{Pos: 0, Start: 2803, End: 2812},
				},
			},
		},
	}

	want := []string{
		"…eros, in iaculis ante laoreet at. Sed " + DefaultAnsiHighlight + "venenatis" + reset + " " + DefaultAnsiHighlight + "interdum" + reset + " " + DefaultAnsiHighlight + "metus" + reset + ", egestas scelerisque orci laoreet ut.…",
		"… eros sed " + DefaultAnsiHighlight + "metus" + reset + " aliquet convallis ac eget " + DefaultAnsiHighlight + "metus" + reset + ". Donec eget feugiat sem. Quisque " + DefaultAnsiHighlight + "venenatis" + reset + ", augue et…",
		"… odio. Maecenas condimentum felis vitae nibh " + DefaultAnsiHighlight + "venenatis" + reset + ", ut feugiat risus vehicula. Suspendisse non s…",
		"… id feugiat lacus egestas. Integer et eleifend " + DefaultAnsiHighlight + "metus" + reset + ". Duis neque tellus, vulputate nec dui eu, euism…",
		"… accumsan. Vivamus eros felis, rhoncus vel " + DefaultAnsiHighlight + "interdum" + reset + " bibendum, imperdiet nec diam. Etiam sed eros sed…",
	}

	fragmenter := sfrag.NewFragmenter(100)
	formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
	highlighter := NewHighlighter(fragmenter, formatter, defaultSeparator)

	got := highlighter.BestFragmentsInField(&docMatch, doc, "full", 5)
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected %#v, got %#v", want, got)
	}
}

View file

@ -1,41 +1,75 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/search"
)
type TermLocation struct {
Term string
Pos int
Start int
End int
Term string
ArrayPositions search.ArrayPositions
Pos int
Start int
End int
}
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
if other.Start >= tl.Start && other.Start < tl.End {
return true
} else if tl.Start >= other.Start && tl.Start < other.End {
return true
if reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
if other.Start >= tl.Start && other.Start < tl.End {
return true
} else if tl.Start >= other.Start && tl.Start < other.End {
return true
}
}
return false
}
type TermLocations []*TermLocation
func (t TermLocations) Len() int { return len(t) }
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t TermLocations) Less(i, j int) bool { return t[i].Start < t[j].Start }
func (t TermLocations) Len() int { return len(t) }
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
// Less reports whether the term location at index i sorts before the one at
// index j. Locations are ordered first by ArrayPositions, compared element by
// element; when one sequence is a prefix of the other the shorter one sorts
// first. Locations with identical array positions are ordered by Start.
func (t TermLocations) Less(i, j int) bool {
	left, right := t[i], t[j]
	lap, rap := left.ArrayPositions, right.ArrayPositions

	// Only the leading positions present in both sequences can be compared.
	common := len(lap)
	if len(rap) < common {
		common = len(rap)
	}
	for k := 0; k < common; k++ {
		switch {
		case lap[k] < rap[k]:
			return true
		case lap[k] > rap[k]:
			return false
		}
	}

	// Shared prefix is equal: the shorter sequence is the smaller one.
	if len(lap) != len(rap) {
		return len(lap) < len(rap)
	}

	// Same array positions: fall back to the start offset.
	return left.Start < right.Start
}
func (t TermLocations) MergeOverlapping() {
var lastTl *TermLocation
@ -57,10 +91,11 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
for term, locations := range tlm {
for _, location := range locations {
tl := TermLocation{
Term: term,
Pos: int(location.Pos),
Start: int(location.Start),
End: int(location.End),
Term: term,
ArrayPositions: location.ArrayPositions,
Pos: int(location.Pos),
Start: int(location.Start),
End: int(location.End),
}
rv = append(rv, &tl)
}

View file

@ -1,173 +0,0 @@
package highlight
import (
"reflect"
"testing"
)
// TestTermLocationOverlaps checks TermLocation.Overlaps for a partially
// overlapping pair, a pair that merely touches (the end of one equals the
// start of the other), and a fully disjoint pair.
func TestTermLocationOverlaps(t *testing.T) {
	cases := []struct {
		left     *TermLocation
		right    *TermLocation
		expected bool
	}{
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 3, End: 7},
			expected: true,
		},
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 5, End: 7},
			expected: false,
		},
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 7, End: 11},
			expected: false,
		},
	}

	for _, tc := range cases {
		if got := tc.left.Overlaps(tc.right); got != tc.expected {
			t.Errorf("expected %t got %t for %#v", tc.expected, got, tc)
		}
	}
}
// TestTermLocationsMergeOverlapping calls MergeOverlapping on each input and
// compares the mutated input with the expected output. The expected tables
// show that a location absorbed into its predecessor leaves a nil entry in
// its slot, while the surviving location's End is extended.
func TestTermLocationsMergeOverlapping(t *testing.T) {
	type mergeCase struct {
		input  TermLocations
		output TermLocations
	}

	cases := []mergeCase{
		// empty input stays empty
		{
			input:  TermLocations{},
			output: TermLocations{},
		},
		// disjoint locations are left untouched
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 7, End: 11},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 7, End: 11},
			},
		},
		// two overlapping locations collapse into the first
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 11},
				nil,
			},
		},
		// a chain of three overlapping locations collapses into the first
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
				&TermLocation{Start: 9, End: 13},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 13},
				nil,
				nil,
			},
		},
		// a trailing disjoint location survives the merge
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
				&TermLocation{Start: 9, End: 13},
				&TermLocation{Start: 15, End: 21},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 13},
				nil,
				nil,
				&TermLocation{Start: 15, End: 21},
			},
		},
	}

	for _, tc := range cases {
		tc.input.MergeOverlapping()
		if reflect.DeepEqual(tc.input, tc.output) {
			continue
		}
		t.Errorf("expected: %#v got %#v", tc.output, tc.input)
	}
}

View file

@ -1,12 +1,26 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"math"
)
func LevenshteinDistance(a, b *string) int {
la := len(*a)
lb := len(*b)
func LevenshteinDistance(a, b string) int {
la := len(a)
lb := len(b)
d := make([]int, la+1)
var lastdiag, olddiag, temp int
@ -22,7 +36,7 @@ func LevenshteinDistance(a, b *string) int {
if (d[j-1] + 1) < min {
min = d[j-1] + 1
}
if (*a)[j-1] == (*b)[i-1] {
if a[j-1] == b[i-1] {
temp = 0
} else {
temp = 1
@ -37,14 +51,14 @@ func LevenshteinDistance(a, b *string) int {
return d[la]
}
// levenshteinDistanceMax same as levenshteinDistance but
// LevenshteinDistanceMax same as LevenshteinDistance but
// attempts to bail early once we know the distance
// will be greater than max
// in which case the first return val will be the max
// and the second will be true, indicating max was exceeded
func LevenshteinDistanceMax(a, b *string, max int) (int, bool) {
la := len(*a)
lb := len(*b)
func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
la := len(a)
lb := len(b)
ld := int(math.Abs(float64(la - lb)))
if ld > max {
@ -67,7 +81,7 @@ func LevenshteinDistanceMax(a, b *string, max int) (int, bool) {
if (d[j-1] + 1) < min {
min = d[j-1] + 1
}
if (*a)[j-1] == (*b)[i-1] {
if a[j-1] == b[i-1] {
temp = 0
} else {
temp = 1
@ -82,7 +96,7 @@ func LevenshteinDistanceMax(a, b *string, max int) (int, bool) {
lastdiag = olddiag
}
// after each row if rowmin isnt less than max stop
// after each row if rowmin isn't less than max stop
if rowmin > max {
return max, true
}

View file

@ -1,114 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"testing"
)
// TestLevenshteinDistance checks LevenshteinDistance against known edit
// distances for a couple of word pairs.
func TestLevenshteinDistance(t *testing.T) {
	type distanceCase struct {
		a    string
		b    string
		dist int
	}

	cases := []distanceCase{
		{a: "water", b: "atec", dist: 2},
		{a: "water", b: "aphex", dist: 4},
	}

	for _, tc := range cases {
		got := LevenshteinDistance(&tc.a, &tc.b)
		if got == tc.dist {
			continue
		}
		t.Errorf("expected %d, got %d for %s and %s", tc.dist, got, tc.a, tc.b)
	}
}
// TestLevenshteinDistanceMax checks both return values of
// LevenshteinDistanceMax: when the limit is exceeded the expected distance is
// the limit itself and the flag is true; otherwise the real distance is
// expected with the flag false.
func TestLevenshteinDistanceMax(t *testing.T) {
	type maxCase struct {
		a        string
		b        string
		max      int
		dist     int
		exceeded bool
	}

	cases := []maxCase{
		{a: "water", b: "atec", max: 1, dist: 1, exceeded: true},
		{a: "water", b: "christmas", max: 3, dist: 3, exceeded: true},
		{a: "water", b: "water", max: 1, dist: 0, exceeded: false},
	}

	for _, tc := range cases {
		got, gotExceeded := LevenshteinDistanceMax(&tc.a, &tc.b, tc.max)
		if got == tc.dist && gotExceeded == tc.exceeded {
			continue
		}
		t.Errorf("expected %d %t, got %d %t for %s and %s", tc.dist, tc.exceeded, got, gotExceeded, tc.a, tc.b)
	}
}
// benchmarkTerms is the fixed word list the Levenshtein benchmarks compare
// against "water": the first five are at most two edits away from it, the
// last five are further away.
var benchmarkTerms = []string{
	// close to "water"
	"watex", "aters", "wayer", "wbter", "yater",
	// far from "water"
	"christmas", "waterwaterwater", "watcatdogfish", "q", "couchbase",
}
// BenchmarkLevenshteinDistance measures LevenshteinDistance between "water"
// and every entry of benchmarkTerms, repeated b.N times.
func BenchmarkLevenshteinDistance(b *testing.B) {
	base := "water"
	for n := 0; n < b.N; n++ {
		for _, term := range benchmarkTerms {
			LevenshteinDistance(&base, &term)
		}
	}
}
// BenchmarkLevenshteinDistanceMax measures LevenshteinDistanceMax with a limit
// of 2 between "water" and every entry of benchmarkTerms, repeated b.N times.
func BenchmarkLevenshteinDistanceMax(b *testing.B) {
	base := "water"
	for n := 0; n < b.N; n++ {
		for _, term := range benchmarkTerms {
			LevenshteinDistanceMax(&base, &term, 2)
		}
	}
}

80
vendor/github.com/blevesearch/bleve/search/pool.go generated vendored Normal file
View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
// DocumentMatchPoolTooSmall is a callback invoked when a DocumentMatchPool
// has no instance available to hand out. It receives the pool and must return
// the DocumentMatch to use instead.
// The default callback simply allocates a new instance just in time, but a
// custom one could log a message, panic, etc.
type DocumentMatchPoolTooSmall func(p *DocumentMatchPool) *DocumentMatch
// DocumentMatchPool manages use/re-use of DocumentMatch instances
// it pre-allocates space from a single large block with the expected
// number of instances. It is not thread-safe as currently all
// aspects of search take place in a single goroutine.
type DocumentMatchPool struct {
	// avail holds the instances currently ready to be handed out; Get takes
	// from its end and Put appends to it.
	avail DocumentMatchCollection
	// TooSmall is called by Get when avail is empty.
	TooSmall DocumentMatchPoolTooSmall
}
// defaultDocumentMatchPoolTooSmall is the TooSmall callback installed by
// NewDocumentMatchPool: it ignores the pool and returns a freshly allocated,
// zero-valued DocumentMatch.
func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
	return &DocumentMatch{}
}
// NewDocumentMatchPool builds a DocumentMatchPool with memory pre-allocated
// for size DocumentMatch instances.
//
// All instances come from one contiguous block, and their Sort slices are
// carved out of a second contiguous block of size*sortsize strings, giving
// each instance room for sortsize sort values. Every Sort slice starts empty
// and has its capacity capped at sortsize, so appending more than sortsize
// values makes append allocate a new array instead of silently overwriting
// the region that belongs to the next instance.
//
// The returned pool uses the default TooSmall callback, which allocates a new
// DocumentMatch on demand.
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
	avail := make(DocumentMatchCollection, size)
	// pre-allocate the expected number of instances and their sort storage
	startBlock := make([]DocumentMatch, size)
	startSorts := make([]string, size*sortsize)
	// make these initial instances available
	for i := range startBlock {
		lo := i * sortsize
		avail[i] = &startBlock[i]
		// three-index slice: len 0, cap sortsize, isolated from its neighbours
		avail[i].Sort = startSorts[lo:lo : lo+sortsize]
	}
	return &DocumentMatchPool{
		avail:    avail,
		TooSmall: defaultDocumentMatchPoolTooSmall,
	}
}
// Get hands out a DocumentMatch. While the pool still has instances available
// the most recently added one is removed from the pool and returned; once the
// pool is empty the request is delegated to the TooSmall callback and whatever
// it returns is passed on to the caller.
func (p *DocumentMatchPool) Get() *DocumentMatch {
	last := len(p.avail) - 1
	if last < 0 {
		// nothing left: let the callback decide how to satisfy the request
		return p.TooSmall(p)
	}
	dm := p.avail[last]
	p.avail = p.avail[:last]
	return dm
}
// Put gives a DocumentMatch back to the pool. The instance is Reset first and
// then appended to the available list; a nil argument is ignored.
func (p *DocumentMatchPool) Put(d *DocumentMatch) {
	if d != nil {
		// wipe the instance before it becomes available again
		d.Reset()
		p.avail = append(p.avail, d)
	}
}

View file

@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// BoolFieldQuery is a query for a boolean value in a field. Its JSON form
// uses the keys "bool", "field" and "boost"; the latter two are omitted when
// empty.
type BoolFieldQuery struct {
	// Bool is the boolean value to search for.
	Bool bool `json:"bool"`
	// FieldVal is the field to search; when empty, Searcher falls back to the
	// mapping's default search field.
	FieldVal string `json:"field,omitempty"`
	// BoostVal is the optional boost; it stays nil until SetBoost is called.
	BoostVal *Boost `json:"boost,omitempty"`
}
// NewBoolFieldQuery returns a query for boolean fields that looks for val.
// The field name and boost are left unset.
func NewBoolFieldQuery(val bool) *BoolFieldQuery {
	q := new(BoolFieldQuery)
	q.Bool = val
	return q
}
// SetBoost stores b as the query's boost, replacing any previous value.
func (q *BoolFieldQuery) SetBoost(b float64) {
	// copy into a local so BoostVal points at its own Boost value
	boost := Boost(b)
	q.BoostVal = &boost
}
// Boost returns the query's boost as reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called, so this relies on
// Boost.Value tolerating a nil receiver — confirm.
func (q *BoolFieldQuery) Boost() float64 {
	return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *BoolFieldQuery) SetField(f string) {
	q.FieldVal = f
}
// Field returns the name of the field this query searches, or the empty
// string if none has been set.
func (q *BoolFieldQuery) Field() string {
	return q.FieldVal
}
// Searcher builds a term searcher for this query. The field searched is
// FieldVal, or the mapping's default search field when FieldVal is empty.
// The boolean is searched as the term "T" for true and "F" for false, with
// the boost taken from BoostVal.
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	var term string
	if q.Bool {
		term = "T"
	} else {
		term = "F"
	}

	return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options)
}

View file

@ -0,0 +1,248 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// BooleanQuery is a compound query made of optional must, should and
// must-not parts. Its JSON form uses the keys "must", "should", "must_not"
// and "boost", each omitted when empty.
type BooleanQuery struct {
	// Must, Should and MustNot hold the three sub-queries; each is nil when
	// that part is absent. The Add* methods expect Must to be a
	// *ConjunctionQuery and Should/MustNot to be *DisjunctionQuery values.
	Must    Query `json:"must,omitempty"`
	Should  Query `json:"should,omitempty"`
	MustNot Query `json:"must_not,omitempty"`
	// BoostVal is the optional boost; it stays nil until SetBoost is called.
	BoostVal *Boost `json:"boost,omitempty"`
	// queryStringMode is copied onto sub-queries created by the Add* methods.
	queryStringMode bool
}
// NewBooleanQuery creates a compound Query composed of several other Query
// objects.
// Result documents must satisfy ALL of the must Queries and NONE of the
// must not Queries; documents that ALSO satisfy any of the should Queries
// will score higher.
//
// The must queries are wrapped in a conjunction query, the should and must
// not queries each in a disjunction query. A part whose slice is empty is
// left nil.
func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	q := &BooleanQuery{}
	if len(must) != 0 {
		q.Must = NewConjunctionQuery(must)
	}
	if len(should) != 0 {
		q.Should = NewDisjunctionQuery(should)
	}
	if len(mustNot) != 0 {
		q.MustNot = NewDisjunctionQuery(mustNot)
	}
	return q
}
// NewBooleanQueryForQueryString builds a BooleanQuery with query string mode
// enabled. Unlike NewBooleanQuery, all three clauses are always created (even
// for empty slices), and each created clause inherits the query string mode
// flag.
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	q := NewBooleanQuery(nil, nil, nil)
	// The flag must be set before the Add* calls so the lazily created
	// clauses pick it up.
	q.queryStringMode = true

	q.AddMust(must...)
	q.AddShould(should...)
	q.AddMustNot(mustNot...)

	return q
}
// SetMinShould requires that at least minShould of the should Queries
// must be satisfied.
//
// The should clause must already hold a *DisjunctionQuery (for example after
// AddShould or a constructor given should queries); otherwise the type
// assertion panics.
func (q *BooleanQuery) SetMinShould(minShould float64) {
	should := q.Should.(*DisjunctionQuery)
	should.SetMin(minShould)
}
// AddMust appends the given queries to the must clause. If there is no must
// clause yet, an empty conjunction inheriting this query's query string mode
// is created first, even when m is empty.
func (q *BooleanQuery) AddMust(m ...Query) {
	if q.Must == nil {
		conj := NewConjunctionQuery([]Query{})
		conj.queryStringMode = q.queryStringMode
		q.Must = conj
	}
	if len(m) == 0 {
		// Nothing to add; the clause is deliberately left untouched.
		return
	}
	q.Must.(*ConjunctionQuery).AddQuery(m...)
}
// AddShould appends the given queries to the should clause. If there is no
// should clause yet, an empty disjunction inheriting this query's query
// string mode is created first, even when m is empty.
func (q *BooleanQuery) AddShould(m ...Query) {
	if q.Should == nil {
		disj := NewDisjunctionQuery([]Query{})
		disj.queryStringMode = q.queryStringMode
		q.Should = disj
	}
	if len(m) == 0 {
		// Nothing to add; the clause is deliberately left untouched.
		return
	}
	q.Should.(*DisjunctionQuery).AddQuery(m...)
}
// AddMustNot appends the given queries to the must-not clause. If there is no
// must-not clause yet, an empty disjunction inheriting this query's query
// string mode is created first, even when m is empty.
func (q *BooleanQuery) AddMustNot(m ...Query) {
	if q.MustNot == nil {
		disj := NewDisjunctionQuery([]Query{})
		disj.queryStringMode = q.queryStringMode
		q.MustNot = disj
	}
	if len(m) == 0 {
		// Nothing to add; the clause is deliberately left untouched.
		return
	}
	q.MustNot.(*DisjunctionQuery).AddQuery(m...)
}
// SetBoost stores b as the boost for this query.
func (q *BooleanQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *BooleanQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// Searcher builds the searcher for this boolean query.
//
// Each non-nil clause is turned into a searcher, in the order must-not, must,
// should. A clause whose searcher is a *searcher.MatchNoneSearcher is treated
// as if the clause were absent. The result is then:
//   - a match-none searcher when no clause remains;
//   - the should searcher itself when it is the only clause remaining;
//   - otherwise a boolean searcher; when only the must-not clause remains, a
//     match-all searcher with boost 1.0 is used as the must clause.
//
// When building a later clause (or the match-all searcher) fails, the clause
// searchers already built are closed before the error is returned, so they
// are not leaked.
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	// built tracks every clause searcher kept so far so that error paths can
	// release them.
	var built []search.Searcher
	closeBuilt := func() {
		for _, s := range built {
			// Best effort: the build error is what the caller needs to see.
			_ = s.Close()
		}
	}

	// buildClause converts one optional clause into a searcher. It returns a
	// nil searcher for an absent clause or one that matches nothing.
	buildClause := func(clause Query) (search.Searcher, error) {
		if clause == nil {
			return nil, nil
		}
		s, err := clause.Searcher(i, m, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		// if the clause is MatchNone, reset it to nil
		if _, ok := s.(*searcher.MatchNoneSearcher); ok {
			return nil, nil
		}
		built = append(built, s)
		return s, nil
	}

	mustNotSearcher, err := buildClause(q.MustNot)
	if err != nil {
		return nil, err
	}
	mustSearcher, err := buildClause(q.Must)
	if err != nil {
		return nil, err
	}
	shouldSearcher, err := buildClause(q.Should)
	if err != nil {
		return nil, err
	}

	// if all 3 are nil, return MatchNone
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
		return searcher.NewMatchNoneSearcher(i)
	}

	// if only mustNotSearcher, start with MatchAll
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
		mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
	}

	// optimization, if only should searcher, just return it instead
	if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
		return shouldSearcher, nil
	}

	return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options)
}
// Validate validates each clause that implements ValidatableQuery, in the
// order must, should, must-not, returning the first error. It then reports an
// error if the query has no clause at all.
func (q *BooleanQuery) Validate() error {
	for _, clause := range []Query{q.Must, q.Should, q.MustNot} {
		v, ok := clause.(ValidatableQuery)
		if !ok {
			// Absent clause, or one that has nothing to validate.
			continue
		}
		if err := v.Validate(); err != nil {
			return err
		}
	}

	if q.Must == nil && q.Should == nil && q.MustNot == nil {
		return fmt.Errorf("boolean query must contain at least one must or should or not must clause")
	}
	return nil
}
// UnmarshalJSON decodes a boolean query. Each clause present in the JSON is
// parsed with ParseQuery; the must clause has to parse to a conjunction and
// the should and must_not clauses to disjunctions, otherwise an error is
// returned. The boost is copied as-is (nil when absent).
func (q *BooleanQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Must    json.RawMessage `json:"must,omitempty"`
		Should  json.RawMessage `json:"should,omitempty"`
		MustNot json.RawMessage `json:"must_not,omitempty"`
		Boost   *Boost          `json:"boost,omitempty"`
	}
	err := json.Unmarshal(data, &raw)
	if err != nil {
		return err
	}

	if raw.Must != nil {
		if q.Must, err = ParseQuery(raw.Must); err != nil {
			return err
		}
		if _, ok := q.Must.(*ConjunctionQuery); !ok {
			return fmt.Errorf("must clause must be conjunction")
		}
	}

	if raw.Should != nil {
		if q.Should, err = ParseQuery(raw.Should); err != nil {
			return err
		}
		if _, ok := q.Should.(*DisjunctionQuery); !ok {
			return fmt.Errorf("should clause must be disjunction")
		}
	}

	if raw.MustNot != nil {
		if q.MustNot, err = ParseQuery(raw.MustNot); err != nil {
			return err
		}
		if _, ok := q.MustNot.(*DisjunctionQuery); !ok {
			return fmt.Errorf("must not clause must be disjunction")
		}
	}

	q.BoostVal = raw.Boost
	return nil
}

View file

@ -0,0 +1,33 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import "fmt"
// Boost is the boost factor attached to a query. Queries hold it as a
// *Boost so that nil can stand for "not specified".
type Boost float64

// Value returns the boost as a float64, or 1.0 when b is nil.
func (b *Boost) Value() float64 {
	if b != nil {
		return float64(*b)
	}
	return 1.0
}

// GoString returns the boost formatted with %f, or "boost unspecified" when
// b is nil.
func (b *Boost) GoString() string {
	if b != nil {
		return fmt.Sprintf("%f", *b)
	}
	return "boost unspecified"
}

View file

@ -0,0 +1,110 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// ConjunctionQuery is a compound query whose result documents must satisfy
// all of its child queries.
type ConjunctionQuery struct {
// Conjuncts are the child queries.
Conjuncts []Query `json:"conjuncts"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// queryStringMode makes Searcher skip children whose searcher is a
// match-none searcher.
queryStringMode bool
}
// NewConjunctionQuery creates a new compound Query.
// Result documents must satisfy all of the queries.
// The conjuncts slice is stored as given, not copied.
func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery {
	q := new(ConjunctionQuery)
	q.Conjuncts = conjuncts
	return q
}
// SetBoost stores b as the boost for this query.
func (q *ConjunctionQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *ConjunctionQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// AddQuery appends the given queries, in order, to the conjuncts.
func (q *ConjunctionQuery) AddQuery(aq ...Query) {
	q.Conjuncts = append(q.Conjuncts, aq...)
}
// Searcher builds a searcher for every conjunct and combines them into a
// conjunction searcher. If building a conjunct fails, the searchers built so
// far are closed and the error is returned. In query string mode, conjuncts
// that produce a match-none searcher are left out. When no searcher remains,
// a match-none searcher is returned.
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	children := make([]search.Searcher, 0, len(q.Conjuncts))
	for _, conjunct := range q.Conjuncts {
		child, err := conjunct.Searcher(i, m, options)
		if err != nil {
			// Release what was built before the failing conjunct.
			for _, built := range children {
				if built != nil {
					_ = built.Close()
				}
			}
			return nil, err
		}

		_, matchesNone := child.(*searcher.MatchNoneSearcher)
		if matchesNone && q.queryStringMode {
			// in query string mode, skip match none
			continue
		}
		children = append(children, child)
	}

	if len(children) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}
	return searcher.NewConjunctionSearcher(i, children, options)
}
// Validate validates every conjunct that implements ValidatableQuery and
// returns the first error encountered, or nil.
func (q *ConjunctionQuery) Validate() error {
	for _, conjunct := range q.Conjuncts {
		v, ok := conjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := v.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a conjunction query, parsing every entry of
// "conjuncts" with ParseQuery. The boost is copied as-is (nil when absent).
func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Conjuncts []json.RawMessage `json:"conjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Conjuncts = make([]Query, len(raw.Conjuncts))
	for idx, msg := range raw.Conjuncts {
		parsed, err := ParseQuery(msg)
		if err != nil {
			return err
		}
		q.Conjuncts[idx] = parsed
	}

	q.BoostVal = raw.Boost
	return nil
}

View file

@ -0,0 +1,164 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"math"
"time"
"github.com/blevesearch/bleve/analysis/datetime/optional"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
var (
	// QueryDateTimeParser controls the default query date time parser:
	// it is the name looked up in the package cache when date strings
	// are parsed.
	QueryDateTimeParser = optional.Name

	// QueryDateTimeFormat controls the format when Marshaling to JSON
	QueryDateTimeFormat = time.RFC3339

	// cache is the registry cache used to resolve the named date time parser.
	cache = registry.NewCache()
)
// BleveQueryTime wraps time.Time so that it is marshaled to JSON using
// QueryDateTimeFormat and unmarshaled using the date time parser named by
// QueryDateTimeParser.
type BleveQueryTime struct {
time.Time
}
// queryTimeFromString parses t with the date time parser named by
// QueryDateTimeParser. On any failure the zero time.Time is returned together
// with the error.
func queryTimeFromString(t string) (time.Time, error) {
	var zero time.Time

	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return zero, err
	}
	parsed, err := parser.ParseDateTime(t)
	if err != nil {
		return zero, err
	}
	return parsed, nil
}
// MarshalJSON encodes the time as a JSON string formatted with
// QueryDateTimeFormat. It never returns an error.
func (t *BleveQueryTime) MarshalJSON() ([]byte, error) {
	formatted := t.Time.Format(QueryDateTimeFormat)
	return []byte(`"` + formatted + `"`), nil
}
// UnmarshalJSON decodes a JSON string and parses it with the date time
// parser named by QueryDateTimeParser.
func (t *BleveQueryTime) UnmarshalJSON(data []byte) error {
	var raw string
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return err
	}

	t.Time, err = parser.ParseDateTime(raw)
	return err
}
// DateRangeQuery matches date values falling within a range.
type DateRangeQuery struct {
// Start is the lower endpoint; the zero time leaves the range open below.
Start BleveQueryTime `json:"start,omitempty"`
// End is the upper endpoint; the zero time leaves the range open above.
End BleveQueryTime `json:"end,omitempty"`
// InclusiveStart and InclusiveEnd are handed unchanged to the numeric range
// searcher; nil means unspecified.
InclusiveStart *bool `json:"inclusive_start,omitempty"`
InclusiveEnd *bool `json:"inclusive_end,omitempty"`
// FieldVal is the field to search; empty selects the mapping's default
// search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewDateRangeQuery creates a new Query for ranges of date values.
// A zero time.Time leaves that side of the range open; Validate rejects a
// query in which both endpoints are zero.
// Endpoint inclusion is left unspecified (nil); use
// NewDateRangeInclusiveQuery to control it.
func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
	var unspecified *bool
	return NewDateRangeInclusiveQuery(start, end, unspecified, unspecified)
}
// NewDateRangeInclusiveQuery creates a new Query for ranges of date values.
// A zero time.Time leaves that side of the range open; Validate rejects a
// query in which both endpoints are zero.
// startInclusive and endInclusive control inclusion of the endpoints and are
// stored as given (nil leaves the choice unspecified).
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *DateRangeQuery {
	q := &DateRangeQuery{
		InclusiveStart: startInclusive,
		InclusiveEnd:   endInclusive,
	}
	q.Start = BleveQueryTime{Time: start}
	q.End = BleveQueryTime{Time: end}
	return q
}
// SetBoost stores b as the boost for this query.
func (q *DateRangeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *DateRangeQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query is searched against.
func (q *DateRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored by SetField; it is empty when no
// field has been set.
func (q *DateRangeQuery) Field() string {
return q.FieldVal
}
// Searcher converts the endpoints into numeric bounds and builds a numeric
// range searcher over them. When no field has been set, the mapping's
// default search field is used.
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	lower, upper, err := q.parseEndpoints()
	if err != nil {
		return nil, err
	}

	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	boost := q.BoostVal.Value()
	return searcher.NewNumericRangeSearcher(i, lower, upper, q.InclusiveStart, q.InclusiveEnd, target, boost, options)
}
// parseEndpoints converts Start and End into the numeric bounds handed to the
// numeric range searcher. A zero endpoint yields -Inf (start) or +Inf (end);
// any other endpoint is converted from its Unix time in nanoseconds.
//
// An error is returned when a non-zero endpoint cannot be expressed as int64
// nanoseconds since the Unix epoch, because time.Time.UnixNano is undefined
// for such instants and the resulting bound would be meaningless.
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
	// The earliest and latest instants whose UnixNano fits in an int64.
	earliest := time.Unix(0, math.MinInt64)
	latest := time.Unix(0, math.MaxInt64)
	representable := func(t time.Time) bool {
		return !t.Before(earliest) && !t.After(latest)
	}

	min := math.Inf(-1)
	max := math.Inf(1)
	if !q.Start.IsZero() {
		if !representable(q.Start.Time) {
			return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start.Time)
		}
		min = numeric.Int64ToFloat64(q.Start.UnixNano())
	}
	if !q.End.IsZero() {
		if !representable(q.End.Time) {
			return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End.Time)
		}
		max = numeric.Int64ToFloat64(q.End.UnixNano())
	}

	return &min, &max, nil
}
// Validate reports an error when neither endpoint is set, and otherwise
// returns whatever error converting the endpoints produces.
func (q *DateRangeQuery) Validate() error {
	if q.Start.IsZero() && q.End.IsZero() {
		return fmt.Errorf("must specify start or end")
	}
	_, _, err := q.parseEndpoints()
	return err
}

View file

@ -0,0 +1,121 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// DisjunctionQuery is a compound query whose result documents satisfy at
// least one of its child queries.
type DisjunctionQuery struct {
// Disjuncts are the child queries.
Disjuncts []Query `json:"disjuncts"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// Min is handed to the disjunction searcher as its minimum; Validate
// rejects a Min larger than the number of disjuncts.
Min float64 `json:"min"`
// queryStringMode makes Searcher skip children whose searcher is a
// match-none searcher.
queryStringMode bool
}
// NewDisjunctionQuery creates a new compound Query.
// Result documents satisfy at least one Query.
// The disjuncts slice is stored as given, not copied.
func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery {
	q := new(DisjunctionQuery)
	q.Disjuncts = disjuncts
	return q
}
// SetBoost stores b as the boost for this query.
func (q *DisjunctionQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *DisjunctionQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// AddQuery appends the given queries, in order, to the disjuncts.
func (q *DisjunctionQuery) AddQuery(aq ...Query) {
	q.Disjuncts = append(q.Disjuncts, aq...)
}
// SetMin sets Min, the minimum handed to the disjunction searcher and
// checked against the number of disjuncts by Validate.
func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m
}
// Searcher builds a searcher for every disjunct and combines them into a
// disjunction searcher using Min. If building a disjunct fails, the searchers
// built so far are closed and the error is returned. In query string mode,
// disjuncts that produce a match-none searcher are left out. When no searcher
// remains, a match-none searcher is returned.
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	children := make([]search.Searcher, 0, len(q.Disjuncts))
	for _, disjunct := range q.Disjuncts {
		child, err := disjunct.Searcher(i, m, options)
		if err != nil {
			// Release what was built before the failing disjunct.
			for _, built := range children {
				if built != nil {
					_ = built.Close()
				}
			}
			return nil, err
		}

		_, matchesNone := child.(*searcher.MatchNoneSearcher)
		if matchesNone && q.queryStringMode {
			// in query string mode, skip match none
			continue
		}
		children = append(children, child)
	}

	if len(children) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}
	return searcher.NewDisjunctionSearcher(i, children, q.Min, options)
}
// Validate reports an error when Min (truncated to an int) exceeds the number
// of disjuncts, then validates every disjunct that implements
// ValidatableQuery and returns the first error encountered, or nil.
func (q *DisjunctionQuery) Validate() error {
	if len(q.Disjuncts) < int(q.Min) {
		return fmt.Errorf("disjunction query has fewer than the minimum number of clauses to satisfy")
	}
	for _, disjunct := range q.Disjuncts {
		v, ok := disjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := v.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a disjunction query, parsing every entry of
// "disjuncts" with ParseQuery. The boost and min are copied as-is.
func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Disjuncts []json.RawMessage `json:"disjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
		Min       float64           `json:"min"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Disjuncts = make([]Query, len(raw.Disjuncts))
	for idx, msg := range raw.Disjuncts {
		parsed, err := ParseQuery(msg)
		if err != nil {
			return err
		}
		q.Disjuncts[idx] = parsed
	}

	q.BoostVal = raw.Boost
	q.Min = raw.Min
	return nil
}

View file

@ -0,0 +1,49 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// DocIDQuery matches indexed documents by their identifiers.
type DocIDQuery struct {
// IDs are the document identifiers handed to the doc ID searcher.
IDs []string `json:"ids"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewDocIDQuery creates a new Query object returning indexed documents among
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
// other queries output. The ids slice is stored as given, not copied.
func NewDocIDQuery(ids []string) *DocIDQuery {
	q := new(DocIDQuery)
	q.IDs = ids
	return q
}
// SetBoost stores b as the boost for this query.
func (q *DocIDQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *DocIDQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// Searcher builds a doc ID searcher over IDs with this query's boost.
// The mapping m is not used.
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewDocIDSearcher(i, q.IDs, boost, options)
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// FuzzyQuery matches documents containing terms within a given fuzziness of
// Term.
type FuzzyQuery struct {
// Term is the term to match approximately.
Term string `json:"term"`
// Prefix is the prefix length handed to the fuzzy searcher.
Prefix int `json:"prefix_length"`
// Fuzziness is the fuzziness handed to the fuzzy searcher; NewFuzzyQuery
// sets it to 1.
Fuzziness int `json:"fuzziness"`
// FieldVal is the field to search; empty selects the mapping's default
// search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewFuzzyQuery creates a new Query which finds
// documents containing terms within a specific
// fuzziness of the specified term.
// The default fuzziness is 1.
//
// The current implementation uses Levenshtein edit
// distance as the fuzziness metric.
func NewFuzzyQuery(term string) *FuzzyQuery {
	q := new(FuzzyQuery)
	q.Term = term
	q.Fuzziness = 1
	return q
}
// SetBoost stores b as the boost for this query.
func (q *FuzzyQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *FuzzyQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query is searched against.
func (q *FuzzyQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored by SetField; it is empty when no
// field has been set.
func (q *FuzzyQuery) Field() string {
return q.FieldVal
}
// SetFuzziness sets the fuzziness handed to the fuzzy searcher.
func (q *FuzzyQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
// SetPrefix sets the prefix length handed to the fuzzy searcher.
func (q *FuzzyQuery) SetPrefix(p int) {
q.Prefix = p
}
// Searcher builds a fuzzy searcher for Term using the configured prefix
// length, fuzziness and boost. When no field has been set, the mapping's
// default search field is used.
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, target, boost, options)
}

View file

@ -0,0 +1,113 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// GeoBoundingBoxQuery matches geo points inside a bounding box given by its
// top-left and bottom-right corners.
type GeoBoundingBoxQuery struct {
// TopLeft is the top-left corner as [lon, lat].
TopLeft []float64 `json:"top_left,omitempty"`
// BottomRight is the bottom-right corner as [lon, lat].
BottomRight []float64 `json:"bottom_right,omitempty"`
// FieldVal is the field to search; empty selects the mapping's default
// search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoBoundingBoxQuery creates a query for the bounding box whose corners
// are (topLeftLon, topLeftLat) and (bottomRightLon, bottomRightLat). Each
// corner is stored as a [lon, lat] pair.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
	q := new(GeoBoundingBoxQuery)
	q.TopLeft = []float64{topLeftLon, topLeftLat}
	q.BottomRight = []float64{bottomRightLon, bottomRightLat}
	return q
}
// SetBoost stores b as the boost for this query.
func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *GeoBoundingBoxQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query is searched against.
func (q *GeoBoundingBoxQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored by SetField; it is empty when no
// field has been set.
func (q *GeoBoundingBoxQuery) Field() string {
return q.FieldVal
}
// Searcher builds a geo bounding box searcher for this query. When no field
// has been set, the mapping's default search field is used.
//
// When the bottom-right longitude is smaller than the top-left longitude the
// box is taken to cross the date line and is rewritten as the disjunction of
// two boxes: one from longitude -180 to the bottom-right longitude and one
// from the top-left longitude to 180.
//
// An error is returned, instead of panicking on an out-of-range index, when
// either corner holds fewer than two values.
func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	if len(q.TopLeft) < 2 || len(q.BottomRight) < 2 {
		return nil, fmt.Errorf("geo bounding box query requires top_left and bottom_right as [lon, lat]")
	}

	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	if q.BottomRight[0] < q.TopLeft[0] {
		// cross date line, rewrite as two parts

		leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
		if err != nil {
			return nil, err
		}
		rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true)
		if err != nil {
			// The left half was already built; release it before failing.
			_ = leftSearcher.Close()
			return nil, err
		}

		return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
	}

	return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
}
// Validate always returns nil; no checks are performed on the query.
func (q *GeoBoundingBoxQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a geo bounding box query. The top_left and
// bottom_right values may be in any form accepted by geo.ExtractGeoPoint and
// are stored as [lon, lat] pairs; an error is returned when either cannot be
// extracted.
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		TopLeft     interface{} `json:"top_left,omitempty"`
		BottomRight interface{} `json:"bottom_right,omitempty"`
		FieldVal    string      `json:"field,omitempty"`
		BoostVal    *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	tlLon, tlLat, ok := geo.ExtractGeoPoint(raw.TopLeft)
	if !ok {
		return fmt.Errorf("geo location top_left not in a valid format")
	}
	q.TopLeft = []float64{tlLon, tlLat}

	brLon, brLat, ok := geo.ExtractGeoPoint(raw.BottomRight)
	if !ok {
		return fmt.Errorf("geo location bottom_right not in a valid format")
	}
	q.BottomRight = []float64{brLon, brLat}

	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

View file

@ -0,0 +1,100 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// GeoDistanceQuery matches geo points within a distance of a location.
type GeoDistanceQuery struct {
// Location is the reference point as [lon, lat].
Location []float64 `json:"location,omitempty"`
// Distance is the distance as a string, parsed with geo.ParseDistance when
// the searcher is built.
Distance string `json:"distance,omitempty"`
// FieldVal is the field to search; empty selects the mapping's default
// search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoDistanceQuery creates a query for points within distance of the
// point (lon, lat). The distance string is stored unparsed; it is parsed
// when the searcher is built.
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
	q := new(GeoDistanceQuery)
	q.Location = []float64{lon, lat}
	q.Distance = distance
	return q
}
// SetBoost stores b as the boost for this query.
func (q *GeoDistanceQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *GeoDistanceQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query is searched against.
func (q *GeoDistanceQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored by SetField; it is empty when no
// field has been set.
func (q *GeoDistanceQuery) Field() string {
return q.FieldVal
}
// Searcher parses Distance with geo.ParseDistance and builds a geo point
// distance searcher around Location. When no field has been set, the
// mapping's default search field is used.
//
// An error is returned when the distance cannot be parsed, or, instead of
// panicking on an out-of-range index, when Location holds fewer than two
// values.
func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	dist, err := geo.ParseDistance(q.Distance)
	if err != nil {
		return nil, err
	}

	if len(q.Location) < 2 {
		return nil, fmt.Errorf("geo distance query requires location as [lon, lat]")
	}

	return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1],
		dist, field, q.BoostVal.Value(), options)
}
// Validate always returns nil; no checks are performed on the query.
func (q *GeoDistanceQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a geo distance query. The location may be in any
// form accepted by geo.ExtractGeoPoint and is stored as a [lon, lat] pair; an
// error is returned when it cannot be extracted. The distance string is
// stored unparsed.
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Location interface{} `json:"location,omitempty"`
		Distance string      `json:"distance,omitempty"`
		FieldVal string      `json:"field,omitempty"`
		BoostVal *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	lon, lat, ok := geo.ExtractGeoPoint(raw.Location)
	if !ok {
		return fmt.Errorf("geo location not in a valid format")
	}

	q.Location = []float64{lon, lat}
	q.Distance = raw.Distance
	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

View file

@ -0,0 +1,176 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
)
// MatchQuery matches text: the Match text is analyzed and the resulting
// token terms are searched for.
type MatchQuery struct {
// Match is the text to analyze.
Match string `json:"match"`
// FieldVal is the field to search; empty selects the mapping's default
// search field.
FieldVal string `json:"field,omitempty"`
// Analyzer names the analyzer to use; empty selects the analyzer the
// mapping reports for the field.
Analyzer string `json:"analyzer,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// Prefix and Fuzziness are applied to the per-term fuzzy queries that are
// used when Fuzziness is non-zero.
Prefix int `json:"prefix_length"`
Fuzziness int `json:"fuzziness"`
// Operator selects whether any (or) or all (and) terms must match.
Operator MatchQueryOperator `json:"operator,omitempty"`
}
// MatchQueryOperator selects how the term searches of a match query are
// combined. It is encoded in JSON as the string "or" or "and".
type MatchQueryOperator int

const (
	// Document must satisfy AT LEAST ONE of term searches.
	MatchQueryOperatorOr = 0
	// Document must satisfy ALL of term searches.
	MatchQueryOperatorAnd = 1
)

// MarshalJSON encodes the operator as the JSON string "or" or "and". Any
// other value yields an error.
func (o MatchQueryOperator) MarshalJSON() ([]byte, error) {
	switch o {
	case MatchQueryOperatorOr:
		return json.Marshal("or")
	case MatchQueryOperatorAnd:
		return json.Marshal("and")
	default:
		return nil, fmt.Errorf("cannot marshal match operator %d to JSON", o)
	}
}

// UnmarshalJSON decodes the JSON string "or" or "and" into the operator.
// Any other string yields an error naming the rejected value and leaves the
// operator unchanged.
func (o *MatchQueryOperator) UnmarshalJSON(data []byte) error {
	var operatorString string
	err := json.Unmarshal(data, &operatorString)
	if err != nil {
		return err
	}

	switch operatorString {
	case "or":
		*o = MatchQueryOperatorOr
		return nil
	case "and":
		*o = MatchQueryOperatorAnd
		return nil
	default:
		// Report the offending input, not the receiver pointer.
		return fmt.Errorf("cannot unmarshal match operator '%v' from JSON", operatorString)
	}
}
// NewMatchQuery creates a Query for matching text.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to perform term searches. Result documents
// must satisfy at least one of these term searches.
func NewMatchQuery(match string) *MatchQuery {
	q := new(MatchQuery)
	q.Match = match
	q.Operator = MatchQueryOperatorOr
	return q
}
// SetBoost stores b as the boost for this query.
func (q *MatchQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *MatchQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query is searched against.
func (q *MatchQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored by SetField; it is empty when no
// field has been set.
func (q *MatchQuery) Field() string {
return q.FieldVal
}
// SetFuzziness sets the fuzziness; a non-zero value makes Searcher use fuzzy
// queries instead of term queries for the analyzed tokens.
func (q *MatchQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
// SetPrefix sets the prefix length applied to the per-term fuzzy queries.
func (q *MatchQuery) SetPrefix(p int) {
q.Prefix = p
}
// SetOperator sets how the per-term searches are combined (or / and).
func (q *MatchQuery) SetOperator(operator MatchQueryOperator) {
q.Operator = operator
}
// Searcher analyzes the Match text and builds a searcher over the resulting
// token terms.
//
// The field defaults to the mapping's default search field, and the analyzer
// to the one the mapping reports for that field. An error naming the resolved
// analyzer is returned when the mapping has no such analyzer.
//
// Each token becomes a term query, or a fuzzy query (with the configured
// fuzziness and prefix length) when Fuzziness is non-zero. The per-term
// queries are combined into a disjunction with minimum 1 for the "or"
// operator or a conjunction for the "and" operator; any other operator is an
// error. When analysis yields no tokens, a match-none searcher is returned.
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the name actually looked up; q.Analyzer is empty when the
		// name came from the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.Match))
	if len(tokens) == 0 {
		noneQuery := NewMatchNoneQuery()
		return noneQuery.Searcher(i, m, options)
	}

	tqs := make([]Query, len(tokens))
	if q.Fuzziness != 0 {
		for idx, token := range tokens {
			query := NewFuzzyQuery(string(token.Term))
			query.SetFuzziness(q.Fuzziness)
			query.SetPrefix(q.Prefix)
			query.SetField(field)
			query.SetBoost(q.BoostVal.Value())
			tqs[idx] = query
		}
	} else {
		for idx, token := range tokens {
			tq := NewTermQuery(string(token.Term))
			tq.SetField(field)
			tq.SetBoost(q.BoostVal.Value())
			tqs[idx] = tq
		}
	}

	switch q.Operator {
	case MatchQueryOperatorOr:
		shouldQuery := NewDisjunctionQuery(tqs)
		shouldQuery.SetMin(1)
		shouldQuery.SetBoost(q.BoostVal.Value())
		return shouldQuery.Searcher(i, m, options)

	case MatchQueryOperatorAnd:
		mustQuery := NewConjunctionQuery(tqs)
		mustQuery.SetBoost(q.BoostVal.Value())
		return mustQuery.Searcher(i, m, options)

	default:
		return nil, fmt.Errorf("unhandled operator %d", q.Operator)
	}
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// MatchAllQuery matches all documents in the index.
type MatchAllQuery struct {
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchAllQuery creates a Query which will
// match all documents in the index.
func NewMatchAllQuery() *MatchAllQuery {
	return new(MatchAllQuery)
}
// SetBoost stores b as the boost for this query.
func (q *MatchAllQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}
// Boost returns the boost set for this query, or 1.0 when none has been set.
func (q *MatchAllQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// Searcher builds a match-all searcher with this query's boost.
// The mapping m is not used.
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewMatchAllSearcher(i, boost, options)
}
// MarshalJSON encodes the query as an object with a "boost" member (null when
// no boost is set) and an empty "match_all" object.
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{
		"boost":     q.BoostVal,
		"match_all": map[string]interface{}{},
	})
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// MatchNoneQuery is a Query that does not match any documents in the index.
// It carries only an optional boost.
type MatchNoneQuery struct {
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewMatchNoneQuery creates a Query which will not
// match any documents in the index.
func NewMatchNoneQuery() *MatchNoneQuery {
	return new(MatchNoneQuery)
}

// SetBoost records b as the boost for this query.
func (q *MatchNoneQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value(). BoostVal is nil
// until SetBoost has been called (or a boost was decoded from JSON).
func (q *MatchNoneQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// Searcher builds the match-none searcher over reader i. Neither the mapping,
// the options, nor the boost are passed on to the searcher.
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewMatchNoneSearcher(i)
}

// MarshalJSON encodes the query as a JSON object holding the "boost" value
// and an empty "match_none" object.
func (q *MatchNoneQuery) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{
		"boost":      q.BoostVal,
		"match_none": map[string]interface{}{},
	})
}

View file

@ -0,0 +1,113 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
)
// MatchPhraseQuery describes a phrase search whose input text is analyzed
// before being matched. Analyzer, when non-empty, names the analyzer to use;
// otherwise the analyzer is chosen from the field.
type MatchPhraseQuery struct {
	MatchPhrase string `json:"match_phrase"`
	FieldVal    string `json:"field,omitempty"`
	Analyzer    string `json:"analyzer,omitempty"`
	BoostVal    *Boost `json:"boost,omitempty"`
}

// NewMatchPhraseQuery creates a new Query object
// for matching phrases in the index.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to build a search phrase. Result documents
// must match this phrase. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery {
	q := new(MatchPhraseQuery)
	q.MatchPhrase = matchPhrase
	return q
}

// SetBoost records b as the boost for this query.
func (q *MatchPhraseQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *MatchPhraseQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to field f.
func (q *MatchPhraseQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field set with SetField, or "" when none was set.
func (q *MatchPhraseQuery) Field() string {
	return q.FieldVal
}
// Searcher analyzes the phrase text and builds the searcher for it.
//
// The field defaults to the mapping's default search field when none was set.
// The analyzer is q.Analyzer when given, otherwise the one the mapping
// associates with the field. An error is returned when the mapping has no
// analyzer registered under the resolved name; the message reports that
// resolved name, which is not necessarily q.Analyzer.
//
// When analysis produces tokens, they are turned into a multi-phrase query on
// the field; when it produces none, a match-none searcher is returned.
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the name that was actually looked up; q.Analyzer is empty
		// whenever the name came from the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.MatchPhrase))
	if len(tokens) == 0 {
		return NewMatchNoneQuery().Searcher(i, m, options)
	}

	phraseQuery := NewMultiPhraseQuery(tokenStreamToPhrase(tokens), field)
	phraseQuery.SetBoost(q.BoostVal.Value())
	return phraseQuery.Searcher(i, m, options)
}
// tokenStreamToPhrase groups token terms by position. The result has one
// entry per position between the smallest and largest token position seen
// (inclusive); each entry lists the terms found at that position, and
// positions with no token are left as nil entries. It returns nil when the
// computed length is not positive, which is the case for an empty stream.
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
	const maxInt = int(^uint(0) >> 1)

	lowest, highest := maxInt, 0
	for _, tok := range tokens {
		if tok.Position < lowest {
			lowest = tok.Position
		}
		if tok.Position > highest {
			highest = tok.Position
		}
	}

	size := highest - lowest + 1
	if size <= 0 {
		return nil
	}

	phrase := make([][]string, size)
	for _, tok := range tokens {
		slot := tok.Position - lowest
		phrase[slot] = append(phrase[slot], string(tok.Term))
	}
	return phrase
}

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// MultiPhraseQuery describes a phrase search in which every position of the
// phrase is given as a list of alternative terms.
type MultiPhraseQuery struct {
	Terms    [][]string `json:"terms"`
	Field    string     `json:"field,omitempty"`
	BoostVal *Boost     `json:"boost,omitempty"`
}

// NewMultiPhraseQuery creates a new Query for finding
// term phrases in the index.
// It is like PhraseQuery, but each position in the
// phrase may be satisfied by a list of terms
// as opposed to just one.
// At least one of the terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
	q := new(MultiPhraseQuery)
	q.Terms, q.Field = terms, field
	return q
}

// SetBoost records b as the boost for this query.
func (q *MultiPhraseQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *MultiPhraseQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// Searcher builds the multi-phrase searcher over reader i for the stored
// terms and field. The mapping and the boost are not passed to the searcher.
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options)
}

// Validate reports an error when the query holds no term positions.
func (q *MultiPhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}

// UnmarshalJSON decodes data into q. Decoding goes through a local type
// with the same fields but no methods, so json.Unmarshal does not call back
// into this method.
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
	type plain MultiPhraseQuery
	var decoded plain
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	q.Terms, q.Field, q.BoostVal = decoded.Terms, decoded.Field, decoded.BoostVal
	return nil
}

View file

@ -0,0 +1,87 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// NumericRangeQuery describes a search for numeric values between Min and
// Max. Either bound may be nil, as may either inclusivity flag.
type NumericRangeQuery struct {
	Min          *float64 `json:"min,omitempty"`
	Max          *float64 `json:"max,omitempty"`
	InclusiveMin *bool    `json:"inclusive_min,omitempty"`
	InclusiveMax *bool    `json:"inclusive_max,omitempty"`
	FieldVal     string   `json:"field,omitempty"`
	BoostVal     *Boost   `json:"boost,omitempty"`
}

// NewNumericRangeQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewNumericRangeQuery(min, max *float64) *NumericRangeQuery {
	// Both inclusivity flags are left nil.
	return NewNumericRangeInclusiveQuery(min, max, nil, nil)
}

// NewNumericRangeInclusiveQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// Control endpoint inclusion with inclusiveMin, inclusiveMax.
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *NumericRangeQuery {
	q := new(NumericRangeQuery)
	q.Min, q.Max = min, max
	q.InclusiveMin, q.InclusiveMax = minInclusive, maxInclusive
	return q
}

// SetBoost records b as the boost for this query.
func (q *NumericRangeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *NumericRangeQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to field f.
func (q *NumericRangeQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field set with SetField, or "" when none was set.
func (q *NumericRangeQuery) Field() string {
	return q.FieldVal
}

// Searcher builds the numeric range searcher over reader i. The field
// defaults to the mapping's default search field when none was set.
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, boost, options)
}

// Validate reports an error when neither Min nor Max is set.
func (q *NumericRangeQuery) Validate() error {
	if q.Min == nil && q.Max == nil {
		return fmt.Errorf("numeric range query must specify min or max")
	}
	return nil
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// PhraseQuery describes a search for an exact sequence of terms in one field.
type PhraseQuery struct {
	Terms    []string `json:"terms"`
	Field    string   `json:"field,omitempty"`
	BoostVal *Boost   `json:"boost,omitempty"`
}

// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *PhraseQuery {
	q := new(PhraseQuery)
	q.Terms, q.Field = terms, field
	return q
}

// SetBoost records b as the boost for this query.
func (q *PhraseQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *PhraseQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// Searcher builds the phrase searcher over reader i for the stored terms and
// field. The mapping and the boost are not passed to the searcher.
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options)
}

// Validate reports an error when the query holds no terms.
func (q *PhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}

// UnmarshalJSON decodes data into q. Decoding goes through a local type
// with the same fields but no methods, so json.Unmarshal does not call back
// into this method.
func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
	type plain PhraseQuery
	var decoded plain
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	q.Terms, q.Field, q.BoostVal = decoded.Terms, decoded.Field, decoded.BoostVal
	return nil
}

View file

@ -0,0 +1,62 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// PrefixQuery describes a search for terms that start with Prefix.
type PrefixQuery struct {
	Prefix   string `json:"prefix"`
	FieldVal string `json:"field,omitempty"`
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewPrefixQuery creates a new Query which finds
// documents containing terms that start with the
// specified prefix.
func NewPrefixQuery(prefix string) *PrefixQuery {
	q := new(PrefixQuery)
	q.Prefix = prefix
	return q
}

// SetBoost records b as the boost for this query.
func (q *PrefixQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *PrefixQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to field f.
func (q *PrefixQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field set with SetField, or "" when none was set.
func (q *PrefixQuery) Field() string {
	return q.FieldVal
}

// Searcher builds the term-prefix searcher over reader i. The field defaults
// to the mapping's default search field when none was set.
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewTermPrefixSearcher(i, q.Prefix, field, boost, options)
}

View file

@ -0,0 +1,356 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
)
// logger is the package-level logger. Until SetLog replaces it, it writes to
// ioutil.Discard, so nothing is emitted.
// NOTE(review): the prefix is "bleve mapping " although this is package
// query; it may have been copied from the mapping package — confirm before
// changing, since log consumers may match on it.
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
// SetLog sets the logger used for logging
// by default log messages are sent to ioutil.Discard
//
// The assignment is a plain write to a package variable with no locking.
func SetLog(l *log.Logger) {
logger = l
}
// A Query represents a description of the type
// and parameters for a query into the index.
type Query interface {
// Searcher returns the search.Searcher for this query, given the index
// reader i, the index mapping m and the searcher options.
Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error)
}
// A BoostableQuery represents a Query which can be boosted
// relative to other queries.
type BoostableQuery interface {
Query
// SetBoost sets the boost of the query to b.
SetBoost(b float64)
// Boost returns the boost of the query.
Boost() float64
}
// A FieldableQuery represents a Query which can be restricted
// to a single field.
type FieldableQuery interface {
Query
// SetField restricts the query to the field named f.
SetField(f string)
// Field returns the name of the field the query is restricted to.
Field() string
}
// A ValidatableQuery represents a Query which can be validated
// prior to execution.
type ValidatableQuery interface {
Query
// Validate returns a non-nil error when the query is not valid.
Validate() error
}
// ParseQuery deserializes a JSON representation of
// a Query object.
//
// The JSON does not name its query type. The type is inferred from which
// keys are present: the checks below run in a fixed order and the input is
// decoded into the first type whose keys match. An error is returned when
// the input cannot be decoded into a JSON object map, when decoding into the
// selected type fails, or when no check matches ("unknown query type").
func ParseQuery(input []byte) (Query, error) {
	var keys map[string]interface{}
	if err := json.Unmarshal(input, &keys); err != nil {
		return nil, err
	}

	// has reports whether the top-level object contains the key name.
	has := func(name string) bool {
		_, present := keys[name]
		return present
	}
	// isNumber / isString report whether name is present with that JSON type.
	isNumber := func(name string) bool {
		_, ok := keys[name].(float64)
		return ok
	}
	isString := func(name string) bool {
		_, ok := keys[name].(string)
		return ok
	}
	// decode unmarshals the full input into target and returns it as a Query.
	decode := func(target Query) (Query, error) {
		if err := json.Unmarshal(input, target); err != nil {
			return nil, err
		}
		return target, nil
	}

	isMatchQuery := has("match")

	switch {
	case has("fuzziness") && !isMatchQuery:
		// "fuzziness" alone means a fuzzy query; combined with "match" it is
		// an option of the match query handled below.
		return decode(new(FuzzyQuery))
	case has("term"):
		return decode(new(TermQuery))
	case isMatchQuery:
		return decode(new(MatchQuery))
	case has("match_phrase"):
		return decode(new(MatchPhraseQuery))
	case has("must") || has("should") || has("must_not"):
		return decode(new(BooleanQuery))
	case has("terms"):
		// "terms" is shared by phrase and multi-phrase queries: try the
		// phrase form first and fall back to multi-phrase if it fails.
		if phrase, err := decode(new(PhraseQuery)); err == nil {
			return phrase, nil
		}
		return decode(new(MultiPhraseQuery))
	case has("conjuncts"):
		return decode(new(ConjunctionQuery))
	case has("disjuncts"):
		return decode(new(DisjunctionQuery))
	case has("query"):
		return decode(new(QueryStringQuery))
	case isNumber("min") || isNumber("max"):
		return decode(new(NumericRangeQuery))
	case isString("min") || isString("max"):
		return decode(new(TermRangeQuery))
	case has("start") || has("end"):
		return decode(new(DateRangeQuery))
	case has("prefix"):
		return decode(new(PrefixQuery))
	case has("regexp"):
		return decode(new(RegexpQuery))
	case has("wildcard"):
		return decode(new(WildcardQuery))
	case has("match_all"):
		return decode(new(MatchAllQuery))
	case has("match_none"):
		return decode(new(MatchNoneQuery))
	case has("ids"):
		return decode(new(DocIDQuery))
	case has("bool"):
		return decode(new(BoolFieldQuery))
	case has("top_left") && has("bottom_right"):
		return decode(new(GeoBoundingBoxQuery))
	case has("distance"):
		return decode(new(GeoDistanceQuery))
	}

	return nil, fmt.Errorf("unknown query type")
}
// expandQuery traverses the input query tree and returns a new tree where
// query string queries have been expanded into base queries. Returned tree may
// reference queries from the input tree or new queries.
//
// Conjunction, disjunction and boolean nodes are shallow-copied before their
// children are replaced, so the input nodes are not modified. Every other
// query type is returned as is. The mapping m is not used.
func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
	var expand func(Query) (Query, error)

	// expandEach expands every element of queries, stopping at the first error.
	expandEach := func(queries []Query) ([]Query, error) {
		out := []Query{}
		for _, child := range queries {
			sub, err := expand(child)
			if err != nil {
				return nil, err
			}
			out = append(out, sub)
		}
		return out, nil
	}

	expand = func(query Query) (Query, error) {
		switch node := query.(type) {
		case *QueryStringQuery:
			parsed, err := parseQuerySyntax(node.Query)
			if err != nil {
				return nil, fmt.Errorf("could not parse '%s': %s", node.Query, err)
			}
			// The parsed tree may itself contain nodes that need expanding.
			return expand(parsed)

		case *ConjunctionQuery:
			dup := *node
			children, err := expandEach(dup.Conjuncts)
			if err != nil {
				return nil, err
			}
			dup.Conjuncts = children
			return &dup, nil

		case *DisjunctionQuery:
			dup := *node
			children, err := expandEach(dup.Disjuncts)
			if err != nil {
				return nil, err
			}
			dup.Disjuncts = children
			return &dup, nil

		case *BooleanQuery:
			dup := *node
			var err error
			if dup.Must, err = expand(dup.Must); err != nil {
				return nil, err
			}
			if dup.Should, err = expand(dup.Should); err != nil {
				return nil, err
			}
			if dup.MustNot, err = expand(dup.MustNot); err != nil {
				return nil, err
			}
			return &dup, nil

		default:
			return query, nil
		}
	}

	return expand(query)
}
// DumpQuery returns a string representation of the query tree, where query
// string queries have been expanded into base queries. The output format is
// meant for debugging purpose and may change in the future.
//
// The representation is the indented JSON encoding of the expanded tree.
func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
	expanded, err := expandQuery(m, query)
	if err != nil {
		return "", err
	}
	encoded, err := json.MarshalIndent(expanded, "", " ")
	return string(encoded), err
}

View file

@ -0,0 +1,63 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
)
// QueryStringQuery holds a query written in the query string syntax. The
// string is parsed each time Searcher or Validate is called.
type QueryStringQuery struct {
	Query    string `json:"query"`
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewQueryStringQuery creates a new Query used for
// finding documents that satisfy a query string. The
// query string is a small query language for humans.
func NewQueryStringQuery(query string) *QueryStringQuery {
	q := new(QueryStringQuery)
	q.Query = query
	return q
}

// SetBoost records b as the boost for this query.
func (q *QueryStringQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost as computed by BoostVal.Value().
func (q *QueryStringQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// Searcher parses the query string and returns the searcher of the parsed
// query. A parse failure is returned as the error.
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	parsed, err := parseQuerySyntax(q.Query)
	if err != nil {
		return nil, err
	}
	return parsed.Searcher(i, m, options)
}

// Validate parses the query string and, when the parsed query is itself a
// ValidatableQuery, returns the result of validating it. A parse failure is
// returned as the error.
func (q *QueryStringQuery) Validate() error {
	parsed, err := parseQuerySyntax(q.Query)
	if err != nil {
		return err
	}
	validatable, ok := parsed.(ValidatableQuery)
	if !ok {
		return nil
	}
	return validatable.Validate()
}

View file

@ -0,0 +1,328 @@
%{
package query
import (
"fmt"
"strconv"
"strings"
"time"
)
// logDebugGrammar forwards a formatted trace message to the package logger,
// but only while debugParser is set; otherwise it does nothing.
func logDebugGrammar(format string, v ...interface{}) {
	if !debugParser {
		return
	}
	logger.Printf(format, v...)
}
%}
/* Semantic value carried by grammar symbols. */
%union {
	s  string
	n  int
	f  float64
	q  Query
	pf *float64
}

/* Tokens produced by the lexer. The order determines the generated token
   numbers, so it must not be changed. */
%token tSTRING tPHRASE tPLUS tMINUS tCOLON tBOOST tNUMBER tGREATER tLESS
tEQUAL tTILDE

/* Tokens and non-terminals that carry a string value. */
%type <s> tSTRING
%type <s> tPHRASE
%type <s> tNUMBER
%type <s> posOrNegNumber
%type <s> tTILDE
%type <s> tBOOST

/* Non-terminals: the parsed query, the optional boost, the clause kind. */
%type <q> searchBase
%type <pf> searchSuffix
%type <n> searchPrefix
%%
/* input is the start symbol: a complete query string is a sequence of
   search parts. The action only emits a debug trace. */
input:
searchParts {
logDebugGrammar("INPUT")
};
/* searchParts is one or more searchPart, defined right-recursively.
   The actions only emit debug traces. */
searchParts:
searchPart searchParts {
logDebugGrammar("SEARCH PARTS")
}
|
searchPart {
logDebugGrammar("SEARCH PART")
};
/* searchPart is a single clause: optional +/- prefix, the base query, and an
   optional boost suffix. A non-nil boost is applied when the base query is a
   BoostableQuery. The clause is then added to the query held by the lexer
   wrapper as a should, must or must-not clause according to the prefix. */
searchPart:
searchPrefix searchBase searchSuffix {
query := $2
if $3 != nil {
if query, ok := query.(BoostableQuery); ok {
query.SetBoost(*$3)
}
}
switch($1) {
case queryShould:
yylex.(*lexerWrapper).query.AddShould(query)
case queryMust:
yylex.(*lexerWrapper).query.AddMust(query)
case queryMustNot:
yylex.(*lexerWrapper).query.AddMustNot(query)
}
};
/* searchPrefix yields the clause kind: queryShould when no prefix is given,
   queryMust for tPLUS, queryMustNot for tMINUS. */
searchPrefix:
/* empty */ {
$$ = queryShould
}
|
tPLUS {
logDebugGrammar("PLUS")
$$ = queryMust
}
|
tMINUS {
logDebugGrammar("MINUS")
$$ = queryMustNot
};
/* searchBase builds the Query for one clause. Each alternative below handles
   one surface form. When a number, fuzziness or time fails to parse, the
   error is reported through the lexer's Error method and the action still
   continues with the value the failed parse returned. */
searchBase:
/* bare string: /.../ becomes a regexp query, a string containing * or ?
   becomes a wildcard query, anything else a match query */
tSTRING {
str := $1
logDebugGrammar("STRING - %s", str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1:len(str)-1])
} else if strings.ContainsAny(str, "*?"){
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
$$ = q
}
|
/* string followed by a tilde token: match query with fuzziness
   (the value is parsed as a float, then truncated to int) */
tSTRING tTILDE {
str := $1
fuzziness, err := strconv.ParseFloat($2, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
$$ = q
}
|
/* field:string followed by a tilde token: fuzzy match query on a field */
tSTRING tCOLON tSTRING tTILDE {
field := $1
str := $3
fuzziness, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
q.SetField(field)
$$ = q
}
|
/* bare number: disjunction of a match query on the text and a numeric
   range query whose inclusive bounds both equal the value */
tNUMBER {
str := $1
logDebugGrammar("STRING - %s", str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
/* quoted phrase: match phrase query */
tPHRASE {
phrase := $1
logDebugGrammar("PHRASE - %s", phrase)
q := NewMatchPhraseQuery(phrase)
$$ = q
}
|
/* field:string: same regexp / wildcard / match choice as the bare string,
   restricted to the field */
tSTRING tCOLON tSTRING {
field := $1
str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1:len(str)-1])
} else if strings.ContainsAny(str, "*?"){
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
q.SetField(field)
$$ = q
}
|
/* field:number (optionally negative): match-or-numeric-equality disjunction
   on the field */
tSTRING tCOLON posOrNegNumber {
field := $1
str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat($3, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
/* field:"phrase": match phrase query on the field */
tSTRING tCOLON tPHRASE {
field := $1
phrase := $3
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := NewMatchPhraseQuery(phrase)
q.SetField(field)
$$ = q
}
|
/* field:>number: numeric range with exclusive minimum, no maximum */
tSTRING tCOLON tGREATER posOrNegNumber {
field := $1
min, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:>=number: numeric range with inclusive minimum, no maximum */
tSTRING tCOLON tGREATER tEQUAL posOrNegNumber {
field := $1
min, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:<number: numeric range with exclusive maximum, no minimum */
tSTRING tCOLON tLESS posOrNegNumber {
field := $1
max, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:<=number: numeric range with inclusive maximum, no minimum */
tSTRING tCOLON tLESS tEQUAL posOrNegNumber {
field := $1
max, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:>"time": date range with exclusive start; the end is the zero
   time.Time */
tSTRING tCOLON tGREATER tPHRASE {
field := $1
minInclusive := false
phrase := $4
logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:>="time": date range with inclusive start; the end is the zero
   time.Time */
tSTRING tCOLON tGREATER tEQUAL tPHRASE {
field := $1
minInclusive := true
phrase := $5
logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:<"time": date range with exclusive end; the start is the zero
   time.Time */
tSTRING tCOLON tLESS tPHRASE {
field := $1
maxInclusive := false
phrase := $4
logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:<="time": date range with inclusive end; the start is the zero
   time.Time */
tSTRING tCOLON tLESS tEQUAL tPHRASE {
field := $1
maxInclusive := true
phrase := $5
logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
$$ = q
};
/* searchSuffix yields the optional boost as a *float64: nil when no boost
   token is present or when its text does not parse as a float (in which case
   the error is reported through the lexer's Error method). */
searchSuffix:
/* empty */ {
$$ = nil
}
|
tBOOST {
$$ = nil
boost, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
} else {
$$ = &boost
}
logDebugGrammar("BOOST %f", boost)
};
/* posOrNegNumber yields the text of a number, prefixed with "-" when the
   number token is preceded by tMINUS. */
posOrNegNumber:
tNUMBER {
$$ = $1
}
|
tMINUS tNUMBER {
$$ = "-" + $2
};

View file

@ -0,0 +1,815 @@
package query
import __yyfmt__ "fmt"
//line query_string.y:2
import (
"fmt"
"strconv"
"strings"
"time"
)
// logDebugGrammar forwards a formatted trace message to the package logger,
// but only while debugParser is set; otherwise it does nothing.
func logDebugGrammar(format string, v ...interface{}) {
	if !debugParser {
		return
	}
	logger.Printf(format, v...)
}
//line query_string.y:17
// yySymType is the semantic value type passed between the lexer and the
// parser (see yyLexer.Lex) and kept on the parser stack. Apart from yys, the
// fields mirror the %union of the grammar.
type yySymType struct {
yys int
s string
n int
f float64
q Query
pf *float64
}
// Token codes the lexer returns from Lex. They are consecutive, starting at
// yyPrivate+2; yylex1 translates them to internal token numbers via yyTok2.
const tSTRING = 57346
const tPHRASE = 57347
const tPLUS = 57348
const tMINUS = 57349
const tCOLON = 57350
const tBOOST = 57351
const tNUMBER = 57352
const tGREATER = 57353
const tLESS = 57354
const tEQUAL = 57355
const tTILDE = 57356
// yyToknames holds the printable token names. yyTokname looks a token up at
// index (token number - 1).
var yyToknames = [...]string{
"$end",
"error",
"$unk",
"tSTRING",
"tPHRASE",
"tPLUS",
"tMINUS",
"tCOLON",
"tBOOST",
"tNUMBER",
"tGREATER",
"tLESS",
"tEQUAL",
"tTILDE",
}
// yyStatenames holds printable state names for yyStatname; it is empty here,
// so yyStatname always falls back to the "state-N" form.
var yyStatenames = [...]string{}
// NOTE(review): yyEofCode and yyErrCode equal the token numbers that
// yyToknames labels "$end" and "error" — presumably they are those tokens;
// their use is outside this view.
const yyEofCode = 1
const yyErrCode = 2
// yyInitialStackSize is the length of the stack array in yyParserImpl.
const yyInitialStackSize = 16
//line yacctab:1
// The declarations below are the parse tables. NOTE(review): they appear to
// be goyacc output for query_string.y (see the //line directives); if so
// they should be regenerated rather than edited by hand.

// yyExca is a list of integer pairs. As read by yyErrorMessage, a pair
// (-1, state) opens the section for a state whose yyDef entry is -2, the
// following pairs are (token, action), and a pair with a negative first
// element closes the section.
var yyExca = [...]int{
-1, 1,
1, -1,
-2, 0,
-1, 3,
1, 3,
-2, 5,
}
// NOTE(review): yyNprod is presumably the number of grammar productions
// (yyR1 and yyR2 each have 28 entries) — confirm against goyacc.
const yyNprod = 28
// yyPrivate is the base of the lexer token codes that yylex1 maps through
// yyTok2.
const yyPrivate = 57344
var yyTokenNames []string
var yyStates []string
// yyLast is used by yyErrorMessage as the upper bound for indexes into yyAct.
const yyLast = 42
// yyAct is indexed by yyPact[state]+token in yyErrorMessage.
var yyAct = [...]int{
17, 16, 18, 23, 22, 30, 3, 21, 19, 20,
29, 26, 22, 22, 1, 21, 21, 15, 28, 25,
24, 27, 34, 14, 22, 13, 31, 21, 32, 33,
22, 9, 11, 21, 5, 6, 2, 10, 4, 12,
7, 8,
}
// yyPact gives, per state, the base offset into yyAct.
var yyPact = [...]int{
28, -1000, -1000, 28, 27, -1000, -1000, -1000, 16, 9,
-1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000, 6,
5, -1000, -5, -1000, -1000, 23, -1000, -1000, 17, -1000,
-1000, -1000, -1000, -1000, -1000,
}
var yyPgo = [...]int{
0, 0, 41, 39, 38, 14, 36, 7,
}
var yyR1 = [...]int{
0, 5, 6, 6, 7, 4, 4, 4, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 1, 1,
}
var yyR2 = [...]int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 2,
4, 1, 1, 3, 3, 3, 4, 5, 4, 5,
4, 5, 4, 5, 0, 1, 1, 2,
}
// yyChk is compared against a token number in yyErrorMessage to decide
// whether a yyAct entry applies to that token.
var yyChk = [...]int{
-1000, -5, -6, -7, -4, 6, 7, -6, -2, 4,
10, 5, -3, 9, 14, 8, 4, -1, 5, 11,
12, 10, 7, 14, -1, 13, 5, -1, 13, 5,
10, -1, 5, -1, 5,
}
// yyDef is indexed by state; the value -2 sends yyErrorMessage to yyExca.
var yyDef = [...]int{
5, -2, 1, -2, 0, 6, 7, 2, 24, 8,
11, 12, 4, 25, 9, 0, 13, 14, 15, 0,
0, 26, 0, 10, 16, 0, 20, 18, 0, 22,
27, 17, 21, 19, 23,
}
// yyTok1, yyTok2 and yyTok3 are the lookup tables yylex1 uses to translate a
// lexer code into an internal token number.
var yyTok1 = [...]int{
1,
}
var yyTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14,
}
var yyTok3 = [...]int{
0,
}
// yyErrorMessages lists custom messages per (state, token) for
// yyErrorMessage; it is empty here.
var yyErrorMessages = [...]struct {
state int
token int
msg string
}{}
//line yaccpar:1
/* parser for yacc output */
// yyDebug controls diagnostic output; at 3 or above yylex1 prints every
// token it reads. yyErrorVerbose makes yyErrorMessage describe the unexpected
// and expected tokens instead of returning just "syntax error".
var (
yyDebug = 0
yyErrorVerbose = false
)
// yyLexer is the lexer the parser reads tokens from.
type yyLexer interface {
// Lex returns the code of the next token and receives lval to fill in;
// yylex1 treats a result <= 0 as yyTok1[0].
Lex(lval *yySymType) int
// Error receives an error message.
Error(s string)
}
// yyParser is the parser interface returned by yyNewParser.
type yyParser interface {
// Parse runs the parser over the tokens supplied by the lexer.
Parse(yyLexer) int
// Lookahead returns the parser's current lookahead value.
Lookahead() int
}
// yyParserImpl is the yyParser implementation. stack is the initial
// backing array for the parse stack (yyInitialStackSize entries).
type yyParserImpl struct {
lval yySymType
stack [yyInitialStackSize]yySymType
char int
}
// Lookahead returns the value of the char field.
func (p *yyParserImpl) Lookahead() int {
return p.char
}
// yyNewParser returns a new zero-valued parser.
func yyNewParser() yyParser {
return &yyParserImpl{}
}
// yyFlag is a sentinel; the same value, -1000, appears as an entry in the
// yyPact and yyChk tables.
const yyFlag = -1000
// yyTokname returns the printable name of token number c. Names come from
// yyToknames at index c-1; when c is out of range or the entry is empty, the
// fallback form "tok-<c>" is returned.
func yyTokname(c int) string {
	idx := c - 1
	if idx < 0 || idx >= len(yyToknames) || yyToknames[idx] == "" {
		return __yyfmt__.Sprintf("tok-%v", c)
	}
	return yyToknames[idx]
}
// yyStatname returns the printable name of state s. Names come from
// yyStatenames at index s; when s is out of range or the entry is empty, the
// fallback form "state-<s>" is returned.
func yyStatname(s int) string {
	if s < 0 || s >= len(yyStatenames) || yyStatenames[s] == "" {
		return __yyfmt__.Sprintf("state-%v", s)
	}
	return yyStatenames[s]
}
// yyErrorMessage builds the syntax error text for an unexpected lookAhead
// token in the given parser state.
//
// With yyErrorVerbose unset it returns just "syntax error". Otherwise a
// custom message from yyErrorMessages is used when one matches; failing that
// the message names the unexpected token and, when at most four tokens would
// have been acceptable, lists them as "expecting A or B ...".
func yyErrorMessage(state, lookAhead int) string {
// Token numbers below TOKSTART are skipped when collecting expected tokens.
const TOKSTART = 4
if !yyErrorVerbose {
return "syntax error"
}
for _, e := range yyErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + yyTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := yyPact[state]
for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
if n := base + tok; n >= 0 && n < yyLast && yyChk[yyAct[n]] == tok {
// A fifth candidate: give up on listing and return the short form.
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if yyDef[state] == -2 {
// Find this state's section in yyExca (it starts with the pair -1, state).
i := 0
for yyExca[i] != -1 || yyExca[i+1] != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; yyExca[i] >= 0; i += 2 {
tok := yyExca[i]
if tok < TOKSTART || yyExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if yyExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += yyTokname(tok)
}
return res
}
// yylex1 reads one token from lex and translates it to the parser's internal
// numbering. It returns both the raw value reported by the lexer (char) and
// the translated token.
//
// Values <= 0 map to yyTok1[0]; small values are looked up in yyTok1, values
// from yyPrivate upwards in yyTok2, and anything else is searched for in the
// (value, token) pairs of yyTok3. A value with no translation becomes
// yyTok2[1], the "unknown char" token.
func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = yyTok1[0]
goto out
}
if char < len(yyTok1) {
token = yyTok1[char]
goto out
}
if char >= yyPrivate {
if char < yyPrivate+len(yyTok2) {
token = yyTok2[char-yyPrivate]
goto out
}
}
// yyTok3 is a flat list of (value, token) pairs
for i := 0; i < len(yyTok3); i += 2 {
token = yyTok3[i+0]
if token == char {
token = yyTok3[i+1]
goto out
}
}
out:
if token == 0 {
token = yyTok2[1] /* unknown char */
}
if yyDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
}
return char, token
}
// yyParse parses the token stream produced by yylex with a freshly created
// parser and returns that parser's result code.
func yyParse(yylex yyLexer) int {
	parser := yyNewParser()
	return parser.Parse(yylex)
}
// Parse runs the table-driven parser over the tokens supplied by yylex.
//
// It returns 0 when parsing stops through the exception table (ret0) and 1
// when error recovery gives up (ret1). Syntax errors are reported through
// yylex.Error. The grammar actions in the switch at the end of the function
// build the resulting query by calling AddShould/AddMust/AddMustNot on the
// query held by the *lexerWrapper that yylex is asserted to be.
func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
var yyn int
var yyVAL yySymType
var yyDollar []yySymType
_ = yyDollar // silence set and not used
yyS := yyrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
yystate := 0
yyrcvr.char = -1
yytoken := -1 // yyrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
yystate = -1
yyrcvr.char = -1
yytoken = -1
}()
yyp := -1
goto yystack
ret0:
return 0
ret1:
return 1
yystack:
/* put a state and value onto the stack */
if yyDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
}
yyp++
// grow the stack by doubling once the current storage is full
if yyp >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyS[yyp] = yyVAL
yyS[yyp].yys = yystate
yynewstate:
yyn = yyPact[yystate]
if yyn <= yyFlag {
goto yydefault /* simple state */
}
// fetch a lookahead token only when none is pending
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
yyn += yytoken
if yyn < 0 || yyn >= yyLast {
goto yydefault
}
yyn = yyAct[yyn]
if yyChk[yyn] == yytoken { /* valid shift */
yyrcvr.char = -1
yytoken = -1
yyVAL = yyrcvr.lval
yystate = yyn
if Errflag > 0 {
Errflag--
}
goto yystack
}
yydefault:
/* default state action */
yyn = yyDef[yystate]
if yyn == -2 {
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
/* look through exception table */
xi := 0
for {
if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
yyn = yyExca[xi+0]
if yyn < 0 || yyn == yytoken {
break
}
}
yyn = yyExca[xi+1]
// a negative action in the exception table ends the parse with result 0
if yyn < 0 {
goto ret0
}
}
if yyn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
yylex.Error(yyErrorMessage(yystate, yytoken))
Nerrs++
if yyDebug >= 1 {
__yyfmt__.Printf("%s", yyStatname(yystate))
__yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for yyp >= 0 {
yyn = yyPact[yyS[yyp].yys] + yyErrCode
if yyn >= 0 && yyn < yyLast {
yystate = yyAct[yyn] /* simulate a shift of "error" */
if yyChk[yystate] == yyErrCode {
goto yystack
}
}
/* the current p has no shift on "error", pop stack */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
}
yyp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
}
if yytoken == yyEofCode {
goto ret1
}
yyrcvr.char = -1
yytoken = -1
goto yynewstate /* try again in the same state */
}
}
/* reduction by production yyn */
if yyDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
}
yynt := yyn
yypt := yyp
_ = yypt // guard against "declared and not used"
yyp -= yyR2[yyn]
// yyp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if yyp+1 >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyVAL = yyS[yyp+1]
/* consult goto table to find next state */
yyn = yyR1[yyn]
yyg := yyPgo[yyn]
yyj := yyg + yyS[yyp].yys + 1
if yyj >= yyLast {
yystate = yyAct[yyg]
} else {
yystate = yyAct[yyj]
if yyChk[yystate] != -yyn {
yystate = yyAct[yyg]
}
}
// dummy call; replaced with literal code
switch yynt {
case 1:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:40
{
logDebugGrammar("INPUT")
}
case 2:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:45
{
logDebugGrammar("SEARCH PARTS")
}
case 3:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:49
{
logDebugGrammar("SEARCH PART")
}
case 4:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:54
{
query := yyDollar[2].q
if yyDollar[3].pf != nil {
if query, ok := query.(BoostableQuery); ok {
query.SetBoost(*yyDollar[3].pf)
}
}
switch yyDollar[1].n {
case queryShould:
yylex.(*lexerWrapper).query.AddShould(query)
case queryMust:
yylex.(*lexerWrapper).query.AddMust(query)
case queryMustNot:
yylex.(*lexerWrapper).query.AddMustNot(query)
}
}
case 5:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:73
{
yyVAL.n = queryShould
}
case 6:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:77
{
logDebugGrammar("PLUS")
yyVAL.n = queryMust
}
case 7:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:82
{
logDebugGrammar("MINUS")
yyVAL.n = queryMustNot
}
case 8:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:88
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1 : len(str)-1])
} else if strings.ContainsAny(str, "*?") {
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
yyVAL.q = q
}
case 9:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:102
{
str := yyDollar[1].s
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
yyVAL.q = q
}
case 10:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:114
{
field := yyDollar[1].s
str := yyDollar[3].s
fuzziness, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
q.SetField(field)
yyVAL.q = q
}
case 11:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:128
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 12:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:143
{
phrase := yyDollar[1].s
logDebugGrammar("PHRASE - %s", phrase)
q := NewMatchPhraseQuery(phrase)
yyVAL.q = q
}
case 13:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:150
{
field := yyDollar[1].s
str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1 : len(str)-1])
} else if strings.ContainsAny(str, "*?") {
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
q.SetField(field)
yyVAL.q = q
}
case 14:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:166
{
field := yyDollar[1].s
str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat(yyDollar[3].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 15:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:184
{
field := yyDollar[1].s
phrase := yyDollar[3].s
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := NewMatchPhraseQuery(phrase)
q.SetField(field)
yyVAL.q = q
}
case 16:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:193
{
field := yyDollar[1].s
min, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 17:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:206
{
field := yyDollar[1].s
min, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 18:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:219
{
field := yyDollar[1].s
max, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 19:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:232
{
field := yyDollar[1].s
max, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 20:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:245
{
field := yyDollar[1].s
minInclusive := false
phrase := yyDollar[4].s
logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 21:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:260
{
field := yyDollar[1].s
minInclusive := true
phrase := yyDollar[5].s
logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 22:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:275
{
field := yyDollar[1].s
maxInclusive := false
phrase := yyDollar[4].s
logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 23:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:290
{
field := yyDollar[1].s
maxInclusive := true
phrase := yyDollar[5].s
logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 24:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:306
{
yyVAL.pf = nil
}
case 25:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:310
{
yyVAL.pf = nil
boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
} else {
yyVAL.pf = &boost
}
logDebugGrammar("BOOST %f", boost)
}
case 26:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:322
{
yyVAL.s = yyDollar[1].s
}
case 27:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:326
{
yyVAL.s = "-" + yyDollar[2].s
}
}
goto yystack /* stack new state and value */
}

View file

@ -0,0 +1,322 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"bufio"
"io"
"strings"
"unicode"
)
// reservedChars lists the characters that may be escaped with a backslash in
// a query string.
const reservedChars = "+-=&|><!(){}[]^\"~*?:\\/ "

// unescape resolves the text that followed a backslash. If it contains a
// reserved character the backslash is dropped and the text is returned as is;
// otherwise the backslash is put back in front of it.
func unescape(escaped string) string {
	if !strings.ContainsAny(escaped, reservedChars) {
		// not an escapable character: keep the \ intact
		return "\\" + escaped
	}
	return escaped
}
// queryStringLex is the lexer for the query string syntax. It is a state
// machine: Lex feeds runes read from in to the current lexState function
// until one of them produces a token.
type queryStringLex struct {
// in is the source of runes.
in *bufio.Reader
// buf accumulates the text of the token being built.
buf string
// currState is the state function that handles the next rune.
currState lexState
// currConsumed records whether the last state call consumed nextRune;
// Lex only reads a new rune when it is true.
currConsumed bool
// inEscape is set after a backslash; the following rune is passed
// through unescape.
inEscape bool
// nextToken and nextTokenType hold the token a state function has
// produced, until Lex hands it to the parser.
nextToken *yySymType
nextTokenType int
// seenDot is consulted by inNumOrStrState when it meets a '.'; reset
// clears it.
seenDot bool
// nextRune and nextRuneSize are the results of the last ReadRune call.
nextRune rune
nextRuneSize int
// atEOF is set once ReadRune has reported io.EOF.
atEOF bool
}
// reset discards the per-token state (buffer, escape flag, dot flag) so the
// next token starts clean.
func (l *queryStringLex) reset() {
	l.buf, l.inEscape, l.seenDot = "", false, false
}
// Error aborts lexing/parsing by panicking with msg. The panic is expected to
// be recovered by the caller driving the parse (doParse does so and records
// it as a "parse error").
func (l *queryStringLex) Error(msg string) {
panic(msg)
}
// Lex returns the type of the next token and stores its value in lval.
//
// It keeps feeding runes to the current state function until a token has been
// produced. A new rune is only read when the previous one was consumed. It
// returns 0 when a state function returns a nil state (end of input) or when
// reading fails with an error other than io.EOF.
func (l *queryStringLex) Lex(lval *yySymType) int {
	for l.nextToken == nil {
		if l.currConsumed {
			r, size, err := l.in.ReadRune()
			l.nextRune, l.nextRuneSize = r, size
			switch {
			case err == io.EOF:
				// let the state function see the end of input
				l.nextRune = 0
				l.atEOF = true
			case err != nil:
				return 0
			}
		}

		state, consumed := l.currState(l, l.nextRune, l.atEOF)
		l.currState, l.currConsumed = state, consumed
		if state == nil {
			return 0
		}
	}

	// hand the pending token over and clear it for the next call
	tokenType := l.nextTokenType
	*lval = *l.nextToken
	l.nextToken, l.nextTokenType = nil, 0
	return tokenType
}
// newQueryStringLex returns a lexer reading from in, positioned in the start
// state and ready to read its first rune.
func newQueryStringLex(in io.Reader) *queryStringLex {
	lexer := new(queryStringLex)
	lexer.in = bufio.NewReader(in)
	lexer.currState = startState
	// nothing has been read yet, so the first Lex call must fetch a rune
	lexer.currConsumed = true
	return lexer
}
// lexState is one state of the lexer. It is called with the current rune (and
// whether the input is at EOF) and returns the state to run next together
// with a flag telling whether the rune was consumed. A nil next state makes
// Lex stop and return 0.
type lexState func(l *queryStringLex, next rune, eof bool) (lexState, bool)
// startState is the state between tokens: it looks at the first rune of the
// next token and picks the state that will lex it.
func startState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		return nil, false
	}

	// a token that begins with an escaped rune is always a string
	if l.inEscape {
		l.inEscape = false
		l.buf += unescape(string(next))
		return inStrState, true
	}

	switch next {
	case '"':
		return inPhraseState, true
	case '+', '-', ':', '>', '<', '=':
		l.buf += string(next)
		return singleCharOpState, true
	case '^':
		return inBoostState, true
	case '~':
		return inTildeState, true
	case '\\':
		// remember the escape and look at the following rune from here
		l.inEscape = true
		return startState, true
	}

	if unicode.IsDigit(next) {
		l.buf += string(next)
		return inNumOrStrState, true
	}
	if !unicode.IsSpace(next) {
		l.buf += string(next)
		return inStrState, true
	}

	// whitespace: eat it and stay here
	l.reset()
	return startState, true
}
// inPhraseState lexes the inside of a double-quoted phrase. The phrase ends at
// the first unescaped '"', which produces a tPHRASE token. Reaching the end of
// input first is reported through l.Error.
func inPhraseState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		// the phrase was never closed
		l.Error("unterminated quote")
		return nil, false
	}

	switch {
	case l.inEscape:
		// the rune after a backslash ends the escape
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '"':
		// closing quote: emit the phrase
		l.nextTokenType = tPHRASE
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("PHRASE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inPhraseState, true
}
// singleCharOpState emits the token for the one-character operator that
// startState stored in l.buf. The rune it is called with belongs to whatever
// follows the operator, so it is handed back unconsumed.
func singleCharOpState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch op := l.buf; op {
	case "+":
		l.nextTokenType = tPLUS
		logDebugTokens("PLUS")
	case "-":
		l.nextTokenType = tMINUS
		logDebugTokens("MINUS")
	case ":":
		l.nextTokenType = tCOLON
		logDebugTokens("COLON")
	case ">":
		l.nextTokenType = tGREATER
		logDebugTokens("GREATER")
	case "<":
		l.nextTokenType = tLESS
		logDebugTokens("LESS")
	case "=":
		l.nextTokenType = tEQUAL
		logDebugTokens("EQUAL")
	}

	// operators carry no value, an empty token is enough
	l.nextToken = new(yySymType)
	l.reset()
	return startState, false
}
// inBoostState lexes the value following '^'. The value ends at an unescaped
// space or at the end of input and is emitted as a tBOOST token; an empty
// value is emitted as "1".
func inBoostState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof || (!l.inEscape && next == ' '):
		// end of the boost value
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tBOOST
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("BOOST - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case l.inEscape:
		// the rune after a backslash ends the escape
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inBoostState, true
}
// inTildeState lexes the value following '~'. The value ends at an unescaped
// space or at the end of input and is emitted as a tTILDE token; an empty
// value is emitted as "1".
func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof || (!l.inEscape && next == ' '):
		// end of the tilde value
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tTILDE
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("TILDE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case l.inEscape:
		// the rune after a backslash ends the escape
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inTildeState, true
}
// inNumOrStrState lexes a token that started with a digit and may still turn
// out to be either a number or a plain string.
//
// As long as only digits and at most one '.' are seen the token stays a
// number and is emitted as tNUMBER at an unescaped space or at the end of
// input. Any escape, any other rune, or a second '.' demotes it to a string
// and lexing continues in inStrState.
func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	// only a non-escaped space ends the number (or eof)
	if eof || (!l.inEscape && next == ' ') {
		// end number
		l.nextTokenType = tNUMBER
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("NUMBER - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	} else if !l.inEscape && next == '\\' {
		l.inEscape = true
		return inNumOrStrState, true
	} else if l.inEscape {
		// if in escape, end it
		l.inEscape = false
		l.buf += unescape(string(next))
		// go directly to string, no successfully or unsuccessfully
		// escaped string results in a valid number
		return inStrState, true
	}

	// see where to go
	if !l.seenDot && next == '.' {
		// first decimal point: stay in this state, but remember it so that
		// a second '.' (as in "1.2.3") turns the token into a string
		// instead of an unparsable number
		l.seenDot = true
		l.buf += string(next)
		return inNumOrStrState, true
	} else if unicode.IsDigit(next) {
		l.buf += string(next)
		return inNumOrStrState, true
	}

	// doesn't look like a number, transition
	l.buf += string(next)
	return inStrState, true
}
// inStrState lexes a plain string. The string ends at the end of input or at
// an unescaped space, ':', '^' or '~' and is emitted as a tSTRING token. The
// three operator runes are left unconsumed so that startState lexes them
// next; a terminating space is consumed.
func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	isOperator := next == ':' || next == '^' || next == '~'

	switch {
	case eof || (!l.inEscape && (next == ' ' || isOperator)):
		// end of the string
		l.nextTokenType = tSTRING
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("STRING - '%s'", l.nextToken.s)
		l.reset()
		return startState, eof || !isOperator
	case l.inEscape:
		// the rune after a backslash ends the escape
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inStrState, true
}
// logDebugTokens logs a lexer trace message through logger, but only when
// debugLexer is enabled.
func logDebugTokens(format string, v ...interface{}) {
	if !debugLexer {
		return
	}
	logger.Printf(format, v...)
}

View file

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// as of Go 1.8 this requires the goyacc external tool
// available from golang.org/x/tools/cmd/goyacc
//go:generate goyacc -o query_string.y.go query_string.y
//go:generate sed -i.tmp -e 1d query_string.y.go
//go:generate rm query_string.y.go.tmp
// note: OSX sed and gnu sed handle the -i (in-place) option differently.
// using -i.tmp works on both, at the expense of having to remove
// the unsightly .tmp files
package query
import (
"fmt"
"strings"
)
// debugParser and debugLexer are package-level debug switches. debugLexer
// enables the token trace written by logDebugTokens.
// NOTE(review): debugParser presumably gates logDebugGrammar in the same way;
// that function is not in this file, confirm there.
var debugParser bool
var debugLexer bool
// parseQuerySyntax parses a query string expression into a Query.
//
// An empty string yields a match-none query. Otherwise the string is lexed
// and parsed into the boolean query held by the lexer wrapper. If any errors
// were collected while parsing, they are returned as one error with the
// messages joined by newlines, and the query is nil.
func parseQuerySyntax(query string) (rq Query, err error) {
	if query == "" {
		return NewMatchNoneQuery(), nil
	}
	lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
	doParse(lex)

	if len(lex.errs) > 0 {
		// The collected messages embed user input (e.g. the text that failed
		// to parse as a number), so they must be passed as an argument and
		// never used as the format string itself: a '%' in the query would
		// otherwise be rendered as "%!x(MISSING)".
		return nil, fmt.Errorf("%s", strings.Join(lex.errs, "\n"))
	}
	return lex.query, nil
}
// doParse runs the generated parser over lex. A panic raised while parsing
// (the lexer's Error method panics) is recovered and recorded in lex.errs as
// a "parse error" instead of propagating.
func doParse(lex *lexerWrapper) {
	defer func() {
		if r := recover(); r != nil {
			lex.errs = append(lex.errs, fmt.Sprintf("parse error: %v", r))
		}
	}()

	yyParse(lex)
}
// Clause kinds produced by the grammar for an optional leading '+' or '-'.
// The parser uses them to choose between AddShould, AddMust and AddMustNot
// when adding a search part to the boolean query.
const (
queryShould = iota
queryMust
queryMustNot
)
// lexerWrapper is the yyLexer handed to the generated parser. It forwards Lex
// to the wrapped lexer, collects error messages in errs, and carries the
// boolean query that the grammar actions add clauses to.
type lexerWrapper struct {
lex yyLexer
errs []string
query *BooleanQuery
}
// newLexerWrapper wraps lex and pairs it with an empty boolean query (created
// by NewBooleanQueryForQueryString) for the parser to fill in.
func newLexerWrapper(lex yyLexer) *lexerWrapper {
	wrapper := new(lexerWrapper)
	wrapper.lex = lex
	wrapper.query = NewBooleanQueryForQueryString(nil, nil, nil)
	return wrapper
}
// Lex delegates to the wrapped lexer.
func (l *lexerWrapper) Lex(lval *yySymType) int {
return l.lex.Lex(lval)
}
// Error records the message s in errs; it does not stop the parse.
func (l *lexerWrapper) Error(s string) {
l.errs = append(l.errs, s)
}

View file

@ -0,0 +1,96 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"regexp"
"strings"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// RegexpQuery is a query that matches terms against a regular expression.
// Regexp, FieldVal and BoostVal are the JSON-visible settings; compiled
// caches the compiled pattern once compile has run.
type RegexpQuery struct {
Regexp string `json:"regexp"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
compiled *regexp.Regexp
}
// NewRegexpQuery creates a new Query which finds
// documents containing terms that match the
// specified regular expression. The regexp pattern
// SHOULD NOT include ^ or $ modifiers, the search
// will only match entire terms even without them.
// The pattern is not compiled here; that happens in
// Validate or Searcher.
func NewRegexpQuery(regexp string) *RegexpQuery {
	q := new(RegexpQuery)
	q.Regexp = regexp
	return q
}
// SetBoost stores b as the boost of the query.
func (q *RegexpQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
// Boost returns the value reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called; presumably
// (*Boost).Value tolerates a nil receiver. Confirm.
func (q *RegexpQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field the query searches. When it is left empty, Searcher
// falls back to the mapping's default search field.
func (q *RegexpQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field set with SetField, or "" if none was set.
func (q *RegexpQuery) Field() string {
return q.FieldVal
}
// Searcher builds the regexp searcher for this query over index reader i.
// The field defaults to the mapping's default search field when none was set.
// An error is returned if the pattern does not compile.
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	if err := q.compile(); err != nil {
		return nil, err
	}

	return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
}
// Validate reports whether the pattern compiles; the compiled form is cached
// on success.
func (q *RegexpQuery) Validate() error {
return q.compile()
}
// compile compiles the pattern and caches the result in q.compiled. It does
// nothing if a compiled pattern is already cached.
func (q *RegexpQuery) compile() error {
	if q.compiled != nil {
		return nil
	}

	// require that pattern NOT be anchored to start and end of term:
	// strip a single leading ^
	pattern := strings.TrimPrefix(q.Regexp, "^")

	// do not attempt to remove trailing $, its presence is not
	// known to interfere with LiteralPrefix() the way ^ does
	// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}
	q.compiled = compiled
	return nil
}

View file

@ -0,0 +1,61 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// TermQuery is a query for a single exact term. Term, FieldVal and BoostVal
// are its JSON-visible settings.
type TermQuery struct {
Term string `json:"term"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermQuery creates a new Query for finding an
// exact term match in the index. Field and boost
// are left unset.
func NewTermQuery(term string) *TermQuery {
	q := new(TermQuery)
	q.Term = term
	return q
}
// SetBoost stores b as the boost of the query.
func (q *TermQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
// Boost returns the value reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called; presumably
// (*Boost).Value tolerates a nil receiver. Confirm.
func (q *TermQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field the query searches. When it is left empty, Searcher
// falls back to the mapping's default search field.
func (q *TermQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field set with SetField, or "" if none was set.
func (q *TermQuery) Field() string {
return q.FieldVal
}
// Searcher builds the term searcher for this query over index reader i. The
// field defaults to the mapping's default search field when none was set.
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	boost := q.BoostVal.Value()
	return searcher.NewTermSearcher(i, q.Term, field, boost, options)
}

View file

@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// TermRangeQuery is a query for terms lying between Min and Max. An empty Min
// or Max leaves that end of the range open (Searcher passes a nil term for
// it). InclusiveMin and InclusiveMax optionally state whether each endpoint
// is included; they are handed to the searcher unchanged.
type TermRangeQuery struct {
Min string `json:"min,omitempty"`
Max string `json:"max,omitempty"`
InclusiveMin *bool `json:"inclusive_min,omitempty"`
InclusiveMax *bool `json:"inclusive_max,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermRangeQuery creates a new Query for ranges
// of text term values.
// Either, but not both endpoints can be the empty string,
// which leaves that end of the range open.
// The minimum value is inclusive.
// The maximum value is exclusive.
// NOTE(review): the inclusive/exclusive defaults are applied by the
// searcher when the inclusion flags are nil; confirm there.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
return NewTermRangeInclusiveQuery(min, max, nil, nil)
}
// NewTermRangeInclusiveQuery creates a new Query for ranges
// of text term values.
// Either, but not both endpoints can be the empty string,
// which leaves that end of the range open.
// Control endpoint inclusion with minInclusive, maxInclusive;
// a nil flag is passed through to the searcher as is.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
return &TermRangeQuery{
Min: min,
Max: max,
InclusiveMin: minInclusive,
InclusiveMax: maxInclusive,
}
}
// SetBoost stores b as the boost of the query.
func (q *TermRangeQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
// Boost returns the value reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called; presumably
// (*Boost).Value tolerates a nil receiver. Confirm.
func (q *TermRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field the query searches. When it is left empty, Searcher
// falls back to the mapping's default search field.
func (q *TermRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field set with SetField, or "" if none was set.
func (q *TermRangeQuery) Field() string {
return q.FieldVal
}
// Searcher builds the term range searcher for this query over index reader i.
// The field defaults to the mapping's default search field when none was set.
// An empty Min or Max is passed to the searcher as a nil term.
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	// nil stands for "no bound on this side"
	var minTerm, maxTerm []byte
	if q.Min != "" {
		minTerm = []byte(q.Min)
	}
	if q.Max != "" {
		maxTerm = []byte(q.Max)
	}

	return searcher.NewTermRangeSearcher(i, minTerm, maxTerm, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
// Validate rejects a range with neither endpoint set; a query with at least
// one of Min and Max is valid.
func (q *TermRangeQuery) Validate() error {
	if q.Min != "" || q.Max != "" {
		return nil
	}
	return fmt.Errorf("term range query must specify min or max")
}

View file

@ -0,0 +1,106 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"regexp"
"strings"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// wildcardRegexpReplacer rewrites a wildcard pattern into regular expression
// syntax: characters that are special in a regexp are backslash-escaped,
// while the wildcard characters '*' and '?' become ".*" and ".".
var wildcardRegexpReplacer = strings.NewReplacer(
// characters in the wildcard that must
// be escaped in the regexp
"+", `\+`,
"(", `\(`,
")", `\)`,
"^", `\^`,
"$", `\$`,
".", `\.`,
"{", `\{`,
"}", `\}`,
"[", `\[`,
"]", `\]`,
`|`, `\|`,
`\`, `\\`,
// wildcard characters
"*", ".*",
"?", ".")
// WildcardQuery is a query that matches terms against a wildcard pattern.
// Wildcard, FieldVal and BoostVal are the JSON-visible settings; compiled
// caches the regular expression the wildcard was converted to.
type WildcardQuery struct {
Wildcard string `json:"wildcard"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
compiled *regexp.Regexp
}
// NewWildcardQuery creates a new Query which finds
// documents containing terms that match the
// specified wildcard. In the wildcard pattern '*'
// will match any sequence of 0 or more characters,
// and '?' will match any single character.
// The pattern is converted to a regular expression
// later, in Validate or Searcher.
func NewWildcardQuery(wildcard string) *WildcardQuery {
	q := new(WildcardQuery)
	q.Wildcard = wildcard
	return q
}
// SetBoost stores b as the boost of the query.
func (q *WildcardQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
// Boost returns the value reported by BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called; presumably
// (*Boost).Value tolerates a nil receiver. Confirm.
func (q *WildcardQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field the query searches. When it is left empty, Searcher
// falls back to the mapping's default search field.
func (q *WildcardQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field set with SetField, or "" if none was set.
func (q *WildcardQuery) Field() string {
return q.FieldVal
}
// Searcher builds the regexp searcher that implements this wildcard query
// over index reader i. The field defaults to the mapping's default search
// field when none was set. The wildcard is converted to a regular expression
// on first use; a conversion failure is returned as the error.
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	if q.compiled == nil {
		compiled, err := q.convertToRegexp()
		if err != nil {
			return nil, err
		}
		q.compiled = compiled
	}

	return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
}
// Validate converts the wildcard to a regular expression, stores the result
// (replacing any previously cached one) and returns the conversion error, if
// any.
func (q *WildcardQuery) Validate() error {
	compiled, err := q.convertToRegexp()
	q.compiled = compiled
	return err
}
// convertToRegexp translates the wildcard pattern with wildcardRegexpReplacer
// and compiles the result.
func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
	return regexp.Compile(wildcardRegexpReplacer.Replace(q.Wildcard))
}

View file

@ -0,0 +1,65 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"github.com/blevesearch/bleve/search"
)
// ConjunctionQueryScorer combines the matches of a conjunction's constituent
// searchers into a single scored match.
type ConjunctionQueryScorer struct {
	// options controls optional work; Score consults options.Explain.
	options search.SearcherOptions
}

// NewConjunctionQueryScorer returns a scorer configured with options.
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
	s := new(ConjunctionQueryScorer)
	s.options = options
	return s
}
// Score folds the constituent matches of one document into a single match
// whose score is the sum of the constituent scores.
//
// The first constituent is reused as the return value: its Score and Expl are
// overwritten, and its Locations are replaced when at least one constituent
// carries locations (merged via search.MergeLocations when several do).
// When explanations are enabled, the result explains the sum with one child
// per constituent. ctx is not used by this implementation.
//
// constituents must be non-empty; indexing its first element panics otherwise.
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
	explain := s.options.Explain

	var (
		total     float64
		childExpl []*search.Explanation
		locs      []search.FieldTermLocationMap
	)
	if explain {
		childExpl = make([]*search.Explanation, len(constituents))
	}

	for idx, match := range constituents {
		total += match.Score
		if explain {
			// captured before the first constituent's Expl is overwritten below
			childExpl[idx] = match.Expl
		}
		if match.Locations != nil {
			locs = append(locs, match.Locations)
		}
	}

	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{Value: total, Message: "sum of:", Children: childExpl}
	}

	// recycle the first constituent instead of allocating a new match
	result := constituents[0]
	result.Score = total
	result.Expl = expl
	switch len(locs) {
	case 0:
		// no constituent had locations; keep whatever result already holds
	case 1:
		result.Locations = locs[0]
	default:
		result.Locations = search.MergeLocations(locs)
	}
	return result
}

View file

@ -1,32 +1,38 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorers
package scorer
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
type ConstantScorer struct {
constant float64
boost float64
explain bool
options search.SearcherOptions
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer {
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{
explain: explain,
options: options,
queryWeight: 1.0,
constant: constant,
boost: boost,
@ -46,7 +52,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
// update the query weight
s.queryWeight = s.boost * s.queryNorm
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 2)
childrenExplanations[0] = &search.Explanation{
Value: s.boost,
@ -64,12 +70,12 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
}
}
func (s *ConstantScorer) Score(id string) *search.DocumentMatch {
func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternalID) *search.DocumentMatch {
var scoreExplanation *search.Explanation
score := s.constant
if s.explain {
if s.options.Explain {
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("ConstantScore()"),
@ -79,7 +85,7 @@ func (s *ConstantScorer) Score(id string) *search.DocumentMatch {
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
@ -91,13 +97,12 @@ func (s *ConstantScorer) Score(id string) *search.DocumentMatch {
}
}
rv := search.DocumentMatch{
ID: id,
Score: score,
}
if s.explain {
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = id
rv.Score = score
if s.options.Explain {
rv.Expl = scoreExplanation
}
return &rv
return rv
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"github.com/blevesearch/bleve/search"
)
// DisjunctionQueryScorer combines the matches of a disjunction's constituent
// searchers into a single scored match.
type DisjunctionQueryScorer struct {
	// options controls optional work; Score consults options.Explain.
	options search.SearcherOptions
}

// NewDisjunctionQueryScorer returns a scorer configured with options.
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
	s := new(DisjunctionQueryScorer)
	s.options = options
	return s
}
// Score folds the constituent matches of one document into a single match.
// The score is the sum of the constituent scores multiplied by the
// coordination factor countMatch/countTotal.
//
// The first constituent is reused as the return value: its Score and Expl are
// overwritten, and its Locations are replaced when at least one constituent
// carries locations (merged via search.MergeLocations when several do).
// When explanations are enabled, the result explains the product of the raw
// sum and the coordination factor. ctx is not used by this implementation.
//
// constituents must be non-empty; indexing its first element panics otherwise.
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
	explain := s.options.Explain

	var (
		total     float64
		childExpl []*search.Explanation
		locs      []search.FieldTermLocationMap
	)
	if explain {
		childExpl = make([]*search.Explanation, len(constituents))
	}

	for idx, match := range constituents {
		total += match.Score
		if explain {
			// captured before the first constituent's Expl is overwritten below
			childExpl[idx] = match.Expl
		}
		if match.Locations != nil {
			locs = append(locs, match.Locations)
		}
	}

	coord := float64(countMatch) / float64(countTotal)
	score := total * coord

	var expl *search.Explanation
	if explain {
		sumExpl := &search.Explanation{Value: total, Message: "sum of:", Children: childExpl}
		coordExpl := &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}
		expl = &search.Explanation{
			Value:    score,
			Message:  "product of:",
			Children: []*search.Explanation{sumExpl, coordExpl},
		}
	}

	// recycle the first constituent instead of allocating a new match
	result := constituents[0]
	result.Score = score
	result.Expl = expl
	switch len(locs) {
	case 0:
		// no constituent had locations; keep whatever result already holds
	case 1:
		result.Locations = locs[0]
	default:
		result.Locations = search.MergeLocations(locs)
	}
	return result
}

View file

@ -1,13 +1,18 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorers
package scorer
import (
"fmt"
@ -18,20 +23,20 @@ import (
)
type TermQueryScorer struct {
queryTerm string
queryTerm []byte
queryField string
queryBoost float64
docTerm uint64
docTotal uint64
idf float64
explain bool
options search.SearcherOptions
idfExplanation *search.Explanation
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,
queryField: queryField,
@ -39,11 +44,11 @@ func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
explain: explain,
options: options,
queryWeight: 1.0,
}
if explain {
if options.Explain {
rv.idfExplanation = &search.Explanation{
Value: rv.idf,
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
@ -64,7 +69,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
// update the query weight
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: s.queryBoost,
@ -83,7 +88,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
}
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentMatch {
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
var scoreExplanation *search.Explanation
// need to compute score
@ -95,7 +100,7 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM
}
score := tf * termMatch.Norm * s.idf
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
@ -116,7 +121,7 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
@ -128,42 +133,50 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM
}
}
rv := search.DocumentMatch{
ID: termMatch.ID,
Score: score,
}
if s.explain {
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score
if s.options.Explain {
rv.Expl = scoreExplanation
}
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
locs := make([]search.Location, len(termMatch.Vectors))
locsUsed := 0
totalPositions := 0
for _, v := range termMatch.Vectors {
totalPositions += len(v.ArrayPositions)
}
positions := make(search.ArrayPositions, totalPositions)
positionsUsed := 0
rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field]
if tlm == nil {
tlm = make(search.TermLocationMap)
rv.Locations[v.Field] = tlm
}
loc := search.Location{
Pos: float64(v.Pos),
Start: float64(v.Start),
End: float64(v.End),
loc := &locs[locsUsed]
locsUsed++
loc.Pos = v.Pos
loc.Start = v.Start
loc.End = v.End
if len(v.ArrayPositions) > 0 {
loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
for i, ap := range v.ArrayPositions {
loc.ArrayPositions[i] = ap
}
positionsUsed += len(v.ArrayPositions)
}
locations := tlm[s.queryTerm]
if locations == nil {
locations = make(search.Locations, 1)
locations[0] = &loc
} else {
locations = append(locations, &loc)
}
tlm[s.queryTerm] = locations
rv.Locations[v.Field] = tlm
tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
}
}
return &rv
return rv
}

View file

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"math"
)
// MaxSqrtCache is the number of precomputed square roots held in SqrtCache.
const MaxSqrtCache = 64

// SqrtCache holds math.Sqrt(i) at index i for every 0 <= i < MaxSqrtCache.
// It is filled once at package initialization.
var SqrtCache []float64

// init populates SqrtCache.
func init() {
	cache := make([]float64, 0, MaxSqrtCache)
	for len(cache) < MaxSqrtCache {
		cache = append(cache, math.Sqrt(float64(len(cache))))
	}
	SqrtCache = cache
}

View file

@ -1,59 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"github.com/blevesearch/bleve/search"
)
// ConjunctionQueryScorer combines the matches of a conjunction's constituent
// searchers into a single scored match.
type ConjunctionQueryScorer struct {
	// explain enables construction of score explanations in Score.
	explain bool
}

// NewConjunctionQueryScorer returns a scorer that builds explanations only
// when explain is true.
func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer {
	s := new(ConjunctionQueryScorer)
	s.explain = explain
	return s
}
// Score folds the constituent matches of one document into a newly allocated
// match that carries the ID of the first constituent and the sum of all
// constituent scores.
//
// Locations are taken from the constituents that have them (merged via
// search.MergeLocations when several do). When explanations are enabled the
// result explains the sum with one child per constituent.
//
// constituents must be non-empty; indexing its first element panics otherwise.
func (s *ConjunctionQueryScorer) Score(constituents []*search.DocumentMatch) *search.DocumentMatch {
	result := &search.DocumentMatch{ID: constituents[0].ID}

	var (
		total     float64
		childExpl []*search.Explanation
		locs      []search.FieldTermLocationMap
	)
	if s.explain {
		childExpl = make([]*search.Explanation, len(constituents))
	}

	for idx, match := range constituents {
		total += match.Score
		if s.explain {
			childExpl[idx] = match.Expl
		}
		if match.Locations != nil {
			locs = append(locs, match.Locations)
		}
	}

	result.Score = total
	if s.explain {
		result.Expl = &search.Explanation{Value: total, Message: "sum of:", Children: childExpl}
	}

	switch len(locs) {
	case 0:
		// nothing to attach
	case 1:
		result.Locations = locs[0]
	default:
		result.Locations = search.MergeLocations(locs)
	}
	return result
}

View file

@ -1,118 +0,0 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// TestConstantScorer checks the match produced by a ConstantScorer built with
// constant 1, boost 1 and explanations enabled, before any query norm has
// been applied.
func TestConstantScorer(t *testing.T) {
	constScorer := NewConstantScorer(1, 1, true)

	cases := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		// test some simple math
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 1,
				Norm: 1.0,
				Vectors: []*index.TermFieldVector{
					{Field: "desc", Pos: 1, Start: 0, End: 4},
				},
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: 1.0,
				Expl:  &search.Explanation{Value: 1.0, Message: "ConstantScore()"},
			},
		},
	}

	for _, tc := range cases {
		got := constScorer.Score(tc.termMatch.ID)
		if reflect.DeepEqual(got, tc.result) {
			continue
		}
		t.Errorf("expected %#v got %#v for %#v", tc.result, got, tc.termMatch)
	}
}
// TestConstantScorerWithQueryNorm checks the score and the nested explanation
// produced by a ConstantScorer (constant 1, boost 1) after a query norm of
// 2.0 has been applied.
func TestConstantScorerWithQueryNorm(t *testing.T) {
	constScorer := NewConstantScorer(1, 1, true)
	constScorer.SetQueryNorm(2.0)

	// explanation of the query weight: boost * queryNorm
	weightExpl := &search.Explanation{
		Value:   2.0,
		Message: "ConstantScore()^1.000000, product of:",
		Children: []*search.Explanation{
			{Value: 1, Message: "boost"},
			{Value: 2, Message: "queryNorm"},
		},
	}
	// explanation of the unweighted constant score
	constantExpl := &search.Explanation{Value: 1.0, Message: "ConstantScore()"}

	cases := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: 2.0,
				Expl: &search.Explanation{
					Value:    2.0,
					Message:  "weight(^1.000000), product of:",
					Children: []*search.Explanation{weightExpl, constantExpl},
				},
			},
		},
	}

	for _, tc := range cases {
		got := constScorer.Score(tc.termMatch.ID)
		if reflect.DeepEqual(got, tc.result) {
			continue
		}
		t.Errorf("expected %#v got %#v for %#v", tc.result, got, tc.termMatch)
	}
}

View file

@ -1,71 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"fmt"
"github.com/blevesearch/bleve/search"
)
// DisjunctionQueryScorer combines the matches of a disjunction's constituent
// searchers into a single scored match.
type DisjunctionQueryScorer struct {
	// explain enables construction of score explanations in Score.
	explain bool
}

// NewDisjunctionQueryScorer returns a scorer that builds explanations only
// when explain is true.
func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer {
	s := new(DisjunctionQueryScorer)
	s.explain = explain
	return s
}
// Score folds the constituent matches of one document into a newly allocated
// match that carries the ID of the first constituent. The score is the sum of
// the constituent scores multiplied by the coordination factor
// countMatch/countTotal.
//
// Locations are taken from the constituents that have them (merged via
// search.MergeLocations when several do). When explanations are enabled the
// result explains the product of the raw sum and the coordination factor.
//
// constituents must be non-empty; indexing its first element panics otherwise.
func (s *DisjunctionQueryScorer) Score(constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
	result := &search.DocumentMatch{ID: constituents[0].ID}

	var (
		total     float64
		childExpl []*search.Explanation
		locs      []search.FieldTermLocationMap
	)
	if s.explain {
		childExpl = make([]*search.Explanation, len(constituents))
	}

	for idx, match := range constituents {
		total += match.Score
		if s.explain {
			childExpl[idx] = match.Expl
		}
		if match.Locations != nil {
			locs = append(locs, match.Locations)
		}
	}

	coord := float64(countMatch) / float64(countTotal)
	result.Score = total * coord

	if s.explain {
		sumExpl := &search.Explanation{Value: total, Message: "sum of:", Children: childExpl}
		coordExpl := &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}
		result.Expl = &search.Explanation{
			Value:    result.Score,
			Message:  "product of:",
			Children: []*search.Explanation{sumExpl, coordExpl},
		}
	}

	switch len(locs) {
	case 0:
		// nothing to attach
	case 1:
		result.Locations = locs[0]
	default:
		result.Locations = search.MergeLocations(locs)
	}
	return result
}

View file

@ -1,241 +0,0 @@
// Copyright (c) 2013 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"math"
"reflect"
"testing"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// TestTermScorer checks the score, explanation and locations produced by a
// TermQueryScorer for the term "beer" in field "desc" (boost 1, 9 of 100
// documents containing the term) when no query norm has been applied.
func TestTermScorer(t *testing.T) {
	docTotal, docTerm := uint64(100), uint64(9)
	queryTerm, queryField, queryBoost := "beer", "desc", 1.0
	termScorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true)
	idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

	// fieldWeight builds the expected explanation for a given tf value and
	// its accompanying tf message; fieldNorm and idf are the same in all cases.
	fieldWeight := func(tf float64, tfMessage string) *search.Explanation {
		return &search.Explanation{
			Value:   tf * idf,
			Message: "fieldWeight(desc:beer in one), product of:",
			Children: []*search.Explanation{
				{Value: tf, Message: tfMessage},
				{Value: 1, Message: "fieldNorm(field=desc, doc=one)"},
				{Value: idf, Message: "idf(docFreq=9, maxDocs=100)"},
			},
		}
	}

	cases := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		// test some simple math
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 1,
				Norm: 1.0,
				Vectors: []*index.TermFieldVector{
					{Field: "desc", Pos: 1, Start: 0, End: 4},
				},
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: math.Sqrt(1.0) * idf,
				Expl:  fieldWeight(math.Sqrt(1.0), "tf(termFreq(desc:beer)=1"),
				Locations: search.FieldTermLocationMap{
					"desc": {
						"beer": {
							{Pos: 1, Start: 0, End: 4},
						},
					},
				},
			},
		},
		// test the same thing again (score should be cached this time)
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: math.Sqrt(1.0) * idf,
				Expl:  fieldWeight(math.Sqrt(1.0), "tf(termFreq(desc:beer)=1"),
			},
		},
		// test a case where the sqrt isn't precalculated
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 65,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: math.Sqrt(65) * idf,
				Expl:  fieldWeight(math.Sqrt(65), "tf(termFreq(desc:beer)=65"),
			},
		},
	}

	for _, tc := range cases {
		got := termScorer.Score(tc.termMatch)
		if reflect.DeepEqual(got, tc.result) {
			continue
		}
		t.Errorf("expected %#v got %#v for %#v", tc.result, got, tc.termMatch)
	}
}
// TestTermScorerWithQueryNorm checks the query weight reported by a
// TermQueryScorer (term "beer", field "desc", boost 3) and the score and
// nested explanation it produces after a query norm of 2.0 has been applied.
func TestTermScorerWithQueryNorm(t *testing.T) {
	docTotal, docTerm := uint64(100), uint64(9)
	queryTerm, queryField, queryBoost := "beer", "desc", 3.0
	termScorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true)
	idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

	termScorer.SetQueryNorm(2.0)

	wantWeight := 3 * idf * 3 * idf
	gotWeight := termScorer.Weight()
	if wantWeight != gotWeight {
		t.Errorf("expected query weight %f, got %f", wantWeight, gotWeight)
	}

	// explanation of the query weight: boost * idf * queryNorm
	queryWeightExpl := &search.Explanation{
		Value:   2.0 * idf * 3.0,
		Message: "queryWeight(desc:beer^3.000000), product of:",
		Children: []*search.Explanation{
			{Value: 3, Message: "boost"},
			{Value: idf, Message: "idf(docFreq=9, maxDocs=100)"},
			{Value: 2, Message: "queryNorm"},
		},
	}
	// explanation of the field weight: tf * fieldNorm * idf
	fieldWeightExpl := &search.Explanation{
		Value:   math.Sqrt(1.0) * idf,
		Message: "fieldWeight(desc:beer in one), product of:",
		Children: []*search.Explanation{
			{Value: 1, Message: "tf(termFreq(desc:beer)=1"},
			{Value: 1, Message: "fieldNorm(field=desc, doc=one)"},
			{Value: idf, Message: "idf(docFreq=9, maxDocs=100)"},
		},
	}

	cases := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		{
			termMatch: &index.TermFieldDoc{
				ID:   "one",
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				ID:    "one",
				Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
				Expl: &search.Explanation{
					Value:    math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
					Message:  "weight(desc:beer^3.000000 in one), product of:",
					Children: []*search.Explanation{queryWeightExpl, fieldWeightExpl},
				},
			},
		},
	}

	for _, tc := range cases {
		got := termScorer.Score(tc.termMatch)
		if reflect.DeepEqual(got, tc.result) {
			continue
		}
		t.Errorf("expected %#v got %#v for %#v", tc.result, got, tc.termMatch)
	}
}

View file

@ -1,25 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
import (
"math"
)
// MaxSqrtCache is the number of precomputed square roots held in SqrtCache.
const MaxSqrtCache = 64

// SqrtCache maps every integer i with 0 <= i < MaxSqrtCache to math.Sqrt(i).
// It is filled once at package initialization.
var SqrtCache map[int]float64

// init populates SqrtCache.
func init() {
	cache := make(map[int]float64, MaxSqrtCache)
	for n := MaxSqrtCache - 1; n >= 0; n-- {
		cache[n] = math.Sqrt(float64(n))
	}
	SqrtCache = cache
}

View file

@ -1,18 +1,45 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
)
// ArrayPositions is a sequence of array position values.
type ArrayPositions []uint64

// Equals reports whether ap and other have the same length and hold the same
// values in the same order. A nil and an empty ArrayPositions are equal.
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
	if len(other) != len(ap) {
		return false
	}
	for idx, pos := range other {
		if pos != ap[idx] {
			return false
		}
	}
	return true
}
type Location struct {
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
Pos uint64 `json:"pos"`
Start uint64 `json:"start"`
End uint64 `json:"end"`
ArrayPositions ArrayPositions `json:"array_positions"`
}
type Locations []*Location
@ -20,15 +47,7 @@ type Locations []*Location
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
existingLocations, exists := t[term]
if exists {
existingLocations = append(existingLocations, location)
t[term] = existingLocations
} else {
locations := make(Locations, 1)
locations[0] = location
t[term] = locations
}
t[term] = append(t[term], location)
}
type FieldTermLocationMap map[string]TermLocationMap
@ -36,12 +55,25 @@ type FieldTermLocationMap map[string]TermLocationMap
type FieldFragmentMap map[string][]string
type DocumentMatch struct {
ID string `json:"id"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Fields map[string]interface{} `json:"fields,omitempty"`
Index string `json:"index,omitempty"`
ID string `json:"id"`
IndexInternalID index.IndexInternalID `json:"-"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// if we load the document for this hit, remember it so we don't load it again
Document *document.Document `json:"-"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@ -49,19 +81,39 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
dm.Fields = make(map[string]interface{})
}
existingVal, ok := dm.Fields[name]
if ok {
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
} else {
if !ok {
dm.Fields[name] = value
return
}
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
}
// Reset clears dm so that an already allocated DocumentMatch can be reused,
// and returns dm. Every field is zeroed except that the backing storage of
// IndexInternalID and of the Sort []string slice is retained, with both
// lengths reset to 0.
func (dm *DocumentMatch) Reset() *DocumentMatch {
	// keep the existing buffers, truncated to zero length
	idBuf, sortBuf := dm.IndexInternalID[:0], dm.Sort[:0]

	// assigning the zero value clears all fields without allocating
	*dm = DocumentMatch{}

	dm.IndexInternalID = idBuf
	dm.Sort = sortBuf
	return dm
}
// String renders the match as "[<internal id>-<score>]", with the internal ID
// bytes interpreted as a string and the score formatted with %f.
func (dm *DocumentMatch) String() string {
	id := string(dm.IndexInternalID)
	return fmt.Sprintf("[%s-%f]", id, dm.Score)
}
type DocumentMatchCollection []*DocumentMatch
@ -71,11 +123,23 @@ func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
type Searcher interface {
Next() (*DocumentMatch, error)
Advance(ID string) (*DocumentMatch, error)
Next(ctx *SearchContext) (*DocumentMatch, error)
Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
Close() error
Weight() float64
SetQueryNorm(float64)
Count() uint64
Min() int
DocumentMatchPoolSize() int
}
// SearcherOptions carries per-search switches that are handed to searchers
// and scorers when they are constructed.
type SearcherOptions struct {
// Explain makes scorers build an Explanation alongside each score.
Explain bool
// IncludeTermVectors presumably asks searchers to load term vectors
// (needed for match locations) — TODO confirm against the term searcher.
IncludeTermVectors bool
}
// SearchContext represents the context around a single search
type SearchContext struct {
// DocumentMatchPool is the pool scorers and searchers obtain DocumentMatch
// instances from (Get) and hand them back to (Put).
DocumentMatchPool *DocumentMatchPool
}

View file

@ -0,0 +1,35 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/search"
)
// OrderedSearcherList is a list of searchers that implements sort.Interface,
// ordering searchers by ascending Count().
type OrderedSearcherList []search.Searcher

// Len returns the number of searchers in the list (sort.Interface).
func (otrl OrderedSearcherList) Len() int {
	return len(otrl)
}

// Less reports whether the searcher at i has a smaller Count() than the
// searcher at j (sort.Interface).
func (otrl OrderedSearcherList) Less(i, j int) bool {
	left := otrl[i].Count()
	right := otrl[j].Count()
	return left < right
}

// Swap exchanges the searchers at i and j (sort.Interface).
func (otrl OrderedSearcherList) Swap(i, j int) {
	tmp := otrl[i]
	otrl[i] = otrl[j]
	otrl[j] = tmp
}

View file

@ -0,0 +1,391 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// BooleanSearcher combines up to three child searchers: one whose matches
// are required (must), one whose matches are optional or bonus (should),
// and one whose matches are excluded (must not). Any child may be nil.
type BooleanSearcher struct {
indexReader index.IndexReader
// child searchers; each is nil when that clause is absent
mustSearcher search.Searcher
shouldSearcher search.Searcher
mustNotSearcher search.Searcher
// queryNorm is computed once in computeQueryNorm and pushed to must/should
queryNorm float64
// most recent match fetched from each child (nil when exhausted)
currMust *search.DocumentMatch
currShould *search.DocumentMatch
currMustNot *search.DocumentMatch
// currentID is the candidate currently being evaluated; nil means done
currentID index.IndexInternalID
// NOTE(review): min is not read or written by any method visible here
min uint64
scorer *scorer.ConjunctionQueryScorer
// matches is a length-2 scratch slice reused to pass constituents to scorer
matches []*search.DocumentMatch
// initialized is set once initSearchers has positioned the children
initialized bool
}
// NewBooleanSearcher assembles a BooleanSearcher from the given must,
// should and must-not child searchers (any of which may be nil) and
// computes the query norm for the must/should children before returning.
// The returned error is always nil.
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
	bs := &BooleanSearcher{
		indexReader:     indexReader,
		mustSearcher:    mustSearcher,
		shouldSearcher:  shouldSearcher,
		mustNotSearcher: mustNotSearcher,
		scorer:          scorer.NewConjunctionQueryScorer(options),
		// room for one must and one should constituent
		matches: make([]*search.DocumentMatch, 2),
	}
	bs.computeQueryNorm()
	return bs, nil
}
// computeQueryNorm derives the query norm from the weights of the must and
// should children (must-not does not contribute), stores it, and pushes it
// down to those same children.
func (s *BooleanSearcher) computeQueryNorm() {
	scoring := []search.Searcher{s.mustSearcher, s.shouldSearcher}

	// accumulate the weights reported by the scoring children
	var weightSum float64
	for _, child := range scoring {
		if child != nil {
			weightSum += child.Weight()
		}
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm downstream
	for _, child := range scoring {
		if child != nil {
			child.SetQueryNorm(s.queryNorm)
		}
	}
}
// initSearchers positions every non-nil child searcher on its first match,
// recycling any match previously held, and selects the first candidate ID:
// the must match when a must searcher exists, otherwise the should match.
func (s *BooleanSearcher) initSearchers(ctx *search.SearchContext) error {
	var err error

	if s.mustSearcher != nil {
		if s.currMust != nil {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		if s.currMust, err = s.mustSearcher.Next(ctx); err != nil {
			return err
		}
	}

	if s.shouldSearcher != nil {
		if s.currShould != nil {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		if s.currShould, err = s.shouldSearcher.Next(ctx); err != nil {
			return err
		}
	}

	if s.mustNotSearcher != nil {
		if s.currMustNot != nil {
			ctx.DocumentMatchPool.Put(s.currMustNot)
		}
		if s.currMustNot, err = s.mustNotSearcher.Next(ctx); err != nil {
			return err
		}
	}

	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}

	s.initialized = true
	return nil
}
// advanceNextMust moves the searcher that drives candidate selection (the
// must searcher, or the should searcher when there is no must searcher) to
// its next match and updates currentID accordingly. The match being
// replaced is returned to the pool unless it is skipReturn, which the
// caller is about to hand out.
func (s *BooleanSearcher) advanceNextMust(ctx *search.SearchContext, skipReturn *search.DocumentMatch) error {
	var err error

	if s.mustSearcher == nil {
		// no must clause: the should searcher drives iteration
		if s.currShould != skipReturn {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		if s.currShould, err = s.shouldSearcher.Next(ctx); err != nil {
			return err
		}
	} else {
		if s.currMust != skipReturn {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		if s.currMust, err = s.mustSearcher.Next(ctx); err != nil {
			return err
		}
	}

	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}
	return nil
}
// Weight returns the combined weight of the must and should children;
// absent children contribute nothing.
func (s *BooleanSearcher) Weight() float64 {
	var total float64
	for _, child := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			total += child.Weight()
		}
	}
	return total
}
// SetQueryNorm forwards qnorm to the must and should children, when present.
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
	for _, child := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			child.SetQueryNorm(qnorm)
		}
	}
}
// Next returns the next document accepted by the boolean query, or nil when
// no candidates remain (currentID becomes nil).
//
// For each candidate (currentID) it:
//   1. brings the must-not searcher up to the candidate and skips the
//      candidate if the must-not searcher matches it;
//   2. brings the should searcher up to the candidate; if it matches, the
//      should match is scored together with the must match (if any);
//   3. otherwise accepts the candidate on the must match alone when there
//      is no should searcher or its Min() is 0;
//   4. otherwise moves on to the next candidate.
// The returned match is excluded from pool recycling via advanceNextMust.
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
for s.currentID != nil {
if s.currMustNot != nil {
cmp := s.currMustNot.IndexInternalID.Compare(s.currentID)
if cmp < 0 {
// must-not searcher is behind the candidate; recycle its stale match
ctx.DocumentMatchPool.Put(s.currMustNot)
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.IndexInternalID.Equals(s.currentID) {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
} else if cmp == 0 {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
}
shouldCmpOrNil := 1 // NOTE: shouldCmp will also be 1 when currShould == nil.
if s.currShould != nil {
shouldCmpOrNil = s.currShould.IndexInternalID.Compare(s.currentID)
}
if shouldCmpOrNil < 0 {
// should searcher is behind the candidate; recycle its stale match
ctx.DocumentMatchPool.Put(s.currShould)
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.IndexInternalID.Equals(s.currentID) {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
// pass rv so the match being returned is not put back in the pool
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
} else if shouldCmpOrNil == 0 {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
// candidate rejected (a required should match is missing); try the next one
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
}
return rv, nil
}
// Advance moves every non-nil child searcher forward to ID (recycling the
// matches they previously held), re-derives the current candidate, and
// then returns whatever Next produces from that position.
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if initErr := s.initSearchers(ctx); initErr != nil {
			return nil, initErr
		}
	}

	var err error

	if s.mustSearcher != nil {
		if s.currMust != nil {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		if s.currMust, err = s.mustSearcher.Advance(ctx, ID); err != nil {
			return nil, err
		}
	}

	if s.shouldSearcher != nil {
		if s.currShould != nil {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		if s.currShould, err = s.shouldSearcher.Advance(ctx, ID); err != nil {
			return nil, err
		}
	}

	if s.mustNotSearcher != nil {
		if s.currMustNot != nil {
			ctx.DocumentMatchPool.Put(s.currMustNot)
		}
		if s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID); err != nil {
			return nil, err
		}
	}

	// the must searcher drives candidates when present, else the should searcher
	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}

	return s.Next(ctx)
}
// Count returns a worst-case estimate: the sum of the counts reported by
// the must and should children.
func (s *BooleanSearcher) Count() uint64 {
	var total uint64
	for _, child := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			total += child.Count()
		}
	}
	return total
}
// Close closes every non-nil child searcher (must, should, must-not, in
// that order). All children are closed even if one fails; the first error
// encountered in that order is returned.
func (s *BooleanSearcher) Close() error {
	var firstErr error
	children := []search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher}
	for _, child := range children {
		if child == nil {
			continue
		}
		if err := child.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a BooleanSearcher.
func (s *BooleanSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize returns 3 (one held match per clause: must, should,
// must-not) plus the pool sizes required by each child that is present.
func (s *BooleanSearcher) DocumentMatchPoolSize() int {
	size := 3
	children := []search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher}
	for _, child := range children {
		if child != nil {
			size += child.DocumentMatchPoolSize()
		}
	}
	return size
}

View file

@ -0,0 +1,232 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// ConjunctionSearcher returns only documents matched by every one of its
// child searchers.
type ConjunctionSearcher struct {
indexReader index.IndexReader
// searchers holds the children, sorted by ascending Count() at construction
searchers OrderedSearcherList
queryNorm float64
// currs[i] is the match searchers[i] is currently positioned on (nil when exhausted)
currs []*search.DocumentMatch
// maxIDIdx is the index into currs of the entry with the largest ID seen so far
maxIDIdx int
scorer *scorer.ConjunctionQueryScorer
// initialized is set once initSearchers has positioned the children
initialized bool
options search.SearcherOptions
}
// NewConjunctionSearcher builds a ConjunctionSearcher over qsearchers. The
// children are copied into a private list and sorted by ascending Count()
// (the caller's slice is left untouched), and the query norm is computed
// and pushed to them. The returned error is always nil.
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)
	sort.Sort(ordered)

	cs := &ConjunctionSearcher{
		indexReader: indexReader,
		options:     options,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorer.NewConjunctionQueryScorer(options),
	}
	cs.computeQueryNorm()
	return cs, nil
}
// computeQueryNorm sums the weights of all children, stores
// 1/sqrt(sum) as the query norm, and hands that norm to every child.
func (s *ConjunctionSearcher) computeQueryNorm() {
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions each child on its first match, returning any
// previously held match to the pool, and marks the searcher initialized.
// It stops at the first child that reports an error.
func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
	for idx := range s.searchers {
		if stale := s.currs[idx]; stale != nil {
			ctx.DocumentMatchPool.Put(stale)
		}
		first, err := s.searchers[idx].Next(ctx)
		s.currs[idx] = first
		if err != nil {
			return err
		}
	}
	s.initialized = true
	return nil
}
// Weight returns the sum of the weights of all child searchers.
func (s *ConjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher.
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by every child searcher, or nil
// once any child is exhausted.
//
// It tracks the child positioned on the largest ID (maxIDIdx) and advances
// the lagging children to that ID until either all children agree (a
// match, which is scored and returned) or a child runs out of documents.
// A searcher constructed with no children never matches and returns
// (nil, nil) rather than indexing into the empty currs slice.
func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	if !s.initialized {
		err := s.initSearchers(ctx)
		if err != nil {
			return nil, err
		}
	}
	var rv *search.DocumentMatch
	var err error
	// The bounds check guards against a conjunction with zero children,
	// where currs is empty and currs[maxIDIdx] would panic.
OUTER:
	for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
		maxID := s.currs[s.maxIDIdx].IndexInternalID

		i := 0
		for i < len(s.currs) {
			if s.currs[i] == nil {
				// a child is exhausted, so no further document can match all
				return nil, nil
			}

			if i == s.maxIDIdx {
				i++
				continue
			}

			cmp := maxID.Compare(s.currs[i].IndexInternalID)
			if cmp == 0 {
				i++
				continue
			}

			if cmp < 0 {
				// maxID < currs[i], so we found a new maxIDIdx
				s.maxIDIdx = i

				// advance the positions where [0 <= x < i], since we
				// know they were equal to the former max entry
				maxID = s.currs[s.maxIDIdx].IndexInternalID
				for x := 0; x < i; x++ {
					err = s.advanceChild(ctx, x, maxID)
					if err != nil {
						return nil, err
					}
				}

				continue OUTER
			}

			// maxID > currs[i], so need to advance searchers[i]
			err = s.advanceChild(ctx, i, maxID)
			if err != nil {
				return nil, err
			}

			// don't bump i, so that we'll examine the just-advanced
			// currs[i] again
		}

		// if we get here, a doc matched all readers, so score and add it
		rv = s.scorer.Score(ctx, s.currs)

		// we know all the searchers are pointing at the same thing
		// so they all need to be bumped
		for i, termSearcher := range s.searchers {
			// the match being returned must not be recycled
			if s.currs[i] != rv {
				ctx.DocumentMatchPool.Put(s.currs[i])
			}
			s.currs[i], err = termSearcher.Next(ctx)
			if err != nil {
				return nil, err
			}
		}

		// don't continue now, wait for the next call to Next()
		break
	}
	return rv, nil
}
// Advance moves every child searcher forward to ID and then returns the
// result of Next from that position.
func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}
	for idx := 0; idx < len(s.searchers); idx++ {
		if err := s.advanceChild(ctx, idx, ID); err != nil {
			return nil, err
		}
	}
	return s.Next(ctx)
}
// advanceChild advances child i to ID, first returning the match it was
// holding (if any) to the pool, and records the child's new position.
func (s *ConjunctionSearcher) advanceChild(ctx *search.SearchContext, i int, ID index.IndexInternalID) (err error) {
	if held := s.currs[i]; held != nil {
		ctx.DocumentMatchPool.Put(held)
	}
	s.currs[i], err = s.searchers[i].Advance(ctx, ID)
	return err
}
// Count returns a worst-case estimate: the sum of all children's counts.
func (s *ConjunctionSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher, continuing past failures, and returns
// the first error encountered (nil if all closed cleanly).
func (s *ConjunctionSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); rv == nil && err != nil {
			rv = err
		}
	}
	return rv
}
// Min always returns 0 for a ConjunctionSearcher.
func (s *ConjunctionSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize returns one slot per child (for the match held in
// currs) plus the pool size each child itself requires.
func (s *ConjunctionSearcher) DocumentMatchPoolSize() int {
	size := len(s.currs)
	for _, child := range s.searchers {
		size += child.DocumentMatchPoolSize()
	}
	return size
}

View file

@ -0,0 +1,271 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to a non-zero value to cause the DisjunctionSearcher to return an
// error instead of executing searches when the size exceeds this value.
var DisjunctionMaxClauseCount = 0
// DisjunctionSearcher returns documents matched by at least min of its
// child searchers.
type DisjunctionSearcher struct {
indexReader index.IndexReader
// searchers holds the children, sorted by descending Count() at construction
searchers OrderedSearcherList
// numSearchers is len(searchers), passed to the scorer
numSearchers int
queryNorm float64
// currs[i] is the match searchers[i] is currently positioned on (nil when exhausted)
currs []*search.DocumentMatch
scorer *scorer.DisjunctionQueryScorer
// min is the number of children that must match a document for it to be returned
min int
// matching holds the entries of currs sharing the smallest ID;
// matchingIdxs holds their indexes into currs/searchers
matching []*search.DocumentMatch
matchingIdxs []int
// initialized is set once initSearchers has positioned the children
initialized bool
}
// tooManyClauses reports whether count exceeds DisjunctionMaxClauseCount.
// A limit of 0 disables the check.
func tooManyClauses(count int) bool {
	limit := DisjunctionMaxClauseCount
	return limit != 0 && count > limit
}
// tooManyClausesErr builds the error returned when a search would exceed
// DisjunctionMaxClauseCount; the message includes the configured limit.
func tooManyClausesErr() error {
	limit := DisjunctionMaxClauseCount
	return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", limit)
}
// NewDisjunctionSearcher builds a DisjunctionSearcher over qsearchers that
// requires at least min of them to match. It enforces the
// DisjunctionMaxClauseCount limit and returns an error when the number of
// child searchers exceeds it.
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
*DisjunctionSearcher, error) {
// limit=true: apply the clause-count check
return newDisjunctionSearcher(indexReader, qsearchers, min, options,
true)
}
// newDisjunctionSearcher is the shared constructor behind
// NewDisjunctionSearcher. When limit is true it rejects inputs exceeding
// DisjunctionMaxClauseCount. The children are copied into a private list
// sorted by descending Count(); min is truncated to an int.
func newDisjunctionSearcher(indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (
	*DisjunctionSearcher, error) {
	if limit && tooManyClauses(len(qsearchers)) {
		return nil, tooManyClausesErr()
	}

	// private copy of the children, largest Count() first
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)
	sort.Sort(sort.Reverse(ordered))

	n := len(ordered)
	ds := &DisjunctionSearcher{
		indexReader:  indexReader,
		searchers:    ordered,
		numSearchers: n,
		currs:        make([]*search.DocumentMatch, n),
		scorer:       scorer.NewDisjunctionQueryScorer(options),
		min:          int(min),
		matching:     make([]*search.DocumentMatch, n),
		matchingIdxs: make([]int, n),
	}
	ds.computeQueryNorm()
	return ds, nil
}
// computeQueryNorm sums the weights of all children, stores
// 1/sqrt(sum) as the query norm, and hands that norm to every child.
func (s *DisjunctionSearcher) computeQueryNorm() {
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions each child on its first match (recycling any
// match previously held), computes the initial matching set, and marks
// the searcher initialized.
func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
	for idx := range s.searchers {
		if stale := s.currs[idx]; stale != nil {
			ctx.DocumentMatchPool.Put(stale)
		}
		first, err := s.searchers[idx].Next(ctx)
		s.currs[idx] = first
		if err != nil {
			return err
		}
	}

	if err := s.updateMatches(); err != nil {
		return err
	}

	s.initialized = true
	return nil
}
// updateMatches recomputes s.matching / s.matchingIdxs: the entries of
// currs (and their indexes) that share the smallest document ID among all
// non-exhausted children. The backing arrays are reused. It always
// returns nil.
func (s *DisjunctionSearcher) updateMatches() error {
	lowest := s.matching[:0]
	lowestIdxs := s.matchingIdxs[:0]

	for idx, cand := range s.currs {
		if cand == nil {
			continue
		}
		if len(lowest) > 0 {
			switch cmp := cand.IndexInternalID.Compare(lowest[0].IndexInternalID); {
			case cmp > 0:
				// larger than the current minimum: not part of this round
				continue
			case cmp < 0:
				// new minimum: discard what was collected so far
				lowest = lowest[:0]
				lowestIdxs = lowestIdxs[:0]
			}
		}
		lowest = append(lowest, cand)
		lowestIdxs = append(lowestIdxs, idx)
	}

	s.matching = lowest
	s.matchingIdxs = lowestIdxs
	return nil
}
// Weight returns the sum of the weights of all child searchers.
func (s *DisjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher.
func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least s.min children, or
// nil when all children are exhausted.
//
// Each round looks at the children sharing the smallest current ID. If
// enough of them match, the document is scored; either way those children
// are stepped forward and the matching set recomputed, so the searcher is
// already positioned for the following call.
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
	*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	var (
		err error
		rv  *search.DocumentMatch
	)
	for len(s.matching) > 0 {
		satisfied := len(s.matching) >= s.min
		if satisfied {
			// score this match
			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
		}

		// step every child that contributed to this round
		for _, idx := range s.matchingIdxs {
			// the match being returned must not be recycled
			if s.currs[idx] != rv {
				ctx.DocumentMatchPool.Put(s.currs[idx])
			}
			s.currs[idx], err = s.searchers[idx].Next(ctx)
			if err != nil {
				return nil, err
			}
		}

		if err = s.updateMatches(); err != nil {
			return nil, err
		}

		if satisfied {
			break
		}
	}
	return rv, nil
}
// Advance moves every child searcher forward to ID (recycling the matches
// they held), recomputes the matching set, and returns the result of Next
// from that position.
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
	ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	// bring all children up to the requested ID
	for idx := range s.searchers {
		if held := s.currs[idx]; held != nil {
			ctx.DocumentMatchPool.Put(held)
		}
		adv, err := s.searchers[idx].Advance(ctx, ID)
		s.currs[idx] = adv
		if err != nil {
			return nil, err
		}
	}

	if err := s.updateMatches(); err != nil {
		return nil, err
	}

	return s.Next(ctx)
}
// Count returns a worst-case estimate: the sum of all children's counts.
func (s *DisjunctionSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher, continuing past failures, and returns
// the first error encountered (nil if all closed cleanly).
func (s *DisjunctionSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); rv == nil && err != nil {
			rv = err
		}
	}
	return rv
}
// Min returns the minimum number of child searchers that must match a
// document, as supplied (and truncated to int) at construction.
func (s *DisjunctionSearcher) Min() int {
return s.min
}
// DocumentMatchPoolSize returns one slot per child (for the match held in
// currs) plus the pool size each child itself requires.
func (s *DisjunctionSearcher) DocumentMatchPoolSize() int {
	size := len(s.currs)
	for _, child := range s.searchers {
		size += child.DocumentMatchPoolSize()
	}
	return size
}

View file

@ -0,0 +1,93 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
// reader iterates over the requested document IDs
reader index.DocIDReader
// scorer assigns the constant score given to every match
scorer *scorer.ConstantScorer
// count is the number of IDs supplied at construction
count int
}
// NewDocIDSearcher builds a searcher over exactly the documents named in
// ids. Matches are scored with a constant scorer using the given boost.
// An error from opening the doc ID reader is returned as-is.
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
	options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
	idReader, err := indexReader.DocIDReaderOnly(ids)
	if err != nil {
		return nil, err
	}

	constScorer := scorer.NewConstantScorer(1.0, boost, options)
	searcher = &DocIDSearcher{
		scorer: constScorer,
		reader: idReader,
		count:  len(ids),
	}
	return searcher, nil
}
// Count returns the number of IDs the searcher was constructed with.
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}
// Weight returns the weight reported by the constant scorer.
func (s *DocIDSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the constant scorer.
func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next returns a scored match for the next ID produced by the reader, or
// nil when the reader is exhausted.
func (s *DocIDSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	id, err := s.reader.Next()
	if err != nil || id == nil {
		// err is nil here when the reader simply ran out of IDs
		return nil, err
	}
	return s.scorer.Score(ctx, id), nil
}
// Advance moves the reader forward to ID and returns a scored match for
// the ID it lands on, or nil when the reader is exhausted.
func (s *DocIDSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	id, err := s.reader.Advance(ID)
	if err != nil || id == nil {
		// err is nil here when the reader simply ran out of IDs
		return nil, err
	}
	return s.scorer.Score(ctx, id), nil
}
// Close closes the underlying doc ID reader.
func (s *DocIDSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for a DocIDSearcher.
func (s *DocIDSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for a DocIDSearcher.
func (s *DocIDSearcher) DocumentMatchPoolSize() int {
return 1
}

View file

@ -0,0 +1,88 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FilterFunc defines a function which can filter documents.
// Returning true means keep the document;
// returning false means do not keep the document.
type FilterFunc func(d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc, returning only matches it accepts.
type FilteringSearcher struct {
// child is the wrapped searcher that produces candidate matches
child search.Searcher
// accept decides whether a candidate is returned to the caller
accept FilterFunc
}
// NewFilteringSearcher wraps s so that only matches for which filter
// returns true are surfaced.
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
	fs := new(FilteringSearcher)
	fs.child = s
	fs.accept = filter
	return fs
}
// Next pulls matches from the wrapped searcher until one passes the
// filter, and returns it. It returns nil when the child is exhausted, or
// the child's error if one occurs.
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	for {
		candidate, err := f.child.Next(ctx)
		if err != nil || candidate == nil {
			// err is nil here when the child simply ran out of matches
			return nil, err
		}
		if f.accept(candidate) {
			return candidate, nil
		}
	}
}
// Advance moves the wrapped searcher forward to ID. If the match it lands
// on passes the filter it is returned; otherwise the search continues via
// Next. It returns nil when the child is exhausted.
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	landed, err := f.child.Advance(ctx, ID)
	switch {
	case err != nil:
		return nil, err
	case landed == nil:
		return nil, nil
	case f.accept(landed):
		return landed, nil
	}
	// the landing document was rejected; keep scanning forward
	return f.Next(ctx)
}
// Close closes the wrapped searcher.
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
// Weight returns the wrapped searcher's weight.
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
// SetQueryNorm forwards n to the wrapped searcher.
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
// Count returns the wrapped searcher's count; filtering is not accounted for.
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
// Min returns the wrapped searcher's Min().
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
// DocumentMatchPoolSize returns the wrapped searcher's pool size.
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}

View file

@ -0,0 +1,74 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewFuzzySearcher builds a searcher matching every term in field that is
// within fuzziness edits (Levenshtein distance) of term.
//
// prefix is the number of leading characters (runes) of term that
// candidate terms must share exactly; only dictionary terms starting with
// that prefix are considered. A prefix of 0 or less considers the whole
// field dictionary. The candidate terms are combined with
// NewMultiTermSearcher, which is asked to enforce the clause limit.
func NewFuzzySearcher(indexReader index.IndexReader, term string,
	prefix, fuzziness int, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	// The prefix length counts runes, not bytes. Ranging over a string
	// yields the byte offset of each rune, so count runes explicitly and
	// cut the term at the byte offset where the (prefix+1)-th rune starts.
	// This never splits a multi-byte character.
	prefixEnd := 0
	if prefix > 0 {
		prefixEnd = len(term) // term has at most prefix runes: keep it all
		runesSeen := 0
		for byteIdx := range term {
			if runesSeen == prefix {
				prefixEnd = byteIdx
				break
			}
			runesSeen++
		}
	}
	prefixTerm := term[:prefixEnd]

	candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
		field, prefixTerm)
	if err != nil {
		return nil, err
	}

	return NewMultiTermSearcher(indexReader, candidateTerms, field,
		boost, options, true)
}
// findFuzzyCandidateTerms walks the dictionary of field (restricted to
// terms starting with prefixTerm when it is non-empty) and collects every
// term whose Levenshtein distance to term is at most fuzziness.
//
// It returns an error if the field dictionary cannot be opened, iterated
// or closed, or if the number of candidates exceeds
// DisjunctionMaxClauseCount.
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
	fuzziness int, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	// Bail out before touching fieldDict: on failure it may be nil, and
	// both the deferred Close and Next below would dereference it.
	if err != nil {
		return nil, err
	}
	defer func() {
		// surface a Close failure only if nothing else went wrong
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate terms and check levenshtein distance
	// (err here is the named result, so the deferred func sees its value)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}

View file

@ -0,0 +1,173 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
// NewGeoBoundingBoxSearcher builds a searcher for geo points in field that
// fall inside the rectangle (minLon, minLat)-(maxLon, maxLat).
//
// ComputeGeoRange splits the rectangle into two term lists. Terms flagged
// as on the boundary are searched through a FilteringSearcher that
// re-checks each hit against the rectangle; the remaining terms are
// searched directly. When both lists are non-empty the two searchers are
// joined by a disjunction; when both are empty a match-none searcher is
// returned. Searchers already opened are closed if a later step fails.
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
	maxLon, maxLat float64, field string, boost float64,
	options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error) {

	// searchers opened so far, so they can be closed on an early exit
	var opened []search.Searcher
	closeOpened := func() {
		for i := range opened {
			_ = opened[i].Close()
		}
	}

	// compute the terms needed to cover the rectangle
	boundaryTerms, interiorTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
		minLon, minLat, maxLon, maxLat, checkBoundaries)

	var onBoundarySearcher search.Searcher
	if len(boundaryTerms) > 0 {
		rawBoundary, err := NewMultiTermSearcherBytes(indexReader,
			boundaryTerms, field, boost, options, false)
		if err != nil {
			return nil, err
		}
		// hits near the boundary need an exact point-in-rectangle check
		rectFilter := buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat)
		onBoundarySearcher = NewFilteringSearcher(rawBoundary, rectFilter)
		opened = append(opened, onBoundarySearcher)
	}

	var notOnBoundarySearcher search.Searcher
	if len(interiorTerms) > 0 {
		var err error
		notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader,
			interiorTerms, field, boost, options, false)
		if err != nil {
			closeOpened()
			return nil, err
		}
		opened = append(opened, notOnBoundarySearcher)
	}

	switch {
	case onBoundarySearcher != nil && notOnBoundarySearcher != nil:
		rv, err := NewDisjunctionSearcher(indexReader,
			[]search.Searcher{
				onBoundarySearcher,
				notOnBoundarySearcher,
			},
			0, options)
		if err != nil {
			closeOpened()
			return nil, err
		}
		return rv, nil
	case onBoundarySearcher != nil:
		return onBoundarySearcher, nil
	case notOnBoundarySearcher != nil:
		return notOnBoundarySearcher, nil
	}

	return NewMatchNoneSearcher(indexReader)
}
// geoMaxShift is four precision steps' worth of bits.
var geoMaxShift = document.GeoPrecisionStep * 4
// geoDetailLevel is the level at which relateAndRecurse stops subdividing
// and emits a term for any cell that merely intersects the search rectangle.
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
// ComputeGeoRange computes the prefix-coded terms needed to cover the
// search rectangle (sminLon, sminLat)-(smaxLon, smaxLat).
//
// The value range starting at term and spanning the low shift+1 bits is
// split in half at bit position shift; each half is passed to
// relateAndRecurse, which may emit a term for it, recurse further, or
// discard it. The results of both halves are concatenated.
//
// Terms are returned in two lists: onBoundary for cells that only
// intersect the rectangle (populated only when checkBoundaries is true)
// and notOnBoundary for all other emitted cells.
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
// first value of the upper half: term with bit `shift` set
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
// last value of the whole range: term with the low shift+1 bits set
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
// shift+1 would be 64, and 1<<64 is 0 in uint64, so use all ones directly
upperMax = 0xffffffffffffffff
}
// last value of the lower half
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
// relateAndRecurse relates the cell covering the value range [start, end]
// to the search rectangle (sminLon, sminLat)-(smaxLon, smaxLat) and decides
// what to do with it:
//   - if the cell lies within the rectangle (and res is a multiple of the
//     precision step), or the cell is at geoDetailLevel and intersects the
//     rectangle, a single prefix-coded term for the cell is emitted;
//   - if the cell is above geoDetailLevel and intersects the rectangle, it
//     is subdivided via ComputeGeoRange with res-1;
//   - otherwise nothing is emitted.
// An emitted term goes to onBoundary only when the cell is not within the
// rectangle and checkBoundaries is true; otherwise it goes to notOnBoundary.
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
// corners of the cell, decoded from the first and last value of the range
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
// intersecting but not contained: hits need an exact check later
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
// partially overlapping and still coarse: subdivide further
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
}
return nil, nil
}
// buildRectFilter returns a FilterFunc that keeps a document only when the
// geo point stored in field lies inside the bounding box
// (minLon, minLat)-(maxLon, maxLat).
//
// The point is recovered by visiting the document's terms for field and
// decoding the prefix-coded term whose shift is 0. Documents for which no
// such term is found, or for which visiting the terms fails, are rejected.
// If several shift-0 terms are visited, the last one decoded wins.
func buildRectFilter(indexReader index.IndexReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
}
return false
}
}

View file

@ -0,0 +1,115 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
// NewGeoPointDistanceSearcher builds a searcher for points of field that lie
// within dist of (centerLon, centerLat).
//
// It first searches the bounding box computed by geo.ComputeBoundingBox for
// the circle, then wraps that searcher in a filtering searcher that applies
// the distance filter from buildDistFilter to every candidate.
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
	centerLat, dist float64, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	// bounding box enclosing the circle
	tlLon, tlLat, brLon, brLat := geo.ComputeBoundingBox(centerLon, centerLat, dist)

	// candidate documents: everything inside the box
	candidates, err := boxSearcher(indexReader, tlLon, tlLat, brLon, brLat,
		field, boost, options)
	if err != nil {
		return nil, err
	}

	// keep only candidates that pass the actual distance check
	withinDist := buildDistFilter(indexReader, field, centerLon, centerLat, dist)
	return NewFilteringSearcher(candidates, withinDist), nil
}
// boxSearcher builds a searcher for the described bounding box.
//
// When bottomRightLon is less than topLeftLon the box is treated as crossing
// the date line and is rewritten as two boxes (one starting at longitude
// -180, one ending at longitude 180) joined through a disjunction searcher.
// Searchers opened before a failure are closed before the error is returned.
func boxSearcher(indexReader index.IndexReader,
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	crossesDateLine := bottomRightLon < topLeftLon
	if !crossesDateLine {
		// a single geo bounding box searcher covers the whole box
		single, err := NewGeoBoundingBoxSearcher(indexReader,
			topLeftLon, bottomRightLat, bottomRightLon, topLeftLat,
			field, boost, options, false)
		if err != nil {
			return nil, err
		}
		return single, nil
	}

	// first part: from longitude -180 up to bottomRightLon
	firstPart, err := NewGeoBoundingBoxSearcher(indexReader,
		-180, bottomRightLat, bottomRightLon, topLeftLat,
		field, boost, options, false)
	if err != nil {
		return nil, err
	}

	// second part: from topLeftLon up to longitude 180
	secondPart, err := NewGeoBoundingBoxSearcher(indexReader,
		topLeftLon, bottomRightLat, 180, topLeftLat,
		field, boost, options, false)
	if err != nil {
		_ = firstPart.Close()
		return nil, err
	}

	joined, err := NewDisjunctionSearcher(indexReader,
		[]search.Searcher{firstPart, secondPart}, 0, options)
	if err != nil {
		_ = firstPart.Close()
		_ = secondPart.Close()
		return nil, err
	}
	return joined, nil
}
// buildDistFilter returns a FilterFunc that accepts a document match only
// when a point decoded from the document's terms for field is close enough
// to (centerLon, centerLat): geo.Haversin of the two points must be at most
// maxDist/1000.
//
// NOTE(review): the division by 1000 suggests maxDist is in meters and
// geo.Haversin returns kilometers — confirm against the geo package.
//
// Only terms whose prefix-coded shift is 0 are decoded. If several such
// terms are visited, the last one decoded successfully is the one tested.
// A visit error, or no decodable term, rejects the document.
func buildDistFilter(indexReader index.IndexReader, field string,
	centerLon, centerLat, maxDist float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		var (
			pointLon, pointLat float64
			decoded            bool
		)
		visit := func(_ string, term []byte) {
			encoded := numeric.PrefixCoded(term)
			// skip anything that is not a shift-0 value
			if shift, shiftErr := encoded.Shift(); shiftErr != nil || shift != 0 {
				return
			}
			hash, decodeErr := encoded.Int64()
			if decodeErr != nil {
				return
			}
			pointLon = geo.MortonUnhashLon(uint64(hash))
			pointLat = geo.MortonUnhashLat(uint64(hash))
			decoded = true
		}
		visitErr := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
			[]string{field}, visit)
		if visitErr != nil || !decoded {
			return false
		}
		return geo.Haversin(pointLon, pointLat, centerLon, centerLat) <= maxDist/1000
	}
}

View file

@ -0,0 +1,105 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// MatchAllSearcher walks the document IDs produced by the reader obtained
// from IndexReader.DocIDReaderAll and scores each of them with a constant
// scorer.
type MatchAllSearcher struct {
indexReader index.IndexReader
reader index.DocIDReader // ID source returned by DocIDReaderAll
scorer *scorer.ConstantScorer
count uint64 // IndexReader.DocCount value captured at construction
}
// NewMatchAllSearcher opens a reader over all document IDs of indexReader,
// records the index's document count and pairs them with a constant scorer
// built from a constant of 1.0, boost and options.
//
// If the document count cannot be read, the already opened ID reader is
// closed before the error is returned.
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) {
	allDocs, err := indexReader.DocIDReaderAll()
	if err != nil {
		return nil, err
	}

	docCount, err := indexReader.DocCount()
	if err != nil {
		_ = allDocs.Close()
		return nil, err
	}

	rv := &MatchAllSearcher{
		indexReader: indexReader,
		reader:      allDocs,
		scorer:      scorer.NewConstantScorer(1.0, boost, options),
		count:       docCount,
	}
	return rv, nil
}
// Count returns the document count that was captured when the searcher was
// constructed.
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}
// Weight returns the weight reported by the underlying constant scorer.
func (s *MatchAllSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the underlying constant scorer.
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next reads the next document ID from the underlying reader and returns it
// as a scored DocumentMatch. It returns (nil, nil) when the reader yields a
// nil ID, and (nil, err) when the reader fails.
func (s *MatchAllSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	id, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case id == nil:
		// reader is exhausted
		return nil, nil
	}
	return s.scorer.Score(ctx, id), nil
}
// Advance asks the underlying reader to advance to ID and returns the ID it
// lands on as a scored DocumentMatch. It returns (nil, nil) when the reader
// yields a nil ID, and (nil, err) when the reader fails.
func (s *MatchAllSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	id, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case id == nil:
		// nothing at or after the requested position
		return nil, nil
	}
	return s.scorer.Score(ctx, id), nil
}
// Close closes the underlying document ID reader and returns its error.
func (s *MatchAllSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for this searcher.
func (s *MatchAllSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for this searcher.
func (s *MatchAllSearcher) DocumentMatchPoolSize() int {
return 1
}

View file

@ -0,0 +1,62 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// MatchNoneSearcher is a searcher whose Next and Advance never return a
// match.
type MatchNoneSearcher struct {
indexReader index.IndexReader // stored at construction; not read by the methods in this file
}
// NewMatchNoneSearcher returns a MatchNoneSearcher that remembers
// indexReader. The returned error is always nil.
func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, error) {
	rv := new(MatchNoneSearcher)
	rv.indexReader = indexReader
	return rv, nil
}
// Count always returns 0: this searcher never matches a document.
func (s *MatchNoneSearcher) Count() uint64 {
	return 0
}
// Weight always returns 0.
func (s *MatchNoneSearcher) Weight() float64 {
	return 0
}
// SetQueryNorm is a no-op; qnorm is ignored.
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {
}
// Next always returns (nil, nil): there is never a match to report.
func (s *MatchNoneSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return nil, nil
}
// Advance always returns (nil, nil), regardless of ID.
func (s *MatchNoneSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return nil, nil
}
// Close does nothing and always returns nil.
func (s *MatchNoneSearcher) Close() error {
return nil
}
// Min always returns 0 for this searcher.
func (s *MatchNoneSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 0 for this searcher.
func (s *MatchNoneSearcher) DocumentMatchPoolSize() int {
return 0
}

View file

@ -0,0 +1,85 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewMultiTermSearcher builds one term searcher per entry of terms (all on
// field, with the given boost and options) and combines them through
// newMultiTermSearcherBytes, passing limit through unchanged.
//
// If any term searcher cannot be created, every searcher opened so far is
// closed and the error is returned.
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	qsearchers := make([]search.Searcher, 0, len(terms))
	for _, term := range terms {
		// Keep the concrete result in a local and only store it on success.
		// Storing a nil *TermSearcher into a search.Searcher slot would
		// produce a non-nil interface, and closing it during cleanup would
		// dereference a nil pointer.
		ts, err := NewTermSearcher(indexReader, term, field, boost, options)
		if err != nil {
			for _, opened := range qsearchers {
				_ = opened.Close()
			}
			return nil, err
		}
		qsearchers = append(qsearchers, ts)
	}
	// build disjunction searcher of these terms
	return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
		options, limit)
}
// NewMultiTermSearcherBytes is the []byte-term variant of
// NewMultiTermSearcher: it builds one term searcher per entry of terms via
// NewTermSearcherBytes and combines them through newMultiTermSearcherBytes,
// passing limit through unchanged.
//
// If any term searcher cannot be created, every searcher opened so far is
// closed and the error is returned.
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	qsearchers := make([]search.Searcher, 0, len(terms))
	for _, term := range terms {
		// Keep the concrete result in a local and only store it on success.
		// Storing a nil *TermSearcher into a search.Searcher slot would
		// produce a non-nil interface, and closing it during cleanup would
		// dereference a nil pointer.
		ts, err := NewTermSearcherBytes(indexReader, term, field, boost, options)
		if err != nil {
			for _, opened := range qsearchers {
				_ = opened.Close()
			}
			return nil, err
		}
		qsearchers = append(qsearchers, ts)
	}
	return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
		options, limit)
}
// newMultiTermSearcherBytes joins the given searchers into a disjunction
// (minimum 0) via newDisjunctionSearcher, passing limit through.
//
// If the disjunction cannot be built, every searcher in searchers is closed
// and the error is returned. The field and boost arguments are not used by
// this function.
func newMultiTermSearcherBytes(indexReader index.IndexReader,
	searchers []search.Searcher, field string, boost float64,
	options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	disjunction, err := newDisjunctionSearcher(indexReader, searchers, 0,
		options, limit)
	if err == nil {
		return disjunction, nil
	}
	// construction failed: release the inputs handed to us
	for i := range searchers {
		_ = searchers[i].Close()
	}
	return nil, err
}

View file

@ -1,33 +1,31 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searchers
package searcher
import (
"bytes"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
type NumericRangeSearcher struct {
indexReader index.IndexReader
min *float64
max *float64
field string
explain bool
searcher *DisjunctionSearcher
}
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
func NewNumericRangeSearcher(indexReader index.IndexReader,
min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
// account for unbounded edges
if min == nil {
negInf := math.Inf(-1)
@ -46,63 +44,23 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
inclusiveMax = &defaultInclusiveMax
}
// find all the ranges
minInt64 := numeric_util.Float64ToInt64(*min)
minInt64 := numeric.Float64ToInt64(*min)
if !*inclusiveMin && minInt64 != math.MaxInt64 {
minInt64++
}
maxInt64 := numeric_util.Float64ToInt64(*max)
maxInt64 := numeric.Float64ToInt64(*max)
if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64--
}
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, len(terms))
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, 1.0, explain)
if err != nil {
return nil, err
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr()
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
return nil, err
}
return &NumericRangeSearcher{
indexReader: indexReader,
min: min,
max: max,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *NumericRangeSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()
}
func (s *NumericRangeSearcher) Advance(ID string) (*search.DocumentMatch, error) {
return s.searcher.Advance(ID)
}
func (s *NumericRangeSearcher) Close() error {
return s.searcher.Close()
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
type termRange struct {
@ -111,7 +69,7 @@ type termRange struct {
}
func (t *termRange) Enumerate() [][]byte {
rv := make([][]byte, 0)
var rv [][]byte
next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 {
rv = append(rv, next)
@ -126,7 +84,7 @@ func incrementBytes(in []byte) []byte {
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
// didnt' overflow, so stop
// didn't overflow, so stop
break
}
}
@ -136,7 +94,7 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte {
rv := make([][]byte, 0)
var rv [][]byte
for _, tri := range tr {
trie := tri.Enumerate()
rv = append(rv, trie...)
@ -173,7 +131,8 @@ func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
@ -197,8 +156,8 @@ func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
func newRange(minBound, maxBound int64, shift uint) *termRange {
maxBound |= (int64(1) << shift) - int64(1)
minBytes := numeric_util.MustNewPrefixCodedInt64(minBound, shift)
maxBytes := numeric_util.MustNewPrefixCodedInt64(maxBound, shift)
minBytes := numeric.MustNewPrefixCodedInt64(minBound, shift)
maxBytes := numeric.MustNewPrefixCodedInt64(maxBound, shift)
return newRangeBytes(minBytes, maxBytes)
}
@ -208,7 +167,3 @@ func newRangeBytes(minBytes, maxBytes []byte) *termRange {
endTerm: maxBytes,
}
}
func (s *NumericRangeSearcher) Min() int {
return 0
}

View file

@ -0,0 +1,341 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// PhraseSearcher finds documents matched by a conjunction over all phrase
// positions (mustSearcher) and then keeps only those whose term locations
// satisfy the phrase constraints checked by findPhrasePaths.
type PhraseSearcher struct {
indexReader index.IndexReader
mustSearcher *ConjunctionSearcher // pre-condition: every phrase position must match
queryNorm float64 // computed by computeQueryNorm
currMust *search.DocumentMatch // current candidate from mustSearcher; nil when exhausted
slop int // NOTE(review): not read by any code visible here; phrase checks pass a slop of 0
terms [][]string // one entry per phrase position; each entry lists the accepted terms
initialized bool // set once the first candidate has been fetched
}
// NewPhraseSearcher builds a phrase searcher for a flat list of terms by
// wrapping each term in its own single-element slice and delegating to
// NewMultiPhraseSearcher.
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
	// one phrase position per term, each accepting exactly that term
	positions := make([][]string, 0, len(terms))
	for i := range terms {
		positions = append(positions, []string{terms[i]})
	}
	return NewMultiPhraseSearcher(indexReader, positions, field, options)
}
// NewMultiPhraseSearcher builds a phrase searcher in which every phrase
// position may accept several alternative terms.
//
// terms holds one slice per phrase position. A position with exactly one
// non-empty term gets a term searcher; a position with more than one term
// gets a disjunction (minimum 1) over term searchers for its non-empty
// terms; any other position contributes no searcher. All position searchers
// are combined in a conjunction searcher. Term vectors are always requested
// (options.IncludeTermVectors is forced to true).
//
// On any construction error every searcher opened so far is closed,
// including the term searchers of a partially built position, and a wrapped
// error is returned.
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
	options.IncludeTermVectors = true
	var termPositionSearchers []search.Searcher

	// closeOpened releases the searchers of the position currently being
	// built (pending) together with all completed position searchers.
	closeOpened := func(pending []search.Searcher) {
		for _, ts := range pending {
			_ = ts.Close()
		}
		for _, ts := range termPositionSearchers {
			_ = ts.Close()
		}
	}

	for _, termPos := range terms {
		if len(termPos) == 1 && termPos[0] != "" {
			// single term
			ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
			if err != nil {
				closeOpened(nil)
				return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
			}
			termPositionSearchers = append(termPositionSearchers, ts)
		} else if len(termPos) > 1 {
			// multiple terms
			var termSearchers []search.Searcher
			for _, term := range termPos {
				if term == "" {
					continue
				}
				ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
				if err != nil {
					// also close the searchers already opened for this position
					closeOpened(termSearchers)
					return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
				}
				termSearchers = append(termSearchers, ts)
			}
			disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
			if err != nil {
				// the disjunction was not built, so its inputs are still ours to close
				closeOpened(termSearchers)
				return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
			}
			termPositionSearchers = append(termPositionSearchers, disjunction)
		}
	}

	mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
	if err != nil {
		closeOpened(nil)
		return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
	}

	// build our searcher
	rv := PhraseSearcher{
		indexReader:  indexReader,
		mustSearcher: mustSearcher,
		terms:        terms,
	}
	rv.computeQueryNorm()
	return &rv, nil
}
// computeQueryNorm sets s.queryNorm to 1/sqrt(w), where w is the weight
// reported by mustSearcher (0 when mustSearcher is nil), and then passes the
// norm on to mustSearcher.
func (s *PhraseSearcher) computeQueryNorm() {
	hasMust := s.mustSearcher != nil

	// accumulate the weights reported by the downstream searcher
	var weightSum float64
	if hasMust {
		weightSum += s.mustSearcher.Weight()
	}

	// derive the norm from that sum
	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// and hand it to the downstream searcher
	if hasMust {
		s.mustSearcher.SetQueryNorm(s.queryNorm)
	}
}
// initSearchers fetches the first candidate from mustSearcher and marks the
// searcher as initialized. On error the initialized flag is left unset.
func (s *PhraseSearcher) initSearchers(ctx *search.SearchContext) error {
	if err := s.advanceNextMust(ctx); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// advanceNextMust stores the next match of mustSearcher in s.currMust and
// returns the error from that call. It does nothing when mustSearcher is nil.
func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error {
	if s.mustSearcher == nil {
		return nil
	}
	// currMust is overwritten even when Next fails, as before
	var nextErr error
	s.currMust, nextErr = s.mustSearcher.Next(ctx)
	return nextErr
}
// Weight returns the weight reported by mustSearcher.
func (s *PhraseSearcher) Weight() float64 {
return s.mustSearcher.Weight()
}
// SetQueryNorm forwards qnorm to mustSearcher. It does not update
// s.queryNorm.
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
s.mustSearcher.SetQueryNorm(qnorm)
}
// Next returns the next candidate from mustSearcher that also satisfies the
// phrase constraints, or (nil, nil) once the candidates are exhausted.
//
// The first call fetches the initial candidate. For every candidate the
// following one is fetched before the result is returned, so s.currMust
// always points at the candidate to examine on the next call.
func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for s.currMust != nil {
		// test the current candidate against the phrase constraints
		hit := s.checkCurrMustMatch(ctx)

		// move on before returning, ready for the next loop turn or call
		if err := s.advanceNextMust(ctx); err != nil {
			return nil, err
		}

		if hit != nil {
			return hit, nil
		}
	}

	return nil, nil
}
// checkCurrMustMatch is solely concerned with determining whether the
// DocumentMatch pointed to by s.currMust (which satisfies the pre-condition
// searcher) also satisfies the phrase constraints. If so, it returns that
// DocumentMatch with its Locations replaced by the locations that took part
// in a phrase match; otherwise it returns nil.
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
	phraseLocations := search.FieldTermLocationMap{}
	total := 0
	// Results are normally expected in a single field, but several fields
	// are allowed. Phrase constraints can only be satisfied within one
	// field, so each field is checked independently.
	for field, tlm := range s.currMust.Locations {
		n, fieldLocations := s.checkCurrMustMatchField(ctx, tlm)
		if n <= 0 {
			continue
		}
		total += n
		phraseLocations[field] = fieldLocations
	}

	if total <= 0 {
		return nil
	}

	// reuse the candidate itself as the returned match
	hit := s.currMust
	hit.Locations = phraseLocations
	return hit
}
// checkCurrMustMatchField is solely concerned with determining whether one
// particular field's term locations (tlm) satisfy the phrase constraints,
// possibly more than once. It returns the number of phrase paths found
// together with the locations of all those paths merged into one
// TermLocationMap; when no path is found the count is 0 and the map is
// empty.
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
	paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
	merged := make(search.TermLocationMap, len(s.terms))
	for i := range paths {
		paths[i].MergeInto(merged)
	}
	return len(paths), merged
}
// phrasePart is one step of a phrase path: a term together with the
// location at which it was matched.
type phrasePart struct {
term string
loc *search.Location
}
// phrasePath is the sequence of phrase parts built up by findPhrasePaths.
type phrasePath []*phrasePart
// MergeInto appends the location of every part of the path to the entry of
// in that is keyed by the part's term.
func (p phrasePath) MergeInto(in search.TermLocationMap) {
	for i := range p {
		part := p[i]
		in[part.term] = append(in[part.term], part.loc)
	}
}
// findPhrasePaths identifies phrase matches from a set of known term
// locations. The implementation is recursive, so care must be taken with
// arguments and return values.
//
// prevPos       - position of the previously matched term; 0 means there is
//                 no previous position yet (as on the first invocation)
// ap            - array positions of the previously matched location; only
//                 compared when prevPos is not 0
// phraseTerms   - the remaining phrase positions, each listing the terms
//                 accepted at that position; an empty list or a single
//                 empty string is a placeholder (don't care)
// tlm           - the Term Location Map containing all relevant term
//                 locations
// p             - the current path being explored (extended in recursive
//                 calls); this is the primary state built during traversal
// remainingSlop - how much total distance from the exact expected positions
//                 may still be spent on the remaining terms
//
// Returns a slice of paths, or nil if the invocation did not find any
// successful paths.
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
	// no more terms
	if len(phraseTerms) < 1 {
		return []phrasePath{p}
	}

	car := phraseTerms[0]
	cdr := phraseTerms[1:]

	// empty term is treated as match (continue)
	if len(car) == 0 || (len(car) == 1 && car[0] == "") {
		nextPos := prevPos + 1
		if prevPos == 0 {
			// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
			nextPos = 0 // don't advance nextPos if prevPos was 0
		}
		return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
	}

	var rv []phrasePath
	// locations for this term
	for _, carTerm := range car {
		locations := tlm[carTerm]
		for _, loc := range locations {
			if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
				// if the array positions are wrong, can't match, try next location
				continue
			}

			// compute distance from previous phrase term
			dist := 0
			if prevPos != 0 {
				dist = editDistance(prevPos+1, loc.Pos)
			}

			// if enough slop remaining, continue recursively
			if prevPos == 0 || (remainingSlop-dist) >= 0 {
				// This location works, add it to the path (but not for empty
				// term). The capacity of p is clamped to its length so that
				// append always copies: otherwise sibling iterations could
				// write into the same backing array and overwrite the last
				// element of paths that were already returned.
				px := append(p[:len(p):len(p)], &phrasePart{term: carTerm, loc: loc})
				rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
			}
		}
	}
	return rv
}
// editDistance returns the absolute difference between the positions p1 and
// p2.
//
// The difference is computed on the unsigned values, so it never wraps. A
// difference too large to be represented as an int is reported as the
// largest int instead of turning into a negative number.
func editDistance(p1, p2 uint64) int {
	const maxInt = int(^uint(0) >> 1)

	gap := p1 - p2
	if p2 > p1 {
		gap = p2 - p1
	}
	if gap > uint64(maxInt) {
		return maxInt
	}
	return int(gap)
}
// Advance moves mustSearcher to ID, stores the candidate it lands on in
// s.currMust and then returns the result of Next, i.e. the first candidate
// from there on that satisfies the phrase constraints.
//
// If the searcher has not been initialized yet, the first candidate is
// fetched before advancing.
func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	// currMust is overwritten even when Advance fails, as before
	var advErr error
	if s.currMust, advErr = s.mustSearcher.Advance(ctx, ID); advErr != nil {
		return nil, advErr
	}
	return s.Next(ctx)
}
// Count returns the count reported by mustSearcher. This is an upper bound
// rather than the number of phrase matches, since candidates may still fail
// the phrase constraints.
func (s *PhraseSearcher) Count() uint64 {
// for now return a worst case
return s.mustSearcher.Count()
}
// Close closes mustSearcher and returns its error. It returns nil when
// mustSearcher is nil.
func (s *PhraseSearcher) Close() error {
	if s.mustSearcher == nil {
		return nil
	}
	return s.mustSearcher.Close()
}
// Min always returns 0 for this searcher.
func (s *PhraseSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize returns the pool size reported by mustSearcher plus
// one.
func (s *PhraseSearcher) DocumentMatchPoolSize() int {
return s.mustSearcher.DocumentMatchPoolSize() + 1
}

View file

@ -0,0 +1,80 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"regexp"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT,
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
//
// When the pattern is a pure literal (LiteralPrefix reports it as complete)
// that literal is the only candidate term; otherwise the candidate terms are
// collected from the field's term dictionary by findRegexpCandidateTerms.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	literal, isLiteral := pattern.LiteralPrefix()
	if isLiteral {
		// there is no pattern, only a fixed term
		return NewMultiTermSearcher(indexReader, []string{literal}, field,
			boost, options, true)
	}

	candidates, err := findRegexpCandidateTerms(indexReader, pattern, field,
		literal)
	if err != nil {
		return nil, err
	}
	return NewMultiTermSearcher(indexReader, candidates, field, boost,
		options, true)
}
// findRegexpCandidateTerms enumerates the term dictionary of field and
// returns every term that pattern matches in full (the match must start at
// offset 0 and end at the end of the term).
//
// When prefixTerm is non-empty only the dictionary entries with that prefix
// are enumerated. The dictionary is closed before returning; a close error
// is reported only if no other error occurred. If the number of collected
// terms trips tooManyClauses, the terms collected so far are returned
// together with tooManyClausesErr().
func findRegexpCandidateTerms(indexReader index.IndexReader,
	pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		// Without a dictionary there is nothing to close or enumerate;
		// going on would call methods on a dictionary that is presumably
		// nil and would overwrite this error.
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate the terms and check against regexp
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		matchPos := pattern.FindStringIndex(tfd.Term)
		if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}

View file

@ -0,0 +1,122 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// TermSearcher iterates over the documents produced by a term field reader
// and scores each of them with a term query scorer.
type TermSearcher struct {
indexReader index.IndexReader
reader index.TermFieldReader // postings source for the term/field pair
scorer *scorer.TermQueryScorer
tfd index.TermFieldDoc // scratch value, reset and handed to the reader on every Next/Advance
}
// NewTermSearcher builds a TermSearcher for term in field. It is the string
// convenience form of NewTermSearcherBytes and simply delegates to it, so
// both constructors share one implementation.
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options)
}
// NewTermSearcherBytes builds a TermSearcher for term in field.
//
// It opens a term field reader (term vectors are requested according to
// options.IncludeTermVectors) and creates a term query scorer from boost,
// the index's document count and the reader's own count. If the document
// count cannot be read, the reader is closed before the error is returned.
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	termReader, err := indexReader.TermFieldReader(term, field, true, true,
		options.IncludeTermVectors)
	if err != nil {
		return nil, err
	}

	docCount, err := indexReader.DocCount()
	if err != nil {
		_ = termReader.Close()
		return nil, err
	}

	rv := &TermSearcher{
		indexReader: indexReader,
		reader:      termReader,
		scorer: scorer.NewTermQueryScorer(term, field, boost, docCount,
			termReader.Count(), options),
	}
	return rv, nil
}
// Count returns the count reported by the underlying term field reader.
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}
// Weight returns the weight reported by the term query scorer.
func (s *TermSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the term query scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next reads the next posting from the term field reader, reusing the
// searcher's scratch TermFieldDoc, and returns it as a scored DocumentMatch.
// It returns (nil, nil) when the reader yields no posting and (nil, err)
// when the reader fails.
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	posting, err := s.reader.Next(s.tfd.Reset())
	switch {
	case err != nil:
		return nil, err
	case posting == nil:
		// reader is exhausted
		return nil, nil
	}
	return s.scorer.Score(ctx, posting), nil
}
// Advance asks the term field reader to advance to ID, reusing the
// searcher's scratch TermFieldDoc, and returns the posting it lands on as a
// scored DocumentMatch. It returns (nil, nil) when the reader yields no
// posting and (nil, err) when the reader fails.
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	posting, err := s.reader.Advance(ID, s.tfd.Reset())
	switch {
	case err != nil:
		return nil, err
	case posting == nil:
		// nothing at or after the requested position
		return nil, nil
	}
	return s.scorer.Score(ctx, posting), nil
}
// Close closes the underlying term field reader and returns its error.
func (s *TermSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for this searcher.
func (s *TermSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for this searcher.
func (s *TermSearcher) DocumentMatchPoolSize() int {
return 1
}

View file

@ -0,0 +1,39 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewTermPrefixSearcher builds a searcher matching documents that contain,
// in field, any term starting with prefix.
//
// The terms are collected from the field's prefix dictionary and handed to
// NewMultiTermSearcher with the clause limit enabled. The dictionary is
// closed once enumeration is finished; an enumeration error, or failing
// that a close error, is returned instead of a searcher.
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	// find the terms with this prefix
	fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
	if err != nil {
		return nil, err
	}

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, err = fieldDict.Next()
	}

	// The dictionary is not needed past this point. Always close it, but
	// let an enumeration error take precedence over a close error.
	if cerr := fieldDict.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return nil, err
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View file

@ -0,0 +1,75 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// NewTermRangeSearcher builds a searcher over the terms of field that lie
// between min and max.
//
// A nil inclusiveMin defaults to true and a nil inclusiveMax defaults to
// false. A nil min is replaced by the empty term; a nil max is passed to
// FieldDictRange as nil. The terms returned by the dictionary are trimmed
// of min and/or max when the corresponding bound is exclusive. If the
// dictionary yields no terms, or excluding min removes the only term, the
// result of NewMatchNoneSearcher is returned; otherwise the remaining terms
// are handed to NewMultiTermSearcher with boost and options.
//
// The caller's min and max slices are never written to.
func NewTermRangeSearcher(indexReader index.IndexReader,
	min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (search.Searcher, error) {

	if inclusiveMin == nil {
		defaultInclusiveMin := true
		inclusiveMin = &defaultInclusiveMin
	}
	if inclusiveMax == nil {
		defaultInclusiveMax := false
		inclusiveMax = &defaultInclusiveMax
	}

	if min == nil {
		min = []byte{}
	}

	rangeMax := max
	if rangeMax != nil {
		// the term dictionary range end has an unfortunate implementation,
		// so the end key is max followed by a zero byte. Build it in a fresh
		// slice: appending to max directly could write that byte into spare
		// capacity of the caller's backing array.
		rangeMax = make([]byte, len(max)+1)
		copy(rangeMax, max)
	}

	// find the terms within this range
	fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
	if err != nil {
		return nil, err
	}

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if len(terms) < 1 {
		return NewMatchNoneSearcher(indexReader)
	}

	// if our term list included the min, it would be the first item
	if !*inclusiveMin && min != nil && string(min) == terms[0] {
		terms = terms[1:]
		// check again: min may have been the only entry, and the max check
		// below indexes the last element
		if len(terms) < 1 {
			return NewMatchNoneSearcher(indexReader)
		}
	}

	// if our term list included the max, it would be the last item
	if !*inclusiveMax && max != nil && string(max) == terms[len(terms)-1] {
		terms = terms[:len(terms)-1]
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View file

@ -1,80 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"regexp"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store/inmem"
"github.com/blevesearch/bleve/index/upside_down"
)
// twoDocIndex is the index shared by the tests in this package; init opens
// it and fills it with twoDocIndexDocs.
var twoDocIndex index.Index

// init creates twoDocIndex on top of an in-memory store, opens it, and
// indexes every document in twoDocIndexDocs. Any failure panics, since the
// tests cannot run without the fixture.
func init() {
	// the store constructor's error used to be discarded; fail loudly here
	// instead of handing a possibly unusable store to the index
	inMemStore, err := inmem.New()
	if err != nil {
		panic(err)
	}
	analysisQueue := upside_down.NewAnalysisQueue(1)
	twoDocIndex = upside_down.NewUpsideDownCouch(inMemStore, analysisQueue)
	if err := twoDocIndex.Open(); err != nil {
		panic(err)
	}
	for _, doc := range twoDocIndexDocs {
		if err := twoDocIndex.Update(doc); err != nil {
			panic(err)
		}
	}
}
// create a simpler analyzer which will support these tests
//
// testAnalyzer only sets a Tokenizer, built from the regular expression
// `\w+`; every other Analyzer field is left at its zero value.
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// sets up some mock data used in many tests in this package
var twoDocIndexDescIndexingOptions = document.DefaultTextIndexingOptions | document.IncludeTermVectors
var twoDocIndexDocs = []*document.Document{
// must have 4/4 beer
document.NewDocument("1").
AddField(document.NewTextField("name", []uint64{}, []byte("marty"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("2").
AddField(document.NewTextField("name", []uint64{}, []byte("steve"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("3").
AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 65/65 beer
document.NewDocument("4").
AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
// must have 0/x beer
document.NewDocument("5").
AddField(document.NewTextField("name", []uint64{}, []byte("bobert"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
}
// scoresCloseEnough reports whether a and b differ by strictly less than
// 0.001, the tolerance the tests use when comparing computed scores.
func scoresCloseEnough(a, b float64) bool {
	const tolerance = 0.001
	delta := math.Abs(a - b)
	return delta < tolerance
}

View file

@ -1,30 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/search"
)
// OrderedSearcherList is a list of searchers that implements sort.Interface,
// ordering its elements by the value each one reports from Count.
type OrderedSearcherList []search.Searcher

// Len returns the number of searchers in the list.
func (list OrderedSearcherList) Len() int {
	return len(list)
}

// Less reports whether the searcher at index i has a smaller Count than the
// searcher at index j.
func (list OrderedSearcherList) Less(i, j int) bool {
	left, right := list[i].Count(), list[j].Count()
	return left < right
}

// Swap exchanges the searchers at indexes i and j.
func (list OrderedSearcherList) Swap(i, j int) {
	held := list[i]
	list[i] = list[j]
	list[j] = held
}

View file

@ -1,335 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// BooleanSearcher combines up to three child searchers: a must searcher, a
// should searcher and a must-not searcher. Any of the three may be nil.
// Candidates come from the must searcher, or from the should searcher when
// there is no must searcher; the must-not searcher is only used to reject
// candidates.
type BooleanSearcher struct {
// initialized is set once initSearchers has positioned the children
initialized bool
indexReader index.IndexReader
mustSearcher search.Searcher
shouldSearcher search.Searcher
mustNotSearcher search.Searcher
// queryNorm is computed by computeQueryNorm and pushed to the must and
// should searchers
queryNorm float64
// currMust, currShould and currMustNot hold the most recent match fetched
// from the corresponding child (nil when that child is absent or exhausted)
currMust *search.DocumentMatch
currShould *search.DocumentMatch
currMustNot *search.DocumentMatch
// currentID is the ID of the candidate being evaluated; "" means there
// are no more candidates
currentID string
// NOTE(review): min is not read or written by any method visible in this
// file — confirm whether it is still needed
min uint64
scorer *scorers.ConjunctionQueryScorer
}
// NewBooleanSearcher assembles a BooleanSearcher from the given must, should
// and must-not searchers (each may be nil), creates its conjunction scorer
// with the explain flag, and computes the query norm for the children before
// returning. The returned error is always nil.
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) {
	booleanSearcher := &BooleanSearcher{
		indexReader:     indexReader,
		mustSearcher:    mustSearcher,
		shouldSearcher:  shouldSearcher,
		mustNotSearcher: mustNotSearcher,
		scorer:          scorers.NewConjunctionQueryScorer(explain),
	}
	booleanSearcher.computeQueryNorm()
	return booleanSearcher, nil
}
// computeQueryNorm sums the Weight of the must and should searchers, stores
// 1/sqrt(sum) as the query norm, and passes that norm to the same two
// searchers. The must-not searcher is not consulted.
func (s *BooleanSearcher) computeQueryNorm() {
	// the children that take part, in the order they are visited
	weighted := []search.Searcher{s.mustSearcher, s.shouldSearcher}

	// accumulate the children's weights
	total := 0.0
	for _, child := range weighted {
		if child != nil {
			total += child.Weight()
		}
	}

	// derive the norm from the accumulated weight
	s.queryNorm = 1.0 / math.Sqrt(total)

	// propagate the norm downstream
	for _, child := range weighted {
		if child != nil {
			child.SetQueryNorm(s.queryNorm)
		}
	}
}
// initSearchers fetches the first match from each non-nil child searcher
// (must, then should, then must-not), picks the first candidate ID, and marks
// the searcher as initialized. The first error from a child is returned and
// leaves the searcher uninitialized.
func (s *BooleanSearcher) initSearchers() error {
	var err error

	if s.mustSearcher != nil {
		if s.currMust, err = s.mustSearcher.Next(); err != nil {
			return err
		}
	}
	if s.shouldSearcher != nil {
		if s.currShould, err = s.shouldSearcher.Next(); err != nil {
			return err
		}
	}
	if s.mustNotSearcher != nil {
		if s.currMustNot, err = s.mustNotSearcher.Next(); err != nil {
			return err
		}
	}

	// the candidate comes from must when present, otherwise from should
	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.ID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.ID
	default:
		s.currentID = ""
	}

	s.initialized = true
	return nil
}
// advanceNextMust moves on to the next candidate: it steps the must searcher,
// or the should searcher when there is no must searcher, and updates
// currentID from the new match ("" when that searcher is exhausted).
func (s *BooleanSearcher) advanceNextMust() error {
	var err error

	if s.mustSearcher == nil {
		// no must clause: candidates are driven by the should searcher
		if s.currShould, err = s.shouldSearcher.Next(); err != nil {
			return err
		}
		if s.currShould == nil {
			s.currentID = ""
		} else {
			s.currentID = s.currShould.ID
		}
		return nil
	}

	if s.currMust, err = s.mustSearcher.Next(); err != nil {
		return err
	}
	if s.currMust == nil {
		s.currentID = ""
	} else {
		s.currentID = s.currMust.ID
	}
	return nil
}
// Weight returns the sum of the weights of the must and should searchers;
// a nil searcher contributes nothing.
func (s *BooleanSearcher) Weight() float64 {
	total := 0.0
	if must := s.mustSearcher; must != nil {
		total += must.Weight()
	}
	if should := s.shouldSearcher; should != nil {
		total += should.Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to the must searcher and then to the should
// searcher, skipping whichever is nil. The searcher's own queryNorm field is
// left unchanged.
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
	for _, child := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child == nil {
			continue
		}
		child.SetQueryNorm(qnorm)
	}
}
// Next returns the next document accepted by the boolean combination, or nil
// (with a nil error) once there are no more candidates.
//
// The candidate is s.currentID, which tracks the must searcher, or the should
// searcher when there is no must searcher. A candidate is rejected when the
// must-not searcher also matches it. Otherwise it is accepted and scored
// together with the should match when the should searcher matches the same
// ID, or scored from the must match alone when there is no should searcher or
// its Min() is 0. A candidate that satisfies neither condition is skipped.
// Whenever a candidate is accepted or skipped, advanceNextMust has already
// moved on to the following candidate before this method returns or loops.
func (s *BooleanSearcher) Next() (*search.DocumentMatch, error) {
// position the child searchers on first use
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
// an empty currentID means the driving searcher is exhausted
for s.currentID != "" {
if s.currMustNot != nil && s.currMustNot.ID < s.currentID {
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
} else if s.currMustNot != nil && s.currMustNot.ID == s.currentID {
// the candidate is excluded
err = s.advanceNextMust()
if err != nil {
return nil, err
}
continue
}
// the candidate survived must-not; now bring the should searcher in line
if s.currShould != nil && s.currShould.ID < s.currentID {
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
} else if s.currShould != nil && s.currShould.ID == s.currentID {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = []*search.DocumentMatch{
s.currMust,
s.currShould,
}
} else {
cons = []*search.DocumentMatch{
s.currShould,
}
}
rv = s.scorer.Score(cons)
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
rv = s.scorer.Score([]*search.DocumentMatch{s.currMust})
err = s.advanceNextMust()
if err != nil {
return nil, err
}
break
}
// the should clause was required but did not match: skip this candidate
err = s.advanceNextMust()
if err != nil {
return nil, err
}
}
return rv, nil
}
// Advance calls Advance(ID) on every non-nil child searcher (must, should,
// must-not, in that order), re-derives the current candidate from the
// result, and then returns whatever Next yields from that position. The
// children are initialized first if that has not happened yet.
func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	var err error
	if s.mustSearcher != nil {
		if s.currMust, err = s.mustSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}
	if s.shouldSearcher != nil {
		if s.currShould, err = s.shouldSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}
	if s.mustNotSearcher != nil {
		if s.currMustNot, err = s.mustNotSearcher.Advance(ID); err != nil {
			return nil, err
		}
	}

	// the candidate comes from must when present, otherwise from should
	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.ID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.ID
	default:
		s.currentID = ""
	}

	return s.Next()
}
// Count returns a worst-case figure: the Count of the must searcher plus the
// Count of the should searcher, skipping whichever is nil.
func (s *BooleanSearcher) Count() uint64 {
	var total uint64
	for _, child := range []search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			total += child.Count()
		}
	}
	return total
}
// Close closes the must, should and must-not searchers, skipping any that
// are nil. Every child is closed even if an earlier one fails, so a single
// failure does not leave the remaining searchers open; the first error
// encountered is returned.
func (s *BooleanSearcher) Close() error {
	var firstErr error
	children := []search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher}
	for _, child := range children {
		if child == nil {
			continue
		}
		// keep going after a failure, but remember only the first error
		if err := child.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a BooleanSearcher.
// NOTE(review): BooleanSearcher.Next treats a should searcher whose Min() is
// 0 as optional; presumably Min is the minimum number of clauses that must
// match — confirm against the Searcher interface.
func (s *BooleanSearcher) Min() int {
return 0
}

View file

@ -1,364 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestBooleanSearch builds boolean searchers (and two conjunctions wrapping
// a boolean searcher) over the shared twoDocIndex fixture and checks the
// IDs, scores and number of matches each one yields, in order.
func TestBooleanSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Local constructors: each builds a searcher on twoDocIndexReader with
	// explain enabled and fails the test immediately on error, so no
	// construction error can go unchecked.
	newTerm := func(text, field string, boost float64) search.Searcher {
		s, err := NewTermSearcher(twoDocIndexReader, text, field, boost, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	newConjunction := func(children ...search.Searcher) search.Searcher {
		s, err := NewConjunctionSearcher(twoDocIndexReader, children, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	newDisjunction := func(min float64, children ...search.Searcher) search.Searcher {
		s, err := NewDisjunctionSearcher(twoDocIndexReader, children, min, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	newBoolean := func(must, should, mustNot search.Searcher) search.Searcher {
		s, err := NewBooleanSearcher(twoDocIndexReader, must, should, mustNot, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}

	// test 0: must beer, should marty|dustin, must not steve
	booleanSearcher := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		newDisjunction(0, newTerm("marty", "name", 1.0), newTerm("dustin", "name", 1.0)),
		newDisjunction(0, newTerm("steve", "name", 1.0)),
	)

	// test 1: should marty|dustin, must not steve
	booleanSearcher2 := newBoolean(
		nil,
		newDisjunction(0, newTerm("marty", "name", 1.0), newTerm("dustin", "name", 1.0)),
		newDisjunction(0, newTerm("steve", "name", 1.0)),
	)

	// test 2: must not steve only
	booleanSearcher3 := newBoolean(
		nil,
		nil,
		newDisjunction(0, newTerm("steve", "name", 1.0)),
	)

	// test 3: must beer, must not steve
	booleanSearcher4 := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		nil,
		newDisjunction(0, newTerm("steve", "name", 1.0)),
	)

	// test 4: must beer, must not steve|marty
	booleanSearcher5 := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		nil,
		newDisjunction(0, newTerm("steve", "name", 1.0), newTerm("marty", "name", 1.0)),
	)

	// test 5: must beer, should marty|dustin with min 2
	booleanSearcher6 := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		newDisjunction(2, newTerm("marty", "name", 1.0), newTerm("dustin", "name", 1.0)),
		nil,
	)

	// test 6: conjunction of marty (boost 5) with a must-only boolean
	booleanSearcher7 := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		nil,
		nil,
	)
	martyTermSearcher7 := newTerm("marty", "name", 5.0)
	conjunctionSearcher7 := newConjunction(martyTermSearcher7, booleanSearcher7)

	// test 7: conjunction of the test 0 boolean with dustin (boost 5)
	booleanSearcher8 := newBoolean(
		newConjunction(newTerm("beer", "desc", 1.0)),
		newDisjunction(0, newTerm("marty", "name", 1.0), newTerm("dustin", "name", 1.0)),
		newDisjunction(0, newTerm("steve", "name", 1.0)),
	)
	dustinTermSearcher8a := newTerm("dustin", "name", 5.0)
	conjunctionSearcher8 := newConjunction(booleanSearcher8, dustinTermSearcher8a)

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: booleanSearcher,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 0.9818005051949021},
				{ID: "3", Score: 0.808709699395535},
				{ID: "4", Score: 0.34618161159873423},
			},
		},
		{
			searcher: booleanSearcher2,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 0.6775110856165737},
				{ID: "3", Score: 0.6775110856165737},
			},
		},
		// no MUST or SHOULD clauses yields no results
		{
			searcher: booleanSearcher3,
			results:  []*search.DocumentMatch{},
		},
		{
			searcher: booleanSearcher4,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 1.0},
				{ID: "3", Score: 0.5},
				{ID: "4", Score: 1.0},
			},
		},
		{
			searcher: booleanSearcher5,
			results: []*search.DocumentMatch{
				{ID: "3", Score: 0.5},
				{ID: "4", Score: 1.0},
			},
		},
		{
			searcher: booleanSearcher6,
			results:  []*search.DocumentMatch{},
		},
		// test a conjunction query with a nested boolean
		{
			searcher: conjunctionSearcher7,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 2.0097428702814377},
			},
		},
		{
			searcher: conjunctionSearcher8,
			results: []*search.DocumentMatch{
				{ID: "3", Score: 2.0681575785068107},
			},
		},
	}

	for testIndex, test := range tests {
		// hand the searcher to the deferred call as an argument so each
		// deferred Close targets its own searcher, not whatever the shared
		// loop variable holds when the test function returns
		defer func(s search.Searcher) {
			err := s.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -1,197 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// ConjunctionSearcher yields the documents matched by all of its child
// searchers.
type ConjunctionSearcher struct {
// initialized is set once initSearchers has fetched each child's first match
initialized bool
indexReader index.IndexReader
// searchers holds the children, sorted by the constructor
searchers OrderedSearcherList
// explain is stored by the constructor; it is not read by the methods
// visible in this file
explain bool
// queryNorm is computed by computeQueryNorm and pushed to the children
queryNorm float64
// currs[i] is the most recent match fetched from searchers[i]
currs []*search.DocumentMatch
// currentID is the ID of the candidate being evaluated; "" means there
// are no more candidates
currentID string
scorer *scorers.ConjunctionQueryScorer
}
// NewConjunctionSearcher creates a ConjunctionSearcher over qsearchers. The
// searchers are copied into a private list and sorted in ascending Count
// order (the caller's slice is left in its original order), and the query
// norm is computed for them before returning. The returned error is always
// nil.
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) {
	// private copy of the downstream searchers
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)

	// ascending by Count, as defined by OrderedSearcherList.Less
	sort.Sort(ordered)

	conjunction := &ConjunctionSearcher{
		indexReader: indexReader,
		explain:     explain,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorers.NewConjunctionQueryScorer(explain),
	}
	conjunction.computeQueryNorm()
	return conjunction, nil
}
// computeQueryNorm sums the Weight of every child searcher, stores
// 1/sqrt(sum) as the query norm, and passes that norm to every child.
func (s *ConjunctionSearcher) computeQueryNorm() {
	// accumulate the children's weights
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}

	// derive the norm from the accumulated weight
	s.queryNorm = 1.0 / math.Sqrt(total)

	// propagate the norm downstream
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers fetches the first match from every child searcher, takes the
// first child's match as the initial candidate ("" when it has none), and
// marks the searcher as initialized. The first error from a child is
// returned and leaves the searcher uninitialized.
func (s *ConjunctionSearcher) initSearchers() error {
	for i := range s.searchers {
		var err error
		if s.currs[i], err = s.searchers[i].Next(); err != nil {
			return err
		}
	}

	// with no children there is no candidate to set
	if len(s.currs) > 0 {
		s.currentID = ""
		if first := s.currs[0]; first != nil {
			s.currentID = first.ID
		}
	}

	s.initialized = true
	return nil
}
// Weight returns the sum of the weights of all child searchers.
func (s *ConjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. The searcher's own
// queryNorm field is left unchanged.
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by every child searcher, or nil
// (with a nil error) once any child is exhausted.
//
// s.currentID is the candidate. Each child is checked in turn: a child that
// is already past the candidate makes its own ID the new candidate and the
// scan restarts; a child that is behind is advanced to the candidate, and if
// it lands elsewhere that ID becomes the candidate and the scan restarts.
// When all children sit on the candidate, their matches are scored together
// and the first child is stepped forward to seed the following call.
func (s *ConjunctionSearcher) Next() (*search.DocumentMatch, error) {
// position the child searchers on first use
if !s.initialized {
err := s.initSearchers()
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
var err error
OUTER:
// an empty currentID means at least one child has run out of matches
for s.currentID != "" {
for i, termSearcher := range s.searchers {
if s.currs[i] != nil && s.currs[i].ID != s.currentID {
if s.currentID < s.currs[i].ID {
// this child is already past the candidate; adopt its ID and rescan
s.currentID = s.currs[i].ID
continue OUTER
}
// this reader doesn't have the currentID, try to advance
s.currs[i], err = termSearcher.Advance(s.currentID)
if err != nil {
return nil, err
}
if s.currs[i] == nil {
// child exhausted: no further document can match all children
s.currentID = ""
continue OUTER
}
if s.currs[i].ID != s.currentID {
// we just advanced, so it doesn't match, it must be greater
// no need to call next
s.currentID = s.currs[i].ID
continue OUTER
}
} else if s.currs[i] == nil {
// child exhausted: no further document can match all children
s.currentID = ""
continue OUTER
}
}
// if we get here, a doc matched all readers, sum the score and add it
rv = s.scorer.Score(s.currs)
// prepare for next entry
s.currs[0], err = s.searchers[0].Next()
if err != nil {
return nil, err
}
if s.currs[0] == nil {
s.currentID = ""
} else {
s.currentID = s.currs[0].ID
}
// don't continue now, wait for the next call to Next()
break
}
return rv, nil
}
// Advance calls Advance(ID) on every child searcher, makes ID the current
// candidate, and returns whatever Next yields from that position. The
// children are initialized first if that has not happened yet.
//
// A conjunction without child searchers returns nil, the same result Next
// gives for it.
func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	// With no children, setting a non-empty candidate below would send Next
	// into its loop, where it indexes s.searchers[0] and panics.
	if len(s.searchers) == 0 {
		return nil, nil
	}

	var err error
	for i, searcher := range s.searchers {
		s.currs[i], err = searcher.Advance(ID)
		if err != nil {
			return nil, err
		}
	}
	s.currentID = ID
	return s.Next()
}
// Count returns a worst-case figure: the sum of the Count of every child
// searcher.
func (s *ConjunctionSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher. All children are closed even if an
// earlier one fails, so a single failure does not leave the remaining
// searchers open; the first error encountered is returned.
func (s *ConjunctionSearcher) Close() error {
	var firstErr error
	for _, searcher := range s.searchers {
		// keep going after a failure, but remember only the first error
		if err := searcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a ConjunctionSearcher.
// NOTE(review): presumably the minimum number of clauses that must match, as
// consulted by BooleanSearcher for its should searcher — confirm against the
// Searcher interface.
func (s *ConjunctionSearcher) Min() int {
return 0
}

View file

@ -1,212 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestConjunctionSearch builds conjunction searchers (including a nested
// one) over the shared twoDocIndex fixture and checks the IDs, scores and
// number of matches each one yields, in order.
func TestConjunctionSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Local constructors: each builds a searcher on twoDocIndexReader with
	// explain enabled and fails the test immediately on error.
	newTerm := func(text, field string, boost float64) search.Searcher {
		s, err := NewTermSearcher(twoDocIndexReader, text, field, boost, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}
	newConjunction := func(children ...search.Searcher) search.Searcher {
		s, err := NewConjunctionSearcher(twoDocIndexReader, children, true)
		if err != nil {
			t.Fatal(err)
		}
		return s
	}

	// test 0
	beerAndMartySearcher := newConjunction(
		newTerm("beer", "desc", 1.0),
		newTerm("marty", "name", 5.0),
	)

	// test 1
	angstAndBeerSearcher := newConjunction(
		newTerm("angst", "desc", 1.0),
		newTerm("beer", "desc", 1.0),
	)

	// test 2
	beerAndJackSearcher := newConjunction(
		newTerm("beer", "desc", 1.0),
		newTerm("jack", "name", 5.0),
	)

	// test 3
	beerAndMisterSearcher := newConjunction(
		newTerm("beer", "desc", 1.0),
		newTerm("mister", "title", 5.0),
	)

	// test 4
	couchbaseAndMisterSearcher := newConjunction(
		newTerm("couchbase", "street", 1.0),
		newTerm("mister", "title", 5.0),
	)

	// test 5: a conjunction nested inside another conjunction
	beerAndCouchbaseAndMisterSearcher := newConjunction(
		newTerm("beer", "desc", 5.0),
		newConjunction(
			newTerm("couchbase", "street", 1.0),
			newTerm("mister", "title", 5.0),
		),
	)

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: beerAndMartySearcher,
			results: []*search.DocumentMatch{
				{ID: "1", Score: 2.0097428702814377},
			},
		},
		{
			searcher: angstAndBeerSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.0807601687084403},
			},
		},
		{
			searcher: beerAndJackSearcher,
			results:  []*search.DocumentMatch{},
		},
		{
			searcher: beerAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.2877980334016337},
				{ID: "3", Score: 1.2877980334016337},
			},
		},
		{
			searcher: couchbaseAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.4436599157093672},
			},
		},
		{
			searcher: beerAndCouchbaseAndMisterSearcher,
			results: []*search.DocumentMatch{
				{ID: "2", Score: 1.441614953806971},
			},
		},
	}

	for testIndex, test := range tests {
		// hand the searcher to the deferred call as an argument so each
		// deferred Close targets its own searcher, not whatever the shared
		// loop variable holds when the test function returns
		defer func(s search.Searcher) {
			err := s.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		next, err := test.searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = test.searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}

View file

@ -1,189 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorers"
)
// DisjunctionSearcher holds the iteration state for a group of child
// searchers. Next reports a document when at least int(min) of the children
// currently point at the same document ID.
type DisjunctionSearcher struct {
// initialized is set by initSearchers once every child has been asked for
// its first match.
initialized bool
indexReader index.IndexReader
// searchers are the child searchers, sorted by the constructor.
searchers OrderedSearcherList
// queryNorm is derived from the children's weights in computeQueryNorm.
queryNorm float64
// currs[i] is the most recent match returned by searchers[i].
currs []*search.DocumentMatch
// currentID is the smallest ID among the non-nil entries of currs, or ""
// when there are none.
currentID string
scorer *scorers.DisjunctionQueryScorer
// min is the number of children that must match; it is compared as
// int(min) in Next.
min float64
}
// NewDisjunctionSearcher builds a DisjunctionSearcher over qsearchers.
//
// The child searchers are copied into an OrderedSearcherList and sorted in
// reverse order, a scorer is created with the given explain flag, and the
// query norm is computed and pushed down to the children before returning.
// The returned error is always nil.
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
	// copy the caller's searchers so sorting does not reorder their slice
	ordered := make(OrderedSearcherList, len(qsearchers))
	for idx := range qsearchers {
		ordered[idx] = qsearchers[idx]
	}
	sort.Sort(sort.Reverse(ordered))

	ds := &DisjunctionSearcher{
		indexReader: indexReader,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorers.NewDisjunctionQueryScorer(explain),
		min:         min,
	}
	ds.computeQueryNorm()
	return ds, nil
}
// computeQueryNorm sums the Weight() of every child searcher, stores
// 1/sqrt(sum) as the query norm, and passes that norm to each child.
func (s *DisjunctionSearcher) computeQueryNorm() {
	var total float64
	for idx := range s.searchers {
		total += s.searchers[idx].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	// propagate the norm to the downstream searchers
	for idx := range s.searchers {
		s.searchers[idx].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers asks every child searcher for its first match, records the
// smallest resulting document ID as the current ID, and marks the searcher as
// initialized. It stops and returns the error of the first child that fails.
func (s *DisjunctionSearcher) initSearchers() error {
	for idx := range s.searchers {
		first, err := s.searchers[idx].Next()
		s.currs[idx] = first
		if err != nil {
			return err
		}
	}

	s.currentID = s.nextSmallestID()
	s.initialized = true
	return nil
}
// nextSmallestID returns the lexicographically smallest ID among the non-nil
// current matches, or "" when every current match is nil.
func (s *DisjunctionSearcher) nextSmallestID() string {
	smallest := ""
	for _, match := range s.currs {
		if match == nil {
			continue
		}
		if smallest == "" || match.ID < smallest {
			smallest = match.ID
		}
	}
	return smallest
}
// Weight returns the sum of the weights of all child searchers.
func (s *DisjunctionSearcher) Weight() float64 {
	var total float64
	for idx := range s.searchers {
		total += s.searchers[idx].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. The searcher's own
// queryNorm field is not modified.
func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
	for idx := range s.searchers {
		s.searchers[idx].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least int(min) child
// searchers, or nil when the children are exhausted.
//
// On each pass it gathers the children positioned on the current ID, scores
// them if enough matched, then moves exactly those children forward and
// recomputes the current ID. An error from any child aborts the call and is
// returned with a nil match.
func (s *DisjunctionSearcher) Next() (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	var result *search.DocumentMatch
	hits := make([]*search.DocumentMatch, 0, len(s.searchers))
	for s.currentID != "" {
		// collect every child currently sitting on the candidate ID
		for _, match := range s.currs {
			if match != nil && match.ID == s.currentID {
				hits = append(hits, match)
			}
		}

		satisfied := len(hits) >= int(s.min)
		if satisfied {
			// score before advancing the children below
			result = s.scorer.Score(hits, len(hits), len(s.searchers))
		}

		// start the next pass with a fresh slice
		hits = make([]*search.DocumentMatch, 0)

		// step only the children that were on the candidate ID
		for idx, match := range s.currs {
			if match == nil || match.ID != s.currentID {
				continue
			}
			advanced, err := s.searchers[idx].Next()
			s.currs[idx] = advanced
			if err != nil {
				return nil, err
			}
		}

		s.currentID = s.nextSmallestID()
		if satisfied {
			break
		}
	}
	return result, nil
}
// Advance calls Advance(ID) on every child searcher, recomputes the current
// ID from the results, and then returns whatever Next yields. The searcher is
// initialized first if it has not been used yet. An error from any child is
// returned with a nil match.
func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(); err != nil {
			return nil, err
		}
	}

	// reposition every child at the requested ID
	for idx := range s.searchers {
		match, err := s.searchers[idx].Advance(ID)
		s.currs[idx] = match
		if err != nil {
			return nil, err
		}
	}

	s.currentID = s.nextSmallestID()
	return s.Next()
}
// Count returns the sum of the child searchers' counts. This is a worst-case
// figure: a document matched by several children is counted once per child.
func (s *DisjunctionSearcher) Count() uint64 {
	var total uint64
	for idx := range s.searchers {
		total += s.searchers[idx].Count()
	}
	return total
}
// Close closes every child searcher.
//
// All children are closed even if one of them fails, so a single failing
// child does not leave the remaining ones open. The first error encountered
// is returned; nil is returned when every child closed cleanly.
func (s *DisjunctionSearcher) Close() error {
	var firstErr error
	for _, searcher := range s.searchers {
		if err := searcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min returns the minimum-match setting truncated to an int.
func (s *DisjunctionSearcher) Min() int {
	// FIXME just make this an int
	minimum := int(s.min)
	return minimum
}

View file

@ -1,168 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"testing"
"github.com/blevesearch/bleve/search"
)
// TestDisjunctionSearch iterates a flat and a nested disjunction searcher
// over the shared two-doc index and checks the IDs and scores they produce.
func TestDisjunctionSearch(t *testing.T) {

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	raviTermSearcher, err := NewTermSearcher(twoDocIndexReader, "ravi", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, true)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: martyOrDustinSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 0.6775110856165737,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 0.6775110856165737,
				},
			},
		},
		// test a nested disjunction
		{
			searcher: nestedRaviOrMartyOrDustinSearcher,
			results: []*search.DocumentMatch{
				&search.DocumentMatch{
					ID:    "1",
					Score: 0.2765927424732821,
				},
				&search.DocumentMatch{
					ID:    "3",
					Score: 0.2765927424732821,
				},
				&search.DocumentMatch{
					ID:    "4",
					Score: 0.5531854849465642,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Bind this iteration's searcher to its own variable: the deferred
		// closures run at function exit, and capturing the shared loop
		// variable would make every one of them close the last searcher.
		searcher := test.searcher
		defer func() {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}()

		next, err := searcher.Next()
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if next.ID != test.results[i].ID {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			next, err = searcher.Next()
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
// TestDisjunctionAdvance checks that advancing a marty-or-dustin disjunction
// to ID "3" yields the document with that ID.
func TestDisjunctionAdvance(t *testing.T) {

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true)
	if err != nil {
		t.Fatal(err)
	}
	// release the searcher (and its children) when the test finishes
	defer func() {
		err := martyOrDustinSearcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	match, err := martyOrDustinSearcher.Advance("3")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Fatalf("expected 3, got nil")
	}
	// TestDisjunctionSearch expects this disjunction to match IDs "1" and
	// "3", so advancing to "3" must land on "3".
	if match.ID != "3" {
		t.Errorf("expected 3, got %s", match.ID)
	}
}

View file

@ -1,112 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package searchers
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FuzzySearcher wraps the DisjunctionSearcher assembled by NewFuzzySearcher
// and keeps the parameters it was constructed with.
type FuzzySearcher struct {
indexReader index.IndexReader
// term is the query term that dictionary terms are compared against.
term string
// prefix bounds how much of term is used as a dictionary prefix when
// looking up candidate terms (see NewFuzzySearcher).
prefix int
// fuzziness is the maximum Levenshtein distance a candidate term may have.
fuzziness int
field string
explain bool
// searcher is the disjunction over the candidate terms; most methods
// forward to it.
searcher *DisjunctionSearcher
}
// NewFuzzySearcher builds a searcher that matches documents containing any
// term in field within the given Levenshtein distance (fuzziness) of term.
//
// Candidate terms are read from the field dictionary, restricted to a prefix
// of term when one is available, and filtered by
// search.LevenshteinDistanceMax. A term searcher is created for each
// surviving candidate and the results are combined in a DisjunctionSearcher.
//
// An error is returned if the field dictionary cannot be opened or iterated,
// or if any term searcher or the disjunction searcher cannot be created.
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
	prefixTerm := ""
	// NOTE(review): i is the byte offset of each rune, so for terms with
	// multi-byte runes fewer than `prefix` runes may be kept — confirm intent.
	for i, r := range term {
		if i < prefix {
			prefixTerm += string(r)
		}
	}

	// find the terms with this prefix
	var fieldDict index.FieldDict
	var err error
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	// The lookup error used to be overwritten by the first Next() call
	// below; check it before touching fieldDict.
	if err != nil {
		return nil, err
	}

	// enumerate terms and check levenshtein distance
	candidateTerms := make([]string, 0)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	// build one term searcher per candidate term
	qsearchers := make([]search.Searcher, 0, len(candidateTerms))
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// combine the term searchers into a single disjunction
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &FuzzySearcher{
		indexReader: indexReader,
		term:        term,
		prefix:      prefix,
		fuzziness:   fuzziness,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
// Count reports the count of the underlying disjunction searcher.
func (s *FuzzySearcher) Count() uint64 {
	count := s.searcher.Count()
	return count
}
// Weight reports the weight of the underlying disjunction searcher.
func (s *FuzzySearcher) Weight() float64 {
	weight := s.searcher.Weight()
	return weight
}
// SetQueryNorm hands qnorm to the underlying disjunction searcher.
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
	inner := s.searcher
	inner.SetQueryNorm(qnorm)
}
// Next returns the next match from the underlying disjunction searcher.
func (s *FuzzySearcher) Next() (*search.DocumentMatch, error) {
	match, err := s.searcher.Next()
	return match, err
}
// Advance moves the underlying disjunction searcher to ID and returns the
// match it yields there.
//
// This previously called Next() and ignored ID, so callers asking to skip
// ahead simply received the next match in sequence.
func (s *FuzzySearcher) Advance(ID string) (*search.DocumentMatch, error) {
	return s.searcher.Advance(ID)
}
// Close closes the underlying disjunction searcher and returns its error.
func (s *FuzzySearcher) Close() error {
	err := s.searcher.Close()
	return err
}
// Min always reports 0 for a fuzzy searcher.
func (s *FuzzySearcher) Min() int {
	const noMinimum = 0
	return noMinimum
}

Some files were not shown because too many files have changed in this diff Show more