Update server dependencies

Ken-Håvard Lieng 2018-05-04 23:39:27 +02:00
parent fb8fec38ff
commit de36fe682a
883 changed files with 147940 additions and 68404 deletions

View file

@ -15,11 +15,10 @@
package search
import (
"context"
"time"
"github.com/blevesearch/bleve/index"
"golang.org/x/net/context"
)
type Collector interface {

View file

@ -34,11 +34,20 @@ func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
return rv
}
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) {
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.Len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
heap.Push(c, doc)
}
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch {
func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
return heap.Pop(c).(*search.DocumentMatch)
}
@ -49,17 +58,12 @@ func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.Documen
return make(search.DocumentMatchCollection, 0), nil
}
rv := make(search.DocumentMatchCollection, size)
for count > 0 {
count--
if count >= skip {
size--
doc := heap.Pop(c).(*search.DocumentMatch)
rv[size] = doc
err := fixup(doc)
if err != nil {
return nil, err
}
for i := size - 1; i >= 0; i-- {
doc := heap.Pop(c).(*search.DocumentMatch)
rv[i] = doc
err := fixup(doc)
if err != nil {
return nil, err
}
}
return rv, nil

View file

@ -34,7 +34,16 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
return rv
}
func (c *collectStoreList) Add(doc *search.DocumentMatch) {
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreList) add(doc *search.DocumentMatch) {
for e := c.results.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DocumentMatch)
if c.compare(doc, curr) >= 0 {
@ -46,7 +55,7 @@ func (c *collectStoreList) Add(doc *search.DocumentMatch) {
c.results.PushBack(doc)
}
func (c *collectStoreList) RemoveLast() *search.DocumentMatch {
func (c *collectStoreList) removeLast() *search.DocumentMatch {
return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
}
@ -73,6 +82,6 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen
return search.DocumentMatchCollection{}, nil
}
func (c *collectStoreList) Len() int {
func (c *collectStoreList) len() int {
return c.results.Len()
}

View file

@ -29,7 +29,16 @@ func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
return rv
}
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
// find where to insert, starting at end (lowest)
i := len(c.slice)
for ; i > 0; i-- {
@ -44,7 +53,7 @@ func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
c.slice[i] = doc
}
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch {
func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
var rv *search.DocumentMatch
rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1]
return rv
@ -63,6 +72,6 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume
return search.DocumentMatchCollection{}, nil
}
func (c *collectStoreSlice) Len() int {
func (c *collectStoreSlice) len() int {
return len(c.slice)
}

View file

@ -15,13 +15,31 @@
package collector
import (
"context"
"reflect"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeTopNCollector int
func init() {
var coll TopNCollector
reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
}
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size,
// the last element is removed and returned. If the size has not been
// exceeded, nil is returned.
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)
}
// PreAllocSizeSkipCap will cap preallocation to this amount when
// size+skip exceeds this value
var PreAllocSizeSkipCap = 1000
@ -41,7 +59,7 @@ type TopNCollector struct {
results search.DocumentMatchCollection
facetsBuilder *search.FacetsBuilder
store *collectStoreHeap
store collectorStore
needDocIds bool
neededFields []string
@ -49,6 +67,8 @@ type TopNCollector struct {
cachedDesc []bool
lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader
}
// CheckDoneEvery controls how frequently we check the context deadline
@ -68,9 +88,15 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
backingSize = PreAllocSizeSkipCap + 1
}
hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
if size+skip > 10 {
hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
} else {
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
}
// these lookups traverse an interface, so do once up-front
if sort.RequiresDocID() {
@ -83,6 +109,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return hc
}
func (hc *TopNCollector) Size() int {
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
if hc.facetsBuilder != nil {
sizeInBytes += hc.facetsBuilder.Size()
}
for _, entry := range hc.neededFields {
sizeInBytes += len(entry) + size.SizeOfString
}
sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
return sizeInBytes
}
// Collect goes to the index to find the matching documents
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now()
@ -100,6 +142,18 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
}
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
if err != nil {
return err
}
hc.updateFieldVisitor = func(field string, term []byte) {
if hc.facetsBuilder != nil {
hc.facetsBuilder.UpdateVisitor(field, term)
}
hc.sort.UpdateVisitor(field, term)
}
select {
case <-ctx.Done():
return ctx.Err()
@ -184,9 +238,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
}
hc.store.Add(d)
if hc.store.Len() > hc.size+hc.skip {
removed := hc.store.RemoveLast()
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
if removed != nil {
if hc.lowestMatchOutsideResults == nil {
hc.lowestMatchOutsideResults = removed
} else {
@ -209,13 +262,7 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
hc.facetsBuilder.StartDoc()
}
err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
if hc.facetsBuilder != nil {
hc.facetsBuilder.UpdateVisitor(field, term)
}
hc.sort.UpdateVisitor(field, term)
})
err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
if hc.facetsBuilder != nil {
hc.facetsBuilder.EndDoc()
}
@ -243,6 +290,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
return err
}
}
doc.Complete(nil)
return nil
})
@ -274,5 +322,5 @@ func (hc *TopNCollector) FacetResults() search.FacetResults {
if hc.facetsBuilder != nil {
return hc.facetsBuilder.Results()
}
return search.FacetResults{}
return nil
}
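
Note: the collector changes above replace the old Add / Len / RemoveLast call sequence with a single AddNotExceedingSize, so each store keeps at most size+skip matches and hands any evicted match back for recycling, and NewTopNCollector now picks a slice store for small result windows (size+skip <= 10) and a heap otherwise. A minimal standalone sketch of that contract (toy types, not the bleve API):

package main

import "fmt"

type DocumentMatch struct{ Score float64 }

// toy bounded store mirroring the collectorStore contract in the diff
type sliceStore struct{ docs []*DocumentMatch }

func (s *sliceStore) AddNotExceedingSize(doc *DocumentMatch, size int) *DocumentMatch {
	s.docs = append(s.docs, doc) // real stores insert in sorted order
	if len(s.docs) > size {
		last := s.docs[len(s.docs)-1]
		s.docs = s.docs[:len(s.docs)-1]
		return last // evicted match, recycled by the caller
	}
	return nil
}

func main() {
	store := &sliceStore{}
	for _, score := range []float64{0.4, 0.9, 0.1} {
		if removed := store.AddNotExceedingSize(&DocumentMatch{Score: score}, 2); removed != nil {
			fmt.Println("recycle:", removed.Score) // would go back to the DocumentMatchPool
		}
	}
}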

View file

@ -17,8 +17,18 @@ package search
import (
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeExplanation int
func init() {
var e Explanation
reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
}
type Explanation struct {
Value float64 `json:"value"`
Message string `json:"message"`
@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
}
return string(js)
}
func (expl *Explanation) Size() int {
sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr +
len(expl.Message)
for _, entry := range expl.Children {
sizeInBytes += entry.Size()
}
return sizeInBytes
}
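
Note: this Size method follows the sizing idiom used throughout the commit: the fixed portion of a struct is measured once in init() with reflect, and Size() adds the variable-length parts on top. The idiom in isolation, with a hypothetical Node type rather than bleve code:

package main

import (
	"fmt"
	"reflect"
)

type Node struct {
	Message  string
	Children []*Node
}

// fixed portion of Node, computed once at startup
var reflectStaticSizeNode = int(reflect.TypeOf(Node{}).Size())

// Size estimates heap usage: the static struct size plus the dynamic parts.
func (n *Node) Size() int {
	s := reflectStaticSizeNode + len(n.Message)
	for _, c := range n.Children {
		s += c.Size()
	}
	return s
}

func main() {
	n := &Node{Message: "root", Children: []*Node{{Message: "leaf"}}}
	fmt.Println(n.Size())
}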

View file

@ -15,13 +15,25 @@
package facet
import (
"reflect"
"sort"
"time"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDateTimeFacetBuilder int
var reflectStaticSizedateTimeRange int
func init() {
var dtfb DateTimeFacetBuilder
reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
var dtr dateTimeRange
reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
}
type dateTimeRange struct {
start time.Time
end time.Time
@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
}
}
func (fb *DateTimeFacetBuilder) Size() int {
sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
len(fb.field)
for k, _ := range fb.termsCount {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfInt
}
for k, _ := range fb.ranges {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfPtr + reflectStaticSizedateTimeRange
}
return sizeInBytes
}
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
r := dateTimeRange{
start: start,

View file

@ -15,12 +15,24 @@
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeNumericFacetBuilder int
var reflectStaticSizenumericRange int
func init() {
var nfb NumericFacetBuilder
reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
var nr numericRange
reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
}
type numericRange struct {
min *float64
max *float64
@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
}
}
func (fb *NumericFacetBuilder) Size() int {
sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
len(fb.field)
for k, _ := range fb.termsCount {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfInt
}
for k, _ := range fb.ranges {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfPtr + reflectStaticSizenumericRange
}
return sizeInBytes
}
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
r := numericRange{
min: min,

View file

@ -15,11 +15,20 @@
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeTermsFacetBuilder int
func init() {
var tfb TermsFacetBuilder
reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
}
type TermsFacetBuilder struct {
size int
field string
@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
}
}
func (fb *TermsFacetBuilder) Size() int {
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
len(fb.field)
for k, _ := range fb.termsCount {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfInt
}
return sizeInBytes
}
func (fb *TermsFacetBuilder) Field() string {
return fb.field
}

View file

@ -15,11 +15,32 @@
package search
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeFacetsBuilder int
var reflectStaticSizeFacetResult int
var reflectStaticSizeTermFacet int
var reflectStaticSizeNumericRangeFacet int
var reflectStaticSizeDateRangeFacet int
func init() {
var fb FacetsBuilder
reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size())
var fr FacetResult
reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size())
var tf TermFacet
reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size())
var nrf NumericRangeFacet
reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size())
var drf DateRangeFacet
reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size())
}
type FacetBuilder interface {
StartDoc()
UpdateVisitor(field string, term []byte)
@ -27,6 +48,8 @@ type FacetBuilder interface {
Result() *FacetResult
Field() string
Size() int
}
type FacetsBuilder struct {
@ -42,6 +65,21 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
}
}
func (fb *FacetsBuilder) Size() int {
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
for k, v := range fb.facets {
sizeInBytes += size.SizeOfString + len(k) +
v.Size()
}
for _, entry := range fb.fields {
sizeInBytes += size.SizeOfString + len(entry)
}
return sizeInBytes
}
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
@ -213,6 +251,14 @@ type FacetResult struct {
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
func (fr *FacetResult) Size() int {
return reflectStaticSizeFacetResult + size.SizeOfPtr +
len(fr.Field) +
len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
}
func (fr *FacetResult) Merge(other *FacetResult) {
fr.Total += other.Total
fr.Missing += other.Missing

View file

@ -57,15 +57,24 @@ func LevenshteinDistance(a, b string) int {
// in which case the first return val will be the max
// and the second will be true, indicating max was exceeded
func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
v, wasMax, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil)
return v, wasMax
}
func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) {
la := len(a)
lb := len(b)
ld := int(math.Abs(float64(la - lb)))
if ld > max {
return max, true
return max, true, d
}
d := make([]int, la+1)
if cap(d) < la+1 {
d = make([]int, la+1)
}
d = d[:la+1]
var lastdiag, olddiag, temp int
for i := 1; i <= la; i++ {
@ -98,8 +107,8 @@ func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
}
// after each row if rowmin isn't less than max stop
if rowmin > max {
return max, true
return max, true, d
}
}
return d[la], false
return d[la], false, d
}
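
Note: LevenshteinDistanceMaxReuseSlice threads the scratch row back to the caller, so a tight loop (like the fuzzy candidate scan later in this commit) allocates the row once instead of once per comparison. A usage sketch, assuming the bleve version from this commit:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search"
)

func main() {
	const max = 2
	var reuse []int // scratch row: allocated on the first call, reused afterwards
	for _, cand := range []string{"bleve", "brave", "blues"} {
		var ld int
		var exceeded bool
		ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice("bleve", cand, max, reuse)
		if !exceeded && ld <= max {
			fmt.Println("within distance:", cand, ld)
		}
	}
}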

View file

@ -14,6 +14,17 @@
package search
import (
"reflect"
)
var reflectStaticSizeDocumentMatchPool int
func init() {
var dmp DocumentMatchPool
reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size())
}
// DocumentMatchPoolTooSmall is a callback function that can be executed
// when the DocumentMatchPool does not have sufficient capacity
// By default we just perform just-in-time allocation, but you could log

View file

@ -43,6 +43,10 @@ func (q *QueryStringQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *QueryStringQuery) Parse() (Query, error) {
return parseQuerySyntax(q.Query)
}
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
newQuery, err := parseQuerySyntax(q.Query)
if err != nil {

View file

@ -15,13 +15,27 @@
package scorer
import (
"reflect"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeConjunctionQueryScorer int
func init() {
var cqs ConjunctionQueryScorer
reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
}
type ConjunctionQueryScorer struct {
options search.SearcherOptions
}
func (s *ConjunctionQueryScorer) Size() int {
return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
}
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{
options: options,
@ -35,15 +49,11 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
childrenExplanations = make([]*search.Explanation, len(constituents))
}
locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents {
sum += docMatch.Score
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
locations = append(locations, docMatch.Locations)
}
}
newScore := sum
var newExpl *search.Explanation
@ -55,11 +65,8 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
rv := constituents[0]
rv.Score = newScore
rv.Expl = newExpl
if len(locations) == 1 {
rv.Locations = locations[0]
} else if len(locations) > 1 {
rv.Locations = search.MergeLocations(locations)
}
rv.FieldTermLocations = search.MergeFieldTermLocations(
rv.FieldTermLocations, constituents[1:])
return rv
}

View file

@ -16,11 +16,20 @@ package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeConstantScorer int
func init() {
var cs ConstantScorer
reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
}
type ConstantScorer struct {
constant float64
boost float64
@ -30,6 +39,16 @@ type ConstantScorer struct {
queryWeightExplanation *search.Explanation
}
func (s *ConstantScorer) Size() int {
sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
if s.queryWeightExplanation != nil {
sizeInBytes += s.queryWeightExplanation.Size()
}
return sizeInBytes
}
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{
options: options,

View file

@ -16,14 +16,27 @@ package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDisjunctionQueryScorer int
func init() {
var dqs DisjunctionQueryScorer
reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
}
type DisjunctionQueryScorer struct {
options search.SearcherOptions
}
func (s *DisjunctionQueryScorer) Size() int {
return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
}
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{
options: options,
@ -37,15 +50,11 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
childrenExplanations = make([]*search.Explanation, len(constituents))
}
var locations []search.FieldTermLocationMap
for i, docMatch := range constituents {
sum += docMatch.Score
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
locations = append(locations, docMatch.Locations)
}
}
var rawExpl *search.Explanation
@ -67,11 +76,8 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
rv := constituents[0]
rv.Score = newScore
rv.Expl = newExpl
if len(locations) == 1 {
rv.Locations = locations[0]
} else if len(locations) > 1 {
rv.Locations = search.MergeLocations(locations)
}
rv.FieldTermLocations = search.MergeFieldTermLocations(
rv.FieldTermLocations, constituents[1:])
return rv
}

View file

@ -17,13 +17,22 @@ package scorer
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeTermQueryScorer int
func init() {
var tqs TermQueryScorer
reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
}
type TermQueryScorer struct {
queryTerm []byte
queryTerm string
queryField string
queryBoost float64
docTerm uint64
@ -36,9 +45,24 @@ type TermQueryScorer struct {
queryWeightExplanation *search.Explanation
}
func (s *TermQueryScorer) Size() int {
sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
len(s.queryTerm) + len(s.queryField)
if s.idfExplanation != nil {
sizeInBytes += s.idfExplanation.Size()
}
if s.queryWeightExplanation != nil {
sizeInBytes += s.queryWeightExplanation.Size()
}
return sizeInBytes
}
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,
queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
@ -82,7 +106,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
s.queryWeightExplanation = &search.Explanation{
Value: s.queryWeight,
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost),
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost),
Children: childrenExplanations,
}
}
@ -104,7 +128,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
@ -113,7 +137,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
@ -127,7 +151,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
@ -140,41 +164,31 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
rv.Expl = scoreExplanation
}
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
locs := make([]search.Location, len(termMatch.Vectors))
locsUsed := 0
totalPositions := 0
for _, v := range termMatch.Vectors {
totalPositions += len(v.ArrayPositions)
if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors))
}
positions := make(search.ArrayPositions, totalPositions)
positionsUsed := 0
rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field]
if tlm == nil {
tlm = make(search.TermLocationMap)
rv.Locations[v.Field] = tlm
}
loc := &locs[locsUsed]
locsUsed++
loc.Pos = v.Pos
loc.Start = v.Start
loc.End = v.End
var ap search.ArrayPositions
if len(v.ArrayPositions) > 0 {
loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
for i, ap := range v.ArrayPositions {
loc.ArrayPositions[i] = ap
n := len(rv.FieldTermLocations)
if n < cap(rv.FieldTermLocations) { // reuse ap slice if available
ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0]
}
positionsUsed += len(v.ArrayPositions)
ap = append(ap, v.ArrayPositions...)
}
tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
rv.FieldTermLocations =
append(rv.FieldTermLocations, search.FieldTermLocation{
Field: v.Field,
Term: s.queryTerm,
Location: search.Location{
Pos: v.Pos,
Start: v.Start,
End: v.End,
ArrayPositions: ap,
},
})
}
}
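
Note: the rewritten loop above drops the per-hit Locations map in favor of a flat FieldTermLocations slice, and reuses ArrayPositions backing arrays left over from recycled entries (the rv.FieldTermLocations[:n+1][n] reslice past len but within cap). That reuse idiom in isolation, as a runnable sketch:

package main

import "fmt"

func main() {
	// recycled slice-of-slices: len is reset between passes, but the backing
	// array (and the ArrayPositions headers it holds) is kept for reuse
	buf := make([][]uint64, 0, 4)
	for pass := 0; pass < 2; pass++ {
		buf = buf[:0]
		for hit := 0; hit < 3; hit++ {
			var ap []uint64
			if n := len(buf); n < cap(buf) {
				ap = buf[:n+1][n][:0] // reuse a previous allocation if present
			}
			ap = append(ap, uint64(pass), uint64(hit))
			buf = append(buf, ap)
		}
	}
	fmt.Println(buf) // the second pass reused the first pass's arrays
}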

View file

@ -16,11 +16,26 @@ package search
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDocumentMatch int
var reflectStaticSizeSearchContext int
var reflectStaticSizeLocation int
func init() {
var dm DocumentMatch
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
var sc SearchContext
reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
var l Location
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
}
type ArrayPositions []uint64
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
@ -36,12 +51,22 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
}
type Location struct {
Pos uint64 `json:"pos"`
Start uint64 `json:"start"`
End uint64 `json:"end"`
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
// Start and End are the byte offsets of the term in the field
Start uint64 `json:"start"`
End uint64 `json:"end"`
// ArrayPositions contains the positions of the term within any elements.
ArrayPositions ArrayPositions `json:"array_positions"`
}
func (l *Location) Size() int {
return reflectStaticSizeLocation + size.SizeOfPtr +
len(l.ArrayPositions)*size.SizeOfUint64
}
type Locations []*Location
type TermLocationMap map[string]Locations
@ -52,6 +77,12 @@ func (t TermLocationMap) AddLocation(term string, location *Location) {
type FieldTermLocationMap map[string]TermLocationMap
type FieldTermLocation struct {
Field string
Term string
Location Location
}
type FieldFragmentMap map[string][]string
type DocumentMatch struct {
@ -74,6 +105,12 @@ type DocumentMatch struct {
// used to maintain natural index order
HitNumber uint64 `json:"-"`
// used to temporarily hold field term location information during
// search processing in an efficient, recycle-friendly manner, to
// be later incorporated into the Locations map when search
// results are completed
FieldTermLocations []FieldTermLocation `json:"-"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@ -103,15 +140,120 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
indexInternalID := dm.IndexInternalID
// remember the []interface{} used for sort
sort := dm.Sort
// remember the FieldTermLocations backing array
ftls := dm.FieldTermLocations
for i := range ftls { // recycle the ArrayPositions of each location
ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
}
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
// reuse the FieldTermLocations already allocated (and reset len to 0)
dm.FieldTermLocations = ftls[:0]
return dm
}
func (dm *DocumentMatch) Size() int {
sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
len(dm.Index) +
len(dm.ID) +
len(dm.IndexInternalID)
if dm.Expl != nil {
sizeInBytes += dm.Expl.Size()
}
for k, v := range dm.Locations {
sizeInBytes += size.SizeOfString + len(k)
for k1, v1 := range v {
sizeInBytes += size.SizeOfString + len(k1) +
size.SizeOfSlice
for _, entry := range v1 {
sizeInBytes += entry.Size()
}
}
}
for k, v := range dm.Fragments {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfSlice
for _, entry := range v {
sizeInBytes += size.SizeOfString + len(entry)
}
}
for _, entry := range dm.Sort {
sizeInBytes += size.SizeOfString + len(entry)
}
for k, _ := range dm.Fields {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfPtr
}
if dm.Document != nil {
sizeInBytes += dm.Document.Size()
}
return sizeInBytes
}
// Complete performs final preparation & transformation of the
// DocumentMatch at the end of search processing, also allowing the
// caller to provide an optional preallocated locations slice
func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
// transform the FieldTermLocations slice into the Locations map
nlocs := len(dm.FieldTermLocations)
if nlocs > 0 {
if cap(prealloc) < nlocs {
prealloc = make([]Location, nlocs)
}
prealloc = prealloc[:nlocs]
var lastField string
var tlm TermLocationMap
for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field {
lastField = ftl.Field
if dm.Locations == nil {
dm.Locations = make(FieldTermLocationMap)
}
tlm = dm.Locations[ftl.Field]
if tlm == nil {
tlm = make(TermLocationMap)
dm.Locations[ftl.Field] = tlm
}
}
loc := &prealloc[i]
*loc = ftl.Location
if len(loc.ArrayPositions) > 0 { // copy
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
tlm[ftl.Term] = append(tlm[ftl.Term], loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
ArrayPositions: ftl.Location.ArrayPositions[:0],
},
}
}
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
return prealloc
}
func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
}
@ -130,6 +272,7 @@ type Searcher interface {
SetQueryNorm(float64)
Count() uint64
Min() int
Size() int
DocumentMatchPoolSize() int
}
@ -143,3 +286,18 @@ type SearcherOptions struct {
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
}
func (sc *SearchContext) Size() int {
sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
if sc.DocumentMatchPool != nil {
for _, entry := range sc.DocumentMatchPool.avail {
if entry != nil {
sizeInBytes += entry.Size()
}
}
}
return sizeInBytes
}
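
Note: Reset and Complete above form a pair: scorers append flat FieldTermLocation entries while a DocumentMatch is being scored, Complete folds them into the nested Locations map once the hit makes the final results, and Reset recycles the backing slices for the next document. A small usage sketch, assuming the bleve version from this commit:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search"
)

func main() {
	dm := &search.DocumentMatch{}
	// a scorer would append flat entries like this during matching
	dm.FieldTermLocations = append(dm.FieldTermLocations, search.FieldTermLocation{
		Field:    "body",
		Term:     "bleve",
		Location: search.Location{Pos: 1, Start: 0, End: 5},
	})
	// finalizeResults calls Complete to build the nested Locations map
	dm.Complete(nil)
	fmt.Println(dm.Locations["body"]["bleve"][0].End) // 5
}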

View file

@ -16,12 +16,21 @@ package searcher
import (
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeBooleanSearcher int
func init() {
var bs BooleanSearcher
reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
}
type BooleanSearcher struct {
indexReader index.IndexReader
mustSearcher search.Searcher
@ -52,6 +61,32 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
return &rv, nil
}
func (s *BooleanSearcher) Size() int {
sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
if s.mustSearcher != nil {
sizeInBytes += s.mustSearcher.Size()
}
if s.shouldSearcher != nil {
sizeInBytes += s.shouldSearcher.Size()
}
if s.mustNotSearcher != nil {
sizeInBytes += s.mustNotSearcher.Size()
}
sizeInBytes += s.scorer.Size()
for _, entry := range s.matches {
if entry != nil {
sizeInBytes += entry.Size()
}
}
return sizeInBytes
}
func (s *BooleanSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0

View file

@ -16,13 +16,22 @@ package searcher
import (
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeConjunctionSearcher int
func init() {
var cs ConjunctionSearcher
reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
}
type ConjunctionSearcher struct {
indexReader index.IndexReader
searchers OrderedSearcherList
@ -51,31 +60,72 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
scorer: scorer.NewConjunctionQueryScorer(options),
}
rv.computeQueryNorm()
// attempt push-down conjunction optimization when there is more than one searcher
if len(searchers) > 1 {
var octx index.OptimizableContext
for _, searcher := range searchers {
o, ok := searcher.(index.Optimizable)
if ok {
var err error
octx, err = o.Optimize("conjunction", octx)
if err != nil {
return nil, err
}
}
}
if octx != nil {
err := octx.Finish()
if err != nil {
return nil, err
}
}
}
return &rv, nil
}
func (s *ConjunctionSearcher) Size() int {
sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
s.scorer.Size()
for _, entry := range s.searchers {
sizeInBytes += entry.Size()
}
for _, entry := range s.currs {
if entry != nil {
sizeInBytes += entry.Size()
}
}
return sizeInBytes
}
func (s *ConjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, termSearcher := range s.searchers {
sumOfSquaredWeights += termSearcher.Weight()
for _, searcher := range s.searchers {
sumOfSquaredWeights += searcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, termSearcher := range s.searchers {
termSearcher.SetQueryNorm(s.queryNorm)
for _, searcher := range s.searchers {
searcher.SetQueryNorm(s.queryNorm)
}
}
func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
var err error
// get all searchers pointing at their first match
for i, termSearcher := range s.searchers {
for i, searcher := range s.searchers {
if s.currs[i] != nil {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = termSearcher.Next(ctx)
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return err
}
@ -160,11 +210,11 @@ OUTER:
// we know all the searchers are pointing at the same thing
// so they all need to be bumped
for i, termSearcher := range s.searchers {
for i, searcher := range s.searchers {
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = termSearcher.Next(ctx)
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
@ -184,6 +234,9 @@ func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexI
}
}
for i := range s.searchers {
if s.currs[i] != nil && s.currs[i].IndexInternalID.Compare(ID) >= 0 {
continue
}
err := s.advanceChild(ctx, i, ID)
if err != nil {
return nil, err
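
Note: the new block in NewConjunctionSearcher probes each child searcher for index.Optimizable and threads a single OptimizableContext through all of them, so an index that can evaluate the whole conjunction natively gets a chance to fuse the clauses before searching starts. The shape of that protocol, reduced to a runnable toy (these types stand in for the real index package):

package main

import "fmt"

type OptimizableContext interface {
	Finish() error
}

type Optimizable interface {
	Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
}

// toy context that just collects the fused terms
type conjCtx struct{ terms []string }

func (c *conjCtx) Finish() error {
	fmt.Println("fused conjunction over:", c.terms)
	return nil
}

type termSearcher struct{ term string }

func (t *termSearcher) Optimize(kind string, octx OptimizableContext) (OptimizableContext, error) {
	if kind != "conjunction" {
		return octx, nil
	}
	c, _ := octx.(*conjCtx)
	if c == nil {
		c = &conjCtx{}
	}
	c.terms = append(c.terms, t.term)
	return c, nil
}

func main() {
	searchers := []Optimizable{&termSearcher{"cat"}, &termSearcher{"hat"}}
	var octx OptimizableContext
	var err error
	for _, o := range searchers {
		if octx, err = o.Optimize("conjunction", octx); err != nil {
			panic(err)
		}
	}
	if octx != nil {
		_ = octx.Finish()
	}
}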

View file

@ -1,4 +1,4 @@
// Copyright (c) 2014 Couchbase, Inc.
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -16,12 +16,9 @@ package searcher
import (
"fmt"
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
)
// DisjunctionMaxClauseCount is a compile time setting that applications can
@ -29,17 +26,26 @@ import (
// error instead of executing searches when the size exceeds this value.
var DisjunctionMaxClauseCount = 0
type DisjunctionSearcher struct {
indexReader index.IndexReader
searchers OrderedSearcherList
numSearchers int
queryNorm float64
currs []*search.DocumentMatch
scorer *scorer.DisjunctionQueryScorer
min int
matching []*search.DocumentMatch
matchingIdxs []int
initialized bool
// DisjunctionHeapTakeover is a compile time setting that applications can
// adjust to control when the DisjunctionSearcher will switch from a simple
// slice implementation to a heap implementation.
var DisjunctionHeapTakeover = 10
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
search.Searcher, error) {
return newDisjunctionSearcher(indexReader, qsearchers, min, options, true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (search.Searcher, error) {
if len(qsearchers) > DisjunctionHeapTakeover {
return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options,
limit)
}
return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options,
limit)
}
func tooManyClauses(count int) bool {
@ -53,219 +59,3 @@ func tooManyClausesErr() error {
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
DisjunctionMaxClauseCount)
}
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
*DisjunctionSearcher, error) {
return newDisjunctionSearcher(indexReader, qsearchers, min, options,
true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr()
}
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
searchers[i] = searcher
}
// sort the searchers
sort.Sort(sort.Reverse(searchers))
// build our searcher
rv := DisjunctionSearcher{
indexReader: indexReader,
searchers: searchers,
numSearchers: len(searchers),
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)),
matchingIdxs: make([]int, len(searchers)),
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *DisjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, termSearcher := range s.searchers {
sumOfSquaredWeights += termSearcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, termSearcher := range s.searchers {
termSearcher.SetQueryNorm(s.queryNorm)
}
}
func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
var err error
// get all searchers pointing at their first match
for i, termSearcher := range s.searchers {
if s.currs[i] != nil {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = termSearcher.Next(ctx)
if err != nil {
return err
}
}
err = s.updateMatches()
if err != nil {
return err
}
s.initialized = true
return nil
}
func (s *DisjunctionSearcher) updateMatches() error {
matching := s.matching[:0]
matchingIdxs := s.matchingIdxs[:0]
for i := 0; i < len(s.currs); i++ {
curr := s.currs[i]
if curr == nil {
continue
}
if len(matching) > 0 {
cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
if cmp > 0 {
continue
}
if cmp < 0 {
matching = matching[:0]
matchingIdxs = matchingIdxs[:0]
}
}
matching = append(matching, curr)
matchingIdxs = append(matchingIdxs, i)
}
s.matching = matching
s.matchingIdxs = matchingIdxs
return nil
}
func (s *DisjunctionSearcher) Weight() float64 {
var rv float64
for _, searcher := range s.searchers {
rv += searcher.Weight()
}
return rv
}
func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
for _, searcher := range s.searchers {
searcher.SetQueryNorm(qnorm)
}
}
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
found := false
for !found && len(s.matching) > 0 {
if len(s.matching) >= s.min {
found = true
// score this match
rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
}
// invoke next on all the matching searchers
for _, i := range s.matchingIdxs {
searcher := s.searchers[i]
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
}
err = s.updateMatches()
if err != nil {
return nil, err
}
}
return rv, nil
}
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// get all searchers pointing at their first match
var err error
for i, termSearcher := range s.searchers {
if s.currs[i] != nil {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = termSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
err = s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
func (s *DisjunctionSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
for _, searcher := range s.searchers {
sum += searcher.Count()
}
return sum
}
func (s *DisjunctionSearcher) Close() (rv error) {
for _, searcher := range s.searchers {
err := searcher.Close()
if err != nil && rv == nil {
rv = err
}
}
return rv
}
func (s *DisjunctionSearcher) Min() int {
return s.min
}
func (s *DisjunctionSearcher) DocumentMatchPoolSize() int {
rv := len(s.currs)
for _, s := range s.searchers {
rv += s.DocumentMatchPoolSize()
}
return rv
}

View file

@ -0,0 +1,343 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"bytes"
"container/heap"
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDisjunctionHeapSearcher int
var reflectStaticSizeSearcherCurr int
func init() {
var dhs DisjunctionHeapSearcher
reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size())
var sc SearcherCurr
reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size())
}
type SearcherCurr struct {
searcher search.Searcher
curr *search.DocumentMatch
}
type DisjunctionHeapSearcher struct {
indexReader index.IndexReader
numSearchers int
scorer *scorer.DisjunctionQueryScorer
min int
queryNorm float64
initialized bool
searchers []search.Searcher
heap []*SearcherCurr
matching []*search.DocumentMatch
matchingCurrs []*SearcherCurr
}
func newDisjunctionHeapSearcher(indexReader index.IndexReader,
searchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionHeapSearcher, error) {
if limit && tooManyClauses(len(searchers)) {
return nil, tooManyClausesErr()
}
// build our searcher
rv := DisjunctionHeapSearcher{
indexReader: indexReader,
searchers: searchers,
numSearchers: len(searchers),
scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)),
matchingCurrs: make([]*SearcherCurr, len(searchers)),
heap: make([]*SearcherCurr, 0, len(searchers)),
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *DisjunctionHeapSearcher) Size() int {
sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
s.scorer.Size()
for _, entry := range s.searchers {
sizeInBytes += entry.Size()
}
for _, entry := range s.matching {
if entry != nil {
sizeInBytes += entry.Size()
}
}
// for matchingCurrs and heap, just use static size * len
// since searchers and document matches already counted above
sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr
return sizeInBytes
}
func (s *DisjunctionHeapSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, searcher := range s.searchers {
sumOfSquaredWeights += searcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, searcher := range s.searchers {
searcher.SetQueryNorm(s.queryNorm)
}
}
func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
// alloc a single block of SearcherCurrs
block := make([]SearcherCurr, len(s.searchers))
// get all searchers pointing at their first match
for i, searcher := range s.searchers {
curr, err := searcher.Next(ctx)
if err != nil {
return err
}
if curr != nil {
block[i].searcher = searcher
block[i].curr = curr
heap.Push(s, &block[i])
}
}
err := s.updateMatches()
if err != nil {
return err
}
s.initialized = true
return nil
}
func (s *DisjunctionHeapSearcher) updateMatches() error {
matching := s.matching[:0]
matchingCurrs := s.matchingCurrs[:0]
if len(s.heap) > 0 {
// top of the heap is our next hit
next := heap.Pop(s).(*SearcherCurr)
matching = append(matching, next.curr)
matchingCurrs = append(matchingCurrs, next)
// now as long as top of heap matches, keep popping
for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
next = heap.Pop(s).(*SearcherCurr)
matching = append(matching, next.curr)
matchingCurrs = append(matchingCurrs, next)
}
}
s.matching = matching
s.matchingCurrs = matchingCurrs
return nil
}
func (s *DisjunctionHeapSearcher) Weight() float64 {
var rv float64
for _, searcher := range s.searchers {
rv += searcher.Weight()
}
return rv
}
func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
for _, searcher := range s.searchers {
searcher.SetQueryNorm(qnorm)
}
}
func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
found := false
for !found && len(s.matching) > 0 {
if len(s.matching) >= s.min {
found = true
// score this match
rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
}
// invoke next on all the matching searchers
for _, matchingCurr := range s.matchingCurrs {
if matchingCurr.curr != rv {
ctx.DocumentMatchPool.Put(matchingCurr.curr)
}
curr, err := matchingCurr.searcher.Next(ctx)
if err != nil {
return nil, err
}
if curr != nil {
matchingCurr.curr = curr
heap.Push(s, matchingCurr)
}
}
err := s.updateMatches()
if err != nil {
return nil, err
}
}
return rv, nil
}
func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// if there is anything in matching, toss it back onto the heap
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
s.matching = s.matching[:0]
s.matchingCurrs = s.matchingCurrs[:0]
// find all searchers that actually need to be advanced
// advance them, using s.matchingCurrs as temp storage
for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
searcherCurr := heap.Pop(s).(*SearcherCurr)
ctx.DocumentMatchPool.Put(searcherCurr.curr)
curr, err := searcherCurr.searcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
if curr != nil {
searcherCurr.curr = curr
s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
}
}
// now all of the searchers that we advanced have to be pushed back
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
// reset our temp space
s.matchingCurrs = s.matchingCurrs[:0]
err := s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
func (s *DisjunctionHeapSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
for _, searcher := range s.searchers {
sum += searcher.Count()
}
return sum
}
func (s *DisjunctionHeapSearcher) Close() (rv error) {
for _, searcher := range s.searchers {
err := searcher.Close()
if err != nil && rv == nil {
rv = err
}
}
return rv
}
func (s *DisjunctionHeapSearcher) Min() int {
return s.min
}
func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
rv := len(s.searchers)
for _, s := range s.searchers {
rv += s.DocumentMatchPoolSize()
}
return rv
}
// a disjunction searcher implements the index.Optimizable interface
// but only activates on an edge case where the disjunction is a
// wrapper around a single Optimizable child searcher
func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
index.OptimizableContext, error) {
if len(s.searchers) == 1 {
o, ok := s.searchers[0].(index.Optimizable)
if ok {
return o.Optimize(kind, octx)
}
}
return octx, nil
}
// heap impl
func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) }
func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
if s.heap[i].curr == nil {
return true
} else if s.heap[j].curr == nil {
return false
}
return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0
}
func (s *DisjunctionHeapSearcher) Swap(i, j int) {
s.heap[i], s.heap[j] = s.heap[j], s.heap[i]
}
func (s *DisjunctionHeapSearcher) Push(x interface{}) {
s.heap = append(s.heap, x.(*SearcherCurr))
}
func (s *DisjunctionHeapSearcher) Pop() interface{} {
old := s.heap
n := len(old)
x := old[n-1]
s.heap = old[0 : n-1]
return x
}
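
Note: the searcher itself satisfies container/heap's interface over its []*SearcherCurr field, keeping the cursor with the smallest current document ID on top; updateMatches then pops equal IDs into the matching set. The same mechanics in a standalone sketch:

package main

import (
	"container/heap"
	"fmt"
)

type cursor struct{ id string }

type cursorHeap []*cursor

func (h cursorHeap) Len() int            { return len(h) }
func (h cursorHeap) Less(i, j int) bool  { return h[i].id < h[j].id }
func (h cursorHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *cursorHeap) Push(x interface{}) { *h = append(*h, x.(*cursor)) }
func (h *cursorHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}

func main() {
	h := &cursorHeap{}
	for _, id := range []string{"doc9", "doc2", "doc5"} {
		heap.Push(h, &cursor{id: id})
	}
	// pop in document-ID order; equal IDs across cursors would be
	// gathered into the matching set, as updateMatches does above
	for h.Len() > 0 {
		fmt.Println(heap.Pop(h).(*cursor).id)
	}
}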

View file

@ -0,0 +1,298 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDisjunctionSliceSearcher int
func init() {
var ds DisjunctionSliceSearcher
reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(ds).Size())
}
type DisjunctionSliceSearcher struct {
indexReader index.IndexReader
searchers OrderedSearcherList
numSearchers int
queryNorm float64
currs []*search.DocumentMatch
scorer *scorer.DisjunctionQueryScorer
min int
matching []*search.DocumentMatch
matchingIdxs []int
initialized bool
}
func newDisjunctionSliceSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionSliceSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr()
}
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
searchers[i] = searcher
}
// sort the searchers
sort.Sort(sort.Reverse(searchers))
// build our searcher
rv := DisjunctionSliceSearcher{
indexReader: indexReader,
searchers: searchers,
numSearchers: len(searchers),
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)),
matchingIdxs: make([]int, len(searchers)),
}
rv.computeQueryNorm()
return &rv, nil
}
func (s *DisjunctionSliceSearcher) Size() int {
sizeInBytes := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr +
s.scorer.Size()
for _, entry := range s.searchers {
sizeInBytes += entry.Size()
}
for _, entry := range s.currs {
if entry != nil {
sizeInBytes += entry.Size()
}
}
for _, entry := range s.matching {
if entry != nil {
sizeInBytes += entry.Size()
}
}
sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
return sizeInBytes
}
func (s *DisjunctionSliceSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
for _, searcher := range s.searchers {
sumOfSquaredWeights += searcher.Weight()
}
// now compute query norm from this
s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
// finally tell all the downstream searchers the norm
for _, searcher := range s.searchers {
searcher.SetQueryNorm(s.queryNorm)
}
}
func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error {
var err error
// get all searchers pointing at their first match
for i, searcher := range s.searchers {
if s.currs[i] != nil {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return err
}
}
err = s.updateMatches()
if err != nil {
return err
}
s.initialized = true
return nil
}
func (s *DisjunctionSliceSearcher) updateMatches() error {
matching := s.matching[:0]
matchingIdxs := s.matchingIdxs[:0]
for i := 0; i < len(s.currs); i++ {
curr := s.currs[i]
if curr == nil {
continue
}
if len(matching) > 0 {
cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
if cmp > 0 {
continue
}
if cmp < 0 {
matching = matching[:0]
matchingIdxs = matchingIdxs[:0]
}
}
matching = append(matching, curr)
matchingIdxs = append(matchingIdxs, i)
}
s.matching = matching
s.matchingIdxs = matchingIdxs
return nil
}
func (s *DisjunctionSliceSearcher) Weight() float64 {
var rv float64
for _, searcher := range s.searchers {
rv += searcher.Weight()
}
return rv
}
func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) {
for _, searcher := range s.searchers {
searcher.SetQueryNorm(qnorm)
}
}
func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
found := false
for !found && len(s.matching) > 0 {
if len(s.matching) >= s.min {
found = true
// score this match
rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
}
// invoke next on all the matching searchers
for _, i := range s.matchingIdxs {
searcher := s.searchers[i]
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
}
err = s.updateMatches()
if err != nil {
return nil, err
}
}
return rv, nil
}
func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// get all searchers pointing at their first match
var err error
for i, searcher := range s.searchers {
if s.currs[i] != nil {
if s.currs[i].IndexInternalID.Compare(ID) >= 0 {
continue
}
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
err = s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
func (s *DisjunctionSliceSearcher) Count() uint64 {
// for now return a worst case
var sum uint64
for _, searcher := range s.searchers {
sum += searcher.Count()
}
return sum
}
func (s *DisjunctionSliceSearcher) Close() (rv error) {
for _, searcher := range s.searchers {
err := searcher.Close()
if err != nil && rv == nil {
rv = err
}
}
return rv
}
func (s *DisjunctionSliceSearcher) Min() int {
return s.min
}
func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int {
rv := len(s.currs)
for _, s := range s.searchers {
rv += s.DocumentMatchPoolSize()
}
return rv
}
// a disjunction searcher implements the index.Optimizable interface
// but only activates on an edge case where the disjunction is a
// wrapper around a single Optimizable child searcher
func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) (
index.OptimizableContext, error) {
if len(s.searchers) == 1 {
o, ok := s.searchers[0].(index.Optimizable)
if ok {
return o.Optimize(kind, octx)
}
}
return octx, nil
}

View file

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeDocIDSearcher int
func init() {
var ds DocIDSearcher
reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size())
}
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
reader index.DocIDReader
@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
}, nil
}
func (s *DocIDSearcher) Size() int {
return reflectStaticSizeDocIDSearcher + size.SizeOfPtr +
s.reader.Size() +
s.scorer.Size()
}
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}

View file

@ -15,10 +15,20 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeFilteringSearcher int
func init() {
var fs FilteringSearcher
reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size())
}
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
}
}
func (f *FilteringSearcher) Size() int {
return reflectStaticSizeFilteringSearcher + size.SizeOfPtr +
f.child.Size()
}
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {

View file

@ -15,13 +15,22 @@
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
var MaxFuzziness = 2
func NewFuzzySearcher(indexReader index.IndexReader, term string,
prefix, fuzziness int, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
if fuzziness > MaxFuzziness {
return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
}
// Note: we don't byte-slice the term for a prefix because of runes;
// see the standalone sketch after this file's diff.
prefixTerm := ""
for i, r := range term {
@ -31,7 +40,6 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
break
}
}
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
field, prefixTerm)
if err != nil {
@ -49,6 +57,26 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
// for advanced reader implementations, directly call the
// levenshtein-automaton-based iterator to collect the
// candidate terms
if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness)
if err != nil {
return rv, err
}
defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
}()
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
rv = append(rv, tfd.Term)
tfd, err = fieldDict.Next()
}
return rv, err
}
fieldDict, err = indexReader.FieldDict(field)
}
defer func() {
@ -58,9 +86,12 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
}()
// enumerate terms and check levenshtein distance
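// reuse is scratch space handed to LevenshteinDistanceMaxReuseSlice
// on each pass so the distance computation can recycle one slice
// across all candidate terms instead of allocating per term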
var reuse []int
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness)
var ld int
var exceeded bool
ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
if !exceeded && ld <= fuzziness {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
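The rune note in NewFuzzySearcher above deserves a standalone illustration. A minimal, self-contained sketch (standard library only; names are illustrative): the loop index i is a byte offset, so whole runes are appended until the prefix length is reached, and a multi-byte rune is never split the way naive byte slicing (term[:prefix]) could split it.

package main

import "fmt"

// runeSafePrefix mirrors the prefix loop above: i is the byte offset
// at which rune r starts, so the loop always stops on a rune boundary.
func runeSafePrefix(term string, prefix int) string {
	prefixTerm := ""
	for i, r := range term {
		if i < prefix {
			prefixTerm += string(r)
		} else {
			break
		}
	}
	return prefixTerm
}

func main() {
	// "é" occupies bytes 1-2, so a prefix of 2 keeps it whole.
	fmt.Println(runeSafePrefix("héllo", 2)) // hé
}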

View file

@ -24,10 +24,12 @@ import (
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
centerLat, dist float64, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// compute bounding box containing the circle
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat :=
geo.ComputeBoundingBox(centerLon, centerLat, dist)
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.RectFromPointDistance(centerLon, centerLat, dist)
if err != nil {
return nil, err
}
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
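The bounding rectangle is only a prefilter; candidates are then verified against the true great-circle distance (the sort code later in this commit notes that geo.Haversin returns kilometers). A self-contained sketch of that distance check, not bleve's implementation, assuming the standard Haversine formula and a mean earth radius:

package main

import (
	"fmt"
	"math"
)

// haversin computes the great-circle distance in km between two
// lon/lat points, the exact check backing the bounding-box prefilter.
func haversin(lon1, lat1, lon2, lat2 float64) float64 {
	const earthRadiusKm = 6371.0
	rad := math.Pi / 180
	dLat := (lat2 - lat1) * rad
	dLon := (lon2 - lon1) * rad
	a := math.Sin(dLat/2)*math.Sin(dLat/2) +
		math.Cos(lat1*rad)*math.Cos(lat2*rad)*
			math.Sin(dLon/2)*math.Sin(dLon/2)
	return 2 * earthRadiusKm * math.Asin(math.Sqrt(a))
}

func main() {
	// Paris to London, roughly 344 km (illustrative coordinates)
	fmt.Printf("%.0f km\n", haversin(2.3522, 48.8566, -0.1276, 51.5072))
}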

View file

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeMatchAllSearcher int
func init() {
var mas MatchAllSearcher
reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size())
}
type MatchAllSearcher struct {
indexReader index.IndexReader
reader index.DocIDReader
@ -46,6 +56,12 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
}, nil
}
func (s *MatchAllSearcher) Size() int {
return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
s.reader.Size() +
s.scorer.Size()
}
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}

View file

@ -15,10 +15,20 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeMatchNoneSearcher int
func init() {
var mns MatchNoneSearcher
reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size())
}
type MatchNoneSearcher struct {
indexReader index.IndexReader
}
@ -29,6 +39,10 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
}, nil
}
func (s *MatchNoneSearcher) Size() int {
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
}
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}

View file

@ -17,6 +17,7 @@ package searcher
import (
"bytes"
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
@ -55,6 +56,17 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
var err error
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr()
}
@ -63,6 +75,51 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
true)
}
func filterCandidateTerms(indexReader index.IndexReader,
terms [][]byte, field string) (rv [][]byte, err error) {
if ir, ok := indexReader.(index.IndexReaderOnly); ok {
fieldDict, err := ir.FieldDictOnly(field, terms, false)
if err != nil {
return nil, err
}
// enumerate the terms (no need to check them again)
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
rv = append(rv, []byte(tfd.Term))
tfd, err = fieldDict.Next()
}
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
return rv, err
}
fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
if err != nil {
return nil, err
}
// enumerate the terms and check against list of terms
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
termBytes := []byte(tfd.Term)
i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 {
rv = append(rv, terms[i])
}
terms = terms[i:]
tfd, err = fieldDict.Next()
}
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
return rv, err
}
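The loop above walks two sorted sequences at once: each dictionary term is binary-searched against the remaining candidates, and the candidate slice is re-sliced forward so the search window only ever shrinks. A minimal, self-contained sketch of the same idea (sorted byte-slice inputs assumed; names are illustrative):

package main

import (
	"bytes"
	"fmt"
	"sort"
)

// intersectSorted keeps each dictionary term that also appears in the
// sorted candidate list; as in filterCandidateTerms, the candidate
// slice is re-sliced past the binary-search point so later lookups
// scan an ever-shrinking window.
func intersectSorted(candidates, dict [][]byte) [][]byte {
	var rv [][]byte
	for _, d := range dict {
		i := sort.Search(len(candidates), func(i int) bool {
			return bytes.Compare(candidates[i], d) >= 0
		})
		if i < len(candidates) && bytes.Equal(candidates[i], d) {
			rv = append(rv, candidates[i])
		}
		candidates = candidates[i:]
	}
	return rv
}

func main() {
	c := [][]byte{[]byte("a"), []byte("c"), []byte("e")}
	d := [][]byte{[]byte("b"), []byte("c"), []byte("d"), []byte("e")}
	for _, t := range intersectSorted(c, d) {
		fmt.Println(string(t)) // c, then e
	}
}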
type termRange struct {
startTerm []byte
endTerm []byte

View file

@ -17,21 +17,52 @@ package searcher
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizePhraseSearcher int
func init() {
var ps PhraseSearcher
reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size())
}
type PhraseSearcher struct {
indexReader index.IndexReader
mustSearcher *ConjunctionSearcher
queryNorm float64
currMust *search.DocumentMatch
slop int
terms [][]string
path phrasePath
paths []phrasePath
locations []search.Location
initialized bool
}
func (s *PhraseSearcher) Size() int {
sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr
if s.mustSearcher != nil {
sizeInBytes += s.mustSearcher.Size()
}
if s.currMust != nil {
sizeInBytes += s.currMust.Size()
}
for _, entry := range s.terms {
sizeInBytes += size.SizeOfSlice
for _, entry1 := range entry {
sizeInBytes += size.SizeOfString + len(entry1)
}
}
return sizeInBytes
}
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string
mterms := make([][]string, len(terms))
@ -96,7 +127,6 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie
// build our searcher
rv := PhraseSearcher{
indexReader: indexReader,
mustSearcher: mustSearcher,
terms: terms,
}
@ -133,6 +163,9 @@ func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error {
var err error
if s.mustSearcher != nil {
if s.currMust != nil {
ctx.DocumentMatchPool.Put(s.currMust)
}
s.currMust, err = s.mustSearcher.Next(ctx)
if err != nil {
return err
@ -182,43 +215,59 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
// also satisfies the phrase constraints. if so, it returns a DocumentMatch
// for this document, otherwise nil
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
s.locations = s.currMust.Complete(s.locations)
locations := s.currMust.Locations
s.currMust.Locations = nil
ftls := s.currMust.FieldTermLocations
// typically we would expect results in only one field, but we
// allow for this not to be the case; however, phrase constraints
// can only be satisfied within a single field, so each field can
// be checked independently
for field, tlm := range s.currMust.Locations {
f, rvtlm := s.checkCurrMustMatchField(ctx, tlm)
if f > 0 {
freq += f
rvftlm[field] = rvtlm
}
for field, tlm := range locations {
ftls = s.checkCurrMustMatchField(ctx, field, tlm, ftls)
}
if freq > 0 {
if len(ftls) > 0 {
// return match
rv := s.currMust
rv.Locations = rvftlm
s.currMust = nil
rv.FieldTermLocations = ftls
return rv
}
return nil
}
// checkCurrMustMatchField is solely concerned with determining if one particular
// field within the currMust DocumentMatch Locations satisfies the phrase
// constraints (possibly more than once). if so, the number of times it was
// satisfied and these locations are returned; otherwise 0 and either
// a nil or empty TermLocationMap
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
rv := make(search.TermLocationMap, len(s.terms))
for _, p := range paths {
p.MergeInto(rv)
// checkCurrMustMatchField is solely concerned with determining if one
// particular field within the currMust DocumentMatch Locations
// satisfies the phrase constraints (possibly more than once). if so,
// the matching field term locations are appended to the provided
// slice
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext,
field string, tlm search.TermLocationMap,
ftls []search.FieldTermLocation) []search.FieldTermLocation {
if s.path == nil {
s.path = make(phrasePath, 0, len(s.terms))
}
return len(paths), rv
s.paths = findPhrasePaths(0, nil, s.terms, tlm, s.path[:0], 0, s.paths[:0])
for _, p := range s.paths {
for _, pp := range p {
ftls = append(ftls, search.FieldTermLocation{
Field: field,
Term: pp.term,
Location: search.Location{
Pos: pp.loc.Pos,
Start: pp.loc.Start,
End: pp.loc.End,
ArrayPositions: pp.loc.ArrayPositions,
},
})
}
}
return ftls
}
type phrasePart struct {
@ -226,7 +275,11 @@ type phrasePart struct {
loc *search.Location
}
type phrasePath []*phrasePart
func (p *phrasePart) String() string {
return fmt.Sprintf("[%s %v]", p.term, p.loc)
}
type phrasePath []phrasePart
func (p phrasePath) MergeInto(in search.TermLocationMap) {
for _, pp := range p {
@ -234,24 +287,51 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) {
}
}
// findPhrasePaths is a function to identify phrase matches from a set of known
// term locations. the implementation is recursive, so care must be taken
// with arguments and return values.
func (p phrasePath) String() string {
rv := "["
for i, pp := range p {
if i > 0 {
rv += ", "
}
rv += pp.String()
}
rv += "]"
return rv
}
// findPhrasePaths is a function to identify phrase matches from a set
// of known term locations. it is recursive, so care must be taken with
// arguments and return values.
//
// prev - the previous location, nil on first invocation
// phraseTerms - slice containing the phrase terms themselves
// prevPos - the previous position, 0 on first invocation
// ap - array positions of the first candidate phrase part to
// which further recursive phrase parts must match,
// nil on initial invocation or when there are no array positions
// phraseTerms - slice containing the phrase terms,
// may contain empty string as placeholder (don't care)
// tlm - the Term Location Map containing all relevant term locations
// offset - the offset from the previous position that this next term must match
// p - the current path being explored (appended to in recursive calls)
// this is the primary state being built during the traversal
// remainingSlop - amount of sloppiness that's allowed, which is the
// sum of the editDistances from each matching phrase part,
// where 0 means no sloppiness allowed (all editDistances must be 0),
// decremented during recursion
// rv - the final result being appended to by all the recursive calls
//
// returns slice of paths, or nil if this invocation did not find any successful paths
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string,
tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath {
// no more terms
if len(phraseTerms) < 1 {
return []phrasePath{p}
// snapshot or copy the recursively built phrasePath p and
// append it to the rv, also optimizing by checking if next
// phrasePath item in the rv (which we're about to overwrite)
// is available for reuse
var pcopy phrasePath
if len(rv) < cap(rv) {
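// rv[:len(rv)+1] re-extends rv by one slot within its existing
// capacity; indexing that slot recovers whatever phrasePath an
// earlier pass left in the backing array, and [:0] empties it so
// the append below can reuse its storage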
pcopy = rv[:len(rv)+1][len(rv)][:0]
}
return append(rv, append(pcopy, p...))
}
car := phraseTerms[0]
@ -264,13 +344,13 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv)
}
var rv []phrasePath
// locations for this term
for _, carTerm := range car {
locations := tlm[carTerm]
LOCATIONS_LOOP:
for _, loc := range locations {
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
// if the array positions are wrong, can't match, try next location
@ -283,11 +363,18 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s
dist = editDistance(prevPos+1, loc.Pos)
}
// if enough slop reamining, continue recursively
// if enough slop remaining, continue recursively
if prevPos == 0 || (remainingSlop-dist) >= 0 {
// skip if we've already used this term+loc
for _, ppart := range p {
if ppart.term == carTerm && ppart.loc == loc {
continue LOCATIONS_LOOP
}
}
// this location works, add it to the path (but not for empty term)
px := append(p, &phrasePart{term: carTerm, loc: loc})
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
px := append(p, phrasePart{term: carTerm, loc: loc})
rv = findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv)
}
}
}
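To make the slop arithmetic concrete with a small assumed example: for phraseTerms [["quick"], ["fox"]], a "quick" matched at position 3 makes the recursion expect "fox" at position 4; a "fox" occurrence at position 5 costs editDistance(4, 5) = 1, so it is kept only while at least one unit of slop remains, and the emitted path is [quick@3, fox@5].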
@ -309,6 +396,15 @@ func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexIntern
return nil, err
}
}
if s.currMust != nil {
if s.currMust.IndexInternalID.Compare(ID) >= 0 {
return s.Next(ctx)
}
ctx.DocumentMatchPool.Put(s.currMust)
}
if s.currMust == nil {
return nil, nil
}
var err error
s.currMust, err = s.mustSearcher.Advance(ctx, ID)
if err != nil {

View file

@ -29,19 +29,40 @@ import (
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
prefixTerm, complete := pattern.LiteralPrefix()
var candidateTerms []string
if complete {
// there is no pattern
candidateTerms = []string{prefixTerm}
} else {
var err error
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
prefixTerm)
if ir, ok := indexReader.(index.IndexReaderRegexp); ok {
fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String()))
if err != nil {
return nil, err
}
defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
}()
// enumerate the terms; the regexp-aware field dictionary has
// already filtered them, so no further check is needed
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
candidateTerms = append(candidateTerms, tfd.Term)
tfd, err = fieldDict.Next()
}
if err != nil {
return nil, err
}
} else {
prefixTerm, complete := pattern.LiteralPrefix()
if complete {
// there is no pattern
candidateTerms = []string{prefixTerm}
} else {
var err error
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
prefixTerm)
if err != nil {
return nil, err
}
}
}
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
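The LiteralPrefix fast path in the else branch is standard library behavior, demonstrated by this self-contained sketch (patterns are illustrative):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// A fully literal pattern: the whole string is the prefix and
	// complete == true, so no dictionary enumeration is needed.
	p, complete := regexp.MustCompile(`golang`).LiteralPrefix()
	fmt.Println(p, complete) // golang true

	// With an alternation, only the literal head is reported and
	// complete == false, so candidate terms must still be enumerated.
	p, complete = regexp.MustCompile(`go(lang|pher)`).LiteralPrefix()
	fmt.Println(p, complete) // go false
}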

View file

@ -15,11 +15,21 @@
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
"github.com/blevesearch/bleve/size"
)
var reflectStaticSizeTermSearcher int
func init() {
var ts TermSearcher
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
}
type TermSearcher struct {
indexReader index.IndexReader
reader index.TermFieldReader
@ -63,6 +73,13 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
}, nil
}
func (s *TermSearcher) Size() int {
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
s.reader.Size() +
s.tfd.Size() +
s.scorer.Size()
}
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}
@ -120,3 +137,13 @@ func (s *TermSearcher) Min() int {
func (s *TermSearcher) DocumentMatchPoolSize() int {
return 1
}
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
index.OptimizableContext, error) {
o, ok := s.reader.(index.Optimizable)
if ok {
return o.Optimize(kind, octx)
}
return octx, nil
}

View file

@ -64,6 +64,10 @@ func NewTermRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMin && min != nil && string(min) == terms[0] {
terms = terms[1:]
// check again, as we might have removed the only entry
if len(terms) < 1 {
return NewMatchNoneSearcher(indexReader)
}
}
// if our term list included the max, it would be the last item

View file

@ -67,8 +67,8 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
rvd := &SortGeoDistance{
Field: field,
Desc: descending,
lon: lon,
lat: lat,
Lon: lon,
Lat: lat,
unitMult: 1.0,
}
if distUnit, ok := input["unit"].(string); ok {
@ -251,23 +251,21 @@ func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatc
}
func (so SortOrder) RequiresScore() bool {
rv := false
for _, soi := range so {
if soi.RequiresScoring() {
rv = true
return true
}
}
return rv
return false
}
func (so SortOrder) RequiresDocID() bool {
rv := false
for _, soi := range so {
if soi.RequiresDocID() {
rv = true
return true
}
}
return rv
return false
}
func (so SortOrder) RequiredFields() []string {
@ -279,7 +277,7 @@ func (so SortOrder) RequiredFields() []string {
}
func (so SortOrder) CacheIsScore() []bool {
var rv []bool
rv := make([]bool, 0, len(so))
for _, soi := range so {
rv = append(rv, soi.RequiresScoring())
}
@ -287,7 +285,7 @@ func (so SortOrder) CacheIsScore() []bool {
}
func (so SortOrder) CacheDescending() []bool {
var rv []bool
rv := make([]bool, 0, len(so))
for _, soi := range so {
rv = append(rv, soi.Descending())
}
@ -486,8 +484,7 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
}
func (s *SortField) Copy() SearchSort {
var rv SortField
rv = *s
rv := *s
return &rv
}
@ -499,7 +496,6 @@ type SortDocID struct {
// UpdateVisitor is a no-op for SortDocID as its value
// is not dependent on any field terms
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
@ -529,8 +525,7 @@ func (s *SortDocID) MarshalJSON() ([]byte, error) {
}
func (s *SortDocID) Copy() SearchSort {
var rv SortDocID
rv = *s
rv := *s
return &rv
}
@ -542,7 +537,6 @@ type SortScore struct {
// UpdateVisitor is a no-op for SortScore as its value
// is not dependent on any field terms
func (s *SortScore) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
@ -572,8 +566,7 @@ func (s *SortScore) MarshalJSON() ([]byte, error) {
}
func (s *SortScore) Copy() SearchSort {
var rv SortScore
rv = *s
rv := *s
return &rv
}
@ -583,13 +576,12 @@ var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// their distance from the specified point.
func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
*SortGeoDistance, error) {
rv := &SortGeoDistance{
Field: field,
Desc: desc,
Unit: unit,
lon: lon,
lat: lat,
Lon: lon,
Lat: lat,
}
var err error
rv.unitMult, err = geo.ParseDistanceUnit(unit)
@ -608,8 +600,8 @@ type SortGeoDistance struct {
Desc bool
Unit string
values []string
lon float64
lat float64
Lon float64
Lat float64
unitMult float64
}
@ -640,7 +632,7 @@ func (s *SortGeoDistance) Value(i *DocumentMatch) string {
docLon := geo.MortonUnhashLon(uint64(i64))
docLat := geo.MortonUnhashLat(uint64(i64))
dist := geo.Haversin(s.lon, s.lat, docLon, docLat)
dist := geo.Haversin(s.Lon, s.Lat, docLon, docLat)
// dist is returned in km, so convert to m
dist *= 1000
if s.unitMult != 0 {
@ -690,8 +682,8 @@ func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
"by": "geo_distance",
"field": s.Field,
"location": map[string]interface{}{
"lon": s.lon,
"lat": s.lat,
"lon": s.Lon,
"lat": s.Lat,
},
}
if s.Unit != "" {
@ -705,7 +697,6 @@ func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
}
func (s *SortGeoDistance) Copy() SearchSort {
var rv SortGeoDistance
rv = *s
rv := *s
return &rv
}
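Putting ParseSearchSortObj and MarshalJSON together, a geo_distance sort round-trips through JSON shaped roughly like the following (field name and coordinates are illustrative, "unit" is optional, and the "desc" key is an assumption based on the Desc field rather than something shown in these hunks):

{
  "by": "geo_distance",
  "field": "location",
  "location": {"lon": 2.3522, "lat": 48.8566},
  "unit": "km",
  "desc": false
}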

View file

@ -40,3 +40,30 @@ func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
}
return rv
}
func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation {
n := len(dest)
for _, dm := range matches {
n += len(dm.FieldTermLocations)
}
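// grow dest to its final size with one allocation up front, so the
// appends below never trigger incremental re-allocations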
if cap(dest) < n {
dest = append(make([]FieldTermLocation, 0, n), dest...)
}
for _, dm := range matches {
for _, ftl := range dm.FieldTermLocations {
dest = append(dest, FieldTermLocation{
Field: ftl.Field,
Term: ftl.Term,
Location: Location{
Pos: ftl.Location.Pos,
Start: ftl.Location.Start,
End: ftl.Location.End,
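// appending onto a nil ArrayPositions deep-copies the slice, so the
// merged location does not alias the source match's backing array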
ArrayPositions: append(ArrayPositions(nil), ftl.Location.ArrayPositions...),
},
})
}
}
return dest
}