Switch to bbolt
This commit is contained in:
parent
360bed00f9
commit
77543e3aed
617 changed files with 68468 additions and 97867 deletions
12
vendor/github.com/blevesearch/bleve/.travis.yml
generated
vendored
12
vendor/github.com/blevesearch/bleve/.travis.yml
generated
vendored
|
@ -3,10 +3,9 @@ sudo: false
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.7.x
|
||||
- 1.8.x
|
||||
- 1.9.x
|
||||
- "1.10"
|
||||
- "1.12.x"
|
||||
- "1.13.x"
|
||||
- "1.14.x"
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
|
@ -14,9 +13,10 @@ script:
|
|||
- go get github.com/kisielk/errcheck
|
||||
- go get -u github.com/FiloSottile/gvt
|
||||
- gvt restore
|
||||
- go test -v $(go list ./... | grep -v vendor/)
|
||||
- go test -race -v $(go list ./... | grep -v vendor/)
|
||||
- go vet $(go list ./... | grep -v vendor/)
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/)
|
||||
- go test ./test -v -indexType scorch
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
|
||||
- docs/project-code-coverage.sh
|
||||
- docs/build_children.sh
|
||||
|
||||
|
|
49
vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
49
vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
|
||||
"github.com/blevesearch/snowballstem"
|
||||
"github.com/blevesearch/snowballstem/english"
|
||||
)
|
||||
|
||||
const SnowballStemmerName = "stemmer_en_snowball"
|
||||
|
||||
// EnglishStemmerFilter is a token filter that stems terms with the Snowball
// English stemmer. It carries no state of its own.
type EnglishStemmerFilter struct{}

// NewEnglishStemmerFilter returns a ready-to-use EnglishStemmerFilter.
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
	return new(EnglishStemmerFilter)
}
|
||||
|
||||
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
env := snowballstem.NewEnv(string(token.Term))
|
||||
english.Stem(env)
|
||||
token.Term = []byte(env.Current())
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewEnglishStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
|
||||
}
|
4
vendor/github.com/blevesearch/bleve/document/field_text.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/document/field_text.go
generated
vendored
|
@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
func (t *TextField) Analyzer() *analysis.Analyzer {
|
||||
return t.analyzer
|
||||
}
|
||||
|
||||
func (t *TextField) Value() []byte {
|
||||
return t.value
|
||||
}
|
||||
|
|
42
vendor/github.com/blevesearch/bleve/geo/geo.go
generated
vendored
42
vendor/github.com/blevesearch/bleve/geo/geo.go
generated
vendored
|
@ -33,10 +33,18 @@ var minLonRad = minLon * degreesToRadian
|
|||
var minLatRad = minLat * degreesToRadian
|
||||
var maxLonRad = maxLon * degreesToRadian
|
||||
var maxLatRad = maxLat * degreesToRadian
|
||||
var geoTolerance = 1E-6
|
||||
var geoTolerance = 1e-6
|
||||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
|
||||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
|
||||
|
||||
var geoHashMaxLength = 12
|
||||
|
||||
// Point represents a geo point.
|
||||
type Point struct {
|
||||
Lon float64 `json:"lon"`
|
||||
Lat float64 `json:"lat"`
|
||||
}
|
||||
|
||||
// MortonHash computes the morton hash value for the provided geo point
|
||||
// This point is ordered as lon, lat.
|
||||
func MortonHash(lon, lat float64) uint64 {
|
||||
|
@ -168,3 +176,35 @@ func checkLongitude(longitude float64) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func BoundingRectangleForPolygon(polygon []Point) (
|
||||
float64, float64, float64, float64, error) {
|
||||
err := checkLongitude(polygon[0].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[0].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
maxY, minY := polygon[0].Lat, polygon[0].Lat
|
||||
maxX, minX := polygon[0].Lon, polygon[0].Lon
|
||||
for i := 1; i < len(polygon); i++ {
|
||||
err := checkLongitude(polygon[i].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[i].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
|
||||
maxY = math.Max(maxY, polygon[i].Lat)
|
||||
minY = math.Min(minY, polygon[i].Lat)
|
||||
|
||||
maxX = math.Max(maxX, polygon[i].Lon)
|
||||
minX = math.Min(minX, polygon[i].Lon)
|
||||
}
|
||||
|
||||
return minX, maxY, maxX, minY, nil
|
||||
}
|
||||
|
|
111
vendor/github.com/blevesearch/bleve/geo/geohash.go
generated
vendored
Normal file
111
vendor/github.com/blevesearch/bleve/geo/geohash.go
generated
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// This implementation is inspired by geohash-js
|
||||
// ref: https://github.com/davetroy/geohash-js
|
||||
|
||||
package geo
|
||||
|
||||
// encoding pairs a base32 alphabet with the reverse lookup table derived
// from it.
type encoding struct {
	enc string    // the alphabet; enc[v] is the character for value v
	dec [256]byte // dec[c] is the value of byte c, or 0xff when c is not in the alphabet
}

// newEncoding builds the encoding for the given alphabet. The alphabet is
// expected to be a 32-byte string; its length is not checked here.
func newEncoding(encoder string) *encoding {
	e := &encoding{enc: encoder}
	// Mark every byte as "not in the alphabet" first ...
	for i := range e.dec {
		e.dec[i] = 0xff
	}
	// ... then record the position of each alphabet character.
	for pos := 0; pos < len(encoder); pos++ {
		e.dec[encoder[pos]] = byte(pos)
	}
	return e
}
|
||||
|
||||
// base32encoding with the Geohash alphabet.
|
||||
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
|
||||
|
||||
var masks = []uint64{16, 8, 4, 2, 1}
|
||||
|
||||
// DecodeGeoHash decodes the string geohash faster with
|
||||
// higher precision. This api is in experimental phase.
|
||||
func DecodeGeoHash(geoHash string) (float64, float64) {
|
||||
even := true
|
||||
lat := []float64{-90.0, 90.0}
|
||||
lon := []float64{-180.0, 180.0}
|
||||
|
||||
for i := 0; i < len(geoHash); i++ {
|
||||
cd := uint64(base32encoding.dec[geoHash[i]])
|
||||
for j := 0; j < 5; j++ {
|
||||
if even {
|
||||
if cd&masks[j] > 0 {
|
||||
lon[0] = (lon[0] + lon[1]) / 2
|
||||
} else {
|
||||
lon[1] = (lon[0] + lon[1]) / 2
|
||||
}
|
||||
} else {
|
||||
if cd&masks[j] > 0 {
|
||||
lat[0] = (lat[0] + lat[1]) / 2
|
||||
} else {
|
||||
lat[1] = (lat[0] + lat[1]) / 2
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
}
|
||||
}
|
||||
|
||||
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
|
||||
}
|
||||
|
||||
func EncodeGeoHash(lat, lon float64) string {
|
||||
even := true
|
||||
lats := []float64{-90.0, 90.0}
|
||||
lons := []float64{-180.0, 180.0}
|
||||
precision := 12
|
||||
var ch, bit uint64
|
||||
var geoHash string
|
||||
|
||||
for len(geoHash) < precision {
|
||||
if even {
|
||||
mid := (lons[0] + lons[1]) / 2
|
||||
if lon > mid {
|
||||
ch |= masks[bit]
|
||||
lons[0] = mid
|
||||
} else {
|
||||
lons[1] = mid
|
||||
}
|
||||
} else {
|
||||
mid := (lats[0] + lats[1]) / 2
|
||||
if lat > mid {
|
||||
ch |= masks[bit]
|
||||
lats[0] = mid
|
||||
} else {
|
||||
lats[1] = mid
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
if bit < 4 {
|
||||
bit++
|
||||
} else {
|
||||
geoHash += string(base32encoding.enc[ch])
|
||||
ch = 0
|
||||
bit = 0
|
||||
}
|
||||
}
|
||||
|
||||
return geoHash
|
||||
}
|
45
vendor/github.com/blevesearch/bleve/geo/parse.go
generated
vendored
45
vendor/github.com/blevesearch/bleve/geo/parse.go
generated
vendored
|
@ -16,6 +16,7 @@ package geo
|
|||
|
||||
import (
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -24,6 +25,8 @@ import (
|
|||
// Container:
|
||||
// slice length 2 (GeoJSON)
|
||||
// first element lon, second element lat
|
||||
// string (coordinates separated by comma, or a geohash)
|
||||
// first element lat, second element lon
|
||||
// map[string]interface{}
|
||||
// exact keys lat and lon or lng
|
||||
// struct
|
||||
|
@ -36,10 +39,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
|||
var foundLon, foundLat bool
|
||||
|
||||
thingVal := reflect.ValueOf(thing)
|
||||
if !thingVal.IsValid() {
|
||||
return lon, lat, false
|
||||
}
|
||||
|
||||
thingTyp := thingVal.Type()
|
||||
|
||||
// is it a slice
|
||||
if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
|
||||
if thingVal.Kind() == reflect.Slice {
|
||||
// must be length 2
|
||||
if thingVal.Len() == 2 {
|
||||
first := thingVal.Index(0)
|
||||
|
@ -55,6 +62,37 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
|||
}
|
||||
}
|
||||
|
||||
// is it a string
|
||||
if thingVal.Kind() == reflect.String {
|
||||
geoStr := thingVal.Interface().(string)
|
||||
if strings.Contains(geoStr, ",") {
|
||||
// geo point with coordinates split by comma
|
||||
points := strings.Split(geoStr, ",")
|
||||
for i, point := range points {
|
||||
// trim any leading or trailing white spaces
|
||||
points[i] = strings.TrimSpace(point)
|
||||
}
|
||||
if len(points) == 2 {
|
||||
var err error
|
||||
lat, err = strconv.ParseFloat(points[0], 64)
|
||||
if err == nil {
|
||||
foundLat = true
|
||||
}
|
||||
lon, err = strconv.ParseFloat(points[1], 64)
|
||||
if err == nil {
|
||||
foundLon = true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// geohash
|
||||
if len(geoStr) <= geoHashMaxLength {
|
||||
lat, lon = DecodeGeoHash(geoStr)
|
||||
foundLat = true
|
||||
foundLon = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// is it a map
|
||||
if l, ok := thing.(map[string]interface{}); ok {
|
||||
if lval, ok := l["lon"]; ok {
|
||||
|
@ -68,7 +106,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
|||
}
|
||||
|
||||
// now try reflection on struct fields
|
||||
if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
|
||||
if thingVal.Kind() == reflect.Struct {
|
||||
for i := 0; i < thingVal.NumField(); i++ {
|
||||
fieldName := thingTyp.Field(i).Name
|
||||
if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
|
||||
|
@ -113,6 +151,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
|||
// extract numeric value (if possible) and returns a float64
|
||||
func extractNumericVal(v interface{}) (float64, bool) {
|
||||
val := reflect.ValueOf(v)
|
||||
if !val.IsValid() {
|
||||
return 0, false
|
||||
}
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Float32, reflect.Float64:
|
||||
|
|
25
vendor/github.com/blevesearch/bleve/go.mod
generated
vendored
Normal file
25
vendor/github.com/blevesearch/bleve/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
module github.com/blevesearch/bleve
|
||||
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/RoaringBitmap/roaring v0.4.21
|
||||
github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3
|
||||
github.com/blevesearch/segment v0.9.0
|
||||
github.com/blevesearch/snowballstem v0.9.0
|
||||
github.com/blevesearch/zap/v11 v11.0.7
|
||||
github.com/blevesearch/zap/v12 v12.0.7
|
||||
github.com/couchbase/ghistogram v0.1.0 // indirect
|
||||
github.com/couchbase/moss v0.1.0
|
||||
github.com/couchbase/vellum v1.0.1
|
||||
github.com/golang/protobuf v1.3.2
|
||||
github.com/kljensen/snowball v0.6.0
|
||||
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
|
||||
github.com/spf13/cobra v0.0.5
|
||||
github.com/steveyen/gtreap v0.1.0
|
||||
github.com/syndtr/goleveldb v1.0.0
|
||||
github.com/willf/bitset v1.1.10
|
||||
go.etcd.io/bbolt v1.3.4
|
||||
golang.org/x/text v0.3.0
|
||||
)
|
20
vendor/github.com/blevesearch/bleve/index.go
generated
vendored
20
vendor/github.com/blevesearch/bleve/index.go
generated
vendored
|
@ -117,6 +117,26 @@ func (b *Batch) String() string {
|
|||
// be re-used in the future.
|
||||
func (b *Batch) Reset() {
|
||||
b.internal.Reset()
|
||||
b.lastDocSize = 0
|
||||
b.totalSize = 0
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(o *Batch) {
|
||||
if o != nil && o.internal != nil {
|
||||
b.internal.Merge(o.internal)
|
||||
if o.LastDocSize() > 0 {
|
||||
b.lastDocSize = o.LastDocSize()
|
||||
}
|
||||
b.totalSize = uint64(b.internal.TotalDocSize())
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
|
||||
b.internal.SetPersistedCallback(f)
|
||||
}
|
||||
|
||||
func (b *Batch) PersistedCallback() index.BatchCallback {
|
||||
return b.internal.PersistedCallback()
|
||||
}
|
||||
|
||||
// An Index implements all the indexing and searching
|
||||
|
|
69
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
69
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
|
@ -98,18 +98,33 @@ type IndexReader interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
// The Regexp interface defines the subset of the regexp.Regexp API
|
||||
// methods that are used by bleve indexes, allowing callers to pass in
|
||||
// alternate implementations.
|
||||
type Regexp interface {
|
||||
FindStringIndex(s string) (loc []int)
|
||||
|
||||
LiteralPrefix() (prefix string, complete bool)
|
||||
|
||||
String() string
|
||||
}
|
||||
|
||||
type IndexReaderRegexp interface {
|
||||
FieldDictRegexp(field string, regex []byte) (FieldDict, error)
|
||||
FieldDictRegexp(field string, regex string) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderFuzzy interface {
|
||||
FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error)
|
||||
FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderOnly interface {
|
||||
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderContains interface {
|
||||
FieldDictContains(field string) (FieldDictContains, error)
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
type FieldTerms map[string][]string
|
||||
|
||||
|
@ -219,6 +234,10 @@ type FieldDict interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
type FieldDictContains interface {
|
||||
Contains(key []byte) (bool, error)
|
||||
}
|
||||
|
||||
// DocIDReader is the interface exposing enumeration of documents identifiers.
|
||||
// Close the reader to release associated resources.
|
||||
type DocIDReader interface {
|
||||
|
@ -237,9 +256,12 @@ type DocIDReader interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
type BatchCallback func(error)
|
||||
|
||||
type Batch struct {
|
||||
IndexOps map[string]*document.Document
|
||||
InternalOps map[string][]byte
|
||||
IndexOps map[string]*document.Document
|
||||
InternalOps map[string][]byte
|
||||
persistedCallback BatchCallback
|
||||
}
|
||||
|
||||
func NewBatch() *Batch {
|
||||
|
@ -265,6 +287,14 @@ func (b *Batch) DeleteInternal(key []byte) {
|
|||
b.InternalOps[string(key)] = nil
|
||||
}
|
||||
|
||||
func (b *Batch) SetPersistedCallback(f BatchCallback) {
|
||||
b.persistedCallback = f
|
||||
}
|
||||
|
||||
func (b *Batch) PersistedCallback() BatchCallback {
|
||||
return b.persistedCallback
|
||||
}
|
||||
|
||||
func (b *Batch) String() string {
|
||||
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
|
||||
for k, v := range b.IndexOps {
|
||||
|
@ -287,6 +317,27 @@ func (b *Batch) String() string {
|
|||
func (b *Batch) Reset() {
|
||||
b.IndexOps = make(map[string]*document.Document)
|
||||
b.InternalOps = make(map[string][]byte)
|
||||
b.persistedCallback = nil
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(o *Batch) {
|
||||
for k, v := range o.IndexOps {
|
||||
b.IndexOps[k] = v
|
||||
}
|
||||
for k, v := range o.InternalOps {
|
||||
b.InternalOps[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Batch) TotalDocSize() int {
|
||||
var s int
|
||||
for k, v := range b.IndexOps {
|
||||
if v != nil {
|
||||
s += v.Size() + size.SizeOfString
|
||||
}
|
||||
s += len(k)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Optimizable represents an optional interface that implementable by
|
||||
|
@ -298,11 +349,19 @@ type Optimizable interface {
|
|||
Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
|
||||
}
|
||||
|
||||
// Represents a result of optimization -- see the Finish() method.
|
||||
type Optimized interface{}
|
||||
|
||||
type OptimizableContext interface {
|
||||
// Once all the optimizable resources have been provided the same
|
||||
// OptimizableContext instance, the optimization preparations are
|
||||
// finished or completed via the Finish() method.
|
||||
Finish() error
|
||||
//
|
||||
// Depending on the optimization being performed, the Finish()
|
||||
// method might return a non-nil Optimized instance. For example,
|
||||
// the Optimized instance might represent an optimized
|
||||
// TermFieldReader instance.
|
||||
Finish() (Optimized, error)
|
||||
}
|
||||
|
||||
type DocValueReader interface {
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/README.md
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/README.md
generated
vendored
|
@ -302,7 +302,7 @@ Map local bitsets into global number space (global meaning cross-segment but sti
|
|||
IndexSnapshot already should have mapping something like:
|
||||
0 - Offset 0
|
||||
1 - Offset 3 (because segment 0 had 3 docs)
|
||||
2 - Offset 4 (becuase segment 1 had 1 doc)
|
||||
2 - Offset 4 (because segment 1 had 1 doc)
|
||||
|
||||
This maps to search result bitset:
|
||||
|
||||
|
|
137
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
137
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
|
@ -19,6 +19,7 @@ import (
|
|||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
||||
|
@ -29,8 +30,9 @@ type segmentIntroduction struct {
|
|||
ids []string
|
||||
internal map[string][]byte
|
||||
|
||||
applied chan error
|
||||
persisted chan error
|
||||
applied chan error
|
||||
persisted chan error
|
||||
persistedCallback index.BatchCallback
|
||||
}
|
||||
|
||||
type persistIntroduction struct {
|
||||
|
@ -74,11 +76,6 @@ OUTER:
|
|||
case persist := <-s.persists:
|
||||
s.introducePersist(persist)
|
||||
|
||||
case revertTo := <-s.revertToSnapshots:
|
||||
err := s.revertToSnapshot(revertTo)
|
||||
if err != nil {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
var epochCurr uint64
|
||||
|
@ -107,8 +104,11 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
nsegs := len(root.segment)
|
||||
|
||||
// prepare new index snapshot
|
||||
|
@ -123,6 +123,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
|
||||
// iterate through current segments
|
||||
var running uint64
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i := range root.segment {
|
||||
// see if optimistic work included this segment
|
||||
delta, ok := next.obsoletes[root.segment[i].id]
|
||||
|
@ -161,8 +162,19 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += newss.segment.Count()
|
||||
}
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
// append new segment, if any, to end of the new index snapshot
|
||||
if next.data != nil {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
|
@ -197,6 +209,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
|||
if next.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, next.persisted)
|
||||
}
|
||||
if next.persistedCallback != nil {
|
||||
s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
|
||||
}
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
|
@ -221,10 +236,13 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
|||
|
||||
s.rootLock.Lock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
nextSnapshotEpoch := s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newIndexSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
epoch: nextSnapshotEpoch,
|
||||
|
@ -235,6 +253,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
|||
creator: "introducePersist",
|
||||
}
|
||||
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i, segmentSnapshot := range root.segment {
|
||||
// see if this segment has been replaced
|
||||
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
|
||||
|
@ -251,9 +270,17 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
|||
// update items persisted in case of a new segment snapshot
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
fileSegments++
|
||||
} else {
|
||||
newIndexSnapshot.segment[i] = root.segment[i]
|
||||
newIndexSnapshot.segment[i].segment.AddRef()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
newIndexSnapshot.offsets[i] = root.offsets[i]
|
||||
}
|
||||
|
@ -262,6 +289,9 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
|||
newIndexSnapshot.internal[k] = v
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
newIndexSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
rootPrev := s.root
|
||||
|
@ -276,14 +306,19 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
|||
close(persist.applied)
|
||||
}
|
||||
|
||||
// The introducer should definitely handle the segmentMerge.notify
|
||||
// channel before exiting the introduceMerge.
|
||||
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: root.internal,
|
||||
|
@ -293,7 +328,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
|
||||
// iterate through current segments
|
||||
newSegmentDeleted := roaring.NewBitmap()
|
||||
var running uint64
|
||||
var running, docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i := range root.segment {
|
||||
segmentID := root.segment[i].id
|
||||
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
|
||||
|
@ -329,7 +364,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += root.segment[i].segment.Count()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
|
@ -360,8 +403,20 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
})
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
|
||||
|
||||
switch nextMerge.new.(type) {
|
||||
case segment.PersistedSegment:
|
||||
fileSegments++
|
||||
default:
|
||||
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
|
||||
memSegments++
|
||||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
|
@ -384,65 +439,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
close(nextMerge.notify)
|
||||
}
|
||||
|
||||
func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
|
||||
|
||||
if revertTo.snapshot == nil {
|
||||
err := fmt.Errorf("Cannot revert to a nil snapshot")
|
||||
revertTo.applied <- err
|
||||
return err
|
||||
func isMemorySegment(s *SegmentSnapshot) bool {
|
||||
switch s.segment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
|
||||
// acquire lock
|
||||
s.rootLock.Lock()
|
||||
|
||||
// prepare a new index snapshot, based on next snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)),
|
||||
offsets: revertTo.snapshot.offsets,
|
||||
internal: revertTo.snapshot.internal,
|
||||
epoch: s.nextSnapshotEpoch,
|
||||
refs: 1,
|
||||
creator: "revertToSnapshot",
|
||||
}
|
||||
s.nextSnapshotEpoch++
|
||||
|
||||
// iterate through segments
|
||||
for i, segmentSnapshot := range revertTo.snapshot.segment {
|
||||
newSnapshot.segment[i] = &SegmentSnapshot{
|
||||
id: segmentSnapshot.id,
|
||||
segment: segmentSnapshot.segment,
|
||||
deleted: segmentSnapshot.deleted,
|
||||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
creator: segmentSnapshot.creator,
|
||||
}
|
||||
newSnapshot.segment[i].segment.AddRef()
|
||||
|
||||
// remove segment from ineligibleForRemoval map
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
|
||||
if revertTo.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
|
||||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
// swap in new snapshot
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(revertTo.applied)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
114
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
114
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
|
@ -18,13 +18,13 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/mergeplan"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
)
|
||||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
|
@ -46,12 +46,12 @@ OUTER:
|
|||
|
||||
default:
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.RLock()
|
||||
s.rootLock.Lock()
|
||||
ourSnapshot := s.root
|
||||
ourSnapshot.AddRef()
|
||||
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
|
||||
s.rootLock.RUnlock()
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if ourSnapshot.epoch != lastEpochMergePlanned {
|
||||
startTime := time.Now()
|
||||
|
@ -60,7 +60,7 @@ OUTER:
|
|||
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
|
||||
if err == ErrClosed {
|
||||
if err == segment.ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
|
@ -130,18 +130,18 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
|
|||
|
||||
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
||||
options *mergeplan.MergePlanOptions) error {
|
||||
// build list of zap segments in this snapshot
|
||||
var onlyZapSnapshots []mergeplan.Segment
|
||||
// build list of persisted segments in this snapshot
|
||||
var onlyPersistedSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(*zap.Segment); ok {
|
||||
onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot)
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
|
||||
resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
|
@ -151,13 +151,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var notifications []chan *IndexSnapshot
|
||||
var filenames []string
|
||||
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
|
@ -168,26 +168,32 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot)
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
|
||||
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
|
||||
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
|
||||
|
||||
for _, planSegment := range task.Segments {
|
||||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
|
||||
if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
|
||||
oldMap[segSnapshot.id] = nil
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, zapSeg)
|
||||
segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
// track the files getting merged for unsetting the
|
||||
// removal ineligibility. This helps to unflip files
|
||||
// even with fast merger, slow persister work flows.
|
||||
path := persistedSeg.Path()
|
||||
filenames = append(filenames,
|
||||
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var oldNewDocNums map[uint64][]uint64
|
||||
var segment segment.Segment
|
||||
var seg segment.Segment
|
||||
if len(segmentsToMerge) > 0 {
|
||||
filename := zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
|
@ -196,9 +202,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
fileMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor)
|
||||
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
|
||||
s.closeCh, s)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||
|
@ -209,10 +215,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
if err == segment.ErrClosed {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
|
||||
segment, err = zap.Open(path)
|
||||
seg, err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
|
@ -230,33 +239,41 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
id: newSegmentID,
|
||||
old: oldMap,
|
||||
oldNewDocNums: oldNewDocNums,
|
||||
new: segment,
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
new: seg,
|
||||
notify: make(chan *IndexSnapshot),
|
||||
}
|
||||
notifications = append(notifications, sm.notify)
|
||||
|
||||
// give it to the introducer
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = segment.Close()
|
||||
return ErrClosed
|
||||
_ = seg.Close()
|
||||
return segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
|
||||
}
|
||||
|
||||
introStartTime := time.Now()
|
||||
// it is safe to blockingly wait for the merge introduction
|
||||
// here as the introducer is bound to handle the notify channel.
|
||||
newSnapshot := <-sm.notify
|
||||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
|
||||
if newSnapshot != nil {
|
||||
_ = newSnapshot.DecRef()
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
|
||||
}
|
||||
|
||||
for _, notification := range notifications {
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
return ErrClosed
|
||||
case newSnapshot := <-notification:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
|
||||
if newSnapshot != nil {
|
||||
_ = newSnapshot.DecRef()
|
||||
}
|
||||
}
|
||||
// once all the newly merged segment introductions are done,
|
||||
// its safe to unflip the removal ineligibility for the replaced
|
||||
// older segments
|
||||
for _, f := range filenames {
|
||||
s.unmarkIneligibleForRemoval(f)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -274,8 +291,8 @@ type segmentMerge struct {
|
|||
// persisted segment, and synchronously introduce that new segment
|
||||
// into the root
|
||||
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
||||
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
|
||||
chunkFactor uint32) (*IndexSnapshot, uint64, error) {
|
||||
sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
|
||||
sbsIndexes []int) (*IndexSnapshot, uint64, error) {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
|
||||
|
||||
memMergeZapStartTime := time.Now()
|
||||
|
@ -287,7 +304,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
newDocNums, _, err :=
|
||||
zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor)
|
||||
s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||
|
||||
|
@ -302,22 +319,22 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
return nil, 0, err
|
||||
}
|
||||
|
||||
segment, err := zap.Open(path)
|
||||
seg, err := s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// update persisted stats
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: make(map[uint64]*SegmentSnapshot),
|
||||
oldNewDocNums: make(map[uint64][]uint64),
|
||||
new: segment,
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
new: seg,
|
||||
notify: make(chan *IndexSnapshot),
|
||||
}
|
||||
|
||||
for i, idx := range sbsIndexes {
|
||||
|
@ -328,17 +345,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
|
||||
select { // send to introducer
|
||||
case <-s.closeCh:
|
||||
_ = segment.DecRef()
|
||||
return nil, 0, ErrClosed
|
||||
_ = seg.DecRef()
|
||||
return nil, 0, segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
||||
select { // wait for introduction to complete
|
||||
case <-s.closeCh:
|
||||
return nil, 0, ErrClosed
|
||||
case newSnapshot := <-sm.notify:
|
||||
// blockingly wait for the introduction to complete
|
||||
newSnapshot := <-sm.notify
|
||||
if newSnapshot != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
|
||||
return newSnapshot, newSegmentID, nil
|
||||
}
|
||||
return newSnapshot, newSegmentID, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
|
||||
}
|
||||
|
|
4
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
generated
vendored
|
@ -217,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
|||
if len(roster) > 0 {
|
||||
rosterScore := scoreSegments(roster, o)
|
||||
|
||||
if len(bestRoster) <= 0 || rosterScore < bestRosterScore {
|
||||
if len(bestRoster) == 0 || rosterScore < bestRosterScore {
|
||||
bestRoster = roster
|
||||
bestRosterScore = rosterScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(bestRoster) <= 0 {
|
||||
if len(bestRoster) == 0 {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
|
|
347
vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
generated
vendored
347
vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
generated
vendored
|
@ -18,17 +18,37 @@ import (
|
|||
"fmt"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) (
|
||||
index.OptimizableContext, error) {
|
||||
if kind != "conjunction" {
|
||||
return octx, nil
|
||||
var OptimizeConjunction = true
|
||||
var OptimizeConjunctionUnadorned = true
|
||||
var OptimizeDisjunctionUnadorned = true
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if OptimizeConjunction && kind == "conjunction" {
|
||||
return s.optimizeConjunction(octx)
|
||||
}
|
||||
|
||||
if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" {
|
||||
return s.optimizeConjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" {
|
||||
return s.optimizeDisjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
return octx, nil
|
||||
}
|
||||
|
||||
// OptimizeDisjunctionUnadornedMinChildCardinality is the threshold used
// by the unadorned disjunction optimization: when, for a segment, the
// largest actual-bitmap cardinality among the collected postings
// iterators is below this value, the optimization is skipped.
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
|
||||
}
|
||||
|
@ -39,7 +59,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.Optimiza
|
|||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize across different snapshots")
|
||||
return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
@ -53,41 +73,324 @@ type OptimizeTFRConjunction struct {
|
|||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
func (o *OptimizeTFRConjunction) Finish() error {
|
||||
func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr0.ActualBM == nil {
|
||||
itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr0.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr1.ActualBM == nil {
|
||||
itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr1.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm := roaring.And(itr0.ActualBM, itr1.ActualBM)
|
||||
bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
|
||||
|
||||
for _, tfr := range o.tfrs[2:] {
|
||||
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr.ActualBM == nil {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm.And(itr.ActualBM)
|
||||
bm.And(itr.ActualBitmap())
|
||||
}
|
||||
|
||||
// in this conjunction optimization, the postings iterators
|
||||
// will all share the same AND'ed together actual bitmap. The
|
||||
// regular conjunction searcher machinery will still be used,
|
||||
// but the underlying bitmap will be smaller.
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
|
||||
if ok && itr.ActualBM != nil {
|
||||
itr.ActualBM = bm
|
||||
itr.Actual = bm.Iterator()
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if ok && itr.ActualBitmap() != nil {
|
||||
itr.ReplaceActual(bm)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" conjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// OptimizeTFRConjunctionUnadorned is the optimization context that
// accumulates the term field readers taking part in an unadorned
// conjunction optimization.
type OptimizeTFRConjunctionUnadorned struct {
|
||||
// snapshot is the index snapshot the context was created for; readers
// from a different snapshot are rejected when they are registered.
snapshot *IndexSnapshot
|
||||
|
||||
// tfrs holds the readers registered so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
// OptimizeTFRConjunctionUnadornedTerm is the artificial term assigned to
// the term field reader produced by an unadorned conjunction optimization.
var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
|
||||
// OptimizeTFRConjunctionUnadornedField is the artificial field assigned to
// the term field reader produced by an unadorned conjunction optimization.
var OptimizeTFRConjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned conjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps AND'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := &IndexSnapshotTermFieldReader{
|
||||
term: OptimizeTFRConjunctionUnadornedTerm,
|
||||
field: OptimizeTFRConjunctionUnadornedField,
|
||||
snapshot: o.snapshot,
|
||||
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
|
||||
segmentOffset: 0,
|
||||
includeFreq: false,
|
||||
includeNorm: false,
|
||||
includeTermVectors: false,
|
||||
}
|
||||
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
OUTER:
|
||||
for i := range o.snapshot.segment {
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
var docNum1HitLast uint64
|
||||
var docNum1HitLastOk bool
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok {
|
||||
// An empty postings iterator means the entire AND is empty.
|
||||
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
// We only optimize postings iterators that support this operation.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// If the postings iterator is "1-hit" optimized, then we
|
||||
// can perform several optimizations up-front here.
|
||||
docNum1Hit, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
|
||||
// The docNum1Hit doesn't match the previous
|
||||
// docNum1HitLast, so the entire AND is empty.
|
||||
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
docNum1HitLast = docNum1Hit
|
||||
docNum1HitLastOk = true
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() == nil {
|
||||
// An empty actual bitmap means the entire AND is empty.
|
||||
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Collect the actual bitmap for more processing later.
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
|
||||
if docNum1HitLastOk {
|
||||
// We reach here if all the 1-hit optimized posting
|
||||
// iterators had the same 1-hit docNum, so we can check if
|
||||
// our collected actual bitmaps also have that docNum.
|
||||
for _, bm := range actualBMs {
|
||||
if !bm.Contains(uint32(docNum1HitLast)) {
|
||||
// The docNum1Hit isn't in one of our actual
|
||||
// bitmaps, so the entire AND is empty.
|
||||
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// The actual bitmaps and docNum1Hits all contain or have
|
||||
// the same 1-hit docNum, so that's our AND'ed result.
|
||||
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 0 {
|
||||
// If we've collected no actual bitmaps at this point,
|
||||
// then the entire AND is empty.
|
||||
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 1 {
|
||||
// If we've only 1 actual bitmap, then that's our result.
|
||||
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(actualBMs[0])
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Else, AND together our collected bitmaps as our result.
|
||||
bm := roaring.And(actualBMs[0], actualBMs[1])
|
||||
|
||||
for _, actualBM := range actualBMs[2:] {
|
||||
bm.And(actualBM)
|
||||
}
|
||||
|
||||
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
return oTFR, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" disjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// OptimizeTFRDisjunctionUnadorned is the optimization context that
// accumulates the term field readers taking part in an unadorned
// disjunction optimization.
type OptimizeTFRDisjunctionUnadorned struct {
|
||||
// snapshot is the index snapshot the context was created for; readers
// from a different snapshot are rejected when they are registered.
snapshot *IndexSnapshot
|
||||
|
||||
// tfrs holds the readers registered so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
// OptimizeTFRDisjunctionUnadornedTerm is the artificial term assigned to
// the term field reader produced by an unadorned disjunction optimization.
var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
|
||||
// OptimizeTFRDisjunctionUnadornedField is the artificial field assigned to
// the term field reader produced by an unadorned disjunction optimization.
var OptimizeTFRDisjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned disjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps OR'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
var cMax uint64
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
c := itr.ActualBitmap().GetCardinality()
|
||||
if cMax < c {
|
||||
cMax = c
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Heuristic to skip the optimization if all the constituent
|
||||
// bitmaps are too small, where the processing & resource
|
||||
// overhead to create the OR'ed bitmap outweighs the benefit.
|
||||
if cMax < OptimizeDisjunctionUnadornedMinChildCardinality {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := &IndexSnapshotTermFieldReader{
|
||||
term: OptimizeTFRDisjunctionUnadornedTerm,
|
||||
field: OptimizeTFRDisjunctionUnadornedField,
|
||||
snapshot: o.snapshot,
|
||||
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
|
||||
segmentOffset: 0,
|
||||
includeFreq: false,
|
||||
includeNorm: false,
|
||||
includeTermVectors: false,
|
||||
}
|
||||
|
||||
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
docNums = docNums[:0]
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
docNums = append(docNums, uint32(docNum))
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
}
|
||||
|
||||
var bm *roaring.Bitmap
|
||||
if len(actualBMs) > 2 {
|
||||
bm = roaring.HeapOr(actualBMs...)
|
||||
} else if len(actualBMs) == 2 {
|
||||
bm = roaring.Or(actualBMs[0], actualBMs[1])
|
||||
} else if len(actualBMs) == 1 {
|
||||
bm = actualBMs[0].Clone()
|
||||
}
|
||||
|
||||
if bm == nil {
|
||||
bm = roaring.New()
|
||||
}
|
||||
|
||||
bm.AddMany(docNums)
|
||||
|
||||
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
return oTFR, nil
|
||||
}
|
||||
|
|
265
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
265
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
|
@ -17,9 +17,11 @@ package scorch
|
|||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
@ -28,23 +30,54 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
var DefaultChunkFactor uint32 = 1024
|
||||
// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
|
||||
// persistence of segments with the default safe batch option.
|
||||
// If the default safe batch option results in high number of
|
||||
// files on disk, then users may initialise this configuration parameter
|
||||
// with higher values so that the persister will nap a bit within its
|
||||
// work loop to favour better in-memory merging of segments to result
|
||||
// in fewer segment files on disk. But that may come with an indexing
|
||||
// performance overhead.
|
||||
// Unsafe batch users are advised to override this to higher value
|
||||
// for better performance especially with high data density.
|
||||
var DefaultPersisterNapTimeMSec int = 0 // ms
|
||||
|
||||
// Arbitrary number, need to make it configurable.
|
||||
// Lower values like 10/making persister really slow
|
||||
// doesn't work well as it is creating more files to
|
||||
// persist for in next persist iteration and spikes the # FDs.
|
||||
// Ideal value should let persister also proceed at
|
||||
// an optimum pace so that the merger can skip
|
||||
// many intermediate snapshots.
|
||||
// This needs to be based on empirical data.
|
||||
// TODO - may need to revisit this approach/value.
|
||||
var epochDistance = uint64(5)
|
||||
// DefaultPersisterNapUnderNumFiles helps in controlling the pace of
|
||||
// persister. At times of a slow merger progress with heavy file merging
|
||||
// operations, it's better to pace down the persister for letting the merger
|
||||
// to catch up within a range defined by this parameter.
|
||||
// Fewer files on disk (as per the merge plan) would result in keeping the
|
||||
// file handle usage under limit, faster disk merger and a healthier index.
|
||||
// It's been observed that such a loosely sync'ed introducer-persister-merger
|
||||
// trio results in better overall performance.
|
||||
var DefaultPersisterNapUnderNumFiles int = 1000
|
||||
|
||||
var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
|
||||
|
||||
// persisterOptions holds the tuning parameters of the persister. It is
// filled by parsePersisterOptions from the package defaults and, when
// present, the "scorchPersisterOptions" config entry.
type persisterOptions struct {
|
||||
// PersisterNapTimeMSec controls the wait/delay injected into the
|
||||
// persistence work loop to improve the chances for
|
||||
// a healthier and heavier in-memory merging.
|
||||
PersisterNapTimeMSec int
|
||||
|
||||
// If PersisterNapTimeMSec > 0 and the number of files on disk is less
|
||||
// than PersisterNapUnderNumFiles, then the persister will sleep for
|
||||
// PersisterNapTimeMSec milliseconds to improve the chances for
|
||||
// a healthier and heavier in-memory merging.
|
||||
PersisterNapUnderNumFiles int
|
||||
|
||||
// MemoryPressurePauseThreshold gives the persister better leeway
|
||||
// for prudently performing the memory merge of segments in a memory
|
||||
// pressure situation. Here the config value is an upper threshold
|
||||
// for the number of paused application threads. The default value is
|
||||
// a very high number, to always favour the merging of memory segments.
|
||||
MemoryPressurePauseThreshold uint64
|
||||
}
|
||||
|
||||
type notificationChan chan struct{}
|
||||
|
||||
|
@ -54,6 +87,16 @@ func (s *Scorch) persisterLoop() {
|
|||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
|
||||
var unpersistedCallbacks []index.BatchCallback
|
||||
|
||||
po, err := s.parsePersisterOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
|
||||
s.asyncTasks.Done()
|
||||
return
|
||||
}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
|
||||
|
@ -69,10 +112,11 @@ OUTER:
|
|||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
lastMergedEpoch, persistWatchers)
|
||||
lastMergedEpoch, persistWatchers, po)
|
||||
|
||||
var ourSnapshot *IndexSnapshot
|
||||
var ourPersisted []chan error
|
||||
var ourPersistedCallbacks []index.BatchCallback
|
||||
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.Lock()
|
||||
|
@ -81,6 +125,8 @@ OUTER:
|
|||
ourSnapshot.AddRef()
|
||||
ourPersisted = s.rootPersisted
|
||||
s.rootPersisted = nil
|
||||
ourPersistedCallbacks = s.persistedCallbacks
|
||||
s.persistedCallbacks = nil
|
||||
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
|
||||
}
|
||||
|
@ -89,7 +135,7 @@ OUTER:
|
|||
if ourSnapshot != nil {
|
||||
startTime := time.Now()
|
||||
|
||||
err := s.persistSnapshot(ourSnapshot)
|
||||
err := s.persistSnapshot(ourSnapshot, po)
|
||||
for _, ch := range ourPersisted {
|
||||
if err != nil {
|
||||
ch <- err
|
||||
|
@ -98,17 +144,34 @@ OUTER:
|
|||
}
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, 0)
|
||||
if err == ErrClosed {
|
||||
if err == segment.ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
|
||||
// save this current snapshot's persistedCallbacks, to invoke during
|
||||
// the retry attempt
|
||||
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
|
||||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if unpersistedCallbacks != nil {
|
||||
// in the event of this being a retry attempt for persisting a snapshot
|
||||
// that had earlier failed, prepend the persistedCallbacks associated
|
||||
// with earlier segment(s) to the latest persistedCallbacks
|
||||
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
unpersistedCallbacks = nil
|
||||
}
|
||||
|
||||
for i := range ourPersistedCallbacks {
|
||||
ourPersistedCallbacks[i](err)
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
|
||||
|
||||
lastPersistedEpoch = ourSnapshot.epoch
|
||||
|
@ -179,15 +242,51 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
|
|||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
|
||||
persistWatchers []*epochWatcher) (uint64, []*epochWatcher) {
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
|
||||
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
|
||||
po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
|
||||
// first, let the watchers proceed if they lag behind
|
||||
// First, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
// Check the merger lag by counting the segment files on disk,
|
||||
numFilesOnDisk, _, _ := s.diskFileStats(nil)
|
||||
|
||||
// On finding fewer files on disk, persister takes a short pause
|
||||
// for sufficient in-memory segments to pile up for the next
|
||||
// memory merge cum persist loop.
|
||||
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
|
||||
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
|
||||
atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
|
||||
|
||||
case ew := <-s.persisterNotifier:
|
||||
// unblock the merger in meantime
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
lastMergedEpoch = ew.epoch
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
|
||||
}
|
||||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
// Finding too many files on disk could be due to two reasons.
|
||||
// 1. Too many older snapshots awaiting the clean up.
|
||||
// 2. The merger could be lagging behind on merging the disk files.
|
||||
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
|
||||
s.removeOldData()
|
||||
numFilesOnDisk, _, _ = s.diskFileStats(nil)
|
||||
}
|
||||
|
||||
// Persister pause until the merger catches up to reduce the segment
|
||||
// file count under the threshold.
|
||||
// But if there is memory pressure, then skip these sleep maneuvers.
|
||||
OUTER:
|
||||
// check for slow merger and await until the merger catch up
|
||||
for lastPersistedEpoch > lastMergedEpoch+epochDistance {
|
||||
for po.PersisterNapUnderNumFiles > 0 &&
|
||||
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
|
||||
lastMergedEpoch < lastPersistedEpoch {
|
||||
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
|
||||
|
||||
select {
|
||||
|
@ -202,18 +301,46 @@ OUTER:
|
|||
|
||||
// let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
numFilesOnDisk, _, _ = s.diskFileStats(nil)
|
||||
}
|
||||
|
||||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
||||
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
|
||||
if err != nil {
|
||||
return err
|
||||
func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
|
||||
po := persisterOptions{
|
||||
PersisterNapTimeMSec: DefaultPersisterNapTimeMSec,
|
||||
PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles,
|
||||
MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold,
|
||||
}
|
||||
if persisted {
|
||||
return nil
|
||||
if v, ok := s.config["scorchPersisterOptions"]; ok {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return &po, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(b, &po)
|
||||
if err != nil {
|
||||
return &po, err
|
||||
}
|
||||
}
|
||||
return &po, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
|
||||
po *persisterOptions) error {
|
||||
// Perform in-memory segment merging only when the memory pressure is
|
||||
// below the configured threshold, else the persister performs the
|
||||
// direct persistence of segments.
|
||||
if s.paused() < po.MemoryPressurePauseThreshold {
|
||||
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if persisted {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return s.persistSnapshotDirect(snapshot)
|
||||
|
@ -230,13 +357,13 @@ var DefaultMinSegmentsForInMemoryMerge = 2
|
|||
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
|
||||
bool, error) {
|
||||
// collect the in-memory zap segments (SegmentBase instances)
|
||||
var sbs []*zap.SegmentBase
|
||||
var sbs []segment.Segment
|
||||
var sbsDrops []*roaring.Bitmap
|
||||
var sbsIndexes []int
|
||||
|
||||
for i, segmentSnapshot := range snapshot.segment {
|
||||
if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok {
|
||||
sbs = append(sbs, sb)
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok {
|
||||
sbs = append(sbs, segmentSnapshot.segment)
|
||||
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
|
||||
sbsIndexes = append(sbsIndexes, i)
|
||||
}
|
||||
|
@ -247,7 +374,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
|
|||
}
|
||||
|
||||
newSnapshot, newSegmentID, err := s.mergeSegmentBases(
|
||||
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
|
||||
snapshot, sbs, sbsDrops, sbsIndexes)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
@ -329,13 +456,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = metaBucket.Put([]byte("type"), []byte(zap.Type))
|
||||
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
buf := make([]byte, binary.MaxVarintLen32)
|
||||
binary.BigEndian.PutUint32(buf, zap.Version)
|
||||
err = metaBucket.Put([]byte("version"), buf)
|
||||
binary.BigEndian.PutUint32(buf, s.segPlugin.Version())
|
||||
err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -364,11 +491,19 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
|||
return err
|
||||
}
|
||||
switch seg := segmentSnapshot.segment.(type) {
|
||||
case *zap.SegmentBase:
|
||||
case segment.PersistedSegment:
|
||||
path := seg.Path()
|
||||
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
case segment.UnpersistedSegment:
|
||||
// need to persist this to disk
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
err = zap.PersistSegmentBase(seg, path)
|
||||
err = seg.Persist(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment: %v", err)
|
||||
}
|
||||
|
@ -378,14 +513,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
|||
return err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
case *zap.Segment:
|
||||
path := seg.Path()
|
||||
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unknown segment type: %T", seg)
|
||||
}
|
||||
|
@ -423,7 +551,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
|||
}
|
||||
}()
|
||||
for segmentID, path := range newSegmentPaths {
|
||||
newSegments[segmentID], err = zap.Open(path)
|
||||
newSegments[segmentID], err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening new segment at %s, %v", path, err)
|
||||
}
|
||||
|
@ -436,15 +564,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
|||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
err = ErrClosed
|
||||
return err
|
||||
return segment.ErrClosed
|
||||
case s.persists <- persist:
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
err = ErrClosed
|
||||
return err
|
||||
return segment.ErrClosed
|
||||
case <-persist.applied:
|
||||
}
|
||||
}
|
||||
|
@ -481,6 +607,8 @@ var boltPathKey = []byte{'p'}
|
|||
var boltDeletedKey = []byte{'d'}
|
||||
var boltInternalKey = []byte{'i'}
|
||||
var boltMetaDataKey = []byte{'m'}
|
||||
var boltMetaDataSegmentTypeKey = []byte("type")
|
||||
var boltMetaDataSegmentVersionKey = []byte("version")
|
||||
|
||||
func (s *Scorch) loadFromBolt() error {
|
||||
return s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
|
@ -521,11 +649,14 @@ func (s *Scorch) loadFromBolt() error {
|
|||
s.nextSegmentID++
|
||||
s.rootLock.Lock()
|
||||
s.nextSnapshotEpoch = snapshotEpoch + 1
|
||||
if s.root != nil {
|
||||
_ = s.root.DecRef()
|
||||
}
|
||||
rootPrev := s.root
|
||||
s.root = indexSnapshot
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
foundRoot = true
|
||||
}
|
||||
return nil
|
||||
|
@ -562,6 +693,23 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
|||
refs: 1,
|
||||
creator: "loadSnapshot",
|
||||
}
|
||||
// first we look for the meta-data bucket, this will tell us
|
||||
// which segment type/version was used for this snapshot
|
||||
// all operations for this scorch will use this type/version
|
||||
metaBucket := snapshot.Bucket(boltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("meta-data bucket missing")
|
||||
}
|
||||
segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
|
||||
segmentVersion := binary.BigEndian.Uint32(
|
||||
metaBucket.Get(boltMetaDataSegmentVersionKey))
|
||||
err := s.loadSegmentPlugin(segmentType, segmentVersion)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf(
|
||||
"unable to load correct segment wrapper: %v", err)
|
||||
}
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
|
@ -606,7 +754,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
|
|||
return nil, fmt.Errorf("segment path missing")
|
||||
}
|
||||
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
|
||||
segment, err := zap.Open(segmentPath)
|
||||
segment, err := s.segPlugin.Open(segmentPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening bolt segment: %v", err)
|
||||
}
|
||||
|
@ -643,12 +791,11 @@ func (s *Scorch) removeOldData() {
|
|||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
|
||||
|
||||
if removed > 0 {
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -690,7 +837,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
|
|||
s.eligibleForRemoval = newEligible
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if len(epochsToRemove) <= 0 {
|
||||
if len(epochsToRemove) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
|
|
165
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
165
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
|
@ -28,10 +28,9 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const Name = "scorch"
|
||||
|
@ -41,12 +40,14 @@ const Version uint8 = 2
|
|||
var ErrClosed = fmt.Errorf("scorch closed")
|
||||
|
||||
type Scorch struct {
|
||||
nextSegmentID uint64
|
||||
stats Stats
|
||||
iStats internalStats
|
||||
|
||||
readOnly bool
|
||||
version uint8
|
||||
config map[string]interface{}
|
||||
analysisQueue *index.AnalysisQueue
|
||||
stats Stats
|
||||
nextSegmentID uint64
|
||||
path string
|
||||
|
||||
unsafeBatch bool
|
||||
|
@ -54,6 +55,7 @@ type Scorch struct {
|
|||
rootLock sync.RWMutex
|
||||
root *IndexSnapshot // holds 1 ref-count on the root
|
||||
rootPersisted []chan error // closed when root is persisted
|
||||
persistedCallbacks []index.BatchCallback
|
||||
nextSnapshotEpoch uint64
|
||||
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
|
||||
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
|
||||
|
@ -64,7 +66,6 @@ type Scorch struct {
|
|||
persists chan *persistIntroduction
|
||||
merges chan *segmentMerge
|
||||
introducerNotifier chan *epochWatcher
|
||||
revertToSnapshots chan *snapshotReversion
|
||||
persisterNotifier chan *epochWatcher
|
||||
rootBolt *bolt.DB
|
||||
asyncTasks sync.WaitGroup
|
||||
|
@ -72,7 +73,11 @@ type Scorch struct {
|
|||
onEvent func(event Event)
|
||||
onAsyncError func(err error)
|
||||
|
||||
iStats internalStats
|
||||
pauseLock sync.RWMutex
|
||||
|
||||
pauseCount uint64
|
||||
|
||||
segPlugin segment.Plugin
|
||||
}
|
||||
|
||||
type internalStats struct {
|
||||
|
@ -96,7 +101,25 @@ func NewScorch(storeName string,
|
|||
nextSnapshotEpoch: 1,
|
||||
closeCh: make(chan struct{}),
|
||||
ineligibleForRemoval: map[string]bool{},
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
}
|
||||
|
||||
// check if the caller has requested a specific segment type/version
|
||||
forcedSegmentVersion, ok := config["forceSegmentVersion"].(int)
|
||||
if ok {
|
||||
forcedSegmentType, ok2 := config["forceSegmentType"].(string)
|
||||
if !ok2 {
|
||||
return nil, fmt.Errorf(
|
||||
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
|
||||
}
|
||||
|
||||
err := rv.loadSegmentPlugin(forcedSegmentType,
|
||||
uint32(forcedSegmentVersion))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
|
@ -117,9 +140,30 @@ func NewScorch(storeName string,
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) paused() uint64 {
|
||||
s.pauseLock.Lock()
|
||||
pc := s.pauseCount
|
||||
s.pauseLock.Unlock()
|
||||
return pc
|
||||
}
|
||||
|
||||
func (s *Scorch) incrPause() {
|
||||
s.pauseLock.Lock()
|
||||
s.pauseCount++
|
||||
s.pauseLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) decrPause() {
|
||||
s.pauseLock.Lock()
|
||||
s.pauseCount--
|
||||
s.pauseLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
|
||||
if s.onEvent != nil {
|
||||
s.incrPause()
|
||||
s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
|
||||
s.decrPause()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,12 +233,14 @@ func (s *Scorch) openBolt() error {
|
|||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))
|
||||
|
||||
s.introductions = make(chan *segmentIntroduction)
|
||||
s.persists = make(chan *persistIntroduction)
|
||||
s.merges = make(chan *segmentMerge)
|
||||
s.introducerNotifier = make(chan *epochWatcher, 1)
|
||||
s.revertToSnapshots = make(chan *snapshotReversion)
|
||||
s.persisterNotifier = make(chan *epochWatcher, 1)
|
||||
s.closeCh = make(chan struct{})
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
err := s.removeOldZapFiles() // Before persister or merger create any new files.
|
||||
|
@ -235,7 +281,10 @@ func (s *Scorch) Close() (err error) {
|
|||
err = s.rootBolt.Close()
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil {
|
||||
_ = s.root.DecRef()
|
||||
err2 := s.root.DecRef()
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
s.root = nil
|
||||
s.rootLock.Unlock()
|
||||
|
@ -284,15 +333,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
|
||||
// FIXME could sort ids list concurrent with analysis?
|
||||
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
aw := index.NewAnalysisWork(s, doc, resultChan)
|
||||
// put the work on the queue
|
||||
s.analysisQueue.Queue(aw)
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
aw := index.NewAnalysisWork(s, doc, resultChan)
|
||||
// put the work on the queue
|
||||
s.analysisQueue.Queue(aw)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}()
|
||||
}
|
||||
|
||||
// wait for analysis result
|
||||
analysisResults := make([]*index.AnalysisResult, int(numUpdates))
|
||||
|
@ -319,7 +370,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
var newSegment segment.Segment
|
||||
var bufBytes uint64
|
||||
if len(analysisResults) > 0 {
|
||||
newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor)
|
||||
newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -328,7 +379,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
|
||||
}
|
||||
|
||||
err = s.prepareSegment(newSegment, ids, batch.InternalOps)
|
||||
err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
|
||||
if err != nil {
|
||||
if newSegment != nil {
|
||||
_ = newSegment.Close()
|
||||
|
@ -348,16 +399,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
}
|
||||
|
||||
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
||||
internalOps map[string][]byte) error {
|
||||
internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
|
||||
|
||||
// new introduction
|
||||
introduction := &segmentIntroduction{
|
||||
id: atomic.AddUint64(&s.nextSegmentID, 1),
|
||||
data: newSegment,
|
||||
ids: ids,
|
||||
obsoletes: make(map[uint64]*roaring.Bitmap),
|
||||
internal: internalOps,
|
||||
applied: make(chan error),
|
||||
id: atomic.AddUint64(&s.nextSegmentID, 1),
|
||||
data: newSegment,
|
||||
ids: ids,
|
||||
obsoletes: make(map[uint64]*roaring.Bitmap),
|
||||
internal: internalOps,
|
||||
applied: make(chan error),
|
||||
persistedCallback: persistedCallback,
|
||||
}
|
||||
|
||||
if !s.unsafeBatch {
|
||||
|
@ -370,6 +422,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
|||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
for _, seg := range root.segment {
|
||||
delta, err := seg.segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
|
@ -378,8 +432,6 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
|||
introduction.obsoletes[seg.id] = delta
|
||||
}
|
||||
|
||||
_ = root.DecRef()
|
||||
|
||||
introStartTime := time.Now()
|
||||
|
||||
s.introductions <- introduction
|
||||
|
@ -434,24 +486,57 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot {
|
|||
func (s *Scorch) Stats() json.Marshaler {
|
||||
return &s.stats
|
||||
}
|
||||
func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m := s.stats.ToMap()
|
||||
|
||||
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
|
||||
uint64, uint64) {
|
||||
var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
|
||||
if s.path != "" {
|
||||
finfos, err := ioutil.ReadDir(s.path)
|
||||
if err == nil {
|
||||
var numFilesOnDisk, numBytesUsedDisk uint64
|
||||
for _, finfo := range finfos {
|
||||
if !finfo.IsDir() {
|
||||
numBytesUsedDisk += uint64(finfo.Size())
|
||||
numFilesOnDisk++
|
||||
if rootSegmentPaths != nil {
|
||||
fname := s.path + string(os.PathSeparator) + finfo.Name()
|
||||
if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
|
||||
numBytesOnDiskByRoot += uint64(finfo.Size())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
}
|
||||
}
|
||||
// if no root files path given, then consider all disk files.
|
||||
if rootSegmentPaths == nil {
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
|
||||
}
|
||||
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
|
||||
}
|
||||
|
||||
func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} {
|
||||
rv := make(map[string]struct{}, len(s.root.segment))
|
||||
for _, segmentSnapshot := range s.root.segment {
|
||||
if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
rv[seg.Path()] = struct{}{}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m := s.stats.ToMap()
|
||||
|
||||
s.rootLock.RLock()
|
||||
rootSegPaths := s.rootDiskSegmentsPaths()
|
||||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
|
||||
|
||||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
|
@ -466,8 +551,16 @@ func (s *Scorch) StatsMap() map[string]interface{} {
|
|||
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
|
||||
m["num_items_introduced"] = m["TotIntroducedItems"]
|
||||
m["num_items_persisted"] = m["TotPersistedItems"]
|
||||
m["num_bytes_used_disk"] = m["CurOnDiskBytes"]
|
||||
m["num_files_on_disk"] = m["CurOnDiskFiles"]
|
||||
m["num_recs_to_persist"] = m["TotItemsToPersist"]
|
||||
// total disk bytes found in index directory inclusive of older snapshots
|
||||
m["num_bytes_used_disk"] = numBytesUsedDisk
|
||||
// total disk bytes by the latest root index, exclusive of older snapshots
|
||||
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
|
||||
m["num_files_on_disk"] = numFilesOnDisk
|
||||
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
|
||||
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
|
||||
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
|
||||
m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
|
||||
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
|
||||
|
||||
return m
|
||||
|
@ -486,7 +579,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult {
|
|||
rv.Analyzed[i] = tokenFreqs
|
||||
rv.Length[i] = fieldLength
|
||||
|
||||
if len(d.CompositeFields) > 0 {
|
||||
if len(d.CompositeFields) > 0 && field.Name() != "_id" {
|
||||
// see if any of the composite fields need this
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
|
||||
|
|
23
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
23
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
|
@ -17,6 +17,7 @@ package segment
|
|||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/couchbase/vellum"
|
||||
)
|
||||
|
||||
type EmptySegment struct{}
|
||||
|
@ -80,12 +81,8 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
|
|||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) FuzzyIterator(term string,
|
||||
fuzziness int) DictionaryIterator {
|
||||
func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton,
|
||||
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
|
@ -94,14 +91,18 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
|
|||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
type EmptyDictionaryIterator struct{}
|
||||
|
||||
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
|
||||
return nil, nil
|
||||
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
type EmptyPostingsList struct{}
|
||||
|
@ -125,6 +126,12 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
var AnEmptyPostingsIterator = &EmptyPostingsIterator{}
|
||||
|
|
84
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
84
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
|
@ -19,7 +19,10 @@
|
|||
|
||||
package segment
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
MaxVarintSize = 9
|
||||
|
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
|
|||
}
|
||||
return b[length:], v, nil
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
||||
// MemUvarintReader decodes unsigned varints directly out of an in-memory
// byte slice, tracking the read position itself.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the first byte of s.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	return &MemUvarintReader{S: s}
}

// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
	n := len(r.S) - r.C
	if n < 0 {
		return 0
	}
	return n
}

// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the encoded
// value does not fit in 64 bits.
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

// ErrMemUvarintReaderTruncated is returned by ReadUvarint when the buffer
// ends before a complete varint has been read.
var ErrMemUvarintReaderTruncated = errors.New("MemUvarintReader truncated input")

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// If the buffer is exhausted before the terminating byte of the varint is
// seen, ErrMemUvarintReaderTruncated is returned and the read position is
// left unchanged (previously this case panicked with an index-out-of-range).
// If the value overflows 64 bits, ErrMemUvarintReaderOverflow is returned and
// the read position is advanced past the offending varint.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var x uint64
	var s uint
	var C = r.C
	var S = r.S

	for {
		// Explicit bounds check: corrupt or truncated data must surface
		// as an error rather than crash the process.
		if C < 0 || C >= len(S) {
			return 0, ErrMemUvarintReaderTruncated
		}

		b := S[C]
		C++

		if b < 0x80 {
			r.C = C

			// why 63?  The original code had an 'i += 1' loop var and
			// checked for i > 9 || i == 9 ...; but, we no longer
			// check for the i var, but instead check here for s,
			// which is incremented by 7.  So, 7*9 == 63.
			//
			// why the "extra" >= check?  The normal case is that s <
			// 63, so we check this single >= guard first so that we
			// hit the normal, nil-error return pathway sooner.
			if s >= 63 && (s > 63 || s == 63 && b > 1) {
				return 0, ErrMemUvarintReaderOverflow
			}

			return x | uint64(b)<<s, nil
		}

		x |= uint64(b&0x7f) << s
		s += 7
	}
}

// SkipUvarint skips ahead one encoded uint64. If the buffer ends before the
// varint's terminating byte, the reader is left positioned at the end of the
// buffer instead of panicking; a subsequent ReadUvarint then reports
// ErrMemUvarintReaderTruncated.
func (r *MemUvarintReader) SkipUvarint() {
	for r.C >= 0 && r.C < len(r.S) {
		b := r.S[r.C]
		r.C++

		if b < 0x80 {
			return
		}
	}
}

// SkipBytes skips a count number of bytes. No bounds checking is done here;
// Len and ReadUvarint tolerate a position past the end of the buffer.
func (r *MemUvarintReader) SkipBytes(count int) {
	r.C = r.C + count
}

// Reset points the reader at a new buffer and rewinds the read position.
func (r *MemUvarintReader) Reset(s []byte) {
	r.C = 0
	r.S = s
}
|
||||
|
|
58
vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go
generated
vendored
Normal file
58
vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go
generated
vendored
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// Plugin represents the essential functions required by a package to plug in
|
||||
// its segment implementation
|
||||
type Plugin interface {
|
||||
|
||||
// Type is the name for this segment plugin
|
||||
Type() string
|
||||
|
||||
// Version is a numeric value identifying a specific version of this type.
|
||||
// When incompatible changes are made to a particular type of plugin, the
|
||||
// version must be incremented.
|
||||
Version() uint32
|
||||
|
||||
// New takes a set of AnalysisResults and turns them into a new Segment
|
||||
New(results []*index.AnalysisResult) (Segment, uint64, error)
|
||||
|
||||
// Open attempts to open the file at the specified path and
|
||||
// return the corresponding Segment
|
||||
Open(path string) (Segment, error)
|
||||
|
||||
// Merge takes a set of Segments, and creates a new segment on disk at
|
||||
// the specified path.
|
||||
// Drops is a set of bitmaps (one for each segment) indicating which
|
||||
// documents can be dropped from the segments during the merge.
|
||||
// If the closeCh channel is closed, Merge will cease doing work at
|
||||
// the next opportunity, and return an error (closed).
|
||||
// StatsReporter can optionally be provided, in which case progress
|
||||
// made during the merge is reported while operation continues.
|
||||
// Returns:
|
||||
// A slice of new document numbers (one for each input segment),
|
||||
// this allows the caller to know a particular document's new
|
||||
// document number in the newly merged segment.
|
||||
// The number of bytes written to the new segment file.
|
||||
// An error, if any occurred.
|
||||
Merge(segments []Segment, drops []*roaring.Bitmap, path string,
|
||||
closeCh chan struct{}, s StatsReporter) (
|
||||
[][]uint64, uint64, error)
|
||||
}
|
75
vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
generated
vendored
Normal file
75
vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
generated
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
|
||||
"github.com/couchbase/vellum/regexp"
|
||||
)
|
||||
|
||||
func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
|
||||
// TODO: potential optimization where syntax.Regexp supports a Simplify() API?
|
||||
|
||||
parsed, err := syntax.Parse(pattern, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
prefix := LiteralPrefix(parsed)
|
||||
if prefix != "" {
|
||||
prefixBeg := []byte(prefix)
|
||||
prefixEnd := IncrementBytes(prefixBeg)
|
||||
return re, prefixBeg, prefixEnd, nil
|
||||
}
|
||||
|
||||
return re, nil, nil, nil
|
||||
}
|
||||
|
||||
// LiteralPrefix returns the literal prefix given the parse tree for a regexp.
//
// It follows the left-most branch of the tree for as long as the node is a
// concatenation. If the node reached is a case-sensitive literal, its text is
// returned. In every other case (nil tree, empty concatenation, nil child,
// case-folded literal, or any non-literal node) the empty string is returned.
func LiteralPrefix(s *syntax.Regexp) string {
	// traverse the left-most branch in the parse tree as long as the
	// node represents a concatenation
	for s != nil && s.Op == syntax.OpConcat {
		if len(s.Sub) < 1 {
			return ""
		}

		s = s.Sub[0]
	}

	// The loop can exit with s == nil (nil argument or nil first child);
	// guard before dereferencing.
	if s == nil {
		return ""
	}

	if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
		return string(s.Rune)
	}

	return "" // no literal prefix
}
|
||||
|
||||
// IncrementBytes returns a copy of in with one added to it, treating the
// slice as a big-endian unsigned number: the last byte is incremented and any
// carry propagates toward the front. The input is never modified.
//
// nil is returned when the increment overflows, i.e. when in is empty or
// consists solely of 0xff bytes.
func IncrementBytes(in []byte) []byte {
	out := make([]byte, len(in))
	copy(out, in)

	pos := len(out) - 1
	for pos >= 0 {
		out[pos]++
		if out[pos] != 0 {
			return out // didn't overflow, so stop
		}
		pos--
	}

	return nil // overflowed
}
|
31
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
31
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
|
@ -15,10 +15,15 @@
|
|||
package segment
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/couchbase/vellum"
|
||||
)
|
||||
|
||||
var ErrClosed = fmt.Errorf("index closed")
|
||||
|
||||
// DocumentFieldValueVisitor defines a callback to be visited for each
|
||||
// stored field value. The return value determines if the visitor
|
||||
// should keep going. Returning true continues visiting, false stops.
|
||||
|
@ -45,15 +50,27 @@ type Segment interface {
|
|||
DecRef() error
|
||||
}
|
||||
|
||||
// UnpersistedSegment is a Segment that additionally offers Persist.
// The scorch persister calls Persist with the target file path for segments
// of this kind ("need to persist this to disk"); a non-nil error is treated
// as a failure to persist the segment.
type UnpersistedSegment interface {
|
||||
Segment
|
||||
Persist(path string) error
|
||||
}
|
||||
|
||||
// PersistedSegment is a Segment that additionally reports a Path.
// The scorch persister strips the index directory prefix from Path() to
// obtain the file name it records for the segment.
type PersistedSegment interface {
|
||||
Segment
|
||||
Path() string
|
||||
}
|
||||
|
||||
type TermDictionary interface {
|
||||
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
|
||||
|
||||
Iterator() DictionaryIterator
|
||||
PrefixIterator(prefix string) DictionaryIterator
|
||||
RangeIterator(start, end string) DictionaryIterator
|
||||
RegexpIterator(regex string) DictionaryIterator
|
||||
FuzzyIterator(term string, fuzziness int) DictionaryIterator
|
||||
AutomatonIterator(a vellum.Automaton,
|
||||
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
|
||||
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
|
||||
|
||||
Contains(key []byte) (bool, error)
|
||||
}
|
||||
|
||||
type DictionaryIterator interface {
|
||||
|
@ -89,6 +106,12 @@ type PostingsIterator interface {
|
|||
Size() int
|
||||
}
|
||||
|
||||
type OptimizablePostingsIterator interface {
|
||||
ActualBitmap() *roaring.Bitmap
|
||||
DocNum1Hit() (uint64, bool)
|
||||
ReplaceActual(*roaring.Bitmap)
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
Number() uint64
|
||||
|
||||
|
@ -124,3 +147,7 @@ type DocumentFieldTermVisitable interface {
|
|||
|
||||
type DocVisitState interface {
|
||||
}
|
||||
|
||||
type StatsReporter interface {
|
||||
ReportBytesWritten(bytesWritten uint64)
|
||||
}
|
||||
|
|
148
vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go
generated
vendored
Normal file
148
vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go
generated
vendored
Normal file
|
@ -0,0 +1,148 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"math"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUnadornedPostingsIteratorBitmap int
|
||||
var reflectStaticSizeUnadornedPostingsIterator1Hit int
|
||||
var reflectStaticSizeUnadornedPosting int
|
||||
|
||||
|
||||
func init() {
|
||||
var pib UnadornedPostingsIteratorBitmap
|
||||
reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size())
|
||||
var pi1h UnadornedPostingsIterator1Hit
|
||||
reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size())
|
||||
var up UnadornedPosting
|
||||
reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size())
|
||||
}
|
||||
|
||||
type UnadornedPostingsIteratorBitmap struct{
|
||||
actual roaring.IntPeekable
|
||||
actualBM *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIteratorBitmap) Next() (Posting, error) {
|
||||
return i.nextAtOrAfter(0)
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIteratorBitmap) Advance(docNum uint64) (Posting, error) {
|
||||
return i.nextAtOrAfter(docNum)
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
|
||||
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
if i.actual == nil || !i.actual.HasNext() {
|
||||
return 0, false
|
||||
}
|
||||
i.actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if !i.actual.HasNext() {
|
||||
return 0, false // couldn't find anything
|
||||
}
|
||||
|
||||
return uint64(i.actual.Next()), true
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIteratorBitmap) Size() int {
|
||||
return reflectStaticSizeUnadornedPostingsIteratorBitmap
|
||||
}
|
||||
|
||||
func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator {
|
||||
return &UnadornedPostingsIteratorBitmap{
|
||||
actualBM: bm,
|
||||
actual: bm.Iterator(),
|
||||
}
|
||||
}
|
||||
|
||||
const docNum1HitFinished = math.MaxUint64
|
||||
|
||||
type UnadornedPostingsIterator1Hit struct{
|
||||
docNum uint64
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIterator1Hit) Next() (Posting, error) {
|
||||
return i.nextAtOrAfter(0)
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIterator1Hit) Advance(docNum uint64) (Posting, error) {
|
||||
return i.nextAtOrAfter(docNum)
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
|
||||
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
if i.docNum == docNum1HitFinished {
|
||||
return 0, false
|
||||
}
|
||||
if i.docNum < atOrAfter {
|
||||
// advanced past our 1-hit
|
||||
i.docNum = docNum1HitFinished // consume our 1-hit docNum
|
||||
return 0, false
|
||||
}
|
||||
docNum := i.docNum
|
||||
i.docNum = docNum1HitFinished // consume our 1-hit docNum
|
||||
return docNum, true
|
||||
}
|
||||
|
||||
func (i *UnadornedPostingsIterator1Hit) Size() int {
|
||||
return reflectStaticSizeUnadornedPostingsIterator1Hit
|
||||
}
|
||||
|
||||
func NewUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) PostingsIterator {
|
||||
return &UnadornedPostingsIterator1Hit{
|
||||
docNum1Hit,
|
||||
}
|
||||
}
|
||||
|
||||
type UnadornedPosting uint64
|
||||
|
||||
func (p UnadornedPosting) Number() uint64 {
|
||||
return uint64(p)
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Frequency() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Norm() float64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Locations() []Location {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Size() int {
|
||||
return reflectStaticSizeUnadornedPosting
|
||||
}
|
77
vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go
generated
vendored
Normal file
77
vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go
generated
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
|
||||
zapv11 "github.com/blevesearch/zap/v11"
|
||||
zapv12 "github.com/blevesearch/zap/v12"
|
||||
)
|
||||
|
||||
var supportedSegmentPlugins map[string]map[uint32]segment.Plugin
|
||||
var defaultSegmentPlugin segment.Plugin
|
||||
|
||||
func init() {
|
||||
ResetPlugins()
|
||||
RegisterPlugin(zapv12.Plugin(), false)
|
||||
RegisterPlugin(zapv11.Plugin(), true)
|
||||
}
|
||||
|
||||
func ResetPlugins() {
|
||||
supportedSegmentPlugins = map[string]map[uint32]segment.Plugin{}
|
||||
}
|
||||
|
||||
func RegisterPlugin(plugin segment.Plugin, makeDefault bool) {
|
||||
if _, ok := supportedSegmentPlugins[plugin.Type()]; !ok {
|
||||
supportedSegmentPlugins[plugin.Type()] = map[uint32]segment.Plugin{}
|
||||
}
|
||||
supportedSegmentPlugins[plugin.Type()][plugin.Version()] = plugin
|
||||
if makeDefault {
|
||||
defaultSegmentPlugin = plugin
|
||||
}
|
||||
}
|
||||
|
||||
func SupportedSegmentTypes() (rv []string) {
|
||||
for k := range supportedSegmentPlugins {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
|
||||
for k := range supportedSegmentPlugins[typ] {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
|
||||
forcedSegmentVersion uint32) error {
|
||||
if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok {
|
||||
if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok {
|
||||
s.segPlugin = segPlugin
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf(
|
||||
"unsupported version %d for segment type: %s, supported: %v",
|
||||
forcedSegmentVersion, forcedSegmentType,
|
||||
SupportedSegmentTypeVersions(forcedSegmentType))
|
||||
}
|
||||
return fmt.Errorf("unsupported segment type: %s, supported: %v",
|
||||
forcedSegmentType, SupportedSegmentTypes())
|
||||
}
|
155
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
155
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
|
@ -27,9 +27,15 @@ import (
|
|||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/couchbase/vellum"
|
||||
lev "github.com/couchbase/vellum/levenshtein"
|
||||
)
|
||||
|
||||
// reusable, thread-safe levenshtein builders
|
||||
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
dict segment.TermDictionary
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
index int
|
||||
|
@ -45,6 +51,15 @@ var reflectStaticSizeIndexSnapshot int
|
|||
func init() {
|
||||
var is interface{} = IndexSnapshot{}
|
||||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
|
||||
var err error
|
||||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
|
||||
}
|
||||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
type IndexSnapshot struct {
|
||||
|
@ -59,9 +74,8 @@ type IndexSnapshot struct {
|
|||
m sync.Mutex // Protects the fields that follow.
|
||||
refs int64
|
||||
|
||||
m2 sync.Mutex // Protects the fields that follow.
|
||||
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
|
||||
fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts
|
||||
m2 sync.Mutex // Protects the fields that follow.
|
||||
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
|
||||
|
@ -113,16 +127,22 @@ func (i *IndexSnapshot) updateSize() {
|
|||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
|
||||
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
|
||||
randomLookup bool) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
dict, err := segment.Dictionary(field)
|
||||
dict, err := segment.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
if randomLookup {
|
||||
results <- &asynchSegmentResult{dict: dict}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
@ -137,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if asr.err != nil && err == nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
if !randomLookup {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
dict: asr.dict,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -153,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
|
||||
if !randomLookup {
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
@ -162,42 +191,75 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.Iterator()
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
||||
endTerm []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RangeIterator(string(startTerm), string(endTerm))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
||||
termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.PrefixIterator(string(termPrefix))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRegexp(field string,
|
||||
termRegex []byte) (index.FieldDict, error) {
|
||||
termRegex string) (index.FieldDict, error) {
|
||||
// TODO: potential optimization where the literal prefix represents the,
|
||||
// entire regexp, allowing us to use PrefixIterator(prefixTerm)?
|
||||
|
||||
a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RegexpIterator(string(termRegex))
|
||||
})
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) getLevAutomaton(term string,
|
||||
fuzziness uint8) (vellum.Automaton, error) {
|
||||
if fuzziness == 1 {
|
||||
return lb1.BuildDfa(term, fuzziness)
|
||||
} else if fuzziness == 2 {
|
||||
return lb2.BuildDfa(term, fuzziness)
|
||||
}
|
||||
return nil, fmt.Errorf("fuzziness exceeds the max limit")
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictFuzzy(field string,
|
||||
term []byte, fuzziness int) (index.FieldDict, error) {
|
||||
term string, fuzziness int, prefix string) (index.FieldDict, error) {
|
||||
a, err := i.getLevAutomaton(term, uint8(fuzziness))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var prefixBeg, prefixEnd []byte
|
||||
if prefix != "" {
|
||||
prefixBeg = []byte(prefix)
|
||||
prefixEnd = segment.IncrementBytes(prefixBeg)
|
||||
}
|
||||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.FuzzyIterator(string(term), fuzziness)
|
||||
})
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictOnly(field string,
|
||||
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.OnlyIterator(onlyTerms, includeCount)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, nil, true)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
|
@ -393,8 +455,8 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
|
|||
}
|
||||
|
||||
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
||||
includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) {
|
||||
rv, dicts := i.allocTermFieldReaderDicts(field)
|
||||
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
rv := i.allocTermFieldReaderDicts(field)
|
||||
|
||||
rv.term = term
|
||||
rv.field = field
|
||||
|
@ -412,20 +474,19 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
|||
rv.currPosting = nil
|
||||
rv.currID = rv.currID[:0]
|
||||
|
||||
if dicts == nil {
|
||||
dicts = make([]segment.TermDictionary, len(i.segment))
|
||||
if rv.dicts == nil {
|
||||
rv.dicts = make([]segment.TermDictionary, len(i.segment))
|
||||
for i, segment := range i.segment {
|
||||
dict, err := segment.Dictionary(field)
|
||||
dict, err := segment.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dicts[i] = dict
|
||||
rv.dicts[i] = dict
|
||||
}
|
||||
}
|
||||
rv.dicts = dicts
|
||||
|
||||
for i := range i.segment {
|
||||
pl, err := dicts[i].PostingsList(term, nil, rv.postings[i])
|
||||
for i, segment := range i.segment {
|
||||
pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -436,37 +497,37 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (
|
||||
tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) {
|
||||
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
|
||||
i.m2.Lock()
|
||||
if i.fieldDicts != nil {
|
||||
dicts = i.fieldDicts[field]
|
||||
}
|
||||
if i.fieldTFRs != nil {
|
||||
tfrs := i.fieldTFRs[field]
|
||||
last := len(tfrs) - 1
|
||||
if last >= 0 {
|
||||
rv := tfrs[last]
|
||||
tfr = tfrs[last]
|
||||
tfrs[last] = nil
|
||||
i.fieldTFRs[field] = tfrs[:last]
|
||||
i.m2.Unlock()
|
||||
return rv, dicts
|
||||
return
|
||||
}
|
||||
}
|
||||
i.m2.Unlock()
|
||||
return &IndexSnapshotTermFieldReader{}, dicts
|
||||
return &IndexSnapshotTermFieldReader{}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
|
||||
i.parent.rootLock.RLock()
|
||||
obsolete := i.parent.root != i
|
||||
i.parent.rootLock.RUnlock()
|
||||
if obsolete {
|
||||
// if we're not the current root (mutations happened), don't bother recycling
|
||||
return
|
||||
}
|
||||
|
||||
i.m2.Lock()
|
||||
if i.fieldTFRs == nil {
|
||||
i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
|
||||
}
|
||||
i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
|
||||
if i.fieldDicts == nil {
|
||||
i.fieldDicts = map[string][]segment.TermDictionary{}
|
||||
}
|
||||
i.fieldDicts[tfr.field] = tfr.dicts
|
||||
i.m2.Unlock()
|
||||
}
|
||||
|
||||
|
@ -636,7 +697,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} {
|
|||
|
||||
// subtractStrings returns set a minus elements of set b.
|
||||
func subtractStrings(a, b []string) []string {
|
||||
if len(b) <= 0 {
|
||||
if len(b) == 0 {
|
||||
return a
|
||||
}
|
||||
|
||||
|
|
17
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
17
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
|
@ -22,6 +22,7 @@ import (
|
|||
)
|
||||
|
||||
type segmentDictCursor struct {
|
||||
dict segment.TermDictionary
|
||||
itr segment.DictionaryIterator
|
||||
curr index.DictEntry
|
||||
}
|
||||
|
@ -52,7 +53,7 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} {
|
|||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
||||
if len(i.cursors) <= 0 {
|
||||
if len(i.cursors) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
i.entry = i.cursors[0].curr
|
||||
|
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
|||
func (i *IndexSnapshotFieldDict) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
|
||||
if len(i.cursors) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, cursor := range i.cursors {
|
||||
if found, _ := cursor.dict.Contains(key); found {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
generated
vendored
|
@ -74,7 +74,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
|
|||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
// find the next hit
|
||||
for i.segmentOffset < len(i.postings) {
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
next, err := i.iterators[i.segmentOffset].Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
138
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
generated
vendored
138
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
generated
vendored
|
@ -17,9 +17,10 @@ package scorch
|
|||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type RollbackPoint struct {
|
||||
|
@ -34,13 +35,22 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte {
|
|||
// RollbackPoints returns an array of rollback points available for
|
||||
// the application to rollback to, with more recent rollback points
|
||||
// (higher epochs) coming first.
|
||||
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
||||
if s.rootBolt == nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: root is nil")
|
||||
func RollbackPoints(path string) ([]*RollbackPoint, error) {
|
||||
if len(path) == 0 {
|
||||
return nil, fmt.Errorf("RollbackPoints: invalid path")
|
||||
}
|
||||
|
||||
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: true,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// start a read-only bolt transaction
|
||||
tx, err := s.rootBolt.Begin(false)
|
||||
tx, err := rootBolt.Begin(false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: failed to start" +
|
||||
" read-only transaction")
|
||||
|
@ -49,6 +59,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
|||
// read-only bolt transactions to be rolled back
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
|
@ -105,69 +116,98 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
|||
return rollbackPoints, nil
|
||||
}
|
||||
|
||||
// Rollback atomically and durably (if unsafeBatch is unset) brings
|
||||
// the store back to the point in time as represented by the
|
||||
// RollbackPoint. Rollback() should only be passed a RollbackPoint
|
||||
// that came from the same store using the RollbackPoints() API.
|
||||
func (s *Scorch) Rollback(to *RollbackPoint) error {
|
||||
// Rollback atomically and durably brings the store back to the point
|
||||
// in time as represented by the RollbackPoint.
|
||||
// Rollback() should only be passed a RollbackPoint that came from the
|
||||
// same store using the RollbackPoints() API along with the index path.
|
||||
func Rollback(path string, to *RollbackPoint) error {
|
||||
if to == nil {
|
||||
return fmt.Errorf("Rollback: RollbackPoint is nil")
|
||||
}
|
||||
|
||||
if s.rootBolt == nil {
|
||||
return fmt.Errorf("Rollback: root is nil")
|
||||
if len(path) == 0 {
|
||||
return fmt.Errorf("Rollback: index path is empty")
|
||||
}
|
||||
|
||||
revert := &snapshotReversion{}
|
||||
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: false,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
err1 := rootBolt.Close()
|
||||
if err1 != nil && err == nil {
|
||||
err = err1
|
||||
}
|
||||
}()
|
||||
|
||||
s.rootLock.Lock()
|
||||
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
// pick all the younger persisted epochs in bolt store
|
||||
// including the target one.
|
||||
var found bool
|
||||
var eligibleEpochs []uint64
|
||||
err = rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return fmt.Errorf("Rollback: no snapshots available")
|
||||
return nil
|
||||
}
|
||||
|
||||
pos := segment.EncodeUvarintAscending(nil, to.epoch)
|
||||
|
||||
snapshot := snapshots.Bucket(pos)
|
||||
if snapshot == nil {
|
||||
return fmt.Errorf("Rollback: snapshot not found")
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
|
||||
_, snapshotEpoch, err := segment.DecodeUvarintAscending(sk)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if snapshotEpoch == to.epoch {
|
||||
found = true
|
||||
}
|
||||
eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
|
||||
}
|
||||
|
||||
indexSnapshot, err := s.loadSnapshot(snapshot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Rollback: unable to load snapshot: %v", err)
|
||||
}
|
||||
|
||||
// add segments referenced by loaded index snapshot to the
|
||||
// ineligibleForRemoval map
|
||||
for _, segSnap := range indexSnapshot.segment {
|
||||
filename := zapFileName(segSnap.id)
|
||||
s.ineligibleForRemoval[filename] = true
|
||||
}
|
||||
|
||||
revert.snapshot = indexSnapshot
|
||||
revert.applied = make(chan error)
|
||||
revert.persisted = make(chan error)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
s.rootLock.Unlock()
|
||||
if len(eligibleEpochs) == 0 {
|
||||
return fmt.Errorf("Rollback: no persisted epochs found in bolt")
|
||||
}
|
||||
if !found {
|
||||
return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
|
||||
}
|
||||
|
||||
// start a write transaction
|
||||
tx, err := rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// introduce the reversion
|
||||
s.revertToSnapshots <- revert
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
if err == nil {
|
||||
err = rootBolt.Sync()
|
||||
}
|
||||
}()
|
||||
|
||||
// block until this snapshot is applied
|
||||
err = <-revert.applied
|
||||
if err != nil {
|
||||
return fmt.Errorf("Rollback: failed with err: %v", err)
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
for _, epoch := range eligibleEpochs {
|
||||
k := segment.EncodeUvarintAscending(nil, epoch)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if epoch == to.epoch {
|
||||
// return here as it already processed until the given epoch
|
||||
return nil
|
||||
}
|
||||
err = snapshots.DeleteBucket(k)
|
||||
if err == bolt.ErrBucketNotFound {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
|
||||
return <-revert.persisted
|
||||
return err
|
||||
}
|
||||
|
|
73
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
73
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
|
@ -29,43 +29,6 @@ var TermSeparator byte = 0xff
|
|||
|
||||
var TermSeparatorSplitSlice = []byte{TermSeparator}
|
||||
|
||||
type SegmentDictionarySnapshot struct {
|
||||
s *SegmentSnapshot
|
||||
d segment.TermDictionary
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap,
|
||||
prealloc segment.PostingsList) (segment.PostingsList, error) {
|
||||
// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
|
||||
return s.d.PostingsList(term, s.s.deleted, prealloc)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
|
||||
return s.d.Iterator()
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
|
||||
return s.d.PrefixIterator(prefix)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
|
||||
return s.d.RangeIterator(start, end)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
|
||||
return s.d.RegexpIterator(regex)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) FuzzyIterator(term string,
|
||||
fuzziness int) segment.DictionaryIterator {
|
||||
return s.d.FuzzyIterator(term, fuzziness)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte,
|
||||
includeCount bool) segment.DictionaryIterator {
|
||||
return s.d.OnlyIterator(onlyTerms, includeCount)
|
||||
}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
id uint64
|
||||
segment segment.Segment
|
||||
|
@ -115,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
|
||||
d, err := s.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &SegmentDictionarySnapshot{
|
||||
s: s,
|
||||
d: d,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
||||
rv, err := s.segment.DocNumbers(docIDs)
|
||||
if err != nil {
|
||||
|
@ -137,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
// DocNumbersLive returns bitsit containing doc numbers for all live docs
|
||||
// DocNumbersLive returns a bitmap containing doc numbers for all live docs
|
||||
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
|
||||
rv := roaring.NewBitmap()
|
||||
rv.AddRange(0, s.segment.Count())
|
||||
|
@ -161,14 +113,29 @@ func (s *SegmentSnapshot) Size() (rv int) {
|
|||
}
|
||||
|
||||
type cachedFieldDocs struct {
|
||||
m sync.Mutex
|
||||
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
|
||||
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
|
||||
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (cfd *cachedFieldDocs) Size() int {
|
||||
var rv int
|
||||
cfd.m.Lock()
|
||||
for _, entry := range cfd.docs {
|
||||
rv += 8 /* size of uint64 */ + len(entry)
|
||||
}
|
||||
cfd.m.Unlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
||||
defer close(cfd.readyCh)
|
||||
cfd.m.Lock()
|
||||
defer func() {
|
||||
close(cfd.readyCh)
|
||||
cfd.m.Unlock()
|
||||
}()
|
||||
|
||||
cfd.size += uint64(size.SizeOfUint64) /* size field */
|
||||
dict, err := ss.segment.Dictionary(field)
|
||||
|
@ -216,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
|||
}
|
||||
|
||||
type cachedDocs struct {
|
||||
size uint64
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
|
@ -279,9 +246,7 @@ func (c *cachedDocs) updateSizeLOCKED() {
|
|||
for k, v := range c.cache { // cachedFieldDocs
|
||||
sizeInBytes += len(k)
|
||||
if v != nil {
|
||||
for _, entry := range v.docs { // docs
|
||||
sizeInBytes += 8 /* size of uint64 */ + len(entry)
|
||||
}
|
||||
sizeInBytes += v.Size()
|
||||
}
|
||||
}
|
||||
atomic.StoreUint64(&c.size, uint64(sizeInBytes))
|
||||
|
|
40
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
40
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
|
@ -69,11 +69,15 @@ type Stats struct {
|
|||
TotPersistLoopEnd uint64
|
||||
|
||||
TotPersistedItems uint64
|
||||
TotItemsToPersist uint64
|
||||
TotPersistedSegments uint64
|
||||
|
||||
TotPersisterSlowMergerPause uint64
|
||||
TotPersisterSlowMergerResume uint64
|
||||
|
||||
TotPersisterNapPauseCompleted uint64
|
||||
TotPersisterMergerNapBreak uint64
|
||||
|
||||
TotFileMergeLoopBeg uint64
|
||||
TotFileMergeLoopErr uint64
|
||||
TotFileMergeLoopEnd uint64
|
||||
|
@ -91,24 +95,32 @@ type Stats struct {
|
|||
|
||||
TotFileMergeSegmentsEmpty uint64
|
||||
TotFileMergeSegments uint64
|
||||
TotFileSegmentsAtRoot uint64
|
||||
TotFileMergeWrittenBytes uint64
|
||||
|
||||
TotFileMergeZapBeg uint64
|
||||
TotFileMergeZapEnd uint64
|
||||
TotFileMergeZapTime uint64
|
||||
MaxFileMergeZapTime uint64
|
||||
TotFileMergeZapBeg uint64
|
||||
TotFileMergeZapEnd uint64
|
||||
TotFileMergeZapTime uint64
|
||||
MaxFileMergeZapTime uint64
|
||||
TotFileMergeZapIntroductionTime uint64
|
||||
MaxFileMergeZapIntroductionTime uint64
|
||||
|
||||
TotFileMergeIntroductions uint64
|
||||
TotFileMergeIntroductionsDone uint64
|
||||
TotFileMergeIntroductions uint64
|
||||
TotFileMergeIntroductionsDone uint64
|
||||
TotFileMergeIntroductionsSkipped uint64
|
||||
|
||||
TotMemMergeBeg uint64
|
||||
TotMemMergeErr uint64
|
||||
TotMemMergeDone uint64
|
||||
TotMemMergeZapBeg uint64
|
||||
TotMemMergeZapEnd uint64
|
||||
TotMemMergeZapTime uint64
|
||||
MaxMemMergeZapTime uint64
|
||||
TotMemMergeSegments uint64
|
||||
CurFilesIneligibleForRemoval uint64
|
||||
TotSnapshotsRemovedFromMetaStore uint64
|
||||
|
||||
TotMemMergeBeg uint64
|
||||
TotMemMergeErr uint64
|
||||
TotMemMergeDone uint64
|
||||
TotMemMergeZapBeg uint64
|
||||
TotMemMergeZapEnd uint64
|
||||
TotMemMergeZapTime uint64
|
||||
MaxMemMergeZapTime uint64
|
||||
TotMemMergeSegments uint64
|
||||
TotMemorySegmentsAtRoot uint64
|
||||
}
|
||||
|
||||
// atomically populates the returned map
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
generated
vendored
|
@ -17,7 +17,7 @@ package boltdb
|
|||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Iterator struct {
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
generated
vendored
|
@ -16,7 +16,7 @@ package boltdb
|
|||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
|
|
8
vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
generated
vendored
|
@ -30,7 +30,7 @@ import (
|
|||
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
|
|||
bo.ReadOnly = ro
|
||||
}
|
||||
|
||||
if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
|
||||
bo.InitialMmapSize = initialMmapSize
|
||||
} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
|
||||
bo.InitialMmapSize = int(initialMmapSize)
|
||||
}
|
||||
|
||||
db, err := bolt.Open(path, 0600, bo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
generated
vendored
|
@ -584,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error {
|
|||
|
||||
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
|
||||
tfr.doc = key[3+len(term)+1:]
|
||||
if len(tfr.doc) <= 0 {
|
||||
if len(tfr.doc) == 0 {
|
||||
return fmt.Errorf("invalid term frequency key, empty docid")
|
||||
}
|
||||
|
||||
|
|
34
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
34
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
|
@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
|
|||
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
numPlainTextBytes := doc.NumPlainTextBytes()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
|
||||
|
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
|
||||
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
|
||||
// start a writer for this update
|
||||
indexStart := time.Now()
|
||||
var kvwriter store.KVWriter
|
||||
|
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
if err == nil {
|
||||
atomic.AddUint64(&udc.stats.updates, 1)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
|
||||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
@ -775,7 +779,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
|
|||
}
|
||||
|
||||
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
|
||||
if len(in) <= 0 {
|
||||
if len(in) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
|
|||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
||||
persistedCallback := batch.PersistedCallback()
|
||||
if persistedCallback != nil {
|
||||
defer persistedCallback(err)
|
||||
}
|
||||
analysisStart := time.Now()
|
||||
|
||||
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
|
||||
|
@ -810,15 +818,18 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
}
|
||||
}
|
||||
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
// put the work on the queue
|
||||
udc.analysisQueue.Queue(aw)
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for k := range batch.IndexOps {
|
||||
doc := batch.IndexOps[k]
|
||||
if doc != nil {
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
// put the work on the queue
|
||||
udc.analysisQueue.Queue(aw)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}()
|
||||
}
|
||||
|
||||
// retrieve back index rows concurrent with analysis
|
||||
docBackIndexRowErr := error(nil)
|
||||
|
@ -958,6 +969,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
|
47
vendor/github.com/blevesearch/bleve/index_alias_impl.go
generated
vendored
47
vendor/github.com/blevesearch/bleve/index_alias_impl.go
generated
vendored
|
@ -433,6 +433,9 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
|
|||
Explain: req.Explain,
|
||||
Sort: req.Sort.Copy(),
|
||||
IncludeLocations: req.IncludeLocations,
|
||||
Score: req.Score,
|
||||
SearchAfter: req.SearchAfter,
|
||||
SearchBefore: req.SearchBefore,
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
@ -450,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
searchStart := time.Now()
|
||||
asyncResults := make(chan *asyncSearchResult, len(indexes))
|
||||
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
// run search on each index in separate go routine
|
||||
var waitGroup sync.WaitGroup
|
||||
|
||||
|
@ -502,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
|
||||
// sort all hits with the requested order
|
||||
if len(req.Sort) > 0 {
|
||||
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
|
||||
sorter := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sort.Sort(sorter)
|
||||
}
|
||||
|
||||
|
@ -523,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
sr.Facets.Fixup(name, fr.Size)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sort.Sort(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
// fix up original request
|
||||
sr.Request = req
|
||||
searchDuration := time.Since(searchStart)
|
||||
|
@ -580,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
|
|||
defer f.index.mutex.RUnlock()
|
||||
return f.fieldDict.Close()
|
||||
}
|
||||
|
||||
type multiSearchHitSorter struct {
|
||||
hits search.DocumentMatchCollection
|
||||
sort search.SortOrder
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
}
|
||||
|
||||
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
|
||||
return &multiSearchHitSorter{
|
||||
sort: sort,
|
||||
hits: hits,
|
||||
cachedScoring: sort.CacheIsScore(),
|
||||
cachedDesc: sort.CacheDescending(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
|
||||
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
|
||||
func (m *multiSearchHitSorter) Less(i, j int) bool {
|
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
|
||||
return c < 0
|
||||
}
|
||||
|
|
200
vendor/github.com/blevesearch/bleve/index_impl.go
generated
vendored
200
vendor/github.com/blevesearch/bleve/index_impl.go
generated
vendored
|
@ -19,6 +19,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
var coll *collector.TopNCollector
|
||||
if req.SearchAfter != nil {
|
||||
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
|
||||
} else {
|
||||
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
}
|
||||
|
||||
// open a reader for this search
|
||||
indexReader, err := i.i.Reader()
|
||||
|
@ -458,6 +472,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
|
||||
Explain: req.Explain,
|
||||
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
|
||||
Score: req.Score,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -493,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
facetsBuilder.Add(facetName, facetBuilder)
|
||||
}
|
||||
}
|
||||
collector.SetFacetsBuilder(facetsBuilder)
|
||||
coll.SetFacetsBuilder(facetsBuilder)
|
||||
}
|
||||
|
||||
memNeeded := memNeededForSearch(req, searcher, collector)
|
||||
memNeeded := memNeededForSearch(req, searcher, coll)
|
||||
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
|
||||
err = cbF(memNeeded)
|
||||
|
@ -514,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
}
|
||||
}
|
||||
|
||||
err = collector.Collect(ctx, searcher, indexReader)
|
||||
err = coll.Collect(ctx, searcher, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := collector.Results()
|
||||
hits := coll.Results()
|
||||
|
||||
var highlighter highlight.Highlighter
|
||||
|
||||
|
@ -541,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
}
|
||||
|
||||
for _, hit := range hits {
|
||||
if len(req.Fields) > 0 || highlighter != nil {
|
||||
doc, err := indexReader.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
for _, docF := range doc.Fields {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case *document.TextField:
|
||||
value = string(docF.Value())
|
||||
case *document.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case *document.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
case *document.BooleanField:
|
||||
boolean, err := docF.Boolean()
|
||||
if err == nil {
|
||||
value = boolean
|
||||
}
|
||||
case *document.GeoPointField:
|
||||
lon, err := docF.Lon()
|
||||
if err == nil {
|
||||
lat, err := docF.Lat()
|
||||
if err == nil {
|
||||
value = []float64{lon, lat}
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(docF.Name(), value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if highlighter != nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||
}
|
||||
}
|
||||
} else if doc == nil {
|
||||
// unexpected case, a doc ID that was found as a search hit
|
||||
// was unable to be found during document lookup
|
||||
return nil, ErrorIndexReadInconsistency
|
||||
}
|
||||
}
|
||||
if i.name != "" {
|
||||
hit.Index = i.name
|
||||
}
|
||||
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&i.stats.searches, 1)
|
||||
|
@ -617,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
logger.Printf("slow search took %s - %v", searchDuration, req)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, hits)
|
||||
sort.Sort(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Total: 1,
|
||||
|
@ -624,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
},
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
Total: collector.Total(),
|
||||
MaxScore: collector.MaxScore(),
|
||||
Total: coll.Total(),
|
||||
MaxScore: coll.MaxScore(),
|
||||
Took: searchDuration,
|
||||
Facets: collector.FacetResults(),
|
||||
Facets: coll.FacetResults(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
|
||||
indexName string, r index.IndexReader,
|
||||
highlighter highlight.Highlighter) error {
|
||||
if len(req.Fields) > 0 || highlighter != nil {
|
||||
doc, err := r.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
for _, docF := range doc.Fields {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case *document.TextField:
|
||||
value = string(docF.Value())
|
||||
case *document.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case *document.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
case *document.BooleanField:
|
||||
boolean, err := docF.Boolean()
|
||||
if err == nil {
|
||||
value = boolean
|
||||
}
|
||||
case *document.GeoPointField:
|
||||
lon, err := docF.Lon()
|
||||
if err == nil {
|
||||
lat, err := docF.Lat()
|
||||
if err == nil {
|
||||
value = []float64{lon, lat}
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(docF.Name(), value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if highlighter != nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||
}
|
||||
}
|
||||
} else if doc == nil {
|
||||
// unexpected case, a doc ID that was found as a search hit
|
||||
// was unable to be found during document lookup
|
||||
return ErrorIndexReadInconsistency
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fields returns the name of all the fields this
|
||||
// Index has operated on.
|
||||
func (i *indexImpl) Fields() (fields []string, err error) {
|
||||
|
@ -853,3 +890,26 @@ func deDuplicate(fields []string) []string {
|
|||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
type searchHitSorter struct {
|
||||
hits search.DocumentMatchCollection
|
||||
sort search.SortOrder
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
}
|
||||
|
||||
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
|
||||
return &searchHitSorter{
|
||||
sort: sort,
|
||||
hits: hits,
|
||||
cachedScoring: sort.CacheIsScore(),
|
||||
cachedDesc: sort.CacheDescending(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *searchHitSorter) Len() int { return len(m.hits) }
|
||||
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
|
||||
func (m *searchHitSorter) Less(i, j int) bool {
|
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
|
||||
return c < 0
|
||||
}
|
||||
|
|
3
vendor/github.com/blevesearch/bleve/index_meta.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/index_meta.go
generated
vendored
|
@ -18,6 +18,7 @@ import (
|
|||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/blevesearch/bleve/index/upsidedown"
|
||||
)
|
||||
|
@ -92,5 +93,5 @@ func (i *indexMeta) Save(path string) (err error) {
|
|||
}
|
||||
|
||||
func indexMetaPath(path string) string {
|
||||
return path + string(os.PathSeparator) + metaFilename
|
||||
return filepath.Join(path, metaFilename)
|
||||
}
|
||||
|
|
40
vendor/github.com/blevesearch/bleve/mapping/document.go
generated
vendored
40
vendor/github.com/blevesearch/bleve/mapping/document.go
generated
vendored
|
@ -42,7 +42,7 @@ type DocumentMapping struct {
|
|||
Dynamic bool `json:"dynamic"`
|
||||
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
|
||||
Fields []*FieldMapping `json:"fields,omitempty"`
|
||||
DefaultAnalyzer string `json:"default_analyzer"`
|
||||
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
|
||||
|
||||
// StructTagKey overrides "json" when looking for field names in struct tags
|
||||
StructTagKey string `json:"struct_tag_key,omitempty"`
|
||||
|
@ -324,13 +324,17 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
|
|||
}
|
||||
|
||||
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
|
||||
// allow default "json" tag to be overriden
|
||||
// allow default "json" tag to be overridden
|
||||
structTagKey := dm.StructTagKey
|
||||
if structTagKey == "" {
|
||||
structTagKey = "json"
|
||||
}
|
||||
|
||||
val := reflect.ValueOf(data)
|
||||
if !val.IsValid() {
|
||||
return
|
||||
}
|
||||
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Map:
|
||||
|
@ -420,7 +424,11 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
if subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
||||
if fieldMapping.Type == "geopoint" {
|
||||
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
|
||||
} else {
|
||||
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
||||
}
|
||||
}
|
||||
} else if closestDocMapping.Dynamic {
|
||||
// automatic indexing behavior
|
||||
|
@ -517,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
if !propertyValue.IsNil() {
|
||||
switch property := property.(type) {
|
||||
case encoding.TextMarshaler:
|
||||
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
// ONLY process TextMarshaler if there is an explicit mapping
|
||||
// AND all of the fields are of type text
|
||||
// OTHERWISE process field without TextMarshaler
|
||||
if subDocMapping != nil {
|
||||
allFieldsText := true
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if fieldMapping.Type == "text" {
|
||||
fieldMapping.processString(string(txt), pathString, path, indexes, context)
|
||||
if fieldMapping.Type != "text" {
|
||||
allFieldsText = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && allFieldsText {
|
||||
txtStr := string(txt)
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processString(txtStr, pathString, path, indexes, context)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
default:
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/mapping/index.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/mapping/index.go
generated
vendored
|
@ -320,8 +320,8 @@ func (im *IndexMappingImpl) determineType(data interface{}) string {
|
|||
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
|
||||
docType := im.determineType(data)
|
||||
docMapping := im.mappingForType(docType)
|
||||
walkContext := im.newWalkContext(doc, docMapping)
|
||||
if docMapping.Enabled {
|
||||
walkContext := im.newWalkContext(doc, docMapping)
|
||||
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
|
||||
|
||||
// see if the _all field was disabled
|
||||
|
|
3
vendor/github.com/blevesearch/bleve/mapping/reflect.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/mapping/reflect.go
generated
vendored
|
@ -35,6 +35,9 @@ func lookupPropertyPath(data interface{}, path string) interface{} {
|
|||
|
||||
func lookupPropertyPathPart(data interface{}, part string) interface{} {
|
||||
val := reflect.ValueOf(data)
|
||||
if !val.IsValid() {
|
||||
return nil
|
||||
}
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Map:
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/numeric/bin.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/numeric/bin.go
generated
vendored
|
@ -14,7 +14,7 @@ var interleaveShift = []uint{1, 2, 4, 8, 16}
|
|||
|
||||
// Interleave the first 32 bits of each uint64
|
||||
// adapted from org.apache.lucene.util.BitUtil
|
||||
// whcih was adapted from:
|
||||
// which was adapted from:
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
|
||||
func Interleave(v1, v2 uint64) uint64 {
|
||||
v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4]
|
||||
|
|
21
vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
generated
vendored
21
vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
generated
vendored
|
@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
|
|||
type PrefixCoded []byte
|
||||
|
||||
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
|
||||
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
|
||||
rv PrefixCoded, preallocRest []byte, err error) {
|
||||
if shift > 63 {
|
||||
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
|
||||
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
|
||||
}
|
||||
|
||||
nChars := ((63 - shift) / 7) + 1
|
||||
rv := make(PrefixCoded, nChars+1)
|
||||
|
||||
size := int(nChars + 1)
|
||||
if len(prealloc) >= size {
|
||||
rv = PrefixCoded(prealloc[0:size])
|
||||
preallocRest = prealloc[size:]
|
||||
} else {
|
||||
rv = make(PrefixCoded, size)
|
||||
}
|
||||
|
||||
rv[0] = ShiftStartInt64 + byte(shift)
|
||||
|
||||
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
|
||||
|
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
|
|||
nChars--
|
||||
sortableBits = int64(uint64(sortableBits) >> 7)
|
||||
}
|
||||
return rv, nil
|
||||
|
||||
return rv, preallocRest, nil
|
||||
}
|
||||
|
||||
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
|
||||
|
|
45
vendor/github.com/blevesearch/bleve/search.go
generated
vendored
45
vendor/github.com/blevesearch/bleve/search.go
generated
vendored
|
@ -261,6 +261,9 @@ func (h *HighlightRequest) AddField(field string) {
|
|||
// Explain triggers inclusion of additional search
|
||||
// result score explanations.
|
||||
// Sort describes the desired order for the results to be returned.
|
||||
// Score controls the kind of scoring performed
|
||||
// SearchAfter supports deep paging by providing a minimum sort key
|
||||
// SearchBefore supports deep paging by providing a maximum sort key
|
||||
//
|
||||
// A special field named "*" can be used to return all fields.
|
||||
type SearchRequest struct {
|
||||
|
@ -273,6 +276,9 @@ type SearchRequest struct {
|
|||
Explain bool `json:"explain"`
|
||||
Sort search.SortOrder `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score,omitempty"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
}
|
||||
|
||||
func (r *SearchRequest) Validate() error {
|
||||
|
@ -283,6 +289,27 @@ func (r *SearchRequest) Validate() error {
|
|||
}
|
||||
}
|
||||
|
||||
if r.SearchAfter != nil && r.SearchBefore != nil {
|
||||
return fmt.Errorf("cannot use search after and search before together")
|
||||
}
|
||||
|
||||
if r.SearchAfter != nil {
|
||||
if r.From != 0 {
|
||||
return fmt.Errorf("cannot use search after with from !=0")
|
||||
}
|
||||
if len(r.SearchAfter) != len(r.Sort) {
|
||||
return fmt.Errorf("search after must have same size as sort order")
|
||||
}
|
||||
}
|
||||
if r.SearchBefore != nil {
|
||||
if r.From != 0 {
|
||||
return fmt.Errorf("cannot use search before with from !=0")
|
||||
}
|
||||
if len(r.SearchBefore) != len(r.Sort) {
|
||||
return fmt.Errorf("search before must have same size as sort order")
|
||||
}
|
||||
}
|
||||
|
||||
return r.Facets.Validate()
|
||||
}
|
||||
|
||||
|
@ -309,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
|
|||
r.Sort = order
|
||||
}
|
||||
|
||||
// SetSearchAfter sets the request to skip over hits with a sort
|
||||
// value less than the provided sort after key
|
||||
func (r *SearchRequest) SetSearchAfter(after []string) {
|
||||
r.SearchAfter = after
|
||||
}
|
||||
|
||||
// SetSearchBefore sets the request to skip over hits with a sort
|
||||
// value greater than the provided sort before key
|
||||
func (r *SearchRequest) SetSearchBefore(before []string) {
|
||||
r.SearchBefore = before
|
||||
}
|
||||
|
||||
// UnmarshalJSON deserializes a JSON representation of
|
||||
// a SearchRequest
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
|
@ -322,6 +361,9 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
|
@ -348,6 +390,9 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
r.Fields = temp.Fields
|
||||
r.Facets = temp.Facets
|
||||
r.IncludeLocations = temp.IncludeLocations
|
||||
r.Score = temp.Score
|
||||
r.SearchAfter = temp.SearchAfter
|
||||
r.SearchBefore = temp.SearchBefore
|
||||
r.Query, err = query.ParseQuery(temp.Q)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
20
vendor/github.com/blevesearch/bleve/search/collector.go
generated
vendored
20
vendor/github.com/blevesearch/bleve/search/collector.go
generated
vendored
|
@ -30,3 +30,23 @@ type Collector interface {
|
|||
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
|
||||
FacetResults() FacetResults
|
||||
}
|
||||
|
||||
// DocumentMatchHandler is the type of document match callback
|
||||
// bleve will invoke during the search.
|
||||
// Eventually, bleve will indicate the completion of an ongoing search
|
||||
// by invoking the callback with a nil document match.
|
||||
// The application should take a copy of the hit/documentMatch
|
||||
// if it wishes to own it or needs prolonged access to it.
|
||||
type DocumentMatchHandler func(hit *DocumentMatch) error
|
||||
|
||||
// MakeDocumentMatchHandlerKeyType is the dedicated string type of the
// context key declared below.
type MakeDocumentMatchHandlerKeyType string
|
||||
|
||||
// MakeDocumentMatchHandlerKey is the context key under which an application
// can supply a MakeDocumentMatchHandler; the top-N collector looks it up
// with ctx.Value and type-asserts the value to MakeDocumentMatchHandler.
var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
|
||||
"MakeDocumentMatchHandlerKey")
|
||||
|
||||
// MakeDocumentMatchHandler is an optional DocumentMatchHandler
|
||||
// builder function which applications can pass to bleve.
|
||||
// The builder gives bleve a DocumentMatchHandler function,
|
||||
// which bleve will invoke on every document match.
|
||||
// Besides the callback it returns loadID (the top-N collector ORs this
// into its own "need document IDs" flag) and an error that aborts the
// collection when non-nil.
type MakeDocumentMatchHandler func(ctx *SearchContext) (
|
||||
callback DocumentMatchHandler, loadID bool, err error)
|
||||
|
|
4
vendor/github.com/blevesearch/bleve/search/collector/heap.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/search/collector/heap.go
generated
vendored
|
@ -25,9 +25,9 @@ type collectStoreHeap struct {
|
|||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
|
||||
func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
|
||||
rv := &collectStoreHeap{
|
||||
heap: make(search.DocumentMatchCollection, 0, cap),
|
||||
heap: make(search.DocumentMatchCollection, 0, capacity),
|
||||
compare: compare,
|
||||
}
|
||||
heap.Init(rv)
|
||||
|
|
5
vendor/github.com/blevesearch/bleve/search/collector/list.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/search/collector/list.go
generated
vendored
|
@ -25,7 +25,7 @@ type collectStoreList struct {
|
|||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreList(cap int, compare collectorCompare) *collectStoreList {
|
||||
func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
|
||||
rv := &collectStoreList{
|
||||
results: list.New(),
|
||||
compare: compare,
|
||||
|
@ -34,8 +34,7 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch,
|
||||
size int) *search.DocumentMatch {
|
||||
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
|
||||
c.add(doc)
|
||||
if c.len() > size {
|
||||
return c.removeLast()
|
||||
|
|
4
vendor/github.com/blevesearch/bleve/search/collector/slice.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/search/collector/slice.go
generated
vendored
|
@ -21,9 +21,9 @@ type collectStoreSlice struct {
|
|||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
|
||||
func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
|
||||
rv := &collectStoreSlice{
|
||||
slice: make(search.DocumentMatchCollection, 0, cap),
|
||||
slice: make(search.DocumentMatchCollection, 0, capacity),
|
||||
compare: compare,
|
||||
}
|
||||
return rv
|
||||
|
|
144
vendor/github.com/blevesearch/bleve/search/collector/topn.go
generated
vendored
144
vendor/github.com/blevesearch/bleve/search/collector/topn.go
generated
vendored
|
@ -17,6 +17,7 @@ package collector
|
|||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
@ -69,6 +70,7 @@ type TopNCollector struct {
|
|||
lowestMatchOutsideResults *search.DocumentMatch
|
||||
updateFieldVisitor index.DocumentFieldTermVisitor
|
||||
dvReader index.DocValueReader
|
||||
searchAfter *search.DocumentMatch
|
||||
}
|
||||
|
||||
// CheckDoneEvery controls how frequently we check the context deadline
|
||||
|
@ -78,6 +80,33 @@ const CheckDoneEvery = uint64(1024)
|
|||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
// offset-based paging only: the shared constructor is used as-is, so no
// search-after key is attached to the collector
return newTopNCollector(size, skip, sort)
|
||||
}
|
||||
|
||||
// NewTopNCollector builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
|
||||
rv := newTopNCollector(size, 0, sort)
|
||||
rv.searchAfter = &search.DocumentMatch{
|
||||
Sort: after,
|
||||
}
|
||||
|
||||
for pos, ss := range sort {
|
||||
if ss.RequiresDocID() {
|
||||
rv.searchAfter.ID = after[pos]
|
||||
}
|
||||
if ss.RequiresScoring() {
|
||||
if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
|
||||
rv.searchAfter.Score = score
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
hc := &TopNCollector{size: size, skip: skip, sort: sort}
|
||||
|
||||
// pre-allocate space on the store to avoid reslicing
|
||||
|
@ -140,6 +169,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
}
|
||||
searchContext := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
|
||||
Collector: hc,
|
||||
IndexReader: reader,
|
||||
}
|
||||
|
||||
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
|
||||
|
@ -154,6 +185,19 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
hc.sort.UpdateVisitor(field, term)
|
||||
}
|
||||
|
||||
dmHandlerMaker := MakeTopNDocumentMatchHandler
|
||||
if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
|
||||
dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
|
||||
}
|
||||
// use the application given builder for making the custom document match
|
||||
// handler and perform callbacks/invocations on the newly made handler.
|
||||
dmHandler, loadID, err := dmHandlerMaker(searchContext)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hc.needDocIds = hc.needDocIds || loadID
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
|
@ -169,13 +213,26 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
}
|
||||
}
|
||||
|
||||
err = hc.collectSingle(searchContext, reader, next)
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
|
||||
// help finalize/flush the results in case
|
||||
// of custom document match handlers.
|
||||
err = dmHandler(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// compute search duration
|
||||
hc.took = time.Since(startTime)
|
||||
if err != nil {
|
||||
|
@ -191,8 +248,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
|
||||
var sortByScoreOpt = []string{"_score"}
|
||||
|
||||
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
|
||||
var err error
|
||||
func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
|
||||
reader index.IndexReader, d *search.DocumentMatch) (err error) {
|
||||
|
||||
// visit field terms for features that require it (sort, facets)
|
||||
if len(hc.neededFields) > 0 {
|
||||
|
@ -226,35 +283,64 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
|
|||
hc.sort.Value(d)
|
||||
}
|
||||
|
||||
// optimization, we track lowest sorting hit already removed from heap
|
||||
// with this one comparison, we can avoid all heap operations if
|
||||
// this hit would have been added and then immediately removed
|
||||
if hc.lowestMatchOutsideResults != nil {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
|
||||
if cmp >= 0 {
|
||||
// this hit can't possibly be in the result set, so avoid heap ops
|
||||
ctx.DocumentMatchPool.Put(d)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
|
||||
if removed != nil {
|
||||
if hc.lowestMatchOutsideResults == nil {
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
} else {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
|
||||
if cmp < 0 {
|
||||
tmp := hc.lowestMatchOutsideResults
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
ctx.DocumentMatchPool.Put(tmp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func MakeTopNDocumentMatchHandler(
|
||||
ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
|
||||
var hc *TopNCollector
|
||||
var ok bool
|
||||
if hc, ok = ctx.Collector.(*TopNCollector); ok {
|
||||
return func(d *search.DocumentMatch) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// support search after based pagination,
|
||||
// if this hit is <= the search after sort key
|
||||
// we should skip it
|
||||
if hc.searchAfter != nil {
|
||||
// exact sort order matches use hit number to break tie
|
||||
// but we want to allow for exact match, so we pretend
|
||||
hc.searchAfter.HitNumber = d.HitNumber
|
||||
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// optimization, we track lowest sorting hit already removed from heap
|
||||
// with this one comparison, we can avoid all heap operations if
|
||||
// this hit would have been added and then immediately removed
|
||||
if hc.lowestMatchOutsideResults != nil {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d,
|
||||
hc.lowestMatchOutsideResults)
|
||||
if cmp >= 0 {
|
||||
// this hit can't possibly be in the result set, so avoid heap ops
|
||||
ctx.DocumentMatchPool.Put(d)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
|
||||
if removed != nil {
|
||||
if hc.lowestMatchOutsideResults == nil {
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
} else {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
|
||||
removed, hc.lowestMatchOutsideResults)
|
||||
if cmp < 0 {
|
||||
tmp := hc.lowestMatchOutsideResults
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
ctx.DocumentMatchPool.Put(tmp)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, false, nil
|
||||
}
|
||||
return nil, false, nil
|
||||
}
|
||||
|
||||
// visitFieldTerms is responsible for visiting the field terms of the
|
||||
// search hit, and passing visited terms to the sort and facet builder
|
||||
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
|
||||
|
|
14
vendor/github.com/blevesearch/bleve/search/facets_builder.go
generated
vendored
14
vendor/github.com/blevesearch/bleve/search/facets_builder.go
generated
vendored
|
@ -54,14 +54,14 @@ type FacetBuilder interface {
|
|||
|
||||
type FacetsBuilder struct {
|
||||
indexReader index.IndexReader
|
||||
facets map[string]FacetBuilder
|
||||
facetNames []string
|
||||
facets []FacetBuilder
|
||||
fields []string
|
||||
}
|
||||
|
||||
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
|
||||
return &FacetsBuilder{
|
||||
indexReader: indexReader,
|
||||
facets: make(map[string]FacetBuilder, 0),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,8 +69,7 @@ func (fb *FacetsBuilder) Size() int {
|
|||
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
|
||||
|
||||
for k, v := range fb.facets {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
v.Size()
|
||||
sizeInBytes += size.SizeOfString + v.Size() + len(fb.facetNames[k])
|
||||
}
|
||||
|
||||
for _, entry := range fb.fields {
|
||||
|
@ -81,7 +80,8 @@ func (fb *FacetsBuilder) Size() int {
|
|||
}
|
||||
|
||||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
|
||||
fb.facets[name] = facetBuilder
|
||||
fb.facetNames = append(fb.facetNames, name)
|
||||
fb.facets = append(fb.facets, facetBuilder)
|
||||
fb.fields = append(fb.fields, facetBuilder.Field())
|
||||
}
|
||||
|
||||
|
@ -333,9 +333,9 @@ func (fr FacetResults) Fixup(name string, size int) {
|
|||
|
||||
func (fb *FacetsBuilder) Results() FacetResults {
|
||||
fr := make(FacetResults)
|
||||
for facetName, facetBuilder := range fb.facets {
|
||||
for i, facetBuilder := range fb.facets {
|
||||
facetResult := facetBuilder.Result()
|
||||
fr[facetName] = facetResult
|
||||
fr[fb.facetNames[i]] = facetResult
|
||||
}
|
||||
return fr
|
||||
}
|
||||
|
|
5
vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go
generated
vendored
|
@ -58,6 +58,11 @@ OUTER:
|
|||
// push back towards beginning
|
||||
// without crossing maxbegin
|
||||
for start > 0 && used < s.fragmentSize {
|
||||
if start > len(orig) {
|
||||
// bail if out of bounds, possibly due to token replacement
|
||||
// e.g with a regexp replacement
|
||||
continue OUTER
|
||||
}
|
||||
r, size := utf8.DecodeLastRune(orig[0:start])
|
||||
if r == utf8.RuneError {
|
||||
continue OUTER // bail
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/search/query/conjunction.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/search/query/conjunction.go
generated
vendored
|
@ -70,9 +70,11 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
|||
}
|
||||
ss = append(ss, sr)
|
||||
}
|
||||
|
||||
if len(ss) < 1 {
|
||||
return searcher.NewMatchNoneSearcher(i)
|
||||
}
|
||||
|
||||
return searcher.NewConjunctionSearcher(i, ss, options)
|
||||
}
|
||||
|
||||
|
|
31
vendor/github.com/blevesearch/bleve/search/query/date_range.go
generated
vendored
31
vendor/github.com/blevesearch/bleve/search/query/date_range.go
generated
vendored
|
@ -41,6 +41,14 @@ type BleveQueryTime struct {
|
|||
time.Time
|
||||
}
|
||||
|
||||
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime delimit the window
// of instants accepted by isDatetimeCompatible when the query date format
// is RFC3339. Both are expressed in UTC.
var (
	MinRFC3339CompatibleTime = time.Date(1677, time.December, 1, 0, 0, 0, 0, time.UTC)
	MaxRFC3339CompatibleTime = time.Date(2262, time.April, 11, 11, 59, 59, 0, time.UTC)
)
|
||||
|
||||
func queryTimeFromString(t string) (time.Time, error) {
|
||||
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
|
||||
if err != nil {
|
||||
|
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
|
|||
min := math.Inf(-1)
|
||||
max := math.Inf(1)
|
||||
if !q.Start.IsZero() {
|
||||
min = numeric.Int64ToFloat64(q.Start.UnixNano())
|
||||
if !isDatetimeCompatible(q.Start) {
|
||||
// overflow
|
||||
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
|
||||
}
|
||||
startInt64 := q.Start.UnixNano()
|
||||
min = numeric.Int64ToFloat64(startInt64)
|
||||
}
|
||||
if !q.End.IsZero() {
|
||||
max = numeric.Int64ToFloat64(q.End.UnixNano())
|
||||
if !isDatetimeCompatible(q.End) {
|
||||
// overflow
|
||||
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
|
||||
}
|
||||
endInt64 := q.End.UnixNano()
|
||||
max = numeric.Int64ToFloat64(endInt64)
|
||||
}
|
||||
|
||||
return &min, &max, nil
|
||||
|
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isDatetimeCompatible(t BleveQueryTime) bool {
|
||||
if QueryDateTimeFormat == time.RFC3339 &&
|
||||
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
5
vendor/github.com/blevesearch/bleve/search/query/disjunction.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/search/query/disjunction.go
generated
vendored
|
@ -58,7 +58,8 @@ func (q *DisjunctionQuery) SetMin(m float64) {
|
|||
q.Min = m
|
||||
}
|
||||
|
||||
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
||||
options search.SearcherOptions) (search.Searcher, error) {
|
||||
ss := make([]search.Searcher, 0, len(q.Disjuncts))
|
||||
for _, disjunct := range q.Disjuncts {
|
||||
sr, err := disjunct.Searcher(i, m, options)
|
||||
|
@ -76,9 +77,11 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
|||
}
|
||||
ss = append(ss, sr)
|
||||
}
|
||||
|
||||
if len(ss) < 1 {
|
||||
return searcher.NewMatchNoneSearcher(i)
|
||||
}
|
||||
|
||||
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
|
||||
}
|
||||
|
||||
|
|
94
vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
generated
vendored
Normal file
94
vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
generated
vendored
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searcher"
|
||||
)
|
||||
|
||||
// GeoBoundingPolygonQuery is a query described by a list of polygon
// points, with an optional target field and an optional boost.
// In JSON the points are carried under the "polygon_points" key.
type GeoBoundingPolygonQuery struct {
|
||||
Points []geo.Point `json:"polygon_points"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
// NewGeoBoundingPolygonQuery creates a GeoBoundingPolygonQuery over the
// given points. The slice is stored as-is (no copy); field and boost are
// left unset.
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
|
||||
return &GeoBoundingPolygonQuery{
|
||||
Points: points}
|
||||
}
|
||||
|
||||
// SetBoost records b as the boost of this query.
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
|
||||
boost := Boost(b)
|
||||
// BoostVal is a pointer, so take the address of the local copy
q.BoostVal = &boost
|
||||
}
|
||||
|
||||
// Boost returns the boost value of this query.
// NOTE(review): BoostVal may be nil when no boost was set; presumably
// (*Boost).Value tolerates a nil receiver — confirm.
func (q *GeoBoundingPolygonQuery) Boost() float64 {
|
||||
return q.BoostVal.Value()
|
||||
}
|
||||
|
||||
// SetField sets the name of the field this query targets.
func (q *GeoBoundingPolygonQuery) SetField(f string) {
|
||||
q.FieldVal = f
|
||||
}
|
||||
|
||||
// Field returns the name of the field this query targets; it is empty
// when no field was set.
func (q *GeoBoundingPolygonQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
|
||||
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = m.DefaultSearchField()
|
||||
}
|
||||
|
||||
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
|
||||
}
|
||||
|
||||
// Validate always returns nil: no checks are performed on the points,
// field or boost at this stage.
func (q *GeoBoundingPolygonQuery) Validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
|
||||
tmp := struct {
|
||||
Points []interface{} `json:"polygon_points"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}{}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
q.Points = make([]geo.Point, 0, len(tmp.Points))
|
||||
for _, i := range tmp.Points {
|
||||
// now use our generic point parsing code from the geo package
|
||||
lon, lat, found := geo.ExtractGeoPoint(i)
|
||||
if !found {
|
||||
return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
|
||||
}
|
||||
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
|
||||
}
|
||||
|
||||
q.FieldVal = tmp.FieldVal
|
||||
q.BoostVal = tmp.BoostVal
|
||||
return nil
|
||||
}
|
21
vendor/github.com/blevesearch/bleve/search/query/query.go
generated
vendored
21
vendor/github.com/blevesearch/bleve/search/query/query.go
generated
vendored
|
@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasPoints := tmp["polygon_points"]
|
||||
if hasPoints {
|
||||
var rv GeoBoundingPolygonQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown query type")
|
||||
}
|
||||
|
||||
|
@ -296,32 +305,28 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
|
|||
}
|
||||
|
||||
expand = func(query Query) (Query, error) {
|
||||
switch query.(type) {
|
||||
switch q := query.(type) {
|
||||
case *QueryStringQuery:
|
||||
q := query.(*QueryStringQuery)
|
||||
parsed, err := parseQuerySyntax(q.Query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
|
||||
}
|
||||
return expand(parsed)
|
||||
case *ConjunctionQuery:
|
||||
q := *query.(*ConjunctionQuery)
|
||||
children, err := expandSlice(q.Conjuncts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
q.Conjuncts = children
|
||||
return &q, nil
|
||||
return q, nil
|
||||
case *DisjunctionQuery:
|
||||
q := *query.(*DisjunctionQuery)
|
||||
children, err := expandSlice(q.Disjuncts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
q.Disjuncts = children
|
||||
return &q, nil
|
||||
return q, nil
|
||||
case *BooleanQuery:
|
||||
q := *query.(*BooleanQuery)
|
||||
var err error
|
||||
q.Must, err = expand(q.Must)
|
||||
if err != nil {
|
||||
|
@ -335,7 +340,7 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &q, nil
|
||||
return q, nil
|
||||
default:
|
||||
return query, nil
|
||||
}
|
||||
|
|
1
vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go
generated
vendored
1
vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go
generated
vendored
|
@ -273,6 +273,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
|
|||
// see where to go
|
||||
if !l.seenDot && next == '.' {
|
||||
// stay in this state
|
||||
l.seenDot = true
|
||||
l.buf += string(next)
|
||||
return inNumOrStrState, true
|
||||
} else if unicode.IsDigit(next) {
|
||||
|
|
37
vendor/github.com/blevesearch/bleve/search/query/regexp.go
generated
vendored
37
vendor/github.com/blevesearch/bleve/search/query/regexp.go
generated
vendored
|
@ -15,7 +15,6 @@
|
|||
package query
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
@ -28,7 +27,6 @@ type RegexpQuery struct {
|
|||
Regexp string `json:"regexp"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
compiled *regexp.Regexp
|
||||
}
|
||||
|
||||
// NewRegexpQuery creates a new Query which finds
|
||||
|
@ -64,33 +62,20 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opti
|
|||
if q.FieldVal == "" {
|
||||
field = m.DefaultSearchField()
|
||||
}
|
||||
err := q.compile()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
// require that pattern NOT be anchored to start and end of term.
|
||||
// do not attempt to remove trailing $, its presence is not
|
||||
// known to interfere with LiteralPrefix() the way ^ does
|
||||
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
|
||||
actualRegexp := q.Regexp
|
||||
if strings.HasPrefix(actualRegexp, "^") {
|
||||
actualRegexp = actualRegexp[1:] // remove leading ^
|
||||
}
|
||||
|
||||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
|
||||
return searcher.NewRegexpStringSearcher(i, actualRegexp, field,
|
||||
q.BoostVal.Value(), options)
|
||||
}
|
||||
|
||||
func (q *RegexpQuery) Validate() error {
|
||||
return q.compile()
|
||||
}
|
||||
|
||||
func (q *RegexpQuery) compile() error {
|
||||
if q.compiled == nil {
|
||||
// require that pattern NOT be anchored to start and end of term
|
||||
actualRegexp := q.Regexp
|
||||
if strings.HasPrefix(actualRegexp, "^") {
|
||||
actualRegexp = actualRegexp[1:] // remove leading ^
|
||||
}
|
||||
// do not attempt to remove trailing $, it's presence is not
|
||||
// known to interfere with LiteralPrefix() the way ^ does
|
||||
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
|
||||
var err error
|
||||
q.compiled, err = regexp.Compile(actualRegexp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return nil // real validation delayed until searcher constructor
|
||||
}
|
||||
|
|
23
vendor/github.com/blevesearch/bleve/search/query/wildcard.go
generated
vendored
23
vendor/github.com/blevesearch/bleve/search/query/wildcard.go
generated
vendored
|
@ -15,7 +15,6 @@
|
|||
package query
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
@ -47,7 +46,6 @@ type WildcardQuery struct {
|
|||
Wildcard string `json:"wildcard"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
compiled *regexp.Regexp
|
||||
}
|
||||
|
||||
// NewWildcardQuery creates a new Query which finds
|
||||
|
@ -83,24 +81,13 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op
|
|||
if q.FieldVal == "" {
|
||||
field = m.DefaultSearchField()
|
||||
}
|
||||
if q.compiled == nil {
|
||||
var err error
|
||||
q.compiled, err = q.convertToRegexp()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
|
||||
regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
|
||||
|
||||
return searcher.NewRegexpStringSearcher(i, regexpString, field,
|
||||
q.BoostVal.Value(), options)
|
||||
}
|
||||
|
||||
func (q *WildcardQuery) Validate() error {
|
||||
var err error
|
||||
q.compiled, err = q.convertToRegexp()
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
|
||||
regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
|
||||
return regexp.Compile(regexpString)
|
||||
return nil // real validation delayed until searcher constructor
|
||||
}
|
||||
|
|
103
vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
generated
vendored
103
vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
generated
vendored
|
@ -40,6 +40,7 @@ type TermQueryScorer struct {
|
|||
idf float64
|
||||
options search.SearcherOptions
|
||||
idfExplanation *search.Explanation
|
||||
includeScore bool
|
||||
queryNorm float64
|
||||
queryWeight float64
|
||||
queryWeightExplanation *search.Explanation
|
||||
|
@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int {
|
|||
|
||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
||||
rv := TermQueryScorer{
|
||||
queryTerm: string(queryTerm),
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
queryTerm: string(queryTerm),
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
includeScore: options.Score != "none",
|
||||
}
|
||||
|
||||
if options.Explain {
|
||||
|
@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
|
|||
}
|
||||
|
||||
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
|
||||
var scoreExplanation *search.Explanation
|
||||
|
||||
// need to compute score
|
||||
var tf float64
|
||||
if termMatch.Freq < MaxSqrtCache {
|
||||
tf = SqrtCache[int(termMatch.Freq)]
|
||||
} else {
|
||||
tf = math.Sqrt(float64(termMatch.Freq))
|
||||
}
|
||||
score := tf * termMatch.Norm * s.idf
|
||||
|
||||
if s.options.Explain {
|
||||
childrenExplanations := make([]*search.Explanation, 3)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
// perform any score computations only when needed
|
||||
if s.includeScore || s.options.Explain {
|
||||
var scoreExplanation *search.Explanation
|
||||
var tf float64
|
||||
if termMatch.Freq < MaxSqrtCache {
|
||||
tf = SqrtCache[int(termMatch.Freq)]
|
||||
} else {
|
||||
tf = math.Sqrt(float64(termMatch.Freq))
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: termMatch.Norm,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
||||
}
|
||||
childrenExplanations[2] = s.idfExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
score := tf * termMatch.Norm * s.idf
|
||||
|
||||
// if the query weight isn't 1, multiply
|
||||
if s.queryWeight != 1.0 {
|
||||
score = score * s.queryWeight
|
||||
if s.options.Explain {
|
||||
childExplanations := make([]*search.Explanation, 2)
|
||||
childExplanations[0] = s.queryWeightExplanation
|
||||
childExplanations[1] = scoreExplanation
|
||||
childrenExplanations := make([]*search.Explanation, 3)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: termMatch.Norm,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
||||
}
|
||||
childrenExplanations[2] = s.idfExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
|
||||
Children: childExplanations,
|
||||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
|
||||
// if the query weight isn't 1, multiply
|
||||
if s.queryWeight != 1.0 {
|
||||
score = score * s.queryWeight
|
||||
if s.options.Explain {
|
||||
childExplanations := make([]*search.Explanation, 2)
|
||||
childExplanations[0] = s.queryWeightExplanation
|
||||
childExplanations[1] = scoreExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
|
||||
Children: childExplanations,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.includeScore {
|
||||
rv.Score = score
|
||||
}
|
||||
|
||||
if s.options.Explain {
|
||||
rv.Expl = scoreExplanation
|
||||
}
|
||||
}
|
||||
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
|
||||
rv.Score = score
|
||||
if s.options.Explain {
|
||||
rv.Expl = scoreExplanation
|
||||
}
|
||||
|
||||
if len(termMatch.Vectors) > 0 {
|
||||
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
|
||||
|
|
93
vendor/github.com/blevesearch/bleve/search/search.go
generated
vendored
93
vendor/github.com/blevesearch/bleve/search/search.go
generated
vendored
|
@ -17,8 +17,8 @@ package search
|
|||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
@ -50,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (ap ArrayPositions) Compare(other ArrayPositions) int {
|
||||
for i, p := range ap {
|
||||
if i >= len(other) {
|
||||
return 1
|
||||
}
|
||||
if p < other[i] {
|
||||
return -1
|
||||
}
|
||||
if p > other[i] {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
if len(ap) < len(other) {
|
||||
return -1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type Location struct {
|
||||
// Pos is the position of the term within the field, starting at 1
|
||||
Pos uint64 `json:"pos"`
|
||||
|
@ -69,6 +87,46 @@ func (l *Location) Size() int {
|
|||
|
||||
type Locations []*Location
|
||||
|
||||
// Len returns the number of locations held in p.
func (p Locations) Len() int {
	return len(p)
}
|
||||
// Swap exchanges the locations stored at indexes i and j.
func (p Locations) Swap(i, j int) {
	held := p[i]
	p[i] = p[j]
	p[j] = held
}
|
||||
|
||||
func (p Locations) Less(i, j int) bool {
|
||||
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
|
||||
if c < 0 {
|
||||
return true
|
||||
}
|
||||
if c > 0 {
|
||||
return false
|
||||
}
|
||||
return p[i].Pos < p[j].Pos
|
||||
}
|
||||
|
||||
func (p Locations) Dedupe() Locations { // destructive!
|
||||
if len(p) <= 1 {
|
||||
return p
|
||||
}
|
||||
|
||||
sort.Sort(p)
|
||||
|
||||
slow := 0
|
||||
|
||||
for _, pfast := range p {
|
||||
pslow := p[slow]
|
||||
if pslow.Pos == pfast.Pos &&
|
||||
pslow.Start == pfast.Start &&
|
||||
pslow.End == pfast.End &&
|
||||
pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
|
||||
continue // duplicate, so only move fast ahead
|
||||
}
|
||||
|
||||
slow++
|
||||
|
||||
p[slow] = pfast
|
||||
}
|
||||
|
||||
return p[:slow+1]
|
||||
}
|
||||
|
||||
type TermLocationMap map[string]Locations
|
||||
|
||||
func (t TermLocationMap) AddLocation(term string, location *Location) {
|
||||
|
@ -100,9 +158,6 @@ type DocumentMatch struct {
|
|||
// fields as float64s and date fields as time.RFC3339 formatted strings.
|
||||
Fields map[string]interface{} `json:"fields,omitempty"`
|
||||
|
||||
// if we load the document for this hit, remember it so we dont load again
|
||||
Document *document.Document `json:"-"`
|
||||
|
||||
// used to maintain natural index order
|
||||
HitNumber uint64 `json:"-"`
|
||||
|
||||
|
@ -195,10 +250,6 @@ func (dm *DocumentMatch) Size() int {
|
|||
size.SizeOfPtr
|
||||
}
|
||||
|
||||
if dm.Document != nil {
|
||||
sizeInBytes += dm.Document.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
|
@ -216,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
|
||||
var lastField string
|
||||
var tlm TermLocationMap
|
||||
var needsDedupe bool
|
||||
|
||||
for i, ftl := range dm.FieldTermLocations {
|
||||
if lastField != ftl.Field {
|
||||
|
@ -239,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
|
||||
}
|
||||
|
||||
tlm[ftl.Term] = append(tlm[ftl.Term], loc)
|
||||
locs := tlm[ftl.Term]
|
||||
|
||||
// if the loc is before or at the last location, then there
|
||||
// might be duplicates that need to be deduplicated
|
||||
if !needsDedupe && len(locs) > 0 {
|
||||
last := locs[len(locs)-1]
|
||||
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
|
||||
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
|
||||
needsDedupe = true
|
||||
}
|
||||
}
|
||||
|
||||
tlm[ftl.Term] = append(locs, loc)
|
||||
|
||||
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
|
||||
Location: Location{
|
||||
|
@ -247,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
},
|
||||
}
|
||||
}
|
||||
|
||||
if needsDedupe {
|
||||
for _, tlm := range dm.Locations {
|
||||
for term, locs := range tlm {
|
||||
tlm[term] = locs.Dedupe()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
|
||||
|
@ -280,11 +352,14 @@ type Searcher interface {
|
|||
type SearcherOptions struct {
|
||||
Explain bool
|
||||
IncludeTermVectors bool
|
||||
Score string
|
||||
}
|
||||
|
||||
// SearchContext represents the context around a single search
|
||||
type SearchContext struct {
|
||||
DocumentMatchPool *DocumentMatchPool
|
||||
Collector Collector
|
||||
IndexReader index.IndexReader
|
||||
}
|
||||
|
||||
func (sc *SearchContext) Size() int {
|
||||
|
|
90
vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
generated
vendored
90
vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
generated
vendored
|
@ -45,6 +45,7 @@ type BooleanSearcher struct {
|
|||
scorer *scorer.ConjunctionQueryScorer
|
||||
matches []*search.DocumentMatch
|
||||
initialized bool
|
||||
done bool
|
||||
}
|
||||
|
||||
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
|
||||
|
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
|
|||
|
||||
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
|
||||
if s.done {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if !s.initialized {
|
||||
err := s.initSearchers(ctx)
|
||||
if err != nil {
|
||||
|
@ -319,11 +324,20 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
|
|||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if rv == nil {
|
||||
s.done = true
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
|
||||
if s.done {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if !s.initialized {
|
||||
err := s.initSearchers(ctx)
|
||||
if err != nil {
|
||||
|
@ -331,41 +345,51 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
|
|||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
if s.mustSearcher != nil {
|
||||
if s.currMust != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMust)
|
||||
// Advance the searcher only if the cursor is trailing the lookup ID
|
||||
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
|
||||
var err error
|
||||
if s.mustSearcher != nil {
|
||||
if s.currMust != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMust)
|
||||
}
|
||||
s.currMust, err = s.mustSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
s.currMust, err = s.mustSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if s.shouldSearcher != nil {
|
||||
if s.currShould != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currShould)
|
||||
}
|
||||
s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if s.mustNotSearcher != nil {
|
||||
if s.currMustNot != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMustNot)
|
||||
}
|
||||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if s.mustSearcher != nil && s.currMust != nil {
|
||||
s.currentID = s.currMust.IndexInternalID
|
||||
} else if s.mustSearcher == nil && s.currShould != nil {
|
||||
s.currentID = s.currShould.IndexInternalID
|
||||
} else {
|
||||
s.currentID = nil
|
||||
if s.shouldSearcher != nil {
|
||||
if s.currShould != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currShould)
|
||||
}
|
||||
s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if s.mustNotSearcher != nil {
|
||||
// Additional check for mustNotSearcher, whose cursor isn't tracked by
|
||||
// currentID to prevent it from moving when the searcher's tracked
|
||||
// position is already ahead of or at the requested ID.
|
||||
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
|
||||
if s.currMustNot != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMustNot)
|
||||
}
|
||||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.mustSearcher != nil && s.currMust != nil {
|
||||
s.currentID = s.currMust.IndexInternalID
|
||||
} else if s.mustSearcher == nil && s.currShould != nil {
|
||||
s.currentID = s.currShould.IndexInternalID
|
||||
} else {
|
||||
s.currentID = nil
|
||||
}
|
||||
}
|
||||
|
||||
return s.Next(ctx)
|
||||
|
|
43
vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
generated
vendored
43
vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
generated
vendored
|
@ -43,14 +43,27 @@ type ConjunctionSearcher struct {
|
|||
options search.SearcherOptions
|
||||
}
|
||||
|
||||
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
|
||||
// build the downstream searchers
|
||||
func NewConjunctionSearcher(indexReader index.IndexReader,
|
||||
qsearchers []search.Searcher, options search.SearcherOptions) (
|
||||
search.Searcher, error) {
|
||||
// build the sorted downstream searchers
|
||||
searchers := make(OrderedSearcherList, len(qsearchers))
|
||||
for i, searcher := range qsearchers {
|
||||
searchers[i] = searcher
|
||||
}
|
||||
// sort the searchers
|
||||
sort.Sort(searchers)
|
||||
|
||||
// attempt the "unadorned" conjunction optimization only when we
|
||||
// do not need extra information like freq-norm's or term vectors
|
||||
if len(searchers) > 1 &&
|
||||
options.Score == "none" && !options.IncludeTermVectors {
|
||||
rv, err := optimizeCompositeSearcher("conjunction:unadorned",
|
||||
indexReader, searchers, options)
|
||||
if err != nil || rv != nil {
|
||||
return rv, err
|
||||
}
|
||||
}
|
||||
|
||||
// build our searcher
|
||||
rv := ConjunctionSearcher{
|
||||
indexReader: indexReader,
|
||||
|
@ -63,24 +76,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
|
|||
|
||||
// attempt push-down conjunction optimization when there's >1 searchers
|
||||
if len(searchers) > 1 {
|
||||
var octx index.OptimizableContext
|
||||
|
||||
for _, searcher := range searchers {
|
||||
o, ok := searcher.(index.Optimizable)
|
||||
if ok {
|
||||
var err error
|
||||
octx, err = o.Optimize("conjunction", octx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if octx != nil {
|
||||
err := octx.Finish()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv, err := optimizeCompositeSearcher("conjunction",
|
||||
indexReader, searchers, options)
|
||||
if err != nil || rv != nil {
|
||||
return rv, err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,7 +157,7 @@ func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
|
|||
var rv *search.DocumentMatch
|
||||
var err error
|
||||
OUTER:
|
||||
for s.currs[s.maxIDIdx] != nil {
|
||||
for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
|
||||
maxID := s.currs[s.maxIDIdx].IndexInternalID
|
||||
|
||||
i := 0
|
||||
|
|
54
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
generated
vendored
54
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
generated
vendored
|
@ -40,6 +40,18 @@ func NewDisjunctionSearcher(indexReader index.IndexReader,
|
|||
func newDisjunctionSearcher(indexReader index.IndexReader,
|
||||
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
|
||||
limit bool) (search.Searcher, error) {
|
||||
// attempt the "unadorned" disjunction optimization only when we
|
||||
// do not need extra information like freq-norm's or term vectors
|
||||
// and the requested min is simple
|
||||
if len(qsearchers) > 1 && min <= 1 &&
|
||||
options.Score == "none" && !options.IncludeTermVectors {
|
||||
rv, err := optimizeCompositeSearcher("disjunction:unadorned",
|
||||
indexReader, qsearchers, options)
|
||||
if err != nil || rv != nil {
|
||||
return rv, err
|
||||
}
|
||||
}
|
||||
|
||||
if len(qsearchers) > DisjunctionHeapTakeover {
|
||||
return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options,
|
||||
limit)
|
||||
|
@ -48,6 +60,42 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
|
|||
limit)
|
||||
}
|
||||
|
||||
func optimizeCompositeSearcher(optimizationKind string,
|
||||
indexReader index.IndexReader, qsearchers []search.Searcher,
|
||||
options search.SearcherOptions) (search.Searcher, error) {
|
||||
var octx index.OptimizableContext
|
||||
|
||||
for _, searcher := range qsearchers {
|
||||
o, ok := searcher.(index.Optimizable)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
octx, err = o.Optimize(optimizationKind, octx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if octx == nil {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
optimized, err := octx.Finish()
|
||||
if err != nil || optimized == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tfr, ok := optimized.(index.TermFieldReader)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return newTermSearcherFromReader(indexReader, tfr,
|
||||
[]byte(optimizationKind), "*", 1.0, options)
|
||||
}
|
||||
|
||||
func tooManyClauses(count int) bool {
|
||||
if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
|
||||
return true
|
||||
|
@ -55,7 +103,7 @@ func tooManyClauses(count int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func tooManyClausesErr() error {
|
||||
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
|
||||
DisjunctionMaxClauseCount)
|
||||
func tooManyClausesErr(count int) error {
|
||||
return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]",
|
||||
count, DisjunctionMaxClauseCount)
|
||||
}
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go
generated
vendored
|
@ -62,7 +62,7 @@ func newDisjunctionHeapSearcher(indexReader index.IndexReader,
|
|||
limit bool) (
|
||||
*DisjunctionHeapSearcher, error) {
|
||||
if limit && tooManyClauses(len(searchers)) {
|
||||
return nil, tooManyClausesErr()
|
||||
return nil, tooManyClausesErr(len(searchers))
|
||||
}
|
||||
|
||||
// build our searcher
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go
generated
vendored
|
@ -50,7 +50,7 @@ func newDisjunctionSliceSearcher(indexReader index.IndexReader,
|
|||
limit bool) (
|
||||
*DisjunctionSliceSearcher, error) {
|
||||
if limit && tooManyClauses(len(qsearchers)) {
|
||||
return nil, tooManyClausesErr()
|
||||
return nil, tooManyClausesErr(len(qsearchers))
|
||||
}
|
||||
// build the downstream searchers
|
||||
searchers := make(OrderedSearcherList, len(qsearchers))
|
||||
|
|
54
vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
generated
vendored
54
vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
generated
vendored
|
@ -31,6 +31,10 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
|
|||
return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
|
||||
}
|
||||
|
||||
if fuzziness < 0 {
|
||||
return nil, fmt.Errorf("invalid fuzziness, negative")
|
||||
}
|
||||
|
||||
// Note: we don't byte slice the term for a prefix because of runes.
|
||||
prefixTerm := ""
|
||||
for i, r := range term {
|
||||
|
@ -53,32 +57,40 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
|
|||
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
|
||||
fuzziness int, field, prefixTerm string) (rv []string, err error) {
|
||||
rv = make([]string, 0)
|
||||
|
||||
// in case of advanced reader implementations directly call
|
||||
// the levenshtein automaton based iterator to collect the
|
||||
// candidate terms
|
||||
if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
|
||||
fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
rv = append(rv, tfd.Term)
|
||||
if tooManyClauses(len(rv)) {
|
||||
return nil, tooManyClausesErr(len(rv))
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
return rv, err
|
||||
}
|
||||
|
||||
var fieldDict index.FieldDict
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
// in case of advanced reader implementations directly call
|
||||
// the levenshtein automaton based iterator to collect the
|
||||
// candidate terms
|
||||
if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
|
||||
fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness)
|
||||
if err != nil {
|
||||
return rv, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
rv = append(rv, tfd.Term)
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
return rv, err
|
||||
}
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
|
@ -95,7 +107,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
|
|||
if !exceeded && ld <= fuzziness {
|
||||
rv = append(rv, tfd.Term)
|
||||
if tooManyClauses(len(rv)) {
|
||||
return rv, tooManyClausesErr()
|
||||
return nil, tooManyClausesErr(len(rv))
|
||||
}
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
|
|
223
vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
generated
vendored
223
vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
generated
vendored
|
@ -22,6 +22,11 @@ import (
|
|||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
type filterFunc func(key []byte) bool
|
||||
|
||||
var GeoBitsShift1 = (geo.GeoBits << 1)
|
||||
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
|
||||
|
||||
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
|
||||
maxLon, maxLat float64, field string, boost float64,
|
||||
options search.SearcherOptions, checkBoundaries bool) (
|
||||
|
@ -36,10 +41,18 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
|
|||
}
|
||||
|
||||
// do math to produce list of terms needed for this search
|
||||
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
|
||||
minLon, minLat, maxLon, maxLat, checkBoundaries)
|
||||
onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
|
||||
minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var onBoundarySearcher search.Searcher
|
||||
dvReader, err := indexReader.DocValueReader([]string{field})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(onBoundaryTerms) > 0 {
|
||||
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
|
||||
onBoundaryTerms, field, boost, options, false)
|
||||
|
@ -48,7 +61,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
|
|||
}
|
||||
// add filter to check points near the boundary
|
||||
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
|
||||
buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat))
|
||||
buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
|
||||
openedSearchers = append(openedSearchers, onBoundarySearcher)
|
||||
}
|
||||
|
||||
|
@ -89,84 +102,152 @@ var geoMaxShift = document.GeoPrecisionStep * 4
|
|||
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
|
||||
|
||||
func ComputeGeoRange(term uint64, shift uint,
|
||||
sminLon, sminLat, smaxLon, smaxLat float64,
|
||||
checkBoundaries bool) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte) {
|
||||
split := term | uint64(0x1)<<shift
|
||||
var upperMax uint64
|
||||
if shift < 63 {
|
||||
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
|
||||
} else {
|
||||
upperMax = 0xffffffffffffffff
|
||||
}
|
||||
lowerMax := split - 1
|
||||
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
|
||||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
|
||||
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
|
||||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
|
||||
onBoundary = append(onBoundary, plusOnBoundary...)
|
||||
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
|
||||
return
|
||||
}
|
||||
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
|
||||
indexReader index.IndexReader, field string) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte, err error) {
|
||||
preallocBytesLen := 32
|
||||
preallocBytes := make([]byte, preallocBytesLen)
|
||||
|
||||
func relateAndRecurse(start, end uint64, res uint,
|
||||
sminLon, sminLat, smaxLon, smaxLat float64,
|
||||
checkBoundaries bool) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte) {
|
||||
minLon := geo.MortonUnhashLon(start)
|
||||
minLat := geo.MortonUnhashLat(start)
|
||||
maxLon := geo.MortonUnhashLon(end)
|
||||
maxLat := geo.MortonUnhashLat(end)
|
||||
|
||||
level := ((geo.GeoBits << 1) - res) >> 1
|
||||
|
||||
within := res%document.GeoPrecisionStep == 0 &&
|
||||
geo.RectWithin(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)
|
||||
if within || (level == geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)) {
|
||||
if !within && checkBoundaries {
|
||||
return [][]byte{
|
||||
numeric.MustNewPrefixCodedInt64(int64(start), res),
|
||||
}, nil
|
||||
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
|
||||
if len(preallocBytes) <= 0 {
|
||||
preallocBytesLen = preallocBytesLen * 2
|
||||
preallocBytes = make([]byte, preallocBytesLen)
|
||||
}
|
||||
return nil,
|
||||
[][]byte{
|
||||
numeric.MustNewPrefixCodedInt64(int64(start), res),
|
||||
}
|
||||
} else if level < geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat) {
|
||||
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
|
||||
checkBoundaries)
|
||||
|
||||
rv, preallocBytes, err =
|
||||
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
|
||||
|
||||
return rv
|
||||
}
|
||||
return nil, nil
|
||||
|
||||
var fieldDict index.FieldDictContains
|
||||
var isIndexed filterFunc
|
||||
if irr, ok := indexReader.(index.IndexReaderContains); ok {
|
||||
fieldDict, err = irr.FieldDictContains(field)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
isIndexed = func(term []byte) bool {
|
||||
found, err := fieldDict.Contains(term)
|
||||
return err == nil && found
|
||||
}
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if fieldDict != nil {
|
||||
if fd, ok := fieldDict.(index.FieldDict); ok {
|
||||
cerr := fd.Close()
|
||||
if cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if isIndexed == nil {
|
||||
isIndexed = func(term []byte) bool {
|
||||
if indexReader != nil {
|
||||
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
|
||||
if err != nil || reader == nil {
|
||||
return false
|
||||
}
|
||||
if reader.Count() == 0 {
|
||||
_ = reader.Close()
|
||||
return false
|
||||
}
|
||||
_ = reader.Close()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
var computeGeoRange func(term uint64, shift uint) // declare for recursion
|
||||
|
||||
relateAndRecurse := func(start, end uint64, res, level uint) {
|
||||
minLon := geo.MortonUnhashLon(start)
|
||||
minLat := geo.MortonUnhashLat(start)
|
||||
maxLon := geo.MortonUnhashLon(end)
|
||||
maxLat := geo.MortonUnhashLat(end)
|
||||
|
||||
within := res%document.GeoPrecisionStep == 0 &&
|
||||
geo.RectWithin(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)
|
||||
if within || (level == geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)) {
|
||||
codedTerm := makePrefixCoded(int64(start), res)
|
||||
if isIndexed(codedTerm) {
|
||||
if !within && checkBoundaries {
|
||||
onBoundary = append(onBoundary, codedTerm)
|
||||
} else {
|
||||
notOnBoundary = append(notOnBoundary, codedTerm)
|
||||
}
|
||||
}
|
||||
} else if level < geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat) {
|
||||
computeGeoRange(start, res-1)
|
||||
}
|
||||
}
|
||||
|
||||
computeGeoRange = func(term uint64, shift uint) {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
split := term | uint64(0x1)<<shift
|
||||
var upperMax uint64
|
||||
if shift < 63 {
|
||||
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
|
||||
} else {
|
||||
upperMax = 0xffffffffffffffff
|
||||
}
|
||||
|
||||
lowerMax := split - 1
|
||||
|
||||
level := (GeoBitsShift1 - shift) >> 1
|
||||
|
||||
relateAndRecurse(term, lowerMax, shift, level)
|
||||
relateAndRecurse(split, upperMax, shift, level)
|
||||
}
|
||||
|
||||
computeGeoRange(term, shift)
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return onBoundary, notOnBoundary, err
|
||||
}
|
||||
|
||||
func buildRectFilter(indexReader index.IndexReader, field string,
|
||||
func buildRectFilter(dvReader index.DocValueReader, field string,
|
||||
minLon, minLat, maxLon, maxLat float64) FilterFunc {
|
||||
return func(d *search.DocumentMatch) bool {
|
||||
var lon, lat float64
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
|
||||
[]string{field}, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
var i64 int64
|
||||
i64, err = prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lon = geo.MortonUnhashLon(uint64(i64))
|
||||
lat = geo.MortonUnhashLat(uint64(i64))
|
||||
found = true
|
||||
}
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
var i64 int64
|
||||
i64, err = prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
if err == nil && found {
|
||||
return geo.BoundingBoxContains(lon, lat,
|
||||
minLon, minLat, maxLon, maxLat)
|
||||
for i := range lons {
|
||||
if geo.BoundingBoxContains(lons[i], lats[i],
|
||||
minLon, minLat, maxLon, maxLat) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
59
vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
generated
vendored
59
vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
generated
vendored
|
@ -34,14 +34,19 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
|
|||
// build a searcher for the box
|
||||
boxSearcher, err := boxSearcher(indexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
|
||||
field, boost, options)
|
||||
field, boost, options, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dvReader, err := indexReader.DocValueReader([]string{field})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// wrap it in a filtering searcher which checks the actual distance
|
||||
return NewFilteringSearcher(boxSearcher,
|
||||
buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil
|
||||
buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
|
||||
}
|
||||
|
||||
// boxSearcher builds a searcher for the described bounding box
|
||||
|
@ -49,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
|
|||
// two boxes joined through a disjunction searcher
|
||||
func boxSearcher(indexReader index.IndexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
|
||||
field string, boost float64, options search.SearcherOptions) (
|
||||
field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
|
||||
search.Searcher, error) {
|
||||
if bottomRightLon < topLeftLon {
|
||||
// cross date line, rewrite as two parts
|
||||
|
||||
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
-180, bottomRightLat, bottomRightLon, topLeftLat,
|
||||
field, boost, options, false)
|
||||
field, boost, options, checkBoundaries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
|
||||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
|
||||
checkBoundaries)
|
||||
if err != nil {
|
||||
_ = leftSearcher.Close()
|
||||
return nil, err
|
||||
|
@ -77,39 +83,42 @@ func boxSearcher(indexReader index.IndexReader,
|
|||
return boxSearcher, nil
|
||||
}
|
||||
|
||||
// build geoboundinggox searcher for that bounding box
|
||||
// build geoboundingbox searcher for that bounding box
|
||||
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
|
||||
options, false)
|
||||
options, checkBoundaries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return boxSearcher, nil
|
||||
}
|
||||
|
||||
func buildDistFilter(indexReader index.IndexReader, field string,
|
||||
func buildDistFilter(dvReader index.DocValueReader, field string,
|
||||
centerLon, centerLat, maxDist float64) FilterFunc {
|
||||
return func(d *search.DocumentMatch) bool {
|
||||
var lon, lat float64
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
|
||||
[]string{field}, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lon = geo.MortonUnhashLon(uint64(i64))
|
||||
lat = geo.MortonUnhashLat(uint64(i64))
|
||||
found = true
|
||||
}
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
if err == nil && found {
|
||||
dist := geo.Haversin(lon, lat, centerLon, centerLat)
|
||||
if dist <= maxDist/1000 {
|
||||
return true
|
||||
for i := range lons {
|
||||
dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
|
||||
if dist <= maxDist/1000 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
|
126
vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
generated
vendored
Normal file
126
vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
generated
vendored
Normal file
|
@ -0,0 +1,126 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"math"
|
||||
)
|
||||
|
||||
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
|
||||
polygon []geo.Point, field string, boost float64,
|
||||
options search.SearcherOptions) (search.Searcher, error) {
|
||||
|
||||
if len(polygon) < 3 {
|
||||
return nil, fmt.Errorf("Too few points specified for the polygon boundary")
|
||||
}
|
||||
|
||||
// compute the bounding box enclosing the polygon
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
|
||||
geo.BoundingRectangleForPolygon(polygon)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// build a searcher for the bounding box on the polygon
|
||||
boxSearcher, err := boxSearcher(indexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
|
||||
field, boost, options, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dvReader, err := indexReader.DocValueReader([]string{field})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// wrap it in a filtering searcher that checks for the polygon inclusivity
|
||||
return NewFilteringSearcher(boxSearcher,
|
||||
buildPolygonFilter(dvReader, field, polygon)), nil
|
||||
}
|
||||
|
||||
const float64EqualityThreshold = 1e-6
|
||||
|
||||
func almostEqual(a, b float64) bool {
|
||||
return math.Abs(a-b) <= float64EqualityThreshold
|
||||
}
|
||||
|
||||
// buildPolygonFilter builds a filter that returns true if the point lies inside the
|
||||
// polygon. It is based on the ray-casting technique as referred
|
||||
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
|
||||
func buildPolygonFilter(dvReader index.DocValueReader, field string,
|
||||
polygon []geo.Point) FilterFunc {
|
||||
return func(d *search.DocumentMatch) bool {
|
||||
// check geo matches against all numeric type terms indexed
|
||||
var lons, lats []float64
|
||||
var found bool
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
|
||||
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Note: this approach works for points which are strictly inside
|
||||
// the polygon, i.e. it might fail for certain points on the polygon boundaries.
|
||||
if err == nil && found {
|
||||
nVertices := len(polygon)
|
||||
if len(polygon) < 3 {
|
||||
return false
|
||||
}
|
||||
rayIntersectsSegment := func(point, a, b geo.Point) bool {
|
||||
return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
|
||||
point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
|
||||
}
|
||||
|
||||
for i := range lons {
|
||||
pt := geo.Point{Lon: lons[i], Lat: lats[i]}
|
||||
inside := rayIntersectsSegment(pt, polygon[len(polygon)-1], polygon[0])
|
||||
// check for a direct vertex match
|
||||
if almostEqual(polygon[0].Lat, lats[i]) &&
|
||||
almostEqual(polygon[0].Lon, lons[i]) {
|
||||
return true
|
||||
}
|
||||
|
||||
for j := 1; j < nVertices; j++ {
|
||||
if almostEqual(polygon[j].Lat, lats[i]) &&
|
||||
almostEqual(polygon[j].Lon, lons[i]) {
|
||||
return true
|
||||
}
|
||||
if rayIntersectsSegment(pt, polygon[j-1], polygon[j]) {
|
||||
inside = !inside
|
||||
}
|
||||
}
|
||||
if inside {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
8
vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
generated
vendored
|
@ -22,6 +22,10 @@ import (
|
|||
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
|
||||
field string, boost float64, options search.SearcherOptions, limit bool) (
|
||||
search.Searcher, error) {
|
||||
if limit && tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr(len(terms))
|
||||
}
|
||||
|
||||
qsearchers := make([]search.Searcher, len(terms))
|
||||
qsearchersClose := func() {
|
||||
for _, searcher := range qsearchers {
|
||||
|
@ -46,6 +50,10 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
|
|||
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
|
||||
field string, boost float64, options search.SearcherOptions, limit bool) (
|
||||
search.Searcher, error) {
|
||||
if limit && tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr(len(terms))
|
||||
}
|
||||
|
||||
qsearchers := make([]search.Searcher, len(terms))
|
||||
qsearchersClose := func() {
|
||||
for _, searcher := range qsearchers {
|
||||
|
|
55
vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
generated
vendored
55
vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
generated
vendored
|
@ -53,22 +53,51 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
|
|||
if !*inclusiveMax && maxInt64 != math.MinInt64 {
|
||||
maxInt64--
|
||||
}
|
||||
|
||||
var fieldDict index.FieldDictContains
|
||||
var isIndexed filterFunc
|
||||
var err error
|
||||
if irr, ok := indexReader.(index.IndexReaderContains); ok {
|
||||
fieldDict, err = irr.FieldDictContains(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
isIndexed = func(term []byte) bool {
|
||||
found, err := fieldDict.Contains(term)
|
||||
return err == nil && found
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME hard-coded precision, should match field declaration
|
||||
termRanges := splitInt64Range(minInt64, maxInt64, 4)
|
||||
terms := termRanges.Enumerate()
|
||||
terms := termRanges.Enumerate(isIndexed)
|
||||
if fieldDict != nil {
|
||||
if fd, ok := fieldDict.(index.FieldDict); ok {
|
||||
cerr := fd.Close()
|
||||
if cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(terms) < 1 {
|
||||
// cannot return MatchNoneSearcher because of interaction with
|
||||
// commit f391b991c20f02681bacd197afc6d8aed444e132
|
||||
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
|
||||
true)
|
||||
}
|
||||
var err error
|
||||
terms, err = filterCandidateTerms(indexReader, terms, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
// for upside_down
|
||||
if isIndexed == nil {
|
||||
terms, err = filterCandidateTerms(indexReader, terms, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr()
|
||||
return nil, tooManyClausesErr(len(terms))
|
||||
}
|
||||
|
||||
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
|
||||
|
@ -125,11 +154,17 @@ type termRange struct {
|
|||
endTerm []byte
|
||||
}
|
||||
|
||||
func (t *termRange) Enumerate() [][]byte {
|
||||
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
|
||||
var rv [][]byte
|
||||
next := t.startTerm
|
||||
for bytes.Compare(next, t.endTerm) <= 0 {
|
||||
rv = append(rv, next)
|
||||
if filter != nil {
|
||||
if filter(next) {
|
||||
rv = append(rv, next)
|
||||
}
|
||||
} else {
|
||||
rv = append(rv, next)
|
||||
}
|
||||
next = incrementBytes(next)
|
||||
}
|
||||
return rv
|
||||
|
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
|
|||
|
||||
type termRanges []*termRange
|
||||
|
||||
func (tr termRanges) Enumerate() [][]byte {
|
||||
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
|
||||
var rv [][]byte
|
||||
for _, tri := range tr {
|
||||
trie := tri.Enumerate()
|
||||
trie := tri.Enumerate(filter)
|
||||
rv = append(rv, trie...)
|
||||
}
|
||||
return rv
|
||||
|
|
6
vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go
generated
vendored
|
@ -32,7 +32,7 @@ func init() {
|
|||
}
|
||||
|
||||
type PhraseSearcher struct {
|
||||
mustSearcher *ConjunctionSearcher
|
||||
mustSearcher search.Searcher
|
||||
queryNorm float64
|
||||
currMust *search.DocumentMatch
|
||||
terms [][]string
|
||||
|
@ -210,7 +210,7 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// checkCurrMustMatch is soley concerned with determining if the DocumentMatch
|
||||
// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
|
||||
// pointed to by s.currMust (which satisfies the pre-condition searcher)
|
||||
// also satisfies the phrase constraints. if so, it returns a DocumentMatch
|
||||
// for this document, otherwise nil
|
||||
|
@ -241,7 +241,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D
|
|||
return nil
|
||||
}
|
||||
|
||||
// checkCurrMustMatchField is soley concerned with determining if one
|
||||
// checkCurrMustMatchField is solely concerned with determining if one
|
||||
// particular field within the currMust DocumentMatch Locations
|
||||
// satisfies the phrase constraints (possibly more than once). if so,
|
||||
// the matching field term locations are appended to the provided
|
||||
|
|
83
vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
generated
vendored
83
vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
generated
vendored
|
@ -21,48 +21,67 @@ import (
|
|||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
// NewRegexpStringSearcher is similar to NewRegexpSearcher, but
|
||||
// additionally optimizes for index readers that handle regexp's.
|
||||
func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string,
|
||||
field string, boost float64, options search.SearcherOptions) (
|
||||
search.Searcher, error) {
|
||||
ir, ok := indexReader.(index.IndexReaderRegexp)
|
||||
if !ok {
|
||||
r, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewRegexpSearcher(indexReader, r, field, boost, options)
|
||||
}
|
||||
|
||||
fieldDict, err := ir.FieldDictRegexp(field, pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
var candidateTerms []string
|
||||
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
|
||||
options, true)
|
||||
}
|
||||
|
||||
// NewRegexpSearcher creates a searcher which will match documents that
|
||||
// contain terms which match the pattern regexp. The match must be EXACT
|
||||
// matching the entire term. The provided regexp SHOULD NOT start with ^
|
||||
// or end with $ as this can interfere with the implementation. Separately,
|
||||
// matches will be checked to ensure they match the entire term.
|
||||
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
|
||||
func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
|
||||
field string, boost float64, options search.SearcherOptions) (
|
||||
search.Searcher, error) {
|
||||
var candidateTerms []string
|
||||
if ir, ok := indexReader.(index.IndexReaderRegexp); ok {
|
||||
fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// enumerate the terms and check against regexp
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
prefixTerm, complete := pattern.LiteralPrefix()
|
||||
if complete {
|
||||
// there is no pattern
|
||||
candidateTerms = []string{prefixTerm}
|
||||
} else {
|
||||
var err error
|
||||
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
|
||||
prefixTerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
prefixTerm, complete := pattern.LiteralPrefix()
|
||||
if complete {
|
||||
// there is no pattern
|
||||
candidateTerms = []string{prefixTerm}
|
||||
} else {
|
||||
var err error
|
||||
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
|
||||
prefixTerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
|
||||
|
@ -70,7 +89,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
|
|||
}
|
||||
|
||||
func findRegexpCandidateTerms(indexReader index.IndexReader,
|
||||
pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
|
||||
pattern index.Regexp, field, prefixTerm string) (rv []string, err error) {
|
||||
rv = make([]string, 0)
|
||||
var fieldDict index.FieldDict
|
||||
if len(prefixTerm) > 0 {
|
||||
|
@ -91,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader,
|
|||
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
|
||||
rv = append(rv, tfd.Term)
|
||||
if tooManyClauses(len(rv)) {
|
||||
return rv, tooManyClausesErr()
|
||||
return rv, tooManyClausesErr(len(rv))
|
||||
}
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
|
|
24
vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
generated
vendored
24
vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
generated
vendored
|
@ -38,28 +38,20 @@ type TermSearcher struct {
|
|||
}
|
||||
|
||||
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
|
||||
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
count, err := indexReader.DocCount()
|
||||
if err != nil {
|
||||
_ = reader.Close()
|
||||
return nil, err
|
||||
}
|
||||
scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
|
||||
return &TermSearcher{
|
||||
indexReader: indexReader,
|
||||
reader: reader,
|
||||
scorer: scorer,
|
||||
}, nil
|
||||
return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options)
|
||||
}
|
||||
|
||||
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
|
||||
reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors)
|
||||
needFreqNorm := options.Score != "none"
|
||||
reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newTermSearcherFromReader(indexReader, reader, term, field, boost, options)
|
||||
}
|
||||
|
||||
func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
|
||||
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
|
||||
count, err := indexReader.DocCount()
|
||||
if err != nil {
|
||||
_ = reader.Close()
|
||||
|
|
11
vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
generated
vendored
11
vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
generated
vendored
|
@ -27,13 +27,24 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
var terms []string
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
terms = append(terms, tfd.Term)
|
||||
if tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr(len(terms))
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
|
||||
}
|
||||
|
|
6
vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go
generated
vendored
|
@ -48,6 +48,12 @@ func NewTermRangeSearcher(indexReader index.IndexReader,
|
|||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
var terms []string
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
|
|
29
vendor/github.com/blevesearch/bleve/search/sort.go
generated
vendored
29
vendor/github.com/blevesearch/bleve/search/sort.go
generated
vendored
|
@ -38,6 +38,8 @@ type SearchSort interface {
|
|||
RequiresScoring() bool
|
||||
RequiresFields() []string
|
||||
|
||||
Reverse()
|
||||
|
||||
Copy() SearchSort
|
||||
}
|
||||
|
||||
|
@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (so SortOrder) Reverse() {
|
||||
for _, soi := range so {
|
||||
soi.Reverse()
|
||||
}
|
||||
}
|
||||
|
||||
// SortFieldType lets you control some internal sort behavior
|
||||
// normally leaving this to the zero-value of SortFieldAuto is fine
|
||||
type SortFieldType int
|
||||
|
@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortField) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
if s.Missing == SortFieldMissingFirst {
|
||||
s.Missing = SortFieldMissingLast
|
||||
} else {
|
||||
s.Missing = SortFieldMissingFirst
|
||||
}
|
||||
}
|
||||
|
||||
// SortDocID will sort results by the document identifier
|
||||
type SortDocID struct {
|
||||
Desc bool
|
||||
|
@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortDocID) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
// SortScore will sort results by the document match score
|
||||
type SortScore struct {
|
||||
Desc bool
|
||||
|
@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortScore) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
|
||||
|
||||
// NewSortGeoDistance creates SearchSort instance for sorting documents by
|
||||
|
@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortGeoDistance) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
type BytesSlice [][]byte
|
||||
|
||||
func (p BytesSlice) Len() int { return len(p) }
|
||||
|
|
3
vendor/github.com/blevesearch/go-porterstemmer/go.mod
generated
vendored
Normal file
3
vendor/github.com/blevesearch/go-porterstemmer/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/blevesearch/go-porterstemmer
|
||||
|
||||
go 1.13
|
10
vendor/github.com/blevesearch/mmap-go/.gitignore
generated
vendored
Normal file
10
vendor/github.com/blevesearch/mmap-go/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
*.out
|
||||
*.5
|
||||
*.6
|
||||
*.8
|
||||
*.swp
|
||||
_obj
|
||||
_test
|
||||
testdata
|
||||
/.idea
|
||||
*.iml
|
16
vendor/github.com/blevesearch/mmap-go/.travis.yml
generated
vendored
Normal file
16
vendor/github.com/blevesearch/mmap-go/.travis.yml
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
language: go
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
- windows
|
||||
go:
|
||||
- 1.11.4
|
||||
env:
|
||||
global:
|
||||
- GO111MODULE=on
|
||||
install:
|
||||
- go mod download
|
||||
- go get github.com/mattn/goveralls
|
||||
script:
|
||||
- go test -v -covermode=count -coverprofile=coverage.out -bench . -cpu 1,4
|
||||
- '[ "${TRAVIS_PULL_REQUEST}" = "false" ] && $HOME/gopath/bin/goveralls -coverprofile=coverage.out -service=travis-ci -repotoken $COVERALLS_TOKEN || true'
|
25
vendor/github.com/blevesearch/mmap-go/LICENSE
generated
vendored
Normal file
25
vendor/github.com/blevesearch/mmap-go/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2011, Evan Shaw <edsrzf@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
12
vendor/github.com/blevesearch/mmap-go/README.md
generated
vendored
Normal file
12
vendor/github.com/blevesearch/mmap-go/README.md
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
mmap-go
|
||||
=======
|
||||
|
||||
mmap-go is a portable mmap package for the [Go programming language](http://golang.org).
|
||||
It has been tested on Linux (386, amd64), OS X, and Windows (386). It should also
|
||||
work on other Unix-like platforms, but hasn't been tested with them. I'm interested
|
||||
to hear about the results.
|
||||
|
||||
I haven't been able to add more features without adding significant complexity,
|
||||
so mmap-go doesn't support mprotect, mincore, and maybe a few other things.
|
||||
If you're running on a Unix-like platform and need some of these features,
|
||||
I suggest Gustavo Niemeyer's [gommap](http://labix.org/gommap).
|
3
vendor/github.com/blevesearch/mmap-go/go.mod
generated
vendored
Normal file
3
vendor/github.com/blevesearch/mmap-go/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/blevesearch/mmap-go
|
||||
|
||||
require golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6
|
2
vendor/github.com/blevesearch/mmap-go/go.sum
generated
vendored
Normal file
2
vendor/github.com/blevesearch/mmap-go/go.sum
generated
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6 h1:IcgEB62HYgAhX0Nd/QrVgZlxlcyxbGQHElLUhW2X4Fo=
|
||||
golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
117
vendor/github.com/blevesearch/mmap-go/mmap.go
generated
vendored
Normal file
117
vendor/github.com/blevesearch/mmap-go/mmap.go
generated
vendored
Normal file
|
@ -0,0 +1,117 @@
|
|||
// Copyright 2011 Evan Shaw. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file defines the common package interface and contains a little bit of
|
||||
// factored out logic.
|
||||
|
||||
// Package mmap allows mapping files into memory. It tries to provide a simple, reasonably portable interface,
|
||||
// but doesn't go out of its way to abstract away every little platform detail.
|
||||
// This specifically means:
|
||||
// * forked processes may or may not inherit mappings
|
||||
// * a file's timestamp may or may not be updated by writes through mappings
|
||||
// * specifying a size larger than the file's actual size can increase the file's size
|
||||
// * If the mapped file is being modified by another process while your program's running, don't expect consistent results between platforms
|
||||
package mmap
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
// RDONLY maps the memory read-only.
|
||||
// Attempts to write to the MMap object will result in undefined behavior.
|
||||
RDONLY = 0
|
||||
// RDWR maps the memory as read-write. Writes to the MMap object will update the
|
||||
// underlying file.
|
||||
RDWR = 1 << iota
|
||||
// COPY maps the memory as copy-on-write. Writes to the MMap object will affect
|
||||
// memory, but the underlying file will remain unchanged.
|
||||
COPY
|
||||
// If EXEC is set, the mapped memory is marked as executable.
|
||||
EXEC
|
||||
)
|
||||
|
||||
const (
|
||||
// If the ANON flag is set, the mapped memory will not be backed by a file.
|
||||
ANON = 1 << iota
|
||||
)
|
||||
|
||||
// MMap represents a file mapped into memory.
|
||||
type MMap []byte
|
||||
|
||||
// Map maps an entire file into memory.
|
||||
// If ANON is set in flags, f is ignored.
|
||||
func Map(f *os.File, prot, flags int) (MMap, error) {
|
||||
return MapRegion(f, -1, prot, flags, 0)
|
||||
}
|
||||
|
||||
// MapRegion maps part of a file into memory.
|
||||
// The offset parameter must be a multiple of the system's page size.
|
||||
// If length < 0, the entire file will be mapped.
|
||||
// If ANON is set in flags, f is ignored.
|
||||
func MapRegion(f *os.File, length int, prot, flags int, offset int64) (MMap, error) {
|
||||
if offset%int64(os.Getpagesize()) != 0 {
|
||||
return nil, errors.New("offset parameter must be a multiple of the system's page size")
|
||||
}
|
||||
|
||||
var fd uintptr
|
||||
if flags&ANON == 0 {
|
||||
fd = uintptr(f.Fd())
|
||||
if length < 0 {
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
length = int(fi.Size())
|
||||
}
|
||||
} else {
|
||||
if length <= 0 {
|
||||
return nil, errors.New("anonymous mapping requires non-zero length")
|
||||
}
|
||||
fd = ^uintptr(0)
|
||||
}
|
||||
return mmap(length, uintptr(prot), uintptr(flags), fd, offset)
|
||||
}
|
||||
|
||||
func (m *MMap) header() *reflect.SliceHeader {
|
||||
return (*reflect.SliceHeader)(unsafe.Pointer(m))
|
||||
}
|
||||
|
||||
func (m *MMap) addrLen() (uintptr, uintptr) {
|
||||
header := m.header()
|
||||
return header.Data, uintptr(header.Len)
|
||||
}
|
||||
|
||||
// Lock keeps the mapped region in physical memory, ensuring that it will not be
|
||||
// swapped out.
|
||||
func (m MMap) Lock() error {
|
||||
return m.lock()
|
||||
}
|
||||
|
||||
// Unlock reverses the effect of Lock, allowing the mapped region to potentially
|
||||
// be swapped out.
|
||||
// If m is already unlocked, an error will result.
|
||||
func (m MMap) Unlock() error {
|
||||
return m.unlock()
|
||||
}
|
||||
|
||||
// Flush synchronizes the mapping's contents to the file's contents on disk.
|
||||
func (m MMap) Flush() error {
|
||||
return m.flush()
|
||||
}
|
||||
|
||||
// Unmap deletes the memory mapped region, flushes any remaining changes, and sets
|
||||
// m to nil.
|
||||
// Trying to read or write any remaining references to m after Unmap is called will
|
||||
// result in undefined behavior.
|
||||
// Unmap should only be called on the slice value that was originally returned from
|
||||
// a call to Map. Calling Unmap on a derived slice may cause errors.
|
||||
func (m *MMap) Unmap() error {
|
||||
err := m.unmap()
|
||||
*m = nil
|
||||
return err
|
||||
}
|
51
vendor/github.com/blevesearch/mmap-go/mmap_unix.go
generated
vendored
Normal file
51
vendor/github.com/blevesearch/mmap-go/mmap_unix.go
generated
vendored
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2011 Evan Shaw. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build darwin dragonfly freebsd linux openbsd solaris netbsd
|
||||
|
||||
package mmap
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func mmap(len int, inprot, inflags, fd uintptr, off int64) ([]byte, error) {
|
||||
flags := unix.MAP_SHARED
|
||||
prot := unix.PROT_READ
|
||||
switch {
|
||||
case inprot&COPY != 0:
|
||||
prot |= unix.PROT_WRITE
|
||||
flags = unix.MAP_PRIVATE
|
||||
case inprot&RDWR != 0:
|
||||
prot |= unix.PROT_WRITE
|
||||
}
|
||||
if inprot&EXEC != 0 {
|
||||
prot |= unix.PROT_EXEC
|
||||
}
|
||||
if inflags&ANON != 0 {
|
||||
flags |= unix.MAP_ANON
|
||||
}
|
||||
|
||||
b, err := unix.Mmap(int(fd), off, len, prot, flags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (m MMap) flush() error {
|
||||
return unix.Msync([]byte(m), unix.MS_SYNC)
|
||||
}
|
||||
|
||||
func (m MMap) lock() error {
|
||||
return unix.Mlock([]byte(m))
|
||||
}
|
||||
|
||||
func (m MMap) unlock() error {
|
||||
return unix.Munlock([]byte(m))
|
||||
}
|
||||
|
||||
func (m MMap) unmap() error {
|
||||
return unix.Munmap([]byte(m))
|
||||
}
|
153
vendor/github.com/blevesearch/mmap-go/mmap_windows.go
generated
vendored
Normal file
153
vendor/github.com/blevesearch/mmap-go/mmap_windows.go
generated
vendored
Normal file
|
@ -0,0 +1,153 @@
|
|||
// Copyright 2011 Evan Shaw. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mmap
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
// mmap on Windows is a two-step process.
|
||||
// First, we call CreateFileMapping to get a handle.
|
||||
// Then, we call MapViewOfFile to get an actual pointer into memory.
|
||||
// Because we want to emulate a POSIX-style mmap, we don't want to expose
|
||||
// the handle -- only the pointer. We also want to return only a byte slice,
|
||||
// not a struct, so it's convenient to manipulate.
|
||||
|
||||
// We keep this map so that we can get back the original handle from the memory address.
|
||||
|
||||
// addrinfo holds the handles recorded for one mapped view. Entries are stored
// in handleMap under the view's base address.
type addrinfo struct {
|
||||
// file is the handle of the mapped file; flush passes it to FlushFileBuffers.
file windows.Handle
|
||||
// mapview is the handle returned by CreateFileMapping; unmap closes it.
mapview windows.Handle
|
||||
// writable is true when the view was mapped with COPY or RDWR protection.
writable bool
|
||||
}
|
||||
|
||||
// handleLock is held around every access to handleMap.
var handleLock sync.Mutex
|
||||
// handleMap maps the base address of each mapped view to its addrinfo.
var handleMap = map[uintptr]*addrinfo{}
|
||||
|
||||
func mmap(len int, prot, flags, hfile uintptr, off int64) ([]byte, error) {
|
||||
flProtect := uint32(windows.PAGE_READONLY)
|
||||
dwDesiredAccess := uint32(windows.FILE_MAP_READ)
|
||||
writable := false
|
||||
switch {
|
||||
case prot© != 0:
|
||||
flProtect = windows.PAGE_WRITECOPY
|
||||
dwDesiredAccess = windows.FILE_MAP_COPY
|
||||
writable = true
|
||||
case prot&RDWR != 0:
|
||||
flProtect = windows.PAGE_READWRITE
|
||||
dwDesiredAccess = windows.FILE_MAP_WRITE
|
||||
writable = true
|
||||
}
|
||||
if prot&EXEC != 0 {
|
||||
flProtect <<= 4
|
||||
dwDesiredAccess |= windows.FILE_MAP_EXECUTE
|
||||
}
|
||||
|
||||
// The maximum size is the area of the file, starting from 0,
|
||||
// that we wish to allow to be mappable. It is the sum of
|
||||
// the length the user requested, plus the offset where that length
|
||||
// is starting from. This does not map the data into memory.
|
||||
maxSizeHigh := uint32((off + int64(len)) >> 32)
|
||||
maxSizeLow := uint32((off + int64(len)) & 0xFFFFFFFF)
|
||||
// TODO: Do we need to set some security attributes? It might help portability.
|
||||
h, errno := windows.CreateFileMapping(windows.Handle(hfile), nil, flProtect, maxSizeHigh, maxSizeLow, nil)
|
||||
if h == 0 {
|
||||
return nil, os.NewSyscallError("CreateFileMapping", errno)
|
||||
}
|
||||
|
||||
// Actually map a view of the data into memory. The view's size
|
||||
// is the length the user requested.
|
||||
fileOffsetHigh := uint32(off >> 32)
|
||||
fileOffsetLow := uint32(off & 0xFFFFFFFF)
|
||||
addr, errno := windows.MapViewOfFile(h, dwDesiredAccess, fileOffsetHigh, fileOffsetLow, uintptr(len))
|
||||
if addr == 0 {
|
||||
return nil, os.NewSyscallError("MapViewOfFile", errno)
|
||||
}
|
||||
handleLock.Lock()
|
||||
handleMap[addr] = &addrinfo{
|
||||
file: windows.Handle(hfile),
|
||||
mapview: h,
|
||||
writable: writable,
|
||||
}
|
||||
handleLock.Unlock()
|
||||
|
||||
m := MMap{}
|
||||
dh := m.header()
|
||||
dh.Data = addr
|
||||
dh.Len = len
|
||||
dh.Cap = dh.Len
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m MMap) flush() error {
|
||||
addr, len := m.addrLen()
|
||||
errno := windows.FlushViewOfFile(addr, len)
|
||||
if errno != nil {
|
||||
return os.NewSyscallError("FlushViewOfFile", errno)
|
||||
}
|
||||
|
||||
handleLock.Lock()
|
||||
defer handleLock.Unlock()
|
||||
handle, ok := handleMap[addr]
|
||||
if !ok {
|
||||
// should be impossible; we would've errored above
|
||||
return errors.New("unknown base address")
|
||||
}
|
||||
|
||||
if handle.writable {
|
||||
if err := windows.FlushFileBuffers(handle.file); err != nil {
|
||||
return os.NewSyscallError("FlushFileBuffers", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m MMap) lock() error {
|
||||
addr, len := m.addrLen()
|
||||
errno := windows.VirtualLock(addr, len)
|
||||
return os.NewSyscallError("VirtualLock", errno)
|
||||
}
|
||||
|
||||
func (m MMap) unlock() error {
|
||||
addr, len := m.addrLen()
|
||||
errno := windows.VirtualUnlock(addr, len)
|
||||
return os.NewSyscallError("VirtualUnlock", errno)
|
||||
}
|
||||
|
||||
func (m MMap) unmap() error {
|
||||
err := m.flush()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
addr := m.header().Data
|
||||
// Lock the UnmapViewOfFile along with the handleMap deletion.
|
||||
// As soon as we unmap the view, the OS is free to give the
|
||||
// same addr to another new map. We don't want another goroutine
|
||||
// to insert and remove the same addr into handleMap while
|
||||
// we're trying to remove our old addr/handle pair.
|
||||
handleLock.Lock()
|
||||
defer handleLock.Unlock()
|
||||
err = windows.UnmapViewOfFile(addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
handle, ok := handleMap[addr]
|
||||
if !ok {
|
||||
// should be impossible; we would've errored above
|
||||
return errors.New("unknown base address")
|
||||
}
|
||||
delete(handleMap, addr)
|
||||
|
||||
e := windows.CloseHandle(windows.Handle(handle.mapview))
|
||||
return os.NewSyscallError("CloseHandle", e)
|
||||
}
|
3
vendor/github.com/blevesearch/segment/go.mod
generated
vendored
Normal file
3
vendor/github.com/blevesearch/segment/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/blevesearch/segment
|
||||
|
||||
go 1.13
|
29
vendor/github.com/blevesearch/snowballstem/COPYING
generated
vendored
Normal file
29
vendor/github.com/blevesearch/snowballstem/COPYING
generated
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2004,2005, Richard Boulton
|
||||
Copyright (c) 2013, Yoshiki Shibukawa
|
||||
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Snowball project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
66
vendor/github.com/blevesearch/snowballstem/README.md
generated
vendored
Normal file
66
vendor/github.com/blevesearch/snowballstem/README.md
generated
vendored
Normal file
|
@ -0,0 +1,66 @@
|
|||
# snowballstem
|
||||
|
||||
This repository contains the Go stemmers generated by the [Snowball](https://github.com/snowballstem/snowball) project. They are maintained outside of the core bleve package so that they may be more easily reused in other contexts.
|
||||
|
||||
## Usage
|
||||
|
||||
All these stemmers export a single `Stem()` method which operates on a snowball `Env` structure. The `Env` structure maintains all state for the stemmer. A new `Env` is created to point at an initial string. After stemming, the results of the `Stem()` operation can be retrieved using the `Current()` method. The `Env` structure can be reused for subsequent calls by using the `SetCurrent()` method.
|
||||
|
||||
## Example
|
||||
|
||||
```
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/snowballstem"
|
||||
"github.com/blevesearch/snowballstem/english"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
// words to stem
|
||||
words := []string{
|
||||
"running",
|
||||
"jumping",
|
||||
}
|
||||
|
||||
// build new environment
|
||||
env := snowballstem.NewEnv("")
|
||||
|
||||
for _, word := range words {
|
||||
// set up environment for word
|
||||
env.SetCurrent(word)
|
||||
// invoke stemmer
|
||||
english.Stem(env)
|
||||
// print results
|
||||
fmt.Printf("%s stemmed to %s\n", word, env.Current())
|
||||
}
|
||||
}
|
||||
```
|
||||
Produces Output:
|
||||
```
|
||||
$ ./snowtest
|
||||
running stemmed to run
|
||||
jumping stemmed to jump
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The test harness for these stemmers is hosted in the main [Snowball](https://github.com/snowballstem/snowball) repository. There are functional tests built around the separate [snowball-data](https://github.com/snowballstem/snowball-data) repository, and there is support for fuzz-testing the stemmers there as well.
|
||||
|
||||
## Generating the Stemmers
|
||||
|
||||
```
|
||||
$ export SNOWBALL=/path/to/github.com/snowballstem/snowball/after/snowball/built
|
||||
$ go generate
|
||||
```
|
||||
|
||||
## Updating the Go Generate Commands
|
||||
|
||||
A simple tool is provided to automate these from the snowball algorithms directory:
|
||||
|
||||
```
|
||||
$ go run gengen.go /path/to/github.com/snowballstem/snowball/algorithms
|
||||
```
|
16
vendor/github.com/blevesearch/snowballstem/among.go
generated
vendored
Normal file
16
vendor/github.com/blevesearch/snowballstem/among.go
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
package snowballstem
|
||||
|
||||
import "fmt"
|
||||
|
||||
// AmongF is an optional condition attached to an Among entry. FindAmong and
// FindAmongB call it with the Env and the caller-supplied ctx once the entry's
// string has matched, and accept the entry only if it returns true.
type AmongF func(env *Env, ctx interface{}) bool
|
||||
|
||||
// Among is one entry of a table searched by FindAmong and FindAmongB.
type Among struct {
|
||||
// Str is the string compared against the text at the cursor.
Str string
|
||||
// A is the index of the next entry to try when this one is not accepted;
// a negative value ends the search.
A int32
|
||||
// B is the value the search returns when this entry is accepted.
B int32
|
||||
// F, when non-nil, must return true for this entry to be accepted.
F AmongF
|
||||
}
|
||||
|
||||
func (a *Among) String() string {
|
||||
return fmt.Sprintf("str: `%s`, a: %d, b: %d, f: %p", a.Str, a.A, a.B, a.F)
|
||||
}
|
1341
vendor/github.com/blevesearch/snowballstem/english/english_stemmer.go
generated
vendored
Normal file
1341
vendor/github.com/blevesearch/snowballstem/english/english_stemmer.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
389
vendor/github.com/blevesearch/snowballstem/env.go
generated
vendored
Normal file
389
vendor/github.com/blevesearch/snowballstem/env.go
generated
vendored
Normal file
|
@ -0,0 +1,389 @@
|
|||
package snowballstem
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Env represents the Snowball execution environment
// All positions held in it are byte offsets into the current string.
|
||||
type Env struct {
|
||||
// current is the string being processed; see Current and SetCurrent.
current string
|
||||
// Cursor is the current position within the string.
Cursor int
|
||||
// Limit is the forward bound: forward matches fail once Cursor >= Limit.
Limit int
|
||||
// LimitBackward is the backward bound: backward matches fail once
// Cursor <= LimitBackward.
LimitBackward int
|
||||
// Bra is the start of the slice used by SliceTo and SliceFrom.
Bra int
|
||||
// Ket is the end of the slice used by SliceTo and SliceFrom.
Ket int
|
||||
}
|
||||
|
||||
// NewEnv creates a new Snowball execution environment on the provided string
|
||||
func NewEnv(val string) *Env {
|
||||
return &Env{
|
||||
current: val,
|
||||
Cursor: 0,
|
||||
Limit: len(val),
|
||||
LimitBackward: 0,
|
||||
Bra: 0,
|
||||
Ket: len(val),
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) Current() string {
|
||||
return env.current
|
||||
}
|
||||
|
||||
func (env *Env) SetCurrent(s string) {
|
||||
env.current = s
|
||||
env.Cursor = 0
|
||||
env.Limit = len(s)
|
||||
env.LimitBackward = 0
|
||||
env.Bra = 0
|
||||
env.Ket = len(s)
|
||||
}
|
||||
|
||||
func (env *Env) ReplaceS(bra, ket int, s string) int32 {
|
||||
adjustment := int32(len(s)) - (int32(ket) - int32(bra))
|
||||
result, _ := splitAt(env.current, bra)
|
||||
rsplit := ket
|
||||
if ket < bra {
|
||||
rsplit = bra
|
||||
}
|
||||
_, rhs := splitAt(env.current, rsplit)
|
||||
result += s
|
||||
result += rhs
|
||||
|
||||
newLim := int32(env.Limit) + adjustment
|
||||
env.Limit = int(newLim)
|
||||
|
||||
if env.Cursor >= ket {
|
||||
newCur := int32(env.Cursor) + adjustment
|
||||
env.Cursor = int(newCur)
|
||||
} else if env.Cursor > bra {
|
||||
env.Cursor = bra
|
||||
}
|
||||
|
||||
env.current = result
|
||||
return adjustment
|
||||
}
|
||||
|
||||
func (env *Env) EqS(s string) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
|
||||
if strings.HasPrefix(env.current[env.Cursor:], s) {
|
||||
env.Cursor += len(s)
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor++
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) EqSB(s string) bool {
|
||||
if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
|
||||
return false
|
||||
} else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
|
||||
!strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
|
||||
return false
|
||||
} else {
|
||||
env.Cursor -= len(s)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) SliceFrom(s string) bool {
|
||||
bra, ket := env.Bra, env.Ket
|
||||
env.ReplaceS(bra, ket, s)
|
||||
return true
|
||||
}
|
||||
|
||||
func (env *Env) NextChar() {
|
||||
env.Cursor++
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor++
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) PrevChar() {
|
||||
env.Cursor--
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor--
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) ByteIndexForHop(delta int32) int32 {
|
||||
if delta > 0 {
|
||||
res := env.Cursor
|
||||
for delta > 0 {
|
||||
res++
|
||||
delta--
|
||||
for res <= len(env.current) && !onCharBoundary(env.current, res) {
|
||||
res++
|
||||
}
|
||||
}
|
||||
return int32(res)
|
||||
} else if delta < 0 {
|
||||
res := env.Cursor
|
||||
for delta < 0 {
|
||||
res--
|
||||
delta++
|
||||
for res >= 0 && !onCharBoundary(env.current, res) {
|
||||
res--
|
||||
}
|
||||
}
|
||||
return int32(res)
|
||||
} else {
|
||||
return int32(env.Cursor)
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) InGrouping(chars []byte, min, max int32) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
if r > max || r < min {
|
||||
return false
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
return false
|
||||
}
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
|
||||
if env.Cursor <= env.LimitBackward {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
env.NextChar()
|
||||
if r > max || r < min {
|
||||
return false
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
if r > max || r < min {
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
|
||||
if env.Cursor <= env.LimitBackward {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
env.NextChar()
|
||||
if r > max || r < min {
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) SliceDel() bool {
|
||||
return env.SliceFrom("")
|
||||
}
|
||||
|
||||
func (env *Env) Insert(bra, ket int, s string) {
|
||||
adjustment := env.ReplaceS(bra, ket, s)
|
||||
if bra <= env.Bra {
|
||||
env.Bra = int(int32(env.Bra) + adjustment)
|
||||
}
|
||||
if bra <= env.Ket {
|
||||
env.Ket = int(int32(env.Ket) + adjustment)
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) SliceTo() string {
|
||||
return env.current[env.Bra:env.Ket]
|
||||
}
|
||||
|
||||
func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
|
||||
var i int32
|
||||
j := int32(len(amongs))
|
||||
|
||||
c := env.Cursor
|
||||
l := env.Limit
|
||||
|
||||
var commonI, commonJ int
|
||||
|
||||
firstKeyInspected := false
|
||||
for {
|
||||
k := i + ((j - i) >> 1)
|
||||
var diff int32
|
||||
common := min(commonI, commonJ)
|
||||
w := amongs[k]
|
||||
for lvar := common; lvar < len(w.Str); lvar++ {
|
||||
if c+common == l {
|
||||
diff--
|
||||
break
|
||||
}
|
||||
diff = int32(env.current[c+common]) - int32(w.Str[lvar])
|
||||
if diff != 0 {
|
||||
break
|
||||
}
|
||||
common++
|
||||
}
|
||||
if diff < 0 {
|
||||
j = k
|
||||
commonJ = common
|
||||
} else {
|
||||
i = k
|
||||
commonI = common
|
||||
}
|
||||
if j-i <= 1 {
|
||||
if i > 0 {
|
||||
break
|
||||
}
|
||||
if j == i {
|
||||
break
|
||||
}
|
||||
if firstKeyInspected {
|
||||
break
|
||||
}
|
||||
firstKeyInspected = true
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
w := amongs[i]
|
||||
if commonI >= len(w.Str) {
|
||||
env.Cursor = c + len(w.Str)
|
||||
if w.F != nil {
|
||||
res := w.F(env, ctx)
|
||||
env.Cursor = c + len(w.Str)
|
||||
if res {
|
||||
return w.B
|
||||
}
|
||||
} else {
|
||||
return w.B
|
||||
}
|
||||
}
|
||||
i = w.A
|
||||
if i < 0 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
|
||||
var i int32
|
||||
j := int32(len(amongs))
|
||||
|
||||
c := env.Cursor
|
||||
lb := env.LimitBackward
|
||||
|
||||
var commonI, commonJ int
|
||||
|
||||
firstKeyInspected := false
|
||||
|
||||
for {
|
||||
k := i + ((j - i) >> 1)
|
||||
diff := int32(0)
|
||||
common := min(commonI, commonJ)
|
||||
w := amongs[k]
|
||||
for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
|
||||
if c-common == lb {
|
||||
diff--
|
||||
break
|
||||
}
|
||||
diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
|
||||
if diff != 0 {
|
||||
break
|
||||
}
|
||||
// Count up commons. But not one character but the byte width of that char
|
||||
common++
|
||||
}
|
||||
if diff < 0 {
|
||||
j = k
|
||||
commonJ = common
|
||||
} else {
|
||||
i = k
|
||||
commonI = common
|
||||
}
|
||||
if j-i <= 1 {
|
||||
if i > 0 {
|
||||
break
|
||||
}
|
||||
if j == i {
|
||||
break
|
||||
}
|
||||
if firstKeyInspected {
|
||||
break
|
||||
}
|
||||
firstKeyInspected = true
|
||||
}
|
||||
}
|
||||
for {
|
||||
w := amongs[i]
|
||||
if commonI >= len(w.Str) {
|
||||
env.Cursor = c - len(w.Str)
|
||||
if w.F != nil {
|
||||
res := w.F(env, ctx)
|
||||
env.Cursor = c - len(w.Str)
|
||||
if res {
|
||||
return w.B
|
||||
}
|
||||
} else {
|
||||
return w.B
|
||||
}
|
||||
}
|
||||
i = w.A
|
||||
if i < 0 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) Debug(count, lineNumber int) {
|
||||
log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
|
||||
}
|
||||
|
||||
func (env *Env) Clone() *Env {
|
||||
clone := *env
|
||||
return &clone
|
||||
}
|
||||
|
||||
func (env *Env) AssignTo() string {
|
||||
return env.Current()
|
||||
}
|
61
vendor/github.com/blevesearch/snowballstem/gen.go
generated
vendored
Normal file
61
vendor/github.com/blevesearch/snowballstem/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
package snowballstem
|
||||
|
||||
// to regenerate these commands, run
|
||||
// go run gengen.go /path/to/snowball/algorithms/directory
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/arabic/stem_Unicode.sbl -go -o arabic/arabic_stemmer -gop arabic -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w arabic/arabic_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/danish/stem_ISO_8859_1.sbl -go -o danish/danish_stemmer -gop danish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w danish/danish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/dutch/stem_ISO_8859_1.sbl -go -o dutch/dutch_stemmer -gop dutch -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w dutch/dutch_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/english/stem_ISO_8859_1.sbl -go -o english/english_stemmer -gop english -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w english/english_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/finnish/stem_ISO_8859_1.sbl -go -o finnish/finnish_stemmer -gop finnish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w finnish/finnish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/french/stem_ISO_8859_1.sbl -go -o french/french_stemmer -gop french -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w french/french_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/german/stem_ISO_8859_1.sbl -go -o german/german_stemmer -gop german -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w german/german_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/hungarian/stem_Unicode.sbl -go -o hungarian/hungarian_stemmer -gop hungarian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w hungarian/hungarian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/irish/stem_ISO_8859_1.sbl -go -o irish/irish_stemmer -gop irish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w irish/irish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/italian/stem_ISO_8859_1.sbl -go -o italian/italian_stemmer -gop italian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w italian/italian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/norwegian/stem_ISO_8859_1.sbl -go -o norwegian/norwegian_stemmer -gop norwegian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w norwegian/norwegian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/porter/stem_ISO_8859_1.sbl -go -o porter/porter_stemmer -gop porter -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w porter/porter_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/portuguese/stem_ISO_8859_1.sbl -go -o portuguese/portuguese_stemmer -gop portuguese -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w portuguese/portuguese_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/romanian/stem_Unicode.sbl -go -o romanian/romanian_stemmer -gop romanian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w romanian/romanian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/russian/stem_Unicode.sbl -go -o russian/russian_stemmer -gop russian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w russian/russian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/spanish/stem_ISO_8859_1.sbl -go -o spanish/spanish_stemmer -gop spanish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w spanish/spanish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/swedish/stem_ISO_8859_1.sbl -go -o swedish/swedish_stemmer -gop swedish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w swedish/swedish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/tamil/stem_Unicode.sbl -go -o tamil/tamil_stemmer -gop tamil -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w tamil/tamil_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/turkish/stem_Unicode.sbl -go -o turkish/turkish_stemmer -gop turkish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w turkish/turkish_stemmer.go
|
3
vendor/github.com/blevesearch/snowballstem/go.mod
generated
vendored
Normal file
3
vendor/github.com/blevesearch/snowballstem/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/blevesearch/snowballstem
|
||||
|
||||
go 1.13
|
34
vendor/github.com/blevesearch/snowballstem/util.go
generated
vendored
Normal file
34
vendor/github.com/blevesearch/snowballstem/util.go
generated
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
package snowballstem
|
||||
|
||||
import (
|
||||
"math"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// MaxInt is math.MaxInt32.
const MaxInt = math.MaxInt32
|
||||
// MinInt is math.MinInt32.
const MinInt = math.MinInt32
|
||||
|
||||
// splitAt splits str at byte offset mid and returns the part before mid and
// the part from mid onwards. It panics if mid is outside 0..len(str).
func splitAt(str string, mid int) (string, string) {
	head, tail := str[:mid], str[mid:]
	return head, tail
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// onCharBoundary reports whether byte offset pos in s is a rune start. Any
// position at or before the start of s, or at or past its end, counts as a
// boundary.
func onCharBoundary(s string, pos int) bool {
	inside := pos > 0 && pos < len(s)
	return !inside || utf8.RuneStart(s[pos])
}
|
||||
|
||||
// RuneCountInString returns the number of runes in str. It wraps
// utf8.RuneCountInString so that stemmers can call it through this package
// and do not each have to decide whether to import unicode/utf8.
func RuneCountInString(str string) int {
	count := utf8.RuneCountInString(str)
	return count
}
|
12
vendor/github.com/blevesearch/zap/v11/.gitignore
generated
vendored
Normal file
12
vendor/github.com/blevesearch/zap/v11/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
#*
|
||||
*.sublime-*
|
||||
*~
|
||||
.#*
|
||||
.project
|
||||
.settings
|
||||
**/.idea/
|
||||
**/*.iml
|
||||
.DS_Store
|
||||
/cmd/zap/zap
|
||||
*.test
|
||||
tags
|
202
vendor/github.com/blevesearch/zap/v11/LICENSE
generated
vendored
Normal file
202
vendor/github.com/blevesearch/zap/v11/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,5 +1,7 @@
|
|||
# zap file format
|
||||
|
||||
Advanced ZAP File Format Documentation is [here](zap.md).
|
||||
|
||||
The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written.
|
||||
|
||||
Current usage:
|
||||
|
@ -90,16 +92,6 @@ If you know the doc number you're interested in, this format lets you jump to th
|
|||
|
||||
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
|
||||
|
||||
## bitmaps of hits with location info
|
||||
|
||||
- for each posting list
|
||||
- preparation phase:
|
||||
- encode roaring bitmap (indicating which hits have location details indexed) posting list to bytes (so we know the length)
|
||||
- file writing phase:
|
||||
- remember the start position for this bitmap
|
||||
- write length of encoded roaring bitmap
|
||||
- write the serialized roaring bitmap data
|
||||
|
||||
## postings list section
|
||||
|
||||
- for each posting list
|
||||
|
@ -109,7 +101,6 @@ If you know the doc number you're interested in, this format lets you jump to th
|
|||
- remember the start position for this posting list
|
||||
- write freq/norm details offset (remembered from previous, as varint uint64)
|
||||
- write location details offset (remembered from previous, as varint uint64)
|
||||
- write location bitmap offset (remembered from previous, as varint uint64)
|
||||
- write length of encoded roaring bitmap
|
||||
- write the serialized roaring bitmap data
|
||||
|
|
@ -18,6 +18,8 @@ import (
|
|||
"bufio"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/couchbase/vellum"
|
||||
)
|
||||
|
||||
const Version uint32 = 11
|
||||
|
@ -26,6 +28,10 @@ const Type string = "zap"
|
|||
|
||||
const fieldNotUninverted = math.MaxUint64
|
||||
|
||||
// Persist persists sb at path by delegating to PersistSegmentBase and
// returns whatever error that call reports.
func (sb *SegmentBase) Persist(path string) error {
|
||||
return PersistSegmentBase(sb, path)
|
||||
}
|
||||
|
||||
// PersistSegmentBase persists SegmentBase in the zap file format.
|
||||
func PersistSegmentBase(sb *SegmentBase, path string) error {
|
||||
flag := os.O_RDWR | os.O_CREATE
|
||||
|
@ -137,6 +143,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
|||
docValueOffset: docValueOffset,
|
||||
dictLocs: dictLocs,
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
fieldFSTs: make(map[uint16]*vellum.FST),
|
||||
}
|
||||
sb.updateSize()
|
||||
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue