Switch to bbolt

This commit is contained in:
Ken-Håvard Lieng 2020-04-23 01:06:36 +02:00
parent 360bed00f9
commit 77543e3aed
617 changed files with 68468 additions and 97867 deletions

View file

@ -98,18 +98,33 @@ type IndexReader interface {
Close() error
}
// The Regexp interface defines the subset of the regexp.Regexp API
// methods that are used by bleve indexes, allowing callers to pass in
// alternate implementations.
type Regexp interface {
// FindStringIndex returns an int slice describing a location in s.
// NOTE(review): presumably follows regexp.Regexp.FindStringIndex
// semantics (leftmost match as [start, end), nil when no match) — confirm.
FindStringIndex(s string) (loc []int)
// LiteralPrefix returns a prefix string and a completeness flag.
// NOTE(review): presumably mirrors regexp.Regexp.LiteralPrefix — confirm.
LiteralPrefix() (prefix string, complete bool)
// String returns a string form of the expression.
// NOTE(review): presumably the source pattern text — confirm.
String() string
}
type IndexReaderRegexp interface {
FieldDictRegexp(field string, regex []byte) (FieldDict, error)
FieldDictRegexp(field string, regex string) (FieldDict, error)
}
type IndexReaderFuzzy interface {
FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error)
FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
}
// IndexReaderOnly is a reader capability that exposes FieldDictOnly.
type IndexReaderOnly interface {
// FieldDictOnly returns a FieldDict for the given field, taking a list of
// terms (onlyTerms) and an includeCount flag.
// NOTE(review): presumably the dictionary is limited to exactly the given
// terms and includeCount controls whether counts are populated — confirm
// against an implementation.
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
}
// IndexReaderContains is a reader capability that exposes a
// FieldDictContains for a given field.
type IndexReaderContains interface {
// FieldDictContains returns a FieldDictContains for the named field, or an
// error.
FieldDictContains(field string) (FieldDictContains, error)
}
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@ -219,6 +234,10 @@ type FieldDict interface {
Close() error
}
// FieldDictContains is a single-method interface answering a yes/no question
// about a key.
type FieldDictContains interface {
// Contains returns a boolean for key, or an error.
// NOTE(review): presumably true means key is present in the field's term
// dictionary — confirm against an implementation.
Contains(key []byte) (bool, error)
}
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
@ -237,9 +256,12 @@ type DocIDReader interface {
Close() error
}
// BatchCallback is a function that receives an error value. A Batch can carry
// one as its persisted callback (see SetPersistedCallback / PersistedCallback).
// NOTE(review): presumably invoked once the batch has been persisted, with nil
// on success — confirm against the code that fires it.
type BatchCallback func(error)
type Batch struct {
IndexOps map[string]*document.Document
InternalOps map[string][]byte
IndexOps map[string]*document.Document
InternalOps map[string][]byte
persistedCallback BatchCallback
}
func NewBatch() *Batch {
@ -265,6 +287,14 @@ func (b *Batch) DeleteInternal(key []byte) {
b.InternalOps[string(key)] = nil
}
// SetPersistedCallback stores f as this batch's persisted callback, replacing
// any previously stored one. Passing nil clears it.
func (b *Batch) SetPersistedCallback(f BatchCallback) {
b.persistedCallback = f
}
// PersistedCallback returns the callback previously stored with
// SetPersistedCallback, or nil when none is set.
func (b *Batch) PersistedCallback() BatchCallback {
return b.persistedCallback
}
func (b *Batch) String() string {
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
for k, v := range b.IndexOps {
@ -287,6 +317,27 @@ func (b *Batch) String() string {
func (b *Batch) Reset() {
b.IndexOps = make(map[string]*document.Document)
b.InternalOps = make(map[string][]byte)
b.persistedCallback = nil
}
// Merge copies every entry of o into b. Index operations and internal
// operations are copied independently; when both batches hold the same key,
// the value from o overwrites the one already in b. Only the two operation
// maps are touched.
func (b *Batch) Merge(o *Batch) {
	for docID, doc := range o.IndexOps {
		b.IndexOps[docID] = doc
	}
	for internalKey, internalVal := range o.InternalOps {
		b.InternalOps[internalKey] = internalVal
	}
}
// TotalDocSize sums a size figure over the batch's index operations: each
// key contributes its byte length, and each non-nil document additionally
// contributes its Size() plus size.SizeOfString. Entries whose document is
// nil contribute only the key length.
func (b *Batch) TotalDocSize() int {
	total := 0
	for docID, doc := range b.IndexOps {
		total += len(docID)
		if doc == nil {
			continue
		}
		total += doc.Size() + size.SizeOfString
	}
	return total
}
// Optimizable represents an optional interface that is implementable by
@ -298,11 +349,19 @@ type Optimizable interface {
Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
}
// Represents a result of optimization -- see the Finish() method.
type Optimized interface{}
type OptimizableContext interface {
// Once all the optimizable resources have been provided the same
// OptimizableContext instance, the optimization preparations are
// finished or completed via the Finish() method.
Finish() error
//
// Depending on the optimization being performed, the Finish()
// method might return a non-nil Optimized instance. For example,
// the Optimized instance might represent an optimized
// TermFieldReader instance.
Finish() (Optimized, error)
}
type DocValueReader interface {

View file

@ -302,7 +302,7 @@ Map local bitsets into global number space (global meaning cross-segment but sti
IndexSnapshot already should have mapping something like:
0 - Offset 0
1 - Offset 3 (because segment 0 had 3 docs)
2 - Offset 4 (becuase segment 1 had 1 doc)
2 - Offset 4 (because segment 1 had 1 doc)
This maps to search result bitset:

View file

@ -19,6 +19,7 @@ import (
"sync/atomic"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
)
@ -29,8 +30,9 @@ type segmentIntroduction struct {
ids []string
internal map[string][]byte
applied chan error
persisted chan error
applied chan error
persisted chan error
persistedCallback index.BatchCallback
}
type persistIntroduction struct {
@ -74,11 +76,6 @@ OUTER:
case persist := <-s.persists:
s.introducePersist(persist)
case revertTo := <-s.revertToSnapshots:
err := s.revertToSnapshot(revertTo)
if err != nil {
continue OUTER
}
}
var epochCurr uint64
@ -107,8 +104,11 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
s.rootLock.RLock()
root := s.root
root.AddRef()
s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
nsegs := len(root.segment)
// prepare new index snapshot
@ -123,6 +123,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
// iterate through current segments
var running uint64
var docsToPersistCount, memSegments, fileSegments uint64
for i := range root.segment {
// see if optimistic work included this segment
delta, ok := next.obsoletes[root.segment[i].id]
@ -161,8 +162,19 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
newSnapshot.offsets = append(newSnapshot.offsets, running)
running += newss.segment.Count()
}
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
}
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
// append new segment, if any, to end of the new index snapshot
if next.data != nil {
newSegmentSnapshot := &SegmentSnapshot{
@ -197,6 +209,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
if next.persisted != nil {
s.rootPersisted = append(s.rootPersisted, next.persisted)
}
if next.persistedCallback != nil {
s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
}
// swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch
s.nextSnapshotEpoch++
@ -221,10 +236,13 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
s.rootLock.Lock()
root := s.root
root.AddRef()
nextSnapshotEpoch := s.nextSnapshotEpoch
s.nextSnapshotEpoch++
s.rootLock.Unlock()
defer func() { _ = root.DecRef() }()
newIndexSnapshot := &IndexSnapshot{
parent: s,
epoch: nextSnapshotEpoch,
@ -235,6 +253,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
creator: "introducePersist",
}
var docsToPersistCount, memSegments, fileSegments uint64
for i, segmentSnapshot := range root.segment {
// see if this segment has been replaced
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
@ -251,9 +270,17 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
// update items persisted in case of a new segment snapshot
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
fileSegments++
} else {
newIndexSnapshot.segment[i] = root.segment[i]
newIndexSnapshot.segment[i].segment.AddRef()
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
}
}
newIndexSnapshot.offsets[i] = root.offsets[i]
}
@ -262,6 +289,9 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
newIndexSnapshot.internal[k] = v
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
newIndexSnapshot.updateSize()
s.rootLock.Lock()
rootPrev := s.root
@ -276,14 +306,19 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
close(persist.applied)
}
// The introducer should definitely handle the segmentMerge.notify
// channel before exiting the introduceMerge.
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
s.rootLock.RLock()
root := s.root
root.AddRef()
s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
newSnapshot := &IndexSnapshot{
parent: s,
internal: root.internal,
@ -293,7 +328,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
// iterate through current segments
newSegmentDeleted := roaring.NewBitmap()
var running uint64
var running, docsToPersistCount, memSegments, fileSegments uint64
for i := range root.segment {
segmentID := root.segment[i].id
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
@ -329,7 +364,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
root.segment[i].segment.AddRef()
newSnapshot.offsets = append(newSnapshot.offsets, running)
running += root.segment[i].segment.Count()
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
}
}
}
// before the newMerge introduction, need to clean the newly
@ -360,8 +403,20 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
})
newSnapshot.offsets = append(newSnapshot.offsets, running)
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
switch nextMerge.new.(type) {
case segment.PersistedSegment:
fileSegments++
default:
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
memSegments++
}
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize()
@ -384,65 +439,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
close(nextMerge.notify)
}
func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
if revertTo.snapshot == nil {
err := fmt.Errorf("Cannot revert to a nil snapshot")
revertTo.applied <- err
return err
func isMemorySegment(s *SegmentSnapshot) bool {
switch s.segment.(type) {
case segment.PersistedSegment:
return false
default:
return true
}
// acquire lock
s.rootLock.Lock()
// prepare a new index snapshot, based on next snapshot
newSnapshot := &IndexSnapshot{
parent: s,
segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)),
offsets: revertTo.snapshot.offsets,
internal: revertTo.snapshot.internal,
epoch: s.nextSnapshotEpoch,
refs: 1,
creator: "revertToSnapshot",
}
s.nextSnapshotEpoch++
// iterate through segments
for i, segmentSnapshot := range revertTo.snapshot.segment {
newSnapshot.segment[i] = &SegmentSnapshot{
id: segmentSnapshot.id,
segment: segmentSnapshot.segment,
deleted: segmentSnapshot.deleted,
cachedDocs: segmentSnapshot.cachedDocs,
creator: segmentSnapshot.creator,
}
newSnapshot.segment[i].segment.AddRef()
// remove segment from ineligibleForRemoval map
filename := zapFileName(segmentSnapshot.id)
delete(s.ineligibleForRemoval, filename)
}
if revertTo.persisted != nil {
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
}
newSnapshot.updateSize()
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
// release lock
s.rootLock.Unlock()
if rootPrev != nil {
_ = rootPrev.DecRef()
}
close(revertTo.applied)
return nil
}

View file

@ -18,13 +18,13 @@ import (
"encoding/json"
"fmt"
"os"
"strings"
"sync/atomic"
"time"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/mergeplan"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
)
func (s *Scorch) mergerLoop() {
@ -46,12 +46,12 @@ OUTER:
default:
// check to see if there is a new snapshot to persist
s.rootLock.RLock()
s.rootLock.Lock()
ourSnapshot := s.root
ourSnapshot.AddRef()
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
s.rootLock.RUnlock()
s.rootLock.Unlock()
if ourSnapshot.epoch != lastEpochMergePlanned {
startTime := time.Now()
@ -60,7 +60,7 @@ OUTER:
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
if err != nil {
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
if err == ErrClosed {
if err == segment.ErrClosed {
// index has been closed
_ = ourSnapshot.DecRef()
break OUTER
@ -130,18 +130,18 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
options *mergeplan.MergePlanOptions) error {
// build list of zap segments in this snapshot
var onlyZapSnapshots []mergeplan.Segment
// build list of persisted segments in this snapshot
var onlyPersistedSnapshots []mergeplan.Segment
for _, segmentSnapshot := range ourSnapshot.segment {
if _, ok := segmentSnapshot.segment.(*zap.Segment); ok {
onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot)
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
}
}
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
// give this list to the planner
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
if err != nil {
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
return fmt.Errorf("merge planning err: %v", err)
@ -151,13 +151,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil
}
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now
var notifications []chan *IndexSnapshot
var filenames []string
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@ -168,26 +168,32 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
oldMap := make(map[uint64]*SegmentSnapshot)
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
for _, planSegment := range task.Segments {
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
oldMap[segSnapshot.id] = segSnapshot
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
if segSnapshot.LiveSize() == 0 {
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
oldMap[segSnapshot.id] = nil
} else {
segmentsToMerge = append(segmentsToMerge, zapSeg)
segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
docsToDrop = append(docsToDrop, segSnapshot.deleted)
}
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast merger, slow persister work flows.
path := persistedSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
}
}
}
var oldNewDocNums map[uint64][]uint64
var segment segment.Segment
var seg segment.Segment
if len(segmentsToMerge) > 0 {
filename := zapFileName(newSegmentID)
s.markIneligibleForRemoval(filename)
@ -196,9 +202,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
fileMergeZapStartTime := time.Now()
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor)
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
s.closeCh, s)
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
@ -209,10 +215,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
if err != nil {
s.unmarkIneligibleForRemoval(filename)
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
if err == segment.ErrClosed {
return err
}
return fmt.Errorf("merging failed: %v", err)
}
segment, err = zap.Open(path)
seg, err = s.segPlugin.Open(path)
if err != nil {
s.unmarkIneligibleForRemoval(filename)
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
@ -230,33 +239,41 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
id: newSegmentID,
old: oldMap,
oldNewDocNums: oldNewDocNums,
new: segment,
notify: make(chan *IndexSnapshot, 1),
new: seg,
notify: make(chan *IndexSnapshot),
}
notifications = append(notifications, sm.notify)
// give it to the introducer
select {
case <-s.closeCh:
_ = segment.Close()
return ErrClosed
_ = seg.Close()
return segment.ErrClosed
case s.merges <- sm:
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
}
introStartTime := time.Now()
// it is safe to blockingly wait for the merge introduction
// here as the introducer is bound to handle the notify channel.
newSnapshot := <-sm.notify
introTime := uint64(time.Since(introStartTime))
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
}
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
}
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
}
for _, notification := range notifications {
select {
case <-s.closeCh:
return ErrClosed
case newSnapshot := <-notification:
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
}
}
// once all the newly merged segment introductions are done,
// it's safe to unflip the removal ineligibility for the replaced
// older segments
for _, f := range filenames {
s.unmarkIneligibleForRemoval(f)
}
return nil
@ -274,8 +291,8 @@ type segmentMerge struct {
// persisted segment, and synchronously introduce that new segment
// into the root
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
chunkFactor uint32) (*IndexSnapshot, uint64, error) {
sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
sbsIndexes []int) (*IndexSnapshot, uint64, error) {
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
memMergeZapStartTime := time.Now()
@ -287,7 +304,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
path := s.path + string(os.PathSeparator) + filename
newDocNums, _, err :=
zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor)
s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
@ -302,22 +319,22 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
return nil, 0, err
}
segment, err := zap.Open(path)
seg, err := s.segPlugin.Open(path)
if err != nil {
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err
}
// update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count())
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
sm := &segmentMerge{
id: newSegmentID,
old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64),
new: segment,
notify: make(chan *IndexSnapshot, 1),
new: seg,
notify: make(chan *IndexSnapshot),
}
for i, idx := range sbsIndexes {
@ -328,17 +345,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
select { // send to introducer
case <-s.closeCh:
_ = segment.DecRef()
return nil, 0, ErrClosed
_ = seg.DecRef()
return nil, 0, segment.ErrClosed
case s.merges <- sm:
}
select { // wait for introduction to complete
case <-s.closeCh:
return nil, 0, ErrClosed
case newSnapshot := <-sm.notify:
// blockingly wait for the introduction to complete
newSnapshot := <-sm.notify
if newSnapshot != nil {
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
return newSnapshot, newSegmentID, nil
}
return newSnapshot, newSegmentID, nil
}
// ReportBytesWritten atomically adds bytesWritten to the
// TotFileMergeWrittenBytes statistic.
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
}

View file

@ -217,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if len(roster) > 0 {
rosterScore := scoreSegments(roster, o)
if len(bestRoster) <= 0 || rosterScore < bestRosterScore {
if len(bestRoster) == 0 || rosterScore < bestRosterScore {
bestRoster = roster
bestRosterScore = rosterScore
}
}
}
if len(bestRoster) <= 0 {
if len(bestRoster) == 0 {
return rv, nil
}

View file

@ -18,17 +18,37 @@ import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/blevesearch/bleve/index/scorch/segment"
)
func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) (
index.OptimizableContext, error) {
if kind != "conjunction" {
return octx, nil
var OptimizeConjunction = true
var OptimizeConjunctionUnadorned = true
var OptimizeDisjunctionUnadorned = true
// Optimize dispatches on kind to one of the term-field-reader optimizations.
// Each recognized kind is guarded by its package-level toggle
// (OptimizeConjunction, OptimizeConjunctionUnadorned,
// OptimizeDisjunctionUnadorned); when the kind is unrecognized or its toggle
// is off, octx is handed back unchanged with a nil error.
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	switch kind {
	case "conjunction":
		if OptimizeConjunction {
			return s.optimizeConjunction(octx)
		}
	case "conjunction:unadorned":
		if OptimizeConjunctionUnadorned {
			return s.optimizeConjunctionUnadorned(octx)
		}
	case "disjunction:unadorned":
		if OptimizeDisjunctionUnadorned {
			return s.optimizeDisjunctionUnadorned(octx)
		}
	}

	// Unknown kind, or the matching optimization is switched off.
	return octx, nil
}
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
// ----------------------------------------------------------------
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
octx index.OptimizableContext) (index.OptimizableContext, error) {
if octx == nil {
octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
}
@ -39,7 +59,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.Optimiza
}
if o.snapshot != s.snapshot {
return nil, fmt.Errorf("tried to optimize across different snapshots")
return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
}
o.tfrs = append(o.tfrs, s)
@ -53,41 +73,324 @@ type OptimizeTFRConjunction struct {
tfrs []*IndexSnapshotTermFieldReader
}
func (o *OptimizeTFRConjunction) Finish() error {
func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
if len(o.tfrs) <= 1 {
return nil
return nil, nil
}
for i := range o.snapshot.segment {
itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator)
if !ok || itr0.ActualBM == nil {
itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr0.ActualBitmap() == nil {
continue
}
itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator)
if !ok || itr1.ActualBM == nil {
itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr1.ActualBitmap() == nil {
continue
}
bm := roaring.And(itr0.ActualBM, itr1.ActualBM)
bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
for _, tfr := range o.tfrs[2:] {
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
if !ok || itr.ActualBM == nil {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr.ActualBitmap() == nil {
continue
}
bm.And(itr.ActualBM)
bm.And(itr.ActualBitmap())
}
// in this conjunction optimization, the postings iterators
// will all share the same AND'ed together actual bitmap. The
// regular conjunction searcher machinery will still be used,
// but the underlying bitmap will be smaller.
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
if ok && itr.ActualBM != nil {
itr.ActualBM = bm
itr.Actual = bm.Iterator()
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if ok && itr.ActualBitmap() != nil {
itr.ReplaceActual(bm)
}
}
}
return nil
return nil, nil
}
// ----------------------------------------------------------------
// An "unadorned" conjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and
// term-vectors are not required, and instead only the internal-id's
// are needed.
// optimizeConjunctionUnadorned registers s with an unadorned-conjunction
// optimization context. A nil octx starts a fresh context bound to s's
// snapshot. A context of any other concrete type yields (nil, nil). A
// context bound to a different snapshot is an error. Otherwise s is appended
// to the context's reader list and the context is returned.
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	var ctx *OptimizeTFRConjunctionUnadorned
	if octx == nil {
		ctx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
	} else if typed, isConj := octx.(*OptimizeTFRConjunctionUnadorned); isConj {
		ctx = typed
	} else {
		// Some other optimization's context: nothing to do here.
		return nil, nil
	}

	if ctx.snapshot != s.snapshot {
		return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
	}

	ctx.tfrs = append(ctx.tfrs, s)
	return ctx, nil
}
// OptimizeTFRConjunctionUnadorned accumulates the term field readers taking
// part in one unadorned-conjunction optimization.
type OptimizeTFRConjunctionUnadorned struct {
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// tfrs are the readers collected so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
}
var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
var OptimizeTFRConjunctionUnadornedField = "*"
// Finish of an unadorned conjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps AND'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
//
// It returns (nil, nil), meaning no optimized reader, when fewer than two
// readers were collected or when any per-segment iterator does not implement
// segment.OptimizablePostingsIterator. Every return path yields a nil error.
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
if len(o.tfrs) <= 1 {
return nil, nil
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRConjunctionUnadornedTerm,
field: OptimizeTFRConjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
OUTER:
for i := range o.snapshot.segment {
// Reset the scratch slice for this segment, keeping its backing array.
actualBMs = actualBMs[:0]
// Track the doc number shared by the 1-hit iterators seen so far in
// this segment, and whether any 1-hit iterator was seen at all.
var docNum1HitLast uint64
var docNum1HitLastOk bool
for _, tfr := range o.tfrs {
if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok {
// An empty postings iterator means the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
// We only optimize postings iterators that support this operation.
return nil, nil
}
// If the postings iterator is "1-hit" optimized, then we
// can perform several optimizations up-front here.
docNum1Hit, ok := itr.DocNum1Hit()
if ok {
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
// The docNum1Hit doesn't match the previous
// docNum1HitLast, so the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
docNum1HitLast = docNum1Hit
docNum1HitLastOk = true
continue
}
if itr.ActualBitmap() == nil {
// An empty actual bitmap means the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
// Collect the actual bitmap for more processing later.
actualBMs = append(actualBMs, itr.ActualBitmap())
}
if docNum1HitLastOk {
// We reach here if all the 1-hit optimized posting
// iterators had the same 1-hit docNum, so we can check if
// our collected actual bitmaps also have that docNum.
for _, bm := range actualBMs {
// NOTE(review): uint64 -> uint32 narrowing; assumes per-segment doc
// numbers fit in 32 bits — confirm.
if !bm.Contains(uint32(docNum1HitLast)) {
// The docNum1Hit isn't in one of our actual
// bitmaps, so the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
}
// The actual bitmaps and docNum1Hits all contain or have
// the same 1-hit docNum, so that's our AND'ed result.
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
continue OUTER
}
if len(actualBMs) == 0 {
// If we've collected no actual bitmaps at this point,
// then the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
if len(actualBMs) == 1 {
// If we've only 1 actual bitmap, then that's our result.
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(actualBMs[0])
continue OUTER
}
// Else, AND together our collected bitmaps as our result.
// roaring.And allocates a new bitmap for the first pair; the remaining
// bitmaps are then AND'ed into that new bitmap in place.
bm := roaring.And(actualBMs[0], actualBMs[1])
for _, actualBM := range actualBMs[2:] {
bm.And(actualBM)
}
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
}
// ----------------------------------------------------------------
// An "unadorned" disjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and
// term-vectors are not required, and instead only the internal-id's
// are needed.
// optimizeDisjunctionUnadorned registers s with an unadorned-disjunction
// optimization context. A nil octx starts a fresh context bound to s's
// snapshot. A context of any other concrete type yields (nil, nil). A
// context bound to a different snapshot is an error. Otherwise s is appended
// to the context's reader list and the context is returned.
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	var ctx *OptimizeTFRDisjunctionUnadorned
	if octx == nil {
		ctx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot}
	} else if typed, isDisj := octx.(*OptimizeTFRDisjunctionUnadorned); isDisj {
		ctx = typed
	} else {
		// Some other optimization's context: nothing to do here.
		return nil, nil
	}

	if ctx.snapshot != s.snapshot {
		return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
	}

	ctx.tfrs = append(ctx.tfrs, s)
	return ctx, nil
}
// OptimizeTFRDisjunctionUnadorned accumulates the term field readers taking
// part in one unadorned-disjunction optimization.
type OptimizeTFRDisjunctionUnadorned struct {
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// tfrs are the readers collected so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
}
var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
var OptimizeTFRDisjunctionUnadornedField = "*"
// Finish of an unadorned disjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps OR'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
//
// It returns (nil, nil), meaning no optimized reader, when fewer than two
// readers were collected, when any per-segment iterator does not implement
// segment.OptimizablePostingsIterator, or when the cardinality heuristic
// below rejects the optimization. Every return path yields a nil error.
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
if len(o.tfrs) <= 1 {
return nil, nil
}
// First pass: for each segment, find the largest cardinality among the
// readers' non-nil actual bitmaps.
for i := range o.snapshot.segment {
var cMax uint64
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
if itr.ActualBitmap() != nil {
c := itr.ActualBitmap().GetCardinality()
if cMax < c {
cMax = c
}
}
}
// Heuristic to skip the optimization if all the constituent
// bitmaps are too small, where the processing & resource
// overhead to create the OR'ed bitmap outweighs the benefit.
// NOTE(review): this check runs per segment, so one segment whose
// bitmaps are all below the threshold abandons the optimization for
// the whole snapshot — confirm this is intended.
if cMax < OptimizeDisjunctionUnadornedMinChildCardinality {
return nil, nil
}
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRDisjunctionUnadornedTerm,
field: OptimizeTFRDisjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
// Second pass: build one OR'ed bitmap per segment.
for i := range o.snapshot.segment {
// Reset the scratch slices for this segment, keeping their backing arrays.
docNums = docNums[:0]
actualBMs = actualBMs[:0]
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
docNum, ok := itr.DocNum1Hit()
if ok {
// NOTE(review): uint64 -> uint32 narrowing; assumes per-segment doc
// numbers fit in 32 bits — confirm.
docNums = append(docNums, uint32(docNum))
continue
}
if itr.ActualBitmap() != nil {
actualBMs = append(actualBMs, itr.ActualBitmap())
}
}
var bm *roaring.Bitmap
if len(actualBMs) > 2 {
bm = roaring.HeapOr(actualBMs...)
} else if len(actualBMs) == 2 {
bm = roaring.Or(actualBMs[0], actualBMs[1])
} else if len(actualBMs) == 1 {
// Clone because bm receives AddMany below; without a copy that would
// presumably modify the source iterator's own bitmap.
bm = actualBMs[0].Clone()
}
if bm == nil {
// No regular bitmaps were collected; start from an empty one.
bm = roaring.New()
}
// Fold the doc numbers from the 1-hit iterators into the result.
bm.AddMany(docNums)
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
}

View file

@ -17,9 +17,11 @@ package scorch
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"math"
"os"
"path/filepath"
"strconv"
@ -28,23 +30,54 @@ import (
"time"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
var DefaultChunkFactor uint32 = 1024
// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
// persistence of segments with the default safe batch option.
// If the default safe batch option results in a high number of
// files on disk, then users may initialise this configuration parameter
// with higher values so that the persister will nap a bit within its
// work loop to favour better in-memory merging of segments to result
// in fewer segment files on disk. But that may come with an indexing
// performance overhead.
// Unsafe batch users are advised to override this to a higher value
// for better performance especially with high data density.
var DefaultPersisterNapTimeMSec int = 0 // ms
// Arbitrary number, need to make it configurable.
// Lower values like 10/making persister really slow
// doesn't work well as it is creating more files to
// persist for in next persist iteration and spikes the # FDs.
// Ideal value should let persister also proceed at
// an optimum pace so that the merger can skip
// many intermediate snapshots.
// This needs to be based on empirical data.
// TODO - may need to revisit this approach/value.
var epochDistance = uint64(5)
// DefaultPersisterNapUnderNumFiles helps in controlling the pace of
// persister. At times of a slow merger progress with heavy file merging
// operations, it's better to pace down the persister to let the merger
// catch up within a range defined by this parameter.
// Fewer files on disk (as per the merge plan) would result in keeping the
// file handle usage under limit, faster disk merger and a healthier index.
// It's been observed that such a loosely sync'ed introducer-persister-merger
// trio results in better overall performance.
var DefaultPersisterNapUnderNumFiles int = 1000
var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
// persisterOptions groups the tunables that pace the persister. The values
// start from the Default* package variables and may be overridden through
// the "scorchPersisterOptions" entry of the index config.
type persisterOptions struct {
// PersisterNapTimeMSec controls the wait/delay (in milliseconds) injected
// into the persistence work loop to improve the chances for
// a healthier and heavier in-memory merging.
PersisterNapTimeMSec int
// PersisterNapUnderNumFiles is the file-count threshold for napping: if
// PersisterNapTimeMSec > 0 and the number of files on disk is less than
// PersisterNapUnderNumFiles, then the persister will sleep for
// PersisterNapTimeMSec to improve the chances for
// a healthier and heavier in-memory merging.
PersisterNapUnderNumFiles int
// MemoryPressurePauseThreshold gives the persister leeway to skip the
// in-memory merge of segments under memory pressure: the merge is only
// attempted while the current pause count is below this value, otherwise
// segments are persisted directly. The config value is an upper threshold
// for the number of paused application threads. The default value is
// a very high number so that merging of memory segments is always favoured.
MemoryPressurePauseThreshold uint64
}
type notificationChan chan struct{}
@ -54,6 +87,16 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
s.asyncTasks.Done()
return
}
OUTER:
for {
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
@ -69,10 +112,11 @@ OUTER:
lastMergedEpoch = ew.epoch
}
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers)
lastMergedEpoch, persistWatchers, po)
var ourSnapshot *IndexSnapshot
var ourPersisted []chan error
var ourPersistedCallbacks []index.BatchCallback
// check to see if there is a new snapshot to persist
s.rootLock.Lock()
@ -81,6 +125,8 @@ OUTER:
ourSnapshot.AddRef()
ourPersisted = s.rootPersisted
s.rootPersisted = nil
ourPersistedCallbacks = s.persistedCallbacks
s.persistedCallbacks = nil
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
}
@ -89,7 +135,7 @@ OUTER:
if ourSnapshot != nil {
startTime := time.Now()
err := s.persistSnapshot(ourSnapshot)
err := s.persistSnapshot(ourSnapshot, po)
for _, ch := range ourPersisted {
if err != nil {
ch <- err
@ -98,17 +144,34 @@ OUTER:
}
if err != nil {
atomic.StoreUint64(&s.iStats.persistEpoch, 0)
if err == ErrClosed {
if err == segment.ErrClosed {
// index has been closed
_ = ourSnapshot.DecRef()
break OUTER
}
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
lastPersistedEpoch = ourSnapshot.epoch
@ -179,15 +242,51 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext
}
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
persistWatchers []*epochWatcher) (uint64, []*epochWatcher) {
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind
// First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// Check the merger lag by counting the segment files on disk,
numFilesOnDisk, _, _ := s.diskFileStats(nil)
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select {
case <-s.closeCh:
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
case ew := <-s.persisterNotifier:
// unblock the merger in meantime
persistWatchers = append(persistWatchers, ew)
lastMergedEpoch = ew.epoch
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
}
return lastMergedEpoch, persistWatchers
}
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting the clean up.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
// Persister pause until the merger catches up to reduce the segment
// file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
OUTER:
// check for slow merger and await until the merger catch up
for lastPersistedEpoch > lastMergedEpoch+epochDistance {
for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
lastMergedEpoch < lastPersistedEpoch {
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
select {
@ -202,18 +301,46 @@ OUTER:
// let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
return lastMergedEpoch, persistWatchers
}
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
if err != nil {
return err
func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
po := persisterOptions{
PersisterNapTimeMSec: DefaultPersisterNapTimeMSec,
PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles,
MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold,
}
if persisted {
return nil
if v, ok := s.config["scorchPersisterOptions"]; ok {
b, err := json.Marshal(v)
if err != nil {
return &po, err
}
err = json.Unmarshal(b, &po)
if err != nil {
return &po, err
}
}
return &po, nil
}
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
po *persisterOptions) error {
// Perform in-memory segment merging only when the memory pressure is
// below the configured threshold, else the persister performs the
// direct persistence of segments.
if s.paused() < po.MemoryPressurePauseThreshold {
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
if err != nil {
return err
}
if persisted {
return nil
}
}
return s.persistSnapshotDirect(snapshot)
@ -230,13 +357,13 @@ var DefaultMinSegmentsForInMemoryMerge = 2
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
bool, error) {
// collect the in-memory zap segments (SegmentBase instances)
var sbs []*zap.SegmentBase
var sbs []segment.Segment
var sbsDrops []*roaring.Bitmap
var sbsIndexes []int
for i, segmentSnapshot := range snapshot.segment {
if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok {
sbs = append(sbs, sb)
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok {
sbs = append(sbs, segmentSnapshot.segment)
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
sbsIndexes = append(sbsIndexes, i)
}
@ -247,7 +374,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
}
newSnapshot, newSegmentID, err := s.mergeSegmentBases(
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
snapshot, sbs, sbsDrops, sbsIndexes)
if err != nil {
return false, err
}
@ -329,13 +456,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
if err != nil {
return err
}
err = metaBucket.Put([]byte("type"), []byte(zap.Type))
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type()))
if err != nil {
return err
}
buf := make([]byte, binary.MaxVarintLen32)
binary.BigEndian.PutUint32(buf, zap.Version)
err = metaBucket.Put([]byte("version"), buf)
binary.BigEndian.PutUint32(buf, s.segPlugin.Version())
err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
if err != nil {
return err
}
@ -364,11 +491,19 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err
}
switch seg := segmentSnapshot.segment.(type) {
case *zap.SegmentBase:
case segment.PersistedSegment:
path := seg.Path()
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
}
filenames = append(filenames, filename)
case segment.UnpersistedSegment:
// need to persist this to disk
filename := zapFileName(segmentSnapshot.id)
path := s.path + string(os.PathSeparator) + filename
err = zap.PersistSegmentBase(seg, path)
err = seg.Persist(path)
if err != nil {
return fmt.Errorf("error persisting segment: %v", err)
}
@ -378,14 +513,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err
}
filenames = append(filenames, filename)
case *zap.Segment:
path := seg.Path()
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
}
filenames = append(filenames, filename)
default:
return fmt.Errorf("unknown segment type: %T", seg)
}
@ -423,7 +551,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
}
}()
for segmentID, path := range newSegmentPaths {
newSegments[segmentID], err = zap.Open(path)
newSegments[segmentID], err = s.segPlugin.Open(path)
if err != nil {
return fmt.Errorf("error opening new segment at %s, %v", path, err)
}
@ -436,15 +564,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
select {
case <-s.closeCh:
err = ErrClosed
return err
return segment.ErrClosed
case s.persists <- persist:
}
select {
case <-s.closeCh:
err = ErrClosed
return err
return segment.ErrClosed
case <-persist.applied:
}
}
@ -481,6 +607,8 @@ var boltPathKey = []byte{'p'}
var boltDeletedKey = []byte{'d'}
var boltInternalKey = []byte{'i'}
var boltMetaDataKey = []byte{'m'}
var boltMetaDataSegmentTypeKey = []byte("type")
var boltMetaDataSegmentVersionKey = []byte("version")
func (s *Scorch) loadFromBolt() error {
return s.rootBolt.View(func(tx *bolt.Tx) error {
@ -521,11 +649,14 @@ func (s *Scorch) loadFromBolt() error {
s.nextSegmentID++
s.rootLock.Lock()
s.nextSnapshotEpoch = snapshotEpoch + 1
if s.root != nil {
_ = s.root.DecRef()
}
rootPrev := s.root
s.root = indexSnapshot
s.rootLock.Unlock()
if rootPrev != nil {
_ = rootPrev.DecRef()
}
foundRoot = true
}
return nil
@ -562,6 +693,23 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
refs: 1,
creator: "loadSnapshot",
}
// first we look for the meta-data bucket, this will tell us
// which segment type/version was used for this snapshot
// all operations for this scorch will use this type/version
metaBucket := snapshot.Bucket(boltMetaDataKey)
if metaBucket == nil {
_ = rv.DecRef()
return nil, fmt.Errorf("meta-data bucket missing")
}
segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
segmentVersion := binary.BigEndian.Uint32(
metaBucket.Get(boltMetaDataSegmentVersionKey))
err := s.loadSegmentPlugin(segmentType, segmentVersion)
if err != nil {
_ = rv.DecRef()
return nil, fmt.Errorf(
"unable to load correct segment wrapper: %v", err)
}
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@ -606,7 +754,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
return nil, fmt.Errorf("segment path missing")
}
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
segment, err := zap.Open(segmentPath)
segment, err := s.segPlugin.Open(segmentPath)
if err != nil {
return nil, fmt.Errorf("error opening bolt segment: %v", err)
}
@ -643,12 +791,11 @@ func (s *Scorch) removeOldData() {
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
}
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
if removed > 0 {
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
}
@ -690,7 +837,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
s.eligibleForRemoval = newEligible
s.rootLock.Unlock()
if len(epochsToRemove) <= 0 {
if len(epochsToRemove) == 0 {
return 0, nil
}

View file

@ -28,10 +28,9 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
const Name = "scorch"
@ -41,12 +40,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
readOnly bool
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
stats Stats
nextSegmentID uint64
path string
unsafeBatch bool
@ -54,6 +55,7 @@ type Scorch struct {
rootLock sync.RWMutex
root *IndexSnapshot // holds 1 ref-count on the root
rootPersisted []chan error // closed when root is persisted
persistedCallbacks []index.BatchCallback
nextSnapshotEpoch uint64
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
@ -64,7 +66,6 @@ type Scorch struct {
persists chan *persistIntroduction
merges chan *segmentMerge
introducerNotifier chan *epochWatcher
revertToSnapshots chan *snapshotReversion
persisterNotifier chan *epochWatcher
rootBolt *bolt.DB
asyncTasks sync.WaitGroup
@ -72,7 +73,11 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
iStats internalStats
pauseLock sync.RWMutex
pauseCount uint64
segPlugin segment.Plugin
}
type internalStats struct {
@ -96,7 +101,25 @@ func NewScorch(storeName string,
nextSnapshotEpoch: 1,
closeCh: make(chan struct{}),
ineligibleForRemoval: map[string]bool{},
segPlugin: defaultSegmentPlugin,
}
// check if the caller has requested a specific segment type/version
forcedSegmentVersion, ok := config["forceSegmentVersion"].(int)
if ok {
forcedSegmentType, ok2 := config["forceSegmentType"].(string)
if !ok2 {
return nil, fmt.Errorf(
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
}
err := rv.loadSegmentPlugin(forcedSegmentType,
uint32(forcedSegmentVersion))
if err != nil {
return nil, err
}
}
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
ro, ok := config["read_only"].(bool)
if ok {
@ -117,9 +140,30 @@ func NewScorch(storeName string,
return rv, nil
}
// paused returns the current pause count, read under pauseLock.
// The count is raised by incrPause and lowered by decrPause.
func (s *Scorch) paused() uint64 {
	s.pauseLock.Lock()
	defer s.pauseLock.Unlock()
	return s.pauseCount
}
// incrPause raises the pause count by one while holding pauseLock.
func (s *Scorch) incrPause() {
	s.pauseLock.Lock()
	defer s.pauseLock.Unlock()
	s.pauseCount += 1
}
// decrPause lowers the pause count by one while holding pauseLock.
// It is the counterpart of incrPause; the counter is unsigned, so calls
// are expected to be balanced.
func (s *Scorch) decrPause() {
	s.pauseLock.Lock()
	defer s.pauseLock.Unlock()
	s.pauseCount -= 1
}
// fireEvent delivers an Event of the given kind and duration to the
// registered onEvent callback, if there is one. The pause count is held
// raised for the duration of the callback so that paused() reflects how
// many callbacks are currently executing.
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
	if s.onEvent == nil {
		return
	}
	s.incrPause()
	evt := Event{Kind: kind, Scorch: s, Duration: dur}
	s.onEvent(evt)
	s.decrPause()
}
@ -189,12 +233,14 @@ func (s *Scorch) openBolt() error {
}
}
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))
s.introductions = make(chan *segmentIntroduction)
s.persists = make(chan *persistIntroduction)
s.merges = make(chan *segmentMerge)
s.introducerNotifier = make(chan *epochWatcher, 1)
s.revertToSnapshots = make(chan *snapshotReversion)
s.persisterNotifier = make(chan *epochWatcher, 1)
s.closeCh = make(chan struct{})
if !s.readOnly && s.path != "" {
err := s.removeOldZapFiles() // Before persister or merger create any new files.
@ -235,7 +281,10 @@ func (s *Scorch) Close() (err error) {
err = s.rootBolt.Close()
s.rootLock.Lock()
if s.root != nil {
_ = s.root.DecRef()
err2 := s.root.DecRef()
if err == nil {
err = err2
}
}
s.root = nil
s.rootLock.Unlock()
@ -284,15 +333,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis?
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
aw := index.NewAnalysisWork(s, doc, resultChan)
// put the work on the queue
s.analysisQueue.Queue(aw)
if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
aw := index.NewAnalysisWork(s, doc, resultChan)
// put the work on the queue
s.analysisQueue.Queue(aw)
}
}
}
}()
}()
}
// wait for analysis result
analysisResults := make([]*index.AnalysisResult, int(numUpdates))
@ -319,7 +370,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
var newSegment segment.Segment
var bufBytes uint64
if len(analysisResults) > 0 {
newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor)
newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
if err != nil {
return err
}
@ -328,7 +379,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
}
err = s.prepareSegment(newSegment, ids, batch.InternalOps)
err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
if err != nil {
if newSegment != nil {
_ = newSegment.Close()
@ -348,16 +399,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
}
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
internalOps map[string][]byte) error {
internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
// new introduction
introduction := &segmentIntroduction{
id: atomic.AddUint64(&s.nextSegmentID, 1),
data: newSegment,
ids: ids,
obsoletes: make(map[uint64]*roaring.Bitmap),
internal: internalOps,
applied: make(chan error),
id: atomic.AddUint64(&s.nextSegmentID, 1),
data: newSegment,
ids: ids,
obsoletes: make(map[uint64]*roaring.Bitmap),
internal: internalOps,
applied: make(chan error),
persistedCallback: persistedCallback,
}
if !s.unsafeBatch {
@ -370,6 +422,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
root.AddRef()
s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
for _, seg := range root.segment {
delta, err := seg.segment.DocNumbers(ids)
if err != nil {
@ -378,8 +432,6 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
introduction.obsoletes[seg.id] = delta
}
_ = root.DecRef()
introStartTime := time.Now()
s.introductions <- introduction
@ -434,24 +486,57 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot {
func (s *Scorch) Stats() json.Marshaler {
return &s.stats
}
func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
uint64, uint64) {
var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
if s.path != "" {
finfos, err := ioutil.ReadDir(s.path)
if err == nil {
var numFilesOnDisk, numBytesUsedDisk uint64
for _, finfo := range finfos {
if !finfo.IsDir() {
numBytesUsedDisk += uint64(finfo.Size())
numFilesOnDisk++
if rootSegmentPaths != nil {
fname := s.path + string(os.PathSeparator) + finfo.Name()
if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
numBytesOnDiskByRoot += uint64(finfo.Size())
}
}
}
}
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
}
}
// if no root files path given, then consider all disk files.
if rootSegmentPaths == nil {
return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
}
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
}
// rootDiskSegmentsPaths returns the set of file paths of every segment in
// the current root snapshot that implements segment.PersistedSegment.
// Segments without a Path (not yet persisted) are left out.
// NOTE(review): s.root is read without locking here; StatsMap calls this
// while holding rootLock.RLock — other callers should presumably do the same.
func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} {
	rootSegments := s.root.segment
	paths := make(map[string]struct{}, len(rootSegments))
	for _, snap := range rootSegments {
		persisted, isPersisted := snap.segment.(segment.PersistedSegment)
		if !isPersisted {
			continue
		}
		paths[persisted.Path()] = struct{}{}
	}
	return paths
}
func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
s.rootLock.RLock()
rootSegPaths := s.rootDiskSegmentsPaths()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()
numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
// TODO: consider one day removing these backwards compatible
// names for apps using the old names
@ -466,8 +551,16 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
m["num_items_introduced"] = m["TotIntroducedItems"]
m["num_items_persisted"] = m["TotPersistedItems"]
m["num_bytes_used_disk"] = m["CurOnDiskBytes"]
m["num_files_on_disk"] = m["CurOnDiskFiles"]
m["num_recs_to_persist"] = m["TotItemsToPersist"]
// total disk bytes found in index directory inclusive of older snapshots
m["num_bytes_used_disk"] = numBytesUsedDisk
// total disk bytes by the latest root index, exclusive of older snapshots
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
m["num_files_on_disk"] = numFilesOnDisk
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
return m
@ -486,7 +579,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult {
rv.Analyzed[i] = tokenFreqs
rv.Length[i] = fieldLength
if len(d.CompositeFields) > 0 {
if len(d.CompositeFields) > 0 && field.Name() != "_id" {
// see if any of the composite fields need this
for _, compositeField := range d.CompositeFields {
compositeField.Compose(field.Name(), fieldLength, tokenFreqs)

View file

@ -17,6 +17,7 @@ package segment
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/couchbase/vellum"
)
type EmptySegment struct{}
@ -80,12 +81,8 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
return &EmptyDictionaryIterator{}
}
func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator {
return &EmptyDictionaryIterator{}
}
func (e *EmptyDictionary) FuzzyIterator(term string,
fuzziness int) DictionaryIterator {
func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator {
return &EmptyDictionaryIterator{}
}
@ -94,14 +91,18 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{}
}
// Contains always reports false with a nil error: an empty dictionary
// holds no keys.
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyDictionaryIterator struct{}
// Next always returns a nil entry and a nil error: there is nothing
// to iterate over.
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil
}
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyPostingsList struct{}
@ -125,6 +126,12 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil
}
// Advance ignores the requested document number and always returns a nil
// Posting and a nil error, since the iterator has no postings.
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
}
// Size always reports 0 for the empty iterator.
func (e *EmptyPostingsIterator) Size() int {
return 0
}
var AnEmptyPostingsIterator = &EmptyPostingsIterator{}

View file

@ -19,7 +19,10 @@
package segment
import "fmt"
import (
"errors"
"fmt"
)
const (
MaxVarintSize = 9
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
}
return b[length:], v, nil
}
// ------------------------------------------------------------
// MemUvarintReader decodes uvarint-encoded values directly out of an
// in-memory byte slice, keeping track of the read position itself.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the start of s.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	rdr := new(MemUvarintReader)
	rdr.S = s
	return rdr
}

// Len returns the number of unread bytes, or 0 when the read position has
// moved past the end of the buffer.
func (r *MemUvarintReader) Len() int {
	if remaining := len(r.S) - r.C; remaining > 0 {
		return remaining
	}
	return 0
}

// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the encoded
// value does not fit in a uint64.
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// No bounds check is performed: reading past the end of S panics with an
// index-out-of-range error and leaves the read position untouched.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var (
		value uint64
		shift uint
	)
	buf := r.S
	for pos := r.C; ; pos++ {
		cur := buf[pos]
		if cur >= 0x80 {
			// continuation byte: fold in its low 7 bits and keep going
			value |= uint64(cur&0x7f) << shift
			shift += 7
			continue
		}

		// terminating byte: the position is committed even on overflow
		r.C = pos + 1

		// why 63? The stdlib version counts bytes and rejects a 10th
		// byte greater than 1; here the shift is tracked instead, and
		// nine continuation bytes correspond to 7*9 == 63.
		//
		// The leading shift >= 63 guard keeps the common case (small
		// shift) down to a single comparison.
		if shift >= 63 && (shift > 63 || cur > 1) {
			return 0, ErrMemUvarintReaderOverflow
		}
		return value | uint64(cur)<<shift, nil
	}
}

// SkipUvarint skips ahead one encoded uint64.
func (r *MemUvarintReader) SkipUvarint() {
	// step over the continuation bytes ...
	for r.S[r.C] >= 0x80 {
		r.C++
	}
	// ... and then over the terminating byte
	r.C++
}

// SkipBytes skips a count number of bytes.
func (r *MemUvarintReader) SkipBytes(count int) {
	r.C += count
}

// Reset rewinds the reader and points it at s.
func (r *MemUvarintReader) Reset(s []byte) {
	r.S = s
	r.C = 0
}

View file

@ -0,0 +1,58 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
)
// Plugin represents the essential functions required by a package to plug in
// its segment implementation.
type Plugin interface {
// Type is the name for this segment plugin.
Type() string
// Version is a numeric value identifying a specific version of this type.
// When incompatible changes are made to a particular type of plugin, the
// version must be incremented.
Version() uint32
// New takes a set of AnalysisResults and turns them into a new Segment.
// NOTE(review): the uint64 result is received as bufBytes by Scorch.Batch;
// presumably the number of bytes used while building the segment — confirm.
New(results []*index.AnalysisResult) (Segment, uint64, error)
// Open attempts to open the file at the specified path and
// return the corresponding Segment.
Open(path string) (Segment, error)
// Merge takes a set of Segments, and creates a new segment on disk at
// the specified path.
// Drops is a set of bitmaps (one for each segment) indicating which
// documents can be dropped from the segments during the merge.
// If the closeCh channel is closed, Merge will cease doing work at
// the next opportunity, and return an error (closed).
// StatsReporter can optionally be provided, in which case progress
// made during the merge is reported while operation continues.
// Returns:
// A slice of new document numbers (one for each input segment),
// this allows the caller to know a particular document's new
// document number in the newly merged segment.
// The number of bytes written to the new segment file.
// An error, if any occurred.
Merge(segments []Segment, drops []*roaring.Bitmap, path string,
closeCh chan struct{}, s StatsReporter) (
[][]uint64, uint64, error)
}

View file

@ -0,0 +1,75 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"regexp/syntax"
"github.com/couchbase/vellum/regexp"
)
// ParseRegexp compiles pattern into a vellum regexp automaton and, when the
// pattern starts with a case-sensitive literal, also returns the key range
// [prefixBeg, prefixEnd) covering that literal prefix. prefixEnd is the
// prefix incremented by one (see IncrementBytes) and is nil if that
// increment overflows. Both prefix values are nil when the pattern has no
// literal prefix.
func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
	// TODO: potential optimization where syntax.Regexp supports a Simplify() API?

	tree, parseErr := syntax.Parse(pattern, syntax.Perl)
	if parseErr != nil {
		return nil, nil, nil, parseErr
	}

	compiled, compileErr := regexp.NewParsedWithLimit(pattern, tree, regexp.DefaultLimit)
	if compileErr != nil {
		return nil, nil, nil, compileErr
	}

	literal := LiteralPrefix(tree)
	if literal == "" {
		return compiled, nil, nil, nil
	}

	lower := []byte(literal)
	return compiled, lower, IncrementBytes(lower), nil
}
// LiteralPrefix returns the literal prefix given the parse tree for a
// regexp. It follows the left-most branch of the tree through nested
// concatenations and returns the text of the node it ends on when that node
// is a case-sensitive literal. It returns "" when there is no such prefix,
// including when s (or the left-most sub-expression reached) is nil.
func LiteralPrefix(s *syntax.Regexp) string {
	// traverse the left-most branch in the parse tree as long as the
	// node represents a concatenation
	for s != nil && s.Op == syntax.OpConcat {
		if len(s.Sub) < 1 {
			return ""
		}
		s = s.Sub[0]
	}

	// the loop above also stops on a nil node; without this guard the
	// field access below would panic
	if s == nil {
		return ""
	}

	// a case-folded literal matches several spellings, so it cannot be
	// used as a byte-exact prefix
	if s.Op == syntax.OpLiteral && s.Flags&syntax.FoldCase == 0 {
		return string(s.Rune)
	}

	return "" // no literal prefix
}
// IncrementBytes returns a copy of in treated as a big-endian number and
// incremented by one. The input slice is never modified. It returns nil
// when the increment overflows, i.e. when in is empty or consists solely of
// 0xff bytes.
func IncrementBytes(in []byte) []byte {
	out := append(make([]byte, 0, len(in)), in...)
	// add one to the last byte, carrying leftwards for as long as a
	// byte wraps around to zero
	for pos := len(out); pos > 0; pos-- {
		out[pos-1]++
		if out[pos-1] != 0 {
			return out // didn't overflow, so stop
		}
	}
	return nil // overflowed
}

View file

@ -15,10 +15,15 @@
package segment
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/couchbase/vellum"
)
var ErrClosed = fmt.Errorf("index closed")
// DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor
// should keep going. Returning true continues visiting, false stops.
@ -45,15 +50,27 @@ type Segment interface {
DecRef() error
}
// UnpersistedSegment is a Segment that can additionally be written out via
// Persist.
type UnpersistedSegment interface {
Segment
// Persist writes the segment to the given path, reporting any failure.
Persist(path string) error
}
// PersistedSegment is a Segment that can report a path.
// NOTE(review): presumably the location the segment was persisted to -
// confirm against implementations.
type PersistedSegment interface {
Segment
// Path returns the path associated with this segment.
Path() string
}
// TermDictionary is the set of term-lookup and term-enumeration operations
// a segment's dictionary offers. The iterator methods all return a
// DictionaryIterator; they differ only in which terms are selected.
type TermDictionary interface {
// PostingsList looks up the postings for term. except and prealloc are
// passed through to the implementation (presumably docs to exclude and a
// reusable result object - confirm against implementations).
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
// Iterator enumerates the dictionary without any term filter.
Iterator() DictionaryIterator
// PrefixIterator selects terms by prefix.
PrefixIterator(prefix string) DictionaryIterator
// RangeIterator selects terms between start and end.
RangeIterator(start, end string) DictionaryIterator
// RegexpIterator selects terms using the given regular expression.
RegexpIterator(regex string) DictionaryIterator
// FuzzyIterator selects terms using term and a fuzziness value.
FuzzyIterator(term string, fuzziness int) DictionaryIterator
// AutomatonIterator selects terms using a vellum automaton, bounded by an
// inclusive start key and an exclusive end key.
AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
// OnlyIterator restricts enumeration to onlyTerms; includeCount selects
// whether counts are included in the produced entries.
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
// Contains reports whether key is present in the dictionary.
Contains(key []byte) (bool, error)
}
type DictionaryIterator interface {
@ -89,6 +106,12 @@ type PostingsIterator interface {
Size() int
}
// OptimizablePostingsIterator exposes a postings iterator's underlying
// representation so that it can be inspected and swapped.
// NOTE(review): the exact contract is defined by the implementations and
// callers, which are not part of this file - confirm before relying on it.
type OptimizablePostingsIterator interface {
// ActualBitmap returns the roaring bitmap backing the iterator.
ActualBitmap() *roaring.Bitmap
// DocNum1Hit returns a document number and a flag; presumably the flag is
// true only when the iterator represents a single hit.
DocNum1Hit() (uint64, bool)
// ReplaceActual installs a different backing bitmap.
ReplaceActual(*roaring.Bitmap)
}
type Posting interface {
Number() uint64
@ -124,3 +147,7 @@ type DocumentFieldTermVisitable interface {
// DocVisitState is an empty interface type: it places no requirements on
// the values stored in it. NOTE(review): presumably an opaque, reusable
// state handle for doc-value visiting - confirm against callers.
type DocVisitState interface {
}
// StatsReporter is implemented by types that accept write statistics.
type StatsReporter interface {
// ReportBytesWritten is handed a number of bytes written.
ReportBytesWritten(bytesWritten uint64)
}

View file

@ -0,0 +1,148 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"github.com/RoaringBitmap/roaring"
"math"
"reflect"
)
// Static (shallow) sizes in bytes of the unadorned types, computed once at
// package initialization via reflection and reported by their Size methods.
var (
	reflectStaticSizeUnadornedPostingsIteratorBitmap int
	reflectStaticSizeUnadornedPostingsIterator1Hit   int
	reflectStaticSizeUnadornedPosting                int
)

// init records the reflect-reported size of a zero value of each type.
func init() {
	reflectStaticSizeUnadornedPostingsIteratorBitmap =
		int(reflect.TypeOf(UnadornedPostingsIteratorBitmap{}).Size())
	reflectStaticSizeUnadornedPostingsIterator1Hit =
		int(reflect.TypeOf(UnadornedPostingsIterator1Hit{}).Size())
	reflectStaticSizeUnadornedPosting =
		int(reflect.TypeOf(UnadornedPosting(0)).Size())
}
// UnadornedPostingsIteratorBitmap iterates over the document numbers held in
// a roaring bitmap, yielding UnadornedPosting values (doc number only).
type UnadornedPostingsIteratorBitmap struct {
	actual   roaring.IntPeekable // cursor over actualBM; nil means nothing to iterate
	actualBM *roaring.Bitmap     // the bitmap the cursor was created from
}

// Next returns the next posting, or (nil, nil) once the iterator is
// exhausted.
func (i *UnadornedPostingsIteratorBitmap) Next() (Posting, error) {
	return i.nextAtOrAfter(0)
}

// Advance returns the next posting whose number is at or after docNum, or
// (nil, nil) if there is none.
func (i *UnadornedPostingsIteratorBitmap) Advance(docNum uint64) (Posting, error) {
	return i.nextAtOrAfter(docNum)
}

// nextAtOrAfter wraps nextDocNumAtOrAfter, converting a found document
// number into an UnadornedPosting.
func (i *UnadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
	if num, found := i.nextDocNumAtOrAfter(atOrAfter); found {
		return UnadornedPosting(num), nil
	}
	return nil, nil
}

// nextDocNumAtOrAfter moves the cursor to the first value at or after
// atOrAfter and consumes it. The second result is false when no such value
// remains.
func (i *UnadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
	cursor := i.actual
	if cursor == nil || !cursor.HasNext() {
		return 0, false
	}
	// NOTE(review): atOrAfter is truncated to 32 bits here, matching the
	// bitmap's value width.
	cursor.AdvanceIfNeeded(uint32(atOrAfter))
	if cursor.HasNext() {
		return uint64(cursor.Next()), true
	}
	return 0, false // couldn't find anything
}

// Size returns the static size of the iterator struct in bytes.
func (i *UnadornedPostingsIteratorBitmap) Size() int {
	return reflectStaticSizeUnadornedPostingsIteratorBitmap
}
// NewUnadornedPostingsIteratorFromBitmap returns a PostingsIterator that
// walks the document numbers stored in bm. The iterator keeps a reference
// to bm itself alongside a cursor obtained from bm.Iterator().
func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator {
	it := new(UnadornedPostingsIteratorBitmap)
	it.actualBM = bm
	it.actual = bm.Iterator()
	return it
}
// docNum1HitFinished is the sentinel stored in a 1-hit iterator once its
// single document number has been consumed.
const docNum1HitFinished = math.MaxUint64

// UnadornedPostingsIterator1Hit is a postings iterator over exactly one
// document number.
type UnadornedPostingsIterator1Hit struct {
	docNum uint64 // the single hit, or docNum1HitFinished once consumed
}

// Next returns the single posting on first use and (nil, nil) afterwards.
func (i *UnadornedPostingsIterator1Hit) Next() (Posting, error) {
	return i.nextAtOrAfter(0)
}

// Advance returns the single posting if it is at or after docNum and has
// not been consumed yet; otherwise (nil, nil).
func (i *UnadornedPostingsIterator1Hit) Advance(docNum uint64) (Posting, error) {
	return i.nextAtOrAfter(docNum)
}

// nextAtOrAfter wraps nextDocNumAtOrAfter, converting a found document
// number into an UnadornedPosting.
func (i *UnadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
	if num, found := i.nextDocNumAtOrAfter(atOrAfter); found {
		return UnadornedPosting(num), nil
	}
	return nil, nil
}

// nextDocNumAtOrAfter yields the stored document number when it has not
// been consumed and is >= atOrAfter. Every call on an unconsumed iterator
// consumes the hit, whether or not it is returned.
func (i *UnadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
	hit := i.docNum
	if hit == docNum1HitFinished {
		return 0, false
	}

	// consume our 1-hit docNum, regardless of the outcome below
	i.docNum = docNum1HitFinished

	if hit < atOrAfter {
		// advanced past our 1-hit
		return 0, false
	}
	return hit, true
}

// Size returns the static size of the iterator struct in bytes.
func (i *UnadornedPostingsIterator1Hit) Size() int {
	return reflectStaticSizeUnadornedPostingsIterator1Hit
}
// NewUnadornedPostingsIteratorFrom1Hit returns a PostingsIterator that
// yields docNum1Hit as its only posting.
func NewUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) PostingsIterator {
	return &UnadornedPostingsIterator1Hit{docNum: docNum1Hit}
}
// UnadornedPosting is a posting that carries nothing but its document
// number: frequency, norm and locations are always reported as zero/nil.
type UnadornedPosting uint64
// Number returns the document number.
func (p UnadornedPosting) Number() uint64 {
return uint64(p)
}
// Frequency always returns 0.
func (p UnadornedPosting) Frequency() uint64 {
return 0
}
// Norm always returns 0.
func (p UnadornedPosting) Norm() float64 {
return 0
}
// Locations always returns nil.
func (p UnadornedPosting) Locations() []Location {
return nil
}
// Size returns the static size of the type in bytes, as computed at init.
func (p UnadornedPosting) Size() int {
return reflectStaticSizeUnadornedPosting
}

View file

@ -1,167 +0,0 @@
# zap file format
The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written.
Current usage:
- mmap the entire file
- crc-32 bytes and version are in fixed position at end of the file
- reading remainder of footer could be version specific
- remainder of footer gives us:
- 3 important offsets (docValue, fields index, and stored data index)
- 2 important values (number of docs and chunk factor)
- field data is processed once and memoized onto the heap so that we never have to go back to disk for it
- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends.
- access to all other indexed data follows the following pattern:
- first know the field name -> convert to id
- next navigate to term dictionary for that field
- some operations stop here and do dictionary ops
- next use dictionary to navigate to posting list for a specific term
- walk posting list
- if necessary, walk posting details as we go
- if location info is desired, consult location bitmap to see if it is there
## stored fields section
- for each document
- preparation phase:
- produce a slice of metadata bytes and data bytes
- produce these slices in field id order
- field value is appended to the data slice
- metadata slice is varint encoded with the following values for each field value
- field id (uint16)
- field type (byte)
- field value start offset in uncompressed data slice (uint64)
- field value length (uint64)
- field number of array positions (uint64)
- one additional value for each array position (uint64)
- compress the data slice using snappy
- file writing phase:
- remember the start offset for this document
- write out meta data length (varint uint64)
- write out compressed data length (varint uint64)
- write out the metadata bytes
- write out the compressed data bytes
## stored fields idx
- for each document
- write start offset (remembered from previous section) of stored data (big endian uint64)
With this index and a known document number, we have direct access to all the stored field data.
## posting details (freq/norm) section
- for each posting list
- produce a slice containing multiple consecutive chunks (each chunk is varint stream)
- produce a slice remembering offsets of where each chunk starts
- preparation phase:
- for each hit in the posting list
- if this hit is in next chunk close out encoding of last chunk and record offset start of next
- encode term frequency (uint64)
- encode norm factor (float32)
- file writing phase:
- remember start position for this posting list details
- write out number of chunks that follow (varint uint64)
- write out length of each chunk (each a varint uint64)
- write out the byte slice containing all the chunk data
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
## posting details (location) section
- for each posting list
- produce a slice containing multiple consecutive chunks (each chunk is varint stream)
- produce a slice remembering offsets of where each chunk starts
- preparation phase:
- for each hit in the posting list
- if this hit is in next chunk close out encoding of last chunk and record offset start of next
- encode field (uint16)
- encode field pos (uint64)
- encode field start (uint64)
- encode field end (uint64)
- encode number of array positions to follow (uint64)
- encode each array position (each uint64)
- file writing phase:
- remember start position for this posting list details
- write out number of chunks that follow (varint uint64)
- write out length of each chunk (each a varint uint64)
- write out the byte slice containing all the chunk data
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
## bitmaps of hits with location info
- for each posting list
- preparation phase:
- encode roaring bitmap (indicating which hits have location details indexed) posting list to bytes (so we know the length)
- file writing phase:
- remember the start position for this bitmap
- write length of encoded roaring bitmap
- write the serialized roaring bitmap data
## postings list section
- for each posting list
- preparation phase:
- encode roaring bitmap posting list to bytes (so we know the length)
- file writing phase:
- remember the start position for this posting list
- write freq/norm details offset (remembered from previous, as varint uint64)
- write location details offset (remembered from previous, as varint uint64)
- write location bitmap offset (remembered from previous, as varint uint64)
- write length of encoded roaring bitmap
- write the serialized roaring bitmap data
## dictionary
- for each field
- preparation phase:
- encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous)
- file writing phase:
- remember the start position of this persistDictionary
- write length of vellum data (varint uint64)
- write out vellum data
## fields section
- for each field
- file writing phase:
- remember start offset for each field
- write dictionary address (remembered from previous) (varint uint64)
- write length of field name (varint uint64)
- write field name bytes
## fields idx
- for each field
- file writing phase:
- write big endian uint64 of start offset for each field
NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size.
## fields DocValue
- for each field
- preparation phase:
- produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data
- produce a slice remembering the length of each chunk
- file writing phase:
- remember the start position of this first field DocValue offset in the footer
- write out number of chunks that follow (varint uint64)
- write out length of each chunk (each a varint uint64)
- write out the byte slice containing all the chunk data
NOTE: currently the meta header inside each chunk gives the location offsets and size of the data pertaining to a given docID, and any
read operation leverages that meta information to extract the document-specific data from the file.
## footer
- file writing phase
- write number of docs (big endian uint64)
- write stored field index location (big endian uint64)
- write field index location (big endian uint64)
- write field docValue location (big endian uint64)
- write out chunk factor (big endian uint32)
- write out version (big endian uint32)
- write out file CRC of everything preceding this (big endian uint32)

View file

@ -1,149 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bufio"
"math"
"os"
)
// Version is the zap file format version number.
const Version uint32 = 11
// Type is the name identifying this segment format.
const Type string = "zap"
// fieldNotUninverted is a sentinel (max uint64). loadFieldDocValueReader
// treats a field whose docValue start location equals this value as having
// no docValues.
const fieldNotUninverted = math.MaxUint64
// PersistSegmentBase persists SegmentBase in the zap file format.
//
// The segment's in-memory bytes are written to path, followed by the footer,
// then the data is flushed, synced and the file closed. If any step after
// opening the file fails, the file is closed and removed (best effort) and
// the triggering error is returned.
func PersistSegmentBase(sb *SegmentBase, path string) error {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0600)
	if err != nil {
		return err
	}

	// abort discards the partially written file and passes cause through;
	// failures of the cleanup itself are deliberately ignored
	abort := func(cause error) error {
		_ = f.Close()
		_ = os.Remove(path)
		return cause
	}

	w := bufio.NewWriter(f)

	if _, err = w.Write(sb.mem); err != nil {
		return abort(err)
	}

	if err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset,
		sb.docValueOffset, sb.chunkFactor, sb.memCRC, w); err != nil {
		return abort(err)
	}

	if err = w.Flush(); err != nil {
		return abort(err)
	}

	if err = f.Sync(); err != nil {
		return abort(err)
	}

	if err = f.Close(); err != nil {
		return abort(err)
	}

	return nil
}
// persistStoredFieldValues encodes the stored values of one field.
//
// For each value it emits, via metaEncode and in this order: the field id,
// the value's type byte (stf[i]), the value's start offset (curr), the
// value's length, the number of array positions (len(spf[i])) and then each
// array position. The raw value bytes are appended to data and curr is
// advanced by the value's length.
//
// It returns the updated offset and data slice, or (0, nil, err) as soon as
// metaEncode reports an error.
func persistStoredFieldValues(fieldID int,
	storedFieldValues [][]byte, stf []byte, spf [][]uint64,
	curr int, metaEncode varintEncoder, data []byte) (
	int, []byte, error) {
	for i, value := range storedFieldValues {
		// encode field
		if _, err := metaEncode(uint64(fieldID)); err != nil {
			return 0, nil, err
		}
		// encode type
		if _, err := metaEncode(uint64(stf[i])); err != nil {
			return 0, nil, err
		}
		// encode start offset
		if _, err := metaEncode(uint64(curr)); err != nil {
			return 0, nil, err
		}
		// encode length
		if _, err := metaEncode(uint64(len(value))); err != nil {
			return 0, nil, err
		}
		// encode number of array pos
		if _, err := metaEncode(uint64(len(spf[i]))); err != nil {
			return 0, nil, err
		}
		// encode all array positions
		for _, arrayPos := range spf[i] {
			if _, err := metaEncode(arrayPos); err != nil {
				return 0, nil, err
			}
		}

		data = append(data, value...)
		curr += len(value)
	}

	return curr, data, nil
}
// InitSegmentBase builds a SegmentBase from the given raw bytes and
// metadata, then updates its size accounting and loads its docValue
// readers. A failure to load the docValue readers is returned with a nil
// segment.
func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
	fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
	storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
	dictLocs []uint64) (*SegmentBase, error) {
	base := &SegmentBase{
		mem:               mem,
		memCRC:            memCRC,
		chunkFactor:       chunkFactor,
		fieldsMap:         fieldsMap,
		fieldsInv:         fieldsInv,
		numDocs:           numDocs,
		storedIndexOffset: storedIndexOffset,
		fieldsIndexOffset: fieldsIndexOffset,
		docValueOffset:    docValueOffset,
		dictLocs:          dictLocs,
		fieldDvReaders:    make(map[uint16]*docValueReader),
	}
	base.updateSize()

	if err := base.loadDvReaders(); err != nil {
		return nil, err
	}
	return base, nil
}

View file

@ -1,230 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"io"
"reflect"
"github.com/golang/snappy"
)
// reflectStaticSizeMetaData is the static size in bytes of a MetaData
// value, computed once in init.
var reflectStaticSizeMetaData int
// init records the reflect-reported size of a zero MetaData.
func init() {
var md MetaData
reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
}
// termSeparator is the byte 0xff. NOTE(review): presumably used to delimit
// terms in encoded docValue data - confirm against its users.
var termSeparator byte = 0xff
// termSeparatorSplitSlice is termSeparator as a one-byte slice.
var termSeparatorSplitSlice = []byte{termSeparator}
// chunkedContentCoder packs per-document byte values into chunks. Each
// chunk is emitted as a varint-encoded metadata header followed by the
// snappy-compressed concatenation of the values added to that chunk.
type chunkedContentCoder struct {
// final accumulates the encoded chunks; also reused as scratch in Write
final []byte
// chunkSize is the divisor mapping a docNum to its chunk (docNum / chunkSize)
chunkSize uint64
// currChunk is the chunk currently being filled
currChunk uint64
// chunkLens holds the encoded length (metadata + compressed data) per chunk
chunkLens []uint64
// w is the destination writer
w io.Writer
// progressiveWrite, when set, makes each flushed chunk go to w immediately
progressiveWrite bool
// chunkMetaBuf holds the encoded metadata header of the current chunk
chunkMetaBuf bytes.Buffer
// chunkBuf holds the uncompressed values of the current chunk
chunkBuf bytes.Buffer
// chunkMeta holds one MetaData entry per value added to the current chunk
chunkMeta []MetaData
compressed []byte // temp buf for snappy compression
}
// MetaData represents the data information inside a
// chunk: one entry per document value stored in the chunk.
type MetaData struct {
DocNum uint64 // docNum of the data inside the chunk
// DocDvOffset is recorded by Add as the offset just past this document's
// data in the uncompressed chunk (start offset + length), i.e. its end
// offset; the start is the previous entry's DocDvOffset (or 0).
DocDvOffset uint64 // offset of data inside the chunk for the given docid
}
// newChunkedContentCoder returns a new chunk content coder which
// packs data into chunks based on the provided chunkSize.
//
// Room is reserved for maxDocNum/chunkSize + 1 chunks. w is the destination
// writer; when progressiveWrite is set, every chunk is written to w as soon
// as it is flushed.
func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64,
	w io.Writer, progressiveWrite bool) *chunkedContentCoder {
	numChunks := maxDocNum/chunkSize + 1
	return &chunkedContentCoder{
		chunkSize:        chunkSize,
		chunkLens:        make([]uint64, numChunks),
		chunkMeta:        make([]MetaData, 0, numChunks),
		w:                w,
		progressiveWrite: progressiveWrite,
	}
}
// Reset lets you reuse this chunked content coder. Buffers are emptied but
// their storage is kept for reuse, and all recorded chunk lengths are
// zeroed. You cannot change the chunk size.
func (c *chunkedContentCoder) Reset() {
	c.currChunk = 0

	// truncate rather than reallocate so capacity is retained
	c.final = c.final[:0]
	c.chunkMeta = c.chunkMeta[:0]
	c.chunkBuf.Reset()
	c.chunkMetaBuf.Reset()

	for idx := 0; idx < len(c.chunkLens); idx++ {
		c.chunkLens[idx] = 0
	}
}
// Close indicates you are done calling Add(); this allows
// the final chunk to be encoded. It simply flushes the chunk currently
// being built and returns the flush error, if any.
func (c *chunkedContentCoder) Close() error {
return c.flushContents()
}
// flushContents encodes the chunk currently being built and appends it to
// c.final: first the metadata header (entry count, then DocNum/DocDvOffset
// per entry, all as uvarints), then the snappy-compressed chunk data. The
// chunk's total encoded length is recorded in c.chunkLens[c.currChunk].
// With progressiveWrite set, c.final is written to c.w and then emptied.
// It does not reset the per-chunk buffers; Add does that when it moves on
// to the next chunk.
func (c *chunkedContentCoder) flushContents() error {
// flush the contents, with meta information at first
buf := make([]byte, binary.MaxVarintLen64)
n := binary.PutUvarint(buf, uint64(len(c.chunkMeta)))
_, err := c.chunkMetaBuf.Write(buf[:n])
if err != nil {
return err
}
// write out the metaData slice
for _, meta := range c.chunkMeta {
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
if err != nil {
return err
}
}
// write the metadata to final data
metaData := c.chunkMetaBuf.Bytes()
c.final = append(c.final, c.chunkMetaBuf.Bytes()...)
// write the compressed data to the final data
// (the previous compression buffer is handed to snappy at full capacity
// so it can be reused as the destination)
c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes())
c.final = append(c.final, c.compressed...)
c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData))
if c.progressiveWrite {
_, err := c.w.Write(c.final)
if err != nil {
return err
}
c.final = c.final[:0]
}
return nil
}
// Add encodes the provided byte slice into the correct chunk for the provided
// doc num. You MUST call Add() with increasing docNums.
//
// When docNum falls into a different chunk than the one being built, the
// current chunk is flushed first and the per-chunk buffers are cleared. The
// value is then appended to the chunk buffer and a MetaData entry recording
// docNum and the value's end offset within the chunk is added.
func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
	if target := docNum / c.chunkSize; target != c.currChunk {
		// flush out the previous chunk details
		if err := c.flushContents(); err != nil {
			return err
		}
		// clearing the chunk specific meta for next chunk
		c.chunkBuf.Reset()
		c.chunkMetaBuf.Reset()
		c.chunkMeta = c.chunkMeta[:0]
		c.currChunk = target
	}

	// get the starting offset for this doc
	startOffset := c.chunkBuf.Len()
	written, err := c.chunkBuf.Write(vals)
	if err != nil {
		return err
	}

	entry := MetaData{
		DocNum:      docNum,
		DocDvOffset: uint64(startOffset + written),
	}
	c.chunkMeta = append(c.chunkMeta, entry)
	return nil
}
// Write commits all the encoded chunked contents to the provided writer and
// returns the total number of bytes written.
//
// | ..... data ..... | chunk offsets (varints)
// | length in bytes of the chunk offsets section (uint64) | number of chunks (uint64) |
//
// The two trailing uint64 values are big endian. The chunk offsets are
// derived from the recorded chunk lengths via modifyLengthsToEndOffsets.
// After a successful call c.final is left empty.
func (c *chunkedContentCoder) Write() (int, error) {
var tw int
if c.final != nil {
// write out the data section first
nw, err := c.w.Write(c.final)
tw += nw
if err != nil {
return tw, err
}
}
chunkOffsetsStart := uint64(tw)
// from here on c.final is reused as a small scratch buffer
if cap(c.final) < binary.MaxVarintLen64 {
c.final = make([]byte, binary.MaxVarintLen64)
} else {
c.final = c.final[0:binary.MaxVarintLen64]
}
chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
// write out the chunk offsets
for _, chunkOffset := range chunkOffsets {
n := binary.PutUvarint(c.final, chunkOffset)
nw, err := c.w.Write(c.final[:n])
tw += nw
if err != nil {
return tw, err
}
}
chunkOffsetsLen := uint64(tw) - chunkOffsetsStart
c.final = c.final[0:8]
// write out the length of chunk offsets
binary.BigEndian.PutUint64(c.final, chunkOffsetsLen)
nw, err := c.w.Write(c.final)
tw += nw
if err != nil {
return tw, err
}
// write out the number of chunks
binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens)))
nw, err = c.w.Write(c.final)
tw += nw
if err != nil {
return tw, err
}
c.final = c.final[:0]
return tw, nil
}
// ReadDocValueBoundary elicits the start, end offsets from a
// metaData header slice.
//
// The end offset is the DocDvOffset of entry chunk; the start offset is the
// DocDvOffset of the preceding entry, or 0 for the first entry.
func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
	if chunk <= 0 {
		return 0, metaHeaders[chunk].DocDvOffset
	}
	begin := metaHeaders[chunk-1].DocDvOffset
	return begin, metaHeaders[chunk].DocDvOffset
}

View file

@ -1,51 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"hash/crc32"
"io"
)
// CountHashWriter is a wrapper around a Writer which counts the number of
// bytes which have been written and computes a crc32 hash
type CountHashWriter struct {
	w   io.Writer // wrapped destination
	crc uint32    // running CRC-32 (IEEE) of the bytes accepted so far
	n   int       // running count of the bytes accepted so far
}

// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
func NewCountHashWriter(w io.Writer) *CountHashWriter {
	chw := new(CountHashWriter)
	chw.w = w
	return chw
}

// Write writes the provided bytes to the wrapped writer and counts the bytes.
// Only the bytes the wrapped writer reports as written contribute to the
// count and the checksum, even when it also returns an error.
func (c *CountHashWriter) Write(b []byte) (int, error) {
	written, err := c.w.Write(b)
	accepted := b[:written]
	c.n += written
	c.crc = crc32.Update(c.crc, crc32.IEEETable, accepted)
	return written, err
}

// Count returns the number of bytes written
func (c *CountHashWriter) Count() int {
	return c.n
}

// Sum32 returns the CRC-32 hash of the content written to this writer
func (c *CountHashWriter) Sum32() uint32 {
	return c.crc
}

View file

@ -1,288 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
"github.com/couchbase/vellum/levenshtein"
"github.com/couchbase/vellum/regexp"
)
// Dictionary is the zap representation of the term dictionary
type Dictionary struct {
// sb is the segment this dictionary belongs to; it is handed to the
// postings lists the dictionary produces
sb *SegmentBase
// field and fieldID identify the field (NOTE(review): neither is read in
// the methods visible in this file)
field string
fieldID uint16
// fst is the vellum FST of the field's terms; the lookup and iterator
// methods treat a nil fst as an empty dictionary
fst *vellum.FST
}
// PostingsList returns the postings list for the specified term.
//
// prealloc is reused for the result only when it holds a non-nil
// *PostingsList; any other value is treated as no preallocation.
func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
	prealloc segment.PostingsList) (segment.PostingsList, error) {
	// a failed assertion yields a nil pointer, which is exactly the
	// "nothing to reuse" value postingsList expects
	reuse, _ := prealloc.(*PostingsList)
	return d.postingsList(term, except, reuse)
}
// postingsList looks term up in the FST and loads its postings list into rv
// (or a fresh list when rv is nil or the shared empty list).
//
// When the dictionary has no FST or the term is absent, the shared
// emptyPostingsList is returned if there is nothing to reuse; otherwise rv
// is re-initialized and returned empty. FST lookup failures are returned
// wrapped as "vellum err: ...".
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
	if d.fst != nil {
		offset, found, err := d.fst.Get(term)
		if err != nil {
			return nil, fmt.Errorf("vellum err: %v", err)
		}
		if found {
			return d.postingsListFromOffset(offset, except, rv)
		}
	}

	// no FST, or the term is not in it
	if rv == nil || rv == emptyPostingsList {
		return emptyPostingsList, nil
	}
	return d.postingsListInit(rv, except), nil
}
// postingsListFromOffset initializes rv (or a fresh list) for this
// dictionary and reads the postings stored at postingsOffset into it. A
// read failure is returned with a nil list.
func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
	list := d.postingsListInit(rv, except)
	if err := list.read(postingsOffset, d); err != nil {
		return nil, err
	}
	return list, nil
}
// postingsListInit returns a postings list bound to this dictionary's
// segment and the given except bitmap.
//
// A nil rv, or the shared emptyPostingsList, results in a newly allocated
// list. Any other rv is reset in place: every field is zeroed, except that
// its postings bitmap (if any) is cleared and kept for reuse.
func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
	if rv == nil || rv == emptyPostingsList {
		return &PostingsList{sb: d.sb, except: except}
	}

	reusable := rv.postings
	if reusable != nil {
		reusable.Clear()
	}

	// clear the struct, carrying over only the reusable bitmap
	*rv = PostingsList{postings: reusable, sb: d.sb, except: except}
	return rv
}
// Iterator returns an iterator for this dictionary, visiting all terms.
// A dictionary without an FST yields an iterator with no entries.
func (d *Dictionary) Iterator() segment.DictionaryIterator {
	rv := &DictionaryIterator{d: d}
	if d.fst == nil {
		return rv
	}

	itr, err := d.fst.Iterator(nil, nil)
	switch err {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// nothing to iterate; leave rv without an underlying iterator
	default:
		rv.err = err
	}
	return rv
}
// PrefixIterator returns an iterator which only visits terms having the
// specified prefix.
//
// The prefix is matched literally, byte for byte: the terms are selected
// with an FST key-range scan over [prefix, successor(prefix)) instead of
// compiling prefix+".*" as a regular expression, so regexp metacharacters in
// the prefix (".", "(", "*", ...) neither change the match nor cause a
// compile error. A dictionary without an FST yields an iterator with no
// entries.
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
	rv := &DictionaryIterator{d: d}
	if d.fst == nil {
		return rv
	}

	// nil bounds mean "unbounded" (Iterator passes nil, nil to visit all)
	var kBeg, kEnd []byte
	if prefix != "" {
		kBeg = []byte(prefix)
	}

	// The exclusive upper bound is the smallest key greater than every key
	// starting with prefix: drop trailing 0xff bytes and increment the last
	// remaining byte. If nothing remains (empty or all-0xff prefix) there is
	// no such key and the range is left open-ended.
	for i := len(kBeg) - 1; i >= 0; i-- {
		if kBeg[i] != 0xff {
			kEnd = append([]byte(nil), kBeg[:i+1]...)
			kEnd[i]++
			break
		}
	}

	itr, err := d.fst.Iterator(kBeg, kEnd)
	switch err {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// no term has this prefix; leave rv without an underlying iterator
	default:
		rv.err = err
	}
	return rv
}
// RangeIterator returns an iterator which only visits terms between the
// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
//
// An empty end means there is no upper bound (previously an empty end
// caused an index-out-of-range panic). A dictionary without an FST yields
// an iterator with no entries.
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
	rv := &DictionaryIterator{d: d}

	// The FST iterator takes an exclusive end key, so the end position has
	// to be moved past `end` to make it inclusive. A nil end key leaves the
	// range open-ended.
	var endBytes []byte
	if len(end) > 0 {
		endBytes = []byte(end)
		if last := len(endBytes) - 1; endBytes[last] < 0xff {
			endBytes[last]++
		} else {
			endBytes = append(endBytes, 0xff)
		}
	}

	if d.fst == nil {
		return rv
	}

	itr, err := d.fst.Iterator([]byte(start), endBytes)
	switch err {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// empty range; leave rv without an underlying iterator
	default:
		rv.err = err
	}
	return rv
}
// RegexpIterator returns an iterator which only visits terms matching the
// specified regex. A regex that fails to compile is reported through the
// returned iterator's error. A dictionary without an FST yields an iterator
// with no entries.
func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator {
	rv := &DictionaryIterator{d: d}
	if d.fst == nil {
		return rv
	}

	automaton, compileErr := regexp.New(regex)
	if compileErr != nil {
		rv.err = compileErr
		return rv
	}

	itr, searchErr := d.fst.Search(automaton, nil, nil)
	switch searchErr {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// no matching term; leave rv without an underlying iterator
	default:
		rv.err = searchErr
	}
	return rv
}
// FuzzyIterator returns an iterator which only visits terms within the
// specified edit/levenshtein distance of term. A failure to build the
// levenshtein automaton is reported through the returned iterator's error.
// A dictionary without an FST yields an iterator with no entries.
func (d *Dictionary) FuzzyIterator(term string,
	fuzziness int) segment.DictionaryIterator {
	rv := &DictionaryIterator{d: d}
	if d.fst == nil {
		return rv
	}

	automaton, buildErr := levenshtein.New(term, fuzziness)
	if buildErr != nil {
		rv.err = buildErr
		return rv
	}

	itr, searchErr := d.fst.Search(automaton, nil, nil)
	switch searchErr {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// no matching term; leave rv without an underlying iterator
	default:
		rv.err = searchErr
	}
	return rv
}
// OnlyIterator returns an iterator which only visits the dictionary terms
// that also appear in onlyTerms. When includeCount is false the iterator
// skips loading each term's postings to fill in the entry count.
//
// The terms are loaded into a temporary in-memory FST which is then used as
// the search automaton over the dictionary's FST. Any failure while building
// or loading that FST is reported through the returned iterator's error.
// NOTE(review): terms are inserted in the order given; the vellum builder
// presumably requires them in sorted order - confirm against its docs.
//
// Like the other iterator constructors, a dictionary without an FST yields
// an iterator with no entries (previously this dereferenced the nil FST).
func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
	includeCount bool) segment.DictionaryIterator {
	rv := &DictionaryIterator{
		d:         d,
		omitCount: !includeCount,
	}

	var buf bytes.Buffer
	builder, err := vellum.New(&buf, nil)
	if err != nil {
		rv.err = err
		return rv
	}
	for _, term := range onlyTerms {
		err = builder.Insert(term, 0)
		if err != nil {
			rv.err = err
			return rv
		}
	}
	err = builder.Close()
	if err != nil {
		rv.err = err
		return rv
	}

	onlyFST, err := vellum.Load(buf.Bytes())
	if err != nil {
		rv.err = err
		return rv
	}

	// nothing to search when this dictionary has no FST
	if d.fst == nil {
		return rv
	}

	itr, err := d.fst.Search(onlyFST, nil, nil)
	switch err {
	case nil:
		rv.itr = itr
	case vellum.ErrIteratorDone:
		// none of the terms are present; leave rv without an iterator
	default:
		rv.err = err
	}
	return rv
}
// DictionaryIterator is an iterator for term dictionary
type DictionaryIterator struct {
// d is the dictionary being iterated
d *Dictionary
// itr is the underlying vellum iterator; nil means there is nothing to visit
itr vellum.Iterator
// err holds the last error; vellum.ErrIteratorDone marks normal exhaustion
err error
// tmp is a scratch postings list reused to obtain each term's count
tmp PostingsList
// entry is the single DictEntry that Next fills in and returns a pointer to
entry index.DictEntry
// omitCount skips reading postings, leaving entry.Count unset by Next
omitCount bool
}
// Next returns the next entry in the dictionary, or (nil, nil) once the
// iterator is exhausted or has nothing to visit. A previously recorded
// error (other than normal exhaustion) is returned again on every call.
//
// The returned pointer refers to storage owned by the iterator and is
// overwritten by the following call to Next.
func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
	switch {
	case i.err != nil && i.err != vellum.ErrIteratorDone:
		return nil, i.err
	case i.itr == nil || i.err == vellum.ErrIteratorDone:
		return nil, nil
	}

	key, offset := i.itr.Current()
	i.entry.Term = string(key)

	if !i.omitCount {
		// the count comes from the term's postings list
		if i.err = i.tmp.read(offset, i.d); i.err != nil {
			return nil, i.err
		}
		i.entry.Count = i.tmp.Count()
	}

	// step ahead now; the outcome is examined by the next call
	i.err = i.itr.Next()
	return &i.entry, nil
}

View file

@ -1,309 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
"github.com/golang/snappy"
)
// reflectStaticSizedocValueReader is the static size in bytes of a
// docValueReader value, computed once in init.
var reflectStaticSizedocValueReader int
// init records the reflect-reported size of a zero docValueReader.
func init() {
var dvi docValueReader
reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size())
}
// docNumTermsVisitor is a callback receiving a document number together
// with that document's raw term bytes; a returned error is propagated by
// the iteration that invoked it.
type docNumTermsVisitor func(docNum uint64, terms []byte) error
// docVisitState bundles per-field docValue readers (keyed by a uint16 field
// id) with the segment they belong to. NOTE(review): presumably cached
// between visits so readers can be reused - confirm against its users.
type docVisitState struct {
dvrs map[uint16]*docValueReader
segment *Segment
}
// docValueReader reads the docValues of a single field, keeping one chunk
// loaded at a time.
type docValueReader struct {
// field is the name of the field
field string
// curChunkNum is the number of the loaded chunk; math.MaxUint64 is used
// as the initial value when no chunk has been loaded
curChunkNum uint64
// chunkOffsets holds one offset per chunk, read at load time
chunkOffsets []uint64
// dvDataLoc is the location in the segment's memory where this field's
// docValue data starts
dvDataLoc uint64
// curChunkHeader holds the MetaData entries of the loaded chunk
curChunkHeader []MetaData
curChunkData []byte // compressed data cache
uncompressed []byte // temp buf for snappy decompression
}
// size returns the memory accounted to this reader in bytes: the static
// struct size plus one pointer, plus the field name, chunk offsets, loaded
// chunk header and loaded (compressed) chunk data.
func (di *docValueReader) size() int {
	total := reflectStaticSizedocValueReader + size.SizeOfPtr
	total += len(di.field)
	total += len(di.chunkOffsets) * size.SizeOfUint64
	total += len(di.curChunkHeader) * reflectStaticSizeMetaData
	total += len(di.curChunkData)
	return total
}
// cloneInto copies this reader's immutable configuration into rv (a new
// reader is allocated when rv is nil) and returns it with no chunk loaded.
// The destination's header and decompression buffers are truncated, not
// replaced, so their capacity is reused.
func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader {
	dst := rv
	if dst == nil {
		dst = new(docValueReader)
	}

	// shared, read-only configuration
	dst.field = di.field
	dst.dvDataLoc = di.dvDataLoc
	dst.chunkOffsets = di.chunkOffsets // immutable, so it's sharable

	// per-reader state: start out with no chunk loaded
	dst.curChunkNum = math.MaxUint64
	dst.curChunkData = nil
	dst.curChunkHeader = dst.curChunkHeader[:0]
	dst.uncompressed = dst.uncompressed[:0]

	return dst
}
// fieldName returns the name of the field this reader serves.
func (di *docValueReader) fieldName() string {
return di.field
}
// curChunkNumber returns the number of the currently loaded chunk.
func (di *docValueReader) curChunkNumber() uint64 {
return di.curChunkNum
}
// loadFieldDocValueReader builds a docValueReader for field from the
// docValue section located at s.mem[fieldDvLocStart:fieldDvLocEnd].
//
// The section ends with two big endian uint64 values: the byte length of
// the chunk offsets and the number of chunks. The chunk offsets themselves
// (uvarints) sit immediately before those two values. An error is returned
// when the field has no docValues (fieldDvLocStart is the
// fieldNotUninverted sentinel) or when a chunk offset cannot be decoded.
func (s *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted {
return nil, fmt.Errorf("loadFieldDocValueReader: "+
"no docValues found for field: %s", field)
}
// read the number of chunks, and chunk offsets position
// (sections of 16 bytes or less are treated as having zero chunks)
var numChunks, chunkOffsetsPosition uint64
if fieldDvLocEnd-fieldDvLocStart > 16 {
numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
// read the length of chunk offsets
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
}
fdvIter := &docValueReader{
curChunkNum: math.MaxUint64,
field: field,
chunkOffsets: make([]uint64, int(numChunks)),
}
// read the chunk offsets
// NOTE(review): each decode slices MaxVarintLen64 bytes of s.mem, which
// assumes at least that many bytes follow the position - confirm.
var offset uint64
for i := 0; i < int(numChunks); i++ {
loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
if read <= 0 {
return nil, fmt.Errorf("corrupted chunk offset during segment load")
}
fdvIter.chunkOffsets[i] = loc
offset += uint64(read)
}
// set the data offset
fdvIter.dvDataLoc = fieldDvLocStart
return fdvIter, nil
}
// loadDvChunk makes chunkNumber the reader's current chunk: it decodes the
// chunk's metadata header into di.curChunkHeader and points di.curChunkData
// at the chunk's compressed bytes inside s.mem. Decompression is left to
// the caller; di.uncompressed is only truncated here.
//
// An empty chunk (start >= end) is recorded as loaded with no header and no
// data. An error is returned when the header cannot be decoded; in that
// case the reader is left with no chunk loaded, so stale or partially
// decoded header entries are never attributed to a chunk.
func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error {
	// advance to the chunk where the docValues
	// reside for the given docNum
	destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
	start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
	if start >= end {
		di.curChunkHeader = di.curChunkHeader[:0]
		di.curChunkData = nil
		di.curChunkNum = chunkNumber
		di.uncompressed = di.uncompressed[:0]
		return nil
	}

	destChunkDataLoc += start
	curChunkEnd += end

	// read the number of docs reside in the chunk
	numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
	if read <= 0 {
		return fmt.Errorf("failed to read the chunk")
	}
	chunkMetaLoc := destChunkDataLoc + uint64(read)

	// corrupt invalidates the reader state (the header may already be
	// partially overwritten) and produces the error to return
	corrupt := func() error {
		di.curChunkHeader = di.curChunkHeader[:0]
		di.curChunkData = nil
		di.curChunkNum = math.MaxUint64
		di.uncompressed = di.uncompressed[:0]
		return fmt.Errorf("corrupted chunk meta during chunk load")
	}

	offset := uint64(0)
	if cap(di.curChunkHeader) < int(numDocs) {
		di.curChunkHeader = make([]MetaData, int(numDocs))
	} else {
		di.curChunkHeader = di.curChunkHeader[:int(numDocs)]
	}
	for i := 0; i < int(numDocs); i++ {
		// binary.Uvarint reports failure with read <= 0; adding that to the
		// offset unchecked would send every later read to a bogus location
		di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
		if read <= 0 {
			return corrupt()
		}
		offset += uint64(read)
		di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
		if read <= 0 {
			return corrupt()
		}
		offset += uint64(read)
	}

	compressedDataLoc := chunkMetaLoc + offset
	if compressedDataLoc > curChunkEnd {
		// the header claims to extend past the end of the chunk; the
		// unsigned length below would underflow
		return corrupt()
	}
	dataLength := curChunkEnd - compressedDataLoc
	di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
	di.curChunkNum = chunkNumber
	di.uncompressed = di.uncompressed[:0]
	return nil
}
// iterateAllDocValues loads every chunk of the field in order, decompresses
// it and calls visitor once per document recorded in the chunk header with
// the document number and that document's slice of the uncompressed data.
// Chunks without data or without header entries are skipped. The first
// error from loading, decompressing or the visitor stops the iteration and
// is returned.
func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error {
	for chunk := 0; chunk < len(di.chunkOffsets); chunk++ {
		if err := di.loadDvChunk(uint64(chunk), s); err != nil {
			return err
		}
		if di.curChunkData == nil || len(di.curChunkHeader) == 0 {
			continue
		}

		// decompress into the reusable buffer and keep the result so the
		// capacity can be reused for the next chunk
		decoded, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
		if err != nil {
			return err
		}
		di.uncompressed = decoded

		// each header entry stores the end offset of its document's data;
		// the data begins where the previous document's data ended
		var from uint64
		for _, meta := range di.curChunkHeader {
			if err := visitor(meta.DocNum, decoded[from:meta.DocDvOffset]); err != nil {
				return err
			}
			from = meta.DocDvOffset
		}
	}
	return nil
}
// visitDocValues calls visitor for every term stored for docNum in the
// currently loaded chunk. Nothing happens when the document is not present
// in the chunk header or has no data. The chunk is decompressed on first
// use and the result is cached on the reader for subsequent documents of
// the same chunk. Only a decompression failure is reported as an error.
func (di *docValueReader) visitDocValues(docNum uint64,
	visitor index.DocumentFieldTermVisitor) error {
	// binary search the chunk header for the doc's data boundaries
	start, end := di.getDocValueLocs(docNum)
	if start == math.MaxUint64 || end == math.MaxUint64 || start == end {
		return nil
	}

	// decompress lazily; a non-empty buffer means this chunk was already decoded
	if len(di.uncompressed) == 0 {
		decoded, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
		if err != nil {
			return err
		}
		di.uncompressed = decoded
	}

	// walk the separator-terminated terms of this document; bytes after the
	// last separator are not reported
	terms := di.uncompressed[start:end]
	for {
		sep := bytes.Index(terms, termSeparatorSplitSlice)
		if sep < 0 {
			return nil
		}
		visitor(di.field, terms[:sep])
		terms = terms[sep+1:]
	}
}
// getDocValueLocs binary searches the current chunk header for docNum and
// returns the boundaries of its doc value data as computed by
// ReadDocValueBoundary. When the document is not in the header both results
// are math.MaxUint64.
func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
	header := di.curChunkHeader
	pos := sort.Search(len(header), func(j int) bool {
		return header[j].DocNum >= docNum
	})
	if pos == len(header) || header[pos].DocNum != docNum {
		return math.MaxUint64, math.MaxUint64
	}
	return ReadDocValueBoundary(pos, header)
}
// VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface.
//
// For each requested field that has a doc value reader in this segment, the
// visitor is called with every persisted term of document localDocNum.
// Fields unknown to the segment, or without doc values, are skipped.
//
// dvsIn may carry state returned by a previous call; it is reused when it
// is a *docVisitState belonging to this segment, which avoids re-cloning the
// readers and re-loading a chunk that is already loaded. State of any other
// type, a nil state, or state from another segment is replaced/reset. The
// state to pass to the next call is always returned, including alongside an
// error. Note that the readers are created from the fields of the call that
// builds the state; fields first requested in later calls with the same
// state have no reader and are skipped.
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
	visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
	segment.DocVisitState, error) {
	dvs, ok := dvsIn.(*docVisitState)
	if !ok || dvs == nil {
		// record the owning segment right away; otherwise the next call
		// would see a segment mismatch and throw away the readers (and the
		// loaded chunk) prepared by this call
		dvs = &docVisitState{segment: s}
	} else if dvs.segment != s {
		dvs.segment = s
		dvs.dvrs = nil
	}

	var fieldIDPlus1 uint16
	if dvs.dvrs == nil {
		// clone the segment's readers so this visit state owns its own
		// per-chunk buffers
		dvs.dvrs = make(map[uint16]*docValueReader, len(fields))
		for _, field := range fields {
			if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
				continue
			}
			fieldID := fieldIDPlus1 - 1
			if dvIter, exists := s.fieldDvReaders[fieldID]; exists &&
				dvIter != nil {
				dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
			}
		}
	}

	// find the chunkNumber where the docValues are stored
	docInChunk := localDocNum / uint64(s.chunkFactor)
	var dvr *docValueReader
	for _, field := range fields {
		if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
			continue
		}
		fieldID := fieldIDPlus1 - 1
		if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
			// check if the chunk is already loaded
			if docInChunk != dvr.curChunkNumber() {
				err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
				if err != nil {
					return dvs, err
				}
			}

			// NOTE(review): a decompression error for one field is
			// deliberately ignored here so the remaining fields are still
			// visited; confirm this best-effort behaviour is intended.
			_ = dvr.visitDocValues(localDocNum, visitor)
		}
	}
	return dvs, nil
}
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
// The returned slice is the segment's own fieldDvNames slice (not a copy)
// and the returned error is always nil.
func (s *Segment) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil
}

View file

@ -1,124 +0,0 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"github.com/couchbase/vellum"
)
// enumerator provides an ordered traversal of multiple vellum
// iterators. Like JOIN of iterators, the enumerator produces a
// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC,
// then iteratorIndex ASC, where the same key might be seen or
// repeated across multiple child iterators.
type enumerator struct {
// itrs are the child iterators being merged
itrs []vellum.Iterator
// currKs[i] is the current key of itrs[i]; nil entries are skipped when
// looking for the lowest key
currKs [][]byte
// currVs[i] is the current value of itrs[i]
currVs []uint64
// lowK is the lowest key among currKs, or nil when no child has a key
lowK []byte
// lowIdxs holds, in ascending order, the indexes of the child iterators
// whose current key equals lowK
lowIdxs []int
// lowCurr is the position within lowIdxs that Current reports
lowCurr int
}
// newEnumerator returns a new enumerator over the vellum Iterators,
// positioned on the lowest key currently exposed by any of them. When none
// of the iterators has a current key the enumerator is still returned, but
// together with vellum.ErrIteratorDone.
func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) {
	n := len(itrs)
	e := &enumerator{
		itrs:    itrs,
		currKs:  make([][]byte, n),
		currVs:  make([]uint64, n),
		lowIdxs: make([]int, 0, n),
	}

	// snapshot where every child iterator currently stands
	for i := range e.itrs {
		e.currKs[i], e.currVs[i] = e.itrs[i].Current()
	}
	e.updateMatches()

	if e.lowK == nil {
		return e, vellum.ErrIteratorDone
	}
	return e, nil
}
// updateMatches recomputes lowK (the smallest non-nil key in currKs) and
// lowIdxs (the indexes of all children currently on that key), and rewinds
// lowCurr to the first of them.
func (m *enumerator) updateMatches() {
	m.lowK = nil
	m.lowIdxs = m.lowIdxs[:0]
	m.lowCurr = 0

	for i, key := range m.currKs {
		if key == nil {
			continue
		}
		switch cmp := bytes.Compare(key, m.lowK); {
		case m.lowK == nil || cmp < 0:
			// first key seen, or a new lowest key: restart the match list
			m.lowK = key
			m.lowIdxs = append(m.lowIdxs[:0], i)
		case cmp == 0:
			// another child sitting on the current lowest key
			m.lowIdxs = append(m.lowIdxs, i)
		}
	}
}
// Current returns the enumerator's current key, iterator-index, and
// value. If the enumerator is not pointing at a valid value (because
// Next returned an error previously), Current will return nil,0,0.
func (m *enumerator) Current() ([]byte, int, uint64) {
	if m.lowCurr >= len(m.lowIdxs) {
		// no child iterator is selected: only the key is reported
		return m.lowK, 0, 0
	}
	idx := m.lowIdxs[m.lowCurr]
	return m.lowK, idx, m.currVs[idx]
}
// Next advances the enumerator to the next key/iterator/value result,
// else vellum.ErrIteratorDone is returned. Errors other than
// vellum.ErrIteratorDone reported by a child iterator are returned as is.
func (m *enumerator) Next() error {
	m.lowCurr++

	if m.lowCurr >= len(m.lowIdxs) {
		// every child on the current low key has been reported: step each
		// of them forward and pick the next lowest key
		for _, vi := range m.lowIdxs {
			itr := m.itrs[vi]
			if err := itr.Next(); err != nil && err != vellum.ErrIteratorDone {
				return err
			}
			m.currKs[vi], m.currVs[vi] = itr.Current()
		}
		m.updateMatches()
	}

	if m.lowK == nil {
		return vellum.ErrIteratorDone
	}
	return nil
}
// Close closes every underlying iterator, even after one of them fails, and
// returns the first error encountered, if any.
func (m *enumerator) Close() error {
	var first error
	for _, itr := range m.itrs {
		if err := itr.Close(); first == nil {
			first = err
		}
	}
	return first
}

View file

@ -1,172 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"io"
)
// chunkedIntCoder accumulates encoded bytes grouped into chunks of doc
// numbers and writes them out preceded by a table of chunk end offsets.
type chunkedIntCoder struct {
// final holds the concatenated bytes of every chunk closed so far
final []byte
// chunkSize is the divisor that maps a doc number to its chunk
chunkSize uint64
// chunkBuf collects the bytes of the chunk currently being filled
chunkBuf bytes.Buffer
// chunkLens holds the byte length of each chunk; Write converts it in
// place into end offsets
chunkLens []uint64
// currChunk is the index of the chunk being filled; Close sets it to
// cap(chunkLens), one past the last valid index
currChunk uint64
// buf is scratch space for varint encoding
buf []byte
}
// newChunkedIntCoder returns a new chunk int coder which packs data into
// chunks based on the provided chunkSize and supports up to the specified
// maxDocNum. One length slot is reserved for every chunk from 0 through
// maxDocNum/chunkSize.
func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder {
	numChunks := maxDocNum/chunkSize + 1
	return &chunkedIntCoder{
		chunkSize: chunkSize,
		chunkLens: make([]uint64, numChunks),
		final:     make([]byte, 0, 64),
	}
}
// Reset lets you reuse this chunked int coder. Buffers are emptied but keep
// their capacity, all chunk lengths are zeroed and the current chunk goes
// back to 0. The chunk size and max doc num cannot be changed.
func (c *chunkedIntCoder) Reset() {
	for i := 0; i < len(c.chunkLens); i++ {
		c.chunkLens[i] = 0
	}
	c.currChunk = 0
	c.chunkBuf.Reset()
	c.final = c.final[:0]
}
// Add encodes the provided integers as uvarints into the chunk that docNum
// belongs to. You MUST call Add() with increasing docNums: moving to a
// different chunk closes the chunk being filled and starts a new one.
func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
	if target := docNum / c.chunkSize; target != c.currChunk {
		// starting a new chunk: flush the previous one first
		c.Close()
		c.chunkBuf.Reset()
		c.currChunk = target
	}

	// make sure the scratch buffer can hold the largest possible varint
	if len(c.buf) < binary.MaxVarintLen64 {
		c.buf = make([]byte, binary.MaxVarintLen64)
	}

	for i := range vals {
		n := binary.PutUvarint(c.buf, vals[i])
		if _, err := c.chunkBuf.Write(c.buf[:n]); err != nil {
			return err
		}
	}
	return nil
}
// AddBytes appends already-encoded bytes to the chunk that docNum belongs
// to. As with Add, moving to a different chunk closes the chunk being
// filled and starts a new one.
func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
	if target := docNum / c.chunkSize; target != c.currChunk {
		// starting a new chunk: flush the previous one first
		c.Close()
		c.chunkBuf.Reset()
		c.currChunk = target
	}

	_, err := c.chunkBuf.Write(buf)
	return err
}
// Close indicates you are done calling Add() for the current chunk: its
// byte length is recorded and its bytes are appended to the final output.
// Afterwards currChunk is set to cap(chunkLens), an index no chunk can
// have, so that closing again without a Reset or a new chunk is detectable.
func (c *chunkedIntCoder) Close() {
	c.chunkLens[c.currChunk] = uint64(c.chunkBuf.Len())
	c.final = append(c.final, c.chunkBuf.Bytes()...)
	c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close
}
// Write commits all the encoded chunked integers to the provided writer:
// first the number of chunks and each chunk's end offset as uvarints, then
// the chunk data itself. It returns the total number of bytes written.
// The stored chunk lengths are converted to end offsets in place, so the
// coder must be Reset before being written again.
func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
	// room for the chunk count plus one uvarint per chunk
	if need := binary.MaxVarintLen64 * (1 + len(c.chunkLens)); len(c.buf) < need {
		c.buf = make([]byte, need)
	}
	header := c.buf

	// convert the chunk lengths into chunk offsets
	endOffsets := modifyLengthsToEndOffsets(c.chunkLens)

	// encode the number of chunks followed by each chunk offset
	used := binary.PutUvarint(header, uint64(len(endOffsets)))
	for i := range endOffsets {
		used += binary.PutUvarint(header[used:], endOffsets[i])
	}

	written, err := w.Write(header[:used])
	if err != nil {
		return written, err
	}

	// write out the data
	n, err := w.Write(c.final)
	return written + n, err
}
// FinalSize returns the number of bytes accumulated in the final output,
// i.e. the bytes of all chunks closed so far; bytes of a chunk that has not
// been closed yet are not counted.
func (c *chunkedIntCoder) FinalSize() int {
return len(c.final)
}
// modifyLengthsToEndOffsets turns a slice of chunk lengths into a slice of
// chunk end offsets by replacing every element with the running total of
// the lengths up to and including it. The conversion happens in place and
// the same slice is returned.
//
// readChunkBoundary recovers the extent of chunk i from the result: it ends
// at element i and starts at element i-1 (or at zero for the first chunk).
// eg:
// Lens -> 5 5 5 5 => 5 10 15 20
// Lens -> 0 5 0 5 => 0 5 5 10
// Lens -> 0 0 0 5 => 0 0 0 5
// Lens -> 5 0 0 0 => 5 5 5 5
// Lens -> 0 5 0 0 => 0 5 5 5
// Lens -> 0 0 5 0 => 0 0 5 5
func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
	var total uint64
	for i := range lengths {
		total += lengths[i]
		lengths[i] = total
	}
	return lengths
}
// readChunkBoundary returns the start and end offsets of the given chunk
// from a slice of chunk end offsets: a chunk ends at its own entry and
// starts where the previous chunk ended, the first chunk starting at zero.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	if chunk <= 0 {
		return 0, offsets[chunk]
	}
	return offsets[chunk-1], offsets[chunk]
}

View file

@ -1,821 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"math"
"os"
"sort"
"github.com/RoaringBitmap/roaring"
"github.com/couchbase/vellum"
"github.com/golang/snappy"
)
// DefaultFileMergerBufferSize is the size, in bytes, of the buffered writer
// that MergeSegmentBases places in front of the merged segment file.
var DefaultFileMergerBufferSize = 1024 * 1024
// docDropped marks, in the per-segment docNum remapping tables, a document
// that is deleted and therefore has no docNum in the merged segment.
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which
// documents may be dropped, and creates a new segment containing the
// remaining data. This new segment is built at the specified path,
// with the provided chunkFactor. It is a thin wrapper that hands the
// SegmentBase of every segment to MergeSegmentBases and returns its results
// unchanged.
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
	chunkFactor uint32) ([][]uint64, uint64, error) {
	bases := make([]*SegmentBase, 0, len(segments))
	for _, seg := range segments {
		bases = append(bases, &seg.SegmentBase)
	}
	return MergeSegmentBases(bases, drops, path, chunkFactor)
}
// MergeSegmentBases merges the given segment bases, minus the documents
// flagged in drops, into a new segment file at path using the provided
// chunkFactor.
//
// It returns, per input segment, the table mapping old doc numbers to the
// doc numbers in the merged segment, and the number of bytes written. On
// any failure the partially written file is closed and removed and the
// error is returned.
//
// The file is created if needed and truncated if it already exists: the
// footer is the last thing written, so bytes left over from a longer file
// previously stored at path would otherwise trail the footer and leave a
// file that no longer ends with it.
func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string,
	chunkFactor uint32) ([][]uint64, uint64, error) {
	flag := os.O_RDWR | os.O_CREATE | os.O_TRUNC
	f, err := os.OpenFile(path, flag, 0600)
	if err != nil {
		return nil, 0, err
	}

	// cleanup discards the output file; errors are ignored because the
	// original failure is the one reported to the caller
	cleanup := func() {
		_ = f.Close()
		_ = os.Remove(path)
	}

	// buffer the output
	br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize)

	// wrap it for counting (tracking offsets)
	cr := NewCountHashWriter(br)

	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err :=
		MergeToWriter(segmentBases, drops, chunkFactor, cr)
	if err != nil {
		cleanup()
		return nil, 0, err
	}

	// the footer records the section offsets and the checksum of
	// everything written so far
	err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset,
		docValueOffset, chunkFactor, cr.Sum32(), cr)
	if err != nil {
		cleanup()
		return nil, 0, err
	}

	// push the data through the buffer, to stable storage, and release
	// the file handle
	err = br.Flush()
	if err != nil {
		cleanup()
		return nil, 0, err
	}

	err = f.Sync()
	if err != nil {
		cleanup()
		return nil, 0, err
	}

	err = f.Close()
	if err != nil {
		cleanup()
		return nil, 0, err
	}

	return newDocNums, uint64(cr.Count()), nil
}
// MergeToWriter merges the given segments, minus the documents flagged in
// drops, and writes the stored documents, postings/dictionaries, doc values
// and the fields index to cr.
//
// It returns the per-segment doc number remapping, the number of documents
// in the merged segment, the offsets of the stored index, fields index and
// doc value sections, the dictionary location of every field, the merged
// field list and the field name to fieldID+1 map. When the merged segment
// holds no documents only the fields index is written and docValueOffset
// stays at fieldNotUninverted. On error every other result is zero/nil.
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
	chunkFactor uint32, cr *CountHashWriter) (
	newDocNums [][]uint64,
	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
	dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
	err error) {
	// fail produces the all-zero result set that accompanies an error
	fail := func(cause error) ([][]uint64, uint64, uint64, uint64, uint64,
		[]uint64, []string, map[string]uint16, error) {
		return nil, 0, 0, 0, 0, nil, nil, nil, cause
	}

	var fieldsSame bool
	fieldsSame, fieldsInv = mergeFields(segments)
	fieldsMap = mapFields(fieldsInv)
	numDocs = computeNewDocCount(segments, drops)
	docValueOffset = uint64(fieldNotUninverted)

	if numDocs == 0 {
		// nothing survives the merge: every field gets a zero dict location
		dictLocs = make([]uint64, len(fieldsInv))
	} else {
		storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
			fieldsMap, fieldsInv, fieldsSame, numDocs, cr)
		if err != nil {
			return fail(err)
		}

		dictLocs, docValueOffset, err = persistMergedRest(segments, drops,
			fieldsInv, fieldsMap, fieldsSame,
			newDocNums, numDocs, chunkFactor, cr)
		if err != nil {
			return fail(err)
		}
	}

	fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
	if err != nil {
		return fail(err)
	}

	return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil
}
// mapFields takes the fieldsInv list and returns a map of fieldName
// to fieldID+1, where a field's ID is its position in the list. Storing
// ID+1 keeps the map's zero value free to mean "field not present".
func mapFields(fields []string) map[string]uint16 {
	ids := make(map[string]uint16, len(fields))
	for pos := range fields {
		ids[fields[pos]] = uint16(pos) + 1
	}
	return ids
}
// computeNewDocCount determines how many documents will be in the newly
// merged segment when obsoleted docs are dropped: the sum over all segments
// of the segment's document count minus the cardinality of its drop bitmap
// (a nil bitmap drops nothing).
func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
	var total uint64
	for i := range segments {
		total += segments[i].numDocs
		if dropped := drops[i]; dropped != nil {
			total -= dropped.GetCardinality()
		}
	}
	return total
}
// persistMergedRest writes, for every field of the merged segment, the
// merged postings lists, the term dictionary (a vellum FST) and the doc
// values, followed by a table of the doc value start/end locations of all
// fields.
//
// newDocNumsIn holds, per input segment, the mapping from old doc numbers to
// doc numbers in the merged segment; dropsIn holds the deleted documents of
// each segment. newSegDocCount is the number of documents in the merged
// segment and is used to size the chunked encoders.
//
// It returns the dictionary offset of every field (indexed by field ID) and
// the offset of the doc value locations table.
func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool,
newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32,
w *CountHashWriter) ([]uint64, uint64, error) {
// scratch buffers and iterators reused across terms and fields
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
var bufLoc []uint64
var postings *PostingsList
var postItr *PostingsIterator
// rv[fieldID] is the offset at which the field's dictionary was written
rv := make([]uint64, len(fieldsInv))
fieldDvLocsStart := make([]uint64, len(fieldsInv))
fieldDvLocsEnd := make([]uint64, len(fieldsInv))
// encoders for freq/norm and location data, reset after every term
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
// the FST builder writes into vellumBuf; both are reset after every field
var vellumBuf bytes.Buffer
newVellum, err := vellum.New(&vellumBuf, nil)
if err != nil {
return nil, 0, err
}
// docs of the merged segment that contain the term being processed
newRoaring := roaring.NewBitmap()
// for each field
for fieldID, fieldName := range fieldsInv {
// collect FST iterators from all active segments for this field
// (the slices below are parallel: index i of each refers to the same
// segment)
var newDocNums [][]uint64
var drops []*roaring.Bitmap
var dicts []*Dictionary
var itrs []vellum.Iterator
var segmentsInFocus []*SegmentBase
for segmentI, segment := range segments {
dict, err2 := segment.dictionary(fieldName)
if err2 != nil {
return nil, 0, err2
}
if dict != nil && dict.fst != nil {
itr, err2 := dict.fst.Iterator(nil, nil)
if err2 != nil && err2 != vellum.ErrIteratorDone {
return nil, 0, err2
}
if itr != nil {
newDocNums = append(newDocNums, newDocNumsIn[segmentI])
// an empty drops bitmap is normalized to nil
if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() {
drops = append(drops, dropsIn[segmentI])
} else {
drops = append(drops, nil)
}
dicts = append(dicts, dict)
itrs = append(itrs, itr)
segmentsInFocus = append(segmentsInFocus, segment)
}
}
}
var prevTerm []byte
newRoaring.Clear()
// details of the most recent hit merged for the current term
var lastDocNum, lastFreq, lastNorm uint64
// determines whether to use "1-hit" encoding optimization
// when a term appears in only 1 doc, with no loc info,
// has freq of 1, and the docNum fits into 31-bits
use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) {
if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 {
docNum := uint64(newRoaring.Minimum())
if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 {
return true, docNum, lastNorm
}
}
return false, 0, 0
}
// finishTerm writes out the postings collected for term, registers the
// term in the new FST and resets the per-term state; a nil term (no
// term processed yet) is a no-op
finishTerm := func(term []byte) error {
if term == nil {
return nil
}
tfEncoder.Close()
locEncoder.Close()
postingsOffset, err := writePostings(newRoaring,
tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64)
if err != nil {
return err
}
// writePostings returns 0 when the term has no remaining docs; such
// terms are left out of the dictionary
if postingsOffset > 0 {
err = newVellum.Insert(term, postingsOffset)
if err != nil {
return err
}
}
newRoaring.Clear()
tfEncoder.Reset()
locEncoder.Reset()
lastDocNum = 0
lastFreq = 0
lastNorm = 0
return nil
}
// walk the terms of all segments in sorted order; the same term may be
// reported once per segment that contains it
enumerator, err := newEnumerator(itrs)
for err == nil {
term, itrI, postingsOffset := enumerator.Current()
if !bytes.Equal(prevTerm, term) {
// if the term changed, write out the info collected
// for the previous term
err2 := finishTerm(prevTerm)
if err2 != nil {
return nil, 0, err2
}
}
var err2 error
postings, err2 = dicts[itrI].postingsListFromOffset(
postingsOffset, drops[itrI], postings)
if err2 != nil {
return nil, 0, err2
}
postItr = postings.iterator(true, true, true, postItr)
if fieldsSame {
// can optimize by copying freq/norm/loc bytes directly
lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
term, postItr, newDocNums[itrI], newRoaring,
tfEncoder, locEncoder)
} else {
lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
fieldsMap, term, postItr, newDocNums[itrI], newRoaring,
tfEncoder, locEncoder, bufLoc)
}
if err != nil {
return nil, 0, err
}
prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem
prevTerm = append(prevTerm, term...)
err = enumerator.Next()
}
// vellum.ErrIteratorDone is the normal way for the loop above to end
if err != nil && err != vellum.ErrIteratorDone {
return nil, 0, err
}
// flush the last term of the field
err = finishTerm(prevTerm)
if err != nil {
return nil, 0, err
}
dictOffset := uint64(w.Count())
err = newVellum.Close()
if err != nil {
return nil, 0, err
}
vellumData := vellumBuf.Bytes()
// write out the length of the vellum data
n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData)))
_, err = w.Write(bufMaxVarintLen64[:n])
if err != nil {
return nil, 0, err
}
// write this vellum to disk
_, err = w.Write(vellumData)
if err != nil {
return nil, 0, err
}
rv[fieldID] = dictOffset
// get the field doc value offset (start)
fieldDvLocsStart[fieldID] = uint64(w.Count())
// update the field doc values
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true)
fdvReadersAvailable := false
var dvIterClone *docValueReader
for segmentI, segment := range segmentsInFocus {
fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists &&
dvIter != nil {
fdvReadersAvailable = true
// iterate on a clone so the segment's own reader state is untouched
dvIterClone = dvIter.cloneInto(dvIterClone)
err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error {
// doc values of deleted documents are not carried over
if newDocNums[segmentI][docNum] == docDropped {
return nil
}
err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms)
if err != nil {
return err
}
return nil
})
if err != nil {
return nil, 0, err
}
}
}
if fdvReadersAvailable {
err = fdvEncoder.Close()
if err != nil {
return nil, 0, err
}
// persist the doc value details for this field
_, err = fdvEncoder.Write()
if err != nil {
return nil, 0, err
}
// get the field doc value offset (end)
fieldDvLocsEnd[fieldID] = uint64(w.Count())
} else {
// no segment had doc values for this field
fieldDvLocsStart[fieldID] = fieldNotUninverted
fieldDvLocsEnd[fieldID] = fieldNotUninverted
}
// reset vellum buffer and vellum builder
vellumBuf.Reset()
err = newVellum.Reset(&vellumBuf)
if err != nil {
return nil, 0, err
}
}
// finally write the (start, end) doc value locations of every field as
// uvarint pairs; the offset of this table is returned to the caller
fieldDvLocsOffset := uint64(w.Count())
buf := bufMaxVarintLen64
for i := 0; i < len(fieldDvLocsStart); i++ {
n := binary.PutUvarint(buf, fieldDvLocsStart[i])
_, err := w.Write(buf[:n])
if err != nil {
return nil, 0, err
}
n = binary.PutUvarint(buf, fieldDvLocsEnd[i])
_, err = w.Write(buf[:n])
if err != nil {
return nil, 0, err
}
}
return rv, fieldDvLocsOffset, nil
}
// mergeTermFreqNormLocs walks the postings of one term in one segment and
// re-encodes them for the merged segment: every hit's doc number is mapped
// through newDocNums and added to newRoaring, its freq/norm are added to
// tfEncoder and its locations (with field IDs translated through fieldsMap)
// to locEncoder.
//
// bufLoc is scratch space for encoding a location; it is grown when needed
// and handed back as bufLocOut so the caller can reuse it. The term
// parameter is not used by the body.
//
// It returns the doc number, frequency and norm bits of the last hit
// processed. Hitting a document marked as dropped is reported as an error.
// An error from the postings iterator ends the walk and is returned along
// with the values of the last hit processed.
func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
newDocNums []uint64, newRoaring *roaring.Bitmap,
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) (
lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
next, err := postItr.Next()
for next != nil && err == nil {
hitNewDocNum := newDocNums[next.Number()]
if hitNewDocNum == docDropped {
return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum")
}
newRoaring.Add(uint32(hitNewDocNum))
nextFreq := next.Frequency()
// the norm is stored as the bit pattern of its float32 value
nextNorm := uint64(math.Float32bits(float32(next.Norm())))
locs := next.Locations()
// the frequency is combined with a flag telling whether locations follow
err = tfEncoder.Add(hitNewDocNum,
encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm)
if err != nil {
return 0, 0, 0, nil, err
}
if len(locs) > 0 {
// first pass: total encoded size of all locations of this hit, which
// is written ahead of the locations themselves
numBytesLocs := 0
for _, loc := range locs {
ap := loc.ArrayPositions()
numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1),
loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap)
}
err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs))
if err != nil {
return 0, 0, 0, nil, err
}
// second pass: encode each location as
// fieldID, pos, start, end, number of array positions, array positions
for _, loc := range locs {
ap := loc.ArrayPositions()
if cap(bufLoc) < 5+len(ap) {
bufLoc = make([]uint64, 0, 5+len(ap))
}
args := bufLoc[0:5]
args[0] = uint64(fieldsMap[loc.Field()] - 1)
args[1] = loc.Pos()
args[2] = loc.Start()
args[3] = loc.End()
args[4] = uint64(len(ap))
args = append(args, ap...)
err = locEncoder.Add(hitNewDocNum, args...)
if err != nil {
return 0, 0, 0, nil, err
}
}
}
lastDocNum = hitNewDocNum
lastFreq = nextFreq
lastNorm = nextNorm
next, err = postItr.Next()
}
return lastDocNum, lastFreq, lastNorm, bufLoc, err
}
// mergeTermFreqNormLocsByCopying walks the postings of one term in one
// segment and carries them over to the merged segment without re-encoding:
// every hit's doc number is mapped through newDocNums and added to
// newRoaring, and its raw freq/norm and location bytes are appended to
// tfEncoder and locEncoder as they are.
//
// It returns the doc number, frequency and norm of the last hit processed.
// The walk ends when the iterator reports an error (returned along with the
// values of the last hit) or yields no freq/norm bytes. Hitting a document
// marked as dropped is reported as an error. The term parameter is not used
// by the body.
func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
	newDocNums []uint64, newRoaring *roaring.Bitmap,
	tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) (
	lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
	for {
		docNum, freq, norm, freqNormBytes, locBytes, itrErr := postItr.nextBytes()
		if itrErr != nil || len(freqNormBytes) == 0 {
			return lastDocNum, lastFreq, lastNorm, itrErr
		}

		mapped := newDocNums[docNum]
		if mapped == docDropped {
			return 0, 0, 0, fmt.Errorf("see hit with dropped doc num")
		}
		newRoaring.Add(uint32(mapped))

		if addErr := tfEncoder.AddBytes(mapped, freqNormBytes); addErr != nil {
			return 0, 0, 0, addErr
		}
		if len(locBytes) > 0 {
			if addErr := locEncoder.AddBytes(mapped, locBytes); addErr != nil {
				return 0, 0, 0, addErr
			}
		}

		lastDocNum, lastFreq, lastNorm = mapped, freq, norm
	}
}
// writePostings persists the postings of one term and returns the value to
// store for that term in the dictionary.
//
// A term without documents writes nothing and yields 0. When the optional
// use1HitEncoding callback accepts the term, nothing is written either and
// the result is the 1-hit encoding of the doc number and norm bits.
// Otherwise the freq/norm chunks and the location chunks are written,
// followed by a postings record made of their two offsets (as uvarints) and
// the length-prefixed bitmap; the offset of that record is returned.
func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder,
	use1HitEncoding func(uint64) (bool, uint64, uint64),
	w *CountHashWriter, bufMaxVarintLen64 []byte) (
	offset uint64, err error) {
	cardinality := postings.GetCardinality()
	if cardinality == 0 {
		return 0, nil
	}

	if use1HitEncoding != nil {
		if ok, docNum, normBits := use1HitEncoding(cardinality); ok {
			return FSTValEncode1Hit(docNum, normBits), nil
		}
	}

	tfOffset := uint64(w.Count())
	if _, err = tfEncoder.Write(w); err != nil {
		return 0, err
	}

	locOffset := uint64(w.Count())
	if _, err = locEncoder.Write(w); err != nil {
		return 0, err
	}

	// the postings record starts here: tf offset, loc offset, then bitmap
	offset = uint64(w.Count())
	for _, sectionOffset := range [2]uint64{tfOffset, locOffset} {
		n := binary.PutUvarint(bufMaxVarintLen64, sectionOffset)
		if _, err = w.Write(bufMaxVarintLen64[:n]); err != nil {
			return 0, err
		}
	}

	if _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64); err != nil {
		return 0, err
	}

	return offset, nil
}
// varintEncoder is the signature of a function that encodes one uint64 and
// returns a byte count and an error.
// NOTE(review): presumably the type of the metaEncode callback handed to
// persistStoredFieldValues, which is defined outside this part of the file.
type varintEncoder func(uint64) (int, error)
// mergeStoredAndRemap writes the stored fields of every surviving document
// of the given segments to w, assigning consecutive doc numbers in the
// merged segment, and then writes the stored index (one big-endian uint64
// offset per merged document).
//
// It returns the offset at which the stored index starts and, per input
// segment, the table mapping the segment's doc numbers to doc numbers in the
// merged segment (docDropped for deleted documents).
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
w *CountHashWriter) (uint64, [][]uint64, error) {
var rv [][]uint64 // The remapped or newDocNums for each segment.
var newDocNum uint64
var curr int
var data, compressed []byte
var metaBuf bytes.Buffer
varBuf := make([]byte, binary.MaxVarintLen64)
// metaEncode appends val, uvarint encoded, to the document's meta buffer
metaEncode := func(val uint64) (int, error) {
wb := binary.PutUvarint(varBuf, val)
return metaBuf.Write(varBuf[:wb])
}
// per-field accumulators for the document being processed, indexed by the
// field ID in the merged segment
vals := make([][][]byte, len(fieldsInv))
typs := make([][]byte, len(fieldsInv))
poss := make([][][]uint64, len(fieldsInv))
var posBuf []uint64
// docNumOffsets[n] is where the stored data of merged doc n begins
docNumOffsets := make([]uint64, newSegDocCount)
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
defer visitDocumentCtxPool.Put(vdc)
// for each segment
for segI, segment := range segments {
segNewDocNums := make([]uint64, segment.numDocs)
dropsI := drops[segI]
// optimize when the field mapping is the same across all
// segments and there are no deletions, via byte-copying
// of stored docs bytes directly to the writer
if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) {
err := segment.copyStoredDocs(newDocNum, docNumOffsets, w)
if err != nil {
return 0, nil, err
}
for i := uint64(0); i < segment.numDocs; i++ {
segNewDocNums[i] = newDocNum
newDocNum++
}
rv = append(rv, segNewDocNums)
continue
}
// for each doc num
for docNum := uint64(0); docNum < segment.numDocs; docNum++ {
// TODO: roaring's API limits docNums to 32-bits?
if dropsI != nil && dropsI.Contains(uint32(docNum)) {
segNewDocNums[docNum] = docDropped
continue
}
segNewDocNums[docNum] = newDocNum
curr = 0
metaBuf.Reset()
data = data[:0]
// posTemp is the unused remainder of posBuf for this document
posTemp := posBuf
// collect all the data
for i := 0; i < len(fieldsInv); i++ {
vals[i] = vals[i][:0]
typs[i] = typs[i][:0]
poss[i] = poss[i][:0]
}
err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldID := int(fieldsMap[field]) - 1
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)
// copy array positions to preserve them beyond the scope of this callback
var curPos []uint64
if len(pos) > 0 {
// out of scratch space: allocate a fresh buffer; positions copied
// earlier keep referencing the previous one
if cap(posTemp) < len(pos) {
posBuf = make([]uint64, len(pos)*len(fieldsInv))
posTemp = posBuf
}
curPos = posTemp[0:len(pos)]
copy(curPos, pos)
posTemp = posTemp[len(pos):]
}
poss[fieldID] = append(poss[fieldID], curPos)
return true
})
if err != nil {
return 0, nil, err
}
// _id field special case optimizes ExternalID() lookups
// (field 0 is "_id"; its first value is stored uncompressed and only
// its length goes into the meta section)
idFieldVal := vals[uint16(0)][0]
_, err = metaEncode(uint64(len(idFieldVal)))
if err != nil {
return 0, nil, err
}
// now walk the non-"_id" fields in order
for fieldID := 1; fieldID < len(fieldsInv); fieldID++ {
storedFieldValues := vals[fieldID]
stf := typs[fieldID]
spf := poss[fieldID]
var err2 error
curr, data, err2 = persistStoredFieldValues(fieldID,
storedFieldValues, stf, spf, curr, metaEncode, data)
if err2 != nil {
return 0, nil, err2
}
}
metaBytes := metaBuf.Bytes()
// the compressed buffer is reused from one document to the next
compressed = snappy.Encode(compressed[:cap(compressed)], data)
// record where we're about to start writing
docNumOffsets[newDocNum] = uint64(w.Count())
// write out the meta len and compressed data len
_, err = writeUvarints(w,
uint64(len(metaBytes)),
uint64(len(idFieldVal)+len(compressed)))
if err != nil {
return 0, nil, err
}
// now write the meta
_, err = w.Write(metaBytes)
if err != nil {
return 0, nil, err
}
// now write the _id field val (counted as part of the 'compressed' data)
_, err = w.Write(idFieldVal)
if err != nil {
return 0, nil, err
}
// now write the compressed data
_, err = w.Write(compressed)
if err != nil {
return 0, nil, err
}
newDocNum++
}
rv = append(rv, segNewDocNums)
}
// return value is the start of the stored index
storedIndexOffset := uint64(w.Count())
// now write out the stored doc index
for _, docNumOffset := range docNumOffsets {
err := binary.Write(w, binary.BigEndian, docNumOffset)
if err != nil {
return 0, nil, err
}
}
return storedIndexOffset, rv, nil
}
// copyStoredDocs writes out a segment's stored doc info, optimized by
// using a single Write() call for the entire set of bytes, from the start
// of the first document's stored data to the end of the last document's.
// newDocNumOffsets is filled, starting at index newDocNum, with the
// location each document's data ends up at in w. A segment without
// documents writes nothing.
func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
	w *CountHashWriter) error {
	if s.numDocs <= 0 {
		return nil
	}

	// locate the segment's first and last docs
	firstIdx, firstStored, _, _, _ := s.getDocStoredOffsets(0)
	lastIdx, lastStored, lastRead, lastMetaLen, lastDataLen :=
		s.getDocStoredOffsets(s.numDocs - 1)

	// where the copied block will begin in the output
	base := uint64(w.Count())

	if _, err := w.Write(s.mem[firstStored : lastStored+lastRead+lastMetaLen+lastDataLen]); err != nil {
		return err
	}

	// walk the segment's stored index (8 bytes per doc) and rebase every
	// offset from the old block start to the new one
	for idx := firstIdx; idx <= lastIdx; idx += 8 {
		oldOffset := binary.BigEndian.Uint64(s.mem[idx : idx+8])
		newDocNumOffsets[newDocNum] = oldOffset - firstStored + base
		newDocNum++
	}

	return nil
}
// mergeFields builds a unified list of fields used across all the
// input segments, and computes whether the fields are the same across
// segments (which depends on fields to be sorted in the same way
// across segments). Every field of every segment is compared, by position,
// with the fields of the first segment. The returned list always starts
// with "_id", followed by the remaining field names in sorted order.
func mergeFields(segments []*SegmentBase) (bool, []string) {
	same := true

	var reference []string
	if len(segments) > 0 {
		reference = segments[0].Fields()
	}

	seen := map[string]struct{}{}
	for _, seg := range segments {
		segFields := seg.Fields()
		lengthsDiffer := len(reference) != len(segFields)
		for pos, name := range segFields {
			seen[name] = struct{}{}
			if lengthsDiffer || reference[pos] != name {
				same = false
			}
		}
	}

	merged := make([]string, 0, len(seen))
	// ensure _id stays first
	merged = append(merged, "_id")
	for name := range seen {
		if name == "_id" {
			continue
		}
		merged = append(merged, name)
	}
	sort.Strings(merged[1:]) // leave _id as first

	return same, merged
}

View file

@ -1,826 +0,0 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"math"
"sort"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/couchbase/vellum"
"github.com/golang/snappy"
)
// NewSegmentBufferNumResultsBump is added to the number of analysis
// results when AnalysisResultsToSegmentBase estimates the initial size
// of its output buffer.
var NewSegmentBufferNumResultsBump int = 100
// NewSegmentBufferNumResultsFactor is a multiplier used by
// AnalysisResultsToSegmentBase when estimating the initial size of its
// output buffer.
var NewSegmentBufferNumResultsFactor float64 = 1.0
// NewSegmentBufferAvgBytesPerDocFactor is a multiplier used by
// AnalysisResultsToSegmentBase when estimating the initial size of its
// output buffer.
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results.
//
// It returns the segment, the number of bytes that were encoded, and
// any error hit while converting the results or initializing the
// segment. The working state is taken from interimPool and is only put
// back when both the conversion and the reset of that state succeed.
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
	chunkFactor uint32) (*SegmentBase, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimate
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		// bytes.Buffer.Grow panics on a negative count, which the
		// caller-tunable factors/bump could otherwise produce
		if estimate := estimateAvgBytesPerDoc * estimateNumResults; estimate > 0 {
			br.Grow(estimate)
		}
	}

	s.results = results
	s.chunkFactor = chunkFactor
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	// only recycle the interim when it was left in a clean state; reset()
	// zeroes the last* stats, so they are recorded after it
	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = br.Len()
		interimPool.Put(s)
	}

	return sb, uint64(br.Len()), err
}
// interimPool recycles interim working state between calls to
// AnalysisResultsToSegmentBase.
var interimPool = sync.Pool{
	New: func() interface{} {
		return new(interim)
	},
}
// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment. Instances are recycled
// through interimPool, so most slices keep their capacity across uses
// (see reset).
type interim struct {
// analysis results being converted; index in this slice is the docNum
results []*index.AnalysisResult
chunkFactor uint32
// destination of all encoded bytes
w *CountHashWriter
// FieldsMap adds 1 to field id to avoid zero value issues
// name -> field id + 1
FieldsMap map[string]uint16
// FieldsInv is the inverse of FieldsMap
// field id -> name
FieldsInv []string
// Term dictionaries for each field
// field id -> term -> postings list id + 1
Dicts []map[string]uint64
// Terms for each field, where terms are sorted ascending
// field id -> []term
DictKeys [][]string
// Fields whose IncludeDocValues is true
// field id -> bool
IncludeDocValues []bool
// postings id -> bitmap of docNums
Postings []*roaring.Bitmap
// postings id -> freq/norm's, one for each docNum in postings
FreqNorms [][]interimFreqNorm
// single backing array that the FreqNorms slices are carved from
freqNormsBacking []interimFreqNorm
// postings id -> locs, one for each freq
Locs [][]interimLoc
// single backing array that the Locs slices are carved from
locsBacking []interimLoc
numTermsPerPostingsList []int // key is postings list id
numLocsPerPostingsList []int // key is postings list id
// vellum builder, created lazily in writeDicts and reset after each
// field's dictionary; it writes into builderBuf
builder *vellum.Builder
builderBuf bytes.Buffer
// per-document stored-field metadata, rebuilt for every doc in
// writeStoredFields
metaBuf bytes.Buffer
// reusable scratch buffers
tmp0 []byte
tmp1 []byte
// stats of the previous conversion, used by
// AnalysisResultsToSegmentBase to estimate the next output size
lastNumDocs int
lastOutSize int
}
// reset clears the interim so that it can go back into interimPool.
// References to caller data are dropped, while slices and buffers are
// truncated in place so their capacity can be reused. The returned
// error, if any, comes from resetting the vellum builder.
func (s *interim) reset() (err error) {
	// per-conversion inputs and field tables
	s.results, s.w = nil, nil
	s.chunkFactor = 0
	s.FieldsMap, s.FieldsInv = nil, nil

	// per-field state
	for fieldID := range s.Dicts {
		s.Dicts[fieldID] = nil
	}
	s.Dicts = s.Dicts[:0]

	for fieldID := range s.DictKeys {
		s.DictKeys[fieldID] = s.DictKeys[fieldID][:0]
	}
	s.DictKeys = s.DictKeys[:0]

	for fieldID := range s.IncludeDocValues {
		s.IncludeDocValues[fieldID] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]

	// per-postings-list state; the bitmaps are kept and emptied
	for _, bitmap := range s.Postings {
		bitmap.Clear()
	}
	s.Postings = s.Postings[:0]

	s.FreqNorms = s.FreqNorms[:0]
	var zeroFreqNorm interimFreqNorm
	for k := range s.freqNormsBacking {
		s.freqNormsBacking[k] = zeroFreqNorm
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]

	s.Locs = s.Locs[:0]
	for k := range s.locsBacking {
		s.locsBacking[k] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]

	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]

	// the builder must be re-pointed at the freshly emptied buffer
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}

	// scratch space and size statistics
	s.metaBuf.Reset()
	s.tmp0, s.tmp1 = s.tmp0[:0], s.tmp1[:0]
	s.lastNumDocs, s.lastOutSize = 0, 0

	return err
}
// grabBuf returns a slice of exactly size bytes backed by the tmp0
// scratch buffer, which is reallocated only when it is too small.
func (s *interim) grabBuf(size int) []byte {
	if cap(s.tmp0) < size {
		s.tmp0 = make([]byte, size)
	}
	return s.tmp0[:size]
}
// interimStoredField collects the stored values of one field of one
// document; the three slices are parallel, one entry per stored value.
type interimStoredField struct {
vals [][]byte // raw field values
typs []byte // type code of each value (see encodeFieldType)
arrayposs [][]uint64 // array positions
}
// interimFreqNorm is the frequency/norm entry of one term in one
// document.
type interimFreqNorm struct {
freq uint64 // term frequency within the document's field
norm float32 // norm computed from the field length
numLocs int // number of locations recorded for this entry
}
// interimLoc is one occurrence (location) of a term in a document.
type interimLoc struct {
fieldID uint16 // field the occurrence belongs to
pos uint64 // token position
start uint64 // start offset
end uint64 // end offset
arrayposs []uint64 // array positions
}
// convert encodes the analysis results held by s into s.w and returns,
// in order: the stored index offset, the fields index offset, the field
// doc values index offset, the per-field dictionary offsets, and any
// error encountered while writing.
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	// "_id" is registered first so that it always gets fieldID 0
	s.getOrDefineField("_id")
	for _, result := range s.results {
		doc := result.Document
		for _, field := range doc.CompositeFields {
			s.getOrDefineField(field.Name())
		}
		for _, field := range doc.Fields {
			s.getOrDefineField(field.Name())
		}
	}

	// sort every field except _id by name, then rebuild the
	// name -> id+1 mapping to match the sorted order
	sort.Strings(s.FieldsInv[1:])
	for id := range s.FieldsInv {
		s.FieldsMap[s.FieldsInv[id]] = uint16(id + 1)
	}

	// size IncludeDocValues to one flag per field, reusing capacity
	numFields := len(s.FieldsInv)
	if cap(s.IncludeDocValues) < numFields {
		s.IncludeDocValues = make([]bool, numFields)
	} else {
		s.IncludeDocValues = s.IncludeDocValues[:numFields]
	}

	s.prepareDicts()
	for id := range s.DictKeys {
		sort.Strings(s.DictKeys[id])
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var (
		fdvIndexOffset uint64
		dictOffsets    []uint64
	)
	if len(s.results) == 0 {
		// no documents: there are no dictionaries, only zero offsets
		dictOffsets = make([]uint64, len(s.FieldsInv))
	} else if fdvIndexOffset, dictOffsets, err = s.writeDicts(); err != nil {
		return 0, 0, 0, nil, err
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}
// getOrDefineField returns the field id registered for fieldName,
// registering the field first (with an empty dictionary and an empty
// term list) if it has not been seen before.
func (s *interim) getOrDefineField(fieldName string) int {
	if idPlus1, known := s.FieldsMap[fieldName]; known {
		return int(idPlus1 - 1)
	}

	idPlus1 := uint16(len(s.FieldsInv) + 1)
	s.FieldsMap[fieldName] = idPlus1
	s.FieldsInv = append(s.FieldsInv, fieldName)
	s.Dicts = append(s.Dicts, make(map[string]uint64))

	if n := len(s.DictKeys); n >= cap(s.DictKeys) {
		s.DictKeys = append(s.DictKeys, []string(nil))
	} else {
		// reuse the term slice left in the spare capacity, emptied
		s.DictKeys = s.DictKeys[:n+1]
		s.DictKeys[n] = s.DictKeys[n][:0]
	}

	return int(idPlus1 - 1)
}
// prepareDicts fills Dicts and DictKeys from the analysis results and
// sizes the per-postings-list working slices (Postings, FreqNorms, Locs
// and their backing arrays), reusing existing capacity where possible.
func (s *interim) prepareDicts() {
// number of postings lists assigned so far; also the last id + 1
var pidNext int
// total number of (field, term) entries seen across all documents
var totTFs int
// total number of term locations seen across all documents
var totLocs int
// visitField registers the terms of one field of one document, assigning
// a new postings list id to every term not yet in the field's dictionary
// and updating the per-postings-list term and location counts.
visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
dict := s.Dicts[fieldID]
dictKeys := s.DictKeys[fieldID]
for term, tf := range tfs {
pidPlus1, exists := dict[term]
if !exists {
pidNext++
pidPlus1 = uint64(pidNext)
dict[term] = pidPlus1
dictKeys = append(dictKeys, term)
s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
}
pid := pidPlus1 - 1
s.numTermsPerPostingsList[pid] += 1
s.numLocsPerPostingsList[pid] += len(tf.Locations)
totLocs += len(tf.Locations)
}
totTFs += len(tfs)
// dictKeys may have been reallocated by append, so store it back
s.DictKeys[fieldID] = dictKeys
}
for _, result := range s.results {
// walk each composite field
for _, field := range result.Document.CompositeFields {
fieldID := uint16(s.getOrDefineField(field.Name()))
_, tf := field.Analyze()
visitField(fieldID, tf)
}
// walk each field
for i, field := range result.Document.Fields {
fieldID := uint16(s.getOrDefineField(field.Name()))
tf := result.Analyzed[i]
visitField(fieldID, tf)
}
}
numPostingsLists := pidNext
// one bitmap per postings list; when growing, carry over the bitmaps
// already allocated and create only the missing ones
if cap(s.Postings) >= numPostingsLists {
s.Postings = s.Postings[:numPostingsLists]
} else {
postings := make([]*roaring.Bitmap, numPostingsLists)
copy(postings, s.Postings[:cap(s.Postings)])
for i := 0; i < numPostingsLists; i++ {
if postings[i] == nil {
postings[i] = roaring.New()
}
}
s.Postings = postings
}
if cap(s.FreqNorms) >= numPostingsLists {
s.FreqNorms = s.FreqNorms[:numPostingsLists]
} else {
s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
}
if cap(s.freqNormsBacking) >= totTFs {
s.freqNormsBacking = s.freqNormsBacking[:totTFs]
} else {
s.freqNormsBacking = make([]interimFreqNorm, totTFs)
}
// carve one empty sub-slice per postings list out of the shared backing
// array, each followed by room for that list's number of entries
freqNormsBacking := s.freqNormsBacking
for pid, numTerms := range s.numTermsPerPostingsList {
s.FreqNorms[pid] = freqNormsBacking[0:0]
freqNormsBacking = freqNormsBacking[numTerms:]
}
if cap(s.Locs) >= numPostingsLists {
s.Locs = s.Locs[:numPostingsLists]
} else {
s.Locs = make([][]interimLoc, numPostingsLists)
}
if cap(s.locsBacking) >= totLocs {
s.locsBacking = s.locsBacking[:totLocs]
} else {
s.locsBacking = make([]interimLoc, totLocs)
}
// same carving scheme for the locations
locsBacking := s.locsBacking
for pid, numLocs := range s.numLocsPerPostingsList {
s.Locs[pid] = locsBacking[0:0]
locsBacking = locsBacking[numLocs:]
}
}
// processDocuments runs processDocument over every analysis result,
// using the result's index as its docNum and sharing two per-field
// scratch slices that are cleared before each document.
func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	fieldLens := make([]int, numFields)
	fieldTFs := make([]analysis.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		// wipe what the previous document left behind
		for k := range fieldLens {
			fieldLens[k] = 0
		}
		for k := range fieldTFs {
			fieldTFs[k] = nil
		}

		s.processDocument(uint64(docNum), result, fieldLens, fieldTFs)
	}
}
// processDocument adds one document's analysis result to the postings,
// freq/norm and location working data. fieldLens and fieldTFs are
// scratch slices indexed by field id; the caller is expected to pass
// them in cleared.
func (s *interim) processDocument(docNum uint64,
result *index.AnalysisResult,
fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
// visitField accumulates the length and token frequencies of one field
// instance into the per-field scratch slices, merging when the same
// field id has already been visited for this document.
visitField := func(fieldID uint16, fieldName string,
ln int, tf analysis.TokenFrequencies) {
fieldLens[fieldID] += ln
existingFreqs := fieldTFs[fieldID]
if existingFreqs != nil {
existingFreqs.MergeAll(fieldName, tf)
} else {
fieldTFs[fieldID] = tf
}
}
// walk each composite field
for _, field := range result.Document.CompositeFields {
fieldID := uint16(s.getOrDefineField(field.Name()))
ln, tf := field.Analyze()
visitField(fieldID, field.Name(), ln, tf)
}
// walk each field
for i, field := range result.Document.Fields {
fieldID := uint16(s.getOrDefineField(field.Name()))
ln := result.Length[i]
tf := result.Analyzed[i]
visitField(fieldID, field.Name(), ln, tf)
}
// now that it's been rolled up into fieldTFs, walk that
for fieldID, tfs := range fieldTFs {
dict := s.Dicts[fieldID]
// norm is 1/sqrt(field length), shared by every term of the field
norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
for term, tf := range tfs {
// dict stores postings list id + 1
pid := dict[term] - 1
bs := s.Postings[pid]
bs.Add(uint32(docNum))
s.FreqNorms[pid] = append(s.FreqNorms[pid],
interimFreqNorm{
freq: uint64(tf.Frequency()),
norm: norm,
numLocs: len(tf.Locations),
})
if len(tf.Locations) > 0 {
locs := s.Locs[pid]
for _, loc := range tf.Locations {
// a location carries its own field name when it differs from
// the field being walked; otherwise it belongs to fieldID
var locf = uint16(fieldID)
if loc.Field != "" {
locf = uint16(s.getOrDefineField(loc.Field))
}
var arrayposs []uint64
if len(loc.ArrayPositions) > 0 {
arrayposs = loc.ArrayPositions
}
locs = append(locs, interimLoc{
fieldID: locf,
pos: uint64(loc.Position),
start: uint64(loc.Start),
end: uint64(loc.End),
arrayposs: arrayposs,
})
}
s.Locs[pid] = locs
}
}
}
}
// writeStoredFields writes the stored-field section for every document
// to s.w, followed by the stored index (one big-endian uint64 offset per
// document), and returns the offset at which that index starts. It also
// sets s.IncludeDocValues for each field whose options request doc
// values.
func (s *interim) writeStoredFields() (
storedIndexOffset uint64, err error) {
varBuf := make([]byte, binary.MaxVarintLen64)
// metaEncode appends val, uvarint-encoded, to the per-doc meta buffer
metaEncode := func(val uint64) (int, error) {
wb := binary.PutUvarint(varBuf, val)
return s.metaBuf.Write(varBuf[:wb])
}
// borrow the scratch buffers and hand back whatever they grew into
data, compressed := s.tmp0[:0], s.tmp1[:0]
defer func() { s.tmp0, s.tmp1 = data, compressed }()
// keyed by docNum
docStoredOffsets := make([]uint64, len(s.results))
// keyed by fieldID, for the current doc in the loop
docStoredFields := map[uint16]interimStoredField{}
for docNum, result := range s.results {
for fieldID := range docStoredFields { // reset for next doc
delete(docStoredFields, fieldID)
}
for _, field := range result.Document.Fields {
fieldID := uint16(s.getOrDefineField(field.Name()))
opts := field.Options()
if opts.IsStored() {
isf := docStoredFields[fieldID]
isf.vals = append(isf.vals, field.Value())
isf.typs = append(isf.typs, encodeFieldType(field))
isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
docStoredFields[fieldID] = isf
}
if opts.IncludeDocValues() {
s.IncludeDocValues[fieldID] = true
}
}
var curr int
s.metaBuf.Reset()
data = data[:0]
// _id field special case optimizes ExternalID() lookups
// NOTE(review): this indexes vals[0] unconditionally, so it assumes
// every document has a stored _id field (field id 0) — confirm
idFieldVal := docStoredFields[uint16(0)].vals[0]
_, err = metaEncode(uint64(len(idFieldVal)))
if err != nil {
return 0, err
}
// handle non-"_id" fields
for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
isf, exists := docStoredFields[uint16(fieldID)]
if exists {
curr, data, err = persistStoredFieldValues(
fieldID, isf.vals, isf.typs, isf.arrayposs,
curr, metaEncode, data)
if err != nil {
return 0, err
}
}
}
metaBytes := s.metaBuf.Bytes()
// only the non-_id data is snappy-compressed; the _id value is written
// uncompressed in front of it
compressed = snappy.Encode(compressed[:cap(compressed)], data)
docStoredOffsets[docNum] = uint64(s.w.Count())
// per-doc header: meta length, then length of _id value + compressed data
// (this err is local to the loop body and shadows the named result)
_, err := writeUvarints(s.w,
uint64(len(metaBytes)),
uint64(len(idFieldVal)+len(compressed)))
if err != nil {
return 0, err
}
_, err = s.w.Write(metaBytes)
if err != nil {
return 0, err
}
_, err = s.w.Write(idFieldVal)
if err != nil {
return 0, err
}
_, err = s.w.Write(compressed)
if err != nil {
return 0, err
}
}
// the stored index: where each doc's section starts, in docNum order
storedIndexOffset = uint64(s.w.Count())
for _, docStoredOffset := range docStoredOffsets {
err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
if err != nil {
return 0, err
}
}
return storedIndexOffset, nil
}
// writeDicts writes, field by field, the postings of every term, the
// field's vellum term dictionary and (for fields flagged in
// IncludeDocValues) the field's doc values, and finally the table of
// doc value start/end offsets.
//
// It returns the offset of that table, the offset of each field's
// dictionary (indexed by field id), and any write/encode error.
// NOTE(review): the encoders are sized with len(s.results)-1, so this
// appears to rely on the caller only invoking it when there is at least
// one result — confirm.
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
dictOffsets = make([]uint64, len(s.FieldsInv))
fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))
buf := s.grabBuf(binary.MaxVarintLen64)
tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)
// docNum -> concatenated, separator-terminated terms of the current field
var docTermMap [][]byte
// the vellum builder is created once and reused across calls
if s.builder == nil {
s.builder, err = vellum.New(&s.builderBuf, nil)
if err != nil {
return 0, nil, err
}
}
for fieldID, terms := range s.DictKeys {
if cap(docTermMap) < len(s.results) {
docTermMap = make([][]byte, len(s.results))
} else {
docTermMap = docTermMap[0:len(s.results)]
for docNum := range docTermMap { // reset the docTermMap
docTermMap[docNum] = docTermMap[docNum][:0]
}
}
dict := s.Dicts[fieldID]
for _, term := range terms { // terms are already sorted
// dict stores postings list id + 1
pid := dict[term] - 1
postingsBS := s.Postings[pid]
freqNorms := s.FreqNorms[pid]
freqNormOffset := 0
locs := s.Locs[pid]
locOffset := 0
// freqNorms and locs are consumed in step with the docNums yielded
// by the bitmap iterator
postingsItr := postingsBS.Iterator()
for postingsItr.HasNext() {
docNum := uint64(postingsItr.Next())
freqNorm := freqNorms[freqNormOffset]
err = tfEncoder.Add(docNum,
encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
uint64(math.Float32bits(freqNorm.norm)))
if err != nil {
return 0, nil, err
}
if freqNorm.numLocs > 0 {
// first the total encoded size of this doc's locations...
numBytesLocs := 0
for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
numBytesLocs += totalUvarintBytes(
uint64(loc.fieldID), loc.pos, loc.start, loc.end,
uint64(len(loc.arrayposs)), loc.arrayposs)
}
err = locEncoder.Add(docNum, uint64(numBytesLocs))
if err != nil {
return 0, nil, err
}
// ...then the locations themselves
for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
err = locEncoder.Add(docNum,
uint64(loc.fieldID), loc.pos, loc.start, loc.end,
uint64(len(loc.arrayposs)))
if err != nil {
return 0, nil, err
}
err = locEncoder.Add(docNum, loc.arrayposs...)
if err != nil {
return 0, nil, err
}
}
locOffset += freqNorm.numLocs
}
freqNormOffset++
// remember the term for this doc's field doc values
docTermMap[docNum] = append(
append(docTermMap[docNum], term...),
termSeparator)
}
tfEncoder.Close()
locEncoder.Close()
postingsOffset, err :=
writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
if err != nil {
return 0, nil, err
}
// only terms with a non-zero postings offset go into the dictionary
if postingsOffset > uint64(0) {
err = s.builder.Insert([]byte(term), postingsOffset)
if err != nil {
return 0, nil, err
}
}
tfEncoder.Reset()
locEncoder.Reset()
}
err = s.builder.Close()
if err != nil {
return 0, nil, err
}
// record where this dictionary starts
dictOffsets[fieldID] = uint64(s.w.Count())
vellumData := s.builderBuf.Bytes()
// write out the length of the vellum data
n := binary.PutUvarint(buf, uint64(len(vellumData)))
_, err = s.w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
// write this vellum to disk
_, err = s.w.Write(vellumData)
if err != nil {
return 0, nil, err
}
// reset vellum for reuse
s.builderBuf.Reset()
err = s.builder.Reset(&s.builderBuf)
if err != nil {
return 0, nil, err
}
// write the field doc values
if s.IncludeDocValues[fieldID] {
for docNum, docTerms := range docTermMap {
if len(docTerms) > 0 {
err = fdvEncoder.Add(uint64(docNum), docTerms)
if err != nil {
return 0, nil, err
}
}
}
err = fdvEncoder.Close()
if err != nil {
return 0, nil, err
}
fdvOffsetsStart[fieldID] = uint64(s.w.Count())
_, err = fdvEncoder.Write()
if err != nil {
return 0, nil, err
}
fdvOffsetsEnd[fieldID] = uint64(s.w.Count())
fdvEncoder.Reset()
} else {
// sentinel marking a field that has no doc values
fdvOffsetsStart[fieldID] = fieldNotUninverted
fdvOffsetsEnd[fieldID] = fieldNotUninverted
}
}
// the doc values index: a (start, end) uvarint pair per field
fdvIndexOffset = uint64(s.w.Count())
for i := 0; i < len(fdvOffsetsStart); i++ {
n := binary.PutUvarint(buf, fdvOffsetsStart[i])
_, err := s.w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
_, err = s.w.Write(buf[:n])
if err != nil {
return 0, nil, err
}
}
return fdvIndexOffset, dictOffsets, nil
}
// encodeFieldType maps a document field's concrete type to the
// single-byte code stored alongside its value; any type not listed
// here is encoded as 'x'.
func encodeFieldType(f document.Field) byte {
	switch f.(type) {
	case *document.TextField:
		return 't'
	case *document.NumericField:
		return 'n'
	case *document.DateTimeField:
		return 'd'
	case *document.BooleanField:
		return 'b'
	case *document.GeoPointField:
		return 'g'
	case *document.CompositeField:
		return 'c'
	default:
		return 'x'
	}
}
// totalUvarintBytes returns how many bytes binary.PutUvarint() would
// need in total to encode a, b, c, d, e and every value in more.
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	for _, v := range [...]uint64{a, b, c, d, e} {
		n += numUvarintBytes(v)
	}
	for k := range more {
		n += numUvarintBytes(more[k])
	}
	return n
}
// numUvarintBytes returns how many bytes binary.PutUvarint() needs to
// encode x: one byte, plus one more for every further 7 bits of value.
func numUvarintBytes(x uint64) (n int) {
	n = 1
	for ; x >= 0x80; x >>= 7 {
		n++
	}
	return n
}

View file

@ -1,790 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"reflect"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
)
// Static (shallow) sizes in bytes of the posting-related structs; they
// are filled in once by init() and used by the Size() methods.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)
// init records the static size of each posting-related struct, as
// reported by reflection on a zero value of the type.
func init() {
	reflectStaticSizePostingsList = int(reflect.TypeOf(PostingsList{}).Size())
	reflectStaticSizePostingsIterator = int(reflect.TypeOf(PostingsIterator{}).Size())
	reflectStaticSizePosting = int(reflect.TypeOf(Posting{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// FST or vellum value (uint64) encoding is determined by the top two
// highest-order or most significant bits...
//
// encoding : MSB
// name : 63 62 61...to...bit #0 (LSB)
// ----------+---+---+---------------------------------------------------
// general : 0 | 0 | 62-bits of postingsOffset.
// ~ : 0 | 1 | reserved for future.
// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
// ~ : 1 | 1 | reserved for future.
//
// Encoding "general" is able to handle all cases, where the
// postingsOffset points to more information about the postings for
// the term.
//
// Encoding "1-hit" is used to optimize a commonly seen case when a
// term has only a single hit. For example, a term in the _id field
// will have only 1 hit. The "1-hit" encoding is used for a term
// in a field when...
//
// - term vector info is disabled for that field;
// - and, the term appears in only a single doc for that field;
// - and, the term's freq is exactly 1 in that single doc for that field;
// - and, the docNum must fit into 31-bits;
//
// Otherwise, the "general" encoding is used instead.
//
// In the "1-hit" encoding, the field in that single doc may have
// other terms, which is supported in the "1-hit" encoding by the
// positive float31 norm.
// The two most significant bits of an FST (vellum) value select how
// the remaining bits are to be interpreted.
const (
	// FSTValEncodingMask isolates the two encoding-selector bits.
	FSTValEncodingMask uint64 = 0xc000000000000000
	// FSTValEncodingGeneral marks a value holding a postingsOffset.
	FSTValEncodingGeneral uint64 = 0x0000000000000000
	// FSTValEncoding1Hit marks a value holding a "1-hit" norm and docNum.
	FSTValEncoding1Hit uint64 = 0x8000000000000000
)
func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
}
// FSTValDecode1Hit unpacks a "1-hit" FST value into its 31-bit docNum
// (bits 0..30) and 31-bit norm bits (bits 31..61).
func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
	docNum = v & mask31Bits
	normBits = (v >> 31) & mask31Bits
	return docNum, normBits
}
// mask31Bits has the 31 least significant bits set.
const mask31Bits uint64 = 1<<31 - 1
// under32Bits reports whether x fits into 31 bits, i.e. whether no bit
// above the low 31 is set.
func under32Bits(x uint64) bool {
	return x&^mask31Bits == 0
}
// docNum1HitFinished is the sentinel stored in an iterator's docNum1Hit
// when its single "1-hit" document is excluded.
const docNum1HitFinished = math.MaxUint64
// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
// segment whose mem holds the encoded postings
sb *SegmentBase
// FST value this list was read from (an offset into sb.mem, or a
// "1-hit" encoded value)
postingsOffset uint64
// offsets into sb.mem of the freq/norm and location chunk details
freqOffset uint64
locOffset uint64
// docNums in this list ("general" encoding only)
postings *roaring.Bitmap
// docNums to leave out when iterating or counting; may be nil
except *roaring.Bitmap
// when normBits1Hit != 0, then this postings list came from a
// 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
docNum1Hit uint64
normBits1Hit uint64
}
// emptyPostingsList is the shared postings list that contains no
// documents; it is meant to be treated as immutable.
var emptyPostingsList = new(PostingsList)
// Size returns the memory footprint of the postings list in bytes: the
// static struct size, one pointer, and the except bitmap when present.
func (p *PostingsList) Size() int {
	total := reflectStaticSizePostingsList + size.SizeOfPtr
	if p.except == nil {
		return total
	}
	return total + int(p.except.GetSizeInBytes())
}
// OrInto adds the docNums of this postings list to receiver: the single
// docNum for a "1-hit" list, otherwise the whole postings bitmap when
// there is one.
func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
	switch {
	case p.normBits1Hit != 0:
		receiver.Add(uint32(p.docNum1Hit))
	case p.postings != nil:
		receiver.Or(p.postings)
	}
}
// Iterator returns an iterator over this postings list. A list with
// neither a "1-hit" entry nor a postings bitmap yields the shared empty
// iterator. Otherwise prealloc is recycled when it is a non-nil
// *PostingsIterator other than that shared empty iterator.
func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
	prealloc segment.PostingsIterator) segment.PostingsIterator {
	if p.normBits1Hit == 0 && p.postings == nil {
		return emptyPostingsIterator
	}

	// a failed assertion leaves reuse nil, which means "allocate"
	reuse, _ := prealloc.(*PostingsIterator)
	if reuse == emptyPostingsIterator {
		// never scribble over the shared empty iterator
		reuse = nil
	}

	return p.iterator(includeFreq, includeNorm, includeLocs, reuse)
}
// iterator prepares a PostingsIterator over p. When rv is non-nil it is
// cleared and reused, keeping its readers, slices and buf so their
// storage is not reallocated; otherwise a new iterator is created.
// includeFreq/includeNorm and includeLocs select which chunk details are
// decoded up front for a "general" encoded list.
func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
rv *PostingsIterator) *PostingsIterator {
if rv == nil {
rv = &PostingsIterator{}
} else {
// save the reusable parts, detached from any previous data
freqNormReader := rv.freqNormReader
if freqNormReader != nil {
freqNormReader.Reset([]byte(nil))
}
locReader := rv.locReader
if locReader != nil {
locReader.Reset([]byte(nil))
}
freqChunkOffsets := rv.freqChunkOffsets[:0]
locChunkOffsets := rv.locChunkOffsets[:0]
nextLocs := rv.nextLocs[:0]
nextSegmentLocs := rv.nextSegmentLocs[:0]
buf := rv.buf
*rv = PostingsIterator{} // clear the struct
rv.freqNormReader = freqNormReader
rv.locReader = locReader
rv.freqChunkOffsets = freqChunkOffsets
rv.locChunkOffsets = locChunkOffsets
rv.nextLocs = nextLocs
rv.nextSegmentLocs = nextSegmentLocs
rv.buf = buf
}
rv.postings = p
if p.normBits1Hit != 0 {
// "1-hit" encoding
rv.docNum1Hit = p.docNum1Hit
rv.normBits1Hit = p.normBits1Hit
// an excluded single hit leaves nothing to iterate
if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
rv.docNum1Hit = docNum1HitFinished
}
return rv
}
// "general" encoding, check if empty
if p.postings == nil {
return rv
}
// n is the number of bytes consumed so far from the section being read
var n uint64
var read int
// prepare the freq chunk details
rv.includeFreqNorm = includeFreq || includeNorm
if rv.includeFreqNorm {
// layout: uvarint chunk count, then one uvarint offset per chunk
var numFreqChunks uint64
numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
} else {
rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
}
for i := 0; i < int(numFreqChunks); i++ {
rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
// chunk data begins right after the offsets
rv.freqChunkStart = p.freqOffset + n
}
// prepare the loc chunk details
rv.includeLocs = includeLocs
if rv.includeLocs {
// same layout as the freq chunk details, read from locOffset
n = 0
var numLocChunks uint64
numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
if cap(rv.locChunkOffsets) >= int(numLocChunks) {
rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
} else {
rv.locChunkOffsets = make([]uint64, int(numLocChunks))
}
for i := 0; i < int(numLocChunks); i++ {
rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.locChunkStart = p.locOffset + n
}
// "all" walks every docNum in the list; "Actual" walks only those not
// excluded by p.except
rv.all = p.postings.Iterator()
if p.except != nil {
rv.ActualBM = roaring.AndNot(p.postings, p.except)
rv.Actual = rv.ActualBM.Iterator()
} else {
rv.ActualBM = p.postings
rv.Actual = p.postings.Iterator()
}
return rv
}
// Count returns the number of items on this postings list: its
// cardinality (1 for a "1-hit" list) minus the cardinality of the
// except bitmap, never going below zero.
func (p *PostingsList) Count() uint64 {
	var total uint64
	switch {
	case p.normBits1Hit != 0:
		total = 1
	case p.postings != nil:
		total = p.postings.GetCardinality()
	}

	if p.except == nil {
		return total
	}

	excluded := p.except.GetCardinality()
	if excluded >= total {
		return 0
	}
	return total - excluded
}
// read initializes the postings list from the FST value postingsOffset.
// A "1-hit" encoded value is decoded directly; otherwise the value is an
// offset into the dictionary's segment memory, where three uvarints
// (freq offset, loc offset, bitmap length) precede the roaring bitmap
// bytes.
func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
	rv.postingsOffset = postingsOffset

	// handle "1-hit" encoding special case
	if postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
		return rv.init1Hit(postingsOffset)
	}

	// pos walks forward through the header, one uvarint at a time
	pos := postingsOffset
	var width int

	rv.freqOffset, width = binary.Uvarint(d.sb.mem[pos : pos+binary.MaxVarintLen64])
	pos += uint64(width)

	rv.locOffset, width = binary.Uvarint(d.sb.mem[pos : pos+binary.MaxVarintLen64])
	pos += uint64(width)

	var bitmapLen uint64
	bitmapLen, width = binary.Uvarint(d.sb.mem[pos : pos+binary.MaxVarintLen64])
	pos += uint64(width)

	bitmapBytes := d.sb.mem[pos : pos+bitmapLen]

	// reuse a bitmap from a previous read when there is one
	if rv.postings == nil {
		rv.postings = roaring.NewBitmap()
	}
	if _, err := rv.postings.FromBuffer(bitmapBytes); err != nil {
		return fmt.Errorf("error loading roaring bitmap: %v", err)
	}

	return nil
}
// init1Hit fills in the docNum and norm bits of a "1-hit" postings list
// from its FST value. It always returns nil.
func (rv *PostingsList) init1Hit(fstVal uint64) error {
	rv.docNum1Hit, rv.normBits1Hit = FSTValDecode1Hit(fstVal)
	return nil
}
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
// the list being iterated
postings *PostingsList
// iterator over every docNum in the list, including excluded ones
all roaring.IntIterable
// iterator over, and bitmap of, the docNums left after applying the
// list's except bitmap
Actual roaring.IntIterable
ActualBM *roaring.Bitmap
// currently loaded chunk and its freq/norm and location bytes
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
// readers over the current chunk's bytes, reused across chunks
freqNormReader *bytes.Reader
locReader *bytes.Reader
// per-chunk offsets and the position in the segment memory where the
// chunk data starts, for freq/norm and for locations
freqChunkOffsets []uint64
freqChunkStart uint64
locChunkOffsets []uint64
locChunkStart uint64
next Posting // reused across Next() calls
nextLocs []Location // reused across Next() calls
nextSegmentLocs []segment.Location // reused across Next() calls
// copied from the postings list when it uses the "1-hit" encoding
docNum1Hit uint64
normBits1Hit uint64
// scratch buffer preserved when the iterator is reused
buf []byte
// which details were requested when the iterator was created
includeFreqNorm bool
includeLocs bool
}
// emptyPostingsIterator is the shared iterator handed out for postings
// lists that contain nothing.
var emptyPostingsIterator = new(PostingsIterator)
// Size returns the memory footprint of the iterator in bytes: the
// static struct size, one pointer, the current chunk bytes, the chunk
// offset tables, the reusable posting and each reusable location.
func (i *PostingsIterator) Size() int {
	total := reflectStaticSizePostingsIterator + size.SizeOfPtr
	total += len(i.currChunkFreqNorm) + len(i.currChunkLoc)
	total += len(i.freqChunkOffsets)*size.SizeOfUint64 +
		len(i.locChunkOffsets)*size.SizeOfUint64
	total += i.next.Size()

	for k := range i.nextLocs {
		loc := i.nextLocs[k]
		total += loc.Size()
	}

	return total
}
// loadChunk makes chunk the current chunk: for each kind of detail the
// iterator was asked to include (freq/norm, locations) it slices the
// chunk's bytes out of the segment memory and points the matching
// reader at them. It fails when chunk is beyond the known chunks.
func (i *PostingsIterator) loadChunk(chunk int) error {
	if i.includeFreqNorm {
		if numChunks := len(i.freqChunkOffsets); chunk >= numChunks {
			return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
				chunk, numChunks)
		}

		// boundaries are relative to the start of the freq chunk data
		lo, hi := readChunkBoundary(chunk, i.freqChunkOffsets)
		i.currChunkFreqNorm = i.postings.sb.mem[i.freqChunkStart+lo : i.freqChunkStart+hi]

		if i.freqNormReader != nil {
			i.freqNormReader.Reset(i.currChunkFreqNorm)
		} else {
			i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
		}
	}

	if i.includeLocs {
		if numChunks := len(i.locChunkOffsets); chunk >= numChunks {
			return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
				chunk, numChunks)
		}

		// boundaries are relative to the start of the loc chunk data
		lo, hi := readChunkBoundary(chunk, i.locChunkOffsets)
		i.currChunkLoc = i.postings.sb.mem[i.locChunkStart+lo : i.locChunkStart+hi]

		if i.locReader != nil {
			i.locReader.Reset(i.currChunkLoc)
		} else {
			i.locReader = bytes.NewReader(i.currChunkLoc)
		}
	}

	i.currChunk = uint32(chunk)
	return nil
}
// readFreqNormHasLocs reads the next freq/norm entry and returns the
// frequency, the norm bits, and whether locations were recorded. For a
// "1-hit" iterator nothing is read: the frequency is 1, the norm bits
// are the stored ones, and there are no locations.
func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
	if i.normBits1Hit != 0 {
		return 1, i.normBits1Hit, false, nil
	}

	encodedFreq, err := binary.ReadUvarint(i.freqNormReader)
	if err != nil {
		return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
	}

	normBits, err := binary.ReadUvarint(i.freqNormReader)
	if err != nil {
		return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
	}

	freq, hasLocs := decodeFreqHasLocs(encodedFreq)
	return freq, normBits, hasLocs, nil
}
// encodeFreqHasLocs packs a frequency and a has-locations flag into one
// value: the frequency shifted left by one, with the flag in bit 0.
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
	var locBit uint64
	if hasLocs {
		locBit = 0x01
	}
	return freq<<1 | locBit
}
// decodeFreqHasLocs splits a packed value into its frequency (all bits
// above bit 0) and its has-locations flag (bit 0).
func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
	return freqHasLocs >> 1, freqHasLocs&0x01 == 0x01
}
// readLocation consumes from the location reader all the integers that
// make up one location: field id, pos, start, end, the number of array
// positions, and then the array positions themselves. When l is non-nil
// it is filled with the decoded values; when l is nil the values are
// read and discarded.
func (i *PostingsIterator) readLocation(l *Location) error {
	src := i.locReader

	fieldID, err := binary.ReadUvarint(src)
	if err != nil {
		return fmt.Errorf("error reading location field: %v", err)
	}

	pos, err := binary.ReadUvarint(src)
	if err != nil {
		return fmt.Errorf("error reading location pos: %v", err)
	}

	start, err := binary.ReadUvarint(src)
	if err != nil {
		return fmt.Errorf("error reading location start: %v", err)
	}

	end, err := binary.ReadUvarint(src)
	if err != nil {
		return fmt.Errorf("error reading location end: %v", err)
	}

	numArrayPos, err := binary.ReadUvarint(src)
	if err != nil {
		return fmt.Errorf("error reading location num array pos: %v", err)
	}
	count := int(numArrayPos)

	if l == nil {
		// the caller doesn't want the location; just skip over its
		// array positions
		for k := 0; k < count; k++ {
			if _, err := binary.ReadUvarint(src); err != nil {
				return fmt.Errorf("error reading array position: %v", err)
			}
		}
		return nil
	}

	l.field = i.postings.sb.fieldsInv[fieldID]
	l.pos, l.start, l.end = pos, start, end

	// reuse the array position slice when it is big enough
	if cap(l.ap) >= count {
		l.ap = l.ap[:count]
	} else {
		l.ap = make([]uint64, count)
	}

	for k := 0; k < count; k++ {
		ap, err := binary.ReadUvarint(src)
		if err != nil {
			return fmt.Errorf("error reading array position: %v", err)
		}
		l.ap[k] = ap
	}

	return nil
}
// Next returns the next posting on the postings list, or nil once the end
// of the list has been reached. It is nextAtOrAfter with a lower bound of 0.
func (i *PostingsIterator) Next() (segment.Posting, error) {
return i.nextAtOrAfter(0)
}
// Advance returns the posting for the specified docNum; if that docNum is
// not present it returns the next posting after it, and if the end of the
// list is reached it returns nil.
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
return i.nextAtOrAfter(docNum)
}
// nextAtOrAfter returns the next posting whose docNum is >= atOrAfter, or
// nil (with a nil error) when no such posting remains.
//
// The returned posting points at i.next, which is cleared and refilled on
// every call, and its locations live in slices owned by the iterator
// (i.nextLocs / i.nextSegmentLocs); the result is therefore only valid
// until the next call on this iterator.
func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
if err != nil || !exists {
return nil, err
}
i.next = Posting{} // clear the struct
rv := &i.next
rv.docNum = docNum
// without freq/norm there is nothing further to decode for this posting
if !i.includeFreqNorm {
return rv, nil
}
var normBits uint64
var hasLocs bool
rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
if err != nil {
return nil, err
}
// the norm is stored as the bit pattern of a float32
rv.norm = math.Float32frombits(uint32(normBits))
if i.includeLocs && hasLocs {
// prepare locations into reused slices, where we assume
// rv.freq >= "number of locs", since in a composite field,
// some component fields might have their IncludeTermVector
// flags disabled while other component fields are enabled
if cap(i.nextLocs) >= int(rv.freq) {
i.nextLocs = i.nextLocs[0:rv.freq]
} else {
i.nextLocs = make([]Location, rv.freq, rv.freq*2)
}
if cap(i.nextSegmentLocs) < int(rv.freq) {
i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
}
rv.locs = i.nextSegmentLocs[:0]
// the locations of one posting are prefixed with their total byte length
numLocsBytes, err := binary.ReadUvarint(i.locReader)
if err != nil {
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
}
j := 0
startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
// keep decoding locations until numLocsBytes bytes have been consumed
for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
err := i.readLocation(&i.nextLocs[j])
if err != nil {
return nil, err
}
rv.locs = append(rv.locs, &i.nextLocs[j])
j++
}
}
return rv, nil
}
// freqHasLocs1Hit is the encoded freq/hasLocs value for a posting with a
// frequency of 1 and no locations.
var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
// nextBytes returns the docNum and the encoded freq & loc bytes for
// the next posting.
//
// For a 1-hit postings list (i.normBits1Hit != 0) the freq/norm bytes are
// synthesized into the iterator's scratch buffer i.buf and no location
// bytes are returned. Otherwise bytesFreqNorm and bytesLoc are sub-slices
// of the current chunk's data (i.currChunkFreqNorm / i.currChunkLoc), not
// copies. When the list is exhausted all results are zero/nil with a nil
// error.
func (i *PostingsIterator) nextBytes() (
docNumOut uint64, freq uint64, normBits uint64,
bytesFreqNorm []byte, bytesLoc []byte, err error) {
docNum, exists, err := i.nextDocNumAtOrAfter(0)
if err != nil || !exists {
return 0, 0, 0, nil, nil, err
}
if i.normBits1Hit != 0 {
if i.buf == nil {
i.buf = make([]byte, binary.MaxVarintLen64*2)
}
n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
}
// offset into the chunk = chunk length minus bytes still unread
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
var hasLocs bool
freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
if err != nil {
return 0, 0, 0, nil, nil, err
}
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
if hasLocs {
startLoc := len(i.currChunkLoc) - i.locReader.Len()
numLocsBytes, err := binary.ReadUvarint(i.locReader)
if err != nil {
return 0, 0, 0, nil, nil,
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return 0, 0, 0, nil, nil, err
}
// the returned slice includes the numLocsBytes length prefix itself
endLoc := len(i.currChunkLoc) - i.locReader.Len()
bytesLoc = i.currChunkLoc[startLoc:endLoc]
}
return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
}
// nextDocNumAtOrAfter returns the next docNum on the postings list that is
// >= atOrAfter, and also sets up the currChunk / loc related fields of the
// iterator. The bool result is false (with a nil error) when no such
// docNum remains.
func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
// 1-hit postings list: a single docNum held directly in the iterator
if i.normBits1Hit != 0 {
if i.docNum1Hit == docNum1HitFinished {
return 0, false, nil
}
if i.docNum1Hit < atOrAfter {
// advanced past our 1-hit
i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum
return 0, false, nil
}
docNum := i.docNum1Hit
i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum
return docNum, true, nil
}
if i.Actual == nil || !i.Actual.HasNext() {
return 0, false, nil
}
// step the actual iterator forward until it reaches atOrAfter or runs out
n := i.Actual.Next()
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next()
}
if uint64(n) < atOrAfter {
// couldn't find anything
return 0, false, nil
}
allN := i.all.Next()
nChunk := n / i.postings.sb.chunkFactor
allNChunk := allN / i.postings.sb.chunkFactor
// n is the next actual hit (excluding some postings), and
// allN is the next hit in the full postings, and
// if they don't match, move 'all' forwards until they do
for allN != n {
// in the same chunk, so move the freq/norm/loc decoders forward
if i.includeFreqNorm && allNChunk == nChunk {
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
err := i.loadChunk(int(nChunk))
if err != nil {
return 0, false, fmt.Errorf("error loading chunk: %v", err)
}
}
// read off freq/offsets even though we don't care about them
_, _, hasLocs, err := i.readFreqNormHasLocs()
if err != nil {
return 0, false, err
}
if i.includeLocs && hasLocs {
numLocsBytes, err := binary.ReadUvarint(i.locReader)
if err != nil {
return 0, false, fmt.Errorf("error reading location numLocsBytes: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return 0, false, err
}
}
}
allN = i.all.Next()
allNChunk = allN / i.postings.sb.chunkFactor
}
// make sure the chunk holding n is the one loaded before returning
if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
err := i.loadChunk(int(nChunk))
if err != nil {
return 0, false, fmt.Errorf("error loading chunk: %v", err)
}
}
return uint64(n), true, nil
}
// Posting is a single entry in a postings list
type Posting struct {
docNum uint64
freq uint64
norm float32
locs []segment.Location
}
// Size returns the static size of the Posting struct plus the Size() of
// each location it holds.
func (p *Posting) Size() int {
sizeInBytes := reflectStaticSizePosting
for _, entry := range p.locs {
sizeInBytes += entry.Size()
}
return sizeInBytes
}
// Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 {
return p.docNum
}
// Frequency returns the frequency of occurrence of this term in this doc/field
func (p *Posting) Frequency() uint64 {
return p.freq
}
// Norm returns the normalization factor for this posting
func (p *Posting) Norm() float64 {
return float64(p.norm)
}
// Locations returns the location information for each occurrence
func (p *Posting) Locations() []segment.Location {
return p.locs
}
// Location represents the location of a single occurrence
type Location struct {
field string
pos uint64
start uint64
end uint64
ap []uint64
}
// Size returns the static size of the Location struct plus the length of
// its field name and the bytes used by its array positions.
func (l *Location) Size() int {
return reflectStaticSizeLocation +
len(l.field) +
len(l.ap)*size.SizeOfUint64
}
// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {
return l.field
}
// Start returns the start byte offset of this occurrence
func (l *Location) Start() uint64 {
return l.start
}
// End returns the end byte offset of this occurrence
func (l *Location) End() uint64 {
return l.end
}
// Pos returns the 1-based phrase position of this occurrence
func (l *Location) Pos() uint64 {
return l.pos
}
// ArrayPositions returns the array position vector associated with this occurrence
func (l *Location) ArrayPositions() []uint64 {
return l.ap
}

View file

@ -1,43 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import "encoding/binary"
// getDocStoredMetaAndCompressed returns two sub-slices of the segment's
// memory for the given document: the stored-field metadata, and the data
// bytes that immediately follow it. Both are located via
// getDocStoredOffsets and are views into s.mem, not copies.
func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
	_, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum)

	metaStart := storedOffset + n
	dataStart := metaStart + metaLen
	dataEnd := dataStart + dataLen

	return s.mem[metaStart:dataStart], s.mem[dataStart:dataEnd]
}
// getDocStoredOffsets locates the stored data of a document. It returns,
// in order: the offset of the document's 8-byte entry in the stored index,
// the offset that entry points to, the number of bytes taken by the two
// varint length headers found there, the metadata length and the data
// length.
func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
	uint64, uint64, uint64, uint64, uint64) {
	// each stored-index entry is a big-endian uint64
	indexOffset := s.storedIndexOffset + 8*docNum
	storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])

	metaLen, metaHdr := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
	n := uint64(metaHdr)

	dataLen, dataHdr := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
	n += uint64(dataHdr)

	return indexOffset, storedOffset, n, metaLen, dataLen
}

View file

@ -1,540 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"os"
"reflect"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/size"
"github.com/couchbase/vellum"
mmap "github.com/edsrzf/mmap-go"
"github.com/golang/snappy"
)
// reflectStaticSizeSegmentBase is the static size in bytes of a
// SegmentBase value, computed once at package initialization.
var reflectStaticSizeSegmentBase int

func init() {
	reflectStaticSizeSegmentBase = int(reflect.TypeOf(SegmentBase{}).Size())
}
// Open returns a zap impl of a segment, backed by a read-only memory
// mapping of the file at path.
//
// The returned segment starts with a reference count of 1. An error is
// returned if the file cannot be opened or mapped, if it is too short to
// contain the footer, or if loading the footer, fields or doc value
// readers fails; in all of those cases the mapping and the file handle are
// released before returning.
func Open(path string) (segment.Segment, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	mm, err := mmap.Map(f, mmap.RDONLY, 0)
	if err != nil {
		// mmap failed, try to close the file
		_ = f.Close()
		return nil, err
	}

	// a file shorter than the footer cannot be a valid segment; without
	// this check the slice expression below would panic
	if len(mm) < FooterSize {
		_ = mm.Unmap()
		_ = f.Close()
		return nil, fmt.Errorf("segment file %q is too small: %d bytes, need at least %d for the footer",
			path, len(mm), FooterSize)
	}

	rv := &Segment{
		SegmentBase: SegmentBase{
			mem:            mm[0 : len(mm)-FooterSize],
			fieldsMap:      make(map[string]uint16),
			fieldDvReaders: make(map[uint16]*docValueReader),
		},
		f:    f,
		mm:   mm,
		path: path,
		refs: 1,
	}
	rv.SegmentBase.updateSize()

	// run the loaders in order; on the first failure drop our reference,
	// which unmaps and closes the file
	for _, load := range []func() error{rv.loadConfig, rv.loadFields, rv.loadDvReaders} {
		if err = load(); err != nil {
			_ = rv.Close()
			return nil, err
		}
	}

	return rv, nil
}
// SegmentBase is a memory only, read-only implementation of the
// segment.Segment interface, using zap's data representation.
type SegmentBase struct {
// mem holds the segment bytes; for a file-backed Segment, Open slices it
// from the mmap so that it excludes the trailing footer.
mem []byte
memCRC uint32
// chunkFactor, numDocs and the three *Offset fields below are read from
// the footer by Segment.loadConfig.
chunkFactor uint32
fieldsMap map[string]uint16 // fieldName -> fieldID+1
fieldsInv []string // fieldID -> fieldName
numDocs uint64
storedIndexOffset uint64
fieldsIndexOffset uint64
docValueOffset uint64
// dictLocs is indexed by fieldID and filled in by loadFields.
dictLocs []uint64
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
fieldDvNames []string // field names cached in fieldDvReaders
// size is the cached result of updateSize, reported by Size.
size uint64
}
// Size returns the size value most recently computed by updateSize.
func (sb *SegmentBase) Size() int {
return int(sb.size)
}
// updateSize recomputes the cached size estimate for this segment and
// stores it in sb.size. The estimate adds up the static struct size, the
// capacity of mem, and the per-entry costs of fieldsMap, fieldsInv,
// dictLocs and fieldDvReaders.
func (sb *SegmentBase) updateSize() {
	total := reflectStaticSizeSegmentBase + cap(sb.mem)

	// fieldsMap: string key plus uint16 value
	for name := range sb.fieldsMap {
		total += len(name) + size.SizeOfString + size.SizeOfUint16
	}

	// fieldsInv
	for idx := range sb.fieldsInv {
		total += len(sb.fieldsInv[idx]) + size.SizeOfString
	}

	// dictLocs
	total += size.SizeOfUint64 * len(sb.dictLocs)

	// fieldDvReaders: uint16 key plus pointer value, plus the reader itself
	for _, reader := range sb.fieldDvReaders {
		total += size.SizeOfUint16 + size.SizeOfPtr
		if reader != nil {
			total += reader.size()
		}
	}

	sb.size = uint64(total)
}
// AddRef is a no-op for SegmentBase.
func (sb *SegmentBase) AddRef() {}
// DecRef is a no-op for SegmentBase and always returns nil.
func (sb *SegmentBase) DecRef() (err error) { return nil }
// Close is a no-op for SegmentBase and always returns nil.
func (sb *SegmentBase) Close() (err error) { return nil }
// Segment implements a persisted segment.Segment interface, by
// embedding an mmap()'ed SegmentBase.
type Segment struct {
SegmentBase
// f and mm are the open file and its memory mapping; both are released
// by closeActual once the reference count drops to zero.
f *os.File
mm mmap.MMap
path string
// version and crc are read from the file footer by loadConfig.
version uint32
crc uint32
m sync.Mutex // Protects the fields that follow.
refs int64
}
// Size returns the size estimate for this segment: the embedded
// SegmentBase's size plus the Segment's own fields, with the capacity of
// the mmap'ed bytes subtracted again so that they are not counted.
func (s *Segment) Size() int {
	const (
		// 8 (file pointer) + 4 (version, uint32) + 4 (crc, uint32)
		fixedFields = 16
		// mutex, refs -> int64
		lockAndRefs = 16
	)

	own := len(s.path) + size.SizeOfString + fixedFields + lockAndRefs

	// do not include the mmap'ed part
	return own + s.SegmentBase.Size() - cap(s.mem)
}
// AddRef increments the segment's reference count under the mutex.
func (s *Segment) AddRef() {
	s.m.Lock()
	defer s.m.Unlock()

	s.refs++
}
// DecRef decrements the segment's reference count under the mutex. When
// the count reaches zero the underlying resources are released via
// closeActual and its error, if any, is returned.
func (s *Segment) DecRef() (err error) {
	s.m.Lock()
	defer s.m.Unlock()

	s.refs--
	if s.refs != 0 {
		return nil
	}
	return s.closeActual()
}
// loadConfig parses the footer at the end of the mmap'ed file. The footer
// is read back to front: CRC, version, chunk factor, doc value offset,
// fields index offset, stored index offset and number of docs, all
// big-endian. It returns an error if the stored version differs from
// Version; the fields after the version are not read in that case.
func (s *Segment) loadConfig() error {
	pos := len(s.mm) - 4
	s.crc = binary.BigEndian.Uint32(s.mm[pos : pos+4])

	pos -= 4
	s.version = binary.BigEndian.Uint32(s.mm[pos : pos+4])
	if s.version != Version {
		return fmt.Errorf("unsupported version %d", s.version)
	}

	pos -= 4
	s.chunkFactor = binary.BigEndian.Uint32(s.mm[pos : pos+4])

	pos -= 8
	s.docValueOffset = binary.BigEndian.Uint64(s.mm[pos : pos+8])

	pos -= 8
	s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[pos : pos+8])

	pos -= 8
	s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[pos : pos+8])

	pos -= 8
	s.numDocs = binary.BigEndian.Uint64(s.mm[pos : pos+8])

	return nil
}
// loadFields walks the fields index and, for each field, records its
// dictionary location in dictLocs, its name in fieldsInv and its ID
// (plus one) in fieldsMap. It always returns nil.
func (s *SegmentBase) loadFields() error {
	// NOTE for now we assume the fields index immediately precedes
	// the footer, and if this changes, need to adjust accordingly (or
	// store explicit length), where s.mem was sliced from s.mm in Open().
	fieldsIndexEnd := uint64(len(s.mem))

	// each fields index entry is an 8-byte big-endian address
	for fieldID := uint64(0); ; fieldID++ {
		entry := s.fieldsIndexOffset + 8*fieldID
		if entry >= fieldsIndexEnd {
			break
		}
		addr := binary.BigEndian.Uint64(s.mem[entry : entry+8])

		// at addr: varint dictionary location, varint name length, name bytes
		dictLoc, dictLocLen := binary.Uvarint(s.mem[addr:fieldsIndexEnd])
		s.dictLocs = append(s.dictLocs, dictLoc)

		cursor := addr + uint64(dictLocLen)
		nameLen, nameLenLen := binary.Uvarint(s.mem[cursor:fieldsIndexEnd])
		cursor += uint64(nameLenLen)

		name := string(s.mem[cursor : cursor+nameLen])
		s.fieldsInv = append(s.fieldsInv, name)
		s.fieldsMap[name] = uint16(fieldID + 1)
	}
	return nil
}
// Dictionary returns the term dictionary for the specified field. When the
// lookup succeeds but yields no dictionary, an empty dictionary is
// returned instead of nil.
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) {
	dict, err := s.dictionary(field)
	if err != nil || dict != nil {
		return dict, err
	}
	return &segment.EmptyDictionary{}, nil
}
// dictionary builds the Dictionary for the given field. It returns
// (nil, nil) when the field is not present in fieldsMap. When the field's
// dictionary location is zero the Dictionary is returned without an FST
// loaded; otherwise the length-prefixed vellum bytes at that location are
// loaded, and a load failure is returned as an error.
func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
	fieldIDPlus1 := sb.fieldsMap[field]
	if fieldIDPlus1 == 0 {
		return nil, nil
	}

	rv = &Dictionary{
		sb:      sb,
		field:   field,
		fieldID: fieldIDPlus1 - 1,
	}

	dictStart := sb.dictLocs[rv.fieldID]
	if dictStart == 0 {
		return rv, nil
	}

	// read the length of the vellum data
	vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
	fstStart := dictStart + uint64(read)
	fstBytes := sb.mem[fstStart : fstStart+vellumLen]
	if fstBytes == nil {
		return rv, nil
	}

	rv.fst, err = vellum.Load(fstBytes)
	if err != nil {
		return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
	}
	return rv, nil
}
// visitDocumentCtx holds data structures that are reusable across
// multiple VisitDocument() calls to avoid memory allocations
type visitDocumentCtx struct {
// buf is the destination buffer offered to snappy.Decode
buf []byte
// reader is reset onto each document's metadata bytes
reader bytes.Reader
// arrayPos is scratch space for a field's array positions
arrayPos []uint64
}
// visitDocumentCtxPool recycles visitDocumentCtx values; New hands out a
// zero-valued context when the pool is empty.
var visitDocumentCtxPool = sync.Pool{
New: func() interface{} {
reuse := &visitDocumentCtx{}
return reuse
},
}
// VisitDocument invokes the DocumentFieldValueVisitor for each stored field
// for the specified doc number. It borrows a visitDocumentCtx from the
// pool for the duration of the call and returns it via defer.
func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
defer visitDocumentCtxPool.Put(vdc)
return s.visitDocument(vdc, num, visitor)
}
// visitDocument invokes visitor for the stored fields of document num,
// using vdc as scratch space. The special "_id" field is visited first,
// read directly from the leading bytes of the stored data; the remaining
// bytes are snappy-decoded and each field described in the metadata is
// visited in turn until the metadata is exhausted or the visitor returns
// false. A num that is not below s.numDocs is ignored and nil is returned.
//
// vdc is owned by the caller. This function never returns it to
// visitDocumentCtxPool: VisitDocument already does so with a deferred Put,
// and putting the same context into the pool a second time would let two
// concurrent callers receive the same context.
func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64,
	visitor segment.DocumentFieldValueVisitor) error {
	// first make sure this is a valid number in this segment
	if num >= s.numDocs {
		return nil
	}

	meta, compressed := s.getDocStoredMetaAndCompressed(num)

	vdc.reader.Reset(meta)

	// handle _id field special case
	idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
	if err != nil {
		return err
	}
	idFieldVal := compressed[:idFieldValLen]

	keepGoing := visitor("_id", byte('t'), idFieldVal, nil)
	if !keepGoing {
		// the caller still owns vdc and is responsible for releasing it
		return nil
	}

	// handle non-"_id" fields
	compressed = compressed[idFieldValLen:]

	// offer the whole capacity of the reusable buffer as the destination
	uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed)
	if err != nil {
		return err
	}

	for keepGoing {
		// each metadata record: field, type, offset, length, array positions
		field, err := binary.ReadUvarint(&vdc.reader)
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		typ, err := binary.ReadUvarint(&vdc.reader)
		if err != nil {
			return err
		}
		offset, err := binary.ReadUvarint(&vdc.reader)
		if err != nil {
			return err
		}
		l, err := binary.ReadUvarint(&vdc.reader)
		if err != nil {
			return err
		}
		numap, err := binary.ReadUvarint(&vdc.reader)
		if err != nil {
			return err
		}

		var arrayPos []uint64
		if numap > 0 {
			if cap(vdc.arrayPos) < int(numap) {
				vdc.arrayPos = make([]uint64, numap)
			}
			arrayPos = vdc.arrayPos[:numap]
			for i := 0; i < int(numap); i++ {
				ap, err := binary.ReadUvarint(&vdc.reader)
				if err != nil {
					return err
				}
				arrayPos[i] = ap
			}
		}

		value := uncompressed[offset : offset+l]
		keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
	}

	// keep the (possibly grown) buffer for the next use of this context
	vdc.buf = uncompressed

	return nil
}
// DocID returns the value of the _id field for the given docNum. It
// returns (nil, nil) when num is not below the segment's document count,
// and an error when the _id length cannot be read from the document's
// metadata. The returned bytes are a sub-slice of the segment's memory,
// not a copy.
func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
	if num >= s.numDocs {
		return nil, nil
	}

	vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
	// deferred so the context goes back to the pool on the error path too
	defer visitDocumentCtxPool.Put(vdc)

	meta, compressed := s.getDocStoredMetaAndCompressed(num)

	vdc.reader.Reset(meta)

	// handle _id field special case
	idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
	if err != nil {
		return nil, err
	}

	return compressed[:idFieldValLen], nil
}
// Count returns the number of documents in this segment, as recorded in
// numDocs.
func (s *SegmentBase) Count() uint64 {
return s.numDocs
}
// DocNumbers returns a bitset corresponding to the doc numbers of all the
// provided _id strings. A segment with no fields yields an empty bitset.
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
	rv := roaring.New()
	if len(s.fieldsMap) == 0 {
		return rv, nil
	}

	idDict, err := s.dictionary("_id")
	if err != nil {
		return nil, err
	}

	// the postings list is handed back in on each lookup so it can be reused
	postingsList := emptyPostingsList
	for idx := range ids {
		postingsList, err = idDict.postingsList([]byte(ids[idx]), nil, postingsList)
		if err != nil {
			return nil, err
		}
		postingsList.OrInto(rv)
	}

	return rv, nil
}
// Fields returns the field names used in this segment. The returned slice
// is the segment's own fieldsInv slice, not a copy.
func (s *SegmentBase) Fields() []string {
return s.fieldsInv
}
// Path returns the path of this segment on disk
func (s *Segment) Path() string {
return s.path
}
// Close drops one reference to this segment; the underlying resources are
// released once the reference count reaches zero (see DecRef).
func (s *Segment) Close() (err error) {
return s.DecRef()
}
// closeActual unmaps the segment's memory and closes its file. Both steps
// are attempted even if the first one fails; the error from the unmap
// takes precedence over the error from closing the file.
func (s *Segment) closeActual() (err error) {
	if s.mm != nil {
		err = s.mm.Unmap()
	}

	// try to close file even if unmap failed
	if s.f == nil {
		return err
	}
	closeErr := s.f.Close()
	if err != nil {
		// try to return first error
		return err
	}
	return closeErr
}
// The accessors below are helpers added for the command-line utility.

// Data returns the underlying mmaped data slice
func (s *Segment) Data() []byte {
return s.mm
}
// CRC returns the CRC value stored in the file footer
func (s *Segment) CRC() uint32 {
return s.crc
}
// Version returns the file version in the file footer
func (s *Segment) Version() uint32 {
return s.version
}
// ChunkFactor returns the chunk factor in the file footer
func (s *Segment) ChunkFactor() uint32 {
return s.chunkFactor
}
// FieldsIndexOffset returns the fields index offset in the file footer
func (s *Segment) FieldsIndexOffset() uint64 {
return s.fieldsIndexOffset
}
// StoredIndexOffset returns the stored value index offset in the file footer
func (s *Segment) StoredIndexOffset() uint64 {
return s.storedIndexOffset
}
// DocValueOffset returns the docValue offset in the file footer
func (s *Segment) DocValueOffset() uint64 {
return s.docValueOffset
}
// NumDocs returns the number of documents in the file footer
func (s *Segment) NumDocs() uint64 {
return s.numDocs
}
// DictAddr is a helper function to compute the file offset where the
// dictionary is stored for the specified field. It returns an error when
// the field is unknown to this segment.
func (s *Segment) DictAddr(field string) (uint64, error) {
	if fieldIDPlus1, known := s.fieldsMap[field]; known {
		return s.dictLocs[fieldIDPlus1-1], nil
	}
	return 0, fmt.Errorf("no such field '%s'", field)
}
// loadDvReaders reads, for every field in fieldsInv, the pair of varints
// (start, end) stored consecutively at docValueOffset and asks
// loadFieldDocValueReader for a reader over that range. Fields for which a
// reader is returned are recorded in fieldDvReaders and fieldDvNames.
//
// Nothing is loaded when docValueOffset equals fieldNotUninverted. An
// error is returned when either varint cannot be decoded or when
// loadFieldDocValueReader reports a failure; previously the latter error
// was silently discarded, leaving the field without doc values.
func (s *SegmentBase) loadDvReaders() error {
	if s.docValueOffset == fieldNotUninverted {
		return nil
	}

	// read tracks how many bytes of the offsets table have been consumed
	var read uint64
	for fieldID, field := range s.fieldsInv {
		fieldLocStart, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
		if n <= 0 {
			return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID)
		}
		read += uint64(n)

		fieldLocEnd, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
		if n <= 0 {
			return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID)
		}
		read += uint64(n)

		fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
		if err != nil {
			return err
		}
		// a nil reader without an error is skipped, as before
		if fieldDvReader != nil {
			s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
			s.fieldDvNames = append(s.fieldDvNames, field)
		}
	}
	return nil
}

View file

@ -1,145 +0,0 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"encoding/binary"
"io"
"github.com/RoaringBitmap/roaring"
)
// writeRoaringWithLen serializes the roaring bitmap and writes it to w,
// preceded by its length in bytes encoded as a varint. reuseBufVarint is
// scratch space for the varint. It returns the total number of bytes
// written, including on a failed write.
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
	reuseBufVarint []byte) (int, error) {
	bitmapBytes, err := r.ToBytes()
	if err != nil {
		return 0, err
	}

	// length prefix first
	prefixLen := binary.PutUvarint(reuseBufVarint, uint64(len(bitmapBytes)))
	written, err := w.Write(reuseBufVarint[:prefixLen])
	if err != nil {
		return written, err
	}

	// then the roaring bytes themselves
	nw, err := w.Write(bitmapBytes)
	written += nw
	return written, err
}
// persistFields writes one record per field (dictionary location and name
// length as varints, followed by the name bytes) and then the fields
// index: the starting offset of each record as a big-endian uint64. It
// returns the writer offset at which the fields index begins.
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
	var recordOffsets []uint64

	for idx := range fieldsInv {
		name := fieldsInv[idx]

		// record start of this field
		recordOffsets = append(recordOffsets, uint64(w.Count()))

		// write out the dict location and field name length
		if _, err := writeUvarints(w, dictLocs[idx], uint64(len(name))); err != nil {
			return 0, err
		}

		// write out the field name
		if _, err := w.Write([]byte(name)); err != nil {
			return 0, err
		}
	}

	// now write out the fields index
	indexStart := uint64(w.Count())
	for _, offset := range recordOffsets {
		if err := binary.Write(w, binary.BigEndian, offset); err != nil {
			return 0, err
		}
	}

	return indexStart, nil
}
// FooterSize is the size of the footer record in bytes
// crc + ver + chunk + field offset + stored offset + num docs + docValueOffset
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8
// persistFooter writes the footer record to writerIn in big-endian form:
// number of docs, stored index offset, fields index offset, doc value
// offset, chunk factor, version and finally the CRC. The CRC written is
// the wrapping writer's crc field, seeded with crcBeforeFooter and read
// only after all the other footer fields have been written.
func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
	chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error {
	w := NewCountHashWriter(writerIn)
	w.crc = crcBeforeFooter

	// everything except the CRC, in on-disk order
	footerFields := []interface{}{
		numDocs,           // number of docs
		storedIndexOffset, // stored field index location
		fieldsIndexOffset, // field index location
		docValueOffset,    // fieldDocValue location
		chunkFactor,       // 32-bit chunk factor
		Version,           // 32-bit version
	}
	for _, field := range footerFields {
		if err := binary.Write(w, binary.BigEndian, field); err != nil {
			return err
		}
	}

	// write out CRC-32 of everything upto but not including this CRC;
	// w.crc must be evaluated here, after the writes above
	return binary.Write(w, binary.BigEndian, w.crc)
}
// writeUvarints writes each value to w as an unsigned varint, one Write
// call per value. It returns the total number of bytes written and stops
// at the first write error, returning that error along with the bytes
// written so far.
func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) {
	scratch := make([]byte, binary.MaxVarintLen64)
	for idx := range vals {
		encoded := scratch[:binary.PutUvarint(scratch, vals[idx])]
		written, werr := w.Write(encoded)
		tw += written
		if werr != nil {
			return tw, werr
		}
	}
	return tw, nil
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"github.com/blevesearch/bleve/index/scorch/segment"
zapv11 "github.com/blevesearch/zap/v11"
zapv12 "github.com/blevesearch/zap/v12"
)
var supportedSegmentPlugins map[string]map[uint32]segment.Plugin
var defaultSegmentPlugin segment.Plugin
// init clears the plugin registry and then registers the zap v12 plugin
// followed by the zap v11 plugin, making v11 the default.
func init() {
ResetPlugins()
RegisterPlugin(zapv12.Plugin(), false)
RegisterPlugin(zapv11.Plugin(), true)
}
// ResetPlugins replaces the registry of supported segment plugins with an
// empty one. It does not touch defaultSegmentPlugin.
func ResetPlugins() {
supportedSegmentPlugins = map[string]map[uint32]segment.Plugin{}
}
// RegisterPlugin adds plugin to the registry under its type and version,
// replacing any plugin previously registered for that pair. When
// makeDefault is true the plugin also becomes the default segment plugin.
func RegisterPlugin(plugin segment.Plugin, makeDefault bool) {
	typ := plugin.Type()

	byVersion, found := supportedSegmentPlugins[typ]
	if !found {
		byVersion = map[uint32]segment.Plugin{}
		supportedSegmentPlugins[typ] = byVersion
	}
	byVersion[plugin.Version()] = plugin

	if makeDefault {
		defaultSegmentPlugin = plugin
	}
}
// SupportedSegmentTypes returns the segment type names that currently have
// at least one registered plugin. The result comes from ranging over a
// map, so its order is unspecified; it is nil when nothing is registered.
func SupportedSegmentTypes() (rv []string) {
for k := range supportedSegmentPlugins {
rv = append(rv, k)
}
return
}
// SupportedSegmentTypeVersions returns the versions registered for the
// given segment type. The result comes from ranging over a map, so its
// order is unspecified; it is nil when the type has no registered plugins.
func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
for k := range supportedSegmentPlugins[typ] {
rv = append(rv, k)
}
return rv
}
// loadSegmentPlugin selects the registered plugin matching the requested
// segment type and version and stores it in s.segPlugin. It returns an
// error listing the supported alternatives when either the type or the
// version is not registered.
func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
	forcedSegmentVersion uint32) error {
	versions, knownType := supportedSegmentPlugins[forcedSegmentType]
	if !knownType {
		return fmt.Errorf("unsupported segment type: %s, supported: %v",
			forcedSegmentType, SupportedSegmentTypes())
	}

	segPlugin, knownVersion := versions[forcedSegmentVersion]
	if !knownVersion {
		return fmt.Errorf(
			"unsupported version %d for segment type: %s, supported: %v",
			forcedSegmentVersion, forcedSegmentType,
			SupportedSegmentTypeVersions(forcedSegmentType))
	}

	s.segPlugin = segPlugin
	return nil
}

View file

@ -27,9 +27,15 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
lev "github.com/couchbase/vellum/levenshtein"
)
// reusable, thread-safe Levenshtein automaton builders
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator
index int
@ -45,6 +51,15 @@ var reflectStaticSizeIndexSnapshot int
// init records the static size of an IndexSnapshot value and constructs
// the package-level Levenshtein automaton builders lb1 and lb2 (for
// distances 1 and 2 respectively), panicking if either cannot be built.
func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
}
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
}
}
type IndexSnapshot struct {
@ -59,9 +74,8 @@ type IndexSnapshot struct {
m sync.Mutex // Protects the fields that follow.
refs int64
m2 sync.Mutex // Protects the fields that follow.
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts
m2 sync.Mutex // Protects the fields that follow.
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
}
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
@ -113,16 +127,22 @@ func (i *IndexSnapshot) updateSize() {
}
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
dict, err := segment.Dictionary(field)
dict, err := segment.segment.Dictionary(field)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
}
}
}(index, segment)
}
@ -137,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil {
err = asr.err
} else {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
if !randomLookup {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
})
}
} else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
dict: asr.dict,
})
}
}
@ -153,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
return nil, err
}
// prepare heap
heap.Init(rv)
if !randomLookup {
// prepare heap
heap.Init(rv)
}
return rv, nil
}
@ -162,42 +191,75 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator()
})
}, false)
}
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm))
})
}, false)
}
func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix))
})
}, false)
}
func (i *IndexSnapshot) FieldDictRegexp(field string,
termRegex []byte) (index.FieldDict, error) {
termRegex string) (index.FieldDict, error) {
// TODO: potential optimization where the literal prefix represents the,
// entire regexp, allowing us to use PrefixIterator(prefixTerm)?
a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex)
if err != nil {
return nil, err
}
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RegexpIterator(string(termRegex))
})
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}, false)
}
// getLevAutomaton builds the automaton that FieldDictFuzzy uses to match
// terms within the requested edit distance of term.
//
// A fuzziness of 1 is served by the lb1 builder and a fuzziness of 2 by the
// lb2 builder; every other value is rejected with an error.
func (i *IndexSnapshot) getLevAutomaton(term string,
	fuzziness uint8) (vellum.Automaton, error) {
	switch fuzziness {
	case 1:
		return lb1.BuildDfa(term, fuzziness)
	case 2:
		return lb2.BuildDfa(term, fuzziness)
	default:
		return nil, fmt.Errorf("fuzziness exceeds the max limit")
	}
}
func (i *IndexSnapshot) FieldDictFuzzy(field string,
term []byte, fuzziness int) (index.FieldDict, error) {
term string, fuzziness int, prefix string) (index.FieldDict, error) {
a, err := i.getLevAutomaton(term, uint8(fuzziness))
if err != nil {
return nil, err
}
var prefixBeg, prefixEnd []byte
if prefix != "" {
prefixBeg = []byte(prefix)
prefixEnd = segment.IncrementBytes(prefixBeg)
}
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.FuzzyIterator(string(term), fuzziness)
})
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}, false)
}
func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount)
})
}, false)
}
// FieldDictContains returns a dictionary for field that supports membership
// checks through its Contains method.
//
// The field dict is built without an iterator factory (nil) and with the
// helper's final flag set to true, in contrast to the iterating FieldDict*
// variants which pass false.
//
// On failure an explicit nil dictionary is returned together with the error,
// so callers never receive a non-nil interface value wrapping a nil pointer.
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
	dict, err := i.newIndexSnapshotFieldDict(field, nil, true)
	if err != nil {
		return nil, err
	}
	return dict, nil
}
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
@ -393,8 +455,8 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
}
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) {
rv, dicts := i.allocTermFieldReaderDicts(field)
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
rv := i.allocTermFieldReaderDicts(field)
rv.term = term
rv.field = field
@ -412,20 +474,19 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
rv.currPosting = nil
rv.currID = rv.currID[:0]
if dicts == nil {
dicts = make([]segment.TermDictionary, len(i.segment))
if rv.dicts == nil {
rv.dicts = make([]segment.TermDictionary, len(i.segment))
for i, segment := range i.segment {
dict, err := segment.Dictionary(field)
dict, err := segment.segment.Dictionary(field)
if err != nil {
return nil, err
}
dicts[i] = dict
rv.dicts[i] = dict
}
}
rv.dicts = dicts
for i := range i.segment {
pl, err := dicts[i].PostingsList(term, nil, rv.postings[i])
for i, segment := range i.segment {
pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
if err != nil {
return nil, err
}
@ -436,37 +497,37 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
return rv, nil
}
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (
tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) {
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
i.m2.Lock()
if i.fieldDicts != nil {
dicts = i.fieldDicts[field]
}
if i.fieldTFRs != nil {
tfrs := i.fieldTFRs[field]
last := len(tfrs) - 1
if last >= 0 {
rv := tfrs[last]
tfr = tfrs[last]
tfrs[last] = nil
i.fieldTFRs[field] = tfrs[:last]
i.m2.Unlock()
return rv, dicts
return
}
}
i.m2.Unlock()
return &IndexSnapshotTermFieldReader{}, dicts
return &IndexSnapshotTermFieldReader{}
}
func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
i.parent.rootLock.RLock()
obsolete := i.parent.root != i
i.parent.rootLock.RUnlock()
if obsolete {
// if we're not the current root (mutations happened), don't bother recycling
return
}
i.m2.Lock()
if i.fieldTFRs == nil {
i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
}
i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
if i.fieldDicts == nil {
i.fieldDicts = map[string][]segment.TermDictionary{}
}
i.fieldDicts[tfr.field] = tfr.dicts
i.m2.Unlock()
}
@ -636,7 +697,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} {
// subtractStrings returns set a minus elements of set b.
func subtractStrings(a, b []string) []string {
if len(b) <= 0 {
if len(b) == 0 {
return a
}

View file

@ -22,6 +22,7 @@ import (
)
// segmentDictCursor tracks the state of one segment's term dictionary
// within an IndexSnapshotFieldDict.
type segmentDictCursor struct {
// dict is the segment's term dictionary; IndexSnapshotFieldDict.Contains
// calls dict.Contains on it for direct key lookups.
dict segment.TermDictionary
// itr is the segment's dictionary iterator.
itr segment.DictionaryIterator
// curr is the entry this cursor is currently positioned on.
curr index.DictEntry
}
@ -52,7 +53,7 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} {
}
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
if len(i.cursors) <= 0 {
if len(i.cursors) == 0 {
return nil, nil
}
i.entry = i.cursors[0].curr
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error {
return nil
}
// Contains reports whether key is present in the term dictionary of any
// cursor held by this field dict.
//
// Cursors are consulted in order and the search stops at the first hit. A
// failed lookup on one cursor does not abort the search, because a later
// cursor may still contain the key. If no cursor reports a hit, the first
// lookup error (if any) is returned rather than silently dropped, so callers
// can tell "absent" apart from "could not be determined".
//
// With no cursors the result is (false, nil).
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
	var firstErr error
	for _, cursor := range i.cursors {
		found, err := cursor.dict.Contains(key)
		if found {
			return true, nil
		}
		if err != nil && firstErr == nil {
			// Remember the failure but keep probing the remaining cursors.
			firstErr = err
		}
	}
	return false, firstErr
}

View file

@ -74,7 +74,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
rv = &index.TermFieldDoc{}
}
// find the next hit
for i.segmentOffset < len(i.postings) {
for i.segmentOffset < len(i.iterators) {
next, err := i.iterators[i.segmentOffset].Next()
if err != nil {
return nil, err

View file

@ -17,9 +17,10 @@ package scorch
import (
"fmt"
"log"
"os"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
type RollbackPoint struct {
@ -34,13 +35,22 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte {
// RollbackPoints returns an array of rollback points available for
// the application to rollback to, with more recent rollback points
// (higher epochs) coming first.
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
if s.rootBolt == nil {
return nil, fmt.Errorf("RollbackPoints: root is nil")
func RollbackPoints(path string) ([]*RollbackPoint, error) {
if len(path) == 0 {
return nil, fmt.Errorf("RollbackPoints: invalid path")
}
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
rootBoltOpt := &bolt.Options{
ReadOnly: true,
}
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
if err != nil || rootBolt == nil {
return nil, err
}
// start a read-only bolt transaction
tx, err := s.rootBolt.Begin(false)
tx, err := rootBolt.Begin(false)
if err != nil {
return nil, fmt.Errorf("RollbackPoints: failed to start" +
" read-only transaction")
@ -49,6 +59,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
// read-only bolt transactions to be rolled back
defer func() {
_ = tx.Rollback()
_ = rootBolt.Close()
}()
snapshots := tx.Bucket(boltSnapshotsBucket)
@ -105,69 +116,98 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
return rollbackPoints, nil
}
// Rollback atomically and durably (if unsafeBatch is unset) brings
// the store back to the point in time as represented by the
// RollbackPoint. Rollback() should only be passed a RollbackPoint
// that came from the same store using the RollbackPoints() API.
func (s *Scorch) Rollback(to *RollbackPoint) error {
// Rollback atomically and durably brings the store back to the point
// in time as represented by the RollbackPoint.
// Rollback() should only be passed a RollbackPoint that came from the
// same store using the RollbackPoints() API along with the index path.
func Rollback(path string, to *RollbackPoint) error {
if to == nil {
return fmt.Errorf("Rollback: RollbackPoint is nil")
}
if s.rootBolt == nil {
return fmt.Errorf("Rollback: root is nil")
if len(path) == 0 {
return fmt.Errorf("Rollback: index path is empty")
}
revert := &snapshotReversion{}
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
rootBoltOpt := &bolt.Options{
ReadOnly: false,
}
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
if err != nil || rootBolt == nil {
return err
}
defer func() {
err1 := rootBolt.Close()
if err1 != nil && err == nil {
err = err1
}
}()
s.rootLock.Lock()
err := s.rootBolt.View(func(tx *bolt.Tx) error {
// pick all the younger persisted epochs in bolt store
// including the target one.
var found bool
var eligibleEpochs []uint64
err = rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
if snapshots == nil {
return fmt.Errorf("Rollback: no snapshots available")
return nil
}
pos := segment.EncodeUvarintAscending(nil, to.epoch)
snapshot := snapshots.Bucket(pos)
if snapshot == nil {
return fmt.Errorf("Rollback: snapshot not found")
sc := snapshots.Cursor()
for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
_, snapshotEpoch, err := segment.DecodeUvarintAscending(sk)
if err != nil {
continue
}
if snapshotEpoch == to.epoch {
found = true
}
eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
}
indexSnapshot, err := s.loadSnapshot(snapshot)
if err != nil {
return fmt.Errorf("Rollback: unable to load snapshot: %v", err)
}
// add segments referenced by loaded index snapshot to the
// ineligibleForRemoval map
for _, segSnap := range indexSnapshot.segment {
filename := zapFileName(segSnap.id)
s.ineligibleForRemoval[filename] = true
}
revert.snapshot = indexSnapshot
revert.applied = make(chan error)
revert.persisted = make(chan error)
return nil
})
s.rootLock.Unlock()
if len(eligibleEpochs) == 0 {
return fmt.Errorf("Rollback: no persisted epochs found in bolt")
}
if !found {
return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
}
// start a write transaction
tx, err := rootBolt.Begin(true)
if err != nil {
return err
}
// introduce the reversion
s.revertToSnapshots <- revert
defer func() {
if err == nil {
err = tx.Commit()
} else {
_ = tx.Rollback()
}
if err == nil {
err = rootBolt.Sync()
}
}()
// block until this snapshot is applied
err = <-revert.applied
if err != nil {
return fmt.Errorf("Rollback: failed with err: %v", err)
snapshots := tx.Bucket(boltSnapshotsBucket)
if snapshots == nil {
return nil
}
for _, epoch := range eligibleEpochs {
k := segment.EncodeUvarintAscending(nil, epoch)
if err != nil {
continue
}
if epoch == to.epoch {
// return here as it already processed until the given epoch
return nil
}
err = snapshots.DeleteBucket(k)
if err == bolt.ErrBucketNotFound {
err = nil
}
}
return <-revert.persisted
return err
}

View file

@ -29,43 +29,6 @@ var TermSeparator byte = 0xff
var TermSeparatorSplitSlice = []byte{TermSeparator}
type SegmentDictionarySnapshot struct {
s *SegmentSnapshot
d segment.TermDictionary
}
func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap,
prealloc segment.PostingsList) (segment.PostingsList, error) {
// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
return s.d.PostingsList(term, s.s.deleted, prealloc)
}
func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
return s.d.Iterator()
}
func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
return s.d.PrefixIterator(prefix)
}
func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
return s.d.RangeIterator(start, end)
}
func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
return s.d.RegexpIterator(regex)
}
func (s *SegmentDictionarySnapshot) FuzzyIterator(term string,
fuzziness int) segment.DictionaryIterator {
return s.d.FuzzyIterator(term, fuzziness)
}
func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte,
includeCount bool) segment.DictionaryIterator {
return s.d.OnlyIterator(onlyTerms, includeCount)
}
type SegmentSnapshot struct {
id uint64
segment segment.Segment
@ -115,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 {
return rv
}
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
d, err := s.segment.Dictionary(field)
if err != nil {
return nil, err
}
return &SegmentDictionarySnapshot{
s: s,
d: d,
}, nil
}
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
rv, err := s.segment.DocNumbers(docIDs)
if err != nil {
@ -137,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
return rv, nil
}
// DocNumbersLive returns bitsit containing doc numbers for all live docs
// DocNumbersLive returns a bitmap containing doc numbers for all live docs
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
rv := roaring.NewBitmap()
rv.AddRange(0, s.segment.Count())
@ -161,14 +113,29 @@ func (s *SegmentSnapshot) Size() (rv int) {
}
// cachedFieldDocs holds the cached per-document term lists for a single field.
type cachedFieldDocs struct {
m sync.Mutex // Held by prepareField while it runs and by Size while it walks docs.
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
size uint64 // Byte-size accounting, incremented by prepareField.
}
// Size returns the number of bytes accounted to the cached docs: for every
// entry, 8 bytes for the uint64 key plus the length of its value.
// The mutex is held for the duration of the walk over docs.
func (cfd *cachedFieldDocs) Size() int {
	const docNumBytes = 8 // size of uint64

	cfd.m.Lock()
	defer cfd.m.Unlock()

	total := 0
	for _, terms := range cfd.docs {
		total += docNumBytes + len(terms)
	}
	return total
}
func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
defer close(cfd.readyCh)
cfd.m.Lock()
defer func() {
close(cfd.readyCh)
cfd.m.Unlock()
}()
cfd.size += uint64(size.SizeOfUint64) /* size field */
dict, err := ss.segment.Dictionary(field)
@ -216,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
}
type cachedDocs struct {
size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
@ -279,9 +246,7 @@ func (c *cachedDocs) updateSizeLOCKED() {
for k, v := range c.cache { // cachedFieldDocs
sizeInBytes += len(k)
if v != nil {
for _, entry := range v.docs { // docs
sizeInBytes += 8 /* size of uint64 */ + len(entry)
}
sizeInBytes += v.Size()
}
}
atomic.StoreUint64(&c.size, uint64(sizeInBytes))

View file

@ -69,11 +69,15 @@ type Stats struct {
TotPersistLoopEnd uint64
TotPersistedItems uint64
TotItemsToPersist uint64
TotPersistedSegments uint64
TotPersisterSlowMergerPause uint64
TotPersisterSlowMergerResume uint64
TotPersisterNapPauseCompleted uint64
TotPersisterMergerNapBreak uint64
TotFileMergeLoopBeg uint64
TotFileMergeLoopErr uint64
TotFileMergeLoopEnd uint64
@ -91,24 +95,32 @@ type Stats struct {
TotFileMergeSegmentsEmpty uint64
TotFileMergeSegments uint64
TotFileSegmentsAtRoot uint64
TotFileMergeWrittenBytes uint64
TotFileMergeZapBeg uint64
TotFileMergeZapEnd uint64
TotFileMergeZapTime uint64
MaxFileMergeZapTime uint64
TotFileMergeZapBeg uint64
TotFileMergeZapEnd uint64
TotFileMergeZapTime uint64
MaxFileMergeZapTime uint64
TotFileMergeZapIntroductionTime uint64
MaxFileMergeZapIntroductionTime uint64
TotFileMergeIntroductions uint64
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductions uint64
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64
TotMemMergeBeg uint64
TotMemMergeErr uint64
TotMemMergeDone uint64
TotMemMergeZapBeg uint64
TotMemMergeZapEnd uint64
TotMemMergeZapTime uint64
MaxMemMergeZapTime uint64
TotMemMergeSegments uint64
CurFilesIneligibleForRemoval uint64
TotSnapshotsRemovedFromMetaStore uint64
TotMemMergeBeg uint64
TotMemMergeErr uint64
TotMemMergeDone uint64
TotMemMergeZapBeg uint64
TotMemMergeZapEnd uint64
TotMemMergeZapTime uint64
MaxMemMergeZapTime uint64
TotMemMergeSegments uint64
TotMemorySegmentsAtRoot uint64
}
// atomically populates the returned map

View file

@ -17,7 +17,7 @@ package boltdb
import (
"bytes"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
type Iterator struct {

View file

@ -16,7 +16,7 @@ package boltdb
import (
"github.com/blevesearch/bleve/index/store"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
type Reader struct {

View file

@ -30,7 +30,7 @@ import (
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
"github.com/boltdb/bolt"
bolt "go.etcd.io/bbolt"
)
const (
@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
bo.ReadOnly = ro
}
if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
bo.InitialMmapSize = initialMmapSize
} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
bo.InitialMmapSize = int(initialMmapSize)
}
db, err := bolt.Open(path, 0600, bo)
if err != nil {
return nil, err

View file

@ -584,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error {
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
tfr.doc = key[3+len(term)+1:]
if len(tfr.doc) <= 0 {
if len(tfr.doc) == 0 {
return fmt.Errorf("invalid term frequency key, empty docid")
}

View file

@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return
}
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update
indexStart := time.Now()
var kvwriter store.KVWriter
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -775,7 +779,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
}
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
if len(in) <= 0 {
if len(in) == 0 {
return nil
}
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
}
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
defer persistedCallback(err)
}
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -810,15 +818,18 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
aw := index.NewAnalysisWork(udc, doc, resultChan)
// put the work on the queue
udc.analysisQueue.Queue(aw)
if numUpdates > 0 {
go func() {
for k := range batch.IndexOps {
doc := batch.IndexOps[k]
if doc != nil {
aw := index.NewAnalysisWork(udc, doc, resultChan)
// put the work on the queue
udc.analysisQueue.Queue(aw)
}
}
}
}()
}()
}
// retrieve back index rows concurrent with analysis
docBackIndexRowErr := error(nil)
@ -958,6 +969,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
return
}