Switch to bbolt

This commit is contained in:
Ken-Håvard Lieng 2020-04-23 01:06:36 +02:00
parent 360bed00f9
commit 77543e3aed
617 changed files with 68468 additions and 97867 deletions

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file
View file

@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

View file

@ -14,6 +14,7 @@ go:
- "1.10.x"
- "1.11.x"
- "1.12.x"
- "1.13.x"
- tip
# whitelist
@ -23,10 +24,14 @@ branches:
script:
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip

View file

@ -12,4 +12,5 @@ Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard)
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll)

View file

@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/willf/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat

View file

@ -1,4 +1,5 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
=============
This is a go version of the Roaring bitmap data structure.
@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: http://druid.io/
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
@ -28,11 +29,17 @@ Roaring bitmaps are found to work well in many important applications:
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client
* [runv](https://github.com/hyperhq/runv): an Hypervisor-based runtime for the Open Containers Initiative
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent)
* [runv](https://github.com/hyperhq/runv)
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
* [Bleve](http://www.blevesearch.com)
* [lindb](https://github.com/lindb/lindb)
* [Elasticell](https://github.com/deepfabric/elasticell)
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
* [M3](https://github.com/m3db/m3)
* [trident](https://github.com/NetApp/trident)
This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com).
@ -61,7 +68,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
they include
- github.com/smartystreets/goconvey/convey
- github.com/willf/bitset
- github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream
@ -207,7 +213,7 @@ You can use roaring with gore:
- go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github/RoaringBitmap/roaring
- go get github.com/RoaringBitmap/roaring
```go
$ gore

View file

@ -24,7 +24,19 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
}
}
func (ac *arrayContainer) getShortIterator() shortIterable {
func (ac *arrayContainer) iterate(cb func(x uint16) bool) bool {
iterator := shortIterator{ac.content, 0}
for iterator.hasNext() {
if !cb(iterator.next()) {
return false
}
}
return true
}
func (ac *arrayContainer) getShortIterator() shortPeekable {
return &shortIterator{ac.content, 0}
}
@ -33,7 +45,7 @@ func (ac *arrayContainer) getReverseIterator() shortIterable {
}
func (ac *arrayContainer) getManyIterator() manyIterable {
return &manyIterator{ac.content, 0}
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) minimum() uint16 {

View file

@ -96,6 +96,18 @@ func (bc *bitmapContainer) maximum() uint16 {
return uint16(0)
}
func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
iterator := bitmapContainerShortIterator{bc, bc.NextSetBit(0)}
for iterator.hasNext() {
if !cb(iterator.next()) {
return false
}
}
return true
}
type bitmapContainerShortIterator struct {
ptr *bitmapContainer
i int
@ -110,11 +122,21 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
return uint16(bcsi.i)
}
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
if bcsi.hasNext() && bcsi.peekNext() < minval {
bcsi.i = bcsi.ptr.NextSetBit(int(minval))
}
}
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
}
func (bc *bitmapContainer) getShortIterator() shortIterable {
func (bc *bitmapContainer) getShortIterator() shortPeekable {
return newBitmapContainerShortIterator(bc)
}

161
vendor/github.com/RoaringBitmap/roaring/byte_input.go generated vendored Normal file
View file

@ -0,0 +1,161 @@
package roaring
import (
"encoding/binary"
"io"
)
type byteInput interface {
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
next(n int) ([]byte, error)
// readUInt32 reads uint32 with LittleEndian order
readUInt32() (uint32, error)
// readUInt16 reads uint16 with LittleEndian order
readUInt16() (uint16, error)
// getReadBytes returns read bytes
getReadBytes() int64
// skipBytes skips exactly n bytes
skipBytes(n int) error
}
func newByteInputFromReader(reader io.Reader) byteInput {
return &byteInputAdapter{
r: reader,
readBytes: 0,
}
}
func newByteInput(buf []byte) byteInput {
return &byteBuffer{
buf: buf,
off: 0,
}
}
type byteBuffer struct {
buf []byte
off int
}
// next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *byteBuffer) next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteBuffer) readUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteBuffer) readUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// getReadBytes returns read bytes
func (b *byteBuffer) getReadBytes() int64 {
return int64(b.off)
}
// skipBytes skips exactly n bytes
func (b *byteBuffer) skipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// reset resets the given buffer with a new byte slice
func (b *byteBuffer) reset(buf []byte) {
b.buf = buf
b.off = 0
}
type byteInputAdapter struct {
r io.Reader
readBytes int
}
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteInputAdapter) readUInt32() (uint32, error) {
buf, err := b.next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteInputAdapter) readUInt16() (uint16, error) {
buf, err := b.next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// getReadBytes returns read bytes
func (b *byteInputAdapter) getReadBytes() int64 {
return int64(b.readBytes)
}
// skipBytes skips exactly n bytes
func (b *byteInputAdapter) skipBytes(n int) error {
_, err := b.next(n)
return err
}
// reset resets the given buffer with a new stream
func (b *byteInputAdapter) reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}

View file

@ -4,12 +4,13 @@ go 1.12
require (
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493 // indirect
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
github.com/golang/snappy v0.0.1 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
github.com/philhofer/fwd v1.0.0 // indirect
github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff
github.com/stretchr/testify v1.3.0
github.com/stretchr/testify v1.4.0
github.com/tinylib/msgp v1.1.0
github.com/willf/bitset v1.1.10
)

View file

@ -2,12 +2,12 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493 h1:OTanQnFt0bi5iLFSdbEVA/idR6Q2WhCm+deb7ir2CcM=
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
@ -16,14 +16,15 @@ github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff h1:86HlEv0yBCry9syNuylzqznKXDK11p6D0DT596yNMys=
github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff/go.mod h1:KSQcGKpxUMHk3nbYzs/tIBAM2iDooCn0BmttHOJEbLs=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -4,12 +4,7 @@ type manyIterable interface {
nextMany(hs uint32, buf []uint32) int
}
type manyIterator struct {
slice []uint16
loc int
}
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
n := 0
l := si.loc
s := si.slice

View file

@ -6,12 +6,12 @@
package roaring
import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"strconv"
"sync"
)
// Bitmap represents a compressed bitmap where you can add integers.
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
return rb.highlowcontainer.readFrom(stream)
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
stream := byteInputAdapterPool.Get().(*byteInputAdapter)
stream.reset(reader)
p, err = rb.highlowcontainer.readFrom(stream)
byteInputAdapterPool.Put(stream)
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
// You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite).
//
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) {
return rb.highlowcontainer.fromBuffer(buf)
// If buf becomes unavailable, then a bitmap created with
// FromBuffer would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := byteBufferPool.Get().(*byteBuffer)
stream.reset(buf)
p, err = rb.highlowcontainer.readFrom(stream)
byteBufferPool.Put(stream)
return
}
var (
byteBufferPool = sync.Pool{
New: func() interface{} {
return &byteBuffer{}
},
}
byteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &byteInputAdapter{}
},
}
)
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (rb *Bitmap) RunOptimize() {
rb.highlowcontainer.runOptimize()
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
}
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
_, err := rb.WriteTo(writer)
if err != nil {
return nil, err
}
err = writer.Flush()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
return rb.ToBytes()
}
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
func (rb *Bitmap) UnmarshalBinary(data []byte) error {
var buf bytes.Buffer
_, err := buf.Write(data)
if err != nil {
return err
}
reader := bufio.NewReader(&buf)
_, err = rb.ReadFrom(reader)
r := bytes.NewReader(data)
_, err := rb.ReadFrom(r)
return err
}
@ -215,10 +233,20 @@ type IntIterable interface {
Next() uint32
}
// IntPeekable allows you to look at the next value without advancing and
// advance as long as the next value is smaller than minval
type IntPeekable interface {
IntIterable
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint32
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint32)
}
type intIterator struct {
pos int
hs uint32
iter shortIterable
iter shortPeekable
highlowcontainer *roaringArray
}
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
return x
}
// PeekNext peeks the next value without advancing the iterator
func (ii *intIterator) PeekNext() uint32 {
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
to := minval >> 16
for ii.HasNext() && (ii.hs>>16) < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>16) == to {
ii.iter.advanceIfNeeded(lowbits(minval))
if !ii.iter.hasNext() {
ii.pos++
ii.init()
}
}
}
func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator)
p.pos = 0
@ -364,9 +416,41 @@ func (rb *Bitmap) String() string {
return buffer.String()
}
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
// Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
// false, the iteration is halted.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
// There is no guarantee as to what order the values will be iterated
func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
for i := 0; i < rb.highlowcontainer.size(); i++ {
hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
c := rb.highlowcontainer.getContainerAtIndex(i)
var shouldContinue bool
// This is hacky but it avoids allocations from invoking an interface method with a closure
switch t := c.(type) {
case *arrayContainer:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
case *runContainer16:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
case *bitmapContainer:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
}
if !shouldContinue {
break
}
}
}
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntIterable {
func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb)
}
@ -423,41 +507,72 @@ func (rb *Bitmap) Equals(o interface{}) bool {
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
containerOffset := highbits(offset)
inOffset := lowbits(offset)
return AddOffset64(x, int64(offset))
}
// AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
// If offset + element is outside of the range [0,2^32), that the element will be dropped
func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
// we need "offset" to be a long because we want to support values
// between -0xFFFFFFFF up to +-0xFFFFFFFF
var containerOffset64 int64
if offset < 0 {
containerOffset64 = (offset - (1 << 16) + 1) / (1 << 16)
} else {
containerOffset64 = offset >> 16
}
if containerOffset64 >= (1<<16) || containerOffset64 <= -(1<<16) {
return New()
}
containerOffset := int32(containerOffset64)
inOffset := (uint16)(offset - containerOffset64*(1<<16))
if inOffset == 0 {
answer = x.Clone()
for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
key := answer.highlowcontainer.getKeyAtIndex(pos)
key := int32(answer.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset
answer.highlowcontainer.keys[pos] = key
if key >= 0 && key <= MaxUint16 {
answer.highlowcontainer.keys[pos] = uint16(key)
}
}
} else {
answer = New()
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := x.highlowcontainer.getKeyAtIndex(pos)
key := int32(x.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset
c := x.highlowcontainer.getContainerAtIndex(pos)
offsetted := c.addOffset(inOffset)
if offsetted[0].getCardinality() > 0 {
if offsetted[0].getCardinality() > 0 && (key >= 0 && key <= MaxUint16) {
curSize := answer.highlowcontainer.size()
lastkey := uint16(0)
lastkey := int32(0)
if curSize > 0 {
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1)
lastkey = int32(answer.highlowcontainer.getKeyAtIndex(curSize - 1))
}
if curSize > 0 && lastkey == key {
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
orrseult := prev.ior(offsetted[0])
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
} else {
answer.highlowcontainer.appendContainer(key, offsetted[0], false)
answer.highlowcontainer.appendContainer(uint16(key), offsetted[0], false)
}
}
if offsetted[1].getCardinality() > 0 {
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false)
if offsetted[1].getCardinality() > 0 && ((key+1) >= 0 && (key+1) <= MaxUint16) {
answer.highlowcontainer.appendContainer(uint16(key+1), offsetted[1], false)
}
}
}
return answer
}
@ -1378,6 +1493,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
return rb.highlowcontainer.copyOnWrite
}
// CloneCopyOnWriteContainers clones all containers which have
// needCopyOnWrite set to true.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to, after
// calling FromBuffer.
// More generally this function is useful if you call FromBuffer
// to construct a bitmap with a backing array buf
// and then later discard the buf array. Note that you should call
// CloneCopyOnWriteContainers on all bitmaps that were derived
// from the 'FromBuffer' bitmap since they map have dependencies
// on the buf array as well.
func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))

View file

@ -5,7 +5,6 @@ import (
"encoding/binary"
"fmt"
"io"
"io/ioutil"
snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp"
@ -39,7 +38,8 @@ type container interface {
not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortIterable
getShortIterator() shortPeekable
iterate(cb func(x uint16) bool) bool
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
@ -64,7 +64,6 @@ type container interface {
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
readFrom(io.Reader) (int, error)
writeTo(io.Writer) (int, error)
numberOfRuns() int
@ -283,6 +282,18 @@ func (ra *roaringArray) clone() *roaringArray {
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
@ -479,20 +490,15 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
nw += 2
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
nw += 2
// compute isRun bitmap
var ir []byte
isRun := newBitmapContainer()
// compute isRun bitmap without temporary allocation
var runbitmapslice = buf[nw:nw+isRunSizeInBytes]
for i, c := range ra.containers {
switch c.(type) {
case *runContainer16:
isRun.iadd(uint16(i))
runbitmapslice[i / 8] |= 1<<(uint(i)%8)
}
}
// convert to little endian
ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
nw += copy(buf[nw:], ir)
nw += isRunSizeInBytes
} else {
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
nw += 4
@ -549,50 +555,52 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
return buf.Bytes(), err
}
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
pos := 0
if len(buf) < 8 {
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
cookie, err := stream.readUInt32()
if err != nil {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
cookie := binary.LittleEndian.Uint32(buf)
pos += 4
var size uint32 // number of containers
haveRunContainers := false
var size uint32
var isRunBitmap []byte
// cookie header
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1) // number of containers
size = uint32(uint16(cookie>>16) + 1)
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
if pos+isRunBitmapSize > len(buf) {
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
isRunBitmap, err = stream.next(isRunBitmapSize)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
isRunBitmap = buf[pos : pos+isRunBitmapSize]
pos += isRunBitmapSize
} else if cookie == serialCookieNoRunContainer {
size = binary.LittleEndian.Uint32(buf[pos:])
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
// keycard - is {key, cardinality} tuple slice
if pos+2*2*int(size) > len(buf) {
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
}
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
pos += 2 * 2 * int(size)
size, err = stream.readUInt32()
if !haveRunContainers || size >= noOffsetThreshold {
pos += 4 * int(size)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.next(2 * 2 * int(size))
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.skipBytes(int(size) * 4); err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
}
// Allocate slices upfront as number of containers is known
@ -601,11 +609,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} else {
ra.containers = make([]container, size)
}
if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size]
} else {
ra.keys = make([]uint16, size)
}
if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else {
@ -613,129 +623,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
}
for i := uint32(0); i < size; i++ {
key := uint16(keycard[2*i])
key := keycard[2*i]
card := int(keycard[2*i+1]) + 1
ra.keys[i] = key
ra.needCopyOnWrite[i] = true
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr := binary.LittleEndian.Uint16(buf[pos:])
pos += 2
if pos+int(nr)*4 > len(buf) {
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
nr, err := stream.readUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.next(int(nr) * 4)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
iv: byteSliceAsInterval16Slice(buf),
card: int64(card),
}
pos += int(nr) * 4
ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{
cardinality: card,
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
bitmap: byteSliceAsUint64Slice(buf),
}
pos += arrayDefaultMaxSize * 2
ra.containers[i] = &nb
} else {
// array container
nb := arrayContainer{
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
buf, err := stream.next(card * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
pos += card * 2
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb
}
}
return int64(pos), nil
}
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
pos := 0
var cookie uint32
err := binary.Read(stream, binary.LittleEndian, &cookie)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
pos += 4
var size uint32
haveRunContainers := false
var isRun *bitmapContainer
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1)
bytesToRead := (int(size) + 7) / 8
numwords := (bytesToRead + 7) / 8
by := make([]byte, bytesToRead, numwords*8)
nr, err := io.ReadFull(stream, by)
if err != nil {
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
}
pos += bytesToRead
by = by[:cap(by)]
isRun = newBitmapContainer()
for i := 0; i < numwords; i++ {
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
by = by[8:]
}
} else if cookie == serialCookieNoRunContainer {
err = binary.Read(stream, binary.LittleEndian, &size)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
}
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
keycard := make([]uint16, 2*size, 2*size)
err = binary.Read(stream, binary.LittleEndian, keycard)
if err != nil {
return 0, err
}
pos += 2 * 2 * int(size)
// offset header
if !haveRunContainers || size >= noOffsetThreshold {
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
pos += 4 * int(size)
}
for i := uint32(0); i < size; i++ {
key := int(keycard[2*i])
card := int(keycard[2*i+1]) + 1
if haveRunContainers && isRun.contains(uint16(i)) {
nb := newRunContainer16()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(uint16(key), nb, false)
} else if card > arrayDefaultMaxSize {
nb := newBitmapContainer()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
nb.cardinality = card
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
} else {
nb := newArrayContainerSize(card)
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
}
}
return int64(pos), nil
return stream.getReadBytes(), nil
}
func (ra *roaringArray) hasRunCompression() bool {

View file

@ -1149,207 +1149,175 @@ func (rc *runContainer16) Add(k uint16) (wasNew bool) {
//msgp:ignore runIterator
// runIterator16 advice: you must call Next() at least once
// before calling Cur(); and you should call HasNext()
// before calling Next() to insure there are contents.
// runIterator16 advice: you must call hasNext()
// before calling next()/peekNext() to insure there are contents.
type runIterator16 struct {
rc *runContainer16
curIndex int64
curPosInIndex uint16
curSeq int64
}
// newRunIterator16 returns a new empty run container.
func (rc *runContainer16) newRunIterator16() *runIterator16 {
return &runIterator16{rc: rc, curIndex: -1}
return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
}
// HasNext returns false if calling Next will panic. It
func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
iterator := runIterator16{rc, 0, 0}
for iterator.hasNext() {
if !cb(iterator.next()) {
return false
}
}
return true
}
// hasNext returns false if calling next will panic. It
// returns true when there is at least one more value
// available in the iteration sequence.
func (ri *runIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 {
return false
}
if ri.curIndex == -1 {
return true
}
return ri.curSeq+1 < ri.rc.cardinality()
return int64(len(ri.rc.iv)) > ri.curIndex+1 ||
(int64(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
}
// cur returns the current value pointed to by the iterator.
func (ri *runIterator16) cur() uint16 {
// next returns the next value in the iteration sequence.
func (ri *runIterator16) next() uint16 {
next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length {
ri.curPosInIndex = 0
ri.curIndex++
} else {
ri.curPosInIndex++
}
return next
}
// peekNext returns the next value in the iteration sequence without advancing the iterator
func (ri *runIterator16) peekNext() uint16 {
return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
}
// Next returns the next value in the iteration sequence.
func (ri *runIterator16) next() uint16 {
if !ri.hasNext() {
panic("no Next available")
// advanceIfNeeded advances as long as the next value is smaller than minval
func (ri *runIterator16) advanceIfNeeded(minval uint16) {
if !ri.hasNext() || ri.peekNext() >= minval {
return
}
if ri.curIndex >= int64(len(ri.rc.iv)) {
panic("runIterator.Next() going beyond what is available")
opt := &searchOptions{
startIndex: ri.curIndex,
endxIndex: int64(len(ri.rc.iv)),
}
if ri.curIndex == -1 {
// first time is special
ri.curIndex = 0
// interval cannot be -1 because of minval > peekNext
interval, isPresent, _ := ri.rc.search(int64(minval), opt)
// if the minval is present, set the curPosIndex at the right position
if isPresent {
ri.curIndex = interval
ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start
} else {
ri.curPosInIndex++
if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last())+1 {
ri.curPosInIndex = 0
ri.curIndex++
}
ri.curSeq++
// otherwise interval is set to to the minimum index of rc.iv
// which comes strictly before the key, that's why we set the next interval
ri.curIndex = interval + 1
ri.curPosInIndex = 0
}
return ri.cur()
}
// remove removes the element that the iterator
// is on from the run container. You can use
// Cur if you want to double check what is about
// to be deleted.
func (ri *runIterator16) remove() uint16 {
n := ri.rc.cardinality()
if n == 0 {
panic("runIterator.Remove called on empty runContainer16")
}
cur := ri.cur()
ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
return cur
}
// runReverseIterator16 advice: you must call next() at least once
// before calling cur(); and you should call hasNext()
// runReverseIterator16 advice: you must call hasNext()
// before calling next() to insure there are contents.
type runReverseIterator16 struct {
rc *runContainer16
curIndex int64 // index into rc.iv
curPosInIndex uint16 // offset in rc.iv[curIndex]
curSeq int64 // 0->cardinality, performance optimization in hasNext()
}
// newRunReverseIterator16 returns a new empty run iterator.
func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 {
return &runReverseIterator16{rc: rc, curIndex: -2}
index := int64(len(rc.iv)) - 1
pos := uint16(0)
if index >= 0 {
pos = rc.iv[index].length
}
return &runReverseIterator16{
rc: rc,
curIndex: index,
curPosInIndex: pos,
}
}
// hasNext returns false if calling next will panic. It
// returns true when there is at least one more value
// available in the iteration sequence.
func (ri *runReverseIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 {
return false
}
if ri.curIndex == -2 {
return true
}
return ri.rc.cardinality()-ri.curSeq > 1
}
// cur returns the current value pointed to by the iterator.
func (ri *runReverseIterator16) cur() uint16 {
return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0
}
// next returns the next value in the iteration sequence.
func (ri *runReverseIterator16) next() uint16 {
if !ri.hasNext() {
panic("no next available")
}
if ri.curIndex == -1 {
panic("runReverseIterator.next() going beyond what is available")
}
if ri.curIndex == -2 {
// first time is special
ri.curIndex = int64(len(ri.rc.iv)) - 1
ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
if ri.curPosInIndex > 0 {
ri.curPosInIndex--
} else {
if ri.curPosInIndex > 0 {
ri.curPosInIndex--
} else {
ri.curIndex--
ri.curIndex--
if ri.curIndex >= 0 {
ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
}
ri.curSeq++
}
return ri.cur()
return next
}
// remove removes the element that the iterator
// is on from the run container. You can use
// cur if you want to double check what is about
// to be deleted.
func (ri *runReverseIterator16) remove() uint16 {
n := ri.rc.cardinality()
if n == 0 {
panic("runReverseIterator.Remove called on empty runContainer16")
}
cur := ri.cur()
ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
return cur
}
type manyRunIterator16 struct {
rc *runContainer16
curIndex int64
curPosInIndex uint16
curSeq int64
}
func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 {
return &manyRunIterator16{rc: rc, curIndex: -1}
}
func (ri *manyRunIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 {
return false
}
if ri.curIndex == -1 {
return true
}
return ri.curSeq+1 < ri.rc.cardinality()
func (rc *runContainer16) newManyRunIterator16() *runIterator16 {
return rc.newRunIterator16()
}
// hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany
func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int {
func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
n := 0
if !ri.hasNext() {
return n
}
// start and end are inclusive
for n < len(buf) {
if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 {
moreVals := 0
if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex {
// add as many as you can from this seq
moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n)
base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs
// allows BCE
buf2 := buf[n : n+moreVals]
for i := range buf2 {
buf2[i] = base + uint32(i)
}
// update values
n += moreVals
}
if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) {
ri.curPosInIndex = 0
ri.curIndex++
if ri.curIndex == int64(len(ri.rc.iv)) {
break
}
buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs
if ri.curIndex != 0 {
ri.curSeq++
}
n++
// not strictly necessarily due to len(buf)-n min check, but saves some work
continue
} else {
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
}
// add as many as you can from this seq
moreVals := minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n)
base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs
// allows BCE
buf2 := buf[n : n+moreVals]
for i := range buf2 {
buf2[i] = base + uint32(i)
}
// update values
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
ri.curSeq += int64(moreVals)
n += moreVals
}
return n
}
@ -1357,21 +1325,19 @@ func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int {
func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
var index int64
var curSeq int64
index, wasPresent, _ = rc.search(int64(key), nil)
if !wasPresent {
return // already removed, nothing to do.
}
pos := key - rc.iv[index].start
rc.deleteAt(&index, &pos, &curSeq)
rc.deleteAt(&index, &pos)
return
}
// internal helper functions
func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16, curSeq *int64) {
func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16) {
rc.card--
*curSeq--
ci := *curIndex
pos := *curPosInIndex
@ -1488,7 +1454,7 @@ func (rc *runContainer16) selectInt16(j uint16) int {
var offset int64
for k := range rc.iv {
nextOffset := offset + rc.iv[k].runlen() + 1
nextOffset := offset + rc.iv[k].runlen()
if nextOffset > int64(j) {
return int(int64(rc.iv[k].start) + (int64(j) - offset))
}
@ -1993,7 +1959,7 @@ func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uin
}
}
func (rc *runContainer16) getShortIterator() shortIterable {
func (rc *runContainer16) getShortIterator() shortPeekable {
return rc.newRunIterator16()
}

View file

@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, err = dc.ReadInt64()
if err != nil {
return
}
default:
err = dc.Skip()
if err != nil {
@ -908,9 +903,9 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// map header, size 3
// write "rc"
err = en.Append(0x84, 0xa2, 0x72, 0x63)
err = en.Append(0x83, 0xa2, 0x72, 0x63)
if err != nil {
return err
}
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
if err != nil {
return
}
// write "curSeq"
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
if err != nil {
return err
}
err = en.WriteInt64(z.curSeq)
if err != nil {
return
}
return
}
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
// map header, size 3
// string "rc"
o = append(o, 0x84, 0xa2, 0x72, 0x63)
o = append(o, 0x83, 0xa2, 0x72, 0x63)
if z.rc == nil {
o = msgp.AppendNil(o)
} else {
@ -975,9 +961,6 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
// string "curPosInIndex"
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
o = msgp.AppendUint16(o, z.curPosInIndex)
// string "curSeq"
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
o = msgp.AppendInt64(o, z.curSeq)
return
}
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@ -1047,7 +1025,7 @@ func (z *runIterator16) Msgsize() (s int) {
} else {
s += z.rc.Msgsize()
}
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
return
}

View file

@ -2,8 +2,6 @@ package roaring
import (
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/tinylib/msgp/msgp"
@ -34,37 +32,3 @@ func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b)
return 0, err
}
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
b.iv = b.iv[:0]
b.card = 0
var numRuns uint16
err := binary.Read(stream, binary.LittleEndian, &numRuns)
if err != nil {
return 0, err
}
nr := int(numRuns)
encRun := make([]uint16, 2*nr)
by := make([]byte, 4*nr)
err = binary.Read(stream, binary.LittleEndian, &by)
if err != nil {
return 0, err
}
for i := range encRun {
if len(by) < 2 {
return 0, errCorruptedStream
}
encRun[i] = binary.LittleEndian.Uint16(by)
by = by[2:]
}
for i := 0; i < nr; i++ {
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
}
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
b.card += int64(encRun[i*2+1]) + 1
}
return 0, err
}

View file

@ -74,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
return by
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")

View file

@ -6,6 +6,7 @@ import (
"errors"
"io"
"reflect"
"runtime"
"unsafe"
)
@ -22,22 +23,6 @@ func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
return stream.Write(buf)
}
// readFrom reads an arrayContainer from stream.
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
// *before* you call readFrom. We can't guess the size in the stream
// by this point.
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return io.ReadFull(stream, buf)
}
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
buf := uint64SliceAsByteSlice(bc.bitmap)
n, err := io.ReadFull(stream, buf)
bc.computeCardinality()
return n, err
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
@ -46,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
header.Len *= 8
header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
@ -58,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
header.Len *= 2
header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
@ -68,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
// Deserialization code follows
func byteSliceAsUint16Slice(slice []byte) []uint16 {
////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible to
// ensure that the input slice does not get garbage collected, deleted
// or modified while you hold the returned slince.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 2
header.Cap /= 2
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// return it
return *(*[]uint16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsUint64Slice(slice []byte) []uint64 {
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 8
header.Cap /= 8
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// return it
return *(*[]uint64)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsInterval16Slice(slice []byte) []interval16 {
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 4
header.Cap /= 4
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// return it
return *(*[]interval16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}

View file

@ -14,6 +14,7 @@ func equal(a, b []uint16) bool {
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
buffer = buffer[:len(set1)]
for k := 0; k < len(set1); k++ {
buffer[k] = set1[k]
}

View file

@ -5,6 +5,12 @@ type shortIterable interface {
next() uint16
}
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct {
slice []uint16
loc int
@ -20,6 +26,16 @@ func (si *shortIterator) next() uint16 {
return a
}
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct {
slice []uint16
loc int

View file

@ -112,7 +112,7 @@ func highbits(x uint32) uint16 {
return uint16(x >> 16)
}
func lowbits(x uint32) uint16 {
return uint16(x & 0xFFFF)
return uint16(x & maxLowBit)
}
const maxLowBit = 0xFFFF
@ -302,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
}
return b
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}