Switch to bbolt

Ken-Håvard Lieng 2020-04-23 01:06:36 +02:00
parent 360bed00f9
commit 77543e3aed
617 changed files with 68468 additions and 97867 deletions


@@ -93,7 +93,7 @@ The libraries this project is built with.
 ### Server
-- [Bolt](https://github.com/boltdb/bolt)
+- [Bolt](https://github.com/etcd-io/bbolt)
 - [Bleve](https://github.com/blevesearch/bleve)
 - [Cobra](https://github.com/spf13/cobra)
 - [Viper](https://github.com/spf13/viper)

go.mod

@@ -3,25 +3,19 @@ module github.com/khlieng/dispatch

 go 1.14

 require (
-	github.com/RoaringBitmap/roaring v0.4.17 // indirect
-	github.com/blevesearch/bleve v0.0.0-20180525174403-1d6d47ed3ad9
-	github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect
-	github.com/blevesearch/go-porterstemmer v1.0.2 // indirect
-	github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect
-	github.com/boltdb/bolt v0.0.0-20180302180052-fd01fc79c553
-	github.com/couchbase/vellum v0.0.0-20190606010143-5f4edc22838b // indirect
+	github.com/RoaringBitmap/roaring v0.4.23 // indirect
+	github.com/blevesearch/bleve v1.0.7
 	github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
 	github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
 	github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect
 	github.com/dsnet/compress v0.0.1
-	github.com/edsrzf/mmap-go v1.0.0 // indirect
 	github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
 	github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
 	github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
 	github.com/fsnotify/fsnotify v1.4.7
-	github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
+	github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
 	github.com/go-acme/lego v2.6.0+incompatible // indirect
-	github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c // indirect
+	github.com/golang/protobuf v1.4.0 // indirect
 	github.com/gorilla/websocket v1.4.0
 	github.com/jmhodges/levigo v1.0.0 // indirect
 	github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7
@@ -36,21 +30,20 @@ require (
 	github.com/onsi/gomega v1.5.0 // indirect
 	github.com/pelletier/go-toml v1.4.0 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7 // indirect
-	github.com/smartystreets/assertions v1.0.0 // indirect
-	github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a // indirect
 	github.com/spf13/afero v1.2.2 // indirect
-	github.com/spf13/cobra v0.0.5
+	github.com/spf13/cobra v1.0.0
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/spf13/viper v1.4.0
-	github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect
-	github.com/stretchr/testify v1.3.0
-	github.com/syndtr/goleveldb v1.0.0 // indirect
+	github.com/stretchr/testify v1.4.0
 	github.com/tdewolff/minify/v2 v2.5.0
-	github.com/tecbot/gorocksdb v0.0.0-20190519120508-025c3cf4ffb4 // indirect
+	github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c // indirect
+	github.com/tinylib/msgp v1.1.2 // indirect
+	go.etcd.io/bbolt v1.3.4
 	golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 // indirect
 	golang.org/x/net v0.0.0-20190607181551-461777fb6f67
 	golang.org/x/sync v0.0.0-20190423024810-112230192c58 // indirect
-	golang.org/x/sys v0.0.0-20190608050228-5b15430b70e3 // indirect
+	golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f // indirect
 	golang.org/x/text v0.3.2 // indirect
 	gopkg.in/square/go-jose.v2 v2.3.1 // indirect
 )

go.sum

@@ -2,23 +2,30 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
-github.com/RoaringBitmap/roaring v0.4.17 h1:oCYFIFEMSQZrLHpywH7919esI1VSrQZ0pJXkZPGIJ78=
-github.com/RoaringBitmap/roaring v0.4.17/go.mod h1:D3qVegWTmfCaX4Bl5CrBE9hfrSrrXIr8KVNvRsDi1NI=
+github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
+github.com/RoaringBitmap/roaring v0.4.23 h1:gpyfd12QohbqhFO4NVDUdoPOCXsyahYRQhINmlHxKeo=
+github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
-github.com/blevesearch/bleve v0.0.0-20180525174403-1d6d47ed3ad9 h1:q25+axgzH1KX+j63v3yrkY1VHc6PkyTfpnzOmtAH154=
-github.com/blevesearch/bleve v0.0.0-20180525174403-1d6d47ed3ad9/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
-github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA=
-github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
-github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I=
-github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
-github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg=
-github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
-github.com/boltdb/bolt v0.0.0-20180302180052-fd01fc79c553 h1:yvSJ8qbaWLeS7COhu2KJ0epn4mmc+aGeBP7Dpg7xQTY=
-github.com/boltdb/bolt v0.0.0-20180302180052-fd01fc79c553/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
+github.com/blevesearch/bleve v1.0.7 h1:4PspZE7XABMSKcVpzAKp0E05Yer1PIYmTWk+1ngNr/c=
+github.com/blevesearch/bleve v1.0.7/go.mod h1:3xvmBtaw12Y4C9iA1RTzwWCof5j5HjydjCTiDE2TeE0=
+github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 h1:SjYVcfJVZoCfBlg+fkaq2eoZHTf5HaJfaTeTkOtyfHQ=
+github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
+github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
+github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
+github.com/blevesearch/mmap-go v1.0.2 h1:JtMHb+FgQCTTYIhtMvimw15dJwu1Y5lrZDMOFXVWPk0=
+github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
+github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac=
+github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ=
+github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
+github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
+github.com/blevesearch/zap/v11 v11.0.7 h1:nnmAOP6eXBkqEa1Srq1eqA5Wmn4w+BZjLdjynNxvd+M=
+github.com/blevesearch/zap/v11 v11.0.7/go.mod h1:bJoY56fdU2m/IP4LLz/1h4jY2thBoREvoqbuJ8zhm9k=
+github.com/blevesearch/zap/v12 v12.0.7 h1:y8FWSAYkdc4p1dn4YLxNNr1dxXlSUsakJh2Fc/r6cj4=
+github.com/blevesearch/zap/v12 v12.0.7/go.mod h1:70DNK4ZN4tb42LubeDbfpp6xnm8g3ROYVvvZ6pEoXD8=
 github.com/cenkalti/backoff v2.1.1+incompatible h1:tKJnvO2kl0zmb/jA5UKAt4VoEVw1qxKWjE/Bpp46npY=
 github.com/cenkalti/backoff v2.1.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
 github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
@@ -30,9 +37,14 @@ github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8Nz
 github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
 github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
 github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
-github.com/couchbase/vellum v0.0.0-20190606010143-5f4edc22838b h1:GB0V0Si9pq1M9HM2QPP+n1xKsZgnPdxsn8w03WeXRmg=
-github.com/couchbase/vellum v0.0.0-20190606010143-5f4edc22838b/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g=
+github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
+github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
+github.com/couchbase/moss v0.1.0 h1:HCL+xxHUwmOaL44kMM/gU08OW6QGCui1WVFO58bjhNI=
+github.com/couchbase/moss v0.1.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs=
+github.com/couchbase/vellum v1.0.1 h1:qrj9ohvZedvc51S5KzPfJ6P6z0Vqzv7Lx7k3mVc2WOk=
+github.com/couchbase/vellum v1.0.1/go.mod h1:FcwrEivFpNi24R3jLOs3n+fs5RnuQnQqCLBJ1uAg1W4=
 github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
+github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8=
 github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d/go.mod h1:URriBxXwVq5ijiJ12C7iIZqlA69nTlI+LgI6/pwftG8=
 github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso=
@@ -48,8 +60,6 @@ github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
 github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
-github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
-github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
 github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
 github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64=
 github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojtoVVWjGfOF9635RETekkoH6Cc9SX0A=
@@ -61,8 +71,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
 github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
-github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493 h1:OTanQnFt0bi5iLFSdbEVA/idR6Q2WhCm+deb7ir2CcM=
-github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
+github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI=
+github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
 github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
 github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
 github.com/go-acme/lego v2.5.0+incompatible/go.mod h1:yzMNe9CasVUhkquNvti5nAtPmG94USbYxYrZfTkIn0M=
@@ -81,15 +91,25 @@ github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w=
 github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
 github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
-github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
-github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
+github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
 github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
 github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
 github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
@@ -118,6 +138,8 @@ github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w=
 github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/kljensen/snowball v0.6.0 h1:6DZLCcZeL0cLfodx+Md4/OLC6b/bfurWUOUGs1ydfOU=
+github.com/kljensen/snowball v0.6.0/go.mod h1:27N7E8fVU5H68RlUmnWwZCfxgt4POBJfENGMvNRhldw=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
@@ -145,6 +167,8 @@ github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQz
 github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
 github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
 github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
+github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
+github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
 github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
 github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
 github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw=
@@ -175,17 +199,14 @@ github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
 github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
 github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
+github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
 github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7 h1:FUL3b97ZY2EPqg2NbXKuMHs5pXJB9hjj1fDHnF2vl28=
 github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
 github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
 github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
-github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
-github.com/smartystreets/assertions v1.0.0 h1:UVQPSSmc3qtTi+zPPkCXvZX9VvW/xT/NsRvKfwY81a8=
-github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM=
-github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff/go.mod h1:KSQcGKpxUMHk3nbYzs/tIBAM2iDooCn0BmttHOJEbLs=
-github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs=
-github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI=
@@ -196,23 +217,29 @@ github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
 github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
 github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
 github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
+github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8=
+github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
 github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk=
 github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
 github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
 github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
 github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
 github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU=
 github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
-github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 h1:JNEGSiWg6D3lcBCMCBqN3ELniXujt+0QNHLhNnO0w3s=
-github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2/go.mod h1:mjqs7N0Q6m5HpR7QfXVBZXZWSqTjQLeTujjA/xUp2uw=
+github.com/steveyen/gtreap v0.1.0 h1:CjhzTa274PyJLJuMZwIzCO1PfC00oRa8d1Kc78bFXJM=
+github.com/steveyen/gtreap v0.1.0/go.mod h1:kl/5J7XbrOmlIbYIXdRHDDE5QxHqpk0cmkT7Z4dM9/Y=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
 github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
 github.com/tdewolff/minify/v2 v2.5.0 h1:OWPdsMnomzKoL5tzgW3HK3t1zVxsHF6SlGxJjUnoMdw=
@@ -221,10 +248,12 @@ github.com/tdewolff/parse/v2 v2.3.7 h1:DXoTUgrUE2Eap0m7zg1ljCO5C78vhEi7HTc4YnJWr
 github.com/tdewolff/parse/v2 v2.3.7/go.mod h1:HansaqmN4I/U7L6/tUp0NcwT2tFO0F4EAWYGSDzkYNk=
 github.com/tdewolff/test v1.0.0 h1:jOwzqCXr5ePXEPGJaq2ivoR6HOCi+D5TPfpoyg8yvmU=
 github.com/tdewolff/test v1.0.0/go.mod h1:DiQUlutnqlEvdvhSn2LPGy4TFwRauAaYDsL+683RNX4=
-github.com/tecbot/gorocksdb v0.0.0-20190519120508-025c3cf4ffb4 h1:ktZy3TUr3YgNRAufBhDmvfLcRdlOU3CNy6p5haZkfkM=
-github.com/tecbot/gorocksdb v0.0.0-20190519120508-025c3cf4ffb4/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
+github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c h1:g+WoO5jjkqGAzHWCjJB1zZfXPIAaDpzXIEJ0eS6B5Ok=
+github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
 github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
 github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
+github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
+github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
 github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
 github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
 github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
@@ -233,7 +262,10 @@ github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
 github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
+go.etcd.io/bbolt v1.3.2 h1:Z/90sZLPOeCy2PwprqkFa25PdkusRzaj9P8zm/KNyvk=
 go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
+go.etcd.io/bbolt v1.3.4 h1:hi1bXHMVrlQh6WwxAy+qZCV/SYIlqo+Ushwdpa4tAKg=
+go.etcd.io/bbolt v1.3.4/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
 go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
 go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
 go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
@@ -272,11 +304,14 @@ golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190124100055-b90733256f2e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190608050228-5b15430b70e3 h1:xUZPeCzQtkdgRi9RjXIA+3w3RdyDLPqiaJlza5Fqpog=
-golang.org/x/sys v0.0.0-20190608050228-5b15430b70e3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
+golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
@@ -286,11 +321,18 @@ golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGm
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
-golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zimw=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=


@@ -5,7 +5,7 @@ import (
 	"encoding/binary"
 	"strconv"

-	"github.com/boltdb/bolt"
+	bolt "go.etcd.io/bbolt"

 	"github.com/khlieng/dispatch/pkg/session"
 	"github.com/khlieng/dispatch/storage"

vendor/github.com/RoaringBitmap/roaring/.drone.yml (new file, generated, vendored)

@@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build


@@ -14,6 +14,7 @@ go:
 - "1.10.x"
 - "1.11.x"
 - "1.12.x"
+- "1.13.x"
 - tip

 # whitelist
@@ -23,10 +24,14 @@ branches:
 script:
 - goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
 - go test -race -run TestConcurrent*
+- go build -tags appengine
+- go test -tags appengine
 - GOARCH=arm64 go build
 - GOARCH=386 go build
 - GOARCH=386 go test
 - GOARCH=arm go build
+- GOARCH=arm64 go build

 matrix:
   allow_failures:
   - go: tip


@@ -12,4 +12,5 @@ Vali Malinoiu (@0x4139),
 Forud Ghafouri (@fzerorubigd),
 Joe Nall (@joenall),
 (@fredim),
-Edd Robinson (@e-dard)
+Edd Robinson (@e-dard),
+Alexander Petrov (@alldroll)


@@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
 # Get the dependencies
 deps:
-	GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
+	GOPATH=$(GOPATH) go get github.com/stretchr/testify
 	GOPATH=$(GOPATH) go get github.com/willf/bitset
 	GOPATH=$(GOPATH) go get github.com/golang/lint/golint
 	GOPATH=$(GOPATH) go get github.com/mschoch/smat


@@ -1,4 +1,5 @@
 roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
+[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
 =============

 This is a go version of the Roaring bitmap data structure.

@@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
 Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
-[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
+[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].

 [lucene]: https://lucene.apache.org/
 [solr]: https://lucene.apache.org/solr/
 [elasticsearch]: https://www.elastic.co/products/elasticsearch
-[druid]: http://druid.io/
+[druid]: https://druid.apache.org/
 [spark]: https://spark.apache.org/
 [opensearchserver]: http://www.opensearchserver.com
 [cloudtorrent]: https://github.com/jpillora/cloud-torrent

@@ -28,11 +29,17 @@ Roaring bitmaps are found to work well in many important applications:

 The ``roaring`` Go library is used by
-* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client
-* [runv](https://github.com/hyperhq/runv): an Hypervisor-based runtime for the Open Containers Initiative
+* [Cloud Torrent](https://github.com/jpillora/cloud-torrent)
+* [runv](https://github.com/hyperhq/runv)
 * [InfluxDB](https://www.influxdata.com)
 * [Pilosa](https://www.pilosa.com/)
 * [Bleve](http://www.blevesearch.com)
+* [lindb](https://github.com/lindb/lindb)
+* [Elasticell](https://github.com/deepfabric/elasticell)
+* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
+* [M3](https://github.com/m3db/m3)
+* [trident](https://github.com/NetApp/trident)

 This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com).

@@ -61,7 +68,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
 Dependencies are fetched automatically by giving the `-t` flag to `go get`.

 they include
-  - github.com/smartystreets/goconvey/convey
   - github.com/willf/bitset
   - github.com/mschoch/smat
   - github.com/glycerine/go-unsnap-stream

@@ -207,7 +213,7 @@ You can use roaring with gore:
 - go get -u github.com/motemen/gore
 - Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
-- go get github/RoaringBitmap/roaring
+- go get github.com/RoaringBitmap/roaring

 ```go
 $ gore


@@ -24,7 +24,19 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
 	}
 }

-func (ac *arrayContainer) getShortIterator() shortIterable {
+func (ac *arrayContainer) iterate(cb func(x uint16) bool) bool {
+	iterator := shortIterator{ac.content, 0}
+
+	for iterator.hasNext() {
+		if !cb(iterator.next()) {
+			return false
+		}
+	}
+
+	return true
+}
+
+func (ac *arrayContainer) getShortIterator() shortPeekable {
 	return &shortIterator{ac.content, 0}
 }

@@ -33,7 +45,7 @@ func (ac *arrayContainer) getReverseIterator() shortIterable {
 }

 func (ac *arrayContainer) getManyIterator() manyIterable {
-	return &manyIterator{ac.content, 0}
+	return &shortIterator{ac.content, 0}
 }

 func (ac *arrayContainer) minimum() uint16 {


@@ -96,6 +96,18 @@ func (bc *bitmapContainer) maximum() uint16 {
 	return uint16(0)
 }

+func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
+	iterator := bitmapContainerShortIterator{bc, bc.NextSetBit(0)}
+
+	for iterator.hasNext() {
+		if !cb(iterator.next()) {
+			return false
+		}
+	}
+
+	return true
+}
+
 type bitmapContainerShortIterator struct {
 	ptr *bitmapContainer
 	i   int
@@ -110,11 +122,21 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
 	return bcsi.i >= 0
 }

+func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
+	return uint16(bcsi.i)
+}
+
+func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
+	if bcsi.hasNext() && bcsi.peekNext() < minval {
+		bcsi.i = bcsi.ptr.NextSetBit(int(minval))
+	}
+}
+
 func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
 	return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
 }

-func (bc *bitmapContainer) getShortIterator() shortIterable {
+func (bc *bitmapContainer) getShortIterator() shortPeekable {
 	return newBitmapContainerShortIterator(bc)
 }
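The container-level `iterate(cb)` methods above back roaring's callback-style traversal, while `peekNext`/`advanceIfNeeded` (the new `shortPeekable` contract) let set operations skip ahead inside a container without materializing every value. A short sketch of how the callback style reads from the public side; it assumes this roaring release exposes `Bitmap.Iterate`, which these container methods implement:

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(1, 2, 3, 1000)

	// Iterate visits set bits in ascending order and stops as soon as the
	// callback returns false, mirroring the container-level iterate(cb)
	// methods in the vendored diff above.
	rb.Iterate(func(x uint32) bool {
		fmt.Println(x)
		return x < 3 // stop after printing 3; 1000 is never visited
	})
}
```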

vendor/github.com/RoaringBitmap/roaring/byte_input.go (new file, generated, vendored)

@@ -0,0 +1,161 @@
package roaring
import (
"encoding/binary"
"io"
)
type byteInput interface {
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
next(n int) ([]byte, error)
// readUInt32 reads uint32 with LittleEndian order
readUInt32() (uint32, error)
// readUInt16 reads uint16 with LittleEndian order
readUInt16() (uint16, error)
// getReadBytes returns read bytes
getReadBytes() int64
// skipBytes skips exactly n bytes
skipBytes(n int) error
}
func newByteInputFromReader(reader io.Reader) byteInput {
return &byteInputAdapter{
r: reader,
readBytes: 0,
}
}
func newByteInput(buf []byte) byteInput {
return &byteBuffer{
buf: buf,
off: 0,
}
}
type byteBuffer struct {
buf []byte
off int
}
// next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *byteBuffer) next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteBuffer) readUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteBuffer) readUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// getReadBytes returns read bytes
func (b *byteBuffer) getReadBytes() int64 {
return int64(b.off)
}
// skipBytes skips exactly n bytes
func (b *byteBuffer) skipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// reset resets the given buffer with a new byte slice
func (b *byteBuffer) reset(buf []byte) {
b.buf = buf
b.off = 0
}
type byteInputAdapter struct {
r io.Reader
readBytes int
}
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteInputAdapter) readUInt32() (uint32, error) {
buf, err := b.next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteInputAdapter) readUInt16() (uint16, error) {
buf, err := b.next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// getReadBytes returns read bytes
func (b *byteInputAdapter) getReadBytes() int64 {
return int64(b.readBytes)
}
// skipBytes skips exactly n bytes
func (b *byteInputAdapter) skipBytes(n int) error {
_, err := b.next(n)
return err
}
// reset resets the given buffer with a new stream
func (b *byteInputAdapter) reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}
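This new byte_input.go gives roaring's deserialization a single little-endian reading interface over both an in-memory slice (`byteBuffer`) and a stream (`byteInputAdapter`). A small in-package sketch of the behaviour (hypothetical example code, not part of the vendored file):

```go
package roaring

import (
	"bytes"
	"fmt"
)

func exampleByteInput() {
	// 0x0201 as a little-endian uint16, then 0x06050403 as a little-endian uint32.
	data := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06}

	// Both constructors return the same byteInput interface.
	inputs := []byteInput{
		newByteInput(data),
		newByteInputFromReader(bytes.NewReader(data)),
	}

	for _, in := range inputs {
		u16, _ := in.readUInt16()
		u32, _ := in.readUInt32()
		fmt.Println(u16, u32, in.getReadBytes()) // 513 100992003 6
	}
}
```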


@@ -4,12 +4,13 @@ go 1.12

 require (
 	github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
-	github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493 // indirect
+	github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
 	github.com/golang/snappy v0.0.1 // indirect
+	github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
+	github.com/jtolds/gls v4.20.0+incompatible // indirect
 	github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
 	github.com/philhofer/fwd v1.0.0 // indirect
-	github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff
-	github.com/stretchr/testify v1.3.0
+	github.com/stretchr/testify v1.4.0
 	github.com/tinylib/msgp v1.1.0
 	github.com/willf/bitset v1.1.10
 )


@@ -2,12 +2,12 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
 github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
-github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493 h1:OTanQnFt0bi5iLFSdbEVA/idR6Q2WhCm+deb7ir2CcM=
-github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
+github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
+github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
 github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
 github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
-github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
+github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
+github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
 github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
 github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
@@ -16,14 +16,15 @@ github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
 github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
-github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
-github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff h1:86HlEv0yBCry9syNuylzqznKXDK11p6D0DT596yNMys=
-github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff/go.mod h1:KSQcGKpxUMHk3nbYzs/tIBAM2iDooCn0BmttHOJEbLs=
+github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
 github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
 github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
 github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View File

@ -4,12 +4,7 @@ type manyIterable interface {
nextMany(hs uint32, buf []uint32) int nextMany(hs uint32, buf []uint32) int
} }
type manyIterator struct { func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
slice []uint16
loc int
}
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
n := 0 n := 0
l := si.loc l := si.loc
s := si.slice s := si.slice

View File

@ -6,12 +6,12 @@
package roaring package roaring
import ( import (
"bufio"
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"fmt" "fmt"
"io" "io"
"strconv" "strconv"
"sync"
) )
// Bitmap represents a compressed bitmap where you can add integers. // Bitmap represents a compressed bitmap where you can add integers.
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
// The format is compatible with other RoaringBitmap // The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here: // implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec // https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
return rb.highlowcontainer.readFrom(stream) stream := byteInputAdapterPool.Get().(*byteInputAdapter)
stream.reset(reader)
p, err = rb.highlowcontainer.readFrom(stream)
byteInputAdapterPool.Put(stream)
return
} }
// FromBuffer creates a bitmap from its serialized version stored in buffer // FromBuffer creates a bitmap from its serialized version stored in buffer
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
// You should *not* change the copy-on-write status of the resulting // You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite). // bitmaps (SetCopyOnWrite).
// //
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { // If buf becomes unavailable, then a bitmap created with
return rb.highlowcontainer.fromBuffer(buf) // FromBuffer would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := byteBufferPool.Get().(*byteBuffer)
stream.reset(buf)
p, err = rb.highlowcontainer.readFrom(stream)
byteBufferPool.Put(stream)
return
} }
var (
byteBufferPool = sync.Pool{
New: func() interface{} {
return &byteBuffer{}
},
}
byteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &byteInputAdapter{}
},
}
)
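
The copy-on-write caveat documented above is easiest to see in use. A minimal sketch, assuming the package's exported API (BitmapOf, ToBytes, New, FromBuffer, Contains) plus the CloneCopyOnWriteContainers method introduced in this change:

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	src := roaring.BitmapOf(1, 2, 100000)
	buf, err := src.ToBytes()
	if err != nil {
		panic(err)
	}

	// FromBuffer maps the bitmap directly onto buf (copy-on-write, no copy).
	rb := roaring.New()
	if _, err := rb.FromBuffer(buf); err != nil {
		panic(err)
	}

	// Detach rb from buf before discarding or reusing the buffer.
	rb.CloneCopyOnWriteContainers()
	buf = nil

	fmt.Println(rb.Contains(100000)) // true
}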
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (rb *Bitmap) RunOptimize() { func (rb *Bitmap) RunOptimize() {
rb.highlowcontainer.runOptimize() rb.highlowcontainer.runOptimize()
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
} }
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) { func (rb *Bitmap) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer return rb.ToBytes()
writer := bufio.NewWriter(&buf)
_, err := rb.WriteTo(writer)
if err != nil {
return nil, err
}
err = writer.Flush()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
} }
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
func (rb *Bitmap) UnmarshalBinary(data []byte) error { func (rb *Bitmap) UnmarshalBinary(data []byte) error {
var buf bytes.Buffer r := bytes.NewReader(data)
_, err := buf.Write(data) _, err := rb.ReadFrom(r)
if err != nil {
return err
}
reader := bufio.NewReader(&buf)
_, err = rb.ReadFrom(reader)
return err return err
} }
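
Since MarshalBinary now delegates to ToBytes and UnmarshalBinary reads through ReadFrom, a round trip reduces to a couple of calls. A minimal sketch, assuming the exported BitmapOf, New and Equals:

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	src := roaring.BitmapOf(7, 8, 9)
	data, err := src.MarshalBinary()
	if err != nil {
		panic(err)
	}

	dst := roaring.New()
	if err := dst.UnmarshalBinary(data); err != nil {
		panic(err)
	}
	fmt.Println(src.Equals(dst)) // true
}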
@ -215,10 +233,20 @@ type IntIterable interface {
Next() uint32 Next() uint32
} }
// IntPeekable allows you to look at the next value without advancing and
// advance as long as the next value is smaller than minval
type IntPeekable interface {
IntIterable
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint32
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint32)
}
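
A minimal sketch of the new IntPeekable surface, assuming Iterator() returns it as shown further below; AdvanceIfNeeded skips ahead without a full scan, PeekNext inspects without consuming:

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(1, 2, 3, 1000, 1001)
	it := rb.Iterator()     // IntPeekable after this change
	it.AdvanceIfNeeded(500) // skip every value below 500
	for it.HasNext() {
		fmt.Println(it.PeekNext()) // look without advancing
		it.Next()                  // prints 1000, then 1001
	}
}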
type intIterator struct { type intIterator struct {
pos int pos int
hs uint32 hs uint32
iter shortIterable iter shortPeekable
highlowcontainer *roaringArray highlowcontainer *roaringArray
} }
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
return x return x
} }
// PeekNext peeks the next value without advancing the iterator
func (ii *intIterator) PeekNext() uint32 {
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
to := minval >> 16
for ii.HasNext() && (ii.hs>>16) < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>16) == to {
ii.iter.advanceIfNeeded(lowbits(minval))
if !ii.iter.hasNext() {
ii.pos++
ii.init()
}
}
}
func newIntIterator(a *Bitmap) *intIterator { func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator) p := new(intIterator)
p.pos = 0 p.pos = 0
@ -364,9 +416,41 @@ func (rb *Bitmap) String() string {
return buffer.String() return buffer.String()
} }
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
// false, the iteration is halted.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
// There is no guarantee as to what order the values will be iterated
func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
for i := 0; i < rb.highlowcontainer.size(); i++ {
hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
c := rb.highlowcontainer.getContainerAtIndex(i)
var shouldContinue bool
// This is hacky but it avoids allocations from invoking an interface method with a closure
switch t := c.(type) {
case *arrayContainer:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
case *runContainer16:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
case *bitmapContainer:
shouldContinue = t.iterate(func(x uint16) bool {
return cb(uint32(x) | hs)
})
}
if !shouldContinue {
break
}
}
}
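
A minimal sketch of the callback-based Iterate above; returning false from the callback halts the iteration early:

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(10, 20, 30, 40)
	count := 0
	rb.Iterate(func(x uint32) bool {
		count++
		return count < 3 // stop after three values
	})
	fmt.Println(count) // 3
}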
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntIterable { func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb) return newIntIterator(rb)
} }
@ -423,41 +507,72 @@ func (rb *Bitmap) Equals(o interface{}) bool {
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
containerOffset := highbits(offset) return AddOffset64(x, int64(offset))
inOffset := lowbits(offset) }
// AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
// If offset + element is outside of the range [0,2^32), then the element will be dropped
func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
// we need "offset" to be a long because we want to support values
// between -0xFFFFFFFF up to +0xFFFFFFFF
var containerOffset64 int64
if offset < 0 {
containerOffset64 = (offset - (1 << 16) + 1) / (1 << 16)
} else {
containerOffset64 = offset >> 16
}
if containerOffset64 >= (1<<16) || containerOffset64 <= -(1<<16) {
return New()
}
containerOffset := int32(containerOffset64)
inOffset := (uint16)(offset - containerOffset64*(1<<16))
if inOffset == 0 { if inOffset == 0 {
answer = x.Clone() answer = x.Clone()
for pos := 0; pos < answer.highlowcontainer.size(); pos++ { for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
key := answer.highlowcontainer.getKeyAtIndex(pos) key := int32(answer.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset key += containerOffset
answer.highlowcontainer.keys[pos] = key
if key >= 0 && key <= MaxUint16 {
answer.highlowcontainer.keys[pos] = uint16(key)
}
} }
} else { } else {
answer = New() answer = New()
for pos := 0; pos < x.highlowcontainer.size(); pos++ { for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := x.highlowcontainer.getKeyAtIndex(pos) key := int32(x.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset key += containerOffset
c := x.highlowcontainer.getContainerAtIndex(pos) c := x.highlowcontainer.getContainerAtIndex(pos)
offsetted := c.addOffset(inOffset) offsetted := c.addOffset(inOffset)
if offsetted[0].getCardinality() > 0 {
if offsetted[0].getCardinality() > 0 && (key >= 0 && key <= MaxUint16) {
curSize := answer.highlowcontainer.size() curSize := answer.highlowcontainer.size()
lastkey := uint16(0) lastkey := int32(0)
if curSize > 0 { if curSize > 0 {
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1) lastkey = int32(answer.highlowcontainer.getKeyAtIndex(curSize - 1))
} }
if curSize > 0 && lastkey == key { if curSize > 0 && lastkey == key {
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
orresult := prev.ior(offsetted[0]) orresult := prev.ior(offsetted[0])
answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult) answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult)
} else { } else {
answer.highlowcontainer.appendContainer(key, offsetted[0], false) answer.highlowcontainer.appendContainer(uint16(key), offsetted[0], false)
} }
} }
if offsetted[1].getCardinality() > 0 {
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false) if offsetted[1].getCardinality() > 0 && ((key+1) >= 0 && (key+1) <= MaxUint16) {
answer.highlowcontainer.appendContainer(uint16(key+1), offsetted[1], false)
} }
} }
} }
return answer return answer
} }
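
A minimal sketch of the signed-offset semantics above; values shifted outside [0, 2^32) are silently dropped:

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(5, 100000)
	shifted := roaring.AddOffset64(rb, -10)
	// 5-10 is negative and dropped; 100000-10 survives.
	fmt.Println(shifted.Contains(99990))  // true
	fmt.Println(shifted.GetCardinality()) // 1
}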
@ -1378,6 +1493,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
return rb.highlowcontainer.copyOnWrite return rb.highlowcontainer.copyOnWrite
} }
// CloneCopyOnWriteContainers clones all containers which have
// needCopyOnWrite set to true.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to, after
// calling FromBuffer.
// More generally this function is useful if you call FromBuffer
// to construct a bitmap with a backing array buf
// and then later discard the buf array. Note that you should call
// CloneCopyOnWriteContainers on all bitmaps that were derived
// from the 'FromBuffer' bitmap since they may have dependencies
// on the buf array as well.
func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// FlipInt calls Flip after casting the parameters (convenience method) // FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) return Flip(bm, uint64(rangeStart), uint64(rangeEnd))

View File

@ -5,7 +5,6 @@ import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"io" "io"
"io/ioutil"
snappy "github.com/glycerine/go-unsnap-stream" snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
@ -39,7 +38,8 @@ type container interface {
not(start, final int) container // range is [firstOfRange,lastOfRange) not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container xor(r container) container
getShortIterator() shortIterable getShortIterator() shortPeekable
iterate(cb func(x uint16) bool) bool
getReverseIterator() shortIterable getReverseIterator() shortIterable
getManyIterator() manyIterable getManyIterator() manyIterable
contains(i uint16) bool contains(i uint16) bool
@ -64,7 +64,6 @@ type container interface {
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int serializedSizeInBytes() int
readFrom(io.Reader) (int, error)
writeTo(io.Writer) (int, error) writeTo(io.Writer) (int, error)
numberOfRuns() int numberOfRuns() int
@ -283,6 +282,18 @@ func (ra *roaringArray) clone() *roaringArray {
return &sa return &sa
} }
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function: // unused function:
//func (ra *roaringArray) containsKey(x uint16) bool { //func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
@ -479,20 +490,15 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
nw += 2 nw += 2
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1)) binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
nw += 2 nw += 2
// compute isRun bitmap without temporary allocation
// compute isRun bitmap var runbitmapslice = buf[nw:nw+isRunSizeInBytes]
var ir []byte
isRun := newBitmapContainer()
for i, c := range ra.containers { for i, c := range ra.containers {
switch c.(type) { switch c.(type) {
case *runContainer16: case *runContainer16:
isRun.iadd(uint16(i)) runbitmapslice[i / 8] |= 1<<(uint(i)%8)
} }
} }
// convert to little endian nw += isRunSizeInBytes
ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
nw += copy(buf[nw:], ir)
} else { } else {
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer)) binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
nw += 4 nw += 4
@ -549,50 +555,52 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
return buf.Bytes(), err return buf.Bytes(), err
} }
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
pos := 0 cookie, err := stream.readUInt32()
if len(buf) < 8 {
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) if err != nil {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
} }
cookie := binary.LittleEndian.Uint32(buf) var size uint32
pos += 4
var size uint32 // number of containers
haveRunContainers := false
var isRunBitmap []byte var isRunBitmap []byte
// cookie header
if cookie&0x0000FFFF == serialCookie { if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true size = uint32(uint16(cookie>>16) + 1)
size = uint32(uint16(cookie>>16) + 1) // number of containers
// create is-run-container bitmap // create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8 isRunBitmapSize := (int(size) + 7) / 8
if pos+isRunBitmapSize > len(buf) { isRunBitmap, err = stream.next(isRunBitmapSize)
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
}
isRunBitmap = buf[pos : pos+isRunBitmapSize] if err != nil {
pos += isRunBitmapSize return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
} else if cookie == serialCookieNoRunContainer { } else if cookie == serialCookieNoRunContainer {
size = binary.LittleEndian.Uint32(buf[pos:]) size, err = stream.readUInt32()
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
// keycard - is {key, cardinality} tuple slice
if pos+2*2*int(size) > len(buf) {
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
}
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
pos += 2 * 2 * int(size)
if !haveRunContainers || size >= noOffsetThreshold { if err != nil {
pos += 4 * int(size) return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.next(2 * 2 * int(size))
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.skipBytes(int(size) * 4); err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
} }
// Allocate slices upfront as number of containers is known // Allocate slices upfront as number of containers is known
@ -601,11 +609,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} else { } else {
ra.containers = make([]container, size) ra.containers = make([]container, size)
} }
if cap(ra.keys) >= int(size) { if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size] ra.keys = ra.keys[:size]
} else { } else {
ra.keys = make([]uint16, size) ra.keys = make([]uint16, size)
} }
if cap(ra.needCopyOnWrite) >= int(size) { if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size] ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else { } else {
@ -613,129 +623,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} }
for i := uint32(0); i < size; i++ { for i := uint32(0); i < size; i++ {
key := uint16(keycard[2*i]) key := keycard[2*i]
card := int(keycard[2*i+1]) + 1 card := int(keycard[2*i+1]) + 1
ra.keys[i] = key ra.keys[i] = key
ra.needCopyOnWrite[i] = true ra.needCopyOnWrite[i] = true
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container // run container
nr := binary.LittleEndian.Uint16(buf[pos:]) nr, err := stream.readUInt16()
pos += 2
if pos+int(nr)*4 > len(buf) { if err != nil {
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) return 0, fmt.Errorf("failed to read runtime container size: %s", err)
} }
buf, err := stream.next(int(nr) * 4)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{ nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), iv: byteSliceAsInterval16Slice(buf),
card: int64(card), card: int64(card),
} }
pos += int(nr) * 4
ra.containers[i] = &nb ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize { } else if card > arrayDefaultMaxSize {
// bitmap container // bitmap container
buf, err := stream.next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{ nb := bitmapContainer{
cardinality: card, cardinality: card,
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), bitmap: byteSliceAsUint64Slice(buf),
} }
pos += arrayDefaultMaxSize * 2
ra.containers[i] = &nb ra.containers[i] = &nb
} else { } else {
// array container // array container
nb := arrayContainer{ buf, err := stream.next(card * 2)
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
} }
pos += card * 2
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb ra.containers[i] = &nb
} }
} }
return int64(pos), nil return stream.getReadBytes(), nil
}
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
pos := 0
var cookie uint32
err := binary.Read(stream, binary.LittleEndian, &cookie)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
pos += 4
var size uint32
haveRunContainers := false
var isRun *bitmapContainer
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1)
bytesToRead := (int(size) + 7) / 8
numwords := (bytesToRead + 7) / 8
by := make([]byte, bytesToRead, numwords*8)
nr, err := io.ReadFull(stream, by)
if err != nil {
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
}
pos += bytesToRead
by = by[:cap(by)]
isRun = newBitmapContainer()
for i := 0; i < numwords; i++ {
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
by = by[8:]
}
} else if cookie == serialCookieNoRunContainer {
err = binary.Read(stream, binary.LittleEndian, &size)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
}
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
keycard := make([]uint16, 2*size, 2*size)
err = binary.Read(stream, binary.LittleEndian, keycard)
if err != nil {
return 0, err
}
pos += 2 * 2 * int(size)
// offset header
if !haveRunContainers || size >= noOffsetThreshold {
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
pos += 4 * int(size)
}
for i := uint32(0); i < size; i++ {
key := int(keycard[2*i])
card := int(keycard[2*i+1]) + 1
if haveRunContainers && isRun.contains(uint16(i)) {
nb := newRunContainer16()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(uint16(key), nb, false)
} else if card > arrayDefaultMaxSize {
nb := newBitmapContainer()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
nb.cardinality = card
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
} else {
nb := newArrayContainerSize(card)
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
}
}
return int64(pos), nil
} }
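
The streaming byteInput path above is reached through the exported WriteTo/ReadFrom pair. A minimal round-trip sketch:

package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	src := roaring.BitmapOf(1, 2, 3, 100, 1000)

	var buf bytes.Buffer
	if _, err := src.WriteTo(&buf); err != nil {
		panic(err)
	}

	dst := roaring.New()
	if _, err := dst.ReadFrom(&buf); err != nil {
		panic(err)
	}
	fmt.Println(src.Equals(dst)) // true
}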
func (ra *roaringArray) hasRunCompression() bool { func (ra *roaringArray) hasRunCompression() bool {

View File

@ -1149,195 +1149,152 @@ func (rc *runContainer16) Add(k uint16) (wasNew bool) {
//msgp:ignore runIterator //msgp:ignore runIterator
// runIterator16 advice: you must call Next() at least once // runIterator16 advice: you must call hasNext()
// before calling Cur(); and you should call HasNext() // before calling next()/peekNext() to ensure there are contents.
// before calling Next() to ensure there are contents.
type runIterator16 struct { type runIterator16 struct {
rc *runContainer16 rc *runContainer16
curIndex int64 curIndex int64
curPosInIndex uint16 curPosInIndex uint16
curSeq int64
} }
// newRunIterator16 returns a new empty run container. // newRunIterator16 returns a new empty run container.
func (rc *runContainer16) newRunIterator16() *runIterator16 { func (rc *runContainer16) newRunIterator16() *runIterator16 {
return &runIterator16{rc: rc, curIndex: -1} return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
} }
// HasNext returns false if calling Next will panic. It func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
iterator := runIterator16{rc, 0, 0}
for iterator.hasNext() {
if !cb(iterator.next()) {
return false
}
}
return true
}
// hasNext returns false if calling next will panic. It
// returns true when there is at least one more value // returns true when there is at least one more value
// available in the iteration sequence. // available in the iteration sequence.
func (ri *runIterator16) hasNext() bool { func (ri *runIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 { return int64(len(ri.rc.iv)) > ri.curIndex+1 ||
return false (int64(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex)
}
if ri.curIndex == -1 {
return true
}
return ri.curSeq+1 < ri.rc.cardinality()
} }
// cur returns the current value pointed to by the iterator. // next returns the next value in the iteration sequence.
func (ri *runIterator16) cur() uint16 { func (ri *runIterator16) next() uint16 {
next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length {
ri.curPosInIndex = 0
ri.curIndex++
} else {
ri.curPosInIndex++
}
return next
}
// peekNext returns the next value in the iteration sequence without advancing the iterator
func (ri *runIterator16) peekNext() uint16 {
return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
} }
// Next returns the next value in the iteration sequence. // advanceIfNeeded advances as long as the next value is smaller than minval
func (ri *runIterator16) next() uint16 { func (ri *runIterator16) advanceIfNeeded(minval uint16) {
if !ri.hasNext() { if !ri.hasNext() || ri.peekNext() >= minval {
panic("no Next available") return
} }
if ri.curIndex >= int64(len(ri.rc.iv)) {
panic("runIterator.Next() going beyond what is available") opt := &searchOptions{
startIndex: ri.curIndex,
endxIndex: int64(len(ri.rc.iv)),
} }
if ri.curIndex == -1 {
// first time is special // interval cannot be -1 because of minval > peekNext
ri.curIndex = 0 interval, isPresent, _ := ri.rc.search(int64(minval), opt)
// if minval is present, set curPosInIndex to the right position
if isPresent {
ri.curIndex = interval
ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start
} else { } else {
ri.curPosInIndex++ // otherwise interval is set to the minimum index of rc.iv
if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last())+1 { // which comes strictly before the key, so we move to the next interval
ri.curIndex = interval + 1
ri.curPosInIndex = 0 ri.curPosInIndex = 0
ri.curIndex++
} }
ri.curSeq++
}
return ri.cur()
} }
// remove removes the element that the iterator // runReverseIterator16 advice: you must call hasNext()
// is on from the run container. You can use
// Cur if you want to double check what is about
// to be deleted.
func (ri *runIterator16) remove() uint16 {
n := ri.rc.cardinality()
if n == 0 {
panic("runIterator.Remove called on empty runContainer16")
}
cur := ri.cur()
ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
return cur
}
// runReverseIterator16 advice: you must call next() at least once
// before calling cur(); and you should call hasNext()
// before calling next() to ensure there are contents. // before calling next() to ensure there are contents.
type runReverseIterator16 struct { type runReverseIterator16 struct {
rc *runContainer16 rc *runContainer16
curIndex int64 // index into rc.iv curIndex int64 // index into rc.iv
curPosInIndex uint16 // offset in rc.iv[curIndex] curPosInIndex uint16 // offset in rc.iv[curIndex]
curSeq int64 // 0->cardinality, performance optimization in hasNext()
} }
// newRunReverseIterator16 returns a new empty run iterator. // newRunReverseIterator16 returns a new empty run iterator.
func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 { func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 {
return &runReverseIterator16{rc: rc, curIndex: -2} index := int64(len(rc.iv)) - 1
pos := uint16(0)
if index >= 0 {
pos = rc.iv[index].length
}
return &runReverseIterator16{
rc: rc,
curIndex: index,
curPosInIndex: pos,
}
} }
// hasNext returns false if calling next will panic. It // hasNext returns false if calling next will panic. It
// returns true when there is at least one more value // returns true when there is at least one more value
// available in the iteration sequence. // available in the iteration sequence.
func (ri *runReverseIterator16) hasNext() bool { func (ri *runReverseIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 { return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0
return false
}
if ri.curIndex == -2 {
return true
}
return ri.rc.cardinality()-ri.curSeq > 1
}
// cur returns the current value pointed to by the iterator.
func (ri *runReverseIterator16) cur() uint16 {
return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
} }
// next returns the next value in the iteration sequence. // next returns the next value in the iteration sequence.
func (ri *runReverseIterator16) next() uint16 { func (ri *runReverseIterator16) next() uint16 {
if !ri.hasNext() { next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
panic("no next available")
}
if ri.curIndex == -1 {
panic("runReverseIterator.next() going beyond what is available")
}
if ri.curIndex == -2 {
// first time is special
ri.curIndex = int64(len(ri.rc.iv)) - 1
ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
} else {
if ri.curPosInIndex > 0 { if ri.curPosInIndex > 0 {
ri.curPosInIndex-- ri.curPosInIndex--
} else { } else {
ri.curIndex-- ri.curIndex--
if ri.curIndex >= 0 {
ri.curPosInIndex = ri.rc.iv[ri.curIndex].length ri.curPosInIndex = ri.rc.iv[ri.curIndex].length
} }
ri.curSeq++
} }
return ri.cur()
return next
} }
// remove removes the element that the iterator func (rc *runContainer16) newManyRunIterator16() *runIterator16 {
// is on from the run container. You can use return rc.newRunIterator16()
// cur if you want to double check what is about
// to be deleted.
func (ri *runReverseIterator16) remove() uint16 {
n := ri.rc.cardinality()
if n == 0 {
panic("runReverseIterator.Remove called on empty runContainer16")
}
cur := ri.cur()
ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
return cur
}
type manyRunIterator16 struct {
rc *runContainer16
curIndex int64
curPosInIndex uint16
curSeq int64
}
func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 {
return &manyRunIterator16{rc: rc, curIndex: -1}
}
func (ri *manyRunIterator16) hasNext() bool {
if len(ri.rc.iv) == 0 {
return false
}
if ri.curIndex == -1 {
return true
}
return ri.curSeq+1 < ri.rc.cardinality()
} }
// hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany
func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int { func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
n := 0 n := 0
if !ri.hasNext() { if !ri.hasNext() {
return n return n
} }
// start and end are inclusive // start and end are inclusive
for n < len(buf) { for n < len(buf) {
if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 { moreVals := 0
ri.curPosInIndex = 0
ri.curIndex++
if ri.curIndex == int64(len(ri.rc.iv)) {
break
}
buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs
if ri.curIndex != 0 {
ri.curSeq++
}
n++
// not strictly necessarily due to len(buf)-n min check, but saves some work
continue
}
// add as many as you can from this seq
moreVals := minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n)
base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex {
// add as many as you can from this seq
moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n)
base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs
// allows BCE // allows BCE
buf2 := buf[n : n+moreVals] buf2 := buf[n : n+moreVals]
@ -1346,10 +1303,21 @@ func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int {
} }
// update values // update values
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
ri.curSeq += int64(moreVals)
n += moreVals n += moreVals
} }
if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) {
ri.curPosInIndex = 0
ri.curIndex++
if ri.curIndex == int64(len(ri.rc.iv)) {
break
}
} else {
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
}
}
return n return n
} }
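
The nextMany plumbing above backs buffered iteration. A minimal sketch, assuming ManyIterator() is the public entry point (its NextMany method fills a caller-provided buffer and returns how many slots were used):

package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(1, 2, 3, 4, 5, 6, 7)
	it := rb.ManyIterator()
	buf := make([]uint32, 4)
	for {
		n := it.NextMany(buf)
		if n == 0 {
			break
		}
		fmt.Println(buf[:n]) // [1 2 3 4], then [5 6 7]
	}
}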
@ -1357,21 +1325,19 @@ func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int {
func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
var index int64 var index int64
var curSeq int64
index, wasPresent, _ = rc.search(int64(key), nil) index, wasPresent, _ = rc.search(int64(key), nil)
if !wasPresent { if !wasPresent {
return // already removed, nothing to do. return // already removed, nothing to do.
} }
pos := key - rc.iv[index].start pos := key - rc.iv[index].start
rc.deleteAt(&index, &pos, &curSeq) rc.deleteAt(&index, &pos)
return return
} }
// internal helper functions // internal helper functions
func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16, curSeq *int64) { func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16) {
rc.card-- rc.card--
*curSeq--
ci := *curIndex ci := *curIndex
pos := *curPosInIndex pos := *curPosInIndex
@ -1488,7 +1454,7 @@ func (rc *runContainer16) selectInt16(j uint16) int {
var offset int64 var offset int64
for k := range rc.iv { for k := range rc.iv {
nextOffset := offset + rc.iv[k].runlen() + 1 nextOffset := offset + rc.iv[k].runlen()
if nextOffset > int64(j) { if nextOffset > int64(j) {
return int(int64(rc.iv[k].start) + (int64(j) - offset)) return int(int64(rc.iv[k].start) + (int64(j) - offset))
} }
@ -1993,7 +1959,7 @@ func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uin
} }
} }
func (rc *runContainer16) getShortIterator() shortIterable { func (rc *runContainer16) getShortIterator() shortPeekable {
return rc.newRunIterator16() return rc.newRunIterator16()
} }

View File

@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
if err != nil { if err != nil {
return return
} }
case "curSeq":
z.curSeq, err = dc.ReadInt64()
if err != nil {
return
}
default: default:
err = dc.Skip() err = dc.Skip()
if err != nil { if err != nil {
@ -908,9 +903,9 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
// Deprecated: EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4 // map header, size 3
// write "rc" // write "rc"
err = en.Append(0x84, 0xa2, 0x72, 0x63) err = en.Append(0x83, 0xa2, 0x72, 0x63)
if err != nil { if err != nil {
return err return err
} }
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
if err != nil { if err != nil {
return return
} }
// write "curSeq"
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
if err != nil {
return err
}
err = en.WriteInt64(z.curSeq)
if err != nil {
return
}
return return
} }
// Deprecated: MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 4 // map header, size 3
// string "rc" // string "rc"
o = append(o, 0x84, 0xa2, 0x72, 0x63) o = append(o, 0x83, 0xa2, 0x72, 0x63)
if z.rc == nil { if z.rc == nil {
o = msgp.AppendNil(o) o = msgp.AppendNil(o)
} else { } else {
@ -975,9 +961,6 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
// string "curPosInIndex" // string "curPosInIndex"
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
o = msgp.AppendUint16(o, z.curPosInIndex) o = msgp.AppendUint16(o, z.curPosInIndex)
// string "curSeq"
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
o = msgp.AppendInt64(o, z.curSeq)
return return
} }
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
if err != nil { if err != nil {
return return
} }
case "curSeq":
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
return
}
default: default:
bts, err = msgp.Skip(bts) bts, err = msgp.Skip(bts)
if err != nil { if err != nil {
@ -1047,7 +1025,7 @@ func (z *runIterator16) Msgsize() (s int) {
} else { } else {
s += z.rc.Msgsize() s += z.rc.Msgsize()
} }
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
return return
} }

View File

@ -2,8 +2,6 @@ package roaring
import ( import (
"encoding/binary" "encoding/binary"
"errors"
"fmt"
"io" "io"
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
@ -34,37 +32,3 @@ func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b) err := msgp.Decode(stream, b)
return 0, err return 0, err
} }
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
b.iv = b.iv[:0]
b.card = 0
var numRuns uint16
err := binary.Read(stream, binary.LittleEndian, &numRuns)
if err != nil {
return 0, err
}
nr := int(numRuns)
encRun := make([]uint16, 2*nr)
by := make([]byte, 4*nr)
err = binary.Read(stream, binary.LittleEndian, &by)
if err != nil {
return 0, err
}
for i := range encRun {
if len(by) < 2 {
return 0, errCorruptedStream
}
encRun[i] = binary.LittleEndian.Uint16(by)
by = by[2:]
}
for i := 0; i < nr; i++ {
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
}
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
b.card += int64(encRun[i*2+1]) + 1
}
return 0, err
}

View File

@ -74,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
return by return by
} }
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 { func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 { if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2") panic("Slice size should be divisible by 2")

View File

@ -6,6 +6,7 @@ import (
"errors" "errors"
"io" "io"
"reflect" "reflect"
"runtime"
"unsafe" "unsafe"
) )
@ -22,22 +23,6 @@ func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
return stream.Write(buf) return stream.Write(buf)
} }
// readFrom reads an arrayContainer from stream.
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
// *before* you call readFrom. We can't guess the size in the stream
// by this point.
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return io.ReadFull(stream, buf)
}
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
buf := uint64SliceAsByteSlice(bc.bitmap)
n, err := io.ReadFull(stream, buf)
bc.computeCardinality()
return n, err
}
func uint64SliceAsByteSlice(slice []uint64) []byte { func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
@ -46,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
header.Len *= 8 header.Len *= 8
header.Cap *= 8 header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it // return it
return *(*[]byte)(unsafe.Pointer(&header)) return result
} }
func uint16SliceAsByteSlice(slice []uint16) []byte { func uint16SliceAsByteSlice(slice []uint16) []byte {
@ -58,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
header.Len *= 2 header.Len *= 2
header.Cap *= 2 header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it // return it
return *(*[]byte)(unsafe.Pointer(&header)) return result
} }
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
@ -68,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
// Deserialization code follows // Deserialization code follows
func byteSliceAsUint16Slice(slice []byte) []uint16 { ////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible for
// ensuring that the input slice does not get garbage collected, deleted
// or modified while holding the returned slice.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 { if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2") panic("Slice size should be divisible by 2")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 2 rHeader.Data = bHeader.Data
header.Cap /= 2 rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]uint16)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // still crucial: the GC could otherwise free it
// return result
return
} }
func byteSliceAsUint64Slice(slice []byte) []uint64 { func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 { if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8") panic("Slice size should be divisible by 8")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 8 rHeader.Data = bHeader.Data
header.Cap /= 8 rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]uint64)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // still crucial: the GC could otherwise free it
// return result
return
} }
func byteSliceAsInterval16Slice(slice []byte) []interval16 { func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 { if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4") panic("Slice size should be divisible by 4")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 4 rHeader.Data = bHeader.Data
header.Cap /= 4 rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]interval16)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // still crucial: the GC could otherwise free it
// return result
return
} }
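
The conversions above reinterpret a slice's backing array in place instead of copying. A self-contained sketch of the same reflect.SliceHeader + runtime.KeepAlive technique, under the assumption of little-endian byte order; the result aliases the input, so the input must stay alive and unmodified:

package main

import (
	"encoding/binary"
	"fmt"
	"reflect"
	"runtime"
	"unsafe"
)

// bytesAsUint16s reinterprets b as []uint16 without copying.
func bytesAsUint16s(b []byte) (result []uint16) {
	if len(b)%2 != 0 {
		panic("slice size should be divisible by 2")
	}
	bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
	rHeader.Data = bHeader.Data
	rHeader.Len = bHeader.Len / 2
	rHeader.Cap = bHeader.Cap / 2
	runtime.KeepAlive(&b) // keep b alive across the header surgery
	return
}

func main() {
	b := make([]byte, 4)
	binary.LittleEndian.PutUint16(b[0:], 1)
	binary.LittleEndian.PutUint16(b[2:], 2)
	fmt.Println(bytesAsUint16s(b)) // [1 2] on little-endian machines
}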

View File

@ -14,6 +14,7 @@ func equal(a, b []uint16) bool {
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int { func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) { if 0 == len(set2) {
buffer = buffer[:len(set1)]
for k := 0; k < len(set1); k++ { for k := 0; k < len(set1); k++ {
buffer[k] = set1[k] buffer[k] = set1[k]
} }

View File

@ -5,6 +5,12 @@ type shortIterable interface {
next() uint16 next() uint16
} }
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct { type shortIterator struct {
slice []uint16 slice []uint16
loc int loc int
@ -20,6 +26,16 @@ func (si *shortIterator) next() uint16 {
return a return a
} }
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct { type reverseIterator struct {
slice []uint16 slice []uint16
loc int loc int

View File

@ -112,7 +112,7 @@ func highbits(x uint32) uint16 {
return uint16(x >> 16) return uint16(x >> 16)
} }
func lowbits(x uint32) uint16 { func lowbits(x uint32) uint16 {
return uint16(x & 0xFFFF) return uint16(x & maxLowBit)
} }
const maxLowBit = 0xFFFF const maxLowBit = 0xFFFF
@ -302,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
} }
return b return b
} }
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}

View File

@ -3,10 +3,9 @@ sudo: false
language: go language: go
go: go:
- 1.7.x - "1.12.x"
- 1.8.x - "1.13.x"
- 1.9.x - "1.14.x"
- "1.10"
script: script:
- go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/cover
@ -14,9 +13,10 @@ script:
- go get github.com/kisielk/errcheck - go get github.com/kisielk/errcheck
- go get -u github.com/FiloSottile/gvt - go get -u github.com/FiloSottile/gvt
- gvt restore - gvt restore
- go test -v $(go list ./... | grep -v vendor/) - go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/)
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) - go test ./test -v -indexType scorch
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
- docs/project-code-coverage.sh - docs/project-code-coverage.sh
- docs/build_children.sh - docs/build_children.sh

View File

@ -0,0 +1,49 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/english"
)
const SnowballStemmerName = "stemmer_en_snowball"
type EnglishStemmerFilter struct {
}
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
return &EnglishStemmerFilter{}
}
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
english.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishStemmerFilter(), nil
}
func init() {
registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
}
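
A minimal sketch of running the new snowball stemmer filter over a token stream; the package path (analysis/lang/en) is assumed from the package clause and imports above:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
)

func main() {
	stemmer := en.NewEnglishStemmerFilter()
	tokens := analysis.TokenStream{
		&analysis.Token{Term: []byte("running")},
		&analysis.Token{Term: []byte("jumps")},
	}
	for _, tok := range stemmer.Filter(tokens) {
		fmt.Println(string(tok.Term)) // "run", then "jump"
	}
}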

View File

@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
return fieldLength, tokenFreqs return fieldLength, tokenFreqs
} }
func (t *TextField) Analyzer() *analysis.Analyzer {
return t.analyzer
}
func (t *TextField) Value() []byte { func (t *TextField) Value() []byte {
return t.value return t.value
} }

View File

@ -33,10 +33,18 @@ var minLonRad = minLon * degreesToRadian
var minLatRad = minLat * degreesToRadian var minLatRad = minLat * degreesToRadian
var maxLonRad = maxLon * degreesToRadian var maxLonRad = maxLon * degreesToRadian
var maxLatRad = maxLat * degreesToRadian var maxLatRad = maxLat * degreesToRadian
var geoTolerance = 1E-6 var geoTolerance = 1e-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
var geoHashMaxLength = 12
// Point represents a geo point.
type Point struct {
Lon float64 `json:"lon"`
Lat float64 `json:"lat"`
}
// MortonHash computes the morton hash value for the provided geo point // MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat. // This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 { func MortonHash(lon, lat float64) uint64 {
@ -168,3 +176,35 @@ func checkLongitude(longitude float64) error {
} }
return nil return nil
} }
func BoundingRectangleForPolygon(polygon []Point) (
float64, float64, float64, float64, error) {
err := checkLongitude(polygon[0].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[0].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY, minY := polygon[0].Lat, polygon[0].Lat
maxX, minX := polygon[0].Lon, polygon[0].Lon
for i := 1; i < len(polygon); i++ {
err := checkLongitude(polygon[i].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[i].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY = math.Max(maxY, polygon[i].Lat)
minY = math.Min(minY, polygon[i].Lat)
maxX = math.Max(maxX, polygon[i].Lon)
minX = math.Min(minX, polygon[i].Lon)
}
return minX, maxY, maxX, minY, nil
}
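
A minimal sketch of the new bounding-rectangle helper; note the return order is min-lon, max-lat, max-lon, min-lat (top-left, then bottom-right):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	polygon := []geo.Point{
		{Lon: -2.0, Lat: 1.0},
		{Lon: 3.0, Lat: 4.0},
		{Lon: 1.0, Lat: -1.5},
	}
	minLon, maxLat, maxLon, minLat, err := geo.BoundingRectangleForPolygon(polygon)
	if err != nil {
		panic(err)
	}
	fmt.Println(minLon, maxLat, maxLon, minLat) // -2 4 3 -1.5
}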

111
vendor/github.com/blevesearch/bleve/geo/geohash.go generated vendored Normal file
View File

@ -0,0 +1,111 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This implementation is inspired from the geohash-js
// ref: https://github.com/davetroy/geohash-js
package geo
// encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct {
enc string
dec [256]byte
}
// newEncoding constructs a new encoding defined by the given alphabet,
// which must be a 32-byte string.
func newEncoding(encoder string) *encoding {
e := new(encoding)
e.enc = encoder
for i := 0; i < len(e.dec); i++ {
e.dec[i] = 0xff
}
for i := 0; i < len(encoder); i++ {
e.dec[encoder[i]] = byte(i)
}
return e
}
// base32encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
var masks = []uint64{16, 8, 4, 2, 1}
// DecodeGeoHash decodes the geohash string quickly and with
// higher precision. This API is experimental.
func DecodeGeoHash(geoHash string) (float64, float64) {
even := true
lat := []float64{-90.0, 90.0}
lon := []float64{-180.0, 180.0}
for i := 0; i < len(geoHash); i++ {
cd := uint64(base32encoding.dec[geoHash[i]])
for j := 0; j < 5; j++ {
if even {
if cd&masks[j] > 0 {
lon[0] = (lon[0] + lon[1]) / 2
} else {
lon[1] = (lon[0] + lon[1]) / 2
}
} else {
if cd&masks[j] > 0 {
lat[0] = (lat[0] + lat[1]) / 2
} else {
lat[1] = (lat[0] + lat[1]) / 2
}
}
even = !even
}
}
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
}
func EncodeGeoHash(lat, lon float64) string {
even := true
lats := []float64{-90.0, 90.0}
lons := []float64{-180.0, 180.0}
precision := 12
var ch, bit uint64
var geoHash string
for len(geoHash) < precision {
if even {
mid := (lons[0] + lons[1]) / 2
if lon > mid {
ch |= masks[bit]
lons[0] = mid
} else {
lons[1] = mid
}
} else {
mid := (lats[0] + lats[1]) / 2
if lat > mid {
ch |= masks[bit]
lats[0] = mid
} else {
lats[1] = mid
}
}
even = !even
if bit < 4 {
bit++
} else {
geoHash += string(base32encoding.enc[ch])
ch = 0
bit = 0
}
}
return geoHash
}
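
A minimal round-trip sketch for the geohash helpers above; EncodeGeoHash emits a 12-character hash and DecodeGeoHash returns the (lat, lon) midpoint of the cell:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	hash := geo.EncodeGeoHash(52.52, 13.405)
	fmt.Println(hash)

	lat, lon := geo.DecodeGeoHash(hash)
	fmt.Printf("%.5f %.5f\n", lat, lon) // approximately 52.52000 13.40500
}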

View File

@ -16,6 +16,7 @@ package geo
import ( import (
"reflect" "reflect"
"strconv"
"strings" "strings"
) )
@ -24,6 +25,8 @@ import (
// Container: // Container:
// slice length 2 (GeoJSON) // slice length 2 (GeoJSON)
// first element lon, second element lat // first element lon, second element lat
// string (coordinates separated by comma, or a geohash)
// first element lat, second element lon
// map[string]interface{} // map[string]interface{}
// exact keys lat and lon or lng // exact keys lat and lon or lng
// struct // struct
@ -36,10 +39,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
var foundLon, foundLat bool var foundLon, foundLat bool
thingVal := reflect.ValueOf(thing) thingVal := reflect.ValueOf(thing)
if !thingVal.IsValid() {
return lon, lat, false
}
thingTyp := thingVal.Type() thingTyp := thingVal.Type()
// is it a slice // is it a slice
if thingVal.IsValid() && thingVal.Kind() == reflect.Slice { if thingVal.Kind() == reflect.Slice {
// must be length 2 // must be length 2
if thingVal.Len() == 2 { if thingVal.Len() == 2 {
first := thingVal.Index(0) first := thingVal.Index(0)
@ -55,6 +62,37 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
} }
} }
// is it a string
if thingVal.Kind() == reflect.String {
geoStr := thingVal.Interface().(string)
if strings.Contains(geoStr, ",") {
// geo point with coordinates split by comma
points := strings.Split(geoStr, ",")
for i, point := range points {
// trim any leading or trailing whitespace
points[i] = strings.TrimSpace(point)
}
if len(points) == 2 {
var err error
lat, err = strconv.ParseFloat(points[0], 64)
if err == nil {
foundLat = true
}
lon, err = strconv.ParseFloat(points[1], 64)
if err == nil {
foundLon = true
}
}
} else {
// geohash
if len(geoStr) <= geoHashMaxLength {
lat, lon = DecodeGeoHash(geoStr)
foundLat = true
foundLon = true
}
}
}
// is it a map // is it a map
if l, ok := thing.(map[string]interface{}); ok { if l, ok := thing.(map[string]interface{}); ok {
if lval, ok := l["lon"]; ok { if lval, ok := l["lon"]; ok {
@ -68,7 +106,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
} }
// now try reflection on struct fields // now try reflection on struct fields
if thingVal.IsValid() && thingVal.Kind() == reflect.Struct { if thingVal.Kind() == reflect.Struct {
for i := 0; i < thingVal.NumField(); i++ { for i := 0; i < thingVal.NumField(); i++ {
fieldName := thingTyp.Field(i).Name fieldName := thingTyp.Field(i).Name
if strings.HasPrefix(strings.ToLower(fieldName), "lon") { if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
@ -113,6 +151,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
// extract numeric value (if possible) and returns a float64 // extract numeric value (if possible) and returns a float64
func extractNumericVal(v interface{}) (float64, bool) { func extractNumericVal(v interface{}) (float64, bool) {
val := reflect.ValueOf(v) val := reflect.ValueOf(v)
if !val.IsValid() {
return 0, false
}
typ := val.Type() typ := val.Type()
switch typ.Kind() { switch typ.Kind() {
case reflect.Float32, reflect.Float64: case reflect.Float32, reflect.Float64:
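
The string handling added above accepts two shapes: a comma-separated "lat, lon" pair, or anything without a comma that is short enough to be treated as a geohash. Note the asymmetry: the comma form is lat-first on input, while ExtractGeoPoint always returns (lon, lat, ok). A small sketch, with illustrative input values:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	// Comma form: "lat, lon" in, (lon, lat, ok) out.
	lon, lat, ok := geo.ExtractGeoPoint("60.39299, 5.32415")
	fmt.Println(lon, lat, ok) // 5.32415 60.39299 true

	// No comma: decoded as a geohash if within the max geohash length.
	lon, lat, ok = geo.ExtractGeoPoint("u4xsjnbhrcgn")
	fmt.Println(lon, lat, ok)
}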

25
vendor/github.com/blevesearch/bleve/go.mod generated vendored Normal file
View File

@ -0,0 +1,25 @@
module github.com/blevesearch/bleve
go 1.13
require (
github.com/RoaringBitmap/roaring v0.4.21
github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/segment v0.9.0
github.com/blevesearch/snowballstem v0.9.0
github.com/blevesearch/zap/v11 v11.0.7
github.com/blevesearch/zap/v12 v12.0.7
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/couchbase/moss v0.1.0
github.com/couchbase/vellum v1.0.1
github.com/golang/protobuf v1.3.2
github.com/kljensen/snowball v0.6.0
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
github.com/spf13/cobra v0.0.5
github.com/steveyen/gtreap v0.1.0
github.com/syndtr/goleveldb v1.0.0
github.com/willf/bitset v1.1.10
go.etcd.io/bbolt v1.3.4
golang.org/x/text v0.3.0
)

View File

@ -117,6 +117,26 @@ func (b *Batch) String() string {
// be re-used in the future. // be re-used in the future.
func (b *Batch) Reset() { func (b *Batch) Reset() {
b.internal.Reset() b.internal.Reset()
b.lastDocSize = 0
b.totalSize = 0
}
func (b *Batch) Merge(o *Batch) {
if o != nil && o.internal != nil {
b.internal.Merge(o.internal)
if o.LastDocSize() > 0 {
b.lastDocSize = o.LastDocSize()
}
b.totalSize = uint64(b.internal.TotalDocSize())
}
}
func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
b.internal.SetPersistedCallback(f)
}
func (b *Batch) PersistedCallback() index.BatchCallback {
return b.internal.PersistedCallback()
} }
// An Index implements all the indexing and searching // An Index implements all the indexing and searching
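
The batch-level additions compose naturally: batches built independently can be folded together with Merge, and SetPersistedCallback registers a function the persister invokes once the batch is durable. A sketch against a scorch-backed index; the path, documents, and mapping are illustrative, and persisted callbacks are only meaningful for the scorch index type:

package main

import (
	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/index/scorch"
)

func main() {
	idx, err := bleve.NewUsing("example.bleve", bleve.NewIndexMapping(),
		scorch.Name, scorch.Name, nil)
	if err != nil {
		panic(err)
	}
	defer idx.Close()

	b1 := idx.NewBatch()
	_ = b1.Index("doc1", map[string]interface{}{"body": "hello"})
	b2 := idx.NewBatch()
	_ = b2.Index("doc2", map[string]interface{}{"body": "world"})

	// Fold b2 into b1; b1 now carries both index ops.
	b1.Merge(b2)

	// Invoked by the persister once the batch's segment is on disk.
	b1.SetPersistedCallback(func(err error) {
		// err is non-nil if persisting the snapshot failed
	})

	if err := idx.Batch(b1); err != nil {
		panic(err)
	}
}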

View File

@ -98,18 +98,33 @@ type IndexReader interface {
Close() error Close() error
} }
// The Regexp interface defines the subset of the regexp.Regexp API
// methods that are used by bleve indexes, allowing callers to pass in
// alternate implementations.
type Regexp interface {
FindStringIndex(s string) (loc []int)
LiteralPrefix() (prefix string, complete bool)
String() string
}
type IndexReaderRegexp interface { type IndexReaderRegexp interface {
FieldDictRegexp(field string, regex []byte) (FieldDict, error) FieldDictRegexp(field string, regex string) (FieldDict, error)
} }
type IndexReaderFuzzy interface { type IndexReaderFuzzy interface {
FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error) FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
} }
type IndexReaderOnly interface { type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
} }
type IndexReaderContains interface {
FieldDictContains(field string) (FieldDictContains, error)
}
// FieldTerms contains the terms used by a document, keyed by field // FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string type FieldTerms map[string][]string
@ -219,6 +234,10 @@ type FieldDict interface {
Close() error Close() error
} }
type FieldDictContains interface {
Contains(key []byte) (bool, error)
}
// DocIDReader is the interface exposing enumeration of documents identifiers. // DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources. // Close the reader to release associated resources.
type DocIDReader interface { type DocIDReader interface {
@ -237,9 +256,12 @@ type DocIDReader interface {
Close() error Close() error
} }
type BatchCallback func(error)
type Batch struct { type Batch struct {
IndexOps map[string]*document.Document IndexOps map[string]*document.Document
InternalOps map[string][]byte InternalOps map[string][]byte
persistedCallback BatchCallback
} }
func NewBatch() *Batch { func NewBatch() *Batch {
@ -265,6 +287,14 @@ func (b *Batch) DeleteInternal(key []byte) {
b.InternalOps[string(key)] = nil b.InternalOps[string(key)] = nil
} }
func (b *Batch) SetPersistedCallback(f BatchCallback) {
b.persistedCallback = f
}
func (b *Batch) PersistedCallback() BatchCallback {
return b.persistedCallback
}
func (b *Batch) String() string { func (b *Batch) String() string {
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps)) rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
for k, v := range b.IndexOps { for k, v := range b.IndexOps {
@ -287,6 +317,27 @@ func (b *Batch) String() string {
func (b *Batch) Reset() { func (b *Batch) Reset() {
b.IndexOps = make(map[string]*document.Document) b.IndexOps = make(map[string]*document.Document)
b.InternalOps = make(map[string][]byte) b.InternalOps = make(map[string][]byte)
b.persistedCallback = nil
}
func (b *Batch) Merge(o *Batch) {
for k, v := range o.IndexOps {
b.IndexOps[k] = v
}
for k, v := range o.InternalOps {
b.InternalOps[k] = v
}
}
func (b *Batch) TotalDocSize() int {
var s int
for k, v := range b.IndexOps {
if v != nil {
s += v.Size() + size.SizeOfString
}
s += len(k)
}
return s
} }
// Optimizable represents an optional interface that is implementable by // Optimizable represents an optional interface that is implementable by
@ -298,11 +349,19 @@ type Optimizable interface {
Optimize(kind string, octx OptimizableContext) (OptimizableContext, error) Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
} }
// Represents a result of optimization -- see the Finish() method.
type Optimized interface{}
type OptimizableContext interface { type OptimizableContext interface {
// Once all the optimizable resources have been provided the same // Once all the optimizable resources have been provided the same
// OptimizableContext instance, the optimization preparations are // OptimizableContext instance, the optimization preparations are
// finished or completed via the Finish() method. // finished or completed via the Finish() method.
Finish() error //
// Depending on the optimization being performed, the Finish()
// method might return a non-nil Optimized instance. For example,
// the Optimized instance might represent an optimized
// TermFieldReader instance.
Finish() (Optimized, error)
} }
type DocValueReader interface { type DocValueReader interface {
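
The Regexp subset above is deliberately small so that callers can swap in alternate engines; notably, the standard library's *regexp.Regexp already has all three methods and satisfies the interface unchanged. A compile-time check sketch:

package main

import (
	"regexp"

	"github.com/blevesearch/bleve/index"
)

// *regexp.Regexp provides FindStringIndex, LiteralPrefix and String,
// so it is usable anywhere the new index.Regexp interface is expected.
var _ index.Regexp = (*regexp.Regexp)(nil)

func main() {
	var r index.Regexp = regexp.MustCompile(`ab.*`)
	_ = r
}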

View File

@ -302,7 +302,7 @@ Map local bitsets into global number space (global meaning cross-segment but sti
IndexSnapshot already should have mapping something like: IndexSnapshot already should have mapping something like:
0 - Offset 0 0 - Offset 0
1 - Offset 3 (because segment 0 had 3 docs) 1 - Offset 3 (because segment 0 had 3 docs)
2 - Offset 4 (becuase segment 1 had 1 doc) 2 - Offset 4 (because segment 1 had 1 doc)
This maps to search result bitset: This maps to search result bitset:

View File

@ -19,6 +19,7 @@ import (
"sync/atomic" "sync/atomic"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
) )
@ -31,6 +32,7 @@ type segmentIntroduction struct {
applied chan error applied chan error
persisted chan error persisted chan error
persistedCallback index.BatchCallback
} }
type persistIntroduction struct { type persistIntroduction struct {
@ -74,11 +76,6 @@ OUTER:
case persist := <-s.persists: case persist := <-s.persists:
s.introducePersist(persist) s.introducePersist(persist)
case revertTo := <-s.revertToSnapshots:
err := s.revertToSnapshot(revertTo)
if err != nil {
continue OUTER
}
} }
var epochCurr uint64 var epochCurr uint64
@ -107,8 +104,11 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
s.rootLock.RLock() s.rootLock.RLock()
root := s.root root := s.root
root.AddRef()
s.rootLock.RUnlock() s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
nsegs := len(root.segment) nsegs := len(root.segment)
// prepare new index snapshot // prepare new index snapshot
@ -123,6 +123,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
// iterate through current segments // iterate through current segments
var running uint64 var running uint64
var docsToPersistCount, memSegments, fileSegments uint64
for i := range root.segment { for i := range root.segment {
// see if optimistic work included this segment // see if optimistic work included this segment
delta, ok := next.obsoletes[root.segment[i].id] delta, ok := next.obsoletes[root.segment[i].id]
@ -161,7 +162,18 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
newSnapshot.offsets = append(newSnapshot.offsets, running) newSnapshot.offsets = append(newSnapshot.offsets, running)
running += newss.segment.Count() running += newss.segment.Count()
} }
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
} }
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
// append new segment, if any, to end of the new index snapshot // append new segment, if any, to end of the new index snapshot
if next.data != nil { if next.data != nil {
@ -197,6 +209,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
if next.persisted != nil { if next.persisted != nil {
s.rootPersisted = append(s.rootPersisted, next.persisted) s.rootPersisted = append(s.rootPersisted, next.persisted)
} }
if next.persistedCallback != nil {
s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
}
// swap in new index snapshot // swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch newSnapshot.epoch = s.nextSnapshotEpoch
s.nextSnapshotEpoch++ s.nextSnapshotEpoch++
@ -221,10 +236,13 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
s.rootLock.Lock() s.rootLock.Lock()
root := s.root root := s.root
root.AddRef()
nextSnapshotEpoch := s.nextSnapshotEpoch nextSnapshotEpoch := s.nextSnapshotEpoch
s.nextSnapshotEpoch++ s.nextSnapshotEpoch++
s.rootLock.Unlock() s.rootLock.Unlock()
defer func() { _ = root.DecRef() }()
newIndexSnapshot := &IndexSnapshot{ newIndexSnapshot := &IndexSnapshot{
parent: s, parent: s,
epoch: nextSnapshotEpoch, epoch: nextSnapshotEpoch,
@ -235,6 +253,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
creator: "introducePersist", creator: "introducePersist",
} }
var docsToPersistCount, memSegments, fileSegments uint64
for i, segmentSnapshot := range root.segment { for i, segmentSnapshot := range root.segment {
// see if this segment has been replaced // see if this segment has been replaced
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok { if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
@ -251,9 +270,17 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
// update items persisted in case of a new segment snapshot // update items persisted in case of a new segment snapshot
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
atomic.AddUint64(&s.stats.TotPersistedSegments, 1) atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
fileSegments++
} else { } else {
newIndexSnapshot.segment[i] = root.segment[i] newIndexSnapshot.segment[i] = root.segment[i]
newIndexSnapshot.segment[i].segment.AddRef() newIndexSnapshot.segment[i].segment.AddRef()
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
}
} }
newIndexSnapshot.offsets[i] = root.offsets[i] newIndexSnapshot.offsets[i] = root.offsets[i]
} }
@ -262,6 +289,9 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
newIndexSnapshot.internal[k] = v newIndexSnapshot.internal[k] = v
} }
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
newIndexSnapshot.updateSize() newIndexSnapshot.updateSize()
s.rootLock.Lock() s.rootLock.Lock()
rootPrev := s.root rootPrev := s.root
@ -276,14 +306,19 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
close(persist.applied) close(persist.applied)
} }
// The introducer should definitely handle the segmentMerge.notify
// channel before exiting the introduceMerge.
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1) atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1) defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
s.rootLock.RLock() s.rootLock.RLock()
root := s.root root := s.root
root.AddRef()
s.rootLock.RUnlock() s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
newSnapshot := &IndexSnapshot{ newSnapshot := &IndexSnapshot{
parent: s, parent: s,
internal: root.internal, internal: root.internal,
@ -293,7 +328,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
// iterate through current segments // iterate through current segments
newSegmentDeleted := roaring.NewBitmap() newSegmentDeleted := roaring.NewBitmap()
var running uint64 var running, docsToPersistCount, memSegments, fileSegments uint64
for i := range root.segment { for i := range root.segment {
segmentID := root.segment[i].id segmentID := root.segment[i].id
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
@ -329,9 +364,17 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
root.segment[i].segment.AddRef() root.segment[i].segment.AddRef()
newSnapshot.offsets = append(newSnapshot.offsets, running) newSnapshot.offsets = append(newSnapshot.offsets, running)
running += root.segment[i].segment.Count() running += root.segment[i].segment.Count()
if isMemorySegment(root.segment[i]) {
docsToPersistCount += root.segment[i].Count()
memSegments++
} else {
fileSegments++
} }
} }
}
// before the newMerge introduction, need to clean the newly // before the newMerge introduction, need to clean the newly
// merged segment wrt the current root segments, hence // merged segment wrt the current root segments, hence
// applying the obsolete segment contents to newly merged segment // applying the obsolete segment contents to newly merged segment
@ -360,7 +403,19 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}) })
newSnapshot.offsets = append(newSnapshot.offsets, running) newSnapshot.offsets = append(newSnapshot.offsets, running)
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1) atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
switch nextMerge.new.(type) {
case segment.PersistedSegment:
fileSegments++
default:
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
memSegments++
} }
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
@ -384,65 +439,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
close(nextMerge.notify) close(nextMerge.notify)
} }
func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { func isMemorySegment(s *SegmentSnapshot) bool {
atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1) switch s.segment.(type) {
defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1) case segment.PersistedSegment:
return false
if revertTo.snapshot == nil { default:
err := fmt.Errorf("Cannot revert to a nil snapshot") return true
revertTo.applied <- err
return err
} }
// acquire lock
s.rootLock.Lock()
// prepare a new index snapshot, based on next snapshot
newSnapshot := &IndexSnapshot{
parent: s,
segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)),
offsets: revertTo.snapshot.offsets,
internal: revertTo.snapshot.internal,
epoch: s.nextSnapshotEpoch,
refs: 1,
creator: "revertToSnapshot",
}
s.nextSnapshotEpoch++
// iterate through segments
for i, segmentSnapshot := range revertTo.snapshot.segment {
newSnapshot.segment[i] = &SegmentSnapshot{
id: segmentSnapshot.id,
segment: segmentSnapshot.segment,
deleted: segmentSnapshot.deleted,
cachedDocs: segmentSnapshot.cachedDocs,
creator: segmentSnapshot.creator,
}
newSnapshot.segment[i].segment.AddRef()
// remove segment from ineligibleForRemoval map
filename := zapFileName(segmentSnapshot.id)
delete(s.ineligibleForRemoval, filename)
}
if revertTo.persisted != nil {
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
}
newSnapshot.updateSize()
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
// release lock
s.rootLock.Unlock()
if rootPrev != nil {
_ = rootPrev.DecRef()
}
close(revertTo.applied)
return nil
} }
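
The gauges the introducer now maintains (TotItemsToPersist, TotMemorySegmentsAtRoot, TotFileSegmentsAtRoot) are observable from the embedding application. A sketch, assuming, as in this version, that scorch's stats surface under the "index" key of bleve's StatsMap():

package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
)

func printRootSegmentStats(idx bleve.Index) {
	// Assumption: scorch nests its stats under "index" in StatsMap().
	if s, ok := idx.StatsMap()["index"].(map[string]interface{}); ok {
		fmt.Println("memory segments at root:", s["TotMemorySegmentsAtRoot"])
		fmt.Println("file segments at root:  ", s["TotFileSegmentsAtRoot"])
		fmt.Println("items awaiting persist: ", s["TotItemsToPersist"])
	}
}

func main() {}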

View File

@ -18,13 +18,13 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os" "os"
"strings"
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/mergeplan" "github.com/blevesearch/bleve/index/scorch/mergeplan"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
) )
func (s *Scorch) mergerLoop() { func (s *Scorch) mergerLoop() {
@ -46,12 +46,12 @@ OUTER:
default: default:
// check to see if there is a new snapshot to persist // check to see if there is a new snapshot to persist
s.rootLock.RLock() s.rootLock.Lock()
ourSnapshot := s.root ourSnapshot := s.root
ourSnapshot.AddRef() ourSnapshot.AddRef()
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size())) atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch) atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
s.rootLock.RUnlock() s.rootLock.Unlock()
if ourSnapshot.epoch != lastEpochMergePlanned { if ourSnapshot.epoch != lastEpochMergePlanned {
startTime := time.Now() startTime := time.Now()
@ -60,7 +60,7 @@ OUTER:
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
if err != nil { if err != nil {
atomic.StoreUint64(&s.iStats.mergeEpoch, 0) atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
if err == ErrClosed { if err == segment.ErrClosed {
// index has been closed // index has been closed
_ = ourSnapshot.DecRef() _ = ourSnapshot.DecRef()
break OUTER break OUTER
@ -130,18 +130,18 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
options *mergeplan.MergePlanOptions) error { options *mergeplan.MergePlanOptions) error {
// build list of zap segments in this snapshot // build list of persisted segments in this snapshot
var onlyZapSnapshots []mergeplan.Segment var onlyPersistedSnapshots []mergeplan.Segment
for _, segmentSnapshot := range ourSnapshot.segment { for _, segmentSnapshot := range ourSnapshot.segment {
if _, ok := segmentSnapshot.segment.(*zap.Segment); ok { if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot) onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
} }
} }
atomic.AddUint64(&s.stats.TotFileMergePlan, 1) atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
// give this list to the planner // give this list to the planner
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options) resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
if err != nil { if err != nil {
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1) atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
return fmt.Errorf("merge planning err: %v", err) return fmt.Errorf("merge planning err: %v", err)
@ -151,13 +151,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil return nil
} }
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now // process tasks in serial for now
var notifications []chan *IndexSnapshot var filenames []string
for _, task := range resultMergePlan.Tasks { for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 { if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@ -168,26 +168,32 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
oldMap := make(map[uint64]*SegmentSnapshot) oldMap := make(map[uint64]*SegmentSnapshot)
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments)) docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
for _, planSegment := range task.Segments { for _, planSegment := range task.Segments {
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
oldMap[segSnapshot.id] = segSnapshot oldMap[segSnapshot.id] = segSnapshot
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
if segSnapshot.LiveSize() == 0 { if segSnapshot.LiveSize() == 0 {
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1) atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
oldMap[segSnapshot.id] = nil oldMap[segSnapshot.id] = nil
} else { } else {
segmentsToMerge = append(segmentsToMerge, zapSeg) segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
docsToDrop = append(docsToDrop, segSnapshot.deleted) docsToDrop = append(docsToDrop, segSnapshot.deleted)
} }
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast-merger, slow-persister workflows.
path := persistedSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
} }
} }
} }
var oldNewDocNums map[uint64][]uint64 var oldNewDocNums map[uint64][]uint64
var segment segment.Segment var seg segment.Segment
if len(segmentsToMerge) > 0 { if len(segmentsToMerge) > 0 {
filename := zapFileName(newSegmentID) filename := zapFileName(newSegmentID)
s.markIneligibleForRemoval(filename) s.markIneligibleForRemoval(filename)
@ -196,9 +202,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
fileMergeZapStartTime := time.Now() fileMergeZapStartTime := time.Now()
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
s.closeCh, s)
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime) atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
@ -209,10 +215,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
if err != nil { if err != nil {
s.unmarkIneligibleForRemoval(filename) s.unmarkIneligibleForRemoval(filename)
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
if err == segment.ErrClosed {
return err
}
return fmt.Errorf("merging failed: %v", err) return fmt.Errorf("merging failed: %v", err)
} }
segment, err = zap.Open(path) seg, err = s.segPlugin.Open(path)
if err != nil { if err != nil {
s.unmarkIneligibleForRemoval(filename) s.unmarkIneligibleForRemoval(filename)
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
@ -230,33 +239,41 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
id: newSegmentID, id: newSegmentID,
old: oldMap, old: oldMap,
oldNewDocNums: oldNewDocNums, oldNewDocNums: oldNewDocNums,
new: segment, new: seg,
notify: make(chan *IndexSnapshot, 1), notify: make(chan *IndexSnapshot),
} }
notifications = append(notifications, sm.notify)
// give it to the introducer // give it to the introducer
select { select {
case <-s.closeCh: case <-s.closeCh:
_ = segment.Close() _ = seg.Close()
return ErrClosed return segment.ErrClosed
case s.merges <- sm: case s.merges <- sm:
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1) atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
} }
introStartTime := time.Now()
// it is safe to blockingly wait for the merge introduction
// here as the introducer is bound to handle the notify channel.
newSnapshot := <-sm.notify
introTime := uint64(time.Since(introStartTime))
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
}
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
}
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
} }
for _, notification := range notifications { // once all the newly merged segment introductions are done,
select { // it's safe to unflip the removal ineligibility for the replaced
case <-s.closeCh: // older segments
return ErrClosed for _, f := range filenames {
case newSnapshot := <-notification: s.unmarkIneligibleForRemoval(f)
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
}
}
} }
return nil return nil
@ -274,8 +291,8 @@ type segmentMerge struct {
// persisted segment, and synchronously introduce that new segment // persisted segment, and synchronously introduce that new segment
// into the root // into the root
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
chunkFactor uint32) (*IndexSnapshot, uint64, error) { sbsIndexes []int) (*IndexSnapshot, uint64, error) {
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1) atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
memMergeZapStartTime := time.Now() memMergeZapStartTime := time.Now()
@ -287,7 +304,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
path := s.path + string(os.PathSeparator) + filename path := s.path + string(os.PathSeparator) + filename
newDocNums, _, err := newDocNums, _, err :=
zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor) s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
@ -302,22 +319,22 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
return nil, 0, err return nil, 0, err
} }
segment, err := zap.Open(path) seg, err := s.segPlugin.Open(path)
if err != nil { if err != nil {
atomic.AddUint64(&s.stats.TotMemMergeErr, 1) atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err return nil, 0, err
} }
// update persisted stats // update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count()) atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
atomic.AddUint64(&s.stats.TotPersistedSegments, 1) atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
sm := &segmentMerge{ sm := &segmentMerge{
id: newSegmentID, id: newSegmentID,
old: make(map[uint64]*SegmentSnapshot), old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64), oldNewDocNums: make(map[uint64][]uint64),
new: segment, new: seg,
notify: make(chan *IndexSnapshot, 1), notify: make(chan *IndexSnapshot),
} }
for i, idx := range sbsIndexes { for i, idx := range sbsIndexes {
@ -328,17 +345,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
select { // send to introducer select { // send to introducer
case <-s.closeCh: case <-s.closeCh:
_ = segment.DecRef() _ = seg.DecRef()
return nil, 0, ErrClosed return nil, 0, segment.ErrClosed
case s.merges <- sm: case s.merges <- sm:
} }
select { // wait for introduction to complete // blockingly wait for the introduction to complete
case <-s.closeCh: newSnapshot := <-sm.notify
return nil, 0, ErrClosed if newSnapshot != nil {
case newSnapshot := <-sm.notify:
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
atomic.AddUint64(&s.stats.TotMemMergeDone, 1) atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
return newSnapshot, newSegmentID, nil
} }
return newSnapshot, newSegmentID, nil
}
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
} }
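
Worth calling out: sm.notify is now unbuffered and the merger waits on it without a closeCh escape hatch, which is only safe because the introducer is guaranteed to service the channel (see the comment before introduceMerge). A minimal, self-contained sketch of that rendezvous pattern, with illustrative names standing in for the scorch types:

package main

import "fmt"

// snapshot stands in for scorch's *IndexSnapshot.
type snapshot struct{ epoch uint64 }

func main() {
	merges := make(chan chan *snapshot)

	// Introducer: once it accepts a merge, it always answers on notify.
	go func() {
		for notify := range merges {
			// ... swap the merged segment into the root ...
			notify <- &snapshot{epoch: 42}
		}
	}()

	// Merger: the unbuffered notify channel makes the reply a
	// rendezvous, so a bare receive cannot be lost or leak.
	notify := make(chan *snapshot)
	merges <- notify
	fmt.Println("introduced at epoch", (<-notify).epoch)
}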

View File

@ -217,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if len(roster) > 0 { if len(roster) > 0 {
rosterScore := scoreSegments(roster, o) rosterScore := scoreSegments(roster, o)
if len(bestRoster) <= 0 || rosterScore < bestRosterScore { if len(bestRoster) == 0 || rosterScore < bestRosterScore {
bestRoster = roster bestRoster = roster
bestRosterScore = rosterScore bestRosterScore = rosterScore
} }
} }
} }
if len(bestRoster) <= 0 { if len(bestRoster) == 0 {
return rv, nil return rv, nil
} }

View File

@ -18,17 +18,37 @@ import (
"fmt" "fmt"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/scorch/segment"
) )
func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) ( var OptimizeConjunction = true
index.OptimizableContext, error) { var OptimizeConjunctionUnadorned = true
if kind != "conjunction" { var OptimizeDisjunctionUnadorned = true
return octx, nil
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
octx index.OptimizableContext) (index.OptimizableContext, error) {
if OptimizeConjunction && kind == "conjunction" {
return s.optimizeConjunction(octx)
} }
if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" {
return s.optimizeConjunctionUnadorned(octx)
}
if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" {
return s.optimizeDisjunctionUnadorned(octx)
}
return octx, nil
}
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
// ----------------------------------------------------------------
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
octx index.OptimizableContext) (index.OptimizableContext, error) {
if octx == nil { if octx == nil {
octx = &OptimizeTFRConjunction{snapshot: s.snapshot} octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
} }
@ -39,7 +59,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.Optimiza
} }
if o.snapshot != s.snapshot { if o.snapshot != s.snapshot {
return nil, fmt.Errorf("tried to optimize across different snapshots") return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
} }
o.tfrs = append(o.tfrs, s) o.tfrs = append(o.tfrs, s)
@ -53,41 +73,324 @@ type OptimizeTFRConjunction struct {
tfrs []*IndexSnapshotTermFieldReader tfrs []*IndexSnapshotTermFieldReader
} }
func (o *OptimizeTFRConjunction) Finish() error { func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
if len(o.tfrs) <= 1 { if len(o.tfrs) <= 1 {
return nil return nil, nil
} }
for i := range o.snapshot.segment { for i := range o.snapshot.segment {
itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator) itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr0.ActualBM == nil { if !ok || itr0.ActualBitmap() == nil {
continue continue
} }
itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator) itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr1.ActualBM == nil { if !ok || itr1.ActualBitmap() == nil {
continue continue
} }
bm := roaring.And(itr0.ActualBM, itr1.ActualBM) bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
for _, tfr := range o.tfrs[2:] { for _, tfr := range o.tfrs[2:] {
itr, ok := tfr.iterators[i].(*zap.PostingsIterator) itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok || itr.ActualBM == nil { if !ok || itr.ActualBitmap() == nil {
continue continue
} }
bm.And(itr.ActualBM) bm.And(itr.ActualBitmap())
} }
// in this conjunction optimization, the postings iterators
// will all share the same AND'ed-together actual bitmap. The
// regular conjunction searcher machinery will still be used,
// but the underlying bitmap will be smaller.
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if ok && itr.ActualBitmap() != nil {
itr.ReplaceActual(bm)
}
}
}
return nil, nil
}
// ----------------------------------------------------------------
// An "unadorned" conjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and
// term-vectors are not required, and instead only the internal-id's
// are needed.
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
octx index.OptimizableContext) (index.OptimizableContext, error) {
if octx == nil {
octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
}
o, ok := octx.(*OptimizeTFRConjunctionUnadorned)
if !ok {
return nil, nil
}
if o.snapshot != s.snapshot {
return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
}
o.tfrs = append(o.tfrs, s)
return o, nil
}
type OptimizeTFRConjunctionUnadorned struct {
snapshot *IndexSnapshot
tfrs []*IndexSnapshotTermFieldReader
}
var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
var OptimizeTFRConjunctionUnadornedField = "*"
// Finish of an unadorned conjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps AND'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
if len(o.tfrs) <= 1 {
return nil, nil
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRConjunctionUnadornedTerm,
field: OptimizeTFRConjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
OUTER:
for i := range o.snapshot.segment {
actualBMs = actualBMs[:0]
var docNum1HitLast uint64
var docNum1HitLastOk bool
for _, tfr := range o.tfrs { for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(*zap.PostingsIterator) if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok {
if ok && itr.ActualBM != nil { // An empty postings iterator means the entire AND is empty.
itr.ActualBM = bm oTFR.iterators[i] = segment.AnEmptyPostingsIterator
itr.Actual = bm.Iterator() continue OUTER
}
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
// We only optimize postings iterators that support this operation.
return nil, nil
}
// If the postings iterator is "1-hit" optimized, then we
// can perform several optimizations up-front here.
docNum1Hit, ok := itr.DocNum1Hit()
if ok {
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
// The docNum1Hit doesn't match the previous
// docNum1HitLast, so the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
docNum1HitLast = docNum1Hit
docNum1HitLastOk = true
continue
}
if itr.ActualBitmap() == nil {
// An empty actual bitmap means the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
// Collect the actual bitmap for more processing later.
actualBMs = append(actualBMs, itr.ActualBitmap())
}
if docNum1HitLastOk {
// We reach here if all the 1-hit optimized posting
// iterators had the same 1-hit docNum, so we can check if
// our collected actual bitmaps also have that docNum.
for _, bm := range actualBMs {
if !bm.Contains(uint32(docNum1HitLast)) {
// The docNum1Hit isn't in one of our actual
// bitmaps, so the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
}
// The actual bitmaps and docNum1Hits all contain or have
// the same 1-hit docNum, so that's our AND'ed result.
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
continue OUTER
}
if len(actualBMs) == 0 {
// If we've collected no actual bitmaps at this point,
// then the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
if len(actualBMs) == 1 {
// If we've only 1 actual bitmap, then that's our result.
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(actualBMs[0])
continue OUTER
}
// Else, AND together our collected bitmaps as our result.
bm := roaring.And(actualBMs[0], actualBMs[1])
for _, actualBM := range actualBMs[2:] {
bm.And(actualBM)
}
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
}
// ----------------------------------------------------------------
// An "unadorned" disjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and
// term-vectors are not required, and instead only the internal-id's
// are needed.
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
octx index.OptimizableContext) (index.OptimizableContext, error) {
if octx == nil {
octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot}
}
o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
if !ok {
return nil, nil
}
if o.snapshot != s.snapshot {
return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
}
o.tfrs = append(o.tfrs, s)
return o, nil
}
type OptimizeTFRDisjunctionUnadorned struct {
snapshot *IndexSnapshot
tfrs []*IndexSnapshotTermFieldReader
}
var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
var OptimizeTFRDisjunctionUnadornedField = "*"
// Finish of an unadorned disjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps OR'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
if len(o.tfrs) <= 1 {
return nil, nil
}
for i := range o.snapshot.segment {
var cMax uint64
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
if itr.ActualBitmap() != nil {
c := itr.ActualBitmap().GetCardinality()
if cMax < c {
cMax = c
} }
} }
} }
return nil // Heuristic to skip the optimization if all the constituent
// bitmaps are too small, where the processing & resource
// overhead to create the OR'ed bitmap outweighs the benefit.
if cMax < OptimizeDisjunctionUnadornedMinChildCardinality {
return nil, nil
}
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRDisjunctionUnadornedTerm,
field: OptimizeTFRDisjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
for i := range o.snapshot.segment {
docNums = docNums[:0]
actualBMs = actualBMs[:0]
for _, tfr := range o.tfrs {
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
docNum, ok := itr.DocNum1Hit()
if ok {
docNums = append(docNums, uint32(docNum))
continue
}
if itr.ActualBitmap() != nil {
actualBMs = append(actualBMs, itr.ActualBitmap())
}
}
var bm *roaring.Bitmap
if len(actualBMs) > 2 {
bm = roaring.HeapOr(actualBMs...)
} else if len(actualBMs) == 2 {
bm = roaring.Or(actualBMs[0], actualBMs[1])
} else if len(actualBMs) == 1 {
bm = actualBMs[0].Clone()
}
if bm == nil {
bm = roaring.New()
}
bm.AddMany(docNums)
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
} }
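
All three optimization paths are gated by exported package-level switches, and the unadorned disjunction path additionally carries a cardinality heuristic, so deployments can opt out or retune without forking. A sketch of flipping them process-wide (the chosen values are illustrative):

package main

import "github.com/blevesearch/bleve/index/scorch"

func init() {
	// Keep the plain conjunction optimization, but disable the
	// unadorned conjunction path and raise the bar for the unadorned
	// disjunction heuristic (its default is 256).
	scorch.OptimizeConjunctionUnadorned = false
	scorch.OptimizeDisjunctionUnadornedMinChildCardinality = 1024
}

func main() {}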

View File

@ -17,9 +17,11 @@ package scorch
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"encoding/json"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"math"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
@ -28,23 +30,54 @@ import (
"time" "time"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap" bolt "go.etcd.io/bbolt"
"github.com/boltdb/bolt"
) )
var DefaultChunkFactor uint32 = 1024 // DefaultPersisterNapTimeMSec is kept at zero, as this helps with direct
// persistence of segments with the default safe batch option.
// If the default safe batch option results in a high number of
// files on disk, then users may initialise this configuration parameter
// with higher values so that the persister will nap a bit within its
// work loop to favour better in-memory merging of segments, resulting
// in fewer segment files on disk. But that may come with an indexing
// performance overhead.
// Unsafe batch users are advised to override this to a higher value
// for better performance, especially with high data density.
var DefaultPersisterNapTimeMSec int = 0 // ms
// Arbitrary number, need to make it configurable. // DefaultPersisterNapUnderNumFiles helps in controlling the pace of
// Lower values like 10/making persister really slow // the persister. At times of slow merger progress with heavy file-merging
// doesn't work well as it is creating more files to // operations, it's better to pace down the persister to let the merger
// persist for in next persist iteration and spikes the # FDs. // catch up within a range defined by this parameter.
// Ideal value should let persister also proceed at // Fewer files on disk (as per the merge plan) would result in keeping the
// an optimum pace so that the merger can skip // file-handle usage under the limit, a faster disk merger and a healthier index.
// many intermediate snapshots. // It's been observed that such a loosely sync'ed introducer-persister-merger
// This needs to be based on empirical data. // trio results in better overall performance.
// TODO - may need to revisit this approach/value. var DefaultPersisterNapUnderNumFiles int = 1000
var epochDistance = uint64(5)
var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
type persisterOptions struct {
// PersisterNapTimeMSec controls the wait/delay injected into the
// persistence work loop to improve the chances of
// healthier and heavier in-memory merging
PersisterNapTimeMSec int
// If PersisterNapTimeMSec > 0, and the number of files is less than
// PersisterNapUnderNumFiles, then the persister will sleep
// PersisterNapTimeMSec amount of time to improve the chances for
// a healthier and heavier in-memory merging
PersisterNapUnderNumFiles int
// MemoryPressurePauseThreshold lets the persister have better leeway
// for prudently performing the memory merge of segments in a memory-
// pressure situation. Here the config value is an upper threshold
// for the number of paused application threads. The default value would
// be a very high number to always favour the merging of memory segments.
MemoryPressurePauseThreshold uint64
}
type notificationChan chan struct{} type notificationChan chan struct{}
@ -54,6 +87,16 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64 var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher var ew *epochWatcher
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
s.asyncTasks.Done()
return
}
OUTER: OUTER:
for { for {
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
@ -69,10 +112,11 @@ OUTER:
lastMergedEpoch = ew.epoch lastMergedEpoch = ew.epoch
} }
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers) lastMergedEpoch, persistWatchers, po)
var ourSnapshot *IndexSnapshot var ourSnapshot *IndexSnapshot
var ourPersisted []chan error var ourPersisted []chan error
var ourPersistedCallbacks []index.BatchCallback
// check to see if there is a new snapshot to persist // check to see if there is a new snapshot to persist
s.rootLock.Lock() s.rootLock.Lock()
@ -81,6 +125,8 @@ OUTER:
ourSnapshot.AddRef() ourSnapshot.AddRef()
ourPersisted = s.rootPersisted ourPersisted = s.rootPersisted
s.rootPersisted = nil s.rootPersisted = nil
ourPersistedCallbacks = s.persistedCallbacks
s.persistedCallbacks = nil
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size())) atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch) atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
} }
@ -89,7 +135,7 @@ OUTER:
if ourSnapshot != nil { if ourSnapshot != nil {
startTime := time.Now() startTime := time.Now()
err := s.persistSnapshot(ourSnapshot) err := s.persistSnapshot(ourSnapshot, po)
for _, ch := range ourPersisted { for _, ch := range ourPersisted {
if err != nil { if err != nil {
ch <- err ch <- err
@ -98,17 +144,34 @@ OUTER:
} }
if err != nil { if err != nil {
atomic.StoreUint64(&s.iStats.persistEpoch, 0) atomic.StoreUint64(&s.iStats.persistEpoch, 0)
if err == ErrClosed { if err == segment.ErrClosed {
// index has been closed // index has been closed
_ = ourSnapshot.DecRef() _ = ourSnapshot.DecRef()
break OUTER break OUTER
} }
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef() _ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER continue OUTER
} }
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch) atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
lastPersistedEpoch = ourSnapshot.epoch lastPersistedEpoch = ourSnapshot.epoch
@ -179,15 +242,51 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext return watchersNext
} }
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
persistWatchers []*epochWatcher) (uint64, []*epochWatcher) { lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind // First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// Check the merger lag by counting the segment files on disk.
numFilesOnDisk, _, _ := s.diskFileStats(nil)
// On finding fewer files on disk, the persister takes a short pause
// so that sufficient in-memory segments pile up for the next
// memory-merge-and-persist loop.
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select {
case <-s.closeCh:
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
case ew := <-s.persisterNotifier:
// unblock the merger in meantime
persistWatchers = append(persistWatchers, ew)
lastMergedEpoch = ew.epoch
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
}
return lastMergedEpoch, persistWatchers
}
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting cleanup.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
// The persister pauses until the merger catches up and brings the
// segment file count under the threshold.
// But if there is memory pressure, skip these sleep maneuvers.
OUTER: OUTER:
// check for slow merger and await until the merger catch up for po.PersisterNapUnderNumFiles > 0 &&
for lastPersistedEpoch > lastMergedEpoch+epochDistance { numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
lastMergedEpoch < lastPersistedEpoch {
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
select { select {
@ -202,12 +301,39 @@ OUTER:
// let the watchers proceed if they lag behind // let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
numFilesOnDisk, _, _ = s.diskFileStats(nil)
} }
return lastMergedEpoch, persistWatchers return lastMergedEpoch, persistWatchers
} }
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
po := persisterOptions{
PersisterNapTimeMSec: DefaultPersisterNapTimeMSec,
PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles,
MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold,
}
if v, ok := s.config["scorchPersisterOptions"]; ok {
b, err := json.Marshal(v)
if err != nil {
return &po, err
}
err = json.Unmarshal(b, &po)
if err != nil {
return &po, err
}
}
return &po, nil
}
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
po *persisterOptions) error {
// Perform in-memory segment merging only when the memory pressure is
// below the configured threshold, else the persister performs the
// direct persistence of segments.
if s.paused() < po.MemoryPressurePauseThreshold {
persisted, err := s.persistSnapshotMaybeMerge(snapshot) persisted, err := s.persistSnapshotMaybeMerge(snapshot)
if err != nil { if err != nil {
return err return err
@ -215,6 +341,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
if persisted { if persisted {
return nil return nil
} }
}
return s.persistSnapshotDirect(snapshot) return s.persistSnapshotDirect(snapshot)
} }
@ -230,13 +357,13 @@ var DefaultMinSegmentsForInMemoryMerge = 2
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
bool, error) { bool, error) {
// collect the in-memory zap segments (SegmentBase instances) // collect the in-memory zap segments (SegmentBase instances)
var sbs []*zap.SegmentBase var sbs []segment.Segment
var sbsDrops []*roaring.Bitmap var sbsDrops []*roaring.Bitmap
var sbsIndexes []int var sbsIndexes []int
for i, segmentSnapshot := range snapshot.segment { for i, segmentSnapshot := range snapshot.segment {
if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok { if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok {
sbs = append(sbs, sb) sbs = append(sbs, segmentSnapshot.segment)
sbsDrops = append(sbsDrops, segmentSnapshot.deleted) sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
sbsIndexes = append(sbsIndexes, i) sbsIndexes = append(sbsIndexes, i)
} }
@ -247,7 +374,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
} }
newSnapshot, newSegmentID, err := s.mergeSegmentBases( newSnapshot, newSegmentID, err := s.mergeSegmentBases(
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) snapshot, sbs, sbsDrops, sbsIndexes)
if err != nil { if err != nil {
return false, err return false, err
} }
@ -329,13 +456,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
if err != nil { if err != nil {
return err return err
} }
err = metaBucket.Put([]byte("type"), []byte(zap.Type)) err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type()))
if err != nil { if err != nil {
return err return err
} }
buf := make([]byte, binary.MaxVarintLen32) buf := make([]byte, binary.MaxVarintLen32)
binary.BigEndian.PutUint32(buf, zap.Version) binary.BigEndian.PutUint32(buf, s.segPlugin.Version())
err = metaBucket.Put([]byte("version"), buf) err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
if err != nil { if err != nil {
return err return err
} }
@ -364,11 +491,19 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err return err
} }
switch seg := segmentSnapshot.segment.(type) { switch seg := segmentSnapshot.segment.(type) {
case *zap.SegmentBase: case segment.PersistedSegment:
path := seg.Path()
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
}
filenames = append(filenames, filename)
case segment.UnpersistedSegment:
// need to persist this to disk // need to persist this to disk
filename := zapFileName(segmentSnapshot.id) filename := zapFileName(segmentSnapshot.id)
path := s.path + string(os.PathSeparator) + filename path := s.path + string(os.PathSeparator) + filename
err = zap.PersistSegmentBase(seg, path) err = seg.Persist(path)
if err != nil { if err != nil {
return fmt.Errorf("error persisting segment: %v", err) return fmt.Errorf("error persisting segment: %v", err)
} }
@ -378,14 +513,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err return err
} }
filenames = append(filenames, filename) filenames = append(filenames, filename)
case *zap.Segment:
path := seg.Path()
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
}
filenames = append(filenames, filename)
default: default:
return fmt.Errorf("unknown segment type: %T", seg) return fmt.Errorf("unknown segment type: %T", seg)
} }
@ -423,7 +551,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
} }
}() }()
for segmentID, path := range newSegmentPaths { for segmentID, path := range newSegmentPaths {
newSegments[segmentID], err = zap.Open(path) newSegments[segmentID], err = s.segPlugin.Open(path)
if err != nil { if err != nil {
return fmt.Errorf("error opening new segment at %s, %v", path, err) return fmt.Errorf("error opening new segment at %s, %v", path, err)
} }
@ -436,15 +564,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
select { select {
case <-s.closeCh: case <-s.closeCh:
err = ErrClosed return segment.ErrClosed
return err
case s.persists <- persist: case s.persists <- persist:
} }
select { select {
case <-s.closeCh: case <-s.closeCh:
err = ErrClosed return segment.ErrClosed
return err
case <-persist.applied: case <-persist.applied:
} }
} }
@ -481,6 +607,8 @@ var boltPathKey = []byte{'p'}
var boltDeletedKey = []byte{'d'} var boltDeletedKey = []byte{'d'}
var boltInternalKey = []byte{'i'} var boltInternalKey = []byte{'i'}
var boltMetaDataKey = []byte{'m'} var boltMetaDataKey = []byte{'m'}
var boltMetaDataSegmentTypeKey = []byte("type")
var boltMetaDataSegmentVersionKey = []byte("version")
func (s *Scorch) loadFromBolt() error { func (s *Scorch) loadFromBolt() error {
return s.rootBolt.View(func(tx *bolt.Tx) error { return s.rootBolt.View(func(tx *bolt.Tx) error {
@ -521,11 +649,14 @@ func (s *Scorch) loadFromBolt() error {
s.nextSegmentID++ s.nextSegmentID++
s.rootLock.Lock() s.rootLock.Lock()
s.nextSnapshotEpoch = snapshotEpoch + 1 s.nextSnapshotEpoch = snapshotEpoch + 1
if s.root != nil { rootPrev := s.root
_ = s.root.DecRef()
}
s.root = indexSnapshot s.root = indexSnapshot
s.rootLock.Unlock() s.rootLock.Unlock()
if rootPrev != nil {
_ = rootPrev.DecRef()
}
foundRoot = true foundRoot = true
} }
return nil return nil
@ -562,6 +693,23 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
refs: 1, refs: 1,
creator: "loadSnapshot", creator: "loadSnapshot",
} }
// first we look for the meta-data bucket; this tells us which
// segment type/version was used for this snapshot, and all
// operations for this scorch will use that type/version
metaBucket := snapshot.Bucket(boltMetaDataKey)
if metaBucket == nil {
_ = rv.DecRef()
return nil, fmt.Errorf("meta-data bucket missing")
}
segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
segmentVersion := binary.BigEndian.Uint32(
metaBucket.Get(boltMetaDataSegmentVersionKey))
err := s.loadSegmentPlugin(segmentType, segmentVersion)
if err != nil {
_ = rv.DecRef()
return nil, fmt.Errorf(
"unable to load correct segment wrapper: %v", err)
}
var running uint64 var running uint64
c := snapshot.Cursor() c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() { for k, _ := c.First(); k != nil; k, _ = c.Next() {
@ -606,7 +754,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
return nil, fmt.Errorf("segment path missing") return nil, fmt.Errorf("segment path missing")
} }
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
segment, err := zap.Open(segmentPath) segment, err := s.segPlugin.Open(segmentPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("error opening bolt segment: %v", err) return nil, fmt.Errorf("error opening bolt segment: %v", err)
} }
@ -643,13 +791,12 @@ func (s *Scorch) removeOldData() {
if err != nil { if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
} }
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
if removed > 0 {
err = s.removeOldZapFiles() err = s.removeOldZapFiles()
if err != nil { if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
} }
}
} }
// NumSnapshotsToKeep represents how many recent, old snapshots to // NumSnapshotsToKeep represents how many recent, old snapshots to
@ -690,7 +837,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
s.eligibleForRemoval = newEligible s.eligibleForRemoval = newEligible
s.rootLock.Unlock() s.rootLock.Unlock()
if len(epochsToRemove) <= 0 { if len(epochsToRemove) == 0 {
return 0, nil return 0, nil
} }

@ -28,10 +28,9 @@ import (
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/registry"
"github.com/boltdb/bolt" bolt "go.etcd.io/bbolt"
) )
const Name = "scorch" const Name = "scorch"
@ -41,12 +40,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed") var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct { type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
readOnly bool readOnly bool
version uint8 version uint8
config map[string]interface{} config map[string]interface{}
analysisQueue *index.AnalysisQueue analysisQueue *index.AnalysisQueue
stats Stats
nextSegmentID uint64
path string path string
unsafeBatch bool unsafeBatch bool
@ -54,6 +55,7 @@ type Scorch struct {
rootLock sync.RWMutex rootLock sync.RWMutex
root *IndexSnapshot // holds 1 ref-count on the root root *IndexSnapshot // holds 1 ref-count on the root
rootPersisted []chan error // closed when root is persisted rootPersisted []chan error // closed when root is persisted
persistedCallbacks []index.BatchCallback
nextSnapshotEpoch uint64 nextSnapshotEpoch uint64
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
@ -64,7 +66,6 @@ type Scorch struct {
persists chan *persistIntroduction persists chan *persistIntroduction
merges chan *segmentMerge merges chan *segmentMerge
introducerNotifier chan *epochWatcher introducerNotifier chan *epochWatcher
revertToSnapshots chan *snapshotReversion
persisterNotifier chan *epochWatcher persisterNotifier chan *epochWatcher
rootBolt *bolt.DB rootBolt *bolt.DB
asyncTasks sync.WaitGroup asyncTasks sync.WaitGroup
@ -72,7 +73,11 @@ type Scorch struct {
onEvent func(event Event) onEvent func(event Event)
onAsyncError func(err error) onAsyncError func(err error)
iStats internalStats pauseLock sync.RWMutex
pauseCount uint64
segPlugin segment.Plugin
} }
type internalStats struct { type internalStats struct {
@ -96,7 +101,25 @@ func NewScorch(storeName string,
nextSnapshotEpoch: 1, nextSnapshotEpoch: 1,
closeCh: make(chan struct{}), closeCh: make(chan struct{}),
ineligibleForRemoval: map[string]bool{}, ineligibleForRemoval: map[string]bool{},
segPlugin: defaultSegmentPlugin,
} }
// check if the caller has requested a specific segment type/version
forcedSegmentVersion, ok := config["forceSegmentVersion"].(int)
if ok {
forcedSegmentType, ok2 := config["forceSegmentType"].(string)
if !ok2 {
return nil, fmt.Errorf(
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
}
err := rv.loadSegmentPlugin(forcedSegmentType,
uint32(forcedSegmentVersion))
if err != nil {
return nil, err
}
}
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"} rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
ro, ok := config["read_only"].(bool) ro, ok := config["read_only"].(bool)
if ok { if ok {
@ -117,9 +140,30 @@ func NewScorch(storeName string,
return rv, nil return rv, nil
} }
func (s *Scorch) paused() uint64 {
s.pauseLock.Lock()
pc := s.pauseCount
s.pauseLock.Unlock()
return pc
}
func (s *Scorch) incrPause() {
s.pauseLock.Lock()
s.pauseCount++
s.pauseLock.Unlock()
}
func (s *Scorch) decrPause() {
s.pauseLock.Lock()
s.pauseCount--
s.pauseLock.Unlock()
}
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
if s.onEvent != nil { if s.onEvent != nil {
s.incrPause()
s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
s.decrPause()
} }
} }
@ -189,12 +233,14 @@ func (s *Scorch) openBolt() error {
} }
} }
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))
s.introductions = make(chan *segmentIntroduction) s.introductions = make(chan *segmentIntroduction)
s.persists = make(chan *persistIntroduction) s.persists = make(chan *persistIntroduction)
s.merges = make(chan *segmentMerge) s.merges = make(chan *segmentMerge)
s.introducerNotifier = make(chan *epochWatcher, 1) s.introducerNotifier = make(chan *epochWatcher, 1)
s.revertToSnapshots = make(chan *snapshotReversion)
s.persisterNotifier = make(chan *epochWatcher, 1) s.persisterNotifier = make(chan *epochWatcher, 1)
s.closeCh = make(chan struct{})
if !s.readOnly && s.path != "" { if !s.readOnly && s.path != "" {
err := s.removeOldZapFiles() // Before persister or merger create any new files. err := s.removeOldZapFiles() // Before persister or merger create any new files.
@ -235,7 +281,10 @@ func (s *Scorch) Close() (err error) {
err = s.rootBolt.Close() err = s.rootBolt.Close()
s.rootLock.Lock() s.rootLock.Lock()
if s.root != nil { if s.root != nil {
_ = s.root.DecRef() err2 := s.root.DecRef()
if err == nil {
err = err2
}
} }
s.root = nil s.root = nil
s.rootLock.Unlock() s.rootLock.Unlock()
@ -284,6 +333,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis? // FIXME could sort ids list concurrent with analysis?
if numUpdates > 0 {
go func() { go func() {
for _, doc := range batch.IndexOps { for _, doc := range batch.IndexOps {
if doc != nil { if doc != nil {
@ -293,6 +343,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
} }
} }
}() }()
}
// wait for analysis result // wait for analysis result
analysisResults := make([]*index.AnalysisResult, int(numUpdates)) analysisResults := make([]*index.AnalysisResult, int(numUpdates))
@ -319,7 +370,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
var newSegment segment.Segment var newSegment segment.Segment
var bufBytes uint64 var bufBytes uint64
if len(analysisResults) > 0 { if len(analysisResults) > 0 {
newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
if err != nil { if err != nil {
return err return err
} }
@ -328,7 +379,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
} }
err = s.prepareSegment(newSegment, ids, batch.InternalOps) err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
if err != nil { if err != nil {
if newSegment != nil { if newSegment != nil {
_ = newSegment.Close() _ = newSegment.Close()
@ -348,7 +399,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
} }
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
internalOps map[string][]byte) error { internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
// new introduction // new introduction
introduction := &segmentIntroduction{ introduction := &segmentIntroduction{
@ -358,6 +409,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
obsoletes: make(map[uint64]*roaring.Bitmap), obsoletes: make(map[uint64]*roaring.Bitmap),
internal: internalOps, internal: internalOps,
applied: make(chan error), applied: make(chan error),
persistedCallback: persistedCallback,
} }
if !s.unsafeBatch { if !s.unsafeBatch {
@ -370,6 +422,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
root.AddRef() root.AddRef()
s.rootLock.RUnlock() s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
for _, seg := range root.segment { for _, seg := range root.segment {
delta, err := seg.segment.DocNumbers(ids) delta, err := seg.segment.DocNumbers(ids)
if err != nil { if err != nil {
@ -378,8 +432,6 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
introduction.obsoletes[seg.id] = delta introduction.obsoletes[seg.id] = delta
} }
_ = root.DecRef()
introStartTime := time.Now() introStartTime := time.Now()
s.introductions <- introduction s.introductions <- introduction
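A hedged sketch of how the new persistedCallback plumbing might be used from the caller's side; it assumes index.NewBatch and a Batch.SetPersistedCallback setter matching the PersistedCallback getter consumed in Batch above:

// sketch only; SetPersistedCallback is an assumed bleve index.Batch API
batch := index.NewBatch()
batch.SetPersistedCallback(func(err error) {
	// fired once the batch's segment is durably persisted (or fails)
	log.Printf("batch persisted: err=%v", err)
})
// ... populate the batch and hand it to Scorch.Batch as usual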
@ -434,24 +486,57 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot {
func (s *Scorch) Stats() json.Marshaler { func (s *Scorch) Stats() json.Marshaler {
return &s.stats return &s.stats
} }
func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
uint64, uint64) {
var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
if s.path != "" { if s.path != "" {
finfos, err := ioutil.ReadDir(s.path) finfos, err := ioutil.ReadDir(s.path)
if err == nil { if err == nil {
var numFilesOnDisk, numBytesUsedDisk uint64
for _, finfo := range finfos { for _, finfo := range finfos {
if !finfo.IsDir() { if !finfo.IsDir() {
numBytesUsedDisk += uint64(finfo.Size()) numBytesUsedDisk += uint64(finfo.Size())
numFilesOnDisk++ numFilesOnDisk++
if rootSegmentPaths != nil {
fname := s.path + string(os.PathSeparator) + finfo.Name()
if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
numBytesOnDiskByRoot += uint64(finfo.Size())
} }
} }
}
}
}
}
// if no root segment paths were given, then consider all disk files.
if rootSegmentPaths == nil {
return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
}
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
}
func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} {
rv := make(map[string]struct{}, len(s.root.segment))
for _, segmentSnapshot := range s.root.segment {
if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
rv[seg.Path()] = struct{}{}
}
}
return rv
}
func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
s.rootLock.RLock()
rootSegPaths := s.rootDiskSegmentsPaths()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()
numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk m["CurOnDiskFiles"] = numFilesOnDisk
}
}
// TODO: consider one day removing these backwards compatible // TODO: consider one day removing these backwards compatible
// names for apps using the old names // names for apps using the old names
@ -466,8 +551,16 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_introduced"] = m["TotIntroducedItems"]
m["num_items_persisted"] = m["TotPersistedItems"] m["num_items_persisted"] = m["TotPersistedItems"]
m["num_bytes_used_disk"] = m["CurOnDiskBytes"] m["num_recs_to_persist"] = m["TotItemsToPersist"]
m["num_files_on_disk"] = m["CurOnDiskFiles"] // total disk bytes found in index directory inclusive of older snapshots
m["num_bytes_used_disk"] = numBytesUsedDisk
// total disk bytes used by the latest root index, exclusive of older snapshots
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
m["num_files_on_disk"] = numFilesOnDisk
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"] m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
return m return m
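An illustrative read of the new and renamed disk metrics, assuming s is a *Scorch:

m := s.StatsMap()
fmt.Println(m["num_files_on_disk"])           // every file in the index directory
fmt.Println(m["num_bytes_used_disk"])         // total bytes, older snapshots included
fmt.Println(m["num_bytes_used_disk_by_root"]) // bytes referenced by the latest root only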
@ -486,7 +579,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult {
rv.Analyzed[i] = tokenFreqs rv.Analyzed[i] = tokenFreqs
rv.Length[i] = fieldLength rv.Length[i] = fieldLength
if len(d.CompositeFields) > 0 { if len(d.CompositeFields) > 0 && field.Name() != "_id" {
// see if any of the composite fields need this // see if any of the composite fields need this
for _, compositeField := range d.CompositeFields { for _, compositeField := range d.CompositeFields {
compositeField.Compose(field.Name(), fieldLength, tokenFreqs) compositeField.Compose(field.Name(), fieldLength, tokenFreqs)

@ -17,6 +17,7 @@ package segment
import ( import (
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/couchbase/vellum"
) )
type EmptySegment struct{} type EmptySegment struct{}
@ -80,12 +81,8 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
return &EmptyDictionaryIterator{} return &EmptyDictionaryIterator{}
} }
func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator { func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton,
return &EmptyDictionaryIterator{} startKeyInclusive, endKeyExclusive []byte) DictionaryIterator {
}
func (e *EmptyDictionary) FuzzyIterator(term string,
fuzziness int) DictionaryIterator {
return &EmptyDictionaryIterator{} return &EmptyDictionaryIterator{}
} }
@ -94,14 +91,18 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{} return &EmptyDictionaryIterator{}
} }
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyDictionaryIterator struct{} type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil return nil, nil
} }
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return nil, nil return false, nil
} }
type EmptyPostingsList struct{} type EmptyPostingsList struct{}
@ -125,6 +126,12 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil return nil, nil
} }
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
}
func (e *EmptyPostingsIterator) Size() int { func (e *EmptyPostingsIterator) Size() int {
return 0 return 0
} }
var AnEmptyPostingsIterator = &EmptyPostingsIterator{}

@ -19,7 +19,10 @@
package segment package segment
import "fmt" import (
"errors"
"fmt"
)
const ( const (
MaxVarintSize = 9 MaxVarintSize = 9
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
} }
return b[length:], v, nil return b[length:], v, nil
} }
// ------------------------------------------------------------
type MemUvarintReader struct {
C int // index of next byte to read from S
S []byte
}
func NewMemUvarintReader(s []byte) *MemUvarintReader {
return &MemUvarintReader{S: s}
}
// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
n := len(r.S) - r.C
if n < 0 {
return 0
}
return n
}
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")
// ReadUvarint reads an encoded uint64. It is based on the code in
// encoding/binary's ReadUvarint().
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
var x uint64
var s uint
var C = r.C
var S = r.S
for {
b := S[C]
C++
if b < 0x80 {
r.C = C
// why 63? The original code had an 'i += 1' loop var and
// checked for i > 9 || i == 9 ...; but, we no longer
// check for the i var, but instead check here for s,
// which is incremented by 7. So, 7*9 == 63.
//
// why the "extra" >= check? The normal case is that s <
// 63, so we check this single >= guard first so that we
// hit the normal, nil-error return pathway sooner.
if s >= 63 && (s > 63 || s == 63 && b > 1) {
return 0, ErrMemUvarintReaderOverflow
}
return x | uint64(b)<<s, nil
}
x |= uint64(b&0x7f) << s
s += 7
}
}
// SkipUvarint skips ahead one encoded uint64.
func (r *MemUvarintReader) SkipUvarint() {
for {
b := r.S[r.C]
r.C++
if b < 0x80 {
return
}
}
}
// SkipBytes skips count bytes ahead.
func (r *MemUvarintReader) SkipBytes(count int) {
r.C = r.C + count
}
func (r *MemUvarintReader) Reset(s []byte) {
r.C = 0
r.S = s
}
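A small usage sketch (not from the patched sources) pairing MemUvarintReader with the standard library's encoder:

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/blevesearch/bleve/index/scorch/segment"
)

func main() {
	// encode two uvarints with encoding/binary...
	buf := make([]byte, 2*binary.MaxVarintLen64)
	n := binary.PutUvarint(buf, 300)
	n += binary.PutUvarint(buf[n:], 7)

	// ...and decode them with the allocation-free reader above
	r := segment.NewMemUvarintReader(buf[:n])
	v, _ := r.ReadUvarint() // 300
	r.SkipUvarint()         // skips past the 7
	fmt.Println(v, r.Len()) // 300 0
}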

@ -0,0 +1,58 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
)
// Plugin represents the essential functions required by a package to plug in
// its segment implementation
type Plugin interface {
// Type is the name for this segment plugin
Type() string
// Version is a numeric value identifying a specific version of this type.
// When incompatible changes are made to a particular type of plugin, the
// version must be incremented.
Version() uint32
// New takes a set of AnalysisResults and turns them into a new Segment
New(results []*index.AnalysisResult) (Segment, uint64, error)
// Open attempts to open the file at the specified path and
// return the corresponding Segment
Open(path string) (Segment, error)
// Merge takes a set of Segments, and creates a new segment on disk at
// the specified path.
// Drops is a set of bitmaps (one for each segment) indicating which
// documents can be dropped from the segments during the merge.
// If the closeCh channel is closed, Merge will cease doing work at
// the next opportunity, and return an error (closed).
// StatsReporter can optionally be provided, in which case progress
// made during the merge is reported while the operation continues.
// Returns:
// A slice of new document numbers (one for each input segment),
// this allows the caller to know a particular document's new
// document number in the newly merged segment.
// The number of bytes written to the new segment file.
// An error, if any occurred.
Merge(segments []Segment, drops []*roaring.Bitmap, path string,
closeCh chan struct{}, s StatsReporter) (
[][]uint64, uint64, error)
}

@ -0,0 +1,75 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"regexp/syntax"
"github.com/couchbase/vellum/regexp"
)
func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
// TODO: potential optimization where syntax.Regexp supports a Simplify() API?
parsed, err := syntax.Parse(pattern, syntax.Perl)
if err != nil {
return nil, nil, nil, err
}
re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
if err != nil {
return nil, nil, nil, err
}
prefix := LiteralPrefix(parsed)
if prefix != "" {
prefixBeg := []byte(prefix)
prefixEnd := IncrementBytes(prefixBeg)
return re, prefixBeg, prefixEnd, nil
}
return re, nil, nil, nil
}
// LiteralPrefix returns the literal prefix given the parse tree for a regexp
func LiteralPrefix(s *syntax.Regexp) string {
// traverse the left-most branch in the parse tree as long as the
// node represents a concatenation
for s != nil && s.Op == syntax.OpConcat {
if len(s.Sub) < 1 {
return ""
}
s = s.Sub[0]
}
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
return string(s.Rune)
}
return "" // no literal prefix
}
func IncrementBytes(in []byte) []byte {
rv := make([]byte, len(in))
copy(rv, in)
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
return rv // didn't overflow, so stop
}
}
return nil // overflowed
}
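For example, the pattern `foo.*` has the literal prefix "foo", so ParseRegexp bounds the automaton search to the key range ["foo", "fop"); a quick illustrative fragment:

a, beg, end, err := segment.ParseRegexp("foo.*")
if err == nil && a != nil {
	fmt.Printf("search keys in [%s, %s)\n", beg, end) // [foo, fop)
}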

@ -15,10 +15,15 @@
package segment package segment
import ( import (
"fmt"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/couchbase/vellum"
) )
var ErrClosed = fmt.Errorf("index closed")
// DocumentFieldValueVisitor defines a callback to be visited for each // DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor // stored field value. The return value determines if the visitor
// should keep going. Returning true continues visiting, false stops. // should keep going. Returning true continues visiting, false stops.
@ -45,15 +50,27 @@ type Segment interface {
DecRef() error DecRef() error
} }
type UnpersistedSegment interface {
Segment
Persist(path string) error
}
type PersistedSegment interface {
Segment
Path() string
}
type TermDictionary interface { type TermDictionary interface {
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
Iterator() DictionaryIterator Iterator() DictionaryIterator
PrefixIterator(prefix string) DictionaryIterator PrefixIterator(prefix string) DictionaryIterator
RangeIterator(start, end string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator
RegexpIterator(regex string) DictionaryIterator AutomatonIterator(a vellum.Automaton,
FuzzyIterator(term string, fuzziness int) DictionaryIterator startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
Contains(key []byte) (bool, error)
} }
type DictionaryIterator interface { type DictionaryIterator interface {
@ -89,6 +106,12 @@ type PostingsIterator interface {
Size() int Size() int
} }
type OptimizablePostingsIterator interface {
ActualBitmap() *roaring.Bitmap
DocNum1Hit() (uint64, bool)
ReplaceActual(*roaring.Bitmap)
}
type Posting interface { type Posting interface {
Number() uint64 Number() uint64
@ -124,3 +147,7 @@ type DocumentFieldTermVisitable interface {
type DocVisitState interface { type DocVisitState interface {
} }
type StatsReporter interface {
ReportBytesWritten(bytesWritten uint64)
}

@ -0,0 +1,148 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"github.com/RoaringBitmap/roaring"
"math"
"reflect"
)
var reflectStaticSizeUnadornedPostingsIteratorBitmap int
var reflectStaticSizeUnadornedPostingsIterator1Hit int
var reflectStaticSizeUnadornedPosting int
func init() {
var pib UnadornedPostingsIteratorBitmap
reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size())
var pi1h UnadornedPostingsIterator1Hit
reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size())
var up UnadornedPosting
reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size())
}
type UnadornedPostingsIteratorBitmap struct {
actual roaring.IntPeekable
actualBM *roaring.Bitmap
}
func (i *UnadornedPostingsIteratorBitmap) Next() (Posting, error) {
return i.nextAtOrAfter(0)
}
func (i *UnadornedPostingsIteratorBitmap) Advance(docNum uint64) (Posting, error) {
return i.nextAtOrAfter(docNum)
}
func (i *UnadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
if !exists {
return nil, nil
}
return UnadornedPosting(docNum), nil
}
func (i *UnadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
if i.actual == nil || !i.actual.HasNext() {
return 0, false
}
i.actual.AdvanceIfNeeded(uint32(atOrAfter))
if !i.actual.HasNext() {
return 0, false // couldn't find anything
}
return uint64(i.actual.Next()), true
}
func (i *UnadornedPostingsIteratorBitmap) Size() int {
return reflectStaticSizeUnadornedPostingsIteratorBitmap
}
func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator {
return &UnadornedPostingsIteratorBitmap{
actualBM: bm,
actual: bm.Iterator(),
}
}
const docNum1HitFinished = math.MaxUint64
type UnadornedPostingsIterator1Hit struct {
docNum uint64
}
func (i *UnadornedPostingsIterator1Hit) Next() (Posting, error) {
return i.nextAtOrAfter(0)
}
func (i *UnadornedPostingsIterator1Hit) Advance(docNum uint64) (Posting, error) {
return i.nextAtOrAfter(docNum)
}
func (i *UnadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
if !exists {
return nil, nil
}
return UnadornedPosting(docNum), nil
}
func (i *UnadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
if i.docNum == docNum1HitFinished {
return 0, false
}
if i.docNum < atOrAfter {
// advanced past our 1-hit
i.docNum = docNum1HitFinished // consume our 1-hit docNum
return 0, false
}
docNum := i.docNum
i.docNum = docNum1HitFinished // consume our 1-hit docNum
return docNum, true
}
func (i *UnadornedPostingsIterator1Hit) Size() int {
return reflectStaticSizeUnadornedPostingsIterator1Hit
}
func NewUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) PostingsIterator {
return &UnadornedPostingsIterator1Hit{
docNum1Hit,
}
}
type UnadornedPosting uint64
func (p UnadornedPosting) Number() uint64 {
return uint64(p)
}
func (p UnadornedPosting) Frequency() uint64 {
return 0
}
func (p UnadornedPosting) Norm() float64 {
return 0
}
func (p UnadornedPosting) Locations() []Location {
return nil
}
func (p UnadornedPosting) Size() int {
return reflectStaticSizeUnadornedPosting
}
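A minimal sketch draining an unadorned iterator built from a roaring bitmap; the postings carry only document numbers (frequency, norm and locations are zero-valued):

bm := roaring.BitmapOf(2, 5, 9)
it := segment.NewUnadornedPostingsIteratorFromBitmap(bm)
for p, err := it.Next(); err == nil && p != nil; p, err = it.Next() {
	fmt.Println(p.Number()) // 2, 5, 9
}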

@ -0,0 +1,77 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"github.com/blevesearch/bleve/index/scorch/segment"
zapv11 "github.com/blevesearch/zap/v11"
zapv12 "github.com/blevesearch/zap/v12"
)
var supportedSegmentPlugins map[string]map[uint32]segment.Plugin
var defaultSegmentPlugin segment.Plugin
func init() {
ResetPlugins()
RegisterPlugin(zapv12.Plugin(), false)
RegisterPlugin(zapv11.Plugin(), true)
}
func ResetPlugins() {
supportedSegmentPlugins = map[string]map[uint32]segment.Plugin{}
}
func RegisterPlugin(plugin segment.Plugin, makeDefault bool) {
if _, ok := supportedSegmentPlugins[plugin.Type()]; !ok {
supportedSegmentPlugins[plugin.Type()] = map[uint32]segment.Plugin{}
}
supportedSegmentPlugins[plugin.Type()][plugin.Version()] = plugin
if makeDefault {
defaultSegmentPlugin = plugin
}
}
func SupportedSegmentTypes() (rv []string) {
for k := range supportedSegmentPlugins {
rv = append(rv, k)
}
return
}
func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
for k := range supportedSegmentPlugins[typ] {
rv = append(rv, k)
}
return rv
}
func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
forcedSegmentVersion uint32) error {
if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok {
if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok {
s.segPlugin = segPlugin
return nil
}
return fmt.Errorf(
"unsupported version %d for segment type: %s, supported: %v",
forcedSegmentVersion, forcedSegmentType,
SupportedSegmentTypeVersions(forcedSegmentType))
}
return fmt.Errorf("unsupported segment type: %s, supported: %v",
forcedSegmentType, SupportedSegmentTypes())
}
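Together with the forceSegmentType/forceSegmentVersion handling in NewScorch above, a caller can pin an index to one registered plugin; a hedged sketch, assuming the zap plugins register under the type name "zap" with versions 11 and 12:

config := map[string]interface{}{
	"forceSegmentType":    "zap",
	"forceSegmentVersion": 11, // must be an int, per the type assertion in NewScorch
}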

@ -27,9 +27,15 @@ import (
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
lev "github.com/couchbase/vellum/levenshtein"
) )
// reusable, thread-safe Levenshtein builders
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct { type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator dictItr segment.DictionaryIterator
index int index int
@ -45,6 +51,15 @@ var reflectStaticSizeIndexSnapshot int
func init() { func init() {
var is interface{} = IndexSnapshot{} var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
}
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
}
} }
type IndexSnapshot struct { type IndexSnapshot struct {
@ -61,7 +76,6 @@ type IndexSnapshot struct {
m2 sync.Mutex // Protects the fields that follow. m2 sync.Mutex // Protects the fields that follow.
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts
} }
func (i *IndexSnapshot) Segments() []*SegmentSnapshot { func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
@ -113,17 +127,23 @@ func (i *IndexSnapshot) updateSize() {
} }
} }
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult) results := make(chan *asynchSegmentResult)
for index, segment := range i.segment { for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) { go func(index int, segment *SegmentSnapshot) {
dict, err := segment.Dictionary(field) dict, err := segment.segment.Dictionary(field)
if err != nil { if err != nil {
results <- &asynchSegmentResult{err: err} results <- &asynchSegmentResult{err: err}
} else {
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else { } else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)} results <- &asynchSegmentResult{dictItr: makeItr(dict)}
} }
}
}(index, segment) }(index, segment)
} }
@ -137,6 +157,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil { if asr.err != nil && err == nil {
err = asr.err err = asr.err
} else { } else {
if !randomLookup {
next, err2 := asr.dictItr.Next() next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil { if err2 != nil && err == nil {
err = err2 err = err2
@ -147,14 +168,22 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
curr: *next, curr: *next,
}) })
} }
} else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
dict: asr.dict,
})
}
} }
} }
// after ensuring we've read all items on channel // after ensuring we've read all items on channel
if err != nil { if err != nil {
return nil, err return nil, err
} }
if !randomLookup {
// prepare heap // prepare heap
heap.Init(rv) heap.Init(rv)
}
return rv, nil return rv, nil
} }
@ -162,42 +191,75 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator() return i.Iterator()
}) }, false)
} }
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) { endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm)) return i.RangeIterator(string(startTerm), string(endTerm))
}) }, false)
} }
func (i *IndexSnapshot) FieldDictPrefix(field string, func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) { termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix)) return i.PrefixIterator(string(termPrefix))
}) }, false)
} }
func (i *IndexSnapshot) FieldDictRegexp(field string, func (i *IndexSnapshot) FieldDictRegexp(field string,
termRegex []byte) (index.FieldDict, error) { termRegex string) (index.FieldDict, error) {
// TODO: potential optimization where the literal prefix represents the
// entire regexp, allowing us to use PrefixIterator(prefixTerm)?
a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex)
if err != nil {
return nil, err
}
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RegexpIterator(string(termRegex)) return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}) }, false)
}
func (i *IndexSnapshot) getLevAutomaton(term string,
fuzziness uint8) (vellum.Automaton, error) {
if fuzziness == 1 {
return lb1.BuildDfa(term, fuzziness)
} else if fuzziness == 2 {
return lb2.BuildDfa(term, fuzziness)
}
return nil, fmt.Errorf("fuzziness exceeds the max limit")
} }
func (i *IndexSnapshot) FieldDictFuzzy(field string, func (i *IndexSnapshot) FieldDictFuzzy(field string,
term []byte, fuzziness int) (index.FieldDict, error) { term string, fuzziness int, prefix string) (index.FieldDict, error) {
a, err := i.getLevAutomaton(term, uint8(fuzziness))
if err != nil {
return nil, err
}
var prefixBeg, prefixEnd []byte
if prefix != "" {
prefixBeg = []byte(prefix)
prefixEnd = segment.IncrementBytes(prefixBeg)
}
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.FuzzyIterator(string(term), fuzziness) return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}) }, false)
} }
func (i *IndexSnapshot) FieldDictOnly(field string, func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount) return i.OnlyIterator(onlyTerms, includeCount)
}) }, false)
}
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
return i.newIndexSnapshotFieldDict(field, nil, true)
} }
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
@ -393,8 +455,8 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
} }
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) { includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
rv, dicts := i.allocTermFieldReaderDicts(field) rv := i.allocTermFieldReaderDicts(field)
rv.term = term rv.term = term
rv.field = field rv.field = field
@ -412,20 +474,19 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
rv.currPosting = nil rv.currPosting = nil
rv.currID = rv.currID[:0] rv.currID = rv.currID[:0]
if dicts == nil { if rv.dicts == nil {
dicts = make([]segment.TermDictionary, len(i.segment)) rv.dicts = make([]segment.TermDictionary, len(i.segment))
for i, segment := range i.segment { for i, segment := range i.segment {
dict, err := segment.Dictionary(field) dict, err := segment.segment.Dictionary(field)
if err != nil { if err != nil {
return nil, err return nil, err
} }
dicts[i] = dict rv.dicts[i] = dict
} }
} }
rv.dicts = dicts
for i := range i.segment { for i, segment := range i.segment {
pl, err := dicts[i].PostingsList(term, nil, rv.postings[i]) pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -436,37 +497,37 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
return rv, nil return rv, nil
} }
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) ( func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) {
i.m2.Lock() i.m2.Lock()
if i.fieldDicts != nil {
dicts = i.fieldDicts[field]
}
if i.fieldTFRs != nil { if i.fieldTFRs != nil {
tfrs := i.fieldTFRs[field] tfrs := i.fieldTFRs[field]
last := len(tfrs) - 1 last := len(tfrs) - 1
if last >= 0 { if last >= 0 {
rv := tfrs[last] tfr = tfrs[last]
tfrs[last] = nil tfrs[last] = nil
i.fieldTFRs[field] = tfrs[:last] i.fieldTFRs[field] = tfrs[:last]
i.m2.Unlock() i.m2.Unlock()
return rv, dicts return
} }
} }
i.m2.Unlock() i.m2.Unlock()
return &IndexSnapshotTermFieldReader{}, dicts return &IndexSnapshotTermFieldReader{}
} }
func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
i.parent.rootLock.RLock()
obsolete := i.parent.root != i
i.parent.rootLock.RUnlock()
if obsolete {
// if we're not the current root (mutations happened), don't bother recycling
return
}
i.m2.Lock() i.m2.Lock()
if i.fieldTFRs == nil { if i.fieldTFRs == nil {
i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
} }
i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
if i.fieldDicts == nil {
i.fieldDicts = map[string][]segment.TermDictionary{}
}
i.fieldDicts[tfr.field] = tfr.dicts
i.m2.Unlock() i.m2.Unlock()
} }
@ -636,7 +697,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} {
// subtractStrings returns set a minus elements of set b. // subtractStrings returns set a minus elements of set b.
func subtractStrings(a, b []string) []string { func subtractStrings(a, b []string) []string {
if len(b) <= 0 { if len(b) == 0 {
return a return a
} }

@ -22,6 +22,7 @@ import (
) )
type segmentDictCursor struct { type segmentDictCursor struct {
dict segment.TermDictionary
itr segment.DictionaryIterator itr segment.DictionaryIterator
curr index.DictEntry curr index.DictEntry
} }
@ -52,7 +53,7 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} {
} }
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
if len(i.cursors) <= 0 { if len(i.cursors) == 0 {
return nil, nil return nil, nil
} }
i.entry = i.cursors[0].curr i.entry = i.cursors[0].curr
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error { func (i *IndexSnapshotFieldDict) Close() error {
return nil return nil
} }
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
if len(i.cursors) == 0 {
return false, nil
}
for _, cursor := range i.cursors {
if found, _ := cursor.dict.Contains(key); found {
return true, nil
}
}
return false, nil
}
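A short sketch of the random-lookup path, assuming snap is an *IndexSnapshot obtained from the index reader:

fd, err := snap.FieldDictContains("body")
if err == nil {
	found, _ := fd.Contains([]byte("bleve")) // true if any segment's dictionary has the term
	fmt.Println(found)
}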

@ -74,7 +74,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
rv = &index.TermFieldDoc{} rv = &index.TermFieldDoc{}
} }
// find the next hit // find the next hit
for i.segmentOffset < len(i.postings) { for i.segmentOffset < len(i.iterators) {
next, err := i.iterators[i.segmentOffset].Next() next, err := i.iterators[i.segmentOffset].Next()
if err != nil { if err != nil {
return nil, err return nil, err

@ -17,9 +17,10 @@ package scorch
import ( import (
"fmt" "fmt"
"log" "log"
"os"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/boltdb/bolt" bolt "go.etcd.io/bbolt"
) )
type RollbackPoint struct { type RollbackPoint struct {
@ -34,13 +35,22 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte {
// RollbackPoints returns an array of rollback points available for // RollbackPoints returns an array of rollback points available for
// the application to rollback to, with more recent rollback points // the application to rollback to, with more recent rollback points
// (higher epochs) coming first. // (higher epochs) coming first.
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { func RollbackPoints(path string) ([]*RollbackPoint, error) {
if s.rootBolt == nil { if len(path) == 0 {
return nil, fmt.Errorf("RollbackPoints: root is nil") return nil, fmt.Errorf("RollbackPoints: invalid path")
}
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
rootBoltOpt := &bolt.Options{
ReadOnly: true,
}
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
if err != nil || rootBolt == nil {
return nil, err
} }
// start a read-only bolt transaction // start a read-only bolt transaction
tx, err := s.rootBolt.Begin(false) tx, err := rootBolt.Begin(false)
if err != nil { if err != nil {
return nil, fmt.Errorf("RollbackPoints: failed to start" + return nil, fmt.Errorf("RollbackPoints: failed to start" +
" read-only transaction") " read-only transaction")
@ -49,6 +59,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
// read-only bolt transactions to be rolled back // read-only bolt transactions to be rolled back
defer func() { defer func() {
_ = tx.Rollback() _ = tx.Rollback()
_ = rootBolt.Close()
}() }()
snapshots := tx.Bucket(boltSnapshotsBucket) snapshots := tx.Bucket(boltSnapshotsBucket)
@ -105,69 +116,98 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
return rollbackPoints, nil return rollbackPoints, nil
} }
// Rollback atomically and durably (if unsafeBatch is unset) brings // Rollback atomically and durably brings the store back to the point
// the store back to the point in time as represented by the // in time as represented by the RollbackPoint.
// RollbackPoint. Rollback() should only be passed a RollbackPoint // Rollback() should only be passed a RollbackPoint that came from the
// that came from the same store using the RollbackPoints() API. // same store using the RollbackPoints() API along with the index path.
func (s *Scorch) Rollback(to *RollbackPoint) error { func Rollback(path string, to *RollbackPoint) error {
if to == nil { if to == nil {
return fmt.Errorf("Rollback: RollbackPoint is nil") return fmt.Errorf("Rollback: RollbackPoint is nil")
} }
if len(path) == 0 {
if s.rootBolt == nil { return fmt.Errorf("Rollback: index path is empty")
return fmt.Errorf("Rollback: root is nil")
} }
revert := &snapshotReversion{} rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
rootBoltOpt := &bolt.Options{
ReadOnly: false,
}
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
if err != nil || rootBolt == nil {
return err
}
defer func() {
err1 := rootBolt.Close()
if err1 != nil && err == nil {
err = err1
}
}()
s.rootLock.Lock() // pick all the younger persisted epochs in bolt store
// including the target one.
err := s.rootBolt.View(func(tx *bolt.Tx) error { var found bool
var eligibleEpochs []uint64
err = rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket) snapshots := tx.Bucket(boltSnapshotsBucket)
if snapshots == nil { if snapshots == nil {
return fmt.Errorf("Rollback: no snapshots available") return nil
} }
sc := snapshots.Cursor()
pos := segment.EncodeUvarintAscending(nil, to.epoch) for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
_, snapshotEpoch, err := segment.DecodeUvarintAscending(sk)
snapshot := snapshots.Bucket(pos)
if snapshot == nil {
return fmt.Errorf("Rollback: snapshot not found")
}
indexSnapshot, err := s.loadSnapshot(snapshot)
if err != nil { if err != nil {
return fmt.Errorf("Rollback: unable to load snapshot: %v", err) continue
} }
if snapshotEpoch == to.epoch {
// add segments referenced by loaded index snapshot to the found = true
// ineligibleForRemoval map }
for _, segSnap := range indexSnapshot.segment { eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
filename := zapFileName(segSnap.id)
s.ineligibleForRemoval[filename] = true
} }
revert.snapshot = indexSnapshot
revert.applied = make(chan error)
revert.persisted = make(chan error)
return nil return nil
}) })
s.rootLock.Unlock() if len(eligibleEpochs) == 0 {
return fmt.Errorf("Rollback: no persisted epochs found in bolt")
}
if !found {
return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
}
// start a write transaction
tx, err := rootBolt.Begin(true)
if err != nil { if err != nil {
return err return err
} }
// introduce the reversion defer func() {
s.revertToSnapshots <- revert if err == nil {
err = tx.Commit()
} else {
_ = tx.Rollback()
}
if err == nil {
err = rootBolt.Sync()
}
}()
// block until this snapshot is applied snapshots := tx.Bucket(boltSnapshotsBucket)
err = <-revert.applied if snapshots == nil {
return nil
}
for _, epoch := range eligibleEpochs {
k := segment.EncodeUvarintAscending(nil, epoch)
if err != nil { if err != nil {
return fmt.Errorf("Rollback: failed with err: %v", err) continue
}
if epoch == to.epoch {
// return here as it already processed until the given epoch
return nil
}
err = snapshots.DeleteBucket(k)
if err == bolt.ErrBucketNotFound {
err = nil
}
} }
return <-revert.persisted return err
} }
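A usage sketch for the now path-based rollback API; since both functions open root.bolt themselves, the index is assumed to be closed while this runs:

points, err := scorch.RollbackPoints("/path/to/index")
if err == nil && len(points) > 1 {
	// points are ordered most recent first; roll back one persisted epoch
	err = scorch.Rollback("/path/to/index", points[1])
}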

@ -29,43 +29,6 @@ var TermSeparator byte = 0xff
var TermSeparatorSplitSlice = []byte{TermSeparator} var TermSeparatorSplitSlice = []byte{TermSeparator}
type SegmentDictionarySnapshot struct {
s *SegmentSnapshot
d segment.TermDictionary
}
func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap,
prealloc segment.PostingsList) (segment.PostingsList, error) {
// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
return s.d.PostingsList(term, s.s.deleted, prealloc)
}
func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
return s.d.Iterator()
}
func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
return s.d.PrefixIterator(prefix)
}
func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
return s.d.RangeIterator(start, end)
}
func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
return s.d.RegexpIterator(regex)
}
func (s *SegmentDictionarySnapshot) FuzzyIterator(term string,
fuzziness int) segment.DictionaryIterator {
return s.d.FuzzyIterator(term, fuzziness)
}
func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte,
includeCount bool) segment.DictionaryIterator {
return s.d.OnlyIterator(onlyTerms, includeCount)
}
type SegmentSnapshot struct { type SegmentSnapshot struct {
id uint64 id uint64
segment segment.Segment segment segment.Segment
@ -115,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 {
return rv return rv
} }
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
d, err := s.segment.Dictionary(field)
if err != nil {
return nil, err
}
return &SegmentDictionarySnapshot{
s: s,
d: d,
}, nil
}
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
rv, err := s.segment.DocNumbers(docIDs) rv, err := s.segment.DocNumbers(docIDs)
if err != nil { if err != nil {
@ -137,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
return rv, nil
}

// DocNumbersLive returns a bitmap containing doc numbers for all live docs
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
rv := roaring.NewBitmap()
rv.AddRange(0, s.segment.Count())
@ -161,14 +113,29 @@ func (s *SegmentSnapshot) Size() (rv int) {
}

type cachedFieldDocs struct {
m sync.Mutex
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
size uint64
}
func (cfd *cachedFieldDocs) Size() int {
var rv int
cfd.m.Lock()
for _, entry := range cfd.docs {
rv += 8 /* size of uint64 */ + len(entry)
}
cfd.m.Unlock()
return rv
}
func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
cfd.m.Lock()
defer func() {
close(cfd.readyCh)
cfd.m.Unlock()
}()

cfd.size += uint64(size.SizeOfUint64) /* size field */
dict, err := ss.segment.Dictionary(field)
@ -216,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
}

type cachedDocs struct {
size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
}

func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
@ -279,9 +246,7 @@ func (c *cachedDocs) updateSizeLOCKED() {
for k, v := range c.cache { // cachedFieldDocs
sizeInBytes += len(k)
if v != nil {
sizeInBytes += v.Size()
}
}
atomic.StoreUint64(&c.size, uint64(sizeInBytes))

View File

@ -69,11 +69,15 @@ type Stats struct {
TotPersistLoopEnd uint64
TotPersistedItems uint64
TotItemsToPersist uint64
TotPersistedSegments uint64
TotPersisterSlowMergerPause uint64
TotPersisterSlowMergerResume uint64
TotPersisterNapPauseCompleted uint64
TotPersisterMergerNapBreak uint64
TotFileMergeLoopBeg uint64
TotFileMergeLoopErr uint64
TotFileMergeLoopEnd uint64
@ -91,15 +95,22 @@ type Stats struct {
TotFileMergeSegmentsEmpty uint64
TotFileMergeSegments uint64
TotFileSegmentsAtRoot uint64
TotFileMergeWrittenBytes uint64
TotFileMergeZapBeg uint64
TotFileMergeZapEnd uint64
TotFileMergeZapTime uint64
MaxFileMergeZapTime uint64
TotFileMergeZapIntroductionTime uint64
MaxFileMergeZapIntroductionTime uint64
TotFileMergeIntroductions uint64
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64
CurFilesIneligibleForRemoval uint64
TotSnapshotsRemovedFromMetaStore uint64
TotMemMergeBeg uint64
TotMemMergeErr uint64
@ -109,6 +120,7 @@ type Stats struct {
TotMemMergeZapTime uint64
MaxMemMergeZapTime uint64
TotMemMergeSegments uint64
TotMemorySegmentsAtRoot uint64
}

// atomically populates the returned map

View File

@ -17,7 +17,7 @@ package boltdb
import (
"bytes"

bolt "go.etcd.io/bbolt"
)

type Iterator struct {

View File

@ -16,7 +16,7 @@ package boltdb
import (
"github.com/blevesearch/bleve/index/store"

bolt "go.etcd.io/bbolt"
)

type Reader struct {

View File

@ -30,7 +30,7 @@ import (
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/registry"
"github.com/boltdb/bolt" bolt "go.etcd.io/bbolt"
) )
const ( const (
@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
bo.ReadOnly = ro bo.ReadOnly = ro
} }
if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
bo.InitialMmapSize = initialMmapSize
} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
bo.InitialMmapSize = int(initialMmapSize)
}
db, err := bolt.Open(path, 0600, bo)
if err != nil {
return nil, err

View File

@ -584,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error {
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
tfr.doc = key[3+len(term)+1:]
if len(tfr.doc) == 0 {
return fmt.Errorf("invalid term frequency key, empty docid")
}

View File

@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()

resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return
}
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update
indexStart := time.Now()
var kvwriter store.KVWriter
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -775,7 +779,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
}

func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
if len(in) == 0 {
return nil
}
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
}

func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
defer persistedCallback(err)
}
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -810,8 +818,10 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}

if numUpdates > 0 {
go func() {
for k := range batch.IndexOps {
doc := batch.IndexOps[k]
if doc != nil {
aw := index.NewAnalysisWork(udc, doc, resultChan)
// put the work on the queue
@ -819,6 +829,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
}()
}
// retrieve back index rows concurrent with analysis
docBackIndexRowErr := error(nil)
@ -958,6 +969,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
return
}

View File

@ -433,6 +433,9 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Explain: req.Explain,
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
Score: req.Score,
SearchAfter: req.SearchAfter,
SearchBefore: req.SearchBefore,
}
return &rv
}
@ -450,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes))
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
@ -502,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
// sort all hits with the requested order
if len(req.Sort) > 0 {
sorter := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter)
}
@ -523,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
sr.Facets.Fixup(name, fr.Size)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
// fix up original request
sr.Request = req
searchDuration := time.Since(searchStart)
@ -580,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}
type multiSearchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
return &multiSearchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *multiSearchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View File

@ -19,6 +19,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os" "os"
"sort"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed
}

var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
var coll *collector.TopNCollector
if req.SearchAfter != nil {
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
} else {
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
}
// open a reader for this search
indexReader, err := i.i.Reader()
@ -458,6 +472,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
Score: req.Score,
})
if err != nil {
return nil, err
@ -493,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
facetsBuilder.Add(facetName, facetBuilder)
}
}
coll.SetFacetsBuilder(facetsBuilder)
}

memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded)
@ -514,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}

err = coll.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
}

hits := coll.Results()

var highlighter highlight.Highlighter
@ -541,8 +556,54 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}

for _, hit := range hits {
if i.name != "" {
hit.Index = i.name
}
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
if err != nil {
return nil, err
}
}
atomic.AddUint64(&i.stats.searches, 1)
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
if Config.SlowSearchLogThreshold > 0 &&
searchDuration > Config.SlowSearchLogThreshold {
logger.Printf("slow search took %s - %v", searchDuration, req)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
},
Request: req,
Hits: hits,
Total: coll.Total(),
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: coll.FacetResults(),
}, nil
}
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
indexName string, r index.IndexReader,
highlighter highlight.Highlighter) error {
if len(req.Fields) > 0 || highlighter != nil {
doc, err := r.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields)
@ -600,35 +661,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return ErrorIndexReadInconsistency
}
}
return nil
}
// Fields returns the name of all the fields this
@ -853,3 +890,26 @@ func deDuplicate(fields []string) []string {
}
return ret
}
type searchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
return &searchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *searchHitSorter) Len() int { return len(m.hits) }
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *searchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View File

@ -18,6 +18,7 @@ import (
"encoding/json" "encoding/json"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath"
"github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/index/upsidedown"
) )
@ -92,5 +93,5 @@ func (i *indexMeta) Save(path string) (err error) {
}

func indexMetaPath(path string) string {
return filepath.Join(path, metaFilename)
}

View File

@ -42,7 +42,7 @@ type DocumentMapping struct {
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
DefaultAnalyzer string `json:"default_analyzer,omitempty"`

// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
@ -324,13 +324,17 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
}

func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overridden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
}

val := reflect.ValueOf(data)
if !val.IsValid() {
return
}
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
@ -420,8 +424,12 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
} else {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
@ -517,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
// ONLY process TextMarshaler if there is an explicit mapping
// AND all of the fields are of type text
// OTHERWISE process field without TextMarshaler
if subDocMapping != nil {
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type != "text" {
allFieldsText = false
break
}
}
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
return
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}

View File

@ -320,8 +320,8 @@ func (im *IndexMappingImpl) determineType(data interface{}) string {
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
docType := im.determineType(data)
docMapping := im.mappingForType(docType)
if docMapping.Enabled {
walkContext := im.newWalkContext(doc, docMapping)
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)

// see if the _all field was disabled

View File

@ -35,6 +35,9 @@ func lookupPropertyPath(data interface{}, path string) interface{} {
func lookupPropertyPathPart(data interface{}, part string) interface{} {
val := reflect.ValueOf(data)
if !val.IsValid() {
return nil
}
typ := val.Type()
switch typ.Kind() {
case reflect.Map:

View File

@ -14,7 +14,7 @@ var interleaveShift = []uint{1, 2, 4, 8, 16}
// Interleave the first 32 bits of each uint64
// adapted from org.apache.lucene.util.BitUtil
// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {
v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4]

View File

@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte

func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
return rv, err
}
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 {
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
}

nChars := ((63 - shift) / 7) + 1

size := int(nChars + 1)
if len(prealloc) >= size {
rv = PrefixCoded(prealloc[0:size])
preallocRest = prealloc[size:]
} else {
rv = make(PrefixCoded, size)
}

rv[0] = ShiftStartInt64 + byte(shift)
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars--
sortableBits = int64(uint64(sortableBits) >> 7)
}

return rv, preallocRest, nil
}
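The Prealloc variant above lets callers slice successive encodings out of one shared buffer instead of allocating per call. A minimal sketch of that reuse pattern, assuming this vendored numeric package; the values and buffer size are illustrative:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/numeric"
)

func main() {
	// one backing buffer shared by all encodings in this batch
	buf := make([]byte, 64)

	var codes []numeric.PrefixCoded
	for _, v := range []int64{10, 20, 30} {
		var pc numeric.PrefixCoded
		var err error
		// each call carves its result off the front of buf and hands back
		// the unused remainder for the next iteration
		pc, buf, err = numeric.NewPrefixCodedInt64Prealloc(v, 0, buf)
		if err != nil {
			fmt.Println(err)
			return
		}
		codes = append(codes, pc)
	}
	fmt.Println("encoded", len(codes), "values")
}
```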
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {

View File

@ -261,6 +261,9 @@ func (h *HighlightRequest) AddField(field string) {
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed
// SearchAfter supports deep paging by providing a minimum sort key
// SearchBefore supports deep paging by providing a maximum sort key
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@ -273,6 +276,9 @@ type SearchRequest struct {
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}
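Together, Sort plus SearchAfter/SearchBefore enable keyset-style deep paging: rather than a large From offset, the next page resumes at the previous page's last sort key. A hedged sketch against the public API, assuming github.com/blevesearch/bleve and an indexed "date" field; names are illustrative:

```go
func nextPage(idx bleve.Index) (*bleve.SearchResult, error) {
	// first page, sorted by a "date" field with _id as tie-breaker
	req := bleve.NewSearchRequest(bleve.NewMatchQuery("dispatch"))
	req.Size = 10
	req.SortBy([]string{"date", "_id"})
	res, err := idx.Search(req)
	if err != nil || len(res.Hits) == 0 {
		return res, err
	}

	// every hit carries its sort key in hit.Sort, one string per sort
	// clause, which is exactly the shape SearchAfter expects
	last := res.Hits[len(res.Hits)-1]

	next := bleve.NewSearchRequest(bleve.NewMatchQuery("dispatch"))
	next.Size = 10
	next.SortBy([]string{"date", "_id"})
	next.SetSearchAfter(last.Sort)
	return idx.Search(next)
}
```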
func (r *SearchRequest) Validate() error {
@ -283,6 +289,27 @@ func (r *SearchRequest) Validate() error {
}
}
if r.SearchAfter != nil && r.SearchBefore != nil {
return fmt.Errorf("cannot use search after and search before together")
}
if r.SearchAfter != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search after with from !=0")
}
if len(r.SearchAfter) != len(r.Sort) {
return fmt.Errorf("search after must have same size as sort order")
}
}
if r.SearchBefore != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search before with from !=0")
}
if len(r.SearchBefore) != len(r.Sort) {
return fmt.Errorf("search before must have same size as sort order")
}
}
return r.Facets.Validate()
}
@ -309,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order
}
// SetSearchAfter sets the request to skip over hits with a sort
// value less than the provided sort after key
func (r *SearchRequest) SetSearchAfter(after []string) {
r.SearchAfter = after
}
// SetSearchBefore sets the request to skip over hits with a sort
// value greater than the provided sort before key
func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -322,6 +361,9 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}

err := json.Unmarshal(input, &temp)
@ -348,6 +390,9 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Fields = temp.Fields
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Score = temp.Score
r.SearchAfter = temp.SearchAfter
r.SearchBefore = temp.SearchBefore
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err

View File

@ -30,3 +30,23 @@ type Collector interface {
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
FacetResults() FacetResults
}
// DocumentMatchHandler is the type of document match callback
// bleve will invoke during the search.
// Eventually, bleve will indicate the completion of an ongoing search
// by passing a nil value for the document match callback.
// The application should take a copy of the hit/documentMatch
// if it wishes to own it or needs prolonged access to it.
type DocumentMatchHandler func(hit *DocumentMatch) error

type MakeDocumentMatchHandlerKeyType string

var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
"MakeDocumentMatchHandlerKey")

// MakeDocumentMatchHandler is an optional DocumentMatchHandler
// builder function which the application can pass to bleve.
// This builder gives bleve a DocumentMatchHandler function,
// which bleve will invoke on every document match.
type MakeDocumentMatchHandler func(ctx *SearchContext) (
callback DocumentMatchHandler, loadID bool, err error)
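A hedged sketch of wiring a custom handler through the context key above, streaming hits as they are found instead of collecting the top N; it assumes the bleve, search and query packages of this vendored version, and the printing handler is illustrative:

```go
func streamHits(idx bleve.Index, q query.Query) error {
	makeHandler := func(sctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
		handler := func(hit *search.DocumentMatch) error {
			if hit == nil {
				return nil // a nil hit signals the end of the search
			}
			fmt.Println("hit:", hit.ID)
			// recycle the match; we keep no reference to it
			sctx.DocumentMatchPool.Put(hit)
			return nil
		}
		// loadID=true asks the collector to resolve external doc IDs
		return handler, true, nil
	}

	ctx := context.WithValue(context.Background(),
		search.MakeDocumentMatchHandlerKey,
		search.MakeDocumentMatchHandler(makeHandler))

	_, err := idx.SearchInContext(ctx, bleve.NewSearchRequest(q))
	return err
}
```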

View File

@ -25,9 +25,9 @@ type collectStoreHeap struct {
compare collectorCompare
}

func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
rv := &collectStoreHeap{
heap: make(search.DocumentMatchCollection, 0, capacity),
compare: compare,
}
heap.Init(rv)

View File

@ -25,7 +25,7 @@ type collectStoreList struct {
compare collectorCompare
}

func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
rv := &collectStoreList{
results: list.New(),
compare: compare,
@ -34,8 +34,7 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
return rv
}

func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()

View File

@ -21,9 +21,9 @@ type collectStoreSlice struct {
compare collectorCompare
}

func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
rv := &collectStoreSlice{
slice: make(search.DocumentMatchCollection, 0, capacity),
compare: compare,
}
return rv

View File

@ -17,6 +17,7 @@ package collector
import (
"context"
"reflect"
"strconv"
"time"

"github.com/blevesearch/bleve/index"
@ -69,6 +70,7 @@ type TopNCollector struct {
lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader
searchAfter *search.DocumentMatch
}

// CheckDoneEvery controls how frequently we check the context deadline
@ -78,6 +80,33 @@ const CheckDoneEvery = uint64(1024)
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
return newTopNCollector(size, skip, sort)
}
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
// whose sort keys come after the provided 'after' key,
// ordering hits by the provided sort order
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
rv := newTopNCollector(size, 0, sort)
rv.searchAfter = &search.DocumentMatch{
Sort: after,
}
for pos, ss := range sort {
if ss.RequiresDocID() {
rv.searchAfter.ID = after[pos]
}
if ss.RequiresScoring() {
if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
rv.searchAfter.Score = score
}
}
}
return rv
}
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort}

// pre-allocate space on the store to avoid reslicing
@ -140,6 +169,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
}

searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
Collector: hc,
IndexReader: reader,
}

hc.dvReader, err = reader.DocValueReader(hc.neededFields)
@ -154,6 +185,19 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
hc.sort.UpdateVisitor(field, term)
}
dmHandlerMaker := MakeTopNDocumentMatchHandler
if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
}
// use the application given builder for making the custom document match
// handler and perform callbacks/invocations on the newly made handler.
dmHandler, loadID, err := dmHandlerMaker(searchContext)
if err != nil {
return err
}
hc.needDocIds = hc.needDocIds || loadID
select {
case <-ctx.Done():
return ctx.Err()
@ -169,13 +213,26 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
}
}
err = hc.prepareDocumentMatch(searchContext, reader, next)
if err != nil {
break
}
err = dmHandler(next)
if err != nil {
break
}
next, err = searcher.Next(searchContext)
}
// help finalize/flush the results in case
// of custom document match handlers.
err = dmHandler(nil)
if err != nil {
return err
}
// compute search duration
hc.took = time.Since(startTime)
if err != nil {
@ -191,8 +248,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var sortByScoreOpt = []string{"_score"}

func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
reader index.IndexReader, d *search.DocumentMatch) (err error) {

// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
@ -226,11 +283,37 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
hc.sort.Value(d)
}
return nil
}
func MakeTopNDocumentMatchHandler(
ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
var hc *TopNCollector
var ok bool
if hc, ok = ctx.Collector.(*TopNCollector); ok {
return func(d *search.DocumentMatch) error {
if d == nil {
return nil
}
// support search after based pagination,
// if this hit is <= the search after sort key
// we should skip it
if hc.searchAfter != nil {
// exact sort order matches use hit number to break tie
// but we want to allow for exact match, so we pretend
hc.searchAfter.HitNumber = d.HitNumber
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
return nil
}
}
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed
if hc.lowestMatchOutsideResults != nil {
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d,
hc.lowestMatchOutsideResults)
if cmp >= 0 {
// this hit can't possibly be in the result set, so avoid heap ops
ctx.DocumentMatchPool.Put(d)
@ -243,7 +326,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
if hc.lowestMatchOutsideResults == nil {
hc.lowestMatchOutsideResults = removed
} else {
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
removed, hc.lowestMatchOutsideResults)
if cmp < 0 {
tmp := hc.lowestMatchOutsideResults
hc.lowestMatchOutsideResults = removed
@ -251,8 +335,10 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
}
}
return nil
}, false, nil
}
return nil, false, nil
}

// visitFieldTerms is responsible for visiting the field terms of the

View File

@ -54,14 +54,14 @@ type FacetBuilder interface {
type FacetsBuilder struct {
indexReader index.IndexReader
facetNames []string
facets []FacetBuilder
fields []string
}

func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
return &FacetsBuilder{
indexReader: indexReader,
}
}
@ -69,8 +69,7 @@ func (fb *FacetsBuilder) Size() int {
sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
for k, v := range fb.facets {
sizeInBytes += size.SizeOfString + v.Size() + len(fb.facetNames[k])
}

for _, entry := range fb.fields {
@ -81,7 +80,8 @@ func (fb *FacetsBuilder) Size() int {
}

func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facetNames = append(fb.facetNames, name)
fb.facets = append(fb.facets, facetBuilder)
fb.fields = append(fb.fields, facetBuilder.Field())
}
@ -333,9 +333,9 @@ func (fr FacetResults) Fixup(name string, size int) {
func (fb *FacetsBuilder) Results() FacetResults {
fr := make(FacetResults)
for i, facetBuilder := range fb.facets {
facetResult := facetBuilder.Result()
fr[fb.facetNames[i]] = facetResult
}
return fr
}

View File

@ -58,6 +58,11 @@ OUTER:
// push back towards beginning
// without crossing maxbegin
for start > 0 && used < s.fragmentSize {
if start > len(orig) {
// bail if out of bounds, possibly due to token replacement
// e.g. with a regexp replacement
continue OUTER
}
r, size := utf8.DecodeLastRune(orig[0:start])
if r == utf8.RuneError {
continue OUTER // bail

View File

@ -70,9 +70,11 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
ss = append(ss, sr)
}

if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
}

return searcher.NewConjunctionSearcher(i, ss, options)
}

View File

@ -41,6 +41,14 @@ type BleveQueryTime struct {
time.Time
}
var MinRFC3339CompatibleTime time.Time
var MaxRFC3339CompatibleTime time.Time
func init() {
MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z")
MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z")
}
func queryTimeFromString(t string) (time.Time, error) {
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
if err != nil {
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
min := math.Inf(-1)
max := math.Inf(1)
if !q.Start.IsZero() {
if !isDatetimeCompatible(q.Start) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
}
startInt64 := q.Start.UnixNano()
min = numeric.Int64ToFloat64(startInt64)
}
if !q.End.IsZero() {
if !isDatetimeCompatible(q.End) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
}
endInt64 := q.End.UnixNano()
max = numeric.Int64ToFloat64(endInt64)
}

return &min, &max, nil
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
}
return nil
}
func isDatetimeCompatible(t BleveQueryTime) bool {
if QueryDateTimeFormat == time.RFC3339 &&
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
return false
}
return true
}
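A small sketch of the guard's effect, assuming the default RFC3339 date parser: with nanosecond precision, time.UnixNano can only represent roughly 1678 to 2262, so endpoints outside that window are rejected instead of silently overflowing. The index and dates below are illustrative:

```go
func dateRangeExamples(idx bleve.Index) {
	// inside the supported window: builds a searcher normally
	ok := bleve.NewDateRangeQuery(
		time.Date(2001, 1, 1, 0, 0, 0, 0, time.UTC),
		time.Date(2002, 1, 1, 0, 0, 0, 0, time.UTC))
	_, _ = idx.Search(bleve.NewSearchRequest(ok))

	// start predates MinRFC3339CompatibleTime, so building the searcher
	// fails with "invalid/unsupported date range, start: ..."
	bad := bleve.NewDateRangeQuery(
		time.Date(1500, 1, 1, 0, 0, 0, 0, time.UTC),
		time.Date(2002, 1, 1, 0, 0, 0, 0, time.UTC))
	if _, err := idx.Search(bleve.NewSearchRequest(bad)); err != nil {
		fmt.Println(err) // the overflow guard in action
	}
}
```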

View File

@ -58,7 +58,8 @@ func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m
}
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, 0, len(q.Disjuncts))
for _, disjunct := range q.Disjuncts {
sr, err := disjunct.Searcher(i, m, options)
@ -76,9 +77,11 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
ss = append(ss, sr)
}

if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
}

return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
}

View File

@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type GeoBoundingPolygonQuery struct {
Points []geo.Point `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
return &GeoBoundingPolygonQuery{
Points: points}
}
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *GeoBoundingPolygonQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *GeoBoundingPolygonQuery) SetField(f string) {
q.FieldVal = f
}
func (q *GeoBoundingPolygonQuery) Field() string {
return q.FieldVal
}
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
}
func (q *GeoBoundingPolygonQuery) Validate() error {
return nil
}
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Points []interface{} `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Points = make([]geo.Point, 0, len(tmp.Points))
for _, i := range tmp.Points {
// now use our generic point parsing code from the geo package
lon, lat, found := geo.ExtractGeoPoint(i)
if !found {
return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
}
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
}
q.FieldVal = tmp.FieldVal
q.BoostVal = tmp.BoostVal
return nil
}
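A hedged usage sketch for the new query type: the points are taken in order as the polygon's boundary, and the queried field needs a geopoint mapping. The field name and coordinates are illustrative:

```go
func polygonSearch(idx bleve.Index) (*bleve.SearchResult, error) {
	// vertices are taken in order as the polygon boundary
	polygon := []geo.Point{
		{Lon: 77.59, Lat: 12.97},
		{Lon: 77.60, Lat: 12.97},
		{Lon: 77.60, Lat: 12.98},
		{Lon: 77.59, Lat: 12.98},
	}

	q := query.NewGeoBoundingPolygonQuery(polygon)
	q.SetField("location") // a field mapped as a geopoint

	return idx.Search(bleve.NewSearchRequest(q))
}
```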

View File

@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasPoints := tmp["polygon_points"]
if hasPoints {
var rv GeoBoundingPolygonQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type")
}
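The lookup above keys on "polygon_points" to route raw JSON to the new query type, mirroring the other geo detections in this function. A sketch of the JSON shape, assuming this vendored query package is imported; the field name and coordinates are illustrative:

```go
input := []byte(`{
	"polygon_points": [
		{"lon": 77.59, "lat": 12.97},
		{"lon": 77.60, "lat": 12.97},
		{"lon": 77.60, "lat": 12.98}
	],
	"field": "location"
}`)

q, err := query.ParseQuery(input)
// on success q is a *query.GeoBoundingPolygonQuery; each element of
// polygon_points goes through geo.ExtractGeoPoint, so {"lon","lat"}
// maps, "lat,lon" strings and [lon, lat] arrays are all accepted
```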
@ -296,32 +305,28 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
}

expand = func(query Query) (Query, error) {
switch q := query.(type) {
case *QueryStringQuery:
parsed, err := parseQuerySyntax(q.Query)
if err != nil {
return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
}
return expand(parsed)
case *ConjunctionQuery:
children, err := expandSlice(q.Conjuncts)
if err != nil {
return nil, err
}
q.Conjuncts = children
return q, nil
case *DisjunctionQuery:
children, err := expandSlice(q.Disjuncts)
if err != nil {
return nil, err
}
q.Disjuncts = children
return q, nil
case *BooleanQuery:
var err error
q.Must, err = expand(q.Must)
if err != nil {
@ -335,7 +340,7 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
if err != nil {
return nil, err
}
return q, nil
default:
return query, nil
}

View File

@ -273,6 +273,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
// see where to go
if !l.seenDot && next == '.' {
// stay in this state
l.seenDot = true
l.buf += string(next)
return inNumOrStrState, true
} else if unicode.IsDigit(next) {

View File

@ -15,7 +15,6 @@
package query

import (
"strings"

"github.com/blevesearch/bleve/index"
@ -28,7 +27,6 @@ type RegexpQuery struct {
Regexp string `json:"regexp"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}

// NewRegexpQuery creates a new Query which finds
@ -64,33 +62,20 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opti
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
// require that pattern NOT be anchored to start and end of term.
// do not attempt to remove trailing $, its presence is not
// known to interfere with LiteralPrefix() the way ^ does
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
actualRegexp := q.Regexp
if strings.HasPrefix(actualRegexp, "^") {
actualRegexp = actualRegexp[1:] // remove leading ^
}

return searcher.NewRegexpStringSearcher(i, actualRegexp, field,
q.BoostVal.Value(), options)
}

func (q *RegexpQuery) Validate() error {
return nil // real validation delayed until searcher constructor
}

View File

@ -15,7 +15,6 @@
package query

import (
"strings"

"github.com/blevesearch/bleve/index"
@ -47,7 +46,6 @@ type WildcardQuery struct {
Wildcard string `json:"wildcard"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}

// NewWildcardQuery creates a new Query which finds
@ -83,24 +81,13 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
return searcher.NewRegexpStringSearcher(i, regexpString, field,
q.BoostVal.Value(), options)
}

func (q *WildcardQuery) Validate() error {
return nil // real validation delayed until searcher constructor
}
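With compilation deferred, Validate above is now a no-op and pattern errors surface from the searcher instead. A sketch of the lazy path, assuming wildcardRegexpReplacer keeps the usual mapping of '*' to '.*' and '?' to '.'; the index and field are illustrative:

```go
q := bleve.NewWildcardQuery("dispa?ch-*")
q.SetField("topic")

// no regexp has been compiled at this point
if err := q.Validate(); err != nil {
	// unreachable in this version; validation happens in the searcher
}

// during the search the wildcard is rewritten to the regexp string
// "dispa.ch-.*" and compiled inside NewRegexpStringSearcher
res, err := idx.Search(bleve.NewSearchRequest(q))
_ = res
_ = err
```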

View File

@ -40,6 +40,7 @@ type TermQueryScorer struct {
idf float64
options search.SearcherOptions
idfExplanation *search.Explanation
includeScore bool
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
@ -70,6 +71,7 @@ func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options,
queryWeight: 1.0,
includeScore: options.Score != "none",
}

if options.Explain {
@ -113,9 +115,10 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}

func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
@ -157,12 +160,16 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
}
}

if s.includeScore {
rv.Score = score
}
if s.options.Explain {
rv.Expl = scoreExplanation
}
}

rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)

if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {

View File

@ -17,8 +17,8 @@ package search
import (
"fmt"
"reflect"
"sort"

"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
)
@ -50,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
return true
}
func (ap ArrayPositions) Compare(other ArrayPositions) int {
for i, p := range ap {
if i >= len(other) {
return 1
}
if p < other[i] {
return -1
}
if p > other[i] {
return 1
}
}
if len(ap) < len(other) {
return -1
}
return 0
}
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
@ -69,6 +87,46 @@ func (l *Location) Size() int {
type Locations []*Location
func (p Locations) Len() int { return len(p) }
func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p Locations) Less(i, j int) bool {
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
if c < 0 {
return true
}
if c > 0 {
return false
}
return p[i].Pos < p[j].Pos
}
func (p Locations) Dedupe() Locations { // destructive!
if len(p) <= 1 {
return p
}
sort.Sort(p)
slow := 0
for _, pfast := range p {
pslow := p[slow]
if pslow.Pos == pfast.Pos &&
pslow.Start == pfast.Start &&
pslow.End == pfast.End &&
pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
continue // duplicate, so only move fast ahead
}
slow++
p[slow] = pfast
}
return p[:slow+1]
}
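Dedupe relies on sorting to bring duplicates adjacent, then compacts in place with a slow/fast two-pointer sweep; note the destructive reordering. The same pattern on plain ints, as an illustrative sketch:

package main

import (
	"fmt"
	"sort"
)

// dedupe mirrors Locations.Dedupe: sort first so equal elements are
// adjacent, then compact in place with slow/fast indexes. Destructive!
func dedupe(p []int) []int {
	if len(p) <= 1 {
		return p
	}
	sort.Ints(p)
	slow := 0
	for _, fast := range p {
		if p[slow] == fast {
			continue // duplicate, so only move fast ahead
		}
		slow++
		p[slow] = fast
	}
	return p[:slow+1]
}

func main() {
	fmt.Println(dedupe([]int{3, 1, 3, 2, 1})) // [1 2 3]
}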
type TermLocationMap map[string]Locations type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) { func (t TermLocationMap) AddLocation(term string, location *Location) {
@ -100,9 +158,6 @@ type DocumentMatch struct {
// fields as float64s and date fields as time.RFC3339 formatted strings. // fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"` Fields map[string]interface{} `json:"fields,omitempty"`
// if we load the document for this hit, remember it so we don't load again
Document *document.Document `json:"-"`
// used to maintain natural index order // used to maintain natural index order
HitNumber uint64 `json:"-"` HitNumber uint64 `json:"-"`
@ -195,10 +250,6 @@ func (dm *DocumentMatch) Size() int {
size.SizeOfPtr size.SizeOfPtr
} }
if dm.Document != nil {
sizeInBytes += dm.Document.Size()
}
return sizeInBytes return sizeInBytes
} }
@ -216,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var lastField string var lastField string
var tlm TermLocationMap var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations { for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field { if lastField != ftl.Field {
@ -239,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
} }
tlm[ftl.Term] = append(tlm[ftl.Term], loc) locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{ Location: Location{
@ -247,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
}, },
} }
} }
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
} }
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
@ -280,11 +352,14 @@ type Searcher interface {
type SearcherOptions struct { type SearcherOptions struct {
Explain bool Explain bool
IncludeTermVectors bool IncludeTermVectors bool
Score string
} }
// SearchContext represents the context around a single search // SearchContext represents the context around a single search
type SearchContext struct { type SearchContext struct {
DocumentMatchPool *DocumentMatchPool DocumentMatchPool *DocumentMatchPool
Collector Collector
IndexReader index.IndexReader
} }
func (sc *SearchContext) Size() int { func (sc *SearchContext) Size() int {

View File

@ -45,6 +45,7 @@ type BooleanSearcher struct {
scorer *scorer.ConjunctionQueryScorer scorer *scorer.ConjunctionQueryScorer
matches []*search.DocumentMatch matches []*search.DocumentMatch
initialized bool initialized bool
done bool
} }
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {
@ -319,11 +324,20 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
return nil, err return nil, err
} }
} }
if rv == nil {
s.done = true
}
return rv, nil return rv, nil
} }
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {
@ -331,6 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
} }
} }
// Advance the searcher only if the cursor is trailing the lookup ID
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error var err error
if s.mustSearcher != nil { if s.mustSearcher != nil {
if s.currMust != nil { if s.currMust != nil {
@ -341,6 +357,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
return nil, err return nil, err
} }
} }
if s.shouldSearcher != nil { if s.shouldSearcher != nil {
if s.currShould != nil { if s.currShould != nil {
ctx.DocumentMatchPool.Put(s.currShould) ctx.DocumentMatchPool.Put(s.currShould)
@ -350,7 +367,12 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
return nil, err return nil, err
} }
} }
if s.mustNotSearcher != nil { if s.mustNotSearcher != nil {
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil { if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot) ctx.DocumentMatchPool.Put(s.currMustNot)
} }
@ -359,6 +381,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
return nil, err return nil, err
} }
} }
}
if s.mustSearcher != nil && s.currMust != nil { if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.IndexInternalID s.currentID = s.currMust.IndexInternalID
@ -367,6 +390,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
} else { } else {
s.currentID = nil s.currentID = nil
} }
}
return s.Next(ctx) return s.Next(ctx)
} }

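The boolean searcher gains a done flag so exhausted streams short-circuit, and Advance now moves a sub-searcher only when its cursor trails the requested ID. A small sketch of the trailing-cursor check, using a hypothetical byte-slice ID ordered by bytes.Compare (as bleve's IndexInternalID is):

package main

import (
	"bytes"
	"fmt"
)

// Hypothetical ID type; bleve's IndexInternalID is likewise a byte slice
// ordered by bytes.Compare.
type internalID []byte

// advance moves the cursor only when it trails the target, mirroring the
// "advance the searcher only if the cursor is trailing" check above.
func advance(cur, target internalID) internalID {
	if cur == nil || bytes.Compare(cur, target) < 0 {
		return target // pretend the underlying searcher sought forward
	}
	return cur // already at or past target: leave the cursor alone
}

func main() {
	fmt.Printf("%q\n", advance(internalID("a"), internalID("c"))) // "c"
	fmt.Printf("%q\n", advance(internalID("d"), internalID("c"))) // "d"
}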
View File

@ -43,14 +43,27 @@ type ConjunctionSearcher struct {
options search.SearcherOptions options search.SearcherOptions
} }
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) { func NewConjunctionSearcher(indexReader index.IndexReader,
// build the downstream searchers qsearchers []search.Searcher, options search.SearcherOptions) (
search.Searcher, error) {
// build the sorted downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers)) searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers { for i, searcher := range qsearchers {
searchers[i] = searcher searchers[i] = searcher
} }
// sort the searchers
sort.Sort(searchers) sort.Sort(searchers)
// attempt the "unadorned" conjunction optimization only when we
// do not need extra information like freq-norms or term vectors
if len(searchers) > 1 &&
options.Score == "none" && !options.IncludeTermVectors {
rv, err := optimizeCompositeSearcher("conjunction:unadorned",
indexReader, searchers, options)
if err != nil || rv != nil {
return rv, err
}
}
// build our searcher // build our searcher
rv := ConjunctionSearcher{ rv := ConjunctionSearcher{
indexReader: indexReader, indexReader: indexReader,
@ -63,24 +76,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
// attempt push-down conjunction optimization when there's >1 searchers // attempt push-down conjunction optimization when there's >1 searchers
if len(searchers) > 1 { if len(searchers) > 1 {
var octx index.OptimizableContext rv, err := optimizeCompositeSearcher("conjunction",
indexReader, searchers, options)
for _, searcher := range searchers { if err != nil || rv != nil {
o, ok := searcher.(index.Optimizable) return rv, err
if ok {
var err error
octx, err = o.Optimize("conjunction", octx)
if err != nil {
return nil, err
}
}
}
if octx != nil {
err := octx.Finish()
if err != nil {
return nil, err
}
} }
} }
@ -158,7 +157,7 @@ func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
var rv *search.DocumentMatch var rv *search.DocumentMatch
var err error var err error
OUTER: OUTER:
for s.currs[s.maxIDIdx] != nil { for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
maxID := s.currs[s.maxIDIdx].IndexInternalID maxID := s.currs[s.maxIDIdx].IndexInternalID
i := 0 i := 0

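The conjunction constructor now tries the "unadorned" optimization only when no scoring data or term vectors are needed; otherwise it falls through to the regular path. The gate, isolated as a sketch (names are illustrative, not bleve's API):

package main

import "fmt"

// Illustrative stand-in for bleve's search.SearcherOptions.
type searcherOptions struct {
	score              string
	includeTermVectors bool
}

// canUseUnadorned mirrors the gate in the hunk above: the fast path is
// only legal when neither scores (freq/norm) nor term vectors are needed.
func canUseUnadorned(numSearchers int, options searcherOptions) bool {
	return numSearchers > 1 &&
		options.score == "none" &&
		!options.includeTermVectors
}

func main() {
	fmt.Println(canUseUnadorned(2, searcherOptions{score: "none"})) // true
	fmt.Println(canUseUnadorned(2, searcherOptions{}))              // false: scoring requested
}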
View File

@ -40,6 +40,18 @@ func NewDisjunctionSearcher(indexReader index.IndexReader,
func newDisjunctionSearcher(indexReader index.IndexReader, func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions, qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (search.Searcher, error) { limit bool) (search.Searcher, error) {
// attempt the "unadorned" disjunction optimization only when we
// do not need extra information like freq-norms or term vectors
// and the requested min is simple
if len(qsearchers) > 1 && min <= 1 &&
options.Score == "none" && !options.IncludeTermVectors {
rv, err := optimizeCompositeSearcher("disjunction:unadorned",
indexReader, qsearchers, options)
if err != nil || rv != nil {
return rv, err
}
}
if len(qsearchers) > DisjunctionHeapTakeover { if len(qsearchers) > DisjunctionHeapTakeover {
return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options,
limit) limit)
@ -48,6 +60,42 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
limit) limit)
} }
func optimizeCompositeSearcher(optimizationKind string,
indexReader index.IndexReader, qsearchers []search.Searcher,
options search.SearcherOptions) (search.Searcher, error) {
var octx index.OptimizableContext
for _, searcher := range qsearchers {
o, ok := searcher.(index.Optimizable)
if !ok {
return nil, nil
}
var err error
octx, err = o.Optimize(optimizationKind, octx)
if err != nil {
return nil, err
}
if octx == nil {
return nil, nil
}
}
optimized, err := octx.Finish()
if err != nil || optimized == nil {
return nil, err
}
tfr, ok := optimized.(index.TermFieldReader)
if !ok {
return nil, nil
}
return newTermSearcherFromReader(indexReader, tfr,
[]byte(optimizationKind), "*", 1.0, options)
}
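optimizeCompositeSearcher is all-or-nothing: every child must implement the optimizable interface and keep returning a non-nil context, or the attempt is abandoned with (nil, nil) so callers fall back to the regular searcher. A toy sketch of that accumulation, with simplified stand-ins for index.Optimizable and index.OptimizableContext:

package main

import "fmt"

// Simplified stand-ins for index.OptimizableContext / index.Optimizable.
type optimizableContext interface {
	finish() (string, error)
}

type optimizable interface {
	optimize(kind string, octx optimizableContext) (optimizableContext, error)
}

type sharedCtx struct{ kinds []string }

func (c *sharedCtx) finish() (string, error) { return fmt.Sprint(c.kinds), nil }

type leaf struct{}

func (leaf) optimize(kind string, octx optimizableContext) (optimizableContext, error) {
	c, _ := octx.(*sharedCtx)
	if c == nil {
		c = &sharedCtx{}
	}
	c.kinds = append(c.kinds, kind)
	return c, nil
}

// tryOptimize mirrors the loop above: abandon the attempt the moment one
// child cannot participate, otherwise finish the accumulated context.
func tryOptimize(kind string, children []interface{}) (string, error) {
	var octx optimizableContext
	for _, child := range children {
		o, ok := child.(optimizable)
		if !ok {
			return "", nil // not optimizable: silent fallback
		}
		var err error
		octx, err = o.optimize(kind, octx)
		if err != nil {
			return "", err
		}
		if octx == nil {
			return "", nil
		}
	}
	if octx == nil {
		return "", nil
	}
	return octx.finish()
}

func main() {
	out, _ := tryOptimize("conjunction:unadorned", []interface{}{leaf{}, leaf{}})
	fmt.Println(out) // [conjunction:unadorned conjunction:unadorned]
}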
func tooManyClauses(count int) bool { func tooManyClauses(count int) bool {
if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount { if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
return true return true
@ -55,7 +103,7 @@ func tooManyClauses(count int) bool {
return false return false
} }
func tooManyClausesErr() error { func tooManyClausesErr(count int) error {
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]",
DisjunctionMaxClauseCount) count, DisjunctionMaxClauseCount)
} }

View File

@ -62,7 +62,7 @@ func newDisjunctionHeapSearcher(indexReader index.IndexReader,
limit bool) ( limit bool) (
*DisjunctionHeapSearcher, error) { *DisjunctionHeapSearcher, error) {
if limit && tooManyClauses(len(searchers)) { if limit && tooManyClauses(len(searchers)) {
return nil, tooManyClausesErr() return nil, tooManyClausesErr(len(searchers))
} }
// build our searcher // build our searcher

View File

@ -50,7 +50,7 @@ func newDisjunctionSliceSearcher(indexReader index.IndexReader,
limit bool) ( limit bool) (
*DisjunctionSliceSearcher, error) { *DisjunctionSliceSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) { if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr() return nil, tooManyClausesErr(len(qsearchers))
} }
// build the downstream searchers // build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers)) searchers := make(OrderedSearcherList, len(qsearchers))

View File

@ -31,6 +31,10 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness) return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
} }
if fuzziness < 0 {
return nil, fmt.Errorf("invalid fuzziness, negative")
}
// Note: we don't byte slice the term for a prefix because of runes. // Note: we don't byte slice the term for a prefix because of runes.
prefixTerm := "" prefixTerm := ""
for i, r := range term { for i, r := range term {
@ -53,17 +57,14 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) { fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0) rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
// in case of advanced reader implementations directly call // in case of advanced reader implementations directly call
// the levenshtein automaton based iterator to collect the // the levenshtein automaton based iterator to collect the
// candidate terms // candidate terms
if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness) fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm)
if err != nil { if err != nil {
return rv, err return nil, err
} }
defer func() { defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil { if cerr := fieldDict.Close(); cerr != nil && err == nil {
@ -73,12 +74,23 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
tfd, err := fieldDict.Next() tfd, err := fieldDict.Next()
for err == nil && tfd != nil { for err == nil && tfd != nil {
rv = append(rv, tfd.Term) rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return nil, tooManyClausesErr(len(rv))
}
tfd, err = fieldDict.Next() tfd, err = fieldDict.Next()
} }
return rv, err return rv, err
} }
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field) fieldDict, err = indexReader.FieldDict(field)
} }
if err != nil {
return nil, err
}
defer func() { defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil { if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr err = cerr
@ -95,7 +107,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
if !exceeded && ld <= fuzziness { if !exceeded && ld <= fuzziness {
rv = append(rv, tfd.Term) rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) { if tooManyClauses(len(rv)) {
return rv, tooManyClausesErr() return nil, tooManyClausesErr(len(rv))
} }
} }
tfd, err = fieldDict.Next() tfd, err = fieldDict.Next()

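Per the note above, the fuzzy searcher derives its candidate prefix by iterating runes rather than slicing bytes, so multi-byte UTF-8 characters are never split. A hedged sketch of one plausible rune-safe cut-off (the exact condition is elided in the hunk):

package main

import "fmt"

// runePrefix keeps leading runes, never raw bytes, so a multi-byte UTF-8
// character is never split. The cut-off shown here (keep runes starting
// before len(term)-fuzziness) is an assumption; the hunk elides it.
func runePrefix(term string, fuzziness int) string {
	prefix := ""
	for i, r := range term {
		if i < len(term)-fuzziness {
			prefix += string(r)
		} else {
			break
		}
	}
	return prefix
}

func main() {
	fmt.Println(runePrefix("héllo", 2)) // "hél": the two-byte é stays intact
}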
View File

@ -22,6 +22,11 @@ import (
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
type filterFunc func(key []byte) bool
var GeoBitsShift1 = (geo.GeoBits << 1)
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64, maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) ( options search.SearcherOptions, checkBoundaries bool) (
@ -36,10 +41,18 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
} }
// do math to produce list of terms needed for this search // do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries) minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
if err != nil {
return nil, err
}
var onBoundarySearcher search.Searcher var onBoundarySearcher search.Searcher
dvReader, err := indexReader.DocValueReader([]string{field})
if err != nil {
return nil, err
}
if len(onBoundaryTerms) > 0 { if len(onBoundaryTerms) > 0 {
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader, rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
onBoundaryTerms, field, boost, options, false) onBoundaryTerms, field, boost, options, false)
@ -48,7 +61,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
} }
// add filter to check points near the boundary // add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher, onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat)) buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher) openedSearchers = append(openedSearchers, onBoundarySearcher)
} }
@ -89,68 +102,132 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint, func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64, sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
checkBoundaries bool) ( indexReader index.IndexReader, field string) (
onBoundary [][]byte, notOnBoundary [][]byte) { onBoundary [][]byte, notOnBoundary [][]byte, err error) {
split := term | uint64(0x1)<<shift preallocBytesLen := 32
var upperMax uint64 preallocBytes := make([]byte, preallocBytesLen)
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
func relateAndRecurse(start, end uint64, res uint, makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
sminLon, sminLat, smaxLon, smaxLat float64, if len(preallocBytes) <= 0 {
checkBoundaries bool) ( preallocBytesLen = preallocBytesLen * 2
onBoundary [][]byte, notOnBoundary [][]byte) { preallocBytes = make([]byte, preallocBytesLen)
}
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
return rv
}
var fieldDict index.FieldDictContains
var isIndexed filterFunc
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
defer func() {
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
}()
if isIndexed == nil {
isIndexed = func(term []byte) bool {
if indexReader != nil {
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
if err != nil || reader == nil {
return false
}
if reader.Count() == 0 {
_ = reader.Close()
return false
}
_ = reader.Close()
}
return true
}
}
var computeGeoRange func(term uint64, shift uint) // declare for recursion
relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start) minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start) minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end) maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end) maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 && within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat, geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel && if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat, geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) { sminLon, sminLat, smaxLon, smaxLat)) {
codedTerm := makePrefixCoded(int64(start), res)
if isIndexed(codedTerm) {
if !within && checkBoundaries { if !within && checkBoundaries {
return [][]byte{ onBoundary = append(onBoundary, codedTerm)
numeric.MustNewPrefixCodedInt64(int64(start), res), } else {
}, nil notOnBoundary = append(notOnBoundary, codedTerm)
} }
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
} }
} else if level < geoDetailLevel && } else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat, geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) { sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, computeGeoRange(start, res-1)
checkBoundaries)
} }
return nil, nil }
computeGeoRange = func(term uint64, shift uint) {
if err != nil {
return
}
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
level := (GeoBitsShift1 - shift) >> 1
relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}
computeGeoRange(term, shift)
if err != nil {
return nil, nil, err
}
return onBoundary, notOnBoundary, err
} }
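The ComputeGeoRange rewrite replaces parameter-heavy recursion with closures that share state (the result slices, the preallocated prefix-coded buffer, the isIndexed filter) instead of threading it through arguments; a self-recursive closure must be declared before it is assigned, exactly as computeGeoRange does above. The idiom in isolation:

package main

import "fmt"

func main() {
	var sums []int

	// Declare first, assign second: a closure must have a name before its
	// own body can refer to it.
	var walk func(lo, hi int)
	walk = func(lo, hi int) {
		if hi-lo <= 1 {
			sums = append(sums, lo) // shared state, no extra parameters
			return
		}
		mid := (lo + hi) / 2
		walk(lo, mid)
		walk(mid, hi)
	}

	walk(0, 8)
	fmt.Println(sums) // [0 1 2 3 4 5 6 7]
}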
func buildRectFilter(indexReader index.IndexReader, field string, func buildRectFilter(dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc { minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool { return func(d *search.DocumentMatch) bool {
var lon, lat float64 // check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID, err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0 // only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term) prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift() shift, err := prefixCoded.Shift()
@ -158,15 +235,19 @@ func buildRectFilter(indexReader index.IndexReader, field string,
var i64 int64 var i64 int64
i64, err = prefixCoded.Int64() i64, err = prefixCoded.Int64()
if err == nil { if err == nil {
lon = geo.MortonUnhashLon(uint64(i64)) lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
lat = geo.MortonUnhashLat(uint64(i64)) lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true found = true
} }
} }
}) })
if err == nil && found { if err == nil && found {
return geo.BoundingBoxContains(lon, lat, for i := range lons {
minLon, minLat, maxLon, maxLat) if geo.BoundingBoxContains(lons[i], lats[i],
minLon, minLat, maxLon, maxLat) {
return true
}
}
} }
return false return false
} }

View File

@ -34,14 +34,19 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// build a searcher for the box // build a searcher for the box
boxSearcher, err := boxSearcher(indexReader, boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options) field, boost, options, false)
if err != nil {
return nil, err
}
dvReader, err := indexReader.DocValueReader([]string{field})
if err != nil { if err != nil {
return nil, err return nil, err
} }
// wrap it in a filtering searcher which checks the actual distance // wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(boxSearcher, return NewFilteringSearcher(boxSearcher,
buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
} }
// boxSearcher builds a searcher for the described bounding box // boxSearcher builds a searcher for the described bounding box
@ -49,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// two boxes joined through a disjunction searcher // two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader, func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) ( field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) { search.Searcher, error) {
if bottomRightLon < topLeftLon { if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts // cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat, -180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false) field, boost, options, checkBoundaries)
if err != nil { if err != nil {
return nil, err return nil, err
} }
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
checkBoundaries)
if err != nil { if err != nil {
_ = leftSearcher.Close() _ = leftSearcher.Close()
return nil, err return nil, err
@ -77,41 +83,44 @@ func boxSearcher(indexReader index.IndexReader,
return boxSearcher, nil return boxSearcher, nil
} }
// build geoboundinggox searcher for that bounding box // build geoboundingbox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false) options, checkBoundaries)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return boxSearcher, nil return boxSearcher, nil
} }
func buildDistFilter(indexReader index.IndexReader, field string, func buildDistFilter(dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc { centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool { return func(d *search.DocumentMatch) bool {
var lon, lat float64 // check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) { err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
// only consider the values which are shifted 0 // only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term) prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift() shift, err := prefixCoded.Shift()
if err == nil && shift == 0 { if err == nil && shift == 0 {
i64, err := prefixCoded.Int64() i64, err := prefixCoded.Int64()
if err == nil { if err == nil {
lon = geo.MortonUnhashLon(uint64(i64)) lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
lat = geo.MortonUnhashLat(uint64(i64)) lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true found = true
} }
} }
}) })
if err == nil && found { if err == nil && found {
dist := geo.Haversin(lon, lat, centerLon, centerLat) for i := range lons {
dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
if dist <= maxDist/1000 { if dist <= maxDist/1000 {
return true return true
} }
} }
}
return false return false
} }
} }

View File

@ -0,0 +1,126 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"math"
)
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
polygon []geo.Point, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
if len(polygon) < 3 {
return nil, fmt.Errorf("Too few points specified for the polygon boundary")
}
// compute the bounding box enclosing the polygon
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.BoundingRectangleForPolygon(polygon)
if err != nil {
return nil, err
}
// build a searcher for the bounding box on the polygon
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options, true)
if err != nil {
return nil, err
}
dvReader, err := indexReader.DocValueReader([]string{field})
if err != nil {
return nil, err
}
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(boxSearcher,
buildPolygonFilter(dvReader, field, polygon)), nil
}
const float64EqualityThreshold = 1e-6
func almostEqual(a, b float64) bool {
return math.Abs(a-b) <= float64EqualityThreshold
}
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as referred
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
polygon []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
var lons, lats []float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true
}
}
})
// Note: this approach works for points that are strictly inside
// the polygon; it might fail for certain points on the polygon boundaries.
if err == nil && found {
nVertices := len(polygon)
if len(polygon) < 3 {
return false
}
rayIntersectsSegment := func(point, a, b geo.Point) bool {
return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
}
for i := range lons {
pt := geo.Point{Lon: lons[i], Lat: lats[i]}
inside := rayIntersectsSegment(pt, polygon[len(polygon)-1], polygon[0])
// check for a direct vertex match
if almostEqual(polygon[0].Lat, lats[i]) &&
almostEqual(polygon[0].Lon, lons[i]) {
return true
}
for j := 1; j < nVertices; j++ {
if almostEqual(polygon[j].Lat, lats[i]) &&
almostEqual(polygon[j].Lon, lons[i]) {
return true
}
if rayIntersectsSegment(pt, polygon[j-1], polygon[j]) {
inside = !inside
}
}
if inside {
return true
}
}
}
return false
}
}

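buildPolygonFilter applies the even-odd (ray-casting) rule: the inside flag flips once per polygon edge crossed by a horizontal ray from the point. A standalone sketch of the same predicate on a square, omitting the epsilon-based vertex handling above:

package main

import "fmt"

type point struct{ lon, lat float64 }

// rayIntersectsSegment matches the predicate in the filter above: does a
// horizontal ray from pt toward +infinity cross the edge (a, b)?
func rayIntersectsSegment(pt, a, b point) bool {
	return (a.lat > pt.lat) != (b.lat > pt.lat) &&
		pt.lon < (b.lon-a.lon)*(pt.lat-a.lat)/(b.lat-a.lat)+a.lon
}

func inside(pt point, poly []point) bool {
	in := rayIntersectsSegment(pt, poly[len(poly)-1], poly[0])
	for j := 1; j < len(poly); j++ {
		if rayIntersectsSegment(pt, poly[j-1], poly[j]) {
			in = !in // even-odd rule: flip on every crossing
		}
	}
	return in
}

func main() {
	square := []point{{0, 0}, {2, 0}, {2, 2}, {0, 2}}
	fmt.Println(inside(point{1, 1}, square)) // true
	fmt.Println(inside(point{3, 1}, square)) // false
}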
View File

@ -22,6 +22,10 @@ import (
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions, limit bool) ( field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) { search.Searcher, error) {
if limit && tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
qsearchers := make([]search.Searcher, len(terms)) qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() { qsearchersClose := func() {
for _, searcher := range qsearchers { for _, searcher := range qsearchers {
@ -46,6 +50,10 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) ( field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) { search.Searcher, error) {
if limit && tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
qsearchers := make([]search.Searcher, len(terms)) qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() { qsearchersClose := func() {
for _, searcher := range qsearchers { for _, searcher := range qsearchers {

View File

@ -53,22 +53,51 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMax && maxInt64 != math.MinInt64 { if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64-- maxInt64--
} }
var fieldDict index.FieldDictContains
var isIndexed filterFunc
var err error
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
// FIXME hard-coded precision, should match field declaration // FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4) termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate() terms := termRanges.Enumerate(isIndexed)
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
if len(terms) < 1 { if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with // cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132 // commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true) true)
} }
var err error
// for upside_down
if isIndexed == nil {
terms, err = filterCandidateTerms(indexReader, terms, field) terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil { if err != nil {
return nil, err return nil, err
} }
}
if tooManyClauses(len(terms)) { if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr() return nil, tooManyClausesErr(len(terms))
} }
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
@ -125,11 +154,17 @@ type termRange struct {
endTerm []byte endTerm []byte
} }
func (t *termRange) Enumerate() [][]byte { func (t *termRange) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte var rv [][]byte
next := t.startTerm next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 { for bytes.Compare(next, t.endTerm) <= 0 {
if filter != nil {
if filter(next) {
rv = append(rv, next) rv = append(rv, next)
}
} else {
rv = append(rv, next)
}
next = incrementBytes(next) next = incrementBytes(next)
} }
return rv return rv
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte { func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte var rv [][]byte
for _, tri := range tr { for _, tri := range tr {
trie := tri.Enumerate() trie := tri.Enumerate(filter)
rv = append(rv, trie...) rv = append(rv, trie...)
} }
return rv return rv

View File
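termRange.Enumerate now accepts an optional filterFunc, pruning candidates against the field dictionary as they are generated rather than materializing all of them first. A minimal sketch of filtered enumeration (with a toy single-byte increment in place of incrementBytes):

package main

import (
	"bytes"
	"fmt"
)

type filterFunc func(key []byte) bool

// enumerate walks [start, end] and keeps only keys the filter admits,
// mirroring termRange.Enumerate(filter) above. A nil filter keeps all.
func enumerate(start, end []byte, filter filterFunc) [][]byte {
	var rv [][]byte
	next := append([]byte(nil), start...)
	for bytes.Compare(next, end) <= 0 {
		if filter == nil || filter(next) {
			rv = append(rv, append([]byte(nil), next...))
		}
		// toy increment: single trailing byte, no carry handling
		next[len(next)-1]++
	}
	return rv
}

func main() {
	evens := func(k []byte) bool { return k[0]%2 == 0 }
	fmt.Println(enumerate([]byte{0}, []byte{6}, evens)) // [[0] [2] [4] [6]]
}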

@ -32,7 +32,7 @@ func init() {
} }
type PhraseSearcher struct { type PhraseSearcher struct {
mustSearcher *ConjunctionSearcher mustSearcher search.Searcher
queryNorm float64 queryNorm float64
currMust *search.DocumentMatch currMust *search.DocumentMatch
terms [][]string terms [][]string
@ -210,7 +210,7 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
return nil, nil return nil, nil
} }
// checkCurrMustMatch is soley concerned with determining if the DocumentMatch // checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisfies the pre-condition searcher) // pointed to by s.currMust (which satisfies the pre-condition searcher)
// also satisfies the phrase constraints. if so, it returns a DocumentMatch // also satisfies the phrase constraints. if so, it returns a DocumentMatch
// for this document, otherwise nil // for this document, otherwise nil
@ -241,7 +241,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D
return nil return nil
} }
// checkCurrMustMatchField is soley concerned with determining if one // checkCurrMustMatchField is solely concerned with determining if one
// particular field within the currMust DocumentMatch Locations // particular field within the currMust DocumentMatch Locations
// satisfies the phrase constraints (possibly more than once). if so, // satisfies the phrase constraints (possibly more than once). if so,
// the matching field term locations are appended to the provided // the matching field term locations are appended to the provided

View File

@ -21,17 +21,22 @@ import (
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
// NewRegexpSearcher creates a searcher which will match documents that // NewRegexpStringSearcher is similar to NewRegexpSearcher, but
// contain terms which match the pattern regexp. The match must be EXACT // additionally optimizes for index readers that handle regexps.
// matching the entire term. The provided regexp SHOULD NOT start with ^ func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string,
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
field string, boost float64, options search.SearcherOptions) ( field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) { search.Searcher, error) {
var candidateTerms []string ir, ok := indexReader.(index.IndexReaderRegexp)
if ir, ok := indexReader.(index.IndexReaderRegexp); ok { if !ok {
fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String())) r, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return NewRegexpSearcher(indexReader, r, field, boost, options)
}
fieldDict, err := ir.FieldDictRegexp(field, pattern)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -41,7 +46,8 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
} }
}() }()
// enumerate the terms and check against regexp var candidateTerms []string
tfd, err := fieldDict.Next() tfd, err := fieldDict.Next()
for err == nil && tfd != nil { for err == nil && tfd != nil {
candidateTerms = append(candidateTerms, tfd.Term) candidateTerms = append(candidateTerms, tfd.Term)
@ -50,7 +56,21 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
if err != nil { if err != nil {
return nil, err return nil, err
} }
} else {
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
options, true)
}
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
var candidateTerms []string
prefixTerm, complete := pattern.LiteralPrefix() prefixTerm, complete := pattern.LiteralPrefix()
if complete { if complete {
// there is no pattern // there is no pattern
@ -63,14 +83,13 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
return nil, err return nil, err
} }
} }
}
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
options, true) options, true)
} }
func findRegexpCandidateTerms(indexReader index.IndexReader, func findRegexpCandidateTerms(indexReader index.IndexReader,
pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) { pattern index.Regexp, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0) rv = make([]string, 0)
var fieldDict index.FieldDict var fieldDict index.FieldDict
if len(prefixTerm) > 0 { if len(prefixTerm) > 0 {
@ -91,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader,
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) { if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv = append(rv, tfd.Term) rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) { if tooManyClauses(len(rv)) {
return rv, tooManyClausesErr() return rv, tooManyClausesErr(len(rv))
} }
} }
tfd, err = fieldDict.Next() tfd, err = fieldDict.Next()

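The regexp searcher narrows the dictionary scan using the pattern's literal prefix; when the prefix is the complete pattern, the term is known outright and no scan is needed. A small sketch using the standard library's (*regexp.Regexp).LiteralPrefix:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`foo(bar|baz)`)
	prefix, complete := re.LiteralPrefix()
	// prefix "foo", complete false: only dictionary terms starting with
	// "foo" need the full regexp check.
	fmt.Println(prefix, complete)

	re2 := regexp.MustCompile(`hello`)
	prefix2, complete2 := re2.LiteralPrefix()
	// complete true: the pattern is the literal itself; per the hunk
	// above the prefix becomes the single candidate term.
	fmt.Println(prefix2, complete2)
}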
View File

@ -38,28 +38,20 @@ type TermSearcher struct {
} }
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors) return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options)
if err != nil {
return nil, err
}
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
reader: reader,
scorer: scorer,
}, nil
} }
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors) needFreqNorm := options.Score != "none"
reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return newTermSearcherFromReader(indexReader, reader, term, field, boost, options)
}
func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
count, err := indexReader.DocCount() count, err := indexReader.DocCount()
if err != nil { if err != nil {
_ = reader.Close() _ = reader.Close()

View File

@ -27,13 +27,24 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
}()
var terms []string var terms []string
tfd, err := fieldDict.Next() tfd, err := fieldDict.Next()
for err == nil && tfd != nil { for err == nil && tfd != nil {
terms = append(terms, tfd.Term) terms = append(terms, tfd.Term)
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
tfd, err = fieldDict.Next() tfd, err = fieldDict.Next()
} }
if err != nil {
return nil, err
}
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true) return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
} }

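The prefix searcher now closes its field dictionary through a deferred closure that writes into the named return, so a Close failure surfaces without masking an earlier error. The idiom in isolation, with a hypothetical dict type:

package main

import (
	"errors"
	"fmt"
)

type dict struct{ closeErr error }

func (d *dict) Close() error { return d.closeErr }

// collect uses a named return so the deferred Close can report its
// failure, but never overwrites an error that happened first.
func collect(d *dict, fail bool) (err error) {
	defer func() {
		if cerr := d.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	if fail {
		return errors.New("iteration failed") // wins over the close error
	}
	return nil
}

func main() {
	fmt.Println(collect(&dict{closeErr: errors.New("close failed")}, false)) // close failed
	fmt.Println(collect(&dict{closeErr: errors.New("close failed")}, true))  // iteration failed
}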
Some files were not shown because too many files have changed in this diff