Update server dependencies
This commit is contained in:
parent
fb8fec38ff
commit
de36fe682a
10
vendor/github.com/RoaringBitmap/roaring/AUTHORS
generated
vendored
Normal file
10
vendor/github.com/RoaringBitmap/roaring/AUTHORS
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# This is the official list of roaring authors for copyright purposes.
|
||||
|
||||
Todd Gruben (@tgruben),
|
||||
Daniel Lemire (@lemire),
|
||||
Elliot Murphy (@statik),
|
||||
Bob Potter (@bpot),
|
||||
Tyson Maly (@tvmaly),
|
||||
Will Glynn (@willglynn),
|
||||
Brent Pedersen (@brentp)
|
||||
Maciej Biłas (@maciej)
|
12
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS
generated
vendored
Normal file
12
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
# This is the official list of roaring contributors
|
||||
|
||||
Todd Gruben (@tgruben),
|
||||
Daniel Lemire (@lemire),
|
||||
Elliot Murphy (@statik),
|
||||
Bob Potter (@bpot),
|
||||
Tyson Maly (@tvmaly),
|
||||
Will Glynn (@willglynn),
|
||||
Brent Pedersen (@brentp),
|
||||
Jason E. Aten (@glycerine),
|
||||
Vali Malinoiu (@0x4139),
|
||||
Forud Ghafouri (@fzerorubigd)
|
202
vendor/github.com/RoaringBitmap/roaring/LICENSE
generated
vendored
Normal file
202
vendor/github.com/RoaringBitmap/roaring/LICENSE
generated
vendored
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016 by the authors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt
generated
vendored
Normal file
202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt
generated
vendored
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016 by the authors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
121
vendor/github.com/RoaringBitmap/roaring/Makefile
generated
vendored
Normal file
121
vendor/github.com/RoaringBitmap/roaring/Makefile
generated
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Display general help about this command
|
||||
help:
|
||||
@echo ""
|
||||
@echo "The following commands are available:"
|
||||
@echo ""
|
||||
@echo " make qa : Run all the tests"
|
||||
@echo " make test : Run the unit tests"
|
||||
@echo ""
|
||||
@echo " make format : Format the source code"
|
||||
@echo " make fmtcheck : Check if the source code has been formatted"
|
||||
@echo " make vet : Check for suspicious constructs"
|
||||
@echo " make lint : Check for style errors"
|
||||
@echo ""
|
||||
@echo " make deps : Get the dependencies"
|
||||
@echo " make clean : Remove any build artifact"
|
||||
@echo " make nuke : Deletes any intermediate file"
|
||||
@echo ""
|
||||
@echo " make fuzz-smat : Fuzzy testing with smat"
|
||||
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
|
||||
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
|
||||
@echo ""
|
||||
|
||||
# Alias for help target
|
||||
all: help
|
||||
test:
|
||||
go test
|
||||
go test -race -run TestConcurrent*
|
||||
# Format the source code
|
||||
format:
|
||||
@find ./ -type f -name "*.go" -exec gofmt -w {} \;
|
||||
|
||||
# Check if the source code has been formatted
|
||||
fmtcheck:
|
||||
@mkdir -p target
|
||||
@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
|
||||
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
|
||||
|
||||
# Check for suspicious constructs
|
||||
vet:
|
||||
GOPATH=$(GOPATH) go vet ./...
|
||||
|
||||
# Check for style errors
|
||||
lint:
|
||||
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Alias to run all quality-assurance checks
|
||||
qa: fmtcheck test vet lint
|
||||
|
||||
# --- INSTALL ---
|
||||
|
||||
# Get the dependencies
|
||||
deps:
|
||||
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
|
||||
GOPATH=$(GOPATH) go get github.com/willf/bitset
|
||||
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
|
||||
GOPATH=$(GOPATH) go get github.com/mschoch/smat
|
||||
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
|
||||
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
|
||||
GOPATH=$(GOPATH) go get github.com/jtolds/gls
|
||||
|
||||
fuzz-smat:
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
|
||||
fuzz-stream:
|
||||
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
|
||||
fuzz-buffer:
|
||||
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
# Remove any build artifact
|
||||
clean:
|
||||
GOPATH=$(GOPATH) go clean ./...
|
||||
|
||||
# Deletes any intermediate file
|
||||
nuke:
|
||||
rm -rf ./target
|
||||
GOPATH=$(GOPATH) go clean -i ./...
|
||||
|
||||
rle:
|
||||
cp rle.go rle16.go
|
||||
perl -pi -e 's/32/16/g' rle16.go
|
||||
cp rle_test.go rle16_test.go
|
||||
perl -pi -e 's/32/16/g' rle16_test.go
|
||||
|
||||
backrle:
|
||||
cp rle16.go rle.go
|
||||
perl -pi -e 's/16/32/g' rle.go
|
||||
perl -pi -e 's/2032/2016/g' rle.go
|
||||
|
||||
ser: rle
|
||||
go generate
|
||||
|
||||
cover:
|
||||
go test -coverprofile=coverage.out
|
||||
go tool cover -html=coverage.out
|
||||
|
||||
fetch-real-roaring-datasets:
|
||||
# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
|
||||
git submodule init
|
||||
git submodule update
|
246
vendor/github.com/RoaringBitmap/roaring/README.md
generated
vendored
Normal file
246
vendor/github.com/RoaringBitmap/roaring/README.md
generated
vendored
Normal file
@ -0,0 +1,246 @@
|
||||
roaring [](https://travis-ci.org/RoaringBitmap/roaring) [](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [](https://godoc.org/github.com/RoaringBitmap/roaring) [](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
|
||||
=============
|
||||
|
||||
This is a go version of the Roaring bitmap data structure.
|
||||
|
||||
|
||||
|
||||
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
|
||||
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
|
||||
|
||||
[lucene]: https://lucene.apache.org/
|
||||
[solr]: https://lucene.apache.org/solr/
|
||||
[elasticsearch]: https://www.elastic.co/products/elasticsearch
|
||||
[druid]: http://druid.io/
|
||||
[spark]: https://spark.apache.org/
|
||||
[opensearchserver]: http://www.opensearchserver.com
|
||||
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
|
||||
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
|
||||
[pilosa]: https://www.pilosa.com/
|
||||
[kylin]: http://kylin.apache.org/
|
||||
[pinot]: http://github.com/linkedin/pinot/wiki
|
||||
[vsts]: https://www.visualstudio.com/team-services/
|
||||
[atlas]: https://github.com/Netflix/atlas
|
||||
|
||||
Roaring bitmaps are found to work well in many important applications:
|
||||
|
||||
> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf))
|
||||
|
||||
|
||||
The ``roaring`` Go library is used by
|
||||
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client
|
||||
* [runv](https://github.com/hyperhq/runv): a hypervisor-based runtime for the Open Containers Initiative
|
||||
* [InfluxDB](https://www.influxdata.com)
|
||||
* [Pilosa](https://www.pilosa.com/)
|
||||
* [Bleve](http://www.blevesearch.com)
|
||||
|
||||
This library is used in production in several systems; it is part of the [Awesome Go collection](https://awesome-go.com).
|
||||
|
||||
|
||||
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go versions are binary compatible: e.g., you can save bitmaps
|
||||
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
|
||||
|
||||
|
||||
This code is licensed under Apache License, Version 2.0 (ASL2.0).
|
||||
|
||||
Copyright 2016-... by the authors.
|
||||
|
||||
|
||||
### References
|
||||
|
||||
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
|
||||
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
|
||||
Better bitmap performance with Roaring bitmaps,
|
||||
Software: Practice and Experience 46 (5), 2016.
|
||||
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
|
||||
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
|
||||
|
||||
|
||||
### Dependencies
|
||||
|
||||
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
|
||||
|
||||
They include:
|
||||
- github.com/smartystreets/goconvey/convey
|
||||
- github.com/willf/bitset
|
||||
- github.com/mschoch/smat
|
||||
- github.com/glycerine/go-unsnap-stream
|
||||
- github.com/philhofer/fwd
|
||||
- github.com/jtolds/gls
|
||||
|
||||
Note that the smat library requires Go 1.6 or better.
|
||||
|
||||
#### Installation
|
||||
|
||||
- go get -t github.com/RoaringBitmap/roaring
|
||||
|
||||
|
||||
### Example
|
||||
|
||||
Here is a simplified but complete example:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
|
||||
func main() {
|
||||
// example inspired by https://github.com/fzandona/goroar
|
||||
fmt.Println("==roaring==")
|
||||
rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||
fmt.Println(rb1.String())
|
||||
|
||||
rb2 := roaring.BitmapOf(3, 4, 1000)
|
||||
fmt.Println(rb2.String())
|
||||
|
||||
rb3 := roaring.New()
|
||||
fmt.Println(rb3.String())
|
||||
|
||||
fmt.Println("Cardinality: ", rb1.GetCardinality())
|
||||
|
||||
fmt.Println("Contains 3? ", rb1.Contains(3))
|
||||
|
||||
rb1.And(rb2)
|
||||
|
||||
rb3.Add(1)
|
||||
rb3.Add(5)
|
||||
|
||||
rb3.Or(rb1)
|
||||
|
||||
// computes union of the three bitmaps in parallel using 4 workers
|
||||
roaring.ParOr(4, rb1, rb2, rb3)
|
||||
// computes intersection of the three bitmaps in parallel using 4 workers
|
||||
roaring.ParAnd(4, rb1, rb2, rb3)
|
||||
|
||||
|
||||
// prints 1, 3, 4, 5, 1000
|
||||
i := rb3.Iterator()
|
||||
for i.HasNext() {
|
||||
fmt.Println(i.Next())
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// next we include an example of serialization
|
||||
buf := new(bytes.Buffer)
|
||||
rb1.WriteTo(buf) // we omit error handling
|
||||
newrb:= roaring.New()
|
||||
newrb.ReadFrom(buf)
|
||||
if rb1.Equals(newrb) {
|
||||
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you wish to use serialization and handle errors, you might want to
|
||||
consider the following sample of code:
|
||||
|
||||
```go
|
||||
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||
buf := new(bytes.Buffer)
|
||||
size,err:=rb.WriteTo(buf)
|
||||
if err != nil {
|
||||
t.Errorf("Failed writing")
|
||||
}
|
||||
newrb:= New()
|
||||
size,err=newrb.ReadFrom(buf)
|
||||
if err != nil {
|
||||
t.Errorf("Failed reading")
|
||||
}
|
||||
if ! rb.Equals(newrb) {
|
||||
t.Errorf("Cannot retrieve serialized version")
|
||||
}
|
||||
```
|
||||
|
||||
Given N integers in [0,x), then the serialized size in bytes of
|
||||
a Roaring bitmap should never exceed this bound:
|
||||
|
||||
`` 8 + 9 * ((long)x+65535)/65536 + 2 * N ``
|
||||
|
||||
That is, given a fixed overhead for the universe size (x), Roaring
|
||||
bitmaps never use more than 2 bytes per integer. You can call
|
||||
``BoundSerializedSizeInBytes`` for a more precise estimate.
|
||||
|
||||
|
||||
### Documentation
|
||||
|
||||
Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring
|
||||
|
||||
### Goroutine safety
|
||||
|
||||
In general, it should not be considered safe to access
|
||||
the same bitmaps using different goroutines--they are left
|
||||
unsynchronized for performance. Should you want to access
|
||||
a Bitmap from more than one goroutine, you should
|
||||
provide synchronization. Typically this is done by using channels to pass
|
||||
the *Bitmap around (in Go style; so there is only ever one owner),
|
||||
or by using `sync.Mutex` to serialize operations on Bitmaps.
|
||||
|
||||
### Coverage
|
||||
|
||||
We test our software. For a report on our test coverage, see
|
||||
|
||||
https://coveralls.io/github/RoaringBitmap/roaring?branch=master
|
||||
|
||||
### Benchmark
|
||||
|
||||
Type
|
||||
|
||||
go test -bench Benchmark -run -
|
||||
|
||||
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
|
||||
run the following:
|
||||
|
||||
```sh
|
||||
go get github.com/RoaringBitmap/real-roaring-datasets
|
||||
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
|
||||
```
|
||||
|
||||
### Interactive use
|
||||
|
||||
You can use roaring with gore:
|
||||
|
||||
- go get -u github.com/motemen/gore
|
||||
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
|
||||
- go get github.com/RoaringBitmap/roaring
|
||||
|
||||
```go
|
||||
$ gore
|
||||
gore version 0.2.6 :help for help
|
||||
gore> :import github.com/RoaringBitmap/roaring
|
||||
gore> x:=roaring.New()
|
||||
gore> x.Add(1)
|
||||
gore> x.String()
|
||||
"{1}"
|
||||
```
|
||||
|
||||
|
||||
### Fuzzy testing
|
||||
|
||||
You can help us further test the library with fuzz testing:
|
||||
|
||||
go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
go-fuzz-build github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||
the workdir where you should be able to find the panic goroutine stack
|
||||
traces.
|
||||
|
||||
### Alternative in Go
|
||||
|
||||
There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring
|
||||
|
||||
For an alternative implementation in Go, see https://github.com/fzandona/goroar
|
||||
The two versions were written independently.
|
||||
|
||||
|
||||
### Mailing list/discussion group
|
||||
|
||||
https://groups.google.com/forum/#!forum/roaring-bitmaps
|
960
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go
generated
vendored
Normal file
960
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go
generated
vendored
Normal file
@ -0,0 +1,960 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
//go:generate msgp -unexported
|
||||
|
||||
// arrayContainer holds the values of one container as a plain slice of
// 16-bit integers. The methods in this file locate values with
// binarySearch, so they rely on content being kept in ascending order.
type arrayContainer struct {
	content []uint16 // the stored values
}
|
||||
|
||||
func (ac *arrayContainer) String() string {
|
||||
s := "{"
|
||||
for it := ac.getShortIterator(); it.hasNext(); {
|
||||
s += fmt.Sprintf("%v, ", it.next())
|
||||
}
|
||||
return s + "}"
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
x[k+i] = uint32(ac.content[k]) | mask
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getShortIterator() shortIterable {
|
||||
return &shortIterator{ac.content, 0}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getManyIterator() manyIterable {
|
||||
return &manyIterator{ac.content, 0}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) minimum() uint16 {
|
||||
return ac.content[0] // assume not empty
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) maximum() uint16 {
|
||||
return ac.content[len(ac.content)-1] // assume not empty
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getSizeInBytes() int {
|
||||
return ac.getCardinality() * 2
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) serializedSizeInBytes() int {
|
||||
return ac.getCardinality() * 2
|
||||
}
|
||||
|
||||
// arrayContainerSizeInBytes returns the byte size of an array container
// holding card values: two bytes per value.
func arrayContainerSizeInBytes(card int) int {
	const bytesPerValue = 2
	return bytesPerValue * card
}
|
||||
|
||||
// add the values in the range [firstOfRange,endx)
|
||||
func (ac *arrayContainer) iaddRange(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
return ac
|
||||
}
|
||||
indexstart := binarySearch(ac.content, uint16(firstOfRange))
|
||||
if indexstart < 0 {
|
||||
indexstart = -indexstart - 1
|
||||
}
|
||||
indexend := binarySearch(ac.content, uint16(endx-1))
|
||||
if indexend < 0 {
|
||||
indexend = -indexend - 1
|
||||
} else {
|
||||
indexend++
|
||||
}
|
||||
rangelength := endx - firstOfRange
|
||||
newcardinality := indexstart + (ac.getCardinality() - indexend) + rangelength
|
||||
if newcardinality > arrayDefaultMaxSize {
|
||||
a := ac.toBitmapContainer()
|
||||
return a.iaddRange(firstOfRange, endx)
|
||||
}
|
||||
if cap(ac.content) < newcardinality {
|
||||
tmp := make([]uint16, newcardinality, newcardinality)
|
||||
copy(tmp[:indexstart], ac.content[:indexstart])
|
||||
copy(tmp[indexstart+rangelength:], ac.content[indexend:])
|
||||
|
||||
ac.content = tmp
|
||||
} else {
|
||||
ac.content = ac.content[:newcardinality]
|
||||
copy(ac.content[indexstart+rangelength:], ac.content[indexend:])
|
||||
|
||||
}
|
||||
for k := 0; k < rangelength; k++ {
|
||||
ac.content[k+indexstart] = uint16(firstOfRange + k)
|
||||
}
|
||||
return ac
|
||||
}
|
||||
|
||||
// remove the values in the range [firstOfRange,endx)
|
||||
func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
return ac
|
||||
}
|
||||
indexstart := binarySearch(ac.content, uint16(firstOfRange))
|
||||
if indexstart < 0 {
|
||||
indexstart = -indexstart - 1
|
||||
}
|
||||
indexend := binarySearch(ac.content, uint16(endx-1))
|
||||
if indexend < 0 {
|
||||
indexend = -indexend - 1
|
||||
} else {
|
||||
indexend++
|
||||
}
|
||||
rangelength := indexend - indexstart
|
||||
answer := ac
|
||||
copy(answer.content[indexstart:], ac.content[indexstart+rangelength:])
|
||||
answer.content = answer.content[:ac.getCardinality()-rangelength]
|
||||
return answer
|
||||
}
|
||||
|
||||
// flip the values in the range [firstOfRange,endx)
|
||||
func (ac *arrayContainer) not(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
//p("arrayContainer.not(): exiting early with ac.clone()")
|
||||
return ac.clone()
|
||||
}
|
||||
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
|
||||
}
|
||||
|
||||
// flip the values in the range [firstOfRange,lastOfRange]
|
||||
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
|
||||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
|
||||
//p("arrayContainer.notClose(): exiting early with ac.clone()")
|
||||
return ac.clone()
|
||||
}
|
||||
|
||||
// determine the span of array indices to be affected^M
|
||||
startIndex := binarySearch(ac.content, uint16(firstOfRange))
|
||||
//p("startIndex=%v", startIndex)
|
||||
if startIndex < 0 {
|
||||
startIndex = -startIndex - 1
|
||||
}
|
||||
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
|
||||
//p("lastIndex=%v", lastIndex)
|
||||
if lastIndex < 0 {
|
||||
lastIndex = -lastIndex - 2
|
||||
}
|
||||
currentValuesInRange := lastIndex - startIndex + 1
|
||||
spanToBeFlipped := lastOfRange - firstOfRange + 1
|
||||
newValuesInRange := spanToBeFlipped - currentValuesInRange
|
||||
cardinalityChange := newValuesInRange - currentValuesInRange
|
||||
newCardinality := len(ac.content) + cardinalityChange
|
||||
//p("new card is %v", newCardinality)
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
//p("new card over arrayDefaultMaxSize, so returning bitmap")
|
||||
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
|
||||
}
|
||||
answer := newArrayContainer()
|
||||
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
|
||||
|
||||
copy(answer.content, ac.content[:startIndex])
|
||||
outPos := startIndex
|
||||
inPos := startIndex
|
||||
valInRange := firstOfRange
|
||||
for ; valInRange <= lastOfRange && inPos <= lastIndex; valInRange++ {
|
||||
if uint16(valInRange) != ac.content[inPos] {
|
||||
answer.content[outPos] = uint16(valInRange)
|
||||
outPos++
|
||||
} else {
|
||||
inPos++
|
||||
}
|
||||
}
|
||||
|
||||
for ; valInRange <= lastOfRange; valInRange++ {
|
||||
answer.content[outPos] = uint16(valInRange)
|
||||
outPos++
|
||||
}
|
||||
|
||||
for i := lastIndex + 1; i < len(ac.content); i++ {
|
||||
answer.content[outPos] = ac.content[i]
|
||||
outPos++
|
||||
}
|
||||
answer.content = answer.content[:newCardinality]
|
||||
return answer
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) equals(o container) bool {
|
||||
|
||||
srb, ok := o.(*arrayContainer)
|
||||
if ok {
|
||||
// Check if the containers are the same object.
|
||||
if ac == srb {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(srb.content) != len(ac.content) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, v := range ac.content {
|
||||
if v != srb.content[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// use generic comparison
|
||||
bCard := o.getCardinality()
|
||||
aCard := ac.getCardinality()
|
||||
if bCard != aCard {
|
||||
return false
|
||||
}
|
||||
|
||||
ait := ac.getShortIterator()
|
||||
bit := o.getShortIterator()
|
||||
for ait.hasNext() {
|
||||
if bit.next() != ait.next() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
|
||||
bc := newBitmapContainer()
|
||||
bc.loadData(ac)
|
||||
return bc
|
||||
|
||||
}
|
||||
func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
|
||||
// Special case adding to the end of the container.
|
||||
l := len(ac.content)
|
||||
if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x {
|
||||
ac.content = append(ac.content, x)
|
||||
return true
|
||||
}
|
||||
|
||||
loc := binarySearch(ac.content, x)
|
||||
|
||||
if loc < 0 {
|
||||
s := ac.content
|
||||
i := -loc - 1
|
||||
s = append(s, 0)
|
||||
copy(s[i+1:], s[i:])
|
||||
s[i] = x
|
||||
ac.content = s
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iaddReturnMinimized(x uint16) container {
|
||||
// Special case adding to the end of the container.
|
||||
l := len(ac.content)
|
||||
if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x {
|
||||
ac.content = append(ac.content, x)
|
||||
return ac
|
||||
}
|
||||
|
||||
loc := binarySearch(ac.content, x)
|
||||
|
||||
if loc < 0 {
|
||||
if len(ac.content) >= arrayDefaultMaxSize {
|
||||
a := ac.toBitmapContainer()
|
||||
a.iadd(x)
|
||||
return a
|
||||
}
|
||||
s := ac.content
|
||||
i := -loc - 1
|
||||
s = append(s, 0)
|
||||
copy(s[i+1:], s[i:])
|
||||
s[i] = x
|
||||
ac.content = s
|
||||
}
|
||||
return ac
|
||||
}
|
||||
|
||||
// iremoveReturnMinimized is allowed to change the return type to minimize storage.
|
||||
func (ac *arrayContainer) iremoveReturnMinimized(x uint16) container {
|
||||
ac.iremove(x)
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iremove(x uint16) bool {
|
||||
loc := binarySearch(ac.content, x)
|
||||
if loc >= 0 {
|
||||
s := ac.content
|
||||
s = append(s[:loc], s[loc+1:]...)
|
||||
ac.content = s
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) remove(x uint16) container {
|
||||
out := &arrayContainer{make([]uint16, len(ac.content))}
|
||||
copy(out.content, ac.content[:])
|
||||
|
||||
loc := binarySearch(out.content, x)
|
||||
if loc >= 0 {
|
||||
s := out.content
|
||||
s = append(s[:loc], s[loc+1:]...)
|
||||
out.content = s
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) or(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.orArray(x)
|
||||
case *bitmapContainer:
|
||||
return x.orArray(ac)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
return x.orArray(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) orCardinality(a container) int {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.orArrayCardinality(x)
|
||||
case *bitmapContainer:
|
||||
return x.orArrayCardinality(ac)
|
||||
case *runContainer16:
|
||||
return x.orArrayCardinality(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) ior(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.iorArray(x)
|
||||
case *bitmapContainer:
|
||||
return a.(*bitmapContainer).orArray(ac)
|
||||
//return ac.iorBitmap(x) // note: this does not make sense
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
return ac.iorRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iorArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
len1 := value1.getCardinality()
|
||||
len2 := value2.getCardinality()
|
||||
maxPossibleCardinality := len1 + len2
|
||||
if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap!
|
||||
bc := newBitmapContainer()
|
||||
for k := 0; k < len(value2.content); k++ {
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
v := ac.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
bc.cardinality = int(popcntSlice(bc.bitmap))
|
||||
if bc.cardinality <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
if maxPossibleCardinality > cap(value1.content) {
|
||||
newcontent := make([]uint16, 0, maxPossibleCardinality)
|
||||
copy(newcontent[len2:maxPossibleCardinality], ac.content[0:len1])
|
||||
ac.content = newcontent
|
||||
} else {
|
||||
copy(ac.content[len2:maxPossibleCardinality], ac.content[0:len1])
|
||||
}
|
||||
nl := union2by2(value1.content[len2:maxPossibleCardinality], value2.content, ac.content)
|
||||
ac.content = ac.content[:nl] // reslice to match actual used capacity
|
||||
return ac
|
||||
}
|
||||
|
||||
// Note: such code does not make practical sense, except for lazy evaluations
|
||||
func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container {
|
||||
bc1 := ac.toBitmapContainer()
|
||||
bc1.iorBitmap(bc2)
|
||||
*ac = *newArrayContainerFromBitmap(bc1)
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iorRun16(rc *runContainer16) container {
|
||||
bc1 := ac.toBitmapContainer()
|
||||
bc2 := rc.toBitmapContainer()
|
||||
bc1.iorBitmap(bc2)
|
||||
*ac = *newArrayContainerFromBitmap(bc1)
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyIOR(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.lazyIorArray(x)
|
||||
case *bitmapContainer:
|
||||
return ac.lazyIorBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
return ac.lazyIorRun16(x)
|
||||
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyIorArray(ac2 *arrayContainer) container {
|
||||
// TODO actually make this lazy
|
||||
return ac.iorArray(ac2)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyIorBitmap(bc *bitmapContainer) container {
|
||||
// TODO actually make this lazy
|
||||
return ac.iorBitmap(bc)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyIorRun16(rc *runContainer16) container {
|
||||
// TODO actually make this lazy
|
||||
return ac.iorRun16(rc)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyOR(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.lazyorArray(x)
|
||||
case *bitmapContainer:
|
||||
return a.lazyOR(ac)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
return x.orArray(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) orArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
maxPossibleCardinality := value1.getCardinality() + value2.getCardinality()
|
||||
if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap!
|
||||
bc := newBitmapContainer()
|
||||
for k := 0; k < len(value2.content); k++ {
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
v := ac.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
bc.cardinality = int(popcntSlice(bc.bitmap))
|
||||
if bc.cardinality <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
answer := newArrayContainerCapacity(maxPossibleCardinality)
|
||||
nl := union2by2(value1.content, value2.content, answer.content)
|
||||
answer.content = answer.content[:nl] // reslice to match actual used capacity
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) orArrayCardinality(value2 *arrayContainer) int {
|
||||
return union2by2Cardinality(ac.content, value2.content)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
maxPossibleCardinality := value1.getCardinality() + value2.getCardinality()
|
||||
if maxPossibleCardinality > arrayLazyLowerBound { // it could be a bitmap!^M
|
||||
bc := newBitmapContainer()
|
||||
for k := 0; k < len(value2.content); k++ {
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
v := ac.content[k]
|
||||
i := uint(v) >> 6
|
||||
mask := uint64(1) << (v % 64)
|
||||
bc.bitmap[i] |= mask
|
||||
}
|
||||
bc.cardinality = invalidCardinality
|
||||
return bc
|
||||
}
|
||||
answer := newArrayContainerCapacity(maxPossibleCardinality)
|
||||
nl := union2by2(value1.content, value2.content, answer.content)
|
||||
answer.content = answer.content[:nl] // reslice to match actual used capacity
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) and(a container) container {
|
||||
//p("ac.and() called")
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.andArray(x)
|
||||
case *bitmapContainer:
|
||||
return x.and(ac)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return ac.clone()
|
||||
}
|
||||
return x.andArray(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andCardinality(a container) int {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.andArrayCardinality(x)
|
||||
case *bitmapContainer:
|
||||
return x.andCardinality(ac)
|
||||
case *runContainer16:
|
||||
return x.andArrayCardinality(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) intersects(a container) bool {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.intersectsArray(x)
|
||||
case *bitmapContainer:
|
||||
return x.intersects(ac)
|
||||
case *runContainer16:
|
||||
return x.intersects(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iand(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.iandArray(x)
|
||||
case *bitmapContainer:
|
||||
return ac.iandBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return ac.clone()
|
||||
}
|
||||
return x.andArray(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
|
||||
pos := 0
|
||||
c := ac.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
// branchless
|
||||
v := ac.content[k]
|
||||
ac.content[pos] = v
|
||||
pos += int(bc.bitValue(v))
|
||||
}
|
||||
ac.content = ac.content[:pos]
|
||||
return ac
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) xor(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.xorArray(x)
|
||||
case *bitmapContainer:
|
||||
return a.xor(ac)
|
||||
case *runContainer16:
|
||||
return x.xorArray(ac)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
totalCardinality := value1.getCardinality() + value2.getCardinality()
|
||||
if totalCardinality > arrayDefaultMaxSize { // it could be a bitmap!
|
||||
bc := newBitmapContainer()
|
||||
for k := 0; k < len(value2.content); k++ {
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
bc.bitmap[i] ^= (uint64(1) << (v % 64))
|
||||
}
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
v := ac.content[k]
|
||||
i := uint(v) >> 6
|
||||
bc.bitmap[i] ^= (uint64(1) << (v % 64))
|
||||
}
|
||||
bc.computeCardinality()
|
||||
if bc.cardinality <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
desiredCapacity := totalCardinality
|
||||
answer := newArrayContainerCapacity(desiredCapacity)
|
||||
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
|
||||
answer.content = answer.content[:length]
|
||||
return answer
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andNot(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.andNotArray(x)
|
||||
case *bitmapContainer:
|
||||
return ac.andNotBitmap(x)
|
||||
case *runContainer16:
|
||||
return ac.andNotRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andNotRun16(rc *runContainer16) container {
|
||||
acb := ac.toBitmapContainer()
|
||||
rcb := rc.toBitmapContainer()
|
||||
return acb.andNotBitmap(rcb)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandNot(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.iandNotArray(x)
|
||||
case *bitmapContainer:
|
||||
return ac.iandNotBitmap(x)
|
||||
case *runContainer16:
|
||||
return ac.iandNotRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
acb.iandNotBitmapSurely(rcb)
|
||||
*ac = *(acb.toArrayContainer())
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andNotArray(value2 *arrayContainer) container {
|
||||
value1 := ac
|
||||
desiredcapacity := value1.getCardinality()
|
||||
answer := newArrayContainerCapacity(desiredcapacity)
|
||||
length := difference(value1.content, value2.content, answer.content)
|
||||
answer.content = answer.content[:length]
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandNotArray(value2 *arrayContainer) container {
|
||||
length := difference(ac.content, value2.content, ac.content)
|
||||
ac.content = ac.content[:length]
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andNotBitmap(value2 *bitmapContainer) container {
|
||||
desiredcapacity := ac.getCardinality()
|
||||
answer := newArrayContainerCapacity(desiredcapacity)
|
||||
answer.content = answer.content[:desiredcapacity]
|
||||
pos := 0
|
||||
for _, v := range ac.content {
|
||||
answer.content[pos] = v
|
||||
pos += 1 - int(value2.bitValue(v))
|
||||
}
|
||||
answer.content = answer.content[:pos]
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andBitmap(value2 *bitmapContainer) container {
|
||||
desiredcapacity := ac.getCardinality()
|
||||
answer := newArrayContainerCapacity(desiredcapacity)
|
||||
answer.content = answer.content[:desiredcapacity]
|
||||
pos := 0
|
||||
for _, v := range ac.content {
|
||||
answer.content[pos] = v
|
||||
pos += int(value2.bitValue(v))
|
||||
}
|
||||
answer.content = answer.content[:pos]
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandNotBitmap(value2 *bitmapContainer) container {
|
||||
pos := 0
|
||||
for _, v := range ac.content {
|
||||
ac.content[pos] = v
|
||||
pos += 1 - int(value2.bitValue(v))
|
||||
}
|
||||
ac.content = ac.content[:pos]
|
||||
return ac
|
||||
}
|
||||
|
||||
// copyOf returns a newly allocated slice of length size holding the first
// min(size, len(array)) elements of array. When size exceeds len(array)
// the remaining elements are zero; when it is smaller the copy is
// truncated. The result never shares storage with array.
// A negative size panics (in make).
func copyOf(array []uint16, size int) []uint16 {
	result := make([]uint16, size)
	// The built-in copy stops at the shorter of the two slices, which is
	// exactly the truncate-or-pad behaviour required here.
	copy(result, array)
	return result
}
|
||||
|
||||
// flip the values in the range [firstOfRange,endx)
|
||||
func (ac *arrayContainer) inot(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
return ac
|
||||
}
|
||||
return ac.inotClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
|
||||
}
|
||||
|
||||
// flip the values in the range [firstOfRange,lastOfRange]
|
||||
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
|
||||
//p("ac.inotClose() starting")
|
||||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
|
||||
return ac
|
||||
}
|
||||
// determine the span of array indices to be affected
|
||||
startIndex := binarySearch(ac.content, uint16(firstOfRange))
|
||||
if startIndex < 0 {
|
||||
startIndex = -startIndex - 1
|
||||
}
|
||||
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
|
||||
if lastIndex < 0 {
|
||||
lastIndex = -lastIndex - 1 - 1
|
||||
}
|
||||
currentValuesInRange := lastIndex - startIndex + 1
|
||||
spanToBeFlipped := lastOfRange - firstOfRange + 1
|
||||
|
||||
newValuesInRange := spanToBeFlipped - currentValuesInRange
|
||||
buffer := make([]uint16, newValuesInRange)
|
||||
cardinalityChange := newValuesInRange - currentValuesInRange
|
||||
newCardinality := len(ac.content) + cardinalityChange
|
||||
if cardinalityChange > 0 {
|
||||
if newCardinality > len(ac.content) {
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
//p("ac.inotClose() converting to bitmap and doing inot there")
|
||||
bcRet := ac.toBitmapContainer()
|
||||
bcRet.inot(firstOfRange, lastOfRange+1)
|
||||
*ac = *bcRet.toArrayContainer()
|
||||
return bcRet
|
||||
}
|
||||
ac.content = copyOf(ac.content, newCardinality)
|
||||
}
|
||||
base := lastIndex + 1
|
||||
copy(ac.content[lastIndex+1+cardinalityChange:], ac.content[base:base+len(ac.content)-1-lastIndex])
|
||||
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
|
||||
} else { // no expansion needed
|
||||
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
|
||||
if cardinalityChange < 0 {
|
||||
|
||||
for i := startIndex + newValuesInRange; i < newCardinality; i++ {
|
||||
ac.content[i] = ac.content[i-cardinalityChange]
|
||||
}
|
||||
}
|
||||
}
|
||||
ac.content = ac.content[:newCardinality]
|
||||
//p("bottom of ac.inotClose(): returning ac")
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) negateRange(buffer []uint16, startIndex, lastIndex, startRange, lastRange int) {
|
||||
// compute the negation into buffer
|
||||
outPos := 0
|
||||
inPos := startIndex // value here always >= valInRange,
|
||||
// until it is exhausted
|
||||
// n.b., we can start initially exhausted.
|
||||
|
||||
valInRange := startRange
|
||||
for ; valInRange < lastRange && inPos <= lastIndex; valInRange++ {
|
||||
if uint16(valInRange) != ac.content[inPos] {
|
||||
buffer[outPos] = uint16(valInRange)
|
||||
outPos++
|
||||
} else {
|
||||
inPos++
|
||||
}
|
||||
}
|
||||
|
||||
// if there are extra items (greater than the biggest
|
||||
// pre-existing one in range), buffer them
|
||||
for ; valInRange < lastRange; valInRange++ {
|
||||
buffer[outPos] = uint16(valInRange)
|
||||
outPos++
|
||||
}
|
||||
|
||||
if outPos != len(buffer) {
|
||||
panic("negateRange: internal bug")
|
||||
}
|
||||
|
||||
for i, item := range buffer {
|
||||
ac.content[i+startIndex] = item
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) isFull() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andArray(value2 *arrayContainer) container {
|
||||
desiredcapacity := minOfInt(ac.getCardinality(), value2.getCardinality())
|
||||
answer := newArrayContainerCapacity(desiredcapacity)
|
||||
length := intersection2by2(
|
||||
ac.content,
|
||||
value2.content,
|
||||
answer.content)
|
||||
answer.content = answer.content[:length]
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andArrayCardinality(value2 *arrayContainer) int {
|
||||
return intersection2by2Cardinality(
|
||||
ac.content,
|
||||
value2.content)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) intersectsArray(value2 *arrayContainer) bool {
|
||||
return intersects2by2(
|
||||
ac.content,
|
||||
value2.content)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iandArray(value2 *arrayContainer) container {
|
||||
length := intersection2by2(
|
||||
ac.content,
|
||||
value2.content,
|
||||
ac.content)
|
||||
ac.content = ac.content[:length]
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getCardinality() int {
|
||||
return len(ac.content)
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) rank(x uint16) int {
|
||||
answer := binarySearch(ac.content, x)
|
||||
if answer >= 0 {
|
||||
return answer + 1
|
||||
}
|
||||
return -answer - 1
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) selectInt(x uint16) int {
|
||||
return int(ac.content[x])
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) clone() container {
|
||||
ptr := arrayContainer{make([]uint16, len(ac.content))}
|
||||
copy(ptr.content, ac.content[:])
|
||||
return &ptr
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) contains(x uint16) bool {
|
||||
return binarySearch(ac.content, x) >= 0
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) loadData(bitmapContainer *bitmapContainer) {
|
||||
ac.content = make([]uint16, bitmapContainer.cardinality, bitmapContainer.cardinality)
|
||||
bitmapContainer.fillArray(ac.content)
|
||||
}
|
||||
func newArrayContainer() *arrayContainer {
|
||||
p := new(arrayContainer)
|
||||
return p
|
||||
}
|
||||
|
||||
func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer {
|
||||
ac := &arrayContainer{}
|
||||
ac.loadData(bc)
|
||||
return ac
|
||||
}
|
||||
|
||||
func newArrayContainerCapacity(size int) *arrayContainer {
|
||||
p := new(arrayContainer)
|
||||
p.content = make([]uint16, 0, size)
|
||||
return p
|
||||
}
|
||||
|
||||
func newArrayContainerSize(size int) *arrayContainer {
|
||||
p := new(arrayContainer)
|
||||
p.content = make([]uint16, size, size)
|
||||
return p
|
||||
}
|
||||
|
||||
func newArrayContainerRange(firstOfRun, lastOfRun int) *arrayContainer {
|
||||
valuesInRange := lastOfRun - firstOfRun + 1
|
||||
this := newArrayContainerCapacity(valuesInRange)
|
||||
for i := 0; i < valuesInRange; i++ {
|
||||
this.content = append(this.content, uint16(firstOfRun+i))
|
||||
}
|
||||
return this
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) numberOfRuns() (nr int) {
|
||||
n := len(ac.content)
|
||||
var runlen uint16
|
||||
var cur, prev uint16
|
||||
|
||||
switch n {
|
||||
case 0:
|
||||
return 0
|
||||
case 1:
|
||||
return 1
|
||||
default:
|
||||
for i := 1; i < n; i++ {
|
||||
prev = ac.content[i-1]
|
||||
cur = ac.content[i]
|
||||
|
||||
if cur == prev+1 {
|
||||
runlen++
|
||||
} else {
|
||||
if cur < prev {
|
||||
panic("then fundamental arrayContainer assumption of sorted ac.content was broken")
|
||||
}
|
||||
if cur == prev {
|
||||
panic("then fundamental arrayContainer assumption of deduplicated content was broken")
|
||||
} else {
|
||||
nr++
|
||||
runlen = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
nr++
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// convert to run or array *if needed*
|
||||
func (ac *arrayContainer) toEfficientContainer() container {
|
||||
|
||||
numRuns := ac.numberOfRuns()
|
||||
|
||||
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := ac.getCardinality()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return newRunContainer16FromArray(ac)
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
return ac
|
||||
}
|
||||
return ac.toBitmapContainer()
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) containerType() contype {
|
||||
return arrayContype
|
||||
}
|
134
vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go
generated
vendored
Normal file
134
vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go
generated
vendored
Normal file
@ -0,0 +1,134 @@
|
||||
package roaring
|
||||
|
||||
// NOTE: THIS FILE WAS PRODUCED BY THE
|
||||
// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp)
|
||||
// DO NOT EDIT
|
||||
|
||||
import "github.com/tinylib/msgp/msgp"
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zbzg uint32
|
||||
zbzg, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zbzg > 0 {
|
||||
zbzg--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "content":
|
||||
var zbai uint32
|
||||
zbai, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.content) >= int(zbai) {
|
||||
z.content = (z.content)[:zbai]
|
||||
} else {
|
||||
z.content = make([]uint16, zbai)
|
||||
}
|
||||
for zxvk := range z.content {
|
||||
z.content[zxvk], err = dc.ReadUint16()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 1
|
||||
// write "content"
|
||||
err = en.Append(0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.content)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zxvk := range z.content {
|
||||
err = en.WriteUint16(z.content[zxvk])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 1
|
||||
// string "content"
|
||||
o = append(o, 0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.content)))
|
||||
for zxvk := range z.content {
|
||||
o = msgp.AppendUint16(o, z.content[zxvk])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zcmr uint32
|
||||
zcmr, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zcmr > 0 {
|
||||
zcmr--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "content":
|
||||
var zajw uint32
|
||||
zajw, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.content) >= int(zajw) {
|
||||
z.content = (z.content)[:zajw]
|
||||
} else {
|
||||
z.content = make([]uint16, zajw)
|
||||
}
|
||||
for zxvk := range z.content {
|
||||
z.content[zxvk], bts, err = msgp.ReadUint16Bytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *arrayContainer) Msgsize() (s int) {
|
||||
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
|
||||
return
|
||||
}
|
982
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go
generated
vendored
Normal file
982
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go
generated
vendored
Normal file
@ -0,0 +1,982 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
//go:generate msgp -unexported
|
||||
|
||||
// bitmapContainer stores a set of 16-bit values as a bitset.
type bitmapContainer struct {
	// cardinality caches the number of set bits in bitmap. The lazy OR
	// operations set it to invalidCardinality instead of recomputing it.
	cardinality int
	// bitmap holds one bit per value, 64 values per word;
	// newBitmapContainer allocates (1<<16)/64 words.
	bitmap []uint64
}
|
||||
|
||||
func (bc bitmapContainer) String() string {
|
||||
var s string
|
||||
for it := bc.getShortIterator(); it.hasNext(); {
|
||||
s += fmt.Sprintf("%v, ", it.next())
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func newBitmapContainer() *bitmapContainer {
|
||||
p := new(bitmapContainer)
|
||||
size := (1 << 16) / 64
|
||||
p.bitmap = make([]uint64, size, size)
|
||||
return p
|
||||
}
|
||||
|
||||
func newBitmapContainerwithRange(firstOfRun, lastOfRun int) *bitmapContainer {
|
||||
bc := newBitmapContainer()
|
||||
bc.cardinality = lastOfRun - firstOfRun + 1
|
||||
if bc.cardinality == maxCapacity {
|
||||
fill(bc.bitmap, uint64(0xffffffffffffffff))
|
||||
} else {
|
||||
firstWord := firstOfRun / 64
|
||||
lastWord := lastOfRun / 64
|
||||
zeroPrefixLength := uint64(firstOfRun & 63)
|
||||
zeroSuffixLength := uint64(63 - (lastOfRun & 63))
|
||||
|
||||
fillRange(bc.bitmap, firstWord, lastWord+1, uint64(0xffffffffffffffff))
|
||||
bc.bitmap[firstWord] ^= ((uint64(1) << zeroPrefixLength) - 1)
|
||||
blockOfOnes := (uint64(1) << zeroSuffixLength) - 1
|
||||
maskOnLeft := blockOfOnes << (uint64(64) - zeroSuffixLength)
|
||||
bc.bitmap[lastWord] ^= maskOnLeft
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) minimum() uint16 {
|
||||
for i := 0; i < len(bc.bitmap); i++ {
|
||||
w := bc.bitmap[i]
|
||||
if w != 0 {
|
||||
r := countTrailingZeros(w)
|
||||
return uint16(r + i*64)
|
||||
}
|
||||
}
|
||||
return MaxUint16
|
||||
}
|
||||
|
||||
// clz returns the number of leading zero bits of i using a binary search
// over a 32-bit half. i should be non-zero: for i == 0 this implementation
// returns 63, not 64.
func clz(i uint64) int {
	count := 1
	half := uint32(i >> 32)
	if half == 0 {
		// Upper 32 bits are empty; continue the search in the lower half.
		count += 32
		half = uint32(i)
	}
	if half&0xffff0000 == 0 {
		count += 16
		half <<= 16
	}
	if half&0xff000000 == 0 {
		count += 8
		half <<= 8
	}
	if half&0xf0000000 == 0 {
		count += 4
		half <<= 4
	}
	if half&0xc0000000 == 0 {
		count += 2
		half <<= 2
	}
	// count started at 1; take it back when the top bit is set.
	return count - int(half>>31)
}
|
||||
|
||||
func (bc *bitmapContainer) maximum() uint16 {
|
||||
for i := len(bc.bitmap); i > 0; i-- {
|
||||
w := bc.bitmap[i-1]
|
||||
if w != 0 {
|
||||
r := clz(w)
|
||||
return uint16((i-1)*64 + 63 - r)
|
||||
}
|
||||
}
|
||||
return uint16(0)
|
||||
}
|
||||
|
||||
type bitmapContainerShortIterator struct {
|
||||
ptr *bitmapContainer
|
||||
i int
|
||||
}
|
||||
|
||||
func (bcsi *bitmapContainerShortIterator) next() uint16 {
|
||||
j := bcsi.i
|
||||
bcsi.i = bcsi.ptr.NextSetBit(bcsi.i + 1)
|
||||
return uint16(j)
|
||||
}
|
||||
func (bcsi *bitmapContainerShortIterator) hasNext() bool {
|
||||
return bcsi.i >= 0
|
||||
}
|
||||
|
||||
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
|
||||
return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getShortIterator() shortIterable {
|
||||
return newBitmapContainerShortIterator(bc)
|
||||
}
|
||||
|
||||
type bitmapContainerManyIterator struct {
|
||||
ptr *bitmapContainer
|
||||
base int
|
||||
bitset uint64
|
||||
}
|
||||
|
||||
func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
|
||||
n := 0
|
||||
base := bcmi.base
|
||||
bitset := bcmi.bitset
|
||||
|
||||
for n < len(buf) {
|
||||
if bitset == 0 {
|
||||
base += 1
|
||||
if base >= len(bcmi.ptr.bitmap) {
|
||||
bcmi.base = base
|
||||
bcmi.bitset = bitset
|
||||
return n
|
||||
}
|
||||
bitset = bcmi.ptr.bitmap[base]
|
||||
continue
|
||||
}
|
||||
t := bitset & -bitset
|
||||
buf[n] = uint32(((base * 64) + int(popcount(t-1)))) | hs
|
||||
n = n + 1
|
||||
bitset ^= t
|
||||
}
|
||||
|
||||
bcmi.base = base
|
||||
bcmi.bitset = bitset
|
||||
return n
|
||||
}
|
||||
|
||||
func newBitmapContainerManyIterator(a *bitmapContainer) *bitmapContainerManyIterator {
|
||||
return &bitmapContainerManyIterator{a, -1, 0}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getManyIterator() manyIterable {
|
||||
return newBitmapContainerManyIterator(bc)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getSizeInBytes() int {
|
||||
return len(bc.bitmap) * 8 // + bcBaseBytes
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) serializedSizeInBytes() int {
|
||||
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
|
||||
return len(bc.bitmap) * 8
|
||||
}
|
||||
|
||||
const bcBaseBytes = int(unsafe.Sizeof(bitmapContainer{}))
|
||||
|
||||
// bitmapContainer doesn't depend on card, always fully allocated
|
||||
func bitmapContainerSizeInBytes() int {
|
||||
return bcBaseBytes + (1<<16)/8
|
||||
}
|
||||
|
||||
// bitmapEquals reports whether a and b have the same length and identical
// words at every index.
func bitmapEquals(a, b []uint64) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
|
||||
// TODO: should be written as optimized assembly
|
||||
pos := i
|
||||
base := mask
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bitset := bc.bitmap[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
x[pos] = base + uint32(popcount(t-1))
|
||||
pos++
|
||||
bitset ^= t
|
||||
}
|
||||
base += 64
|
||||
}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) equals(o container) bool {
|
||||
srb, ok := o.(*bitmapContainer)
|
||||
if ok {
|
||||
//p("bitmapContainers.equals: both are bitmapContainers")
|
||||
if srb.cardinality != bc.cardinality {
|
||||
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
|
||||
return false
|
||||
}
|
||||
return bitmapEquals(bc.bitmap, srb.bitmap)
|
||||
}
|
||||
|
||||
// use generic comparison
|
||||
if bc.getCardinality() != o.getCardinality() {
|
||||
return false
|
||||
}
|
||||
ait := o.getShortIterator()
|
||||
bit := bc.getShortIterator()
|
||||
|
||||
for ait.hasNext() {
|
||||
if bit.next() != ait.next() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iaddReturnMinimized(i uint16) container {
|
||||
bc.iadd(i)
|
||||
if bc.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iadd(i uint16) bool {
|
||||
x := int(i)
|
||||
previous := bc.bitmap[x/64]
|
||||
mask := uint64(1) << (uint(x) % 64)
|
||||
newb := previous | mask
|
||||
bc.bitmap[x/64] = newb
|
||||
bc.cardinality += int((previous ^ newb) >> (uint(x) % 64))
|
||||
return newb != previous
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
|
||||
if bc.iremove(i) {
|
||||
if bc.cardinality == arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
// iremove returns true if i was found.
|
||||
func (bc *bitmapContainer) iremove(i uint16) bool {
|
||||
/* branchless code
|
||||
w := bc.bitmap[i>>6]
|
||||
mask := uint64(1) << (i % 64)
|
||||
neww := w &^ mask
|
||||
bc.cardinality -= int((w ^ neww) >> (i % 64))
|
||||
bc.bitmap[i>>6] = neww */
|
||||
if bc.contains(i) {
|
||||
bc.cardinality--
|
||||
bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) isFull() bool {
|
||||
return bc.cardinality == int(MaxUint16)+1
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getCardinality() int {
|
||||
return bc.cardinality
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) clone() container {
|
||||
ptr := bitmapContainer{bc.cardinality, make([]uint64, len(bc.bitmap))}
|
||||
copy(ptr.bitmap, bc.bitmap[:])
|
||||
return &ptr
|
||||
}
|
||||
|
||||
// add all values in range [firstOfRange,lastOfRange)
|
||||
func (bc *bitmapContainer) iaddRange(firstOfRange, lastOfRange int) container {
|
||||
bc.cardinality += setBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange)
|
||||
return bc
|
||||
}
|
||||
|
||||
// remove all values in range [firstOfRange,lastOfRange)
|
||||
func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container {
|
||||
bc.cardinality += resetBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange)
|
||||
if bc.getCardinality() <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
// flip all values in range [firstOfRange,endx)
|
||||
func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
|
||||
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
|
||||
if endx-firstOfRange == maxCapacity {
|
||||
//p("endx-firstOfRange == maxCapacity")
|
||||
flipBitmapRange(bc.bitmap, firstOfRange, endx)
|
||||
bc.cardinality = maxCapacity - bc.cardinality
|
||||
//p("bc.cardinality is now %v", bc.cardinality)
|
||||
} else if endx-firstOfRange > maxCapacity/2 {
|
||||
//p("endx-firstOfRange > maxCapacity/2")
|
||||
flipBitmapRange(bc.bitmap, firstOfRange, endx)
|
||||
bc.computeCardinality()
|
||||
} else {
|
||||
bc.cardinality += flipBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, endx)
|
||||
}
|
||||
if bc.getCardinality() <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
// flip all values in range [firstOfRange,endx)
|
||||
func (bc *bitmapContainer) not(firstOfRange, endx int) container {
|
||||
answer := bc.clone()
|
||||
return answer.inot(firstOfRange, endx)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) or(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.orArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.orBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
return x.orBitmapContainer(bc)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orCardinality(a container) int {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.orArrayCardinality(x)
|
||||
case *bitmapContainer:
|
||||
return bc.orBitmapCardinality(x)
|
||||
case *runContainer16:
|
||||
return x.orBitmapContainerCardinality(bc)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) ior(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.iorArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.iorBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
for i := range x.iv {
|
||||
bc.iaddRange(int(x.iv[i].start), int(x.iv[i].last())+1)
|
||||
}
|
||||
if bc.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
panic(fmt.Errorf("unsupported container type %T", a))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyIOR(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.lazyIORArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.lazyIORBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
|
||||
// Manually inlined setBitmapRange function
|
||||
bitmap := bc.bitmap
|
||||
for _, iv := range x.iv {
|
||||
start := int(iv.start)
|
||||
end := int(iv.last()) + 1
|
||||
if start >= end {
|
||||
continue
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
if firstword == endword {
|
||||
bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))
|
||||
continue
|
||||
}
|
||||
bitmap[firstword] |= ^uint64(0) << uint(start%64)
|
||||
for i := firstword + 1; i < endword; i++ {
|
||||
bitmap[i] = ^uint64(0)
|
||||
}
|
||||
bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64)
|
||||
}
|
||||
bc.cardinality = invalidCardinality
|
||||
return bc
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyOR(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.lazyORArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.lazyORBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
}
|
||||
// TODO: implement lazy OR
|
||||
return x.orBitmapContainer(bc)
|
||||
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orArray(value2 *arrayContainer) container {
|
||||
answer := bc.clone().(*bitmapContainer)
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
bef := answer.bitmap[i]
|
||||
aft := bef | (uint64(1) << (v % 64))
|
||||
answer.bitmap[i] = aft
|
||||
answer.cardinality += int((bef - aft) >> 63)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orArrayCardinality(value2 *arrayContainer) int {
|
||||
answer := 0
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
// branchless:
|
||||
v := value2.content[k]
|
||||
i := uint(v) >> 6
|
||||
bef := bc.bitmap[i]
|
||||
aft := bef | (uint64(1) << (v % 64))
|
||||
answer += int((bef - aft) >> 63)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orBitmap(value2 *bitmapContainer) container {
|
||||
answer := newBitmapContainer()
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k]
|
||||
}
|
||||
answer.computeCardinality()
|
||||
if answer.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orBitmapCardinality(value2 *bitmapContainer) int {
|
||||
return int(popcntOrSlice(bc.bitmap, value2.bitmap))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andBitmapCardinality(value2 *bitmapContainer) int {
|
||||
return int(popcntAndSlice(bc.bitmap, value2.bitmap))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) computeCardinality() {
|
||||
bc.cardinality = int(popcntSlice(bc.bitmap))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iorArray(ac *arrayContainer) container {
|
||||
for k := range ac.content {
|
||||
vc := ac.content[k]
|
||||
i := uint(vc) >> 6
|
||||
bef := bc.bitmap[i]
|
||||
aft := bef | (uint64(1) << (vc % 64))
|
||||
bc.bitmap[i] = aft
|
||||
bc.cardinality += int((bef - aft) >> 63)
|
||||
}
|
||||
if bc.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container {
|
||||
answer := bc
|
||||
answer.cardinality = 0
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k]
|
||||
}
|
||||
answer.computeCardinality()
|
||||
if bc.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container {
|
||||
answer := bc
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
vc := value2.content[k]
|
||||
i := uint(vc) >> 6
|
||||
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64))
|
||||
}
|
||||
answer.cardinality = invalidCardinality
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyORArray(value2 *arrayContainer) container {
|
||||
answer := bc.clone().(*bitmapContainer)
|
||||
return answer.lazyIORArray(value2)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyIORBitmap(value2 *bitmapContainer) container {
|
||||
answer := bc
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k]
|
||||
}
|
||||
bc.cardinality = invalidCardinality
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) lazyORBitmap(value2 *bitmapContainer) container {
|
||||
answer := bc.clone().(*bitmapContainer)
|
||||
return answer.lazyIORBitmap(value2)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) xor(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.xorArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.xorBitmap(x)
|
||||
case *runContainer16:
|
||||
return x.xorBitmap(bc)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) xorArray(value2 *arrayContainer) container {
|
||||
answer := bc.clone().(*bitmapContainer)
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
vc := value2.content[k]
|
||||
index := uint(vc) >> 6
|
||||
abi := answer.bitmap[index]
|
||||
mask := uint64(1) << (vc % 64)
|
||||
answer.cardinality += 1 - 2*int((abi&mask)>>(vc%64))
|
||||
answer.bitmap[index] = abi ^ mask
|
||||
}
|
||||
if answer.cardinality <= arrayDefaultMaxSize {
|
||||
return answer.toArrayContainer()
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) rank(x uint16) int {
|
||||
// TODO: rewrite in assembly
|
||||
leftover := (uint(x) + 1) & 63
|
||||
if leftover == 0 {
|
||||
return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64]))
|
||||
}
|
||||
return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64]) + popcount(bc.bitmap[(uint(x)+1)/64]<<(64-leftover)))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) selectInt(x uint16) int {
|
||||
remaining := x
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
w := popcount(bc.bitmap[k])
|
||||
if uint16(w) > remaining {
|
||||
return k*64 + selectBitPosition(bc.bitmap[k], int(remaining))
|
||||
}
|
||||
remaining -= uint16(w)
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) xorBitmap(value2 *bitmapContainer) container {
|
||||
newCardinality := int(popcntXorSlice(bc.bitmap, value2.bitmap))
|
||||
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
answer := newBitmapContainer()
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] ^ value2.bitmap[k]
|
||||
}
|
||||
answer.cardinality = newCardinality
|
||||
if answer.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
ac := newArrayContainerSize(newCardinality)
|
||||
fillArrayXOR(ac.content, bc.bitmap, value2.bitmap)
|
||||
ac.content = ac.content[:newCardinality]
|
||||
return ac
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) and(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.andArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.andBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return bc.clone()
|
||||
}
|
||||
return x.andBitmapContainer(bc)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andCardinality(a container) int {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.andArrayCardinality(x)
|
||||
case *bitmapContainer:
|
||||
return bc.andBitmapCardinality(x)
|
||||
case *runContainer16:
|
||||
return x.andBitmapContainerCardinality(bc)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) intersects(a container) bool {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.intersectsArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.intersectsBitmap(x)
|
||||
case *runContainer16:
|
||||
return x.intersects(bc)
|
||||
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iand(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.iandArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.iandBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return bc.clone()
|
||||
}
|
||||
return bc.iandRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandRun16(rc *runContainer16) container {
|
||||
rcb := newBitmapContainerFromRun(rc)
|
||||
return bc.iandBitmap(rcb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandArray(ac *arrayContainer) container {
|
||||
acb := ac.toBitmapContainer()
|
||||
return bc.iandBitmap(acb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andArray(value2 *arrayContainer) *arrayContainer {
|
||||
answer := newArrayContainerCapacity(len(value2.content))
|
||||
answer.content = answer.content[:cap(answer.content)]
|
||||
c := value2.getCardinality()
|
||||
pos := 0
|
||||
for k := 0; k < c; k++ {
|
||||
v := value2.content[k]
|
||||
answer.content[pos] = v
|
||||
pos += int(bc.bitValue(v))
|
||||
}
|
||||
answer.content = answer.content[:pos]
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andArrayCardinality(value2 *arrayContainer) int {
|
||||
c := value2.getCardinality()
|
||||
pos := 0
|
||||
for k := 0; k < c; k++ {
|
||||
v := value2.content[k]
|
||||
pos += int(bc.bitValue(v))
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getCardinalityInRange(start, end uint) int {
|
||||
if start >= end {
|
||||
return 0
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
const allones = ^uint64(0)
|
||||
if firstword == endword {
|
||||
return int(popcount(bc.bitmap[firstword] & ((allones << (start % 64)) & (allones >> (64 - (end % 64))))))
|
||||
}
|
||||
answer := popcount(bc.bitmap[firstword] & (allones << (start % 64)))
|
||||
answer += popcntSlice(bc.bitmap[firstword+1 : endword])
|
||||
answer += popcount(bc.bitmap[endword] & (allones >> (64 - (end % 64))))
|
||||
return int(answer)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
|
||||
newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap))
|
||||
if newcardinality > arrayDefaultMaxSize {
|
||||
answer := newBitmapContainer()
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] & value2.bitmap[k]
|
||||
}
|
||||
answer.cardinality = newcardinality
|
||||
return answer
|
||||
}
|
||||
ac := newArrayContainerSize(newcardinality)
|
||||
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
|
||||
ac.content = ac.content[:newcardinality] //not sure why i need this
|
||||
return ac
|
||||
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
v := value2.content[k]
|
||||
if bc.contains(v) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
if (bc.bitmap[k] & value2.bitmap[k]) != 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
|
||||
newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap))
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bc.bitmap[k] = bc.bitmap[k] & value2.bitmap[k]
|
||||
}
|
||||
bc.cardinality = newcardinality
|
||||
|
||||
if newcardinality <= arrayDefaultMaxSize {
|
||||
return newArrayContainerFromBitmap(bc)
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andNot(a container) container {
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.andNotArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.andNotBitmap(x)
|
||||
case *runContainer16:
|
||||
return bc.andNotRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return bc.andNotBitmap(rcb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNot(a container) container {
|
||||
//p("bitmapContainer.iandNot() starting")
|
||||
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.iandNotArray(x)
|
||||
case *bitmapContainer:
|
||||
return bc.iandNotBitmapSurely(x)
|
||||
case *runContainer16:
|
||||
return bc.iandNotRun16(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNotArray(ac *arrayContainer) container {
|
||||
acb := ac.toBitmapContainer()
|
||||
return bc.iandNotBitmapSurely(acb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNotRun16(rc *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return bc.iandNotBitmapSurely(rcb)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andNotArray(value2 *arrayContainer) container {
|
||||
answer := bc.clone().(*bitmapContainer)
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
vc := value2.content[k]
|
||||
i := uint(vc) >> 6
|
||||
oldv := answer.bitmap[i]
|
||||
newv := oldv &^ (uint64(1) << (vc % 64))
|
||||
answer.bitmap[i] = newv
|
||||
answer.cardinality -= int((oldv ^ newv) >> (vc % 64))
|
||||
}
|
||||
if answer.cardinality <= arrayDefaultMaxSize {
|
||||
return answer.toArrayContainer()
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container {
|
||||
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
answer := newBitmapContainer()
|
||||
for k := 0; k < len(answer.bitmap); k++ {
|
||||
answer.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
|
||||
}
|
||||
answer.cardinality = newCardinality
|
||||
return answer
|
||||
}
|
||||
ac := newArrayContainerSize(newCardinality)
|
||||
fillArrayANDNOT(ac.content, bc.bitmap, value2.bitmap)
|
||||
return ac
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer {
|
||||
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
|
||||
}
|
||||
bc.cardinality = newCardinality
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) contains(i uint16) bool { //testbit
|
||||
x := uint(i)
|
||||
w := bc.bitmap[x>>6]
|
||||
mask := uint64(1) << (x & 63)
|
||||
return (w & mask) != 0
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) bitValue(i uint16) uint64 {
|
||||
x := uint(i)
|
||||
w := bc.bitmap[x>>6]
|
||||
return (w >> (x & 63)) & 1
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) loadData(arrayContainer *arrayContainer) {
|
||||
bc.cardinality = arrayContainer.getCardinality()
|
||||
c := arrayContainer.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
x := arrayContainer.content[k]
|
||||
i := int(x) / 64
|
||||
bc.bitmap[i] |= (uint64(1) << uint(x%64))
|
||||
}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
|
||||
ac := &arrayContainer{}
|
||||
ac.loadData(bc)
|
||||
return ac
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) fillArray(container []uint16) {
|
||||
//TODO: rewrite in assembly
|
||||
pos := 0
|
||||
base := 0
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bitset := bc.bitmap[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((base + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
base += 64
|
||||
}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) NextSetBit(i int) int {
|
||||
x := i / 64
|
||||
if x >= len(bc.bitmap) {
|
||||
return -1
|
||||
}
|
||||
w := bc.bitmap[x]
|
||||
w = w >> uint(i%64)
|
||||
if w != 0 {
|
||||
return i + countTrailingZeros(w)
|
||||
}
|
||||
x++
|
||||
for ; x < len(bc.bitmap); x++ {
|
||||
if bc.bitmap[x] != 0 {
|
||||
return (x * 64) + countTrailingZeros(bc.bitmap[x])
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// reference the java implementation
|
||||
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892
|
||||
//
|
||||
func (bc *bitmapContainer) numberOfRuns() int {
|
||||
if bc.cardinality == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var numRuns uint64
|
||||
nextWord := bc.bitmap[0]
|
||||
|
||||
for i := 0; i < len(bc.bitmap)-1; i++ {
|
||||
word := nextWord
|
||||
nextWord = bc.bitmap[i+1]
|
||||
numRuns += popcount((^word)&(word<<1)) + ((word >> 63) &^ nextWord)
|
||||
}
|
||||
|
||||
word := nextWord
|
||||
numRuns += popcount((^word) & (word << 1))
|
||||
if (word & 0x8000000000000000) != 0 {
|
||||
numRuns++
|
||||
}
|
||||
|
||||
return int(numRuns)
|
||||
}
|
||||
|
||||
// convert to run or array *if needed*
|
||||
func (bc *bitmapContainer) toEfficientContainer() container {
|
||||
|
||||
numRuns := bc.numberOfRuns()
|
||||
|
||||
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := bc.getCardinality()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return newRunContainer16FromBitmapContainer(bc)
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
|
||||
|
||||
if len(rc.iv) == 1 {
|
||||
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
|
||||
}
|
||||
|
||||
bc := newBitmapContainer()
|
||||
for i := range rc.iv {
|
||||
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) containerType() contype {
|
||||
return bitmapContype
|
||||
}
|
415
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go
generated
vendored
Normal file
415
vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go
generated
vendored
Normal file
@ -0,0 +1,415 @@
|
||||
package roaring
|
||||
|
||||
// NOTE: THIS FILE WAS PRODUCED BY THE
|
||||
// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp)
|
||||
// DO NOT EDIT
|
||||
|
||||
import "github.com/tinylib/msgp/msgp"
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zbzg uint32
|
||||
zbzg, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zbzg > 0 {
|
||||
zbzg--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "cardinality":
|
||||
z.cardinality, err = dc.ReadInt()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "bitmap":
|
||||
var zbai uint32
|
||||
zbai, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.bitmap) >= int(zbai) {
|
||||
z.bitmap = (z.bitmap)[:zbai]
|
||||
} else {
|
||||
z.bitmap = make([]uint64, zbai)
|
||||
}
|
||||
for zxvk := range z.bitmap {
|
||||
z.bitmap[zxvk], err = dc.ReadUint64()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "cardinality"
|
||||
err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteInt(z.cardinality)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "bitmap"
|
||||
err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.bitmap)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zxvk := range z.bitmap {
|
||||
err = en.WriteUint64(z.bitmap[zxvk])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
// string "cardinality"
|
||||
o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79)
|
||||
o = msgp.AppendInt(o, z.cardinality)
|
||||
// string "bitmap"
|
||||
o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.bitmap)))
|
||||
for zxvk := range z.bitmap {
|
||||
o = msgp.AppendUint64(o, z.bitmap[zxvk])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zcmr uint32
|
||||
zcmr, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zcmr > 0 {
|
||||
zcmr--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "cardinality":
|
||||
z.cardinality, bts, err = msgp.ReadIntBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "bitmap":
|
||||
var zajw uint32
|
||||
zajw, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.bitmap) >= int(zajw) {
|
||||
z.bitmap = (z.bitmap)[:zajw]
|
||||
} else {
|
||||
z.bitmap = make([]uint64, zajw)
|
||||
}
|
||||
for zxvk := range z.bitmap {
|
||||
z.bitmap[zxvk], bts, err = msgp.ReadUint64Bytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *bitmapContainer) Msgsize() (s int) {
|
||||
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zhct uint32
|
||||
zhct, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zhct > 0 {
|
||||
zhct--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "ptr":
|
||||
if dc.IsNil() {
|
||||
err = dc.ReadNil()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
z.ptr = nil
|
||||
} else {
|
||||
if z.ptr == nil {
|
||||
z.ptr = new(bitmapContainer)
|
||||
}
|
||||
var zcua uint32
|
||||
zcua, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zcua > 0 {
|
||||
zcua--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "cardinality":
|
||||
z.ptr.cardinality, err = dc.ReadInt()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "bitmap":
|
||||
var zxhx uint32
|
||||
zxhx, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.ptr.bitmap) >= int(zxhx) {
|
||||
z.ptr.bitmap = (z.ptr.bitmap)[:zxhx]
|
||||
} else {
|
||||
z.ptr.bitmap = make([]uint64, zxhx)
|
||||
}
|
||||
for zwht := range z.ptr.bitmap {
|
||||
z.ptr.bitmap[zwht], err = dc.ReadUint64()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case "i":
|
||||
z.i, err = dc.ReadInt()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "ptr"
|
||||
err = en.Append(0x82, 0xa3, 0x70, 0x74, 0x72)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if z.ptr == nil {
|
||||
err = en.WriteNil()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
// map header, size 2
|
||||
// write "cardinality"
|
||||
err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteInt(z.ptr.cardinality)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "bitmap"
|
||||
err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.ptr.bitmap)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zwht := range z.ptr.bitmap {
|
||||
err = en.WriteUint64(z.ptr.bitmap[zwht])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
// write "i"
|
||||
err = en.Append(0xa1, 0x69)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteInt(z.i)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
// string "ptr"
|
||||
o = append(o, 0x82, 0xa3, 0x70, 0x74, 0x72)
|
||||
if z.ptr == nil {
|
||||
o = msgp.AppendNil(o)
|
||||
} else {
|
||||
// map header, size 2
|
||||
// string "cardinality"
|
||||
o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79)
|
||||
o = msgp.AppendInt(o, z.ptr.cardinality)
|
||||
// string "bitmap"
|
||||
o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.ptr.bitmap)))
|
||||
for zwht := range z.ptr.bitmap {
|
||||
o = msgp.AppendUint64(o, z.ptr.bitmap[zwht])
|
||||
}
|
||||
}
|
||||
// string "i"
|
||||
o = append(o, 0xa1, 0x69)
|
||||
o = msgp.AppendInt(o, z.i)
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zlqf uint32
|
||||
zlqf, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zlqf > 0 {
|
||||
zlqf--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "ptr":
|
||||
if msgp.IsNil(bts) {
|
||||
bts, err = msgp.ReadNilBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
z.ptr = nil
|
||||
} else {
|
||||
if z.ptr == nil {
|
||||
z.ptr = new(bitmapContainer)
|
||||
}
|
||||
var zdaf uint32
|
||||
zdaf, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zdaf > 0 {
|
||||
zdaf--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "cardinality":
|
||||
z.ptr.cardinality, bts, err = msgp.ReadIntBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "bitmap":
|
||||
var zpks uint32
|
||||
zpks, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.ptr.bitmap) >= int(zpks) {
|
||||
z.ptr.bitmap = (z.ptr.bitmap)[:zpks]
|
||||
} else {
|
||||
z.ptr.bitmap = make([]uint64, zpks)
|
||||
}
|
||||
for zwht := range z.ptr.bitmap {
|
||||
z.ptr.bitmap[zwht], bts, err = msgp.ReadUint64Bytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case "i":
|
||||
z.i, bts, err = msgp.ReadIntBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *bitmapContainerShortIterator) Msgsize() (s int) {
|
||||
s = 1 + 4
|
||||
if z.ptr == nil {
|
||||
s += msgp.NilSize
|
||||
} else {
|
||||
s += 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.ptr.bitmap) * (msgp.Uint64Size))
|
||||
}
|
||||
s += 2 + msgp.IntSize
|
||||
return
|
||||
}
|
11
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
Normal file
11
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
// +build go1.9
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countTrailingZeros returns the number of consecutive zero bits at the least
// significant end of x, as computed by bits.TrailingZeros64 (64 for x == 0).
func countTrailingZeros(x uint64) int {
	zeros := bits.TrailingZeros64(x)
	return zeros
}
|
71
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go
generated
vendored
Normal file
71
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go
generated
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// Reuse of portions of go/src/math/big standard lib code
|
||||
// under this license:
|
||||
/*
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// deBruijn32 and deBruijn32Lookup are the 32-bit de Bruijn multiplier and its
// position table.
const deBruijn32 = 0x077CB531

var deBruijn32Lookup = []byte{
	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}

// deBruijn64 and deBruijn64Lookup are the 64-bit de Bruijn multiplier and its
// position table, used by countTrailingZeros.
const deBruijn64 = 0x03f79d71b4ca8b09

var deBruijn64Lookup = []byte{
	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}

// countTrailingZeros returns the number of consecutive least significant zero
// bits of x, and 64 when x is 0.
//
// x & -x keeps only the lowest set bit, i.e. 2^k for the bit index k.
// Multiplying the de Bruijn constant by 2^k shifts it left by k bits, and
// because every six-bit window of that constant is distinct, the top six bits
// of the product identify k through the lookup table.
// (Knuth, volume 4, section 7.3.1)
func countTrailingZeros(x uint64) int {
	if x == 0 {
		// The multiplication trick cannot represent "no bit set".
		return 64
	}
	lowest := x & -x
	return int(deBruijn64Lookup[(lowest*deBruijn64)>>58])
}
|
215
vendor/github.com/RoaringBitmap/roaring/fastaggregation.go
generated
vendored
Normal file
215
vendor/github.com/RoaringBitmap/roaring/fastaggregation.go
generated
vendored
Normal file
@ -0,0 +1,215 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
)
|
||||
|
||||
// Or function that requires repairAfterLazy
|
||||
func lazyOR(x1, x2 *Bitmap) *Bitmap {
|
||||
answer := NewBitmap()
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
length1 := x1.highlowcontainer.size()
|
||||
length2 := x2.highlowcontainer.size()
|
||||
main:
|
||||
for (pos1 < length1) && (pos2 < length2) {
|
||||
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
|
||||
pos1++
|
||||
if pos1 == length1 {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else if s1 > s2 {
|
||||
answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
|
||||
pos2++
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
|
||||
switch t := c1.(type) {
|
||||
case *arrayContainer:
|
||||
c1 = t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
c1 = t.toBitmapContainer()
|
||||
}
|
||||
}
|
||||
|
||||
answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if pos1 == length1 {
|
||||
answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||
} else if pos2 == length2 {
|
||||
answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// In-place Or function that requires repairAfterLazy
|
||||
func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
length1 := x1.highlowcontainer.size()
|
||||
length2 := x2.highlowcontainer.size()
|
||||
main:
|
||||
for (pos1 < length1) && (pos2 < length2) {
|
||||
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
pos1++
|
||||
if pos1 == length1 {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else if s1 > s2 {
|
||||
x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
|
||||
pos2++
|
||||
pos1++
|
||||
length1++
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
|
||||
switch t := c1.(type) {
|
||||
case *arrayContainer:
|
||||
c1 = t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
c1 = t.toBitmapContainer()
|
||||
}
|
||||
case *bitmapContainer:
|
||||
c1 = x1.highlowcontainer.getWritableContainerAtIndex(pos1)
|
||||
}
|
||||
|
||||
x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||
x1.highlowcontainer.needCopyOnWrite[pos1] = false
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if pos1 == length1 {
|
||||
x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||
}
|
||||
return x1
|
||||
}
|
||||
|
||||
// to be called after lazy aggregates
|
||||
func (x1 *Bitmap) repairAfterLazy() {
|
||||
for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
|
||||
c := x1.highlowcontainer.getContainerAtIndex(pos)
|
||||
switch c.(type) {
|
||||
case *bitmapContainer:
|
||||
if c.(*bitmapContainer).cardinality == invalidCardinality {
|
||||
c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
|
||||
c.(*bitmapContainer).computeCardinality()
|
||||
if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
|
||||
x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
|
||||
} else if c.(*bitmapContainer).isFull() {
|
||||
x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FastAnd computes the intersection between many bitmaps quickly
|
||||
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
|
||||
// of manually calling "And" many times.
|
||||
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := And(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer.And(bm)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
|
||||
// It might also be faster than calling Or repeatedly.
|
||||
func FastOr(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := lazyOR(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer = answer.lazyOR(bm)
|
||||
}
|
||||
// here is where repairAfterLazy is called.
|
||||
answer.repairAfterLazy()
|
||||
return answer
|
||||
}
|
||||
|
||||
// HeapOr computes the union between many bitmaps quickly using a heap.
|
||||
// It might be faster than calling Or repeatedly.
|
||||
func HeapOr(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
}
|
||||
// TODO: for better speed, we could do the operation lazily, see Java implementation
|
||||
pq := make(priorityQueue, len(bitmaps))
|
||||
for i, bm := range bitmaps {
|
||||
pq[i] = &item{bm, i}
|
||||
}
|
||||
heap.Init(&pq)
|
||||
|
||||
for pq.Len() > 1 {
|
||||
x1 := heap.Pop(&pq).(*item)
|
||||
x2 := heap.Pop(&pq).(*item)
|
||||
heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
|
||||
}
|
||||
return heap.Pop(&pq).(*item).value
|
||||
}
|
||||
|
||||
// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated).
|
||||
// Internally, this function uses a heap.
|
||||
// It might be faster than calling Xor repeatedly.
|
||||
func HeapXor(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
}
|
||||
|
||||
pq := make(priorityQueue, len(bitmaps))
|
||||
for i, bm := range bitmaps {
|
||||
pq[i] = &item{bm, i}
|
||||
}
|
||||
heap.Init(&pq)
|
||||
|
||||
for pq.Len() > 1 {
|
||||
x1 := heap.Pop(&pq).(*item)
|
||||
x2 := heap.Pop(&pq).(*item)
|
||||
heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
|
||||
}
|
||||
return heap.Pop(&pq).(*item).value
|
||||
}
|
23
vendor/github.com/RoaringBitmap/roaring/manyiterator.go
generated
vendored
Normal file
23
vendor/github.com/RoaringBitmap/roaring/manyiterator.go
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
package roaring
|
||||
|
||||
// manyIterable is implemented by iterators that can emit several values per
// call.
type manyIterable interface {
	// nextMany fills buf with up to len(buf) values, each combined with hs,
	// and returns how many were written.
	nextMany(hs uint32, buf []uint32) int
}

// manyIterator walks a slice of 16-bit values, remembering in loc the index
// of the next value to hand out.
type manyIterator struct {
	slice []uint16
	loc   int
}

// nextMany copies values from the current location into buf, OR-ing hs into
// each one, until buf is full or the slice is exhausted. It advances the
// location past the values handed out and returns how many were written
// (0 once the slice is exhausted).
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
	written := 0
	pos := si.loc
	for ; written < len(buf) && pos < len(si.slice); written, pos = written+1, pos+1 {
		buf[written] = hs | uint32(si.slice[pos])
	}
	si.loc = pos
	return written
}
|
613
vendor/github.com/RoaringBitmap/roaring/parallel.go
generated
vendored
Normal file
613
vendor/github.com/RoaringBitmap/roaring/parallel.go
generated
vendored
Normal file
@ -0,0 +1,613 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// defaultWorkerCount is initialised at package load to the value reported by
// runtime.NumCPU. Presumably the default number of workers for the parallel
// aggregations in this file — confirm against its users.
var defaultWorkerCount = runtime.NumCPU()
|
||||
|
||||
type bitmapContainerKey struct {
|
||||
key uint16
|
||||
idx int
|
||||
bitmap *Bitmap
|
||||
}
|
||||
|
||||
type multipleContainers struct {
|
||||
key uint16
|
||||
containers []container
|
||||
idx int
|
||||
}
|
||||
|
||||
type keyedContainer struct {
|
||||
key uint16
|
||||
container container
|
||||
idx int
|
||||
}
|
||||
|
||||
type bitmapContainerHeap []bitmapContainerKey
|
||||
|
||||
func (h bitmapContainerHeap) Len() int { return len(h) }
|
||||
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
|
||||
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
||||
|
||||
func (h *bitmapContainerHeap) Push(x interface{}) {
|
||||
// Push and Pop use pointer receivers because they modify the slice's length,
|
||||
// not just its contents.
|
||||
*h = append(*h, x.(bitmapContainerKey))
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) Pop() interface{} {
|
||||
old := *h
|
||||
n := len(old)
|
||||
x := old[n-1]
|
||||
*h = old[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
|
||||
return h[0]
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
|
||||
k := h.Peek()
|
||||
key = k.key
|
||||
container = k.bitmap.highlowcontainer.containers[k.idx]
|
||||
|
||||
newIdx := k.idx + 1
|
||||
if newIdx < k.bitmap.highlowcontainer.size() {
|
||||
k = bitmapContainerKey{
|
||||
k.bitmap.highlowcontainer.keys[newIdx],
|
||||
newIdx,
|
||||
k.bitmap,
|
||||
}
|
||||
(*h)[0] = k
|
||||
heap.Fix(h, 0)
|
||||
} else {
|
||||
heap.Pop(h)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
|
||||
if h.Len() == 0 {
|
||||
return multipleContainers{}
|
||||
}
|
||||
|
||||
key, container := h.popIncrementing()
|
||||
containers = append(containers, container)
|
||||
|
||||
for h.Len() > 0 && key == h.Peek().key {
|
||||
_, container = h.popIncrementing()
|
||||
containers = append(containers, container)
|
||||
}
|
||||
|
||||
return multipleContainers{
|
||||
key,
|
||||
containers,
|
||||
-1,
|
||||
}
|
||||
}
|
||||
|
||||
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
|
||||
// Initialize heap
|
||||
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
|
||||
for _, bitmap := range bitmaps {
|
||||
if !bitmap.IsEmpty() {
|
||||
key := bitmapContainerKey{
|
||||
bitmap.highlowcontainer.keys[0],
|
||||
0,
|
||||
bitmap,
|
||||
}
|
||||
h = append(h, key)
|
||||
}
|
||||
}
|
||||
|
||||
heap.Init(&h)
|
||||
|
||||
return h
|
||||
}
|
||||
|
||||
func repairAfterLazy(c container) container {
|
||||
switch t := c.(type) {
|
||||
case *bitmapContainer:
|
||||
if t.cardinality == invalidCardinality {
|
||||
t.computeCardinality()
|
||||
}
|
||||
|
||||
if t.getCardinality() <= arrayDefaultMaxSize {
|
||||
return t.toArrayContainer()
|
||||
} else if c.(*bitmapContainer).isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func toBitmapContainer(c container) container {
|
||||
switch t := c.(type) {
|
||||
case *arrayContainer:
|
||||
return t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
return t.toBitmapContainer()
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
|
||||
expectedKeys := -1
|
||||
appendedKeys := 0
|
||||
keys := make([]uint16, 0)
|
||||
containers := make([]container, 0)
|
||||
for appendedKeys != expectedKeys {
|
||||
select {
|
||||
case item := <-resultChan:
|
||||
if len(keys) <= item.idx {
|
||||
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
|
||||
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
|
||||
}
|
||||
keys[item.idx] = item.key
|
||||
containers[item.idx] = item.container
|
||||
|
||||
appendedKeys++
|
||||
case msg := <-expectedKeysChan:
|
||||
expectedKeys = msg
|
||||
}
|
||||
}
|
||||
answer := &Bitmap{
|
||||
roaringArray{
|
||||
make([]uint16, 0, expectedKeys),
|
||||
make([]container, 0, expectedKeys),
|
||||
make([]bool, 0, expectedKeys),
|
||||
false,
|
||||
nil,
|
||||
},
|
||||
}
|
||||
for i := range keys {
|
||||
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
|
||||
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
|
||||
}
|
||||
}
|
||||
|
||||
bitmapChan <- answer
|
||||
}
|
||||
|
||||
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
|
||||
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
|
||||
bitmapCount := len(bitmaps)
|
||||
if bitmapCount == 0 {
|
||||
return NewBitmap()
|
||||
} else if bitmapCount == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
h := newBitmapContainerHeap(bitmaps...)
|
||||
|
||||
bitmapChan := make(chan *Bitmap)
|
||||
inputChan := make(chan multipleContainers, 128)
|
||||
resultChan := make(chan keyedContainer, 32)
|
||||
expectedKeysChan := make(chan int)
|
||||
|
||||
pool := sync.Pool{
|
||||
New: func() interface{} {
|
||||
return make([]container, 0, len(bitmaps))
|
||||
},
|
||||
}
|
||||
|
||||
orFunc := func() {
|
||||
// Assumes only structs with >=2 containers are passed
|
||||
for input := range inputChan {
|
||||
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
|
||||
for _, next := range input.containers[2:] {
|
||||
c = c.lazyIOR(next)
|
||||
}
|
||||
c = repairAfterLazy(c)
|
||||
kx := keyedContainer{
|
||||
input.key,
|
||||
c,
|
||||
input.idx,
|
||||
}
|
||||
resultChan <- kx
|
||||
pool.Put(input.containers[:0])
|
||||
}
|
||||
}
|
||||
|
||||
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go orFunc()
|
||||
}
|
||||
|
||||
idx := 0
|
||||
for h.Len() > 0 {
|
||||
ck := h.Next(pool.Get().([]container))
|
||||
if len(ck.containers) == 1 {
|
||||
resultChan <- keyedContainer{
|
||||
ck.key,
|
||||
ck.containers[0],
|
||||
idx,
|
||||
}
|
||||
pool.Put(ck.containers[:0])
|
||||
} else {
|
||||
ck.idx = idx
|
||||
inputChan <- ck
|
||||
}
|
||||
idx++
|
||||
}
|
||||
expectedKeysChan <- idx
|
||||
|
||||
bitmap := <-bitmapChan
|
||||
|
||||
close(inputChan)
|
||||
close(resultChan)
|
||||
close(expectedKeysChan)
|
||||
|
||||
return bitmap
|
||||
}
|
||||
|
||||
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
bitmapCount := len(bitmaps)
|
||||
if bitmapCount == 0 {
|
||||
return NewBitmap()
|
||||
} else if bitmapCount == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
h := newBitmapContainerHeap(bitmaps...)
|
||||
|
||||
bitmapChan := make(chan *Bitmap)
|
||||
inputChan := make(chan multipleContainers, 128)
|
||||
resultChan := make(chan keyedContainer, 32)
|
||||
expectedKeysChan := make(chan int)
|
||||
|
||||
andFunc := func() {
|
||||
// Assumes only structs with >=2 containers are passed
|
||||
for input := range inputChan {
|
||||
c := input.containers[0].and(input.containers[1])
|
||||
for _, next := range input.containers[2:] {
|
||||
if c.getCardinality() == 0 {
|
||||
break
|
||||
}
|
||||
c = c.iand(next)
|
||||
}
|
||||
|
||||
// Send a nil explicitly if the result of the intersection is an empty container
|
||||
if c.getCardinality() == 0 {
|
||||
c = nil
|
||||
}
|
||||
|
||||
kx := keyedContainer{
|
||||
input.key,
|
||||
c,
|
||||
input.idx,
|
||||
}
|
||||
resultChan <- kx
|
||||
}
|
||||
}
|
||||
|
||||
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go andFunc()
|
||||
}
|
||||
|
||||
idx := 0
|
||||
for h.Len() > 0 {
|
||||
ck := h.Next(make([]container, 0, 4))
|
||||
if len(ck.containers) == bitmapCount {
|
||||
ck.idx = idx
|
||||
inputChan <- ck
|
||||
idx++
|
||||
}
|
||||
}
|
||||
expectedKeysChan <- idx
|
||||
|
||||
bitmap := <-bitmapChan
|
||||
|
||||
close(inputChan)
|
||||
close(resultChan)
|
||||
close(expectedKeysChan)
|
||||
|
||||
return bitmap
|
||||
}
|
||||
|
||||
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
var lKey uint16 = MaxUint16
|
||||
var hKey uint16 = 0
|
||||
|
||||
bitmapsFiltered := bitmaps[:0]
|
||||
for _, b := range bitmaps {
|
||||
if !b.IsEmpty() {
|
||||
bitmapsFiltered = append(bitmapsFiltered, b)
|
||||
}
|
||||
}
|
||||
bitmaps = bitmapsFiltered
|
||||
|
||||
for _, b := range bitmaps {
|
||||
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
|
||||
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||||
}
|
||||
|
||||
if lKey == MaxUint16 && hKey == 0 {
|
||||
return New()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0]
|
||||
}
|
||||
|
||||
keyRange := hKey - lKey + 1
|
||||
if keyRange == 1 {
|
||||
// revert to FastOr. Since the key range is 0
|
||||
// no container-level aggregation parallelism is achievable
|
||||
return FastOr(bitmaps...)
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
var chunkSize int
|
||||
var chunkCount int
|
||||
if parallelism*4 > int(keyRange) {
|
||||
chunkSize = 1
|
||||
chunkCount = int(keyRange)
|
||||
} else {
|
||||
chunkCount = parallelism * 4
|
||||
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
|
||||
}
|
||||
|
||||
if chunkCount*chunkSize < int(keyRange) {
|
||||
// it's fine to panic to indicate an implementation error
|
||||
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||
}
|
||||
|
||||
chunks := make([]*roaringArray, chunkCount)
|
||||
|
||||
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||
|
||||
orFunc := func() {
|
||||
for spec := range chunkSpecChan {
|
||||
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||||
for _, b := range bitmaps[2:] {
|
||||
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||||
}
|
||||
|
||||
for i, c := range ra.containers {
|
||||
ra.containers[i] = repairAfterLazy(c)
|
||||
}
|
||||
|
||||
chunkChan <- parChunk{ra, spec.idx}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go orFunc()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i := 0; i < chunkCount; i++ {
|
||||
spec := parChunkSpec{
|
||||
start: uint16(int(lKey) + i*chunkSize),
|
||||
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
|
||||
idx: int(i),
|
||||
}
|
||||
chunkSpecChan <- spec
|
||||
}
|
||||
}()
|
||||
|
||||
chunksRemaining := chunkCount
|
||||
for chunk := range chunkChan {
|
||||
chunks[chunk.idx] = chunk.ra
|
||||
chunksRemaining--
|
||||
if chunksRemaining == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
close(chunkChan)
|
||||
close(chunkSpecChan)
|
||||
|
||||
containerCount := 0
|
||||
for _, chunk := range chunks {
|
||||
containerCount += chunk.size()
|
||||
}
|
||||
|
||||
result := Bitmap{
|
||||
roaringArray{
|
||||
containers: make([]container, containerCount),
|
||||
keys: make([]uint16, containerCount),
|
||||
needCopyOnWrite: make([]bool, containerCount),
|
||||
},
|
||||
}
|
||||
|
||||
resultOffset := 0
|
||||
for _, chunk := range chunks {
|
||||
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||||
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||||
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||||
resultOffset += chunk.size()
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
// parChunkSpec describes one unit of work for ParOr: the inclusive key range
// start..end to aggregate and the position (idx) of the resulting chunk.
type parChunkSpec struct {
|
||||
start uint16
|
||||
end uint16
|
||||
idx int
|
||||
}
|
||||
|
||||
// parChunk is the result of processing one parChunkSpec: the aggregated
// containers for that key range and the position (idx) of the chunk.
// It is built with a positional literal, so the field order matters.
type parChunk struct {
|
||||
ra *roaringArray
|
||||
idx int
|
||||
}
|
||||
|
||||
// size returns the number of containers held by the chunk.
func (c parChunk) size() int {
|
||||
return c.ra.size()
|
||||
}
|
||||
|
||||
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
|
||||
for idx, key := range ra.keys {
|
||||
if key >= start && key <= last {
|
||||
return idx
|
||||
} else if key > last {
|
||||
break
|
||||
}
|
||||
}
|
||||
return ra.size()
|
||||
}
|
||||
|
||||
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||
answer := newRoaringArray()
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := parNaiveStartAt(ra1, start, last)
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint16
|
||||
var key2 uint16
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
|
||||
if key1 < key2 {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getFastContainerAtIndex(idx1, false)
|
||||
|
||||
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 == length1 || idx2 == length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
|
||||
if idx1 < length1 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
for key1 <= last {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := 0
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint16
|
||||
var key2 uint16
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
if key1 < key2 {
|
||||
idx1++
|
||||
if idx1 >= length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||||
ra1.needCopyOnWrite[idx1] = true
|
||||
idx2++
|
||||
idx1++
|
||||
length1++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getFastContainerAtIndex(idx1, true)
|
||||
|
||||
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||
ra1.needCopyOnWrite[idx1] = false
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 >= length1 || idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
ra1.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
return ra1
|
||||
}
|
11
vendor/github.com/RoaringBitmap/roaring/popcnt.go
generated
vendored
Normal file
11
vendor/github.com/RoaringBitmap/roaring/popcnt.go
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
// +build go1.9
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// popcount returns the number of bits set in x, using math/bits.
func popcount(x uint64) uint64 {
	ones := bits.OnesCount64(x)
	return uint64(ones)
}
|
103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s
generated
vendored
Normal file
103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s
generated
vendored
Normal file
@ -0,0 +1,103 @@
|
||||
// +build amd64,!appengine,!go1.9
|
||||
|
||||
// func hasAsm() bool
// Runs CPUID with AX=1 and returns bit 23 of CX (the POPCNT feature flag on
// x86-64) as the boolean result.
TEXT ·hasAsm(SB),4,$0-1
|
||||
MOVQ $1, AX
|
||||
CPUID
|
||||
SHRQ $23, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
||||
|
||||
// func popcntSliceAsm(s []uint64) uint64
// Returns the sum of the population counts of all words of s.
// AX accumulates the total, SI walks s, CX counts the remaining words
// (LOOP decrements CX); an empty slice skips the loop and returns 0.
TEXT ·popcntSliceAsm(SB),4,$0-32
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntSliceEnd
|
||||
popcntSliceLoop:
|
||||
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
LOOP popcntSliceLoop
|
||||
popcntSliceEnd:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func popcntMaskSliceAsm(s, m []uint64) uint64
// Returns the sum over i of popcount(s[i] &^ m[i]).
// The loop runs len(s) times and reads the same number of words from m;
// the length of m is not checked here.
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntMaskSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntMaskSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
NOTQ DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntMaskSliceLoop
|
||||
popcntMaskSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
// func popcntAndSliceAsm(s, m []uint64) uint64
// Returns the sum over i of popcount(s[i] & m[i]).
// The loop runs len(s) times and reads the same number of words from m;
// the length of m is not checked here.
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntAndSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntAndSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntAndSliceLoop
|
||||
popcntAndSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
// func popcntOrSliceAsm(s, m []uint64) uint64
// Returns the sum over i of popcount(s[i] | m[i]).
// The loop runs len(s) times and reads the same number of words from m;
// the length of m is not checked here.
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntOrSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntOrSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntOrSliceLoop
|
||||
popcntOrSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
// func popcntXorSliceAsm(s, m []uint64) uint64
// Returns the sum over i of popcount(s[i] ^ m[i]).
// The loop runs len(s) times and reads the same number of words from m;
// the length of m is not checked here.
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntXorSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntXorSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
XORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntXorSliceLoop
|
||||
popcntXorSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
67
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go
generated
vendored
Normal file
67
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go
generated
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
// +build amd64,!appengine,!go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// *** the following functions are defined in popcnt_amd64.s
|
||||
|
||||
//go:noescape
|
||||
|
||||
func hasAsm() bool
|
||||
|
||||
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
|
||||
var useAsm = hasAsm()
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntSliceAsm(s []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntMaskSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntAndSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntOrSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntXorSliceAsm(s, m []uint64) uint64
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntSliceAsm(s)
|
||||
}
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntMaskSliceAsm(s, m)
|
||||
}
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntAndSliceAsm(s, m)
|
||||
}
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntOrSliceAsm(s, m)
|
||||
}
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntXorSliceAsm(s, m)
|
||||
}
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
17
vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go
generated
vendored
Normal file
17
vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go
generated
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// popcount returns the number of bits set in x (bit population count).
// Taken from
// https://code.google.com/p/go/issues/detail?id=4988#c11
// credit: https://code.google.com/u/arnehormann/
// credit: https://play.golang.org/p/U7SogJ7psJ
// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
func popcount(x uint64) uint64 {
	const (
		pairs   = 0x5555555555555555
		nibbles = 0x3333333333333333
		bytes   = 0x0f0f0f0f0f0f0f0f
		ones    = 0x0101010101010101
	)
	// Fold neighbouring bits into 2-bit, 4-bit and then 8-bit partial sums.
	x -= (x >> 1) & pairs
	x = (x & nibbles) + ((x >> 2) & nibbles)
	x = (x + (x >> 4)) & bytes
	// The multiplication adds up all byte sums in the top byte.
	return (x * ones) >> 56
}
|
23
vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go
generated
vendored
Normal file
23
vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
// +build !amd64 appengine go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// popcntSlice returns the number of bits set across all words of s.
// This build always uses the pure Go implementation.
func popcntSlice(s []uint64) uint64 {
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
// popcntMaskSlice counts the bits of s that are not set in m.
// This build always uses the pure Go implementation.
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
// popcntAndSlice counts the bits set in both s and m.
// This build always uses the pure Go implementation.
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
// popcntOrSlice counts the bits set in s or m.
// This build always uses the pure Go implementation.
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
// popcntXorSlice counts the bits set in exactly one of s and m.
// This build always uses the pure Go implementation.
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
41
vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go
generated
vendored
Normal file
41
vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go
generated
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
package roaring
|
||||
|
||||
func popcntSliceGo(s []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for _, x := range s {
|
||||
cnt += popcount(x)
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntMaskSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] &^ m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntAndSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] & m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntOrSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] | m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntXorSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] ^ m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
101
vendor/github.com/RoaringBitmap/roaring/priorityqueue.go
generated
vendored
Normal file
101
vendor/github.com/RoaringBitmap/roaring/priorityqueue.go
generated
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
package roaring
|
||||
|
||||
import "container/heap"
|
||||
|
||||
/////////////
|
||||
// The priorityQueue is used to keep Bitmaps sorted.
|
||||
////////////
|
||||
|
||||
type item struct {
|
||||
value *Bitmap
|
||||
index int
|
||||
}
|
||||
|
||||
type priorityQueue []*item
|
||||
|
||||
func (pq priorityQueue) Len() int { return len(pq) }
|
||||
|
||||
func (pq priorityQueue) Less(i, j int) bool {
|
||||
return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes()
|
||||
}
|
||||
|
||||
func (pq priorityQueue) Swap(i, j int) {
|
||||
pq[i], pq[j] = pq[j], pq[i]
|
||||
pq[i].index = i
|
||||
pq[j].index = j
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Push(x interface{}) {
|
||||
n := len(*pq)
|
||||
item := x.(*item)
|
||||
item.index = n
|
||||
*pq = append(*pq, item)
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Pop() interface{} {
|
||||
old := *pq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
item.index = -1 // for safety
|
||||
*pq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) update(item *item, value *Bitmap) {
|
||||
item.value = value
|
||||
heap.Fix(pq, item.index)
|
||||
}
|
||||
|
||||
/////////////
|
||||
// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted.
|
||||
////////////
|
||||
|
||||
type containeritem struct {
|
||||
value *Bitmap
|
||||
keyindex int
|
||||
index int
|
||||
}
|
||||
|
||||
type containerPriorityQueue []*containeritem
|
||||
|
||||
func (pq containerPriorityQueue) Len() int { return len(pq) }
|
||||
|
||||
func (pq containerPriorityQueue) Less(i, j int) bool {
|
||||
k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex)
|
||||
k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex)
|
||||
if k1 != k2 {
|
||||
return k1 < k2
|
||||
}
|
||||
c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex)
|
||||
c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex)
|
||||
|
||||
return c1.getCardinality() > c2.getCardinality()
|
||||
}
|
||||
|
||||
func (pq containerPriorityQueue) Swap(i, j int) {
|
||||
pq[i], pq[j] = pq[j], pq[i]
|
||||
pq[i].index = i
|
||||
pq[j].index = j
|
||||
}
|
||||
|
||||
func (pq *containerPriorityQueue) Push(x interface{}) {
|
||||
n := len(*pq)
|
||||
item := x.(*containeritem)
|
||||
item.index = n
|
||||
*pq = append(*pq, item)
|
||||
}
|
||||
|
||||
func (pq *containerPriorityQueue) Pop() interface{} {
|
||||
old := *pq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
item.index = -1 // for safety
|
||||
*pq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) {
|
||||
// item.value = value
|
||||
// item.keyindex = keyindex
|
||||
// heap.Fix(pq, item.index)
|
||||
//}
|
1667
vendor/github.com/RoaringBitmap/roaring/rle.go
generated
vendored
Normal file
1667
vendor/github.com/RoaringBitmap/roaring/rle.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1747
vendor/github.com/RoaringBitmap/roaring/rle16.go
generated
vendored
Normal file
1747
vendor/github.com/RoaringBitmap/roaring/rle16.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1126
vendor/github.com/RoaringBitmap/roaring/rle16_gen.go
generated
vendored
Normal file
1126
vendor/github.com/RoaringBitmap/roaring/rle16_gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1118
vendor/github.com/RoaringBitmap/roaring/rle_gen.go
generated
vendored
Normal file
1118
vendor/github.com/RoaringBitmap/roaring/rle_gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
163
vendor/github.com/RoaringBitmap/roaring/rlecommon.go
generated
vendored
Normal file
163
vendor/github.com/RoaringBitmap/roaring/rlecommon.go
generated
vendored
Normal file
@ -0,0 +1,163 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// common to rle32.go and rle16.go
|
||||
|
||||
// rleVerbose controls whether p() prints show up.
|
||||
// The testing package sets this based on
|
||||
// testing.Verbose().
|
||||
var rleVerbose bool
|
||||
|
||||
// p is a shorthand for fmt.Printf with beginning and
|
||||
// trailing newlines. p() makes it easy
|
||||
// to add diagnostic print statements.
|
||||
func p(format string, args ...interface{}) {
|
||||
if rleVerbose {
|
||||
fmt.Printf("\n"+format+"\n", args...)
|
||||
}
|
||||
}
|
||||
|
||||
// MaxUint32 is the largest uint32 value.
|
||||
const MaxUint32 = 4294967295
|
||||
|
||||
// MaxUint16 is the largest 16 bit unsigned int.
|
||||
// This is the largest value an interval16 can store.
|
||||
const MaxUint16 = 65535
|
||||
|
||||
// searchOptions allows us to accelerate runContainer32.search with
|
||||
// prior knowledge of (mostly lower) bounds. This is used by Union
|
||||
// and Intersect.
|
||||
type searchOptions struct {
|
||||
// start here instead of at 0
|
||||
startIndex int64
|
||||
|
||||
// upper bound instead of len(rc.iv);
|
||||
// endxIndex == 0 means ignore the bound and use
|
||||
// endxIndex == n ==len(rc.iv) which is also
|
||||
// naturally the default for search()
|
||||
// when opt = nil.
|
||||
endxIndex int64
|
||||
}
|
||||
|
||||
// And finds the intersection of rc and b.
|
||||
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
|
||||
out := NewBitmap()
|
||||
for _, p := range rc.iv {
|
||||
for i := p.start; i <= p.last; i++ {
|
||||
if b.Contains(i) {
|
||||
out.Add(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Xor returns the exclusive-or of rc and b.
|
||||
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
for v := p.start; v <= p.last; v++ {
|
||||
if out.Contains(v) {
|
||||
out.RemoveRange(uint64(v), uint64(v+1))
|
||||
} else {
|
||||
out.Add(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Or returns the union of rc and b.
|
||||
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
for v := p.start; v <= p.last; v++ {
|
||||
out.Add(v)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// trial is used in the randomized testing of runContainers
|
||||
type trial struct {
|
||||
n int
|
||||
percentFill float64
|
||||
ntrial int
|
||||
|
||||
// only in the union test
|
||||
// only subtract test
|
||||
percentDelete float64
|
||||
|
||||
// only in 067 randomized operations
|
||||
// we do this + 1 passes
|
||||
numRandomOpsPass int
|
||||
|
||||
// allow sampling range control
|
||||
// only recent tests respect this.
|
||||
srang *interval16
|
||||
}
|
||||
|
||||
// And finds the intersection of rc and b.
|
||||
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
|
||||
out := NewBitmap()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for i := p.start; i <= plast; i++ {
|
||||
if b.Contains(uint32(i)) {
|
||||
out.Add(uint32(i))
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Xor returns the exclusive-or of rc and b.
|
||||
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for v := p.start; v <= plast; v++ {
|
||||
w := uint32(v)
|
||||
if out.Contains(w) {
|
||||
out.RemoveRange(uint64(w), uint64(w+1))
|
||||
} else {
|
||||
out.Add(w)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Or returns the union of rc and b.
|
||||
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for v := p.start; v <= plast; v++ {
|
||||
out.Add(uint32(v))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
//func (rc *runContainer32) and(container) container {
|
||||
// panic("TODO. not yet implemented")
|
||||
//}
|
||||
|
||||
// serializedSizeInBytes returns the number of bytes of memory
|
||||
// required by this runContainer16. This is for the
|
||||
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
|
||||
func (rc *runContainer16) serializedSizeInBytes() int {
|
||||
// number of runs in one uint16, then each run
|
||||
// needs two more uint16
|
||||
return 2 + len(rc.iv)*4
|
||||
}
|
||||
|
||||
// serializedSizeInBytes returns the number of bytes of memory
|
||||
// required by this runContainer32.
|
||||
func (rc *runContainer32) serializedSizeInBytes() int {
|
||||
return 4 + len(rc.iv)*8
|
||||
}
|
695
vendor/github.com/RoaringBitmap/roaring/rlei.go
generated
vendored
Normal file
695
vendor/github.com/RoaringBitmap/roaring/rlei.go
generated
vendored
Normal file
@ -0,0 +1,695 @@
|
||||
package roaring
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
//
|
||||
// container interface methods for runContainer16
|
||||
//
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// compile time verify we meet interface requirements
|
||||
var _ container = &runContainer16{}
|
||||
|
||||
func (rc *runContainer16) clone() container {
|
||||
return newRunContainer16CopyIv(rc.iv)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) minimum() uint16 {
|
||||
return rc.iv[0].start // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) maximum() uint16 {
|
||||
return rc.iv[len(rc.iv)-1].last() // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) isFull() bool {
|
||||
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
|
||||
}
|
||||
|
||||
func (rc *runContainer16) and(a container) container {
|
||||
if rc.isFull() {
|
||||
return a.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.intersect(c)
|
||||
case *arrayContainer:
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andCardinality(a container) int {
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return int(rc.intersectCardinality(c))
|
||||
case *arrayContainer:
|
||||
return rc.andArrayCardinality(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andBitmapContainerCardinality(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// andBitmapContainer finds the intersection of rc and b.
|
||||
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
|
||||
bc2 := newBitmapContainerFromRun(rc)
|
||||
return bc2.andBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
|
||||
pos := 0
|
||||
answer := 0
|
||||
maxpos := ac.getCardinality()
|
||||
if maxpos == 0 {
|
||||
return 0 // won't happen in actual code
|
||||
}
|
||||
v := ac.content[pos]
|
||||
mainloop:
|
||||
for _, p := range rc.iv {
|
||||
for v < p.start {
|
||||
pos++
|
||||
if pos == maxpos {
|
||||
break mainloop
|
||||
}
|
||||
v = ac.content[pos]
|
||||
}
|
||||
for v <= p.last() {
|
||||
answer++
|
||||
pos++
|
||||
if pos == maxpos {
|
||||
break mainloop
|
||||
}
|
||||
v = ac.content[pos]
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iand(a container) container {
|
||||
if rc.isFull() {
|
||||
return a.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.inplaceIntersect(c)
|
||||
case *arrayContainer:
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iandBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
|
||||
// TODO: optimize by doing less allocation, possibly?
|
||||
|
||||
// sect will be new
|
||||
sect := rc.intersect(rc2)
|
||||
*rc = *sect
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
|
||||
isect := rc.andBitmapContainer(bc)
|
||||
*rc = *newRunContainer16FromContainer(isect)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andArray(ac *arrayContainer) container {
|
||||
if len(rc.iv) == 0 {
|
||||
return newArrayContainer()
|
||||
}
|
||||
|
||||
acCardinality := ac.getCardinality()
|
||||
c := newArrayContainerCapacity(acCardinality)
|
||||
|
||||
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
|
||||
iv := rc.iv[rlePos]
|
||||
arrayVal := ac.content[arrayPos]
|
||||
|
||||
for iv.last() < arrayVal {
|
||||
rlePos++
|
||||
if rlePos == len(rc.iv) {
|
||||
return c
|
||||
}
|
||||
iv = rc.iv[rlePos]
|
||||
}
|
||||
|
||||
if iv.start > arrayVal {
|
||||
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
|
||||
} else {
|
||||
c.content = append(c.content, arrayVal)
|
||||
arrayPos++
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNot(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.andNotArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.andNotRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
|
||||
k := 0
|
||||
var val int64
|
||||
for _, p := range rc.iv {
|
||||
n := p.runlen()
|
||||
for j := int64(0); j < n; j++ {
|
||||
val = int64(p.start) + j
|
||||
x[k+i] = uint32(val) | mask
|
||||
k++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getShortIterator() shortIterable {
|
||||
return rc.newRunIterator16()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getManyIterator() manyIterable {
|
||||
return rc.newManyRunIterator16()
|
||||
}
|
||||
|
||||
// add the values in the range [firstOfRange, endx). endx
|
||||
// is still abe to express 2^16 because it is an int not an uint16.
|
||||
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
|
||||
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
|
||||
}
|
||||
addme := newRunContainer16TakeOwnership([]interval16{
|
||||
{
|
||||
start: uint16(firstOfRange),
|
||||
length: uint16(endx - 1 - firstOfRange),
|
||||
},
|
||||
})
|
||||
*rc = *rc.union(addme)
|
||||
return rc
|
||||
}
|
||||
|
||||
// remove the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
|
||||
" nothing to do.", firstOfRange, endx))
|
||||
//return rc
|
||||
}
|
||||
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
|
||||
rc.isubtract(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
// not flip the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) not(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
|
||||
}
|
||||
|
||||
return rc.Not(firstOfRange, endx)
|
||||
}
|
||||
|
||||
// Not flips the values in the range [firstOfRange,endx).
|
||||
// This is not inplace. Only the returned value has the flipped bits.
|
||||
//
|
||||
// Currently implemented as (!A intersect B) union (A minus B),
|
||||
// where A is rc, and B is the supplied [firstOfRange, endx) interval.
|
||||
//
|
||||
// TODO(time optimization): convert this to a single pass
|
||||
// algorithm by copying AndNotRunContainer16() and modifying it.
|
||||
// Current routine is correct but
|
||||
// makes 2 more passes through the arrays than should be
|
||||
// strictly necessary. Measure both ways though--this may not matter.
|
||||
//
|
||||
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
|
||||
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
|
||||
}
|
||||
|
||||
if firstOfRange >= endx {
|
||||
return rc.Clone()
|
||||
}
|
||||
|
||||
a := rc
|
||||
// algo:
|
||||
// (!A intersect B) union (A minus B)
|
||||
|
||||
nota := a.invert()
|
||||
|
||||
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
|
||||
b := newRunContainer16TakeOwnership(bs)
|
||||
|
||||
notAintersectB := nota.intersect(b)
|
||||
|
||||
aMinusB := a.AndNotRunContainer16(b)
|
||||
|
||||
rc2 := notAintersectB.union(aMinusB)
|
||||
return rc2
|
||||
}
|
||||
|
||||
// equals is now logical equals; it does not require the
|
||||
// same underlying container type.
|
||||
func (rc *runContainer16) equals(o container) bool {
|
||||
srb, ok := o.(*runContainer16)
|
||||
|
||||
if !ok {
|
||||
// maybe value instead of pointer
|
||||
val, valok := o.(*runContainer16)
|
||||
if valok {
|
||||
srb = val
|
||||
ok = true
|
||||
}
|
||||
}
|
||||
if ok {
|
||||
// Check if the containers are the same object.
|
||||
if rc == srb {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(srb.iv) != len(rc.iv) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, v := range rc.iv {
|
||||
if v != srb.iv[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// use generic comparison
|
||||
if o.getCardinality() != rc.getCardinality() {
|
||||
return false
|
||||
}
|
||||
rit := rc.getShortIterator()
|
||||
bit := o.getShortIterator()
|
||||
|
||||
//k := 0
|
||||
for rit.hasNext() {
|
||||
if bit.next() != rit.next() {
|
||||
return false
|
||||
}
|
||||
//k++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
|
||||
rc.Add(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
|
||||
return rc.Add(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
|
||||
rc.removeKey(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iremove(x uint16) bool {
|
||||
return rc.removeKey(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) or(a container) container {
|
||||
if rc.isFull() {
|
||||
return rc.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.union(c)
|
||||
case *arrayContainer:
|
||||
return rc.orArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.orBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) orCardinality(a container) int {
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return int(rc.unionCardinality(c))
|
||||
case *arrayContainer:
|
||||
return rc.orArrayCardinality(c)
|
||||
case *bitmapContainer:
|
||||
return rc.orBitmapContainerCardinality(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// orBitmapContainer finds the union of rc and bc.
|
||||
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
|
||||
bc2 := newBitmapContainerFromRun(rc)
|
||||
return bc2.iorBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
|
||||
answer := 0
|
||||
for i := range rc.iv {
|
||||
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
|
||||
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
|
||||
}
|
||||
|
||||
// orArray finds the union of rc and ac.
|
||||
func (rc *runContainer16) orArray(ac *arrayContainer) container {
|
||||
bc1 := newBitmapContainerFromRun(rc)
|
||||
bc2 := ac.toBitmapContainer()
|
||||
return bc1.orBitmap(bc2)
|
||||
}
|
||||
|
||||
// orArray finds the union of rc and ac.
|
||||
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
|
||||
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ior(a container) container {
|
||||
if rc.isFull() {
|
||||
return rc
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.inplaceUnion(c)
|
||||
case *arrayContainer:
|
||||
return rc.iorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iorBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
|
||||
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
|
||||
for _, p := range rc2.iv {
|
||||
last := int64(p.last())
|
||||
for i := int64(p.start); i <= last; i++ {
|
||||
rc.Add(uint16(i))
|
||||
}
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
|
||||
|
||||
it := bc.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
|
||||
it := ac.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
// lazyIOR is described (not yet implemented) in
|
||||
// this nice note from @lemire on
|
||||
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
|
||||
//
|
||||
// Description of lazyOR and lazyIOR from @lemire:
|
||||
//
|
||||
// Lazy functions are optional and can be simply
|
||||
// wrapper around non-lazy functions.
|
||||
//
|
||||
// The idea of "laziness" is as follows. It is
|
||||
// inspired by the concept of lazy evaluation
|
||||
// you might be familiar with (functional programming
|
||||
// and all that). So a roaring bitmap is
|
||||
// such that all its containers are, in some
|
||||
// sense, chosen to use as little memory as
|
||||
// possible. This is nice. Also, all bitsets
|
||||
// are "cardinality aware" so that you can do
|
||||
// fast rank/select queries, or query the
|
||||
// cardinality of the whole bitmap... very fast,
|
||||
// without latency.
|
||||
//
|
||||
// However, imagine that you are aggregating 100
|
||||
// bitmaps together. So you OR the first two, then OR
|
||||
// that with the third one and so forth. Clearly,
|
||||
// intermediate bitmaps don't need to be as
|
||||
// compressed as possible, right? They can be
|
||||
// in a "dirty state". You only need the end
|
||||
// result to be in a nice state... which you
|
||||
// can achieve by calling repairAfterLazy at the end.
|
||||
//
|
||||
// The Java/C code does something special for
|
||||
// the in-place lazy OR runs. The idea is that
|
||||
// instead of taking two run containers and
|
||||
// generating a new one, we actually try to
|
||||
// do the computation in-place through a
|
||||
// technique invented by @gssiyankai (pinging him!).
|
||||
// What you do is you check whether the host
|
||||
// run container has lots of extra capacity.
|
||||
// If it does, you move its data at the end of
|
||||
// the backing array, and then you write
|
||||
// the answer at the beginning. What this
|
||||
// trick does is minimize memory allocations.
|
||||
//
|
||||
func (rc *runContainer16) lazyIOR(a container) container {
|
||||
// not lazy at the moment
|
||||
// TODO: make it lazy
|
||||
return rc.ior(a)
|
||||
|
||||
/*
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.lazyIorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.lazyIorBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.lazyIorRun16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
*/
|
||||
}
|
||||
|
||||
// lazyOR is described above in lazyIOR.
|
||||
func (rc *runContainer16) lazyOR(a container) container {
|
||||
|
||||
// not lazy at the moment
|
||||
// TODO: make it lazy
|
||||
return rc.or(a)
|
||||
|
||||
/*
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.lazyOrArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.lazyOrBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.lazyOrRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
*/
|
||||
}
|
||||
|
||||
func (rc *runContainer16) intersects(a container) bool {
|
||||
// TODO: optimize by doing inplace/less allocation, possibly?
|
||||
isect := rc.and(a)
|
||||
return isect.getCardinality() > 0
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xor(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.xorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.xorBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.xorRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNot(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.iandNotArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iandNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.iandNotRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// flip the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) inot(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
|
||||
}
|
||||
// TODO: minimize copies, do it all inplace; not() makes a copy.
|
||||
rc = rc.Not(firstOfRange, endx)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getCardinality() int {
|
||||
return int(rc.cardinality())
|
||||
}
|
||||
|
||||
func (rc *runContainer16) rank(x uint16) int {
|
||||
n := int64(len(rc.iv))
|
||||
xx := int64(x)
|
||||
w, already, _ := rc.search(xx, nil)
|
||||
if w < 0 {
|
||||
return 0
|
||||
}
|
||||
if !already && w == n-1 {
|
||||
return rc.getCardinality()
|
||||
}
|
||||
var rnk int64
|
||||
if !already {
|
||||
for i := int64(0); i <= w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
return int(rnk)
|
||||
}
|
||||
for i := int64(0); i < w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
rnk += int64(x-rc.iv[w].start) + 1
|
||||
return int(rnk)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) selectInt(x uint16) int {
|
||||
return rc.selectInt16(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
|
||||
return rc.AndNotRunContainer16(b)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
return rcb.andNotBitmap(acb)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return rcb.andNotBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
|
||||
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
|
||||
bc := newBitmapContainer()
|
||||
for i := range rc.iv {
|
||||
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
}
|
||||
bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
x2b := x2.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(x2b)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(acb)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(bc)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
x2b := x2.toBitmapContainer()
|
||||
return rcb.xorBitmap(x2b)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
return rcb.xorBitmap(acb)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return rcb.xorBitmap(bc)
|
||||
}
|
||||
|
||||
// convert to bitmap or array *if needed*
|
||||
func (rc *runContainer16) toEfficientContainer() container {
|
||||
|
||||
// runContainer16SerializedSizeInBytes(numRuns)
|
||||
sizeAsRunContainer := rc.getSizeInBytes()
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := int(rc.cardinality())
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return rc
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
return rc.toArrayContainer()
|
||||
}
|
||||
bc := newBitmapContainerFromRun(rc)
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toArrayContainer() *arrayContainer {
|
||||
ac := newArrayContainer()
|
||||
for i := range rc.iv {
|
||||
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
}
|
||||
return ac
|
||||
}
|
||||
|
||||
func newRunContainer16FromContainer(c container) *runContainer16 {
|
||||
|
||||
switch x := c.(type) {
|
||||
case *runContainer16:
|
||||
return x.Clone()
|
||||
case *arrayContainer:
|
||||
return newRunContainer16FromArray(x)
|
||||
case *bitmapContainer:
|
||||
return newRunContainer16FromBitmapContainer(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
1345
vendor/github.com/RoaringBitmap/roaring/roaring.go
generated
vendored
Normal file
1345
vendor/github.com/RoaringBitmap/roaring/roaring.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
893
vendor/github.com/RoaringBitmap/roaring/roaringarray.go
generated
vendored
Normal file
893
vendor/github.com/RoaringBitmap/roaring/roaringarray.go
generated
vendored
Normal file
@ -0,0 +1,893 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
snappy "github.com/glycerine/go-unsnap-stream"
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
//go:generate msgp -unexported
|
||||
|
||||
type container interface {
|
||||
clone() container
|
||||
and(container) container
|
||||
andCardinality(container) int
|
||||
iand(container) container // i stands for inplace
|
||||
andNot(container) container
|
||||
iandNot(container) container // i stands for inplace
|
||||
getCardinality() int
|
||||
// rank returns the number of integers that are
|
||||
// smaller or equal to x. rank(infinity) would be getCardinality().
|
||||
rank(uint16) int
|
||||
|
||||
iadd(x uint16) bool // inplace, returns true if x was new.
|
||||
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||
|
||||
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
|
||||
iremove(x uint16) bool // inplace, returns true if x was present.
|
||||
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||
|
||||
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
||||
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
xor(r container) container
|
||||
getShortIterator() shortIterable
|
||||
getManyIterator() manyIterable
|
||||
contains(i uint16) bool
|
||||
maximum() uint16
|
||||
minimum() uint16
|
||||
|
||||
// equals is now logical equals; it does not require the
|
||||
// same underlying container types, but compares across
|
||||
// any of the implementations.
|
||||
equals(r container) bool
|
||||
|
||||
fillLeastSignificant16bits(array []uint32, i int, mask uint32)
|
||||
or(r container) container
|
||||
orCardinality(r container) int
|
||||
isFull() bool
|
||||
ior(r container) container // i stands for inplace
|
||||
intersects(r container) bool // whether the two containers intersect
|
||||
lazyOR(r container) container
|
||||
lazyIOR(r container) container
|
||||
getSizeInBytes() int
|
||||
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
||||
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
||||
serializedSizeInBytes() int
|
||||
readFrom(io.Reader) (int, error)
|
||||
writeTo(io.Writer) (int, error)
|
||||
|
||||
numberOfRuns() int
|
||||
toEfficientContainer() container
|
||||
String() string
|
||||
containerType() contype
|
||||
}
|
||||
|
||||
// contype is a small numeric tag for a kind of container. It is what
// container.containerType returns and what containerSerz stores next to
// the raw payload.
type contype uint8

// The tag values are assigned by iota in declaration order, starting at 0.
// The names presumably mirror the container implementations (bitmap,
// array, 16-bit run, 32-bit run).
const (
	bitmapContype contype = iota // 0
	arrayContype                 // 1
	run16Contype                 // 2
	run32Contype                 // 3
)
|
||||
|
||||
// careful: range is [firstOfRange,lastOfRange]
|
||||
func rangeOfOnes(start, last int) container {
|
||||
if start > MaxUint16 {
|
||||
panic("rangeOfOnes called with start > MaxUint16")
|
||||
}
|
||||
if last > MaxUint16 {
|
||||
panic("rangeOfOnes called with last > MaxUint16")
|
||||
}
|
||||
if start < 0 {
|
||||
panic("rangeOfOnes called with start < 0")
|
||||
}
|
||||
if last < 0 {
|
||||
panic("rangeOfOnes called with last < 0")
|
||||
}
|
||||
return newRunContainer16Range(uint16(start), uint16(last))
|
||||
}
|
||||
|
||||
type roaringArray struct {
|
||||
keys []uint16
|
||||
containers []container `msg:"-"` // don't try to serialize directly.
|
||||
needCopyOnWrite []bool
|
||||
copyOnWrite bool
|
||||
|
||||
// conserz is used at serialization time
|
||||
// to serialize containers. Otherwise empty.
|
||||
conserz []containerSerz
|
||||
}
|
||||
|
||||
// containerSerz facilitates serializing container (tricky to
|
||||
// serialize because it is an interface) by providing a
|
||||
// light wrapper with a type identifier.
|
||||
type containerSerz struct {
|
||||
t contype `msg:"t"` // type
|
||||
r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type
|
||||
}
|
||||
|
||||
func newRoaringArray() *roaringArray {
|
||||
return &roaringArray{}
|
||||
}
|
||||
|
||||
// runOptimize compresses the element containers to minimize space consumed.
|
||||
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||
// A: since we aren't changing the logical content, just the representation,
|
||||
// we don't bother to check the needCopyOnWrite bits. We replace
|
||||
// (possibly all) elements of ra.containers in-place with space
|
||||
// optimized versions.
|
||||
func (ra *roaringArray) runOptimize() {
|
||||
for i := range ra.containers {
|
||||
ra.containers[i] = ra.containers[i].toEfficientContainer()
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
|
||||
ra.keys = append(ra.keys, key)
|
||||
ra.containers = append(ra.containers, value)
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
|
||||
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
||||
if !copyonwrite {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
|
||||
} else {
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
|
||||
if !sa.needsCopyOnWrite(startingindex) {
|
||||
sa.setNeedsCopyOnWrite(startingindex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendWithoutCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
for i := 0; i < sa.size(); i++ {
|
||||
if sa.keys[i] >= stoppingKey {
|
||||
break
|
||||
}
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
startLocation := sa.getIndex(beforeStart)
|
||||
if startLocation >= 0 {
|
||||
startLocation++
|
||||
} else {
|
||||
startLocation = -startLocation - 1
|
||||
}
|
||||
|
||||
for i := startLocation; i < sa.size(); i++ {
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) removeIndexRange(begin, end int) {
|
||||
if end <= begin {
|
||||
return
|
||||
}
|
||||
|
||||
r := end - begin
|
||||
|
||||
copy(ra.keys[begin:], ra.keys[end:])
|
||||
copy(ra.containers[begin:], ra.containers[end:])
|
||||
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
||||
|
||||
ra.resize(len(ra.keys) - r)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) resize(newsize int) {
|
||||
for k := newsize; k < len(ra.containers); k++ {
|
||||
ra.containers[k] = nil
|
||||
}
|
||||
|
||||
ra.keys = ra.keys[:newsize]
|
||||
ra.containers = ra.containers[:newsize]
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) clear() {
|
||||
ra.resize(0)
|
||||
ra.copyOnWrite = false
|
||||
ra.conserz = nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) clone() *roaringArray {
|
||||
|
||||
sa := roaringArray{}
|
||||
sa.copyOnWrite = ra.copyOnWrite
|
||||
|
||||
// this is where copyOnWrite is used.
|
||||
if ra.copyOnWrite {
|
||||
sa.keys = make([]uint16, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
sa.containers = make([]container, len(ra.containers))
|
||||
copy(sa.containers, ra.containers)
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
|
||||
ra.markAllAsNeedingCopyOnWrite()
|
||||
sa.markAllAsNeedingCopyOnWrite()
|
||||
|
||||
// sa.needCopyOnWrite is shared
|
||||
} else {
|
||||
// make a full copy
|
||||
|
||||
sa.keys = make([]uint16, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
|
||||
sa.containers = make([]container, len(ra.containers))
|
||||
for i := range sa.containers {
|
||||
sa.containers[i] = ra.containers[i].clone()
|
||||
}
|
||||
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
}
|
||||
return &sa
|
||||
}
|
||||
|
||||
// unused function:
|
||||
//func (ra *roaringArray) containsKey(x uint16) bool {
|
||||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||
//}
|
||||
|
||||
func (ra *roaringArray) getContainer(x uint16) container {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||
if i < 0 {
|
||||
return nil
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getContainerAtIndex(i int) container {
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
|
||||
c := ra.getContainerAtIndex(i)
|
||||
switch t := c.(type) {
|
||||
case *arrayContainer:
|
||||
c = t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
c = t.toBitmapContainer()
|
||||
}
|
||||
case *bitmapContainer:
|
||||
if needsWriteable && ra.needCopyOnWrite[i] {
|
||||
c = ra.containers[i].clone()
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
||||
if ra.needCopyOnWrite[i] {
|
||||
ra.containers[i] = ra.containers[i].clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getIndex(x uint16) int {
|
||||
// before the binary search, we optimize for frequent cases
|
||||
size := len(ra.keys)
|
||||
if (size == 0) || (ra.keys[size-1] == x) {
|
||||
return size - 1
|
||||
}
|
||||
return ra.binarySearch(0, int64(size), x)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
|
||||
return ra.keys[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
|
||||
ra.keys = append(ra.keys, 0)
|
||||
ra.containers = append(ra.containers, nil)
|
||||
|
||||
copy(ra.keys[i+1:], ra.keys[i:])
|
||||
copy(ra.containers[i+1:], ra.containers[i:])
|
||||
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = value
|
||||
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
||||
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) remove(key uint16) bool {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
||||
if i >= 0 { // if a new key
|
||||
ra.removeAtIndex(i)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) removeAtIndex(i int) {
|
||||
copy(ra.keys[i:], ra.keys[i+1:])
|
||||
copy(ra.containers[i:], ra.containers[i+1:])
|
||||
|
||||
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
||||
|
||||
ra.resize(len(ra.keys) - 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
|
||||
ra.containers[i] = c
|
||||
}
|
||||
|
||||
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = c
|
||||
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
||||
}
|
||||
|
||||
func (ra *roaringArray) size() int {
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
|
||||
low := begin
|
||||
high := end - 1
|
||||
for low+16 <= high {
|
||||
middleIndex := low + (high-low)/2 // avoid overflow
|
||||
middleValue := ra.keys[middleIndex]
|
||||
|
||||
if middleValue < ikey {
|
||||
low = middleIndex + 1
|
||||
} else if middleValue > ikey {
|
||||
high = middleIndex - 1
|
||||
} else {
|
||||
return int(middleIndex)
|
||||
}
|
||||
}
|
||||
for ; low <= high; low++ {
|
||||
val := ra.keys[low]
|
||||
if val >= ikey {
|
||||
if val == ikey {
|
||||
return int(low)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return -int(low + 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) equals(o interface{}) bool {
|
||||
srb, ok := o.(roaringArray)
|
||||
if ok {
|
||||
|
||||
if srb.size() != ra.size() {
|
||||
return false
|
||||
}
|
||||
for i, k := range ra.keys {
|
||||
if k != srb.keys[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
for i, c := range ra.containers {
|
||||
if !c.equals(srb.containers[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) headerSize() uint64 {
|
||||
size := uint64(len(ra.keys))
|
||||
if ra.hasRunCompression() {
|
||||
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
|
||||
return 4 + (size+7)/8 + 4*size
|
||||
}
|
||||
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
|
||||
}
|
||||
return 4 + 4 + 8*size
|
||||
|
||||
}
|
||||
|
||||
// should be dirt cheap
|
||||
func (ra *roaringArray) serializedSizeInBytes() uint64 {
|
||||
answer := ra.headerSize()
|
||||
for _, c := range ra.containers {
|
||||
answer += uint64(c.serializedSizeInBytes())
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
//
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
//
|
||||
func (ra *roaringArray) toBytes() ([]byte, error) {
|
||||
stream := &bytes.Buffer{}
|
||||
hasRun := ra.hasRunCompression()
|
||||
isRunSizeInBytes := 0
|
||||
cookieSize := 8
|
||||
if hasRun {
|
||||
cookieSize = 4
|
||||
isRunSizeInBytes = (len(ra.keys) + 7) / 8
|
||||
}
|
||||
descriptiveHeaderSize := 4 * len(ra.keys)
|
||||
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
|
||||
|
||||
buf := make([]byte, preambleSize+4*len(ra.keys))
|
||||
|
||||
nw := 0
|
||||
|
||||
if hasRun {
|
||||
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
|
||||
nw += 2
|
||||
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
|
||||
nw += 2
|
||||
|
||||
// compute isRun bitmap
|
||||
var ir []byte
|
||||
|
||||
isRun := newBitmapContainer()
|
||||
for i, c := range ra.containers {
|
||||
switch c.(type) {
|
||||
case *runContainer16:
|
||||
isRun.iadd(uint16(i))
|
||||
}
|
||||
}
|
||||
// convert to little endian
|
||||
ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
|
||||
nw += copy(buf[nw:], ir)
|
||||
} else {
|
||||
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
|
||||
nw += 4
|
||||
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
|
||||
nw += 4
|
||||
}
|
||||
|
||||
// descriptive header
|
||||
for i, key := range ra.keys {
|
||||
binary.LittleEndian.PutUint16(buf[nw:], key)
|
||||
nw += 2
|
||||
c := ra.containers[i]
|
||||
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
|
||||
nw += 2
|
||||
}
|
||||
|
||||
startOffset := int64(preambleSize + 4*len(ra.keys))
|
||||
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
|
||||
// offset header
|
||||
for _, c := range ra.containers {
|
||||
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
|
||||
nw += 4
|
||||
switch rc := c.(type) {
|
||||
case *runContainer16:
|
||||
startOffset += 2 + int64(len(rc.iv))*4
|
||||
default:
|
||||
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, err := stream.Write(buf[:nw])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for i, c := range ra.containers {
|
||||
_ = i
|
||||
_, err := c.writeTo(stream)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stream.Bytes(), nil
|
||||
}
|
||||
|
||||
//
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
//
|
||||
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
|
||||
by, err := ra.toBytes()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
n, err := out.Write(by)
|
||||
if err == nil && n < len(by) {
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
|
||||
pos := 0
|
||||
if len(buf) < 8 {
|
||||
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
|
||||
}
|
||||
|
||||
cookie := binary.LittleEndian.Uint32(buf)
|
||||
pos += 4
|
||||
var size uint32 // number of containers
|
||||
haveRunContainers := false
|
||||
var isRunBitmap []byte
|
||||
|
||||
// cookie header
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
haveRunContainers = true
|
||||
size = uint32(uint16(cookie>>16) + 1) // number of containers
|
||||
|
||||
// create is-run-container bitmap
|
||||
isRunBitmapSize := (int(size) + 7) / 8
|
||||
if pos+isRunBitmapSize > len(buf) {
|
||||
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
|
||||
}
|
||||
|
||||
isRunBitmap = buf[pos : pos+isRunBitmapSize]
|
||||
pos += isRunBitmapSize
|
||||
} else if cookie == serialCookieNoRunContainer {
|
||||
size = binary.LittleEndian.Uint32(buf[pos:])
|
||||
pos += 4
|
||||
} else {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
if size > (1 << 16) {
|
||||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
||||
}
|
||||
// descriptive header
|
||||
// keycard - is {key, cardinality} tuple slice
|
||||
if pos+2*2*int(size) > len(buf) {
|
||||
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
|
||||
}
|
||||
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
|
||||
pos += 2 * 2 * int(size)
|
||||
|
||||
if !haveRunContainers || size >= noOffsetThreshold {
|
||||
pos += 4 * int(size)
|
||||
}
|
||||
|
||||
// Allocate slices upfront as number of containers is known
|
||||
if cap(ra.containers) >= int(size) {
|
||||
ra.containers = ra.containers[:size]
|
||||
} else {
|
||||
ra.containers = make([]container, size)
|
||||
}
|
||||
if cap(ra.keys) >= int(size) {
|
||||
ra.keys = ra.keys[:size]
|
||||
} else {
|
||||
ra.keys = make([]uint16, size)
|
||||
}
|
||||
if cap(ra.needCopyOnWrite) >= int(size) {
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
|
||||
} else {
|
||||
ra.needCopyOnWrite = make([]bool, size)
|
||||
}
|
||||
|
||||
for i := uint32(0); i < size; i++ {
|
||||
key := uint16(keycard[2*i])
|
||||
card := int(keycard[2*i+1]) + 1
|
||||
ra.keys[i] = key
|
||||
ra.needCopyOnWrite[i] = true
|
||||
|
||||
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||
// run container
|
||||
nr := binary.LittleEndian.Uint16(buf[pos:])
|
||||
pos += 2
|
||||
if pos+int(nr)*4 > len(buf) {
|
||||
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
|
||||
}
|
||||
nb := runContainer16{
|
||||
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
|
||||
card: int64(card),
|
||||
}
|
||||
pos += int(nr) * 4
|
||||
ra.containers[i] = &nb
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
// bitmap container
|
||||
nb := bitmapContainer{
|
||||
cardinality: card,
|
||||
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
|
||||
}
|
||||
pos += arrayDefaultMaxSize * 2
|
||||
ra.containers[i] = &nb
|
||||
} else {
|
||||
// array container
|
||||
nb := arrayContainer{
|
||||
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
|
||||
}
|
||||
pos += card * 2
|
||||
ra.containers[i] = &nb
|
||||
}
|
||||
}
|
||||
|
||||
return int64(pos), nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
|
||||
pos := 0
|
||||
var cookie uint32
|
||||
err := binary.Read(stream, binary.LittleEndian, &cookie)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
||||
}
|
||||
pos += 4
|
||||
var size uint32
|
||||
haveRunContainers := false
|
||||
var isRun *bitmapContainer
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
haveRunContainers = true
|
||||
size = uint32(uint16(cookie>>16) + 1)
|
||||
bytesToRead := (int(size) + 7) / 8
|
||||
numwords := (bytesToRead + 7) / 8
|
||||
by := make([]byte, bytesToRead, numwords*8)
|
||||
nr, err := io.ReadFull(stream, by)
|
||||
if err != nil {
|
||||
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
|
||||
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
|
||||
}
|
||||
pos += bytesToRead
|
||||
by = by[:cap(by)]
|
||||
isRun = newBitmapContainer()
|
||||
for i := 0; i < numwords; i++ {
|
||||
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
|
||||
by = by[8:]
|
||||
}
|
||||
} else if cookie == serialCookieNoRunContainer {
|
||||
err = binary.Read(stream, binary.LittleEndian, &size)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
|
||||
}
|
||||
pos += 4
|
||||
} else {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
if size > (1 << 16) {
|
||||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
||||
}
|
||||
// descriptive header
|
||||
keycard := make([]uint16, 2*size, 2*size)
|
||||
err = binary.Read(stream, binary.LittleEndian, keycard)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += 2 * 2 * int(size)
|
||||
// offset header
|
||||
if !haveRunContainers || size >= noOffsetThreshold {
|
||||
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
|
||||
pos += 4 * int(size)
|
||||
}
|
||||
for i := uint32(0); i < size; i++ {
|
||||
key := int(keycard[2*i])
|
||||
card := int(keycard[2*i+1]) + 1
|
||||
if haveRunContainers && isRun.contains(uint16(i)) {
|
||||
nb := newRunContainer16()
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += nr
|
||||
ra.appendContainer(uint16(key), nb, false)
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
nb := newBitmapContainer()
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
nb.cardinality = card
|
||||
pos += nr
|
||||
ra.appendContainer(keycard[2*i], nb, false)
|
||||
} else {
|
||||
nb := newArrayContainerSize(card)
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += nr
|
||||
ra.appendContainer(keycard[2*i], nb, false)
|
||||
}
|
||||
}
|
||||
return int64(pos), nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) hasRunCompression() bool {
|
||||
for _, c := range ra.containers {
|
||||
switch c.(type) {
|
||||
case *runContainer16:
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
|
||||
|
||||
ra.conserz = make([]containerSerz, len(ra.containers))
|
||||
for i, v := range ra.containers {
|
||||
switch cn := v.(type) {
|
||||
case *bitmapContainer:
|
||||
bts, err := cn.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.conserz[i].t = bitmapContype
|
||||
ra.conserz[i].r = bts
|
||||
case *arrayContainer:
|
||||
bts, err := cn.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.conserz[i].t = arrayContype
|
||||
ra.conserz[i].r = bts
|
||||
case *runContainer16:
|
||||
bts, err := cn.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.conserz[i].t = run16Contype
|
||||
ra.conserz[i].r = bts
|
||||
default:
|
||||
panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
|
||||
}
|
||||
}
|
||||
w := snappy.NewWriter(stream)
|
||||
err := msgp.Encode(w, ra)
|
||||
ra.conserz = nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
|
||||
r := snappy.NewReader(stream)
|
||||
err := msgp.Decode(r, ra)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(ra.containers) != len(ra.keys) {
|
||||
ra.containers = make([]container, len(ra.keys))
|
||||
}
|
||||
|
||||
for i, v := range ra.conserz {
|
||||
switch v.t {
|
||||
case bitmapContype:
|
||||
c := &bitmapContainer{}
|
||||
_, err = c.UnmarshalMsg(v.r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.containers[i] = c
|
||||
case arrayContype:
|
||||
c := &arrayContainer{}
|
||||
_, err = c.UnmarshalMsg(v.r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.containers[i] = c
|
||||
case run16Contype:
|
||||
c := &runContainer16{}
|
||||
_, err = c.UnmarshalMsg(v.r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ra.containers[i] = c
|
||||
default:
|
||||
return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
|
||||
}
|
||||
}
|
||||
ra.conserz = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
|
||||
lower := pos + 1
|
||||
|
||||
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
||||
return lower
|
||||
}
|
||||
|
||||
spansize := 1
|
||||
|
||||
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
||||
spansize *= 2
|
||||
}
|
||||
var upper int
|
||||
if lower+spansize < len(ra.keys) {
|
||||
upper = lower + spansize
|
||||
} else {
|
||||
upper = len(ra.keys) - 1
|
||||
}
|
||||
|
||||
if ra.keys[upper] == min {
|
||||
return upper
|
||||
}
|
||||
|
||||
if ra.keys[upper] < min {
|
||||
// means
|
||||
// array
|
||||
// has no
|
||||
// item
|
||||
// >= min
|
||||
// pos = array.length;
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
// we know that the next-smallest span was too small
|
||||
lower += (spansize >> 1)
|
||||
|
||||
mid := 0
|
||||
for lower+1 != upper {
|
||||
mid = (lower + upper) >> 1
|
||||
if ra.keys[mid] == min {
|
||||
return mid
|
||||
} else if ra.keys[mid] < min {
|
||||
lower = mid
|
||||
} else {
|
||||
upper = mid
|
||||
}
|
||||
}
|
||||
return upper
|
||||
}
|
||||
|
||||
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
|
||||
for i := range ra.needCopyOnWrite {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
|
||||
return ra.needCopyOnWrite[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
529
vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go
generated
vendored
Normal file
529
vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go
generated
vendored
Normal file
@ -0,0 +1,529 @@
|
||||
package roaring
|
||||
|
||||
// NOTE: THIS FILE WAS PRODUCED BY THE
|
||||
// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp)
|
||||
// DO NOT EDIT
|
||||
|
||||
import (
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zxvk uint32
|
||||
zxvk, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zxvk > 0 {
|
||||
zxvk--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "t":
|
||||
{
|
||||
var zbzg uint8
|
||||
zbzg, err = dc.ReadUint8()
|
||||
z.t = contype(zbzg)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "r":
|
||||
err = z.r.DecodeMsg(dc)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "t"
|
||||
err = en.Append(0x82, 0xa1, 0x74)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteUint8(uint8(z.t))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "r"
|
||||
err = en.Append(0xa1, 0x72)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = z.r.EncodeMsg(en)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
// string "t"
|
||||
o = append(o, 0x82, 0xa1, 0x74)
|
||||
o = msgp.AppendUint8(o, uint8(z.t))
|
||||
// string "r"
|
||||
o = append(o, 0xa1, 0x72)
|
||||
o, err = z.r.MarshalMsg(o)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zbai uint32
|
||||
zbai, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zbai > 0 {
|
||||
zbai--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "t":
|
||||
{
|
||||
var zcmr uint8
|
||||
zcmr, bts, err = msgp.ReadUint8Bytes(bts)
|
||||
z.t = contype(zcmr)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "r":
|
||||
bts, err = z.r.UnmarshalMsg(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *containerSerz) Msgsize() (s int) {
|
||||
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize()
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
{
|
||||
var zajw uint8
|
||||
zajw, err = dc.ReadUint8()
|
||||
(*z) = contype(zajw)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
err = en.WriteUint8(uint8(z))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z contype) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
o = msgp.AppendUint8(o, uint8(z))
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
{
|
||||
var zwht uint8
|
||||
zwht, bts, err = msgp.ReadUint8Bytes(bts)
|
||||
(*z) = contype(zwht)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z contype) Msgsize() (s int) {
|
||||
s = msgp.Uint8Size
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zlqf uint32
|
||||
zlqf, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zlqf > 0 {
|
||||
zlqf--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "keys":
|
||||
var zdaf uint32
|
||||
zdaf, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.keys) >= int(zdaf) {
|
||||
z.keys = (z.keys)[:zdaf]
|
||||
} else {
|
||||
z.keys = make([]uint16, zdaf)
|
||||
}
|
||||
for zhct := range z.keys {
|
||||
z.keys[zhct], err = dc.ReadUint16()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case "needCopyOnWrite":
|
||||
var zpks uint32
|
||||
zpks, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.needCopyOnWrite) >= int(zpks) {
|
||||
z.needCopyOnWrite = (z.needCopyOnWrite)[:zpks]
|
||||
} else {
|
||||
z.needCopyOnWrite = make([]bool, zpks)
|
||||
}
|
||||
for zcua := range z.needCopyOnWrite {
|
||||
z.needCopyOnWrite[zcua], err = dc.ReadBool()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case "copyOnWrite":
|
||||
z.copyOnWrite, err = dc.ReadBool()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "conserz":
|
||||
var zjfb uint32
|
||||
zjfb, err = dc.ReadArrayHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.conserz) >= int(zjfb) {
|
||||
z.conserz = (z.conserz)[:zjfb]
|
||||
} else {
|
||||
z.conserz = make([]containerSerz, zjfb)
|
||||
}
|
||||
for zxhx := range z.conserz {
|
||||
var zcxo uint32
|
||||
zcxo, err = dc.ReadMapHeader()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zcxo > 0 {
|
||||
zcxo--
|
||||
field, err = dc.ReadMapKeyPtr()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "t":
|
||||
{
|
||||
var zeff uint8
|
||||
zeff, err = dc.ReadUint8()
|
||||
z.conserz[zxhx].t = contype(zeff)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "r":
|
||||
err = z.conserz[zxhx].r.DecodeMsg(dc)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 4
|
||||
// write "keys"
|
||||
err = en.Append(0x84, 0xa4, 0x6b, 0x65, 0x79, 0x73)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.keys)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zhct := range z.keys {
|
||||
err = en.WriteUint16(z.keys[zhct])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
// write "needCopyOnWrite"
|
||||
err = en.Append(0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.needCopyOnWrite)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zcua := range z.needCopyOnWrite {
|
||||
err = en.WriteBool(z.needCopyOnWrite[zcua])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
// write "copyOnWrite"
|
||||
err = en.Append(0xab, 0x63, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteBool(z.copyOnWrite)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "conserz"
|
||||
err = en.Append(0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteArrayHeader(uint32(len(z.conserz)))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zxhx := range z.conserz {
|
||||
// map header, size 2
|
||||
// write "t"
|
||||
err = en.Append(0x82, 0xa1, 0x74)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteUint8(uint8(z.conserz[zxhx].t))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "r"
|
||||
err = en.Append(0xa1, 0x72)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = z.conserz[zxhx].r.EncodeMsg(en)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 4
|
||||
// string "keys"
|
||||
o = append(o, 0x84, 0xa4, 0x6b, 0x65, 0x79, 0x73)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.keys)))
|
||||
for zhct := range z.keys {
|
||||
o = msgp.AppendUint16(o, z.keys[zhct])
|
||||
}
|
||||
// string "needCopyOnWrite"
|
||||
o = append(o, 0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.needCopyOnWrite)))
|
||||
for zcua := range z.needCopyOnWrite {
|
||||
o = msgp.AppendBool(o, z.needCopyOnWrite[zcua])
|
||||
}
|
||||
// string "copyOnWrite"
|
||||
o = append(o, 0xab, 0x63, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65)
|
||||
o = msgp.AppendBool(o, z.copyOnWrite)
|
||||
// string "conserz"
|
||||
o = append(o, 0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a)
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z.conserz)))
|
||||
for zxhx := range z.conserz {
|
||||
// map header, size 2
|
||||
// string "t"
|
||||
o = append(o, 0x82, 0xa1, 0x74)
|
||||
o = msgp.AppendUint8(o, uint8(z.conserz[zxhx].t))
|
||||
// string "r"
|
||||
o = append(o, 0xa1, 0x72)
|
||||
o, err = z.conserz[zxhx].r.MarshalMsg(o)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
var zrsw uint32
|
||||
zrsw, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zrsw > 0 {
|
||||
zrsw--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "keys":
|
||||
var zxpk uint32
|
||||
zxpk, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.keys) >= int(zxpk) {
|
||||
z.keys = (z.keys)[:zxpk]
|
||||
} else {
|
||||
z.keys = make([]uint16, zxpk)
|
||||
}
|
||||
for zhct := range z.keys {
|
||||
z.keys[zhct], bts, err = msgp.ReadUint16Bytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case "needCopyOnWrite":
|
||||
var zdnj uint32
|
||||
zdnj, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.needCopyOnWrite) >= int(zdnj) {
|
||||
z.needCopyOnWrite = (z.needCopyOnWrite)[:zdnj]
|
||||
} else {
|
||||
z.needCopyOnWrite = make([]bool, zdnj)
|
||||
}
|
||||
for zcua := range z.needCopyOnWrite {
|
||||
z.needCopyOnWrite[zcua], bts, err = msgp.ReadBoolBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case "copyOnWrite":
|
||||
z.copyOnWrite, bts, err = msgp.ReadBoolBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "conserz":
|
||||
var zobc uint32
|
||||
zobc, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if cap(z.conserz) >= int(zobc) {
|
||||
z.conserz = (z.conserz)[:zobc]
|
||||
} else {
|
||||
z.conserz = make([]containerSerz, zobc)
|
||||
}
|
||||
for zxhx := range z.conserz {
|
||||
var zsnv uint32
|
||||
zsnv, bts, err = msgp.ReadMapHeaderBytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for zsnv > 0 {
|
||||
zsnv--
|
||||
field, bts, err = msgp.ReadMapKeyZC(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch msgp.UnsafeString(field) {
|
||||
case "t":
|
||||
{
|
||||
var zkgt uint8
|
||||
zkgt, bts, err = msgp.ReadUint8Bytes(bts)
|
||||
z.conserz[zxhx].t = contype(zkgt)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "r":
|
||||
bts, err = z.conserz[zxhx].r.UnmarshalMsg(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
o = bts
|
||||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *roaringArray) Msgsize() (s int) {
|
||||
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize
|
||||
for zxhx := range z.conserz {
|
||||
s += 1 + 2 + msgp.Uint8Size + 2 + z.conserz[zxhx].r.Msgsize()
|
||||
}
|
||||
return
|
||||
}
|
83
vendor/github.com/RoaringBitmap/roaring/serialization.go
generated
vendored
Normal file
83
vendor/github.com/RoaringBitmap/roaring/serialization.go
generated
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
// writeTo for runContainer16 follows this
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
//
|
||||
func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
|
||||
buf := make([]byte, 2+4*len(b.iv))
|
||||
binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv)))
|
||||
for i, v := range b.iv {
|
||||
binary.LittleEndian.PutUint16(buf[2+i*4:], v.start)
|
||||
binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) {
|
||||
bts, err := b.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return stream.Write(bts)
|
||||
}
|
||||
|
||||
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
|
||||
bts, err := b.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return stream.Write(bts)
|
||||
}
|
||||
|
||||
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) {
|
||||
err := msgp.Decode(stream, b)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
|
||||
err := msgp.Decode(stream, b)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
|
||||
|
||||
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
|
||||
b.iv = b.iv[:0]
|
||||
b.card = 0
|
||||
var numRuns uint16
|
||||
err := binary.Read(stream, binary.LittleEndian, &numRuns)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
nr := int(numRuns)
|
||||
encRun := make([]uint16, 2*nr)
|
||||
by := make([]byte, 4*nr)
|
||||
err = binary.Read(stream, binary.LittleEndian, &by)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for i := range encRun {
|
||||
if len(by) < 2 {
|
||||
return 0, errCorruptedStream
|
||||
}
|
||||
encRun[i] = binary.LittleEndian.Uint16(by)
|
||||
by = by[2:]
|
||||
}
|
||||
for i := 0; i < nr; i++ {
|
||||
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
|
||||
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
|
||||
}
|
||||
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
|
||||
b.card += int64(encRun[i*2+1]) + 1
|
||||
}
|
||||
return 0, err
|
||||
}
|
118
vendor/github.com/RoaringBitmap/roaring/serialization_generic.go
generated
vendored
Normal file
118
vendor/github.com/RoaringBitmap/roaring/serialization_generic.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
// +build !amd64,!386 appengine
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
func (b *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||
buf := make([]byte, 2*len(b.content))
|
||||
for i, v := range b.content {
|
||||
base := i * 2
|
||||
buf[base] = byte(v)
|
||||
buf[base+1] = byte(v >> 8)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
||||
err := binary.Read(stream, binary.LittleEndian, b.content)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return 2 * len(b.content), nil
|
||||
}
|
||||
|
||||
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
// Write set
|
||||
buf := make([]byte, 8*len(b.bitmap))
|
||||
for i, v := range b.bitmap {
|
||||
base := i * 8
|
||||
buf[base] = byte(v)
|
||||
buf[base+1] = byte(v >> 8)
|
||||
buf[base+2] = byte(v >> 16)
|
||||
buf[base+3] = byte(v >> 24)
|
||||
buf[base+4] = byte(v >> 32)
|
||||
buf[base+5] = byte(v >> 40)
|
||||
buf[base+6] = byte(v >> 48)
|
||||
buf[base+7] = byte(v >> 56)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
|
||||
err := binary.Read(stream, binary.LittleEndian, b.bitmap)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b.computeCardinality()
|
||||
return 8 * len(b.bitmap), nil
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
by := make([]byte, len(bc.bitmap)*8)
|
||||
for i := range bc.bitmap {
|
||||
binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
|
||||
}
|
||||
return by
|
||||
}
|
||||
|
||||
// uint64SliceAsByteSlice returns a freshly allocated byte slice holding every
// element of slice encoded as 8 little-endian bytes, in order.
func uint64SliceAsByteSlice(slice []uint64) []byte {
	out := make([]byte, 8*len(slice))
	dst := out
	for _, word := range slice {
		binary.LittleEndian.PutUint64(dst, word)
		dst = dst[8:]
	}
	return out
}
|
||||
|
||||
// byteSliceAsUint16Slice decodes slice as consecutive 2-byte little-endian
// values into a freshly allocated []uint16.
// It panics if len(slice) is not a multiple of 2.
func byteSliceAsUint16Slice(slice []byte) []uint16 {
	const width = 2
	if len(slice)%width != 0 {
		panic("Slice size should be divisible by 2")
	}

	out := make([]uint16, 0, len(slice)/width)
	for rest := slice; len(rest) > 0; rest = rest[width:] {
		out = append(out, binary.LittleEndian.Uint16(rest))
	}
	return out
}
|
||||
|
||||
// byteSliceAsUint64Slice decodes slice as consecutive 8-byte little-endian
// values into a freshly allocated []uint64.
// It panics if len(slice) is not a multiple of 8.
func byteSliceAsUint64Slice(slice []byte) []uint64 {
	const width = 8
	if len(slice)%width != 0 {
		panic("Slice size should be divisible by 8")
	}

	out := make([]uint64, 0, len(slice)/width)
	for rest := slice; len(rest) > 0; rest = rest[width:] {
		out = append(out, binary.LittleEndian.Uint64(rest))
	}
	return out
}
|
||||
|
||||
// Converts a byte slice to a interval16 slice.
|
||||
// The function assumes that the slice byte buffer is run container data
|
||||
// encoded according to Roaring Format Spec
|
||||
func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 {
|
||||
if len(byteSlice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
}
|
||||
|
||||
intervalSlice := make([]interval16, len(byteSlice)/4)
|
||||
|
||||
for i := range intervalSlice {
|
||||
intervalSlice[i] = interval16{
|
||||
start: binary.LittleEndian.Uint16(byteSlice[i*4:]),
|
||||
length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]),
|
||||
}
|
||||
}
|
||||
|
||||
return intervalSlice
|
||||
}
|
113
vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
generated
vendored
Normal file
113
vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go
generated
vendored
Normal file
@ -0,0 +1,113 @@
|
||||
// +build 386 amd64,!appengine
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"io"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||
buf := uint16SliceAsByteSlice(ac.content)
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
// readFrom reads an arrayContainer from stream.
|
||||
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
|
||||
// *before* you call readFrom. We can't guess the size in the stream
|
||||
// by this point.
|
||||
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
||||
buf := uint16SliceAsByteSlice(ac.content)
|
||||
return io.ReadFull(stream, buf)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
|
||||
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||
n, err := io.ReadFull(stream, buf)
|
||||
bc.computeCardinality()
|
||||
return n, err
|
||||
}
|
||||
|
||||
// uint64SliceAsByteSlice reinterprets slice as a []byte without copying. The
// result shares the backing array of slice, with length and capacity
// multiplied by 8, so a write through either slice is visible through the
// other. Bytes appear in the host's native order.
//
// The result is produced by editing the header of a real []byte (the named
// result) instead of declaring a free-standing reflect.SliceHeader value. The
// unsafe package documents that SliceHeader is only valid as a pointer to an
// actual slice: the Data field of a plain struct is a uintptr that the
// garbage collector does not treat as a reference.
func uint64SliceAsByteSlice(slice []uint64) (result []byte) {
	src := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&result))

	// Data goes first so result references the array before the remaining
	// fields of src are read.
	dst.Data = src.Data
	dst.Len = src.Len * 8
	dst.Cap = src.Cap * 8
	return result
}
|
||||
|
||||
// uint16SliceAsByteSlice reinterprets slice as a []byte without copying. The
// result shares the backing array of slice, with length and capacity
// multiplied by 2, so a write through either slice is visible through the
// other. Bytes appear in the host's native order.
//
// The result is produced by editing the header of a real []byte (the named
// result) instead of declaring a free-standing reflect.SliceHeader value. The
// unsafe package documents that SliceHeader is only valid as a pointer to an
// actual slice: the Data field of a plain struct is a uintptr that the
// garbage collector does not treat as a reference.
func uint16SliceAsByteSlice(slice []uint16) (result []byte) {
	src := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&result))

	// Data goes first so result references the array before the remaining
	// fields of src are read.
	dst.Data = src.Data
	dst.Len = src.Len * 2
	dst.Cap = src.Cap * 2
	return result
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
return uint64SliceAsByteSlice(bc.bitmap)
|
||||
}
|
||||
|
||||
// Deserialization code follows
|
||||
|
||||
// byteSliceAsUint16Slice reinterprets slice as a []uint16 without copying.
// The result shares the backing array of slice, with length and capacity
// divided by 2, and values are read in the host's native byte order.
// It panics if len(slice) is not a multiple of 2.
//
// The result is produced by editing the header of a real []uint16 (the named
// result) instead of declaring a free-standing reflect.SliceHeader value. The
// unsafe package documents that SliceHeader is only valid as a pointer to an
// actual slice: the Data field of a plain struct is a uintptr that the
// garbage collector does not treat as a reference.
//
// NOTE(review): nothing here checks that slice is 2-byte aligned; that is
// presumably acceptable on the x86 targets this file is built for.
func byteSliceAsUint16Slice(slice []byte) (result []uint16) {
	if len(slice)%2 != 0 {
		panic("Slice size should be divisible by 2")
	}

	src := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&result))

	// Data goes first so result references the array before the remaining
	// fields of src are read.
	dst.Data = src.Data
	dst.Len = src.Len / 2
	dst.Cap = src.Cap / 2
	return result
}
|
||||
|
||||
// byteSliceAsUint64Slice reinterprets slice as a []uint64 without copying.
// The result shares the backing array of slice, with length and capacity
// divided by 8, and values are read in the host's native byte order.
// It panics if len(slice) is not a multiple of 8.
//
// The result is produced by editing the header of a real []uint64 (the named
// result) instead of declaring a free-standing reflect.SliceHeader value. The
// unsafe package documents that SliceHeader is only valid as a pointer to an
// actual slice: the Data field of a plain struct is a uintptr that the
// garbage collector does not treat as a reference.
//
// NOTE(review): nothing here checks that slice is 8-byte aligned; that is
// presumably acceptable on the x86 targets this file is built for.
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
	if len(slice)%8 != 0 {
		panic("Slice size should be divisible by 8")
	}

	src := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&result))

	// Data goes first so result references the array before the remaining
	// fields of src are read.
	dst.Data = src.Data
	dst.Len = src.Len / 8
	dst.Cap = src.Cap / 8
	return result
}
|
||||
|
||||
func byteSliceAsInterval16Slice(slice []byte) []interval16 {
|
||||
if len(slice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
}
|
||||
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len /= 4
|
||||
header.Cap /= 4
|
||||
|
||||
// return it
|
||||
return *(*[]interval16)(unsafe.Pointer(&header))
|
||||
}
|
21
vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go
generated
vendored
Normal file
21
vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
// +build gofuzz
|
||||
|
||||
package roaring
|
||||
|
||||
import "bytes"
|
||||
|
||||
func FuzzSerializationStream(data []byte) int {
|
||||
newrb := NewBitmap()
|
||||
if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func FuzzSerializationBuffer(data []byte) int {
|
||||
newrb := NewBitmap()
|
||||
if _, err := newrb.FromBuffer(data); err != nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
609
vendor/github.com/RoaringBitmap/roaring/setutil.go
generated
vendored
Normal file
609
vendor/github.com/RoaringBitmap/roaring/setutil.go
generated
vendored
Normal file
@ -0,0 +1,609 @@
|
||||
package roaring
|
||||
|
||||
// equal reports whether a and b have the same length and the same elements
// in the same order. A nil slice and an empty slice compare equal.
func equal(a, b []uint16) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// difference writes into buffer the elements of set1 that do not appear in
// set2 and returns how many were written. Both inputs must be sorted in
// increasing order.
//
// Only the capacity of buffer matters: it is resliced before being written,
// so a zero-length buffer with enough capacity is valid. The caller reads the
// result as buffer[:n]. Indexing panics if the capacity is too small.
// Elements are written at or before the position being read from set1, so
// buffer may share set1's backing array starting at the same element.
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set2) == 0 {
		// Reslice first: the buffer may arrive with length 0 and only its
		// capacity set, as the other set operations in this file expect.
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	}
	if len(set1) == 0 {
		return 0
	}

	buffer = buffer[:cap(buffer)]
	pos, k1, k2 := 0, 0, 0
	s1, s2 := set1[0], set2[0]
	for {
		switch {
		case s1 < s2:
			// s1 is absent from set2: keep it.
			buffer[pos] = s1
			pos++
			k1++
			if k1 >= len(set1) {
				return pos
			}
			s1 = set1[k1]
		case s1 == s2:
			// Present in both: drop it.
			k1++
			k2++
			if k1 >= len(set1) {
				return pos
			}
			s1 = set1[k1]
			if k2 >= len(set2) {
				return pos + copyTail(buffer, pos, set1[k1:])
			}
			s2 = set2[k2]
		default:
			// s2 is behind s1: advance set2.
			k2++
			if k2 >= len(set2) {
				return pos + copyTail(buffer, pos, set1[k1:])
			}
			s2 = set2[k2]
		}
	}
}

// copyTail copies tail into buffer starting at pos and returns len(tail).
// The destination is sliced to the exact size so a buffer that is too small
// panics instead of being silently truncated.
func copyTail(buffer []uint16, pos int, tail []uint16) int {
	return copy(buffer[pos:pos+len(tail)], tail)
}
|
||||
|
||||
// exclusiveUnion2by2 writes into buffer the elements that appear in exactly
// one of set1 and set2 (their symmetric difference) and returns how many were
// written. Both inputs must be sorted in increasing order.
//
// buffer is resliced before being written, so only its capacity matters; the
// caller reads the result as buffer[:n].
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	switch {
	case len(set2) == 0:
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	case len(set1) == 0:
		buffer = buffer[:len(set2)]
		copy(buffer, set2)
		return len(set2)
	}

	out, i, j := 0, 0, 0
	a, b := set1[0], set2[0]
	buffer = buffer[:cap(buffer)]

merge:
	for {
		switch {
		case a < b:
			buffer[out] = a
			out++
			i++
			if i >= len(set1) {
				break merge
			}
			a = set1[i]
		case a == b:
			// Present in both inputs: emit nothing.
			i++
			j++
			if i >= len(set1) || j >= len(set2) {
				break merge
			}
			a, b = set1[i], set2[j]
		default:
			buffer[out] = b
			out++
			j++
			if j >= len(set2) {
				break merge
			}
			b = set2[j]
		}
	}

	// At least one input is exhausted here, so at most one of these loops
	// writes anything.
	for _, v := range set1[i:] {
		buffer[out] = v
		out++
	}
	for _, v := range set2[j:] {
		buffer[out] = v
		out++
	}
	return out
}
|
||||
|
||||
// union2by2 merges the sorted inputs set1 and set2 into buffer, writing each
// value that occurs in both only once, and returns the number of elements in
// the union.
//
// buffer is resliced before being written, so only its capacity matters; the
// caller reads the result as buffer[:n]. The head of each input is cached in
// a local and only reloaded after that input advances.
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set2) == 0 {
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	}
	if len(set1) == 0 {
		buffer = buffer[:len(set2)]
		copy(buffer, set2)
		return len(set2)
	}

	var out, i, j int
	a, b := set1[0], set2[0]
	buffer = buffer[:cap(buffer)]
	for {
		switch {
		case a < b:
			buffer[out] = a
			out++
			i++
			if i >= len(set1) {
				copy(buffer[out:], set2[j:])
				return out + len(set2) - j
			}
			a = set1[i]
		case a == b:
			buffer[out] = a
			out++
			i++
			j++
			if i >= len(set1) {
				copy(buffer[out:], set2[j:])
				return out + len(set2) - j
			}
			if j >= len(set2) {
				copy(buffer[out:], set1[i:])
				return out + len(set1) - i
			}
			a, b = set1[i], set2[j]
		default: // a > b
			buffer[out] = b
			out++
			j++
			if j >= len(set2) {
				copy(buffer[out:], set1[i:])
				return out + len(set1) - i
			}
			b = set2[j]
		}
	}
}
|
||||
|
||||
// union2by2Cardinality returns the number of distinct values in the union of
// the sorted inputs set1 and set2, without materializing the union.
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
	count, i, j := 0, 0, 0
	for i < len(set1) && j < len(set2) {
		a, b := set1[i], set2[j]
		// The smaller head advances; on a tie both advance and the shared
		// value is counted once.
		if a <= b {
			i++
		}
		if b <= a {
			j++
		}
		count++
	}
	// Whatever is left in either input is counted as-is.
	return count + (len(set1) - i) + (len(set2) - j)
}
|
||||
|
||||
func intersection2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
buffer []uint16) int {
|
||||
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2(set1, set2, buffer)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
return onesidedgallopingintersect2by2(set2, set1, buffer)
|
||||
} else {
|
||||
return localintersect2by2(set1, set2, buffer)
|
||||
}
|
||||
}
|
||||
|
||||
func intersection2by2Cardinality(
|
||||
set1 []uint16,
|
||||
set2 []uint16) int {
|
||||
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2Cardinality(set1, set2)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
return onesidedgallopingintersect2by2Cardinality(set2, set1)
|
||||
} else {
|
||||
return localintersect2by2Cardinality(set1, set2)
|
||||
}
|
||||
}
|
||||
|
||||
// intersects2by2 reports whether the sorted inputs set1 and set2 share at
// least one value. It walks both inputs in step and stops at the first match.
// It could be optimized if one set is much larger than the other one.
func intersects2by2(
	set1 []uint16,
	set2 []uint16) bool {

	i, j := 0, 0
	for i < len(set1) && j < len(set2) {
		switch a, b := set1[i], set2[j]; {
		case b < a:
			j++
		case a < b:
			i++
		default:
			return true
		}
	}
	return false
}
|
||||
|
||||
// localintersect2by2 writes the values common to the sorted inputs set1 and
// set2 into buffer and returns how many were written.
//
// buffer is resliced to its capacity before being written; the caller reads
// the result as buffer[:n]. The head of each input is cached in a local and
// only reloaded after that input advances.
func localintersect2by2(
	set1 []uint16,
	set2 []uint16,
	buffer []uint16) int {

	if len(set1) == 0 || len(set2) == 0 {
		return 0
	}
	i, j, out := 0, 0, 0
	buffer = buffer[:cap(buffer)]
	a, b := set1[0], set2[0]
	for {
		switch {
		case b < a:
			j++
			if j == len(set2) {
				return out
			}
			b = set2[j]
		case a < b:
			i++
			if i == len(set1) {
				return out
			}
			a = set1[i]
		default:
			// a == b: a common value.
			buffer[out] = a
			out++
			i++
			if i == len(set1) {
				return out
			}
			a = set1[i]
			j++
			if j == len(set2) {
				return out
			}
			b = set2[j]
		}
	}
}
|
||||
|
||||
// localintersect2by2Cardinality returns the number of values common to the
// sorted inputs set1 and set2, without materializing the intersection.
func localintersect2by2Cardinality(
	set1 []uint16,
	set2 []uint16) int {

	count, i, j := 0, 0, 0
	for i < len(set1) && j < len(set2) {
		switch a, b := set1[i], set2[j]; {
		case b < a:
			j++
		case a < b:
			i++
		default:
			count++
			i++
			j++
		}
	}
	return count
}
|
||||
|
||||
// advanceUntil searches array[pos+1:length] for the first index whose value
// is greater than or equal to min and returns it, or returns length when no
// such index exists. array must be sorted in increasing order.
//
// The search gallops: the probe distance doubles until it overshoots min or
// reaches the end of the range, then a binary search narrows the bracket.
func advanceUntil(
	array []uint16,
	pos int,
	length int,
	min uint16) int {

	lo := pos + 1
	if lo >= length || array[lo] >= min {
		return lo
	}

	// Galloping phase.
	span := 1
	for lo+span < length && array[lo+span] < min {
		span <<= 1
	}
	hi := length - 1
	if lo+span < length {
		hi = lo + span
	}

	switch v := array[hi]; {
	case v == min:
		return hi
	case v < min:
		// Even the last probed element is too small: nothing qualifies.
		return length
	}

	// The previous, half-sized span was still too small.
	lo += span >> 1

	// Binary search; array[lo] < min < array[hi] holds throughout.
	for lo+1 != hi {
		mid := (lo + hi) >> 1
		switch v := array[mid]; {
		case v == min:
			return mid
		case v < min:
			lo = mid
		default:
			hi = mid
		}
	}
	return hi
}
|
||||
|
||||
func onesidedgallopingintersect2by2(
|
||||
smallset []uint16,
|
||||
largeset []uint16,
|
||||
buffer []uint16) int {
|
||||
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
buffer = buffer[:cap(buffer)]
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
pos := 0
|
||||
s1 := largeset[k1]
|
||||
s2 := smallset[k2]
|
||||
mainwhile:
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
if s2 < s1 {
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
} else {
|
||||
|
||||
buffer[pos] = s2
|
||||
pos++
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func onesidedgallopingintersect2by2Cardinality(
|
||||
smallset []uint16,
|
||||
largeset []uint16) int {
|
||||
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
pos := 0
|
||||
s1 := largeset[k1]
|
||||
s2 := smallset[k2]
|
||||
mainwhile:
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
if s2 < s1 {
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
} else {
|
||||
|
||||
pos++
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
// binarySearch looks for ikey in the sorted slice array. It returns the index
// of ikey when present; otherwise it returns -(p+1), where p is the index at
// which ikey would have to be inserted to keep array sorted.
//
// Bisection is used while the candidate range holds more than 16 elements;
// the remainder is scanned linearly.
func binarySearch(array []uint16, ikey uint16) int {
	lo, hi := 0, len(array)-1
	for lo+16 <= hi {
		mid := int(uint32(lo+hi) >> 1)
		switch v := array[mid]; {
		case v < ikey:
			lo = mid + 1
		case v > ikey:
			hi = mid - 1
		default:
			return mid
		}
	}
	for ; lo <= hi; lo++ {
		v := array[lo]
		if v == ikey {
			return lo
		}
		if v > ikey {
			break
		}
	}
	return -(lo + 1)
}
|
21
vendor/github.com/RoaringBitmap/roaring/shortiterator.go
generated
vendored
Normal file
21
vendor/github.com/RoaringBitmap/roaring/shortiterator.go
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
package roaring
|
||||
|
||||
// shortIterable is a forward-only cursor over a sequence of uint16 values.
type shortIterable interface {
	// hasNext reports whether a call to next can return another value.
	hasNext() bool
	// next returns the current value and moves the cursor forward.
	next() uint16
}
|
||||
|
||||
// shortIterator walks a []uint16 from front to back.
type shortIterator struct {
	slice []uint16 // values being iterated
	loc   int      // index of the value the next call to next returns
}

// hasNext reports whether loc is still inside slice.
func (si *shortIterator) hasNext() bool {
	return si.loc < len(si.slice)
}

// next returns the value at loc and advances loc by one. It does no bounds
// check of its own: calling it when hasNext is false panics on the index.
func (si *shortIterator) next() uint16 {
	value := si.slice[si.loc]
	si.loc++
	return value
}
|
383
vendor/github.com/RoaringBitmap/roaring/smat.go
generated
vendored
Normal file
383
vendor/github.com/RoaringBitmap/roaring/smat.go
generated
vendored
Normal file
@ -0,0 +1,383 @@
|
||||
// +build gofuzz
|
||||
|
||||
/*
|
||||
# Instructions for smat testing for roaring
|
||||
|
||||
[smat](https://github.com/mschoch/smat) is a framework that provides
|
||||
state machine assisted fuzz testing.
|
||||
|
||||
To run the smat tests for roaring...
|
||||
|
||||
## Prerequisites
|
||||
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
|
||||
## Steps
|
||||
|
||||
1. Generate initial smat corpus:
|
||||
```
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
```
|
||||
|
||||
2. Build go-fuzz test program with instrumentation:
|
||||
```
|
||||
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||
```
|
||||
|
||||
3. Run go-fuzz:
|
||||
```
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
```
|
||||
|
||||
You should see output like...
|
||||
```
|
||||
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
|
||||
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
|
||||
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
|
||||
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
|
||||
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
|
||||
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
|
||||
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
|
||||
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
|
||||
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
|
||||
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
|
||||
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
|
||||
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
|
||||
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
|
||||
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
|
||||
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
|
||||
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
|
||||
|
||||
```
|
||||
|
||||
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||
the workdir where you should be able to find the panic goroutine stack
|
||||
traces.
|
||||
*/
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/mschoch/smat"
|
||||
"github.com/willf/bitset"
|
||||
)
|
||||
|
||||
// fuzz test using state machine driven by byte stream.
|
||||
func FuzzSmat(data []byte) int {
|
||||
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
|
||||
smatActionMap, data)
|
||||
}
|
||||
|
||||
// smatDebug turns on the trace output produced by smatLog.
var smatDebug = false

// smatLog prints prefix followed by the formatted message to standard
// output, but only when smatDebug is set.
func smatLog(prefix, format string, args ...interface{}) {
	if !smatDebug {
		return
	}
	fmt.Print(prefix)
	fmt.Printf(format, args...)
}
|
||||
|
||||
type smatContext struct {
|
||||
pairs []*smatPair
|
||||
|
||||
// Two registers, x & y.
|
||||
x int
|
||||
y int
|
||||
|
||||
actions int
|
||||
}
|
||||
|
||||
type smatPair struct {
|
||||
bm *Bitmap
|
||||
bs *bitset.BitSet
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
var smatActionMap = smat.ActionMap{
|
||||
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
|
||||
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
|
||||
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
|
||||
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
|
||||
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
|
||||
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
|
||||
|
||||
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
|
||||
|
||||
smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)),
|
||||
smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)),
|
||||
|
||||
smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)),
|
||||
smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)),
|
||||
|
||||
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
|
||||
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
|
||||
|
||||
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
|
||||
|
||||
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
|
||||
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
|
||||
|
||||
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
|
||||
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
|
||||
|
||||
smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)),
|
||||
|
||||
smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)),
|
||||
|
||||
smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)),
|
||||
|
||||
smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)),
|
||||
}
|
||||
|
||||
var smatRunningPercentActions []smat.PercentAction
|
||||
|
||||
func init() {
|
||||
var ids []int
|
||||
for actionId := range smatActionMap {
|
||||
ids = append(ids, int(actionId))
|
||||
}
|
||||
sort.Ints(ids)
|
||||
|
||||
pct := 100 / len(smatActionMap)
|
||||
for _, actionId := range ids {
|
||||
smatRunningPercentActions = append(smatRunningPercentActions,
|
||||
smat.PercentAction{pct, smat.ActionID(actionId)})
|
||||
}
|
||||
|
||||
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
|
||||
smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc)
|
||||
}
|
||||
|
||||
// We only have one smat state: running.
|
||||
func smatRunning(next byte) smat.ActionID {
|
||||
return smat.PercentExecute(next, smatRunningPercentActions...)
|
||||
}
|
||||
|
||||
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
|
||||
return func(ctx smat.Context) (smat.State, error) {
|
||||
c := ctx.(*smatContext)
|
||||
c.actions++
|
||||
|
||||
smatLog(" ", "%s\n", name)
|
||||
|
||||
return f(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Creates an smat action func based on a simple callback.
|
||||
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
|
||||
return func(ctx smat.Context) (next smat.State, err error) {
|
||||
c := ctx.(*smatContext)
|
||||
cb(c)
|
||||
return smatRunning, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Invokes a callback function with the input v bounded to len(c.pairs).
|
||||
func (c *smatContext) withPair(v int, cb func(*smatPair)) {
|
||||
if len(c.pairs) > 0 {
|
||||
if v < 0 {
|
||||
v = -v
|
||||
}
|
||||
v = v % len(c.pairs)
|
||||
cb(c.pairs[v])
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
func smatSetupFunc(ctx smat.Context) (next smat.State, err error) {
|
||||
return smatRunning, nil
|
||||
}
|
||||
|
||||
func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
func smatPushPair(c *smatContext) {
|
||||
c.pairs = append(c.pairs, &smatPair{
|
||||
bm: NewBitmap(),
|
||||
bs: bitset.New(100),
|
||||
})
|
||||
}
|
||||
|
||||
func smatPopPair(c *smatContext) {
|
||||
if len(c.pairs) > 0 {
|
||||
c.pairs = c.pairs[0 : len(c.pairs)-1]
|
||||
}
|
||||
}
|
||||
|
||||
func smatSetBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.AddInt(int(y))
|
||||
p.bs.Set(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatRemoveBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.Remove(y)
|
||||
p.bs.Clear(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatAnd(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.And(py.bm)
|
||||
px.bs = px.bs.Intersection(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatOr(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.Or(py.bm)
|
||||
px.bs = px.bs.Union(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatAndCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
c0 := px.bm.AndCardinality(py.bm)
|
||||
c1 := px.bs.IntersectionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same add cardinality")
|
||||
}
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatOrCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
c0 := px.bm.OrCardinality(py.bm)
|
||||
c1 := px.bs.UnionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same or cardinality")
|
||||
}
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatRunOptimize(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.bm.RunOptimize()
|
||||
px.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatClear(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.bm.Clear()
|
||||
px.bs = px.bs.ClearAll()
|
||||
px.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c0 := px.bm.GetCardinality()
|
||||
c1 := px.bs.Count()
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same cardinality")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func smatIsEmpty(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c0 := px.bm.IsEmpty()
|
||||
c1 := px.bs.None()
|
||||
if c0 != c1 {
|
||||
panic("expected same is empty")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func smatIntersects(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
v0 := px.bm.Intersects(py.bm)
|
||||
v1 := px.bs.IntersectionCardinality(py.bs) > 0
|
||||
if v0 != v1 {
|
||||
panic("intersects not equal")
|
||||
}
|
||||
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatFlip(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.Flip(uint64(y), uint64(y)+1)
|
||||
p.bs = p.bs.Flip(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatDifference(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.AndNot(py.bm)
|
||||
px.bs = px.bs.Difference(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func (p *smatPair) checkEquals() {
|
||||
if !p.equalsBitSet(p.bs, p.bm) {
|
||||
panic("bitset mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
|
||||
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
|
||||
if !b.ContainsInt(int(i)) {
|
||||
fmt.Printf("in a bitset, not b bitmap, i: %d\n", i)
|
||||
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||
a.String(), b.String())
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
i := b.Iterator()
|
||||
for i.HasNext() {
|
||||
v := i.Next()
|
||||
if !a.Test(uint(v)) {
|
||||
fmt.Printf("in b bitmap, not a bitset, v: %d\n", v)
|
||||
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||
a.String(), b.String())
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
315
vendor/github.com/RoaringBitmap/roaring/util.go
generated
vendored
Normal file
315
vendor/github.com/RoaringBitmap/roaring/util.go
generated
vendored
Normal file
@ -0,0 +1,315 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers.
|
||||
arrayLazyLowerBound = 1024
|
||||
maxCapacity = 1 << 16
|
||||
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
|
||||
invalidCardinality = -1
|
||||
serialCookie = 12347 // runs, arrays, and bitmaps
|
||||
noOffsetThreshold = 4
|
||||
|
||||
// Compute wordSizeInBytes, the size of a word in bytes.
|
||||
_m = ^uint64(0)
|
||||
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
|
||||
wordSizeInBytes = 1 << _logS
|
||||
|
||||
// other constants used in ctz_generic.go
|
||||
wordSizeInBits = wordSizeInBytes << 3 // word size in bits
|
||||
)
|
||||
|
||||
const maxWord = 1<<wordSizeInBits - 1
|
||||
|
||||
// doesn't apply to runContainers
|
||||
func getSizeInBytesFromCardinality(card int) int {
|
||||
if card > arrayDefaultMaxSize {
|
||||
// bitmapContainer
|
||||
return maxCapacity / 8
|
||||
}
|
||||
// arrayContainer
|
||||
return 2 * card
|
||||
}
|
||||
|
||||
// fill overwrites every element of arr with val.
func fill(arr []uint64, val uint64) {
	for i := 0; i < len(arr); i++ {
		arr[i] = val
	}
}
|
||||
// fillRange overwrites arr[start:end] (end exclusive) with val. It does
// nothing when start >= end.
func fillRange(arr []uint64, start, end int, val uint64) {
	i := start
	for i < end {
		arr[i] = val
		i++
	}
}
|
||||
|
||||
func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := range bitmap1 {
|
||||
bitset := bitmap1[k] & bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := range bitmap1 {
|
||||
bitset := bitmap1[k] &^ bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := 0; k < len(bitmap1); k++ {
|
||||
bitset := bitmap1[k] ^ bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// highbits returns the upper 16 bits of x.
func highbits(x uint32) uint16 {
	upper := x >> 16
	return uint16(upper)
}
|
||||
// lowbits returns the lower 16 bits of x.
func lowbits(x uint32) uint16 {
	// Converting to uint16 truncates, which keeps exactly the low 16 bits.
	return uint16(x)
}
|
||||
|
||||
const maxLowBit = 0xFFFF
|
||||
|
||||
// flipBitmapRange inverts every bit of bitmap in the half-open bit range
// [start, end). It does nothing when start >= end.
func flipBitmapRange(bitmap []uint64, start int, end int) {
	if start >= end {
		return
	}
	const allOnes = ^uint64(0)
	first, last := start/64, (end-1)/64

	// Pre-flip the bits below start in the first word. They are flipped a
	// second time below (by the loop, or by the final mask when first == last),
	// which restores them.
	bitmap[first] ^= ^(allOnes << uint(start%64))
	for w := first; w < last; w++ {
		bitmap[w] = ^bitmap[w]
	}
	// Flip the low bits of the last word up to, but excluding, end.
	bitmap[last] ^= allOnes >> (uint(-end) % 64)
}
|
||||
|
||||
// resetBitmapRange clears every bit of bitmap in the half-open bit range
// [start, end). It does nothing when start >= end.
func resetBitmapRange(bitmap []uint64, start int, end int) {
	if start >= end {
		return
	}
	const allOnes = ^uint64(0)
	first, last := start/64, (end-1)/64
	headMask := allOnes << uint(start%64)    // bits at or above start in the first word
	tailMask := allOnes >> (uint(-end) % 64) // bits below end in the last word

	if first == last {
		bitmap[first] &^= headMask & tailMask
		return
	}
	bitmap[first] &^= headMask
	for w := first + 1; w < last; w++ {
		bitmap[w] = 0
	}
	bitmap[last] &^= tailMask
}
|
||||
|
||||
// setBitmapRange sets every bit of bitmap in the half-open bit range
// [start, end). It does nothing when start >= end.
func setBitmapRange(bitmap []uint64, start int, end int) {
	if start >= end {
		return
	}
	const allOnes = ^uint64(0)
	first, last := start/64, (end-1)/64
	headMask := allOnes << uint(start%64)    // bits at or above start in the first word
	tailMask := allOnes >> (uint(-end) % 64) // bits below end in the last word

	if first == last {
		bitmap[first] |= headMask & tailMask
		return
	}
	bitmap[first] |= headMask
	for w := first + 1; w < last; w++ {
		bitmap[w] = allOnes
	}
	bitmap[last] |= tailMask
}
|
||||
|
||||
func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
flipBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
resetBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
setBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 {
|
||||
answer := uint64(0)
|
||||
if start >= end {
|
||||
return answer
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
for i := firstword; i <= endword; i++ {
|
||||
answer += popcount(bitmap[i])
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func selectBitPosition(w uint64, j int) int {
|
||||
seen := 0
|
||||
|
||||
// Divide 64bit
|
||||
part := w & 0xFFFFFFFF
|
||||
n := popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 32
|
||||
seen += 32
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Divide 32bit
|
||||
part = w & 0xFFFF
|
||||
n = popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 16
|
||||
seen += 16
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Divide 16bit
|
||||
part = w & 0xFF
|
||||
n = popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 8
|
||||
seen += 8
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Lookup in final byte
|
||||
var counter uint
|
||||
for counter = 0; counter < 8; counter++ {
|
||||
j -= int((w >> counter) & 1)
|
||||
if j < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return seen + int(counter)
|
||||
|
||||
}
|
||||
|
||||
// panicOn panics with err when err is non-nil and otherwise does nothing.
func panicOn(err error) {
	if err == nil {
		return
	}
	panic(err)
}
|
||||
|
||||
// ph pairs an original index with a random key; sorting by the key shuffles
// the indices.
type ph struct {
	orig int // original position
	rand int // random sort key
}

// pha orders a slice of ph by ascending random key (sort.Interface).
type pha []ph

// Len returns the number of elements.
func (p pha) Len() int {
	return len(p)
}

// Less reports whether element i has a smaller random key than element j.
func (p pha) Less(i, j int) bool {
	return p[j].rand > p[i].rand
}

// Swap exchanges elements i and j.
func (p pha) Swap(i, j int) {
	p[j], p[i] = p[i], p[j]
}
|
||||
|
||||
func getRandomPermutation(n int) []int {
|
||||
r := make([]ph, n)
|
||||
for i := 0; i < n; i++ {
|
||||
r[i].orig = i
|
||||
r[i].rand = rand.Intn(1 << 29)
|
||||
}
|
||||
sort.Sort(pha(r))
|
||||
m := make([]int, n)
|
||||
for i := range m {
|
||||
m[i] = r[i].orig
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// minOfInt returns the smaller of a and b.
func minOfInt(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// maxOfInt returns the larger of a and b.
func maxOfInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
// maxOfUint16 returns the larger of a and b.
func maxOfUint16(a, b uint16) uint16 {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
// minOfUint16 returns the smaller of a and b.
func minOfUint16(a, b uint16) uint16 {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
// maxUint16 returns the larger of a and b.
func maxUint16(a, b uint16) uint16 {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
// minUint16 returns the smaller of a and b.
func minUint16(a, b uint16) uint16 {
	if b <= a {
		return b
	}
	return a
}
|
2
vendor/github.com/blevesearch/bleve/README.md
generated
vendored
2
vendor/github.com/blevesearch/bleve/README.md
generated
vendored
@ -1,6 +1,6 @@
|
||||
#  bleve
|
||||
|
||||
[](https://travis-ci.org/blevesearch/bleve) [](https://coveralls.io/r/blevesearch/bleve?branch=master) [](https://godoc.org/github.com/blevesearch/bleve)
|
||||
[](https://travis-ci.org/blevesearch/bleve) [](https://coveralls.io/github/blevesearch/bleve?branch=master) [](https://godoc.org/github.com/blevesearch/bleve)
|
||||
[](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://codebeat.co/projects/github-com-blevesearch-bleve)
|
||||
[](https://goreportcard.com/report/blevesearch/bleve)
|
||||
|
41
vendor/github.com/blevesearch/bleve/analysis/freq.go
generated
vendored
41
vendor/github.com/blevesearch/bleve/analysis/freq.go
generated
vendored
@ -14,6 +14,22 @@
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTokenLocation int
|
||||
var reflectStaticSizeTokenFreq int
|
||||
|
||||
func init() {
|
||||
var tl TokenLocation
|
||||
reflectStaticSizeTokenLocation = int(reflect.TypeOf(tl).Size())
|
||||
var tf TokenFreq
|
||||
reflectStaticSizeTokenFreq = int(reflect.TypeOf(tf).Size())
|
||||
}
|
||||
|
||||
// TokenLocation represents one occurrence of a term at a particular location in
|
||||
// a field. Start, End and Position have the same meaning as in analysis.Token.
|
||||
// Field and ArrayPositions identify the field value in the source document.
|
||||
@ -26,6 +42,12 @@ type TokenLocation struct {
|
||||
Position int
|
||||
}
|
||||
|
||||
func (tl *TokenLocation) Size() int {
|
||||
rv := reflectStaticSizeTokenLocation
|
||||
rv += len(tl.ArrayPositions) * size.SizeOfUint64
|
||||
return rv
|
||||
}
|
||||
|
||||
// TokenFreq represents all the occurrences of a term in all fields of a
|
||||
// document.
|
||||
type TokenFreq struct {
|
||||
@ -34,6 +56,15 @@ type TokenFreq struct {
|
||||
frequency int
|
||||
}
|
||||
|
||||
func (tf *TokenFreq) Size() int {
|
||||
rv := reflectStaticSizeTokenFreq
|
||||
rv += len(tf.Term)
|
||||
for _, loc := range tf.Locations {
|
||||
rv += loc.Size()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (tf *TokenFreq) Frequency() int {
|
||||
return tf.frequency
|
||||
}
|
||||
@ -42,6 +73,16 @@ func (tf *TokenFreq) Frequency() int {
|
||||
// fields.
|
||||
type TokenFrequencies map[string]*TokenFreq
|
||||
|
||||
func (tfs TokenFrequencies) Size() int {
|
||||
rv := size.SizeOfMap
|
||||
rv += len(tfs) * (size.SizeOfString + size.SizeOfPtr)
|
||||
for k, v := range tfs {
|
||||
rv += len(k)
|
||||
rv += v.Size()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
|
||||
// walk the new token frequencies
|
||||
for tfk, tf := range other {
|
||||
|
3
vendor/github.com/blevesearch/bleve/config.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/config.go
generated
vendored
@ -25,6 +25,9 @@ import (
|
||||
"github.com/blevesearch/bleve/index/upsidedown"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/blevesearch/bleve/search/highlight/highlighter/html"
|
||||
|
||||
// force import of scorch so its accessible by default
|
||||
_ "github.com/blevesearch/bleve/index/scorch"
|
||||
)
|
||||
|
||||
var bleveExpVar = expvar.NewMap("bleve")
|
||||
|
30
vendor/github.com/blevesearch/bleve/document/document.go
generated
vendored
30
vendor/github.com/blevesearch/bleve/document/document.go
generated
vendored
@ -14,13 +14,24 @@
|
||||
|
||||
package document
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocument int
|
||||
|
||||
func init() {
|
||||
var d Document
|
||||
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
ID string `json:"id"`
|
||||
Fields []Field `json:"fields"`
|
||||
CompositeFields []*CompositeField
|
||||
Number uint64 `json:"-"`
|
||||
}
|
||||
|
||||
func NewDocument(id string) *Document {
|
||||
@ -31,6 +42,21 @@ func NewDocument(id string) *Document {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.ID)
|
||||
|
||||
for _, entry := range d.Fields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range d.CompositeFields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f Field) *Document {
|
||||
switch f := f.(type) {
|
||||
case *CompositeField:
|
||||
|
2
vendor/github.com/blevesearch/bleve/document/field.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/document/field.go
generated
vendored
@ -36,4 +36,6 @@ type Field interface {
|
||||
// that this field represents - this is a common metric for tracking
|
||||
// the rate of indexing
|
||||
NumPlainTextBytes() uint64
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
18
vendor/github.com/blevesearch/bleve/document/field_boolean.go
generated
vendored
18
vendor/github.com/blevesearch/bleve/document/field_boolean.go
generated
vendored
@ -16,11 +16,20 @@ package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
const DefaultBooleanIndexingOptions = StoreField | IndexField
|
||||
var reflectStaticSizeBooleanField int
|
||||
|
||||
func init() {
|
||||
var f BooleanField
|
||||
reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues
|
||||
|
||||
type BooleanField struct {
|
||||
name string
|
||||
@ -30,6 +39,13 @@ type BooleanField struct {
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (b *BooleanField) Size() int {
|
||||
return reflectStaticSizeBooleanField + size.SizeOfPtr +
|
||||
len(b.name) +
|
||||
len(b.arrayPositions)*size.SizeOfUint64 +
|
||||
len(b.value)
|
||||
}
|
||||
|
||||
func (b *BooleanField) Name() string {
|
||||
return b.name
|
||||
}
|
||||
|
25
vendor/github.com/blevesearch/bleve/document/field_composite.go
generated
vendored
25
vendor/github.com/blevesearch/bleve/document/field_composite.go
generated
vendored
@ -15,9 +15,19 @@
|
||||
package document
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCompositeField int
|
||||
|
||||
func init() {
|
||||
var cf CompositeField
|
||||
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
|
||||
}
|
||||
|
||||
const DefaultCompositeIndexingOptions = IndexField
|
||||
|
||||
type CompositeField struct {
|
||||
@ -54,6 +64,21 @@ func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, incl
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *CompositeField) Size() int {
|
||||
sizeInBytes := reflectStaticSizeCompositeField + size.SizeOfPtr +
|
||||
len(c.name)
|
||||
|
||||
for k, _ := range c.includedFields {
|
||||
sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
|
||||
}
|
||||
|
||||
for k, _ := range c.excludedFields {
|
||||
sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (c *CompositeField) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
17
vendor/github.com/blevesearch/bleve/document/field_datetime.go
generated
vendored
17
vendor/github.com/blevesearch/bleve/document/field_datetime.go
generated
vendored
@ -17,13 +17,22 @@ package document
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
const DefaultDateTimeIndexingOptions = StoreField | IndexField
|
||||
var reflectStaticSizeDateTimeField int
|
||||
|
||||
func init() {
|
||||
var f DateTimeField
|
||||
reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues
|
||||
const DefaultDateTimePrecisionStep uint = 4
|
||||
|
||||
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
|
||||
@ -37,6 +46,12 @@ type DateTimeField struct {
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Size() int {
|
||||
return reflectStaticSizeDateTimeField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
15
vendor/github.com/blevesearch/bleve/document/field_geopoint.go
generated
vendored
15
vendor/github.com/blevesearch/bleve/document/field_geopoint.go
generated
vendored
@ -16,12 +16,21 @@ package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeGeoPointField int
|
||||
|
||||
func init() {
|
||||
var f GeoPointField
|
||||
reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
var GeoPrecisionStep uint = 9
|
||||
|
||||
type GeoPointField struct {
|
||||
@ -32,6 +41,12 @@ type GeoPointField struct {
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Size() int {
|
||||
return reflectStaticSizeGeoPointField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
17
vendor/github.com/blevesearch/bleve/document/field_numeric.go
generated
vendored
17
vendor/github.com/blevesearch/bleve/document/field_numeric.go
generated
vendored
@ -16,12 +16,21 @@ package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
const DefaultNumericIndexingOptions = StoreField | IndexField
|
||||
var reflectStaticSizeNumericField int
|
||||
|
||||
func init() {
|
||||
var f NumericField
|
||||
reflectStaticSizeNumericField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues
|
||||
|
||||
const DefaultPrecisionStep uint = 4
|
||||
|
||||
@ -33,6 +42,12 @@ type NumericField struct {
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *NumericField) Size() int {
|
||||
return reflectStaticSizeNumericField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (n *NumericField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
18
vendor/github.com/blevesearch/bleve/document/field_text.go
generated
vendored
18
vendor/github.com/blevesearch/bleve/document/field_text.go
generated
vendored
@ -16,11 +16,20 @@ package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
const DefaultTextIndexingOptions = IndexField
|
||||
var reflectStaticSizeTextField int
|
||||
|
||||
func init() {
|
||||
var f TextField
|
||||
reflectStaticSizeTextField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultTextIndexingOptions = IndexField | DocValues
|
||||
|
||||
type TextField struct {
|
||||
name string
|
||||
@ -31,6 +40,13 @@ type TextField struct {
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (t *TextField) Size() int {
|
||||
return reflectStaticSizeTextField + size.SizeOfPtr +
|
||||
len(t.name) +
|
||||
len(t.arrayPositions)*size.SizeOfUint64 +
|
||||
len(t.value)
|
||||
}
|
||||
|
||||
func (t *TextField) Name() string {
|
||||
return t.name
|
||||
}
|
||||
|
11
vendor/github.com/blevesearch/bleve/document/indexing_options.go
generated
vendored
11
vendor/github.com/blevesearch/bleve/document/indexing_options.go
generated
vendored
@ -20,6 +20,7 @@ const (
|
||||
IndexField IndexingOptions = 1 << iota
|
||||
StoreField
|
||||
IncludeTermVectors
|
||||
DocValues
|
||||
)
|
||||
|
||||
func (o IndexingOptions) IsIndexed() bool {
|
||||
@ -34,6 +35,10 @@ func (o IndexingOptions) IncludeTermVectors() bool {
|
||||
return o&IncludeTermVectors != 0
|
||||
}
|
||||
|
||||
func (o IndexingOptions) IncludeDocValues() bool {
|
||||
return o&DocValues != 0
|
||||
}
|
||||
|
||||
func (o IndexingOptions) String() string {
|
||||
rv := ""
|
||||
if o.IsIndexed() {
|
||||
@ -51,5 +56,11 @@ func (o IndexingOptions) String() string {
|
||||
}
|
||||
rv += "TV"
|
||||
}
|
||||
if o.IncludeDocValues() {
|
||||
if rv != "" {
|
||||
rv += ", "
|
||||
}
|
||||
rv += "DV"
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
139
vendor/github.com/blevesearch/bleve/geo/geo.go
generated
vendored
139
vendor/github.com/blevesearch/bleve/geo/geo.go
generated
vendored
@ -15,6 +15,7 @@
|
||||
package geo
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
@ -26,6 +27,12 @@ var GeoBits uint = 32
|
||||
|
||||
var minLon = -180.0
|
||||
var minLat = -90.0
|
||||
var maxLon = 180.0
|
||||
var maxLat = 90.0
|
||||
var minLonRad = minLon * degreesToRadian
|
||||
var minLatRad = minLat * degreesToRadian
|
||||
var maxLonRad = maxLon * degreesToRadian
|
||||
var maxLatRad = maxLat * degreesToRadian
|
||||
var geoTolerance = 1E-6
|
||||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
|
||||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
|
||||
@ -91,26 +98,8 @@ func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool
|
||||
compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
|
||||
}
|
||||
|
||||
// ComputeBoundingBox will compute a bounding box around the provided point
|
||||
// which surrounds a circle of the provided radius (in meters).
|
||||
func ComputeBoundingBox(centerLon, centerLat,
|
||||
radius float64) (upperLeftLon float64, upperLeftLat float64,
|
||||
lowerRightLon float64, lowerRightLat float64) {
|
||||
_, tlat := pointFromLonLatBearing(centerLon, centerLat, 0, radius)
|
||||
rlon, _ := pointFromLonLatBearing(centerLon, centerLat, 90, radius)
|
||||
_, blat := pointFromLonLatBearing(centerLon, centerLat, 180, radius)
|
||||
llon, _ := pointFromLonLatBearing(centerLon, centerLat, 270, radius)
|
||||
return normalizeLon(llon), normalizeLat(tlat),
|
||||
normalizeLon(rlon), normalizeLat(blat)
|
||||
}
|
||||
|
||||
const degreesToRadian = math.Pi / 180
|
||||
const radiansToDegrees = 180 / math.Pi
|
||||
const flattening = 1.0 / 298.257223563
|
||||
const semiMajorAxis = 6378137
|
||||
const semiMinorAxis = semiMajorAxis * (1.0 - flattening)
|
||||
const semiMajorAxis2 = semiMajorAxis * semiMajorAxis
|
||||
const semiMinorAxis2 = semiMinorAxis * semiMinorAxis
|
||||
|
||||
// DegreesToRadians converts an angle in degrees to radians
|
||||
func DegreesToRadians(d float64) float64 {
|
||||
@ -122,84 +111,60 @@ func RadiansToDegrees(r float64) float64 {
|
||||
return r * radiansToDegrees
|
||||
}
|
||||
|
||||
// pointFromLonLatBearing starts that the provide lon,lat
|
||||
// then moves in the bearing direction (in degrees)
|
||||
// this move continues for the provided distance (in meters)
|
||||
// The lon, lat of this destination location is returned.
|
||||
func pointFromLonLatBearing(lon, lat, bearing,
|
||||
dist float64) (float64, float64) {
|
||||
var earthMeanRadiusMeters = 6371008.7714
|
||||
|
||||
alpha1 := DegreesToRadians(bearing)
|
||||
cosA1 := math.Cos(alpha1)
|
||||
sinA1 := math.Sin(alpha1)
|
||||
tanU1 := (1 - flattening) * math.Tan(DegreesToRadians(lat))
|
||||
cosU1 := 1 / math.Sqrt(1+tanU1*tanU1)
|
||||
sinU1 := tanU1 * cosU1
|
||||
sig1 := math.Atan2(tanU1, cosA1)
|
||||
sinAlpha := cosU1 * sinA1
|
||||
cosSqAlpha := 1 - sinAlpha*sinAlpha
|
||||
uSq := cosSqAlpha * (semiMajorAxis2 - semiMinorAxis2) / semiMinorAxis2
|
||||
A := 1 + uSq/16384*(4096+uSq*(-768+uSq*(320-175*uSq)))
|
||||
B := uSq / 1024 * (256 + uSq*(-128+uSq*(74-47*uSq)))
|
||||
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
|
||||
err := checkLongitude(lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
radLon := DegreesToRadians(lon)
|
||||
radLat := DegreesToRadians(lat)
|
||||
radDistance := (dist + 7e-2) / earthMeanRadiusMeters
|
||||
|
||||
sigma := dist / (semiMinorAxis * A)
|
||||
minLatL := radLat - radDistance
|
||||
maxLatL := radLat + radDistance
|
||||
|
||||
cos25SigmaM := math.Cos(2*sig1 + sigma)
|
||||
sinSigma := math.Sin(sigma)
|
||||
cosSigma := math.Cos(sigma)
|
||||
deltaSigma := B * sinSigma * (cos25SigmaM + (B/4)*
|
||||
(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*
|
||||
(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM)))
|
||||
sigmaP := sigma
|
||||
sigma = dist/(semiMinorAxis*A) + deltaSigma
|
||||
for math.Abs(sigma-sigmaP) > 1E-12 {
|
||||
cos25SigmaM = math.Cos(2*sig1 + sigma)
|
||||
sinSigma = math.Sin(sigma)
|
||||
cosSigma = math.Cos(sigma)
|
||||
deltaSigma = B * sinSigma * (cos25SigmaM + (B/4)*
|
||||
(cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)-(B/6)*cos25SigmaM*
|
||||
(-1+4*sinSigma*sinSigma)*(-3+4*cos25SigmaM*cos25SigmaM)))
|
||||
sigmaP = sigma
|
||||
sigma = dist/(semiMinorAxis*A) + deltaSigma
|
||||
var minLonL, maxLonL float64
|
||||
if minLatL > minLatRad && maxLatL < maxLatRad {
|
||||
deltaLon := asin(sin(radDistance) / cos(radLat))
|
||||
minLonL = radLon - deltaLon
|
||||
if minLonL < minLonRad {
|
||||
minLonL += 2 * math.Pi
|
||||
}
|
||||
maxLonL = radLon + deltaLon
|
||||
if maxLonL > maxLonRad {
|
||||
maxLonL -= 2 * math.Pi
|
||||
}
|
||||
} else {
|
||||
// pole is inside distance
|
||||
minLatL = math.Max(minLatL, minLatRad)
|
||||
maxLatL = math.Min(maxLatL, maxLatRad)
|
||||
minLonL = minLonRad
|
||||
maxLonL = maxLonRad
|
||||
}
|
||||
|
||||
tmp := sinU1*sinSigma - cosU1*cosSigma*cosA1
|
||||
lat2 := math.Atan2(sinU1*cosSigma+cosU1*sinSigma*cosA1,
|
||||
(1-flattening)*math.Sqrt(sinAlpha*sinAlpha+tmp*tmp))
|
||||
lamda := math.Atan2(sinSigma*sinA1, cosU1*cosSigma-sinU1*sinSigma*cosA1)
|
||||
c := flattening / 16 * cosSqAlpha * (4 + flattening*(4-3*cosSqAlpha))
|
||||
lam := lamda - (1-c)*flattening*sinAlpha*
|
||||
(sigma+c*sinSigma*(cos25SigmaM+c*cosSigma*(-1+2*cos25SigmaM*cos25SigmaM)))
|
||||
|
||||
rvlon := lon + RadiansToDegrees(lam)
|
||||
rvlat := RadiansToDegrees(lat2)
|
||||
|
||||
return rvlon, rvlat
|
||||
return RadiansToDegrees(minLonL),
|
||||
RadiansToDegrees(maxLatL),
|
||||
RadiansToDegrees(maxLonL),
|
||||
RadiansToDegrees(minLatL),
|
||||
nil
|
||||
}
|
||||
|
||||
// normalizeLon normalizes a longitude value within the -180 to 180 range
|
||||
func normalizeLon(lonDeg float64) float64 {
|
||||
if lonDeg >= -180 && lonDeg <= 180 {
|
||||
return lonDeg
|
||||
func checkLatitude(latitude float64) error {
|
||||
if math.IsNaN(latitude) || latitude < minLat || latitude > maxLat {
|
||||
return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat)
|
||||
}
|
||||
|
||||
off := math.Mod(lonDeg+180, 360)
|
||||
if off < 0 {
|
||||
return 180 + off
|
||||
} else if off == 0 && lonDeg > 0 {
|
||||
return 180
|
||||
}
|
||||
return -180 + off
|
||||
return nil
|
||||
}
|
||||
|
||||
// normalizeLat normalizes a latitude value within the -90 to 90 range
|
||||
func normalizeLat(latDeg float64) float64 {
|
||||
if latDeg >= -90 && latDeg <= 90 {
|
||||
return latDeg
|
||||
func checkLongitude(longitude float64) error {
|
||||
if math.IsNaN(longitude) || longitude < minLon || longitude > maxLon {
|
||||
return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon)
|
||||
}
|
||||
off := math.Abs(math.Mod(latDeg+90, 360))
|
||||
if off <= 180 {
|
||||
return off - 90
|
||||
}
|
||||
return (360 - off) - 90
|
||||
return nil
|
||||
}
|
||||
|
6
vendor/github.com/blevesearch/bleve/geo/sloppy.go
generated
vendored
6
vendor/github.com/blevesearch/bleve/geo/sloppy.go
generated
vendored
@ -146,6 +146,12 @@ func earthDiameter(lat float64) float64 {
|
||||
return earthDiameterPerLatitude[int(index)]
|
||||
}
|
||||
|
||||
var pio2 = math.Pi / 2
|
||||
|
||||
func sin(a float64) float64 {
|
||||
return cos(a - pio2)
|
||||
}
|
||||
|
||||
// cos is a sloppy math (faster) implementation of math.Cos
|
||||
func cos(a float64) float64 {
|
||||
if a < 0.0 {
|
||||
|
44
vendor/github.com/blevesearch/bleve/index.go
generated
vendored
44
vendor/github.com/blevesearch/bleve/index.go
generated
vendored
@ -15,11 +15,13 @@
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
"golang.org/x/net/context"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
// A Batch groups together multiple Index and Delete
|
||||
@ -31,6 +33,9 @@ import (
|
||||
type Batch struct {
|
||||
index Index
|
||||
internal *index.Batch
|
||||
|
||||
lastDocSize uint64
|
||||
totalSize uint64
|
||||
}
|
||||
|
||||
// Index adds the specified index operation to the
|
||||
@ -46,6 +51,30 @@ func (b *Batch) Index(id string, data interface{}) error {
|
||||
return err
|
||||
}
|
||||
b.internal.Update(doc)
|
||||
|
||||
b.lastDocSize = uint64(doc.Size() +
|
||||
len(id) + size.SizeOfString) // overhead from internal
|
||||
b.totalSize += b.lastDocSize
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Batch) LastDocSize() uint64 {
|
||||
return b.lastDocSize
|
||||
}
|
||||
|
||||
func (b *Batch) TotalDocsSize() uint64 {
|
||||
return b.totalSize
|
||||
}
|
||||
|
||||
// IndexAdvanced adds the specified index operation to the
|
||||
// batch which skips the mapping. NOTE: the bleve Index is not updated
|
||||
// until the batch is executed.
|
||||
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
|
||||
if doc.ID == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
b.internal.Update(doc)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -65,7 +94,7 @@ func (b *Batch) SetInternal(key, val []byte) {
|
||||
b.internal.SetInternal(key, val)
|
||||
}
|
||||
|
||||
// SetInternal adds the specified delete internal
|
||||
// DeleteInternal adds the specified delete internal
|
||||
// operation to the batch. NOTE: the bleve Index is
|
||||
// not updated until the batch is executed.
|
||||
func (b *Batch) DeleteInternal(key []byte) {
|
||||
@ -99,12 +128,15 @@ func (b *Batch) Reset() {
|
||||
// them.
|
||||
//
|
||||
// The DocumentMapping used to index a value is deduced by the following rules:
|
||||
// 1) If value implements Classifier interface, resolve the mapping from Type().
|
||||
// 2) If value has a string field or value at IndexMapping.TypeField.
|
||||
// 1) If value implements mapping.bleveClassifier interface, resolve the mapping
|
||||
// from BleveType().
|
||||
// 2) If value implements mapping.Classifier interface, resolve the mapping
|
||||
// from Type().
|
||||
// 3) If value has a string field or value at IndexMapping.TypeField.
|
||||
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
|
||||
// is described below.
|
||||
// 3) If IndexMapping.DefaultType is registered, return it.
|
||||
// 4) Return IndexMapping.DefaultMapping.
|
||||
// 4) If IndexMapping.DefaultType is registered, return it.
|
||||
// 5) Return IndexMapping.DefaultMapping.
|
||||
//
|
||||
// Each field or nested field of the value is identified by a string path, then
|
||||
// mapped to one or several FieldMappings which extract the result for analysis.
|
||||
|
29
vendor/github.com/blevesearch/bleve/index/analysis.go
generated
vendored
29
vendor/github.com/blevesearch/bleve/index/analysis.go
generated
vendored
@ -14,7 +14,20 @@
|
||||
|
||||
package index
|
||||
|
||||
import "github.com/blevesearch/bleve/document"
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeAnalysisResult int
|
||||
|
||||
func init() {
|
||||
var ar AnalysisResult
|
||||
reflectStaticSizeAnalysisResult = int(reflect.TypeOf(ar).Size())
|
||||
}
|
||||
|
||||
type IndexRow interface {
|
||||
KeySize() int
|
||||
@ -29,6 +42,20 @@ type IndexRow interface {
|
||||
type AnalysisResult struct {
|
||||
DocID string
|
||||
Rows []IndexRow
|
||||
|
||||
// scorch
|
||||
Document *document.Document
|
||||
Analyzed []analysis.TokenFrequencies
|
||||
Length []int
|
||||
}
|
||||
|
||||
func (a *AnalysisResult) Size() int {
|
||||
rv := reflectStaticSizeAnalysisResult
|
||||
for _, analyzedI := range a.Analyzed {
|
||||
rv += analyzedI.Size()
|
||||
}
|
||||
rv += len(a.Length) * size.SizeOfInt
|
||||
return rv
|
||||
}
|
||||
|
||||
type AnalysisWork struct {
|
||||
|
69
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
69
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
@ -18,11 +18,23 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFieldDoc int
|
||||
var reflectStaticSizeTermFieldVector int
|
||||
|
||||
func init() {
|
||||
var tfd TermFieldDoc
|
||||
reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
|
||||
var tfv TermFieldVector
|
||||
reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
|
||||
}
|
||||
|
||||
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
|
||||
|
||||
type Index interface {
|
||||
@ -68,6 +80,8 @@ type IndexReader interface {
|
||||
Document(id string) (*document.Document, error)
|
||||
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
|
||||
|
||||
DocValueReader(fields []string) (DocValueReader, error)
|
||||
|
||||
Fields() ([]string, error)
|
||||
|
||||
GetInternal(key []byte) ([]byte, error)
|
||||
@ -84,6 +98,18 @@ type IndexReader interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
type IndexReaderRegexp interface {
|
||||
FieldDictRegexp(field string, regex []byte) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderFuzzy interface {
|
||||
FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderOnly interface {
|
||||
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
type FieldTerms map[string][]string
|
||||
|
||||
@ -115,6 +141,11 @@ type TermFieldVector struct {
|
||||
End uint64
|
||||
}
|
||||
|
||||
func (tfv *TermFieldVector) Size() int {
|
||||
return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
|
||||
len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// IndexInternalID is an opaque document identifier internal to the index impl
|
||||
type IndexInternalID []byte
|
||||
|
||||
@ -134,14 +165,27 @@ type TermFieldDoc struct {
|
||||
Vectors []*TermFieldVector
|
||||
}
|
||||
|
||||
func (tfd *TermFieldDoc) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
|
||||
len(tfd.Term) + len(tfd.ID)
|
||||
|
||||
for _, entry := range tfd.Vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Reset allows an already allocated TermFieldDoc to be reused
|
||||
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
|
||||
// remember the []byte used for the ID
|
||||
id := tfd.ID
|
||||
vectors := tfd.Vectors
|
||||
// idiom to copy over from empty TermFieldDoc (0 allocations)
|
||||
*tfd = TermFieldDoc{}
|
||||
// reuse the []byte already allocated (and reset len to 0)
|
||||
tfd.ID = id[:0]
|
||||
tfd.Vectors = vectors[:0]
|
||||
return tfd
|
||||
}
|
||||
|
||||
@ -161,6 +205,8 @@ type TermFieldReader interface {
|
||||
// Count returns the number of documents containing the term in this field.
|
||||
Count() uint64
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type DictEntry struct {
|
||||
@ -185,6 +231,9 @@ type DocIDReader interface {
|
||||
// will start there instead. If ID is greater than or equal to the end of
|
||||
// the range, Next() call will return io.EOF.
|
||||
Advance(ID IndexInternalID) (IndexInternalID, error)
|
||||
|
||||
Size() int
|
||||
|
||||
Close() error
|
||||
}
|
||||
|
||||
@ -239,3 +288,23 @@ func (b *Batch) Reset() {
|
||||
b.IndexOps = make(map[string]*document.Document)
|
||||
b.InternalOps = make(map[string][]byte)
|
||||
}
|
||||
|
||||
// Optimizable represents an optional interface that is implementable by
|
||||
// optimizable resources (e.g., TermFieldReaders, Searchers). These
|
||||
// optimizable resources are provided the same OptimizableContext
|
||||
// instance, so that they can coordinate via dynamic interface
|
||||
// casting.
|
||||
type Optimizable interface {
|
||||
Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
|
||||
}
|
||||
|
||||
type OptimizableContext interface {
|
||||
// Once all the optimizable resources have been provided the same
|
||||
// OptimizableContext instance, the optimization preparations are
|
||||
// finished or completed via the Finish() method.
|
||||
Finish() error
|
||||
}
|
||||
|
||||
type DocValueReader interface {
|
||||
VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
|
||||
}
|
||||
|
367
vendor/github.com/blevesearch/bleve/index/scorch/README.md
generated
vendored
Normal file
367
vendor/github.com/blevesearch/bleve/index/scorch/README.md
generated
vendored
Normal file
@ -0,0 +1,367 @@
|
||||
# scorch
|
||||
|
||||
## Definitions
|
||||
|
||||
Batch
|
||||
- A collection of Documents to mutate in the index.
|
||||
|
||||
Document
|
||||
- Has a unique identifier (arbitrary bytes).
|
||||
- Is comprised of a list of fields.
|
||||
|
||||
Field
|
||||
- Has a name (string).
|
||||
- Has a type (text, number, date, geopoint).
|
||||
- Has a value (depending on type).
|
||||
- Can be indexed, stored, or both.
|
||||
- If indexed, can be analyzed.
|
||||
- If indexed, can optionally store term vectors.
|
||||
|
||||
## Scope
|
||||
|
||||
Scorch *MUST* implement the bleve.index API without requiring any changes to this API.
|
||||
|
||||
Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API.
|
||||
|
||||
## Implementation
|
||||
|
||||
The scorch implementation starts with the concept of a segmented index.
|
||||
|
||||
A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. Because there is no ordering, this means that when searching an index, you can (and should) search all the segments concurrently.
|
||||
|
||||
### Internal Wrapper
|
||||
|
||||
In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described.
|
||||
|
||||
#### \_id field
|
||||
|
||||
In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the document's external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code.
|
||||
|
||||
NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen.
|
||||
|
||||
### Proposed Structures
|
||||
|
||||
```
|
||||
type Segment interface {
|
||||
|
||||
Dictionary(field string) TermDictionary
|
||||
|
||||
}
|
||||
|
||||
type TermDictionary interface {
|
||||
|
||||
PostingsList(term string, excluding PostingsList) PostingsList
|
||||
|
||||
}
|
||||
|
||||
type PostingsList interface {
|
||||
|
||||
Next() Posting
|
||||
|
||||
And(other PostingsList) PostingsList
|
||||
Or(other PostingsList) PostingsList
|
||||
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
Number() uint64
|
||||
|
||||
Frequency() uint64
|
||||
Norm() float64
|
||||
|
||||
Locations() Locations
|
||||
}
|
||||
|
||||
type Locations interface {
|
||||
Start() uint64
|
||||
End() uint64
|
||||
Pos() uint64
|
||||
ArrayPositions() ...
|
||||
}
|
||||
|
||||
type DeletedDocs {
|
||||
|
||||
}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
segment Segment
|
||||
deleted PostingsList
|
||||
}
|
||||
|
||||
type IndexSnapshot struct {
|
||||
segment []SegmentSnapshot
|
||||
}
|
||||
```
|
||||
**What about errors?**
|
||||
**What about memory mgmnt or context?**
|
||||
**Postings List separate iterator to separate stateful from stateless**
|
||||
### Mutating the Index
|
||||
|
||||
The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches.
|
||||
|
||||
NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in-turn require changes in FTS.
|
||||
|
||||
From this point forward, only Batch mutations will be discussed.
|
||||
|
||||
Sequence of Operations:
|
||||
|
||||
1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like:
|
||||
|
||||
```
|
||||
foreach segment {
|
||||
dict := segment.Dictionary("\_id")
|
||||
postings := empty postings list
|
||||
foreach docID {
|
||||
postings = postings.Or(dict.PostingsList(docID, nil))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
NOTE: it is illustrated above as nested for loops, but some or all of these could be done concurrently. The end result is that for each segment, we have a (possibly empty) bitset.
|
||||
|
||||
2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.
|
||||
|
||||
3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information.
|
||||
|
||||
4. We now have everything we need to update the state of the system to include this new snapshot.
|
||||
|
||||
- Acquire a lock
|
||||
- Create a new IndexSnapshot
|
||||
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
|
||||
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
|
||||
- Append the new SegmentSnapshot to the IndexSnapshot
|
||||
- Release the lock
|
||||
|
||||
An ASCII art example:
|
||||
```
|
||||
0 - Empty Index
|
||||
|
||||
No segments
|
||||
|
||||
IndexSnapshot
|
||||
segments []
|
||||
deleted []
|
||||
|
||||
|
||||
1 - Index Batch [ A B C ]
|
||||
|
||||
segment 0
|
||||
numbers [ 1 2 3 ]
|
||||
\_id [ A B C ]
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 ]
|
||||
deleted [ nil ]
|
||||
|
||||
|
||||
2 - Index Batch [ B' ]
|
||||
|
||||
segment 0 1
|
||||
numbers [ 1 2 3 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-1:
|
||||
[ 0 1 0 ]
|
||||
|
||||
OR it with previous (nil) (call it 0-1)
|
||||
[ 0 1 0 ]
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 ]
|
||||
deleted [ 0-1 nil ]
|
||||
|
||||
3 - Index Batch [ C' ]
|
||||
|
||||
segment 0 1 2
|
||||
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ] [ C ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-2:
|
||||
[ 0 0 1 ]
|
||||
|
||||
OR it with previous ([ 0 1 0 ]) (call it 0-12)
|
||||
[ 0 1 1 ]
|
||||
|
||||
Compute bitset segment-1-deleted-by-2:
|
||||
[ 0 ]
|
||||
|
||||
OR it with previous (nil)
|
||||
still just nil
|
||||
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 2 ]
|
||||
deleted [ 0-12 nil nil ]
|
||||
```
|
||||
|
||||
**is there opportunity to stop early when doc is found in one segment**
|
||||
**also, more efficient way to find bits for long lists of ids?**
|
||||
|
||||
### Searching
|
||||
|
||||
In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time.
|
||||
|
||||
As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data.
|
||||
|
||||
#### Term Search
|
||||
|
||||
Term search is the only searching primitive exposed in today's bleve.index API. This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working.
|
||||
|
||||
A term search for term T in field F will look something like this:
|
||||
|
||||
```
|
||||
searchResultPostings = empty
|
||||
foreach segment {
|
||||
dict := segment.Dictionary(F)
|
||||
segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted)
|
||||
// make segmentLocal numbers into global numbers, and flip bits in searchResultPostings
|
||||
}
|
||||
```
|
||||
|
||||
The searchResultPostings will be a new implementation of the TermFieldReader interface.
|
||||
|
||||
As a reminder this interface is:
|
||||
|
||||
```
|
||||
// TermFieldReader is the interface exposing the enumeration of documents
|
||||
// containing a given term in a given field. Documents are returned in byte
|
||||
// lexicographic order over their identifiers.
|
||||
type TermFieldReader interface {
|
||||
// Next returns the next document containing the term in this field, or nil
|
||||
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
|
||||
// is optional, and when non-nil, will be used instead of allocating memory.
|
||||
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
|
||||
|
||||
// Advance resets the enumeration at specified document or its immediate
|
||||
// follower.
|
||||
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
|
||||
|
||||
// Count returns the number of documents contains the term in this field.
|
||||
Count() uint64
|
||||
Close() error
|
||||
}
|
||||
```
|
||||
|
||||
At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this is perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:
|
||||
|
||||
- The weakest interface, only supports Next() no ordering at all.
|
||||
- Ordered, supporting Advance()
|
||||
- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
|
||||
|
||||
But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.
|
||||
|
||||
NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order, and a NOOP for the collector, now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this.
|
||||
|
||||
An ASCII art example:
|
||||
|
||||
```
|
||||
Let's start with the IndexSnapshot we ended with earlier:
|
||||
|
||||
3 - Index Batch [ C' ]
|
||||
|
||||
segment 0 1 2
|
||||
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ] [ C ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-2:
|
||||
[ 0 0 1 ]
|
||||
|
||||
OR it with previous ([ 0 1 0 ]) (call it 0-12)
|
||||
[ 0 1 1 ]
|
||||
|
||||
Compute bitset segment-1-deleted-by-2:
|
||||
[ 0 ]
|
||||
|
||||
OR it with previous (nil)
|
||||
still just nil
|
||||
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 2 ]
|
||||
deleted [ 0-12 nil nil ]
|
||||
|
||||
Now let's search for the term 'cat' in the field 'desc' and let's assume that Document C (both versions) would match it.
|
||||
|
||||
Concurrently:
|
||||
|
||||
- Segment 0
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' EXCLUDING 0-12
|
||||
- raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ]
|
||||
- Segment 1
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' excluding nil
|
||||
- [ 0 ]
|
||||
- Segment 2
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' excluding nil
|
||||
- [ 1 ]
|
||||
|
||||
Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot)
|
||||
|
||||
IndexSnapshot already should have mapping something like:
|
||||
0 - Offset 0
|
||||
1 - Offset 3 (because segment 0 had 3 docs)
|
||||
2 - Offset 4 (because segment 1 had 1 doc)
|
||||
|
||||
This maps to search result bitset:
|
||||
|
||||
[ 0 0 0 0 1]
|
||||
|
||||
Caller would call Next() and get doc number 5 (assuming 1 based indexing for now)
|
||||
|
||||
Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2, 5-4=1 so we're looking for doc number 1 in segment 2. That happens to be C...
|
||||
|
||||
```
|
||||
|
||||
#### Future improvements
|
||||
|
||||
In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but also means there would be greater burden on the Searcher code rewriting logical expressions for maximum performance.
|
||||
|
||||
Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, disposing of the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.
|
||||
|
||||
|
||||
### Memory Tracking
|
||||
|
||||
All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of the tracking structure itself (say just a few pointers).
|
||||
|
||||
This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded.
|
||||
|
||||
### Persistence
|
||||
|
||||
Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges.
|
||||
|
||||
At runtime, the state of an index (its IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added. The reason is that the bitmasks encode which items have been obsoleted by other (subsequent or more future) segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk. When we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask.
|
||||
|
||||
This also relates to the topic of rollback, addressed next...
|
||||
|
||||
|
||||
### Rollback
|
||||
|
||||
One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.
|
||||
|
||||
Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal.
|
||||
|
||||
|
||||
### Internal Storage
|
||||
|
||||
The bleve.index API has support for "internal storage". The ability to store information under a separate name space.
|
||||
|
||||
This is not used for high volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But, the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, it's crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe it's important to put the internal key/value pairs inside the segments themselves. But, this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. But, this also seems to substantially increase the complexity of the solution because of the separate name space, it would appear to require its own bitmask. Further keys aren't numeric, which then implies yet another mapping from internal key to number, etc.
|
||||
|
||||
More thought is required here.
|
||||
|
||||
### Merging
|
||||
|
||||
The segmented index approach requires merging to prevent the number of segments from growing too large.
|
||||
|
||||
Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and unnecessarily rendering cached data useless.
|
||||
|
||||
A few simple principles have been identified.
|
||||
|
||||
- Roughly we merge multiple smaller segments into a single larger one.
|
||||
- The larger a segment gets the less likely we should be to ever merge it.
|
||||
- Segments with large numbers of deleted/obsoleted items are good candidates as the merge will result in a space savings.
|
||||
- Segments with all items deleted/obsoleted can be dropped.
|
||||
|
||||
Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it.
|
56
vendor/github.com/blevesearch/bleve/index/scorch/event.go
generated
vendored
Normal file
56
vendor/github.com/blevesearch/bleve/index/scorch/event.go
generated
vendored
Normal file
@ -0,0 +1,56 @@
|
||||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import "time"
|
||||
|
||||
// RegistryAsyncErrorCallbacks should be treated as read-only after
|
||||
// process init()'ialization.
|
||||
var RegistryAsyncErrorCallbacks = map[string]func(error){}
|
||||
|
||||
// RegistryEventCallbacks should be treated as read-only after
|
||||
// process init()'ialization.
|
||||
var RegistryEventCallbacks = map[string]func(Event){}
|
||||
|
||||
// Event represents the information provided in an OnEvent() callback.
|
||||
type Event struct {
|
||||
Kind EventKind
|
||||
Scorch *Scorch
|
||||
Duration time.Duration
|
||||
}
|
||||
|
||||
// EventKind represents an event code for OnEvent() callbacks.
type EventKind int

// The event kinds that can be delivered to OnEvent() callbacks.
var (
	// EventKindCloseStart is fired when a Scorch.Close() has begun.
	EventKindCloseStart = EventKind(1)

	// EventKindClose is fired when a scorch index has been fully closed.
	EventKindClose = EventKind(2)

	// EventKindMergerProgress is fired when the merger has completed a
	// round of merge processing.
	EventKindMergerProgress = EventKind(3)

	// EventKindPersisterProgress is fired when the persister has
	// completed a round of persistence processing.
	EventKindPersisterProgress = EventKind(4)

	// EventKindBatchIntroductionStart is fired when Batch() is invoked
	// which introduces a new segment.
	EventKindBatchIntroductionStart = EventKind(5)

	// EventKindBatchIntroduction is fired when Batch() completes.
	EventKindBatchIntroduction = EventKind(6)
)
|
443
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
Normal file
443
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
Normal file
@ -0,0 +1,443 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
||||
type segmentIntroduction struct {
|
||||
id uint64
|
||||
data segment.Segment
|
||||
obsoletes map[uint64]*roaring.Bitmap
|
||||
ids []string
|
||||
internal map[string][]byte
|
||||
|
||||
applied chan error
|
||||
persisted chan error
|
||||
}
|
||||
|
||||
type persistIntroduction struct {
|
||||
persisted map[uint64]segment.Segment
|
||||
applied notificationChan
|
||||
}
|
||||
|
||||
type epochWatcher struct {
|
||||
epoch uint64
|
||||
notifyCh notificationChan
|
||||
}
|
||||
|
||||
type snapshotReversion struct {
|
||||
snapshot *IndexSnapshot
|
||||
applied chan error
|
||||
persisted chan error
|
||||
}
|
||||
|
||||
func (s *Scorch) mainLoop() {
|
||||
var epochWatchers []*epochWatcher
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
case epochWatcher := <-s.introducerNotifier:
|
||||
epochWatchers = append(epochWatchers, epochWatcher)
|
||||
|
||||
case nextMerge := <-s.merges:
|
||||
s.introduceMerge(nextMerge)
|
||||
|
||||
case next := <-s.introductions:
|
||||
err := s.introduceSegment(next)
|
||||
if err != nil {
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
case persist := <-s.persists:
|
||||
s.introducePersist(persist)
|
||||
|
||||
case revertTo := <-s.revertToSnapshots:
|
||||
err := s.revertToSnapshot(revertTo)
|
||||
if err != nil {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
var epochCurr uint64
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil {
|
||||
epochCurr = s.root.epoch
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
var epochWatchersNext []*epochWatcher
|
||||
for _, w := range epochWatchers {
|
||||
if w.epoch < epochCurr {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
epochWatchersNext = append(epochWatchersNext, w)
|
||||
}
|
||||
}
|
||||
epochWatchers = epochWatchersNext
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}
|
||||
|
||||
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
nsegs := len(root.segment)
|
||||
|
||||
// prepare new index snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, 0, nsegs+1),
|
||||
offsets: make([]uint64, 0, nsegs+1),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
var running uint64
|
||||
for i := range root.segment {
|
||||
// see if optimistic work included this segment
|
||||
delta, ok := next.obsoletes[root.segment[i].id]
|
||||
if !ok {
|
||||
var err error
|
||||
delta, err = root.segment[i].segment.DocNumbers(next.ids)
|
||||
if err != nil {
|
||||
next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
|
||||
close(next.applied)
|
||||
_ = newSnapshot.DecRef()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
newss := &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
creator: root.segment[i].creator,
|
||||
}
|
||||
|
||||
// apply new obsoletions
|
||||
if root.segment[i].deleted == nil {
|
||||
newss.deleted = delta
|
||||
} else {
|
||||
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
|
||||
}
|
||||
if newss.deleted.IsEmpty() {
|
||||
newss.deleted = nil
|
||||
}
|
||||
|
||||
// check for live size before copying
|
||||
if newss.LiveSize() > 0 {
|
||||
newSnapshot.segment = append(newSnapshot.segment, newss)
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += newss.segment.Count()
|
||||
}
|
||||
}
|
||||
|
||||
// append new segment, if any, to end of the new index snapshot
|
||||
if next.data != nil {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: next.id,
|
||||
segment: next.data, // take ownership of next.data's ref-count
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
|
||||
// increment numItemsIntroduced which tracks the number of items
|
||||
// queued for persistence.
|
||||
atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
|
||||
}
|
||||
// copy old values
|
||||
for key, oldVal := range root.internal {
|
||||
newSnapshot.internal[key] = oldVal
|
||||
}
|
||||
// set new values and apply deletes
|
||||
for key, newVal := range next.internal {
|
||||
if newVal != nil {
|
||||
newSnapshot.internal[key] = newVal
|
||||
} else {
|
||||
delete(newSnapshot.internal, key)
|
||||
}
|
||||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
if next.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, next.persisted)
|
||||
}
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(next.applied)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
||||
atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)
|
||||
|
||||
s.rootLock.Lock()
|
||||
root := s.root
|
||||
nextSnapshotEpoch := s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
newIndexSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
epoch: nextSnapshotEpoch,
|
||||
segment: make([]*SegmentSnapshot, len(root.segment)),
|
||||
offsets: make([]uint64, len(root.offsets)),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introducePersist",
|
||||
}
|
||||
|
||||
for i, segmentSnapshot := range root.segment {
|
||||
// see if this segment has been replaced
|
||||
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: segmentSnapshot.id,
|
||||
segment: replacement,
|
||||
deleted: segmentSnapshot.deleted,
|
||||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
creator: "introducePersist",
|
||||
}
|
||||
newIndexSnapshot.segment[i] = newSegmentSnapshot
|
||||
delete(persist.persisted, segmentSnapshot.id)
|
||||
|
||||
// update items persisted incase of a new segment snapshot
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
} else {
|
||||
newIndexSnapshot.segment[i] = root.segment[i]
|
||||
newIndexSnapshot.segment[i].segment.AddRef()
|
||||
}
|
||||
newIndexSnapshot.offsets[i] = root.offsets[i]
|
||||
}
|
||||
|
||||
for k, v := range root.internal {
|
||||
newIndexSnapshot.internal[k] = v
|
||||
}
|
||||
|
||||
newIndexSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
rootPrev := s.root
|
||||
s.root = newIndexSnapshot
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(persist.applied)
|
||||
}
|
||||
|
||||
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: root.internal,
|
||||
refs: 1,
|
||||
creator: "introduceMerge",
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
newSegmentDeleted := roaring.NewBitmap()
|
||||
var running uint64
|
||||
for i := range root.segment {
|
||||
segmentID := root.segment[i].id
|
||||
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
|
||||
// this segment is going away, see if anything else was deleted since we started the merge
|
||||
if segSnapAtMerge != nil && root.segment[i].deleted != nil {
|
||||
// assume all these deletes are new
|
||||
deletedSince := root.segment[i].deleted
|
||||
// if we already knew about some of them, remove
|
||||
if segSnapAtMerge.deleted != nil {
|
||||
deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.deleted)
|
||||
}
|
||||
deletedSinceItr := deletedSince.Iterator()
|
||||
for deletedSinceItr.HasNext() {
|
||||
oldDocNum := deletedSinceItr.Next()
|
||||
newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum]
|
||||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
// clean up the old segment map to figure out the
|
||||
// obsolete segments wrt root in meantime, whatever
|
||||
// segments left behind in old map after processing
|
||||
// the root segments would be the obsolete segment set
|
||||
delete(nextMerge.old, segmentID)
|
||||
} else if root.segment[i].LiveSize() > 0 {
|
||||
// this segment is staying
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
deleted: root.segment[i].deleted,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
creator: root.segment[i].creator,
|
||||
})
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += root.segment[i].segment.Count()
|
||||
}
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
// merged segment wrt the current root segments, hence
|
||||
// applying the obsolete segment contents to newly merged segment
|
||||
for segID, ss := range nextMerge.old {
|
||||
obsoleted := ss.DocNumbersLive()
|
||||
if obsoleted != nil {
|
||||
obsoletedIter := obsoleted.Iterator()
|
||||
for obsoletedIter.HasNext() {
|
||||
oldDocNum := obsoletedIter.Next()
|
||||
newDocNum := nextMerge.oldNewDocNums[segID][oldDocNum]
|
||||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
}
|
||||
// In case where all the docs in the newly merged segment getting
|
||||
// deleted by the time we reach here, can skip the introduction.
|
||||
if nextMerge.new != nil &&
|
||||
nextMerge.new.Count() > newSegmentDeleted.GetCardinality() {
|
||||
// put new segment at end
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: nextMerge.id,
|
||||
segment: nextMerge.new, // take ownership for nextMerge.new's ref-count
|
||||
deleted: newSegmentDeleted,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
creator: "introduceMerge",
|
||||
})
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
|
||||
}
|
||||
|
||||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
// notify requester that we incorporated this
|
||||
nextMerge.notify <- newSnapshot
|
||||
close(nextMerge.notify)
|
||||
}
|
||||
|
||||
func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
|
||||
|
||||
if revertTo.snapshot == nil {
|
||||
err := fmt.Errorf("Cannot revert to a nil snapshot")
|
||||
revertTo.applied <- err
|
||||
return err
|
||||
}
|
||||
|
||||
// acquire lock
|
||||
s.rootLock.Lock()
|
||||
|
||||
// prepare a new index snapshot, based on next snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)),
|
||||
offsets: revertTo.snapshot.offsets,
|
||||
internal: revertTo.snapshot.internal,
|
||||
epoch: s.nextSnapshotEpoch,
|
||||
refs: 1,
|
||||
creator: "revertToSnapshot",
|
||||
}
|
||||
s.nextSnapshotEpoch++
|
||||
|
||||
// iterate through segments
|
||||
for i, segmentSnapshot := range revertTo.snapshot.segment {
|
||||
newSnapshot.segment[i] = &SegmentSnapshot{
|
||||
id: segmentSnapshot.id,
|
||||
segment: segmentSnapshot.segment,
|
||||
deleted: segmentSnapshot.deleted,
|
||||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
creator: segmentSnapshot.creator,
|
||||
}
|
||||
newSnapshot.segment[i].segment.AddRef()
|
||||
|
||||
// remove segment from ineligibleForRemoval map
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
|
||||
if revertTo.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
|
||||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
// swap in new snapshot
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(revertTo.applied)
|
||||
|
||||
return nil
|
||||
}
|
342
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
Normal file
342
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
Normal file
@ -0,0 +1,342 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/mergeplan"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
)
|
||||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
var lastEpochMergePlanned uint64
|
||||
mergePlannerOptions, err := s.parseMergePlannerOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
|
||||
s.asyncTasks.Done()
|
||||
return
|
||||
}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
default:
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.RLock()
|
||||
ourSnapshot := s.root
|
||||
ourSnapshot.AddRef()
|
||||
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
if ourSnapshot.epoch != lastEpochMergePlanned {
|
||||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
|
||||
if err == ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
lastEpochMergePlanned = ourSnapshot.epoch
|
||||
|
||||
s.fireEvent(EventKindMergerProgress, time.Since(startTime))
|
||||
}
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// tell the persister we're waiting for changes
|
||||
// first make a epochWatcher chan
|
||||
ew := &epochWatcher{
|
||||
epoch: lastEpochMergePlanned,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
// give it to the persister
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.persisterNotifier <- ew:
|
||||
}
|
||||
|
||||
// now wait for persister (but also detect close)
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-ew.notifyCh:
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}
|
||||
|
||||
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
|
||||
error) {
|
||||
mergePlannerOptions := mergeplan.DefaultMergePlanOptions
|
||||
if v, ok := s.config["scorchMergePlanOptions"]; ok {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(b, &mergePlannerOptions)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &mergePlannerOptions, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
||||
options *mergeplan.MergePlanOptions) error {
|
||||
// build list of zap segments in this snapshot
|
||||
var onlyZapSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(*zap.Segment); ok {
|
||||
onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
if resultMergePlan == nil {
|
||||
// nothing to do
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var notifications []chan *IndexSnapshot
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
|
||||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot)
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
|
||||
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
|
||||
|
||||
for _, planSegment := range task.Segments {
|
||||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
|
||||
oldMap[segSnapshot.id] = nil
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, zapSeg)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var oldNewDocNums map[uint64][]uint64
|
||||
var segment segment.Segment
|
||||
if len(segmentsToMerge) > 0 {
|
||||
filename := zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
fileMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
|
||||
segment, err = zap.Open(path)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return err
|
||||
}
|
||||
oldNewDocNums = make(map[uint64][]uint64)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
|
||||
}
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: oldMap,
|
||||
oldNewDocNums: oldNewDocNums,
|
||||
new: segment,
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
}
|
||||
notifications = append(notifications, sm.notify)
|
||||
|
||||
// give it to the introducer
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = segment.Close()
|
||||
return ErrClosed
|
||||
case s.merges <- sm:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
|
||||
}
|
||||
|
||||
for _, notification := range notifications {
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
return ErrClosed
|
||||
case newSnapshot := <-notification:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
|
||||
if newSnapshot != nil {
|
||||
_ = newSnapshot.DecRef()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type segmentMerge struct {
|
||||
id uint64
|
||||
old map[uint64]*SegmentSnapshot
|
||||
oldNewDocNums map[uint64][]uint64
|
||||
new segment.Segment
|
||||
notify chan *IndexSnapshot
|
||||
}
|
||||
|
||||
// perform a merging of the given SegmentBase instances into a new,
|
||||
// persisted segment, and synchronously introduce that new segment
|
||||
// into the root
|
||||
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
||||
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
|
||||
chunkFactor uint32) (*IndexSnapshot, uint64, error) {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
|
||||
|
||||
memMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
||||
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
filename := zapFileName(newSegmentID)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
newDocNums, _, err :=
|
||||
zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||
|
||||
memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
segment, err := zap.Open(path)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// update persisted stats
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: make(map[uint64]*SegmentSnapshot),
|
||||
oldNewDocNums: make(map[uint64][]uint64),
|
||||
new: segment,
|
||||
notify: make(chan *IndexSnapshot, 1),
|
||||
}
|
||||
|
||||
for i, idx := range sbsIndexes {
|
||||
ss := snapshot.segment[idx]
|
||||
sm.old[ss.id] = ss
|
||||
sm.oldNewDocNums[ss.id] = newDocNums[i]
|
||||
}
|
||||
|
||||
select { // send to introducer
|
||||
case <-s.closeCh:
|
||||
_ = segment.DecRef()
|
||||
return nil, 0, ErrClosed
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
||||
select { // wait for introduction to complete
|
||||
case <-s.closeCh:
|
||||
return nil, 0, ErrClosed
|
||||
case newSnapshot := <-sm.notify:
|
||||
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
|
||||
return newSnapshot, newSegmentID, nil
|
||||
}
|
||||
}
|
386
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
generated
vendored
Normal file
386
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
generated
vendored
Normal file
@ -0,0 +1,386 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package mergeplan provides a segment merge planning approach that's
|
||||
// inspired by Lucene's TieredMergePolicy.java and descriptions like
|
||||
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Segment represents the information that the planner needs to
// calculate segment merging.
type Segment interface {
	// Id returns the unique id of the segment -- used for sorting.
	Id() uint64

	// FullSize returns the full segment size, i.e. the size before any
	// logical deletions.
	FullSize() int64

	// LiveSize returns the size of the live data of the segment; i.e.,
	// FullSize() minus any logical deletions.
	LiveSize() int64
}
|
||||
|
||||
// Plan() will functionally compute a merge plan. A segment will be
|
||||
// assigned to at most a single MergeTask in the output MergePlan. A
|
||||
// segment not assigned to any MergeTask means the segment should
|
||||
// remain unmerged.
|
||||
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
return plan(segments, o)
|
||||
}
|
||||
|
||||
// A MergePlan is the result of the Plan() API.
|
||||
//
|
||||
// The planner doesn’t know how or whether these tasks are executed --
|
||||
// that’s up to a separate merge execution system, which might execute
|
||||
// these tasks concurrently or not, and which might execute all the
|
||||
// tasks or not.
|
||||
type MergePlan struct {
|
||||
Tasks []*MergeTask
|
||||
}
|
||||
|
||||
// A MergeTask represents several segments that should be merged
|
||||
// together into a single segment.
|
||||
type MergeTask struct {
|
||||
Segments []Segment
|
||||
}
|
||||
|
||||
// The MergePlanOptions is designed to be reusable between planning calls.
|
||||
type MergePlanOptions struct {
|
||||
// Max # segments per logarithmic tier, or max width of any
|
||||
// logarithmic “step”. Smaller values mean more merging but fewer
|
||||
// segments. Should be >= SegmentsPerMergeTask, else you'll have
|
||||
// too much merging.
|
||||
MaxSegmentsPerTier int
|
||||
|
||||
// Max size of any segment produced after merging. Actual
|
||||
// merging, however, may produce segment sizes different than the
|
||||
// planner’s predicted sizes.
|
||||
MaxSegmentSize int64
|
||||
|
||||
// The growth factor for each tier in a staircase of idealized
|
||||
// segments computed by CalcBudget().
|
||||
TierGrowth float64
|
||||
|
||||
// The number of segments in any resulting MergeTask. e.g.,
|
||||
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
|
||||
SegmentsPerMergeTask int
|
||||
|
||||
// Small segments are rounded up to this size, i.e., treated as
|
||||
// equal (floor) size for consideration. This is to prevent lots
|
||||
// of tiny segments from resulting in a long tail in the index.
|
||||
FloorSegmentSize int64
|
||||
|
||||
// Controls how aggressively merges that reclaim more deletions
|
||||
// are favored. Higher values will more aggressively target
|
||||
// merges that reclaim deletions, but be careful not to go so high
|
||||
// that way too much merging takes place; a value of 3.0 is
|
||||
// probably nearly too high. A value of 0.0 means deletions don't
|
||||
// impact merge selection.
|
||||
ReclaimDeletesWeight float64
|
||||
|
||||
// Optional, defaults to mergeplan.CalcBudget().
|
||||
CalcBudget func(totalSize int64, firstTierSize int64,
|
||||
o *MergePlanOptions) (budgetNumSegments int)
|
||||
|
||||
// Optional, defaults to mergeplan.ScoreSegments().
|
||||
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
|
||||
|
||||
// Optional.
|
||||
Logger func(string)
|
||||
}
|
||||
|
||||
// Returns the higher of the input or FloorSegmentSize.
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
|
||||
if s > o.FloorSegmentSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentSize
|
||||
}
|
||||
|
||||
// MaxSegmentSizeLimit is the largest segment size that is accepted.
// The limit comes from the hit-1 optimisation / the max encoding
// limit of a 31-bit unsigned value.
const MaxSegmentSizeLimit = (1 << 31) - 1
|
||||
|
||||
// ErrMaxSegmentSizeTooLarge is the error reported when a configured
// MaxSegmentSize is greater than MaxSegmentSizeLimit.
var ErrMaxSegmentSizeTooLarge error = errors.New("MaxSegmentSize exceeds the size limit")
|
||||
|
||||
// DefaultMergePlanOptions suggests the default options.
|
||||
var DefaultMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 10,
|
||||
MaxSegmentSize: 5000000,
|
||||
TierGrowth: 10.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 2000,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
}
|
||||
|
||||
// -------------------------------------------
|
||||
|
||||
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
if len(segmentsIn) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o == nil {
|
||||
o = &DefaultMergePlanOptions
|
||||
}
|
||||
|
||||
segments := append([]Segment(nil), segmentsIn...) // Copy.
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segments))
|
||||
|
||||
var minLiveSize int64 = math.MaxInt64
|
||||
|
||||
var eligibles []Segment
|
||||
var eligiblesLiveSize int64
|
||||
|
||||
for _, segment := range segments {
|
||||
if minLiveSize > segment.LiveSize() {
|
||||
minLiveSize = segment.LiveSize()
|
||||
}
|
||||
|
||||
// Only small-enough segments are eligible.
|
||||
if segment.LiveSize() < o.MaxSegmentSize/2 {
|
||||
eligibles = append(eligibles, segment)
|
||||
eligiblesLiveSize += segment.LiveSize()
|
||||
}
|
||||
}
|
||||
|
||||
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
|
||||
|
||||
calcBudget := o.CalcBudget
|
||||
if calcBudget == nil {
|
||||
calcBudget = CalcBudget
|
||||
}
|
||||
|
||||
budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o)
|
||||
|
||||
scoreSegments := o.ScoreSegments
|
||||
if scoreSegments == nil {
|
||||
scoreSegments = ScoreSegments
|
||||
}
|
||||
|
||||
rv := &MergePlan{}
|
||||
|
||||
var empties []Segment
|
||||
for _, eligible := range eligibles {
|
||||
if eligible.LiveSize() <= 0 {
|
||||
empties = append(empties, eligible)
|
||||
}
|
||||
}
|
||||
if len(empties) > 0 {
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
|
||||
eligibles = removeSegments(eligibles, empties)
|
||||
}
|
||||
|
||||
// While we’re over budget, keep looping, which might produce
|
||||
// another MergeTask.
|
||||
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
||||
// Track a current best roster as we examine and score
|
||||
// potential rosters of merges.
|
||||
var bestRoster []Segment
|
||||
var bestRosterScore float64 // Lower score is better.
|
||||
|
||||
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
||||
var roster []Segment
|
||||
var rosterLiveSize int64
|
||||
|
||||
for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
|
||||
eligible := eligibles[idx]
|
||||
|
||||
if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize {
|
||||
roster = append(roster, eligible)
|
||||
rosterLiveSize += eligible.LiveSize()
|
||||
}
|
||||
}
|
||||
|
||||
if len(roster) > 0 {
|
||||
rosterScore := scoreSegments(roster, o)
|
||||
|
||||
if len(bestRoster) <= 0 || rosterScore < bestRosterScore {
|
||||
bestRoster = roster
|
||||
bestRosterScore = rosterScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(bestRoster) <= 0 {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
|
||||
|
||||
eligibles = removeSegments(eligibles, bestRoster)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Compute the number of segments that would be needed to cover the
|
||||
// totalSize, by climbing up a logarithmically growing staircase of
|
||||
// segment tiers.
|
||||
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
|
||||
budgetNumSegments int) {
|
||||
tierSize := firstTierSize
|
||||
if tierSize < 1 {
|
||||
tierSize = 1
|
||||
}
|
||||
|
||||
maxSegmentsPerTier := o.MaxSegmentsPerTier
|
||||
if maxSegmentsPerTier < 1 {
|
||||
maxSegmentsPerTier = 1
|
||||
}
|
||||
|
||||
tierGrowth := o.TierGrowth
|
||||
if tierGrowth < 1.0 {
|
||||
tierGrowth = 1.0
|
||||
}
|
||||
|
||||
for totalSize > 0 {
|
||||
segmentsInTier := float64(totalSize) / float64(tierSize)
|
||||
if segmentsInTier < float64(maxSegmentsPerTier) {
|
||||
budgetNumSegments += int(math.Ceil(segmentsInTier))
|
||||
break
|
||||
}
|
||||
|
||||
budgetNumSegments += maxSegmentsPerTier
|
||||
totalSize -= int64(maxSegmentsPerTier) * tierSize
|
||||
tierSize = int64(float64(tierSize) * tierGrowth)
|
||||
}
|
||||
|
||||
return budgetNumSegments
|
||||
}
|
||||
|
||||
// Of note, removeSegments() keeps the ordering of the results stable.
|
||||
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
|
||||
rv := make([]Segment, 0, len(segments)-len(toRemove))
|
||||
OUTER:
|
||||
for _, segment := range segments {
|
||||
for _, r := range toRemove {
|
||||
if segment == r {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, segment)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Smaller result score is better.
|
||||
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
|
||||
var totBeforeSize int64
|
||||
var totAfterSize int64
|
||||
var totAfterSizeFloored int64
|
||||
|
||||
for _, segment := range segments {
|
||||
totBeforeSize += segment.FullSize()
|
||||
totAfterSize += segment.LiveSize()
|
||||
totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize())
|
||||
}
|
||||
|
||||
if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Roughly guess the "balance" of the segments -- whether the
|
||||
// segments are about the same size.
|
||||
balance :=
|
||||
float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) /
|
||||
float64(totAfterSizeFloored)
|
||||
|
||||
// Gently favor smaller merges over bigger ones. We don't want to
|
||||
// make the exponent too large else we end up with poor merges of
|
||||
// small segments in order to avoid the large merges.
|
||||
score := balance * math.Pow(float64(totAfterSize), 0.05)
|
||||
|
||||
// Strongly favor merges that reclaim deletes.
|
||||
nonDelRatio := float64(totAfterSize) / float64(totBeforeSize)
|
||||
|
||||
score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight)
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
// ------------------------------------------
|
||||
|
||||
// ToBarChart returns an ASCII rendering of the segments and the plan.
|
||||
// The barMax is the max width of the bars in the bar chart.
|
||||
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
|
||||
rv := make([]string, 0, len(segments))
|
||||
|
||||
var maxFullSize int64
|
||||
for _, segment := range segments {
|
||||
if maxFullSize < segment.FullSize() {
|
||||
maxFullSize = segment.FullSize()
|
||||
}
|
||||
}
|
||||
if maxFullSize < 0 {
|
||||
maxFullSize = 1
|
||||
}
|
||||
|
||||
for _, segment := range segments {
|
||||
barFull := int(segment.FullSize())
|
||||
barLive := int(segment.LiveSize())
|
||||
|
||||
if maxFullSize > int64(barMax) {
|
||||
barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize))
|
||||
barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize))
|
||||
}
|
||||
|
||||
barKind := " "
|
||||
barChar := "."
|
||||
|
||||
if plan != nil {
|
||||
TASK_LOOP:
|
||||
for taski, task := range plan.Tasks {
|
||||
for _, taskSegment := range task.Segments {
|
||||
if taskSegment == segment {
|
||||
barKind = "*"
|
||||
barChar = fmt.Sprintf("%d", taski)
|
||||
break TASK_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bar :=
|
||||
strings.Repeat(barChar, barLive)[0:barLive] +
|
||||
strings.Repeat("x", barFull-barLive)[0:barFull-barLive]
|
||||
|
||||
rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
|
||||
segment.Id(),
|
||||
segment.LiveSize(),
|
||||
segment.FullSize(),
|
||||
barKind, bar))
|
||||
}
|
||||
|
||||
return strings.Join(rv, "\n")
|
||||
}
|
||||
|
||||
// ValidateMergePlannerOptions validates the merge planner options
|
||||
func ValidateMergePlannerOptions(options *MergePlanOptions) error {
|
||||
if options.MaxSegmentSize > MaxSegmentSizeLimit {
|
||||
return ErrMaxSegmentSizeTooLarge
|
||||
}
|
||||
return nil
|
||||
}
|
28
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/sort.go
generated
vendored
Normal file
28
vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/sort.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mergeplan
|
||||
|
||||
type byLiveSizeDescending []Segment
|
||||
|
||||
func (a byLiveSizeDescending) Len() int { return len(a) }
|
||||
|
||||
func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
|
||||
func (a byLiveSizeDescending) Less(i, j int) bool {
|
||||
if a[i].LiveSize() != a[j].LiveSize() {
|
||||
return a[i].LiveSize() > a[j].LiveSize()
|
||||
}
|
||||
return a[i].Id() < a[j].Id()
|
||||
}
|
93
vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
generated
vendored
Normal file
93
vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
)
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) (
|
||||
index.OptimizableContext, error) {
|
||||
if kind != "conjunction" {
|
||||
return octx, nil
|
||||
}
|
||||
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunction)
|
||||
if !ok {
|
||||
return octx, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRConjunction struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
func (o *OptimizeTFRConjunction) Finish() error {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr0.ActualBM == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr1.ActualBM == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm := roaring.And(itr0.ActualBM, itr1.ActualBM)
|
||||
|
||||
for _, tfr := range o.tfrs[2:] {
|
||||
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
|
||||
if !ok || itr.ActualBM == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm.And(itr.ActualBM)
|
||||
}
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
|
||||
if ok && itr.ActualBM != nil {
|
||||
itr.ActualBM = bm
|
||||
itr.Actual = bm.Iterator()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
838
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
Normal file
838
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
Normal file
@ -0,0 +1,838 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/boltdb/bolt"
|
||||
)
|
||||
|
||||
// DefaultChunkFactor is the chunk factor that
// persistSnapshotMaybeMerge() passes to mergeSegmentBases().
var DefaultChunkFactor = uint32(1024)

// epochDistance is how many epochs the persister may get ahead of the
// merger before pausePersisterForMergerCatchUp() makes it wait.
//
// The number is arbitrary and should become configurable. Low values
// such as 10, or otherwise making the persister really slow, do not
// work well: more files pile up to persist in the next iteration,
// which spikes the number of open file descriptors. The ideal value
// lets the persister proceed at a pace where the merger can skip many
// intermediate snapshots, and needs to be based on empirical data.
// TODO - may need to revisit this approach/value.
var epochDistance uint64 = 5

// notificationChan is a channel that carries no data and is used
// purely for signalling.
type notificationChan chan struct{}
|
||||
|
||||
func (s *Scorch) persisterLoop() {
|
||||
defer s.asyncTasks.Done()
|
||||
|
||||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case ew = <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
default:
|
||||
}
|
||||
if ew != nil && ew.epoch > lastMergedEpoch {
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
lastMergedEpoch, persistWatchers)
|
||||
|
||||
var ourSnapshot *IndexSnapshot
|
||||
var ourPersisted []chan error
|
||||
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil && s.root.epoch > lastPersistedEpoch {
|
||||
ourSnapshot = s.root
|
||||
ourSnapshot.AddRef()
|
||||
ourPersisted = s.rootPersisted
|
||||
s.rootPersisted = nil
|
||||
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if ourSnapshot != nil {
|
||||
startTime := time.Now()
|
||||
|
||||
err := s.persistSnapshot(ourSnapshot)
|
||||
for _, ch := range ourPersisted {
|
||||
if err != nil {
|
||||
ch <- err
|
||||
}
|
||||
close(ch)
|
||||
}
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, 0)
|
||||
if err == ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
lastPersistedEpoch = ourSnapshot.epoch
|
||||
for _, ew := range persistWatchers {
|
||||
close(ew.notifyCh)
|
||||
}
|
||||
|
||||
persistWatchers = nil
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
changed := false
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil && s.root.epoch != lastPersistedEpoch {
|
||||
changed = true
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
|
||||
|
||||
if changed {
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// tell the introducer we're waiting for changes
|
||||
w := &epochWatcher{
|
||||
epoch: lastPersistedEpoch,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.introducerNotifier <- w:
|
||||
}
|
||||
|
||||
s.removeOldData() // might as well cleanup while waiting
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopWait, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-w.notifyCh:
|
||||
// woken up, next loop should pick up work
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1)
|
||||
case ew = <-s.persisterNotifier:
|
||||
// if the watchers are already caught up then let them wait,
|
||||
// else let them continue to do the catch up
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1)
|
||||
}
|
||||
}
|
||||
|
||||
func notifyMergeWatchers(lastPersistedEpoch uint64,
|
||||
persistWatchers []*epochWatcher) []*epochWatcher {
|
||||
var watchersNext []*epochWatcher
|
||||
for _, w := range persistWatchers {
|
||||
if w.epoch < lastPersistedEpoch {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
watchersNext = append(watchersNext, w)
|
||||
}
|
||||
}
|
||||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
|
||||
persistWatchers []*epochWatcher) (uint64, []*epochWatcher) {
|
||||
|
||||
// first, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
OUTER:
|
||||
// check for slow merger and await until the merger catch up
|
||||
for lastPersistedEpoch > lastMergedEpoch+epochDistance {
|
||||
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case ew := <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1)
|
||||
|
||||
// let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
}
|
||||
|
||||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
|
||||
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if persisted {
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.persistSnapshotDirect(snapshot)
|
||||
}
|
||||
|
||||
// DefaultMinSegmentsForInMemoryMerge is the default minimum number of
// in-memory zap segments an IndexSnapshot must contain before
// persistSnapshotMaybeMerge() decides to merge and persist them.
var DefaultMinSegmentsForInMemoryMerge int = 2
|
||||
|
||||
// persistSnapshotMaybeMerge examines the snapshot and might merge and
|
||||
// persist the in-memory zap segments if there are enough of them
|
||||
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
|
||||
bool, error) {
|
||||
// collect the in-memory zap segments (SegmentBase instances)
|
||||
var sbs []*zap.SegmentBase
|
||||
var sbsDrops []*roaring.Bitmap
|
||||
var sbsIndexes []int
|
||||
|
||||
for i, segmentSnapshot := range snapshot.segment {
|
||||
if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok {
|
||||
sbs = append(sbs, sb)
|
||||
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
|
||||
sbsIndexes = append(sbsIndexes, i)
|
||||
}
|
||||
}
|
||||
|
||||
if len(sbs) < DefaultMinSegmentsForInMemoryMerge {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
newSnapshot, newSegmentID, err := s.mergeSegmentBases(
|
||||
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if newSnapshot == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = newSnapshot.DecRef()
|
||||
}()
|
||||
|
||||
mergedSegmentIDs := map[uint64]struct{}{}
|
||||
for _, idx := range sbsIndexes {
|
||||
mergedSegmentIDs[snapshot.segment[idx].id] = struct{}{}
|
||||
}
|
||||
|
||||
// construct a snapshot that's logically equivalent to the input
|
||||
// snapshot, but with merged segments replaced by the new segment
|
||||
equiv := &IndexSnapshot{
|
||||
parent: snapshot.parent,
|
||||
segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)),
|
||||
internal: snapshot.internal,
|
||||
epoch: snapshot.epoch,
|
||||
creator: "persistSnapshotMaybeMerge",
|
||||
}
|
||||
|
||||
// copy to the equiv the segments that weren't replaced
|
||||
for _, segment := range snapshot.segment {
|
||||
if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged {
|
||||
equiv.segment = append(equiv.segment, segment)
|
||||
}
|
||||
}
|
||||
|
||||
// append to the equiv the new segment
|
||||
for _, segment := range newSnapshot.segment {
|
||||
if segment.id == newSegmentID {
|
||||
equiv.segment = append(equiv.segment, &SegmentSnapshot{
|
||||
id: newSegmentID,
|
||||
segment: segment.segment,
|
||||
deleted: nil, // nil since merging handled deletions
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = s.persistSnapshotDirect(equiv)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
||||
// start a write transaction
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// defer rollback on error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch)
|
||||
snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// persist meta values
|
||||
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = metaBucket.Put([]byte("type"), []byte(zap.Type))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
buf := make([]byte, binary.MaxVarintLen32)
|
||||
binary.BigEndian.PutUint32(buf, zap.Version)
|
||||
err = metaBucket.Put([]byte("version"), buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// persist internal values
|
||||
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO optimize writing these in order?
|
||||
for k, v := range snapshot.internal {
|
||||
err = internalBucket.Put([]byte(k), v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
var filenames []string
|
||||
newSegmentPaths := make(map[uint64]string)
|
||||
|
||||
// first ensure that each segment in this snapshot has been persisted
|
||||
for _, segmentSnapshot := range snapshot.segment {
|
||||
snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch seg := segmentSnapshot.segment.(type) {
|
||||
case *zap.SegmentBase:
|
||||
// need to persist this to disk
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
err = zap.PersistSegmentBase(seg, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment: %v", err)
|
||||
}
|
||||
newSegmentPaths[segmentSnapshot.id] = path
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
case *zap.Segment:
|
||||
path := seg.Path()
|
||||
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
default:
|
||||
return fmt.Errorf("unknown segment type: %T", seg)
|
||||
}
|
||||
// store current deleted bits
|
||||
var roaringBuf bytes.Buffer
|
||||
if segmentSnapshot.deleted != nil {
|
||||
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting roaring bytes: %v", err)
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we need to swap in a new root only when we've persisted 1 or
|
||||
// more segments -- whereby the new root would have 1-for-1
|
||||
// replacements of in-memory segments with file-based segments
|
||||
//
|
||||
// other cases like updates to internal values only, and/or when
|
||||
// there are only deletions, are already covered and persisted by
|
||||
// the newly populated boltdb snapshotBucket above
|
||||
if len(newSegmentPaths) > 0 {
|
||||
// now try to open all the new snapshots
|
||||
newSegments := make(map[uint64]segment.Segment)
|
||||
defer func() {
|
||||
for _, s := range newSegments {
|
||||
if s != nil {
|
||||
// cleanup segments that were opened but not
|
||||
// swapped into the new root
|
||||
_ = s.Close()
|
||||
}
|
||||
}
|
||||
}()
|
||||
for segmentID, path := range newSegmentPaths {
|
||||
newSegments[segmentID], err = zap.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening new segment at %s, %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
persist := &persistIntroduction{
|
||||
persisted: newSegments,
|
||||
applied: make(notificationChan),
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
err = ErrClosed
|
||||
return err
|
||||
case s.persists <- persist:
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
err = ErrClosed
|
||||
return err
|
||||
case <-persist.applied:
|
||||
}
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.rootBolt.Sync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// allow files to become eligible for removal after commit, such
|
||||
// as file segments from snapshots that came from the merger
|
||||
s.rootLock.Lock()
|
||||
for _, filename := range filenames {
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// zapFileName builds a zap segment file name from the given number:
// its lowercase hexadecimal form, zero-padded to at least 12 digits,
// followed by the ".zap" extension.
func zapFileName(epoch uint64) string {
	hexID := fmt.Sprintf("%012x", epoch)
	return hexID + ".zap"
}
|
||||
|
||||
// bolt snapshot code

// Single-byte bucket and key names used in the root bolt database.
var (
	// boltSnapshotsBucket names the top-level bucket that holds one
	// sub-bucket per snapshot epoch.
	boltSnapshotsBucket = []byte("s")
	// boltPathKey is the key of a segment's file name.
	boltPathKey = []byte("p")
	// boltDeletedKey is the key of a segment's deleted bitmap.
	boltDeletedKey = []byte("d")
	// boltInternalKey names a snapshot's internal-values bucket.
	boltInternalKey = []byte("i")
	// boltMetaDataKey names a snapshot's meta-values bucket.
	boltMetaDataKey = []byte("m")
)
|
||||
|
||||
func (s *Scorch) loadFromBolt() error {
|
||||
return s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
foundRoot := false
|
||||
c := snapshots.Cursor()
|
||||
for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
|
||||
_, snapshotEpoch, err := segment.DecodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
log.Printf("unable to parse segment epoch %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
if foundRoot {
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
snapshot := snapshots.Bucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("snapshot key, but bucket missing %x, continuing", k)
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
indexSnapshot, err := s.loadSnapshot(snapshot)
|
||||
if err != nil {
|
||||
log.Printf("unable to load snapshot, %v, continuing", err)
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
indexSnapshot.epoch = snapshotEpoch
|
||||
// set the nextSegmentID
|
||||
s.nextSegmentID, err = s.maxSegmentIDOnDisk()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.nextSegmentID++
|
||||
s.rootLock.Lock()
|
||||
s.nextSnapshotEpoch = snapshotEpoch + 1
|
||||
if s.root != nil {
|
||||
_ = s.root.DecRef()
|
||||
}
|
||||
s.root = indexSnapshot
|
||||
s.rootLock.Unlock()
|
||||
foundRoot = true
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// LoadSnapshot loads the segment with the specified epoch
|
||||
// NOTE: this is currently ONLY intended to be used by the command-line tool
|
||||
func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
||||
err = s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
snapshotKey := segment.EncodeUvarintAscending(nil, epoch)
|
||||
snapshot := snapshots.Bucket(snapshotKey)
|
||||
if snapshot == nil {
|
||||
return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch)
|
||||
}
|
||||
rv, err = s.loadSnapshot(snapshot)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
|
||||
rv := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: make(map[string][]byte),
|
||||
refs: 1,
|
||||
creator: "loadSnapshot",
|
||||
}
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
if k[0] == boltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(k)
|
||||
err := internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
rv.internal[string(key)] = copiedVal
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, err
|
||||
}
|
||||
} else if k[0] != boltMetaDataKey[0] {
|
||||
segmentBucket := snapshot.Bucket(k)
|
||||
if segmentBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("segment key, but bucket missing % x", k)
|
||||
}
|
||||
segmentSnapshot, err := s.loadSegment(segmentBucket)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("failed to load segment: %v", err)
|
||||
}
|
||||
_, segmentSnapshot.id, err = segment.DecodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("failed to decode segment id: %v", err)
|
||||
}
|
||||
rv.segment = append(rv.segment, segmentSnapshot)
|
||||
rv.offsets = append(rv.offsets, running)
|
||||
running += segmentSnapshot.segment.Count()
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
|
||||
pathBytes := segmentBucket.Get(boltPathKey)
|
||||
if pathBytes == nil {
|
||||
return nil, fmt.Errorf("segment path missing")
|
||||
}
|
||||
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
|
||||
segment, err := zap.Open(segmentPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening bolt segment: %v", err)
|
||||
}
|
||||
|
||||
rv := &SegmentSnapshot{
|
||||
segment: segment,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
}
|
||||
deletedBytes := segmentBucket.Get(boltDeletedKey)
|
||||
if deletedBytes != nil {
|
||||
deletedBitmap := roaring.NewBitmap()
|
||||
r := bytes.NewReader(deletedBytes)
|
||||
_, err := deletedBitmap.ReadFrom(r)
|
||||
if err != nil {
|
||||
_ = segment.Close()
|
||||
return nil, fmt.Errorf("error reading deleted bytes: %v", err)
|
||||
}
|
||||
if !deletedBitmap.IsEmpty() {
|
||||
rv.deleted = deletedBitmap
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// uint64Descending provides the Len/Less/Swap method set for ordering a
// slice of uint64 values from largest to smallest.
type uint64Descending []uint64

// Len reports the number of elements.
func (p uint64Descending) Len() int {
	return len(p)
}

// Less reports whether element i must sort before element j, i.e. whether
// it is the larger of the two.
func (p uint64Descending) Less(i, j int) bool {
	return p[j] < p[i]
}

// Swap exchanges elements i and j.
func (p uint64Descending) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}
|
||||
|
||||
func (s *Scorch) removeOldData() {
|
||||
removed, err := s.removeOldBoltSnapshots()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NumSnapshotsToKeep represents how many recent, old snapshots to
// keep around per Scorch instance.  Useful for apps that require
// rollback'ability.
//
// openBolt copies this value into each instance and lets a positive
// "numSnapshotsToKeep" config entry override it.
var NumSnapshotsToKeep = 1
|
||||
|
||||
// Removes enough snapshots from the rootBolt so that the
|
||||
// s.eligibleForRemoval stays under the NumSnapshotsToKeep policy.
|
||||
func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
|
||||
persistedEpochs, err := s.RootBoltSnapshotEpochs()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(persistedEpochs) <= s.numSnapshotsToKeep {
|
||||
// we need to keep everything
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// make a map of epochs to protect from deletion
|
||||
protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep)
|
||||
for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] {
|
||||
protectedEpochs[epoch] = struct{}{}
|
||||
}
|
||||
|
||||
var epochsToRemove []uint64
|
||||
var newEligible []uint64
|
||||
s.rootLock.Lock()
|
||||
for _, epoch := range s.eligibleForRemoval {
|
||||
if _, ok := protectedEpochs[epoch]; ok {
|
||||
// protected
|
||||
newEligible = append(newEligible, epoch)
|
||||
} else {
|
||||
epochsToRemove = append(epochsToRemove, epoch)
|
||||
}
|
||||
}
|
||||
s.eligibleForRemoval = newEligible
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if len(epochsToRemove) <= 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
if err == nil {
|
||||
err = s.rootBolt.Sync()
|
||||
}
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
for _, epochToRemove := range epochsToRemove {
|
||||
k := segment.EncodeUvarintAscending(nil, epochToRemove)
|
||||
err = snapshots.DeleteBucket(k)
|
||||
if err == bolt.ErrBucketNotFound {
|
||||
err = nil
|
||||
}
|
||||
if err == nil {
|
||||
numRemoved++
|
||||
}
|
||||
}
|
||||
|
||||
return numRemoved, err
|
||||
}
|
||||
|
||||
func (s *Scorch) maxSegmentIDOnDisk() (uint64, error) {
|
||||
currFileInfos, err := ioutil.ReadDir(s.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var rv uint64
|
||||
for _, finfo := range currFileInfos {
|
||||
fname := finfo.Name()
|
||||
if filepath.Ext(fname) == ".zap" {
|
||||
prefix := strings.TrimSuffix(fname, ".zap")
|
||||
id, err2 := strconv.ParseUint(prefix, 16, 64)
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
if id > rv {
|
||||
rv = id
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// Removes any *.zap files which aren't listed in the rootBolt.
|
||||
func (s *Scorch) removeOldZapFiles() error {
|
||||
liveFileNames, err := s.loadZapFileNames()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
currFileInfos, err := ioutil.ReadDir(s.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.rootLock.RLock()
|
||||
|
||||
for _, finfo := range currFileInfos {
|
||||
fname := finfo.Name()
|
||||
if filepath.Ext(fname) == ".zap" {
|
||||
if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] {
|
||||
err := os.Remove(s.path + string(os.PathSeparator) + fname)
|
||||
if err != nil {
|
||||
log.Printf("got err removing file: %s, err: %v", fname, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
|
||||
var rv []uint64
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() {
|
||||
_, snapshotEpoch, err := segment.DecodeUvarintAscending(sk)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
rv = append(rv, snapshotEpoch)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// Returns the *.zap file names that are listed in the rootBolt.
|
||||
func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
|
||||
rv := map[string]struct{}{}
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.First(); sk != nil; sk, _ = sc.Next() {
|
||||
snapshot := snapshots.Bucket(sk)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
segc := snapshot.Cursor()
|
||||
for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() {
|
||||
if segk[0] == boltInternalKey[0] {
|
||||
continue
|
||||
}
|
||||
segmentBucket := snapshot.Bucket(segk)
|
||||
if segmentBucket == nil {
|
||||
continue
|
||||
}
|
||||
pathBytes := segmentBucket.Get(boltPathKey)
|
||||
if pathBytes == nil {
|
||||
continue
|
||||
}
|
||||
pathString := string(pathBytes)
|
||||
rv[string(pathString)] = struct{}{}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return rv, err
|
||||
}
|
573
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
Normal file
573
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
Normal file
@ -0,0 +1,573 @@
|
||||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment/zap"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/boltdb/bolt"
|
||||
)
|
||||
|
||||
// Name is the index type name under which init registers NewScorch.
const Name = "scorch"

// Version is the value NewScorch stores in Scorch.version.
const Version uint8 = 2

// ErrClosed is the error value with message "scorch closed".
var ErrClosed = fmt.Errorf("scorch closed")
|
||||
|
||||
// Scorch is the index implementation that init registers with the bleve
// registry under Name. Instances are created by NewScorch and must be opened
// with Open before use.
type Scorch struct {
	readOnly      bool
	version       uint8
	config        map[string]interface{}
	analysisQueue *index.AnalysisQueue
	stats         Stats
	nextSegmentID uint64 // advanced with atomic.AddUint64 in prepareSegment
	path          string // index directory, read from config["path"] by openBolt

	unsafeBatch bool // when false, prepareSegment also waits on the persisted channel

	rootLock             sync.RWMutex
	root                 *IndexSnapshot // holds 1 ref-count on the root
	rootPersisted        []chan error   // closed when root is persisted
	nextSnapshotEpoch    uint64
	eligibleForRemoval   []uint64        // Index snapshot epochs that are safe to GC.
	ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.

	numSnapshotsToKeep int
	closeCh            chan struct{} // closed by Close to signal the async tasks
	introductions      chan *segmentIntroduction
	persists           chan *persistIntroduction
	merges             chan *segmentMerge
	introducerNotifier chan *epochWatcher
	revertToSnapshots  chan *snapshotReversion
	persisterNotifier  chan *epochWatcher
	rootBolt           *bolt.DB
	asyncTasks         sync.WaitGroup // counts the goroutines started by Open

	onEvent      func(event Event)
	onAsyncError func(err error)

	iStats internalStats
}
|
||||
|
||||
// internalStats holds counters that MemoryUsed combines into its estimate.
// Within this file they are accessed with sync/atomic operations.
type internalStats struct {
	// epoch and size of the snapshot that the persister may be working on
	persistEpoch          uint64
	persistSnapshotSize   uint64
	// epoch and size of the snapshot that the merger may be working on
	mergeEpoch            uint64
	mergeSnapshotSize     uint64
	// buffer bytes of new segments, added and later removed again by Batch
	newSegBufBytesAdded   uint64
	newSegBufBytesRemoved uint64
	// sizes of analysis results, added and later removed again by Batch
	analysisBytesAdded    uint64
	analysisBytesRemoved  uint64
}
|
||||
|
||||
func NewScorch(storeName string,
|
||||
config map[string]interface{},
|
||||
analysisQueue *index.AnalysisQueue) (index.Index, error) {
|
||||
rv := &Scorch{
|
||||
version: Version,
|
||||
config: config,
|
||||
analysisQueue: analysisQueue,
|
||||
nextSnapshotEpoch: 1,
|
||||
closeCh: make(chan struct{}),
|
||||
ineligibleForRemoval: map[string]bool{},
|
||||
}
|
||||
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
rv.readOnly = ro
|
||||
}
|
||||
ub, ok := config["unsafe_batch"].(bool)
|
||||
if ok {
|
||||
rv.unsafeBatch = ub
|
||||
}
|
||||
ecbName, ok := config["eventCallbackName"].(string)
|
||||
if ok {
|
||||
rv.onEvent = RegistryEventCallbacks[ecbName]
|
||||
}
|
||||
aecbName, ok := config["asyncErrorCallbackName"].(string)
|
||||
if ok {
|
||||
rv.onAsyncError = RegistryAsyncErrorCallbacks[aecbName]
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
|
||||
if s.onEvent != nil {
|
||||
s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) fireAsyncError(err error) {
|
||||
if s.onAsyncError != nil {
|
||||
s.onAsyncError(err)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotOnErrors, 1)
|
||||
}
|
||||
|
||||
func (s *Scorch) Open() error {
|
||||
err := s.openBolt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mainLoop()
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.persisterLoop()
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mergerLoop()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) openBolt() error {
|
||||
var ok bool
|
||||
s.path, ok = s.config["path"].(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("must specify path")
|
||||
}
|
||||
if s.path == "" {
|
||||
s.unsafeBatch = true
|
||||
}
|
||||
|
||||
var rootBoltOpt *bolt.Options
|
||||
if s.readOnly {
|
||||
rootBoltOpt = &bolt.Options{
|
||||
ReadOnly: true,
|
||||
}
|
||||
} else {
|
||||
if s.path != "" {
|
||||
err := os.MkdirAll(s.path, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
var err error
|
||||
if s.path != "" {
|
||||
s.rootBolt, err = bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// now see if there is any existing state to load
|
||||
err = s.loadFromBolt()
|
||||
if err != nil {
|
||||
_ = s.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
s.introductions = make(chan *segmentIntroduction)
|
||||
s.persists = make(chan *persistIntroduction)
|
||||
s.merges = make(chan *segmentMerge)
|
||||
s.introducerNotifier = make(chan *epochWatcher, 1)
|
||||
s.revertToSnapshots = make(chan *snapshotReversion)
|
||||
s.persisterNotifier = make(chan *epochWatcher, 1)
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
err := s.removeOldZapFiles() // Before persister or merger create any new files.
|
||||
if err != nil {
|
||||
_ = s.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
s.numSnapshotsToKeep = NumSnapshotsToKeep
|
||||
if v, ok := s.config["numSnapshotsToKeep"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("numSnapshotsToKeep parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
s.numSnapshotsToKeep = t
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) Close() (err error) {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
s.fireEvent(EventKindClose, time.Since(startTime))
|
||||
}()
|
||||
|
||||
s.fireEvent(EventKindCloseStart, 0)
|
||||
|
||||
// signal to async tasks we want to close
|
||||
close(s.closeCh)
|
||||
// wait for them to close
|
||||
s.asyncTasks.Wait()
|
||||
// now close the root bolt
|
||||
if s.rootBolt != nil {
|
||||
err = s.rootBolt.Close()
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil {
|
||||
_ = s.root.DecRef()
|
||||
}
|
||||
s.root = nil
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Scorch) Update(doc *document.Document) error {
|
||||
b := index.NewBatch()
|
||||
b.Update(doc)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) Delete(id string) error {
|
||||
b := index.NewBatch()
|
||||
b.Delete(id)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
// Batch applies a batch of changes to the index atomically.
//
// Every non-nil document in batch.IndexOps is an update and gets an "_id"
// field added before it is queued for analysis; nil entries are deletes. The
// analysis results are turned into a new segment (none when the batch has no
// updates) which is handed to prepareSegment together with all ids of the
// batch and its internal operations. EventKindBatchIntroductionStart is
// fired before the segment is built and EventKindBatchIntroduction, with the
// total duration, when Batch returns.
func (s *Scorch) Batch(batch *index.Batch) (err error) {
	start := time.Now()

	defer func() {
		s.fireEvent(EventKindBatchIntroduction, time.Since(start))
	}()

	// sized so that every analysis result can be sent without blocking
	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numDeletes uint64
	var numPlainTextBytes uint64
	var ids []string
	for docID, doc := range batch.IndexOps {
		if doc != nil {
			// insert _id field
			doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil))
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		} else {
			numDeletes++
		}
		ids = append(ids, docID)
	}

	// FIXME could sort ids list concurrent with analysis?

	// queue the analysis work from a separate goroutine while this one
	// collects the results below
	go func() {
		for _, doc := range batch.IndexOps {
			if doc != nil {
				aw := index.NewAnalysisWork(s, doc, resultChan)
				// put the work on the queue
				s.analysisQueue.Queue(aw)
			}
		}
	}()

	// wait for analysis result
	analysisResults := make([]*index.AnalysisResult, int(numUpdates))
	var itemsDeQueued uint64
	var totalAnalysisSize int
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		resultSize := result.Size()
		atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize))
		totalAnalysisSize += resultSize
		analysisResults[itemsDeQueued] = result
		itemsDeQueued++
	}
	close(resultChan)
	// the deferred call's argument is evaluated here, after all results
	// have been counted
	defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize))

	atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start)))

	indexStart := time.Now()

	// notify handlers that we're about to introduce a segment
	s.fireEvent(EventKindBatchIntroductionStart, 0)

	var newSegment segment.Segment
	var bufBytes uint64
	if len(analysisResults) > 0 {
		newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor)
		if err != nil {
			return err
		}
		atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
	} else {
		atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
	}

	err = s.prepareSegment(newSegment, ids, batch.InternalOps)
	if err != nil {
		// the segment was not introduced, so it is closed here
		if newSegment != nil {
			_ = newSegment.Close()
		}
		atomic.AddUint64(&s.stats.TotOnErrors, 1)
	} else {
		atomic.AddUint64(&s.stats.TotUpdates, numUpdates)
		atomic.AddUint64(&s.stats.TotDeletes, numDeletes)
		atomic.AddUint64(&s.stats.TotBatches, 1)
		atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes)
	}

	atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes)
	atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart)))

	return err
}
|
||||
|
||||
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
||||
internalOps map[string][]byte) error {
|
||||
|
||||
// new introduction
|
||||
introduction := &segmentIntroduction{
|
||||
id: atomic.AddUint64(&s.nextSegmentID, 1),
|
||||
data: newSegment,
|
||||
ids: ids,
|
||||
obsoletes: make(map[uint64]*roaring.Bitmap),
|
||||
internal: internalOps,
|
||||
applied: make(chan error),
|
||||
}
|
||||
|
||||
if !s.unsafeBatch {
|
||||
introduction.persisted = make(chan error, 1)
|
||||
}
|
||||
|
||||
// optimistically prepare obsoletes outside of rootLock
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
for _, seg := range root.segment {
|
||||
delta, err := seg.segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
introduction.obsoletes[seg.id] = delta
|
||||
}
|
||||
|
||||
_ = root.DecRef()
|
||||
|
||||
introStartTime := time.Now()
|
||||
|
||||
s.introductions <- introduction
|
||||
|
||||
// block until this segment is applied
|
||||
err := <-introduction.applied
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if introduction.persisted != nil {
|
||||
err = <-introduction.persisted
|
||||
}
|
||||
|
||||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
|
||||
atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Scorch) SetInternal(key, val []byte) error {
|
||||
b := index.NewBatch()
|
||||
b.SetInternal(key, val)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) DeleteInternal(key []byte) error {
|
||||
b := index.NewBatch()
|
||||
b.DeleteInternal(key)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
// Reader returns a low-level accessor on the index data. Close it to
|
||||
// release associated resources.
|
||||
func (s *Scorch) Reader() (index.IndexReader, error) {
|
||||
return s.currentSnapshot(), nil
|
||||
}
|
||||
|
||||
func (s *Scorch) currentSnapshot() *IndexSnapshot {
|
||||
s.rootLock.RLock()
|
||||
rv := s.root
|
||||
rv.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) Stats() json.Marshaler {
|
||||
return &s.stats
|
||||
}
|
||||
func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m := s.stats.ToMap()
|
||||
|
||||
if s.path != "" {
|
||||
finfos, err := ioutil.ReadDir(s.path)
|
||||
if err == nil {
|
||||
var numFilesOnDisk, numBytesUsedDisk uint64
|
||||
for _, finfo := range finfos {
|
||||
if !finfo.IsDir() {
|
||||
numBytesUsedDisk += uint64(finfo.Size())
|
||||
numFilesOnDisk++
|
||||
}
|
||||
}
|
||||
|
||||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
m["updates"] = m["TotUpdates"]
|
||||
m["deletes"] = m["TotDeletes"]
|
||||
m["batches"] = m["TotBatches"]
|
||||
m["errors"] = m["TotOnErrors"]
|
||||
m["analysis_time"] = m["TotAnalysisTime"]
|
||||
m["index_time"] = m["TotIndexTime"]
|
||||
m["term_searchers_started"] = m["TotTermSearchersStarted"]
|
||||
m["term_searchers_finished"] = m["TotTermSearchersFinished"]
|
||||
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
|
||||
m["num_items_introduced"] = m["TotIntroducedItems"]
|
||||
m["num_items_persisted"] = m["TotPersistedItems"]
|
||||
m["num_bytes_used_disk"] = m["CurOnDiskBytes"]
|
||||
m["num_files_on_disk"] = m["CurOnDiskFiles"]
|
||||
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult {
|
||||
rv := &index.AnalysisResult{
|
||||
Document: d,
|
||||
Analyzed: make([]analysis.TokenFrequencies, len(d.Fields)+len(d.CompositeFields)),
|
||||
Length: make([]int, len(d.Fields)+len(d.CompositeFields)),
|
||||
}
|
||||
|
||||
for i, field := range d.Fields {
|
||||
if field.Options().IsIndexed() {
|
||||
fieldLength, tokenFreqs := field.Analyze()
|
||||
rv.Analyzed[i] = tokenFreqs
|
||||
rv.Length[i] = fieldLength
|
||||
|
||||
if len(d.CompositeFields) > 0 {
|
||||
// see if any of the composite fields need this
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// Advanced always returns a nil store and a nil error.
func (s *Scorch) Advanced() (store.KVStore, error) {
	return nil, nil
}
|
||||
|
||||
func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
||||
s.rootLock.Lock()
|
||||
if s.root == nil || s.root.epoch != epoch {
|
||||
s.eligibleForRemoval = append(s.eligibleForRemoval, epoch)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) MemoryUsed() uint64 {
|
||||
indexSnapshot := s.currentSnapshot()
|
||||
defer func() {
|
||||
_ = indexSnapshot.Close()
|
||||
}()
|
||||
|
||||
// Account for current root snapshot overhead
|
||||
memUsed := uint64(indexSnapshot.Size())
|
||||
|
||||
// Account for snapshot that the persister may be working on
|
||||
persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch)
|
||||
persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize)
|
||||
if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch {
|
||||
// the snapshot that the persister is working on isn't the same as
|
||||
// the current snapshot
|
||||
memUsed += persistSnapshotSize
|
||||
}
|
||||
|
||||
// Account for snapshot that the merger may be working on
|
||||
mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch)
|
||||
mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize)
|
||||
if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch {
|
||||
// the snapshot that the merger is working on isn't the same as
|
||||
// the current snapshot
|
||||
memUsed += mergeSnapshotSize
|
||||
}
|
||||
|
||||
memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) -
|
||||
atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved))
|
||||
|
||||
memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) -
|
||||
atomic.LoadUint64(&s.iStats.analysisBytesRemoved))
|
||||
|
||||
return memUsed
|
||||
}
|
||||
|
||||
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
||||
s.rootLock.Lock()
|
||||
s.ineligibleForRemoval[filename] = true
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) unmarkIneligibleForRemoval(filename string) {
|
||||
s.rootLock.Lock()
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
// init registers NewScorch with the bleve registry as the constructor for
// the index type named Name.
func init() {
	registry.RegisterIndexType(Name, NewScorch)
}
|
||||
|
||||
// parseToInteger converts a config value to an int. A float64 is converted
// with int(), discarding any fractional part; an int is returned unchanged.
// Any other dynamic type, including nil, is an error.
func parseToInteger(i interface{}) (int, error) {
	if asFloat, isFloat := i.(float64); isFloat {
		return int(asFloat), nil
	}
	if asInt, isInt := i.(int); isInt {
		return asInt, nil
	}
	return 0, fmt.Errorf("expects int or float64 value")
}
|
130
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
Normal file
130
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
Normal file
@ -0,0 +1,130 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
type EmptySegment struct{}
|
||||
|
||||
func (e *EmptySegment) Dictionary(field string) (TermDictionary, error) {
|
||||
return &EmptyDictionary{}, nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) DocID(num uint64) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Count() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptySegment) DocNumbers([]string) (*roaring.Bitmap, error) {
|
||||
r := roaring.NewBitmap()
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Fields() []string {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *EmptySegment) Size() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptySegment) AddRef() {
|
||||
}
|
||||
|
||||
func (e *EmptySegment) DecRef() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type EmptyDictionary struct{}
|
||||
|
||||
func (e *EmptyDictionary) PostingsList(term []byte,
|
||||
except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) {
|
||||
return &EmptyPostingsList{}, nil
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) Iterator() DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) PrefixIterator(prefix string) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) FuzzyIterator(term string,
|
||||
fuzziness int) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
|
||||
includeCount bool) DictionaryIterator {
|
||||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
type EmptyDictionaryIterator struct{}
|
||||
|
||||
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type EmptyPostingsList struct{}
|
||||
|
||||
func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool,
|
||||
prealloc PostingsIterator) PostingsIterator {
|
||||
return &EmptyPostingsIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsList) Count() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
type EmptyPostingsIterator struct{}
|
||||
|
||||
func (e *EmptyPostingsIterator) Next() (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Size() int {
|
||||
return 0
|
||||
}
|
94
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
Normal file
94
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright 2014 The Cockroach Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
|
||||
// This code originated from:
|
||||
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go
|
||||
|
||||
// Modified to not use pkg/errors
|
||||
|
||||
package segment
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Tag bytes and widths used by the uvarint encoding in this file.
const (
	MaxVarintSize = 9

	// IntMin is chosen such that the range of int tags does not overlap the
	// ascii character set that is frequently used in testing.
	IntMin = 0x80 // 128
	// IntMax is the maximum int tag value.
	IntMax = 0xfd // 253

	// intMaxWidth is the largest number of payload bytes that can follow a
	// tag byte.
	intMaxWidth = 8
	// intZero is the tag byte that encodes the value 0.
	intZero = IntMin + intMaxWidth // 136
	// intSmall is the largest value that is encoded in the tag byte itself.
	intSmall = IntMax - intZero - intMaxWidth // 109
)
|
||||
|
||||
// EncodeUvarintAscending encodes the uint64 value using a variable length
|
||||
// (length-prefixed) representation. The length is encoded as a single
|
||||
// byte indicating the number of encoded bytes (-8) to follow. See
|
||||
// EncodeVarintAscending for rationale. The encoded bytes are appended to the
|
||||
// supplied buffer and the final buffer is returned.
|
||||
func EncodeUvarintAscending(b []byte, v uint64) []byte {
|
||||
switch {
|
||||
case v <= intSmall:
|
||||
return append(b, intZero+byte(v))
|
||||
case v <= 0xff:
|
||||
return append(b, IntMax-7, byte(v))
|
||||
case v <= 0xffff:
|
||||
return append(b, IntMax-6, byte(v>>8), byte(v))
|
||||
case v <= 0xffffff:
|
||||
return append(b, IntMax-5, byte(v>>16), byte(v>>8), byte(v))
|
||||
case v <= 0xffffffff:
|
||||
return append(b, IntMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
|
||||
case v <= 0xffffffffff:
|
||||
return append(b, IntMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8),
|
||||
byte(v))
|
||||
case v <= 0xffffffffffff:
|
||||
return append(b, IntMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16),
|
||||
byte(v>>8), byte(v))
|
||||
case v <= 0xffffffffffffff:
|
||||
return append(b, IntMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24),
|
||||
byte(v>>16), byte(v>>8), byte(v))
|
||||
default:
|
||||
return append(b, IntMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
|
||||
byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeUvarintAscending decodes a varint encoded uint64 from the input
|
||||
// buffer. The remainder of the input buffer and the decoded uint64
|
||||
// are returned.
|
||||
func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
|
||||
if len(b) == 0 {
|
||||
return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value")
|
||||
}
|
||||
length := int(b[0]) - intZero
|
||||
b = b[1:] // skip length byte
|
||||
if length <= intSmall {
|
||||
return b, uint64(length), nil
|
||||
}
|
||||
length -= intSmall
|
||||
if length < 0 || length > 8 {
|
||||
return nil, 0, fmt.Errorf("invalid uvarint length of %d", length)
|
||||
} else if len(b) < length {
|
||||
return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b)
|
||||
}
|
||||
var v uint64
|
||||
// It is faster to range over the elements in a slice than to index
|
||||
// into the slice on each loop iteration.
|
||||
for _, t := range b[:length] {
|
||||
v = (v << 8) | uint64(t)
|
||||
}
|
||||
return b[length:], v, nil
|
||||
}
|
126
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
Normal file
126
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
Normal file
@ -0,0 +1,126 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
)
|
||||
|
||||
// DocumentFieldValueVisitor is the callback signature invoked once per
// stored field value. Its boolean result controls the walk: true asks the
// caller to continue with the next value, false asks it to stop.
type DocumentFieldValueVisitor func(field string, typ byte, value []byte, pos []uint64) bool
|
||||
|
||||
type Segment interface {
|
||||
Dictionary(field string) (TermDictionary, error)
|
||||
|
||||
VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error
|
||||
|
||||
DocID(num uint64) ([]byte, error)
|
||||
|
||||
Count() uint64
|
||||
|
||||
DocNumbers([]string) (*roaring.Bitmap, error)
|
||||
|
||||
Fields() []string
|
||||
|
||||
Close() error
|
||||
|
||||
Size() int
|
||||
|
||||
AddRef()
|
||||
DecRef() error
|
||||
}
|
||||
|
||||
type TermDictionary interface {
|
||||
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
|
||||
|
||||
Iterator() DictionaryIterator
|
||||
PrefixIterator(prefix string) DictionaryIterator
|
||||
RangeIterator(start, end string) DictionaryIterator
|
||||
RegexpIterator(regex string) DictionaryIterator
|
||||
FuzzyIterator(term string, fuzziness int) DictionaryIterator
|
||||
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
|
||||
}
|
||||
|
||||
type DictionaryIterator interface {
|
||||
Next() (*index.DictEntry, error)
|
||||
}
|
||||
|
||||
type PostingsList interface {
|
||||
Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) PostingsIterator
|
||||
|
||||
Size() int
|
||||
|
||||
Count() uint64
|
||||
|
||||
// NOTE deferred for future work
|
||||
|
||||
// And(other PostingsList) PostingsList
|
||||
// Or(other PostingsList) PostingsList
|
||||
}
|
||||
|
||||
type PostingsIterator interface {
|
||||
// The caller is responsible for copying whatever it needs from
|
||||
// the returned Posting instance before calling Next(), as some
|
||||
// implementations may return a shared instance to reduce memory
|
||||
// allocations.
|
||||
Next() (Posting, error)
|
||||
|
||||
// Advance will return the posting with the specified doc number
|
||||
// or if there is no such posting, the next posting.
|
||||
// Callers MUST NOT attempt to pass a docNum that is less than or
|
||||
// equal to the currently visited posting doc Num.
|
||||
Advance(docNum uint64) (Posting, error)
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
Number() uint64
|
||||
|
||||
Frequency() uint64
|
||||
Norm() float64
|
||||
|
||||
Locations() []Location
|
||||
|
||||
Size() int
|
||||
}
|
||||
|
||||
// Location describes one occurrence attached to a Posting.
//
// NOTE(review): the exact meaning and units of Start, End and Pos are not
// visible in this file; confirm against the implementations.
type Location interface {
	// Field returns the name of the field the location belongs to.
	Field() string

	// Pos, Start and End return the location's position and boundaries.
	Pos() uint64
	Start() uint64
	End() uint64

	// ArrayPositions returns the array positions recorded for the location.
	ArrayPositions() []uint64

	// Size returns a size for this location (units are not visible here).
	Size() int
}
|
||||
|
||||
// DocumentFieldTermVisitable is implemented by various scorch segment
|
||||
// implementations with persistence for the un inverting of the
|
||||
// postings or other indexed values.
|
||||
type DocumentFieldTermVisitable interface {
|
||||
VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
visitor index.DocumentFieldTermVisitor, optional DocVisitState) (DocVisitState, error)
|
||||
|
||||
// VisitableDocValueFields implementation should return
|
||||
// the list of fields which are document value persisted and
|
||||
// therefore visitable by the above VisitDocumentFieldTerms method.
|
||||
VisitableDocValueFields() ([]string, error)
|
||||
}
|
||||
|
||||
// DocVisitState is an opaque value accepted and returned by
// DocumentFieldTermVisitable.VisitDocumentFieldTerms. It declares no
// methods, so any value satisfies it.
type DocVisitState interface{}
|
167
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md
generated
vendored
Normal file
167
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md
generated
vendored
Normal file
@ -0,0 +1,167 @@
|
||||
# zap file format
|
||||
|
||||
The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written.
|
||||
|
||||
Current usage:
|
||||
|
||||
- mmap the entire file
|
||||
- crc-32 bytes and version are in fixed position at end of the file
|
||||
- reading remainder of footer could be version specific
|
||||
- remainder of footer gives us:
|
||||
- 3 important offsets (docValue, fields index and stored data index)
|
||||
- 2 important values (number of docs and chunk factor)
|
||||
- field data is processed once and memoized onto the heap so that we never have to go back to disk for it
|
||||
- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends.
|
||||
- access to all other indexed data follows the following pattern:
|
||||
- first know the field name -> convert to id
|
||||
- next navigate to term dictionary for that field
|
||||
- some operations stop here and do dictionary ops
|
||||
- next use dictionary to navigate to posting list for a specific term
|
||||
- walk posting list
|
||||
- if necessary, walk posting details as we go
|
||||
- if location info is desired, consult location bitmap to see if it is there
|
||||
|
||||
## stored fields section
|
||||
|
||||
- for each document
|
||||
- preparation phase:
|
||||
- produce a slice of metadata bytes and data bytes
|
||||
- produce these slices in field id order
|
||||
- field value is appended to the data slice
|
||||
- metadata slice is varint encoded with the following values for each field value
|
||||
- field id (uint16)
|
||||
- field type (byte)
|
||||
- field value start offset in uncompressed data slice (uint64)
|
||||
- field value length (uint64)
|
||||
- field number of array positions (uint64)
|
||||
- one additional value for each array position (uint64)
|
||||
- compress the data slice using snappy
|
||||
- file writing phase:
|
||||
- remember the start offset for this document
|
||||
- write out meta data length (varint uint64)
|
||||
- write out compressed data length (varint uint64)
|
||||
- write out the metadata bytes
|
||||
- write out the compressed data bytes
|
||||
|
||||
## stored fields idx
|
||||
|
||||
- for each document
|
||||
- write start offset (remembered from previous section) of stored data (big endian uint64)
|
||||
|
||||
With this index and a known document number, we have direct access to all the stored field data.
|
||||
|
||||
## posting details (freq/norm) section
|
||||
|
||||
- for each posting list
|
||||
- produce a slice containing multiple consecutive chunks (each chunk is varint stream)
|
||||
- produce a slice remembering offsets of where each chunk starts
|
||||
- preparation phase:
|
||||
- for each hit in the posting list
|
||||
- if this hit is in next chunk close out encoding of last chunk and record offset start of next
|
||||
- encode term frequency (uint64)
|
||||
- encode norm factor (float32)
|
||||
- file writing phase:
|
||||
- remember start position for this posting list details
|
||||
- write out number of chunks that follow (varint uint64)
|
||||
- write out length of each chunk (each a varint uint64)
|
||||
- write out the byte slice containing all the chunk data
|
||||
|
||||
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
|
||||
|
||||
## posting details (location) section
|
||||
|
||||
- for each posting list
|
||||
- produce a slice containing multiple consecutive chunks (each chunk is varint stream)
|
||||
- produce a slice remembering offsets of where each chunk starts
|
||||
- preparation phase:
|
||||
- for each hit in the posting list
|
||||
- if this hit is in next chunk close out encoding of last chunk and record offset start of next
|
||||
- encode field (uint16)
|
||||
- encode field pos (uint64)
|
||||
- encode field start (uint64)
|
||||
- encode field end (uint64)
|
||||
- encode number of array positions to follow (uint64)
|
||||
- encode each array position (each uint64)
|
||||
- file writing phase:
|
||||
- remember start position for this posting list details
|
||||
- write out number of chunks that follow (varint uint64)
|
||||
- write out length of each chunk (each a varint uint64)
|
||||
- write out the byte slice containing all the chunk data
|
||||
|
||||
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
|
||||
|
||||
## bitmaps of hits with location info
|
||||
|
||||
- for each posting list
|
||||
- preparation phase:
|
||||
- encode roaring bitmap (indicating which hits have location details indexed) posting list to bytes (so we know the length)
|
||||
- file writing phase:
|
||||
- remember the start position for this bitmap
|
||||
- write length of encoded roaring bitmap
|
||||
- write the serialized roaring bitmap data
|
||||
|
||||
## postings list section
|
||||
|
||||
- for each posting list
|
||||
- preparation phase:
|
||||
- encode roaring bitmap posting list to bytes (so we know the length)
|
||||
- file writing phase:
|
||||
- remember the start position for this posting list
|
||||
- write freq/norm details offset (remembered from previous, as varint uint64)
|
||||
- write location details offset (remembered from previous, as varint uint64)
|
||||
- write location bitmap offset (remembered from previous, as varint uint64)
|
||||
- write length of encoded roaring bitmap
|
||||
- write the serialized roaring bitmap data
|
||||
|
||||
## dictionary
|
||||
|
||||
- for each field
|
||||
- preparation phase:
|
||||
- encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous)
|
||||
- file writing phase:
|
||||
- remember the start position of this persistDictionary
|
||||
- write length of vellum data (varint uint64)
|
||||
- write out vellum data
|
||||
|
||||
## fields section
|
||||
|
||||
- for each field
|
||||
- file writing phase:
|
||||
- remember start offset for each field
|
||||
- write dictionary address (remembered from previous) (varint uint64)
|
||||
- write length of field name (varint uint64)
|
||||
- write field name bytes
|
||||
|
||||
## fields idx
|
||||
|
||||
- for each field
|
||||
- file writing phase:
|
||||
- write big endian uint64 of start offset for each field
|
||||
|
||||
NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size.
|
||||
|
||||
## fields DocValue
|
||||
|
||||
- for each field
|
||||
- preparation phase:
|
||||
- produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data
|
||||
- produce a slice remembering the length of each chunk
|
||||
- file writing phase:
|
||||
- remember the start position of the first field's DocValue data (this offset is written in the footer)
|
||||
- write out number of chunks that follow (varint uint64)
|
||||
- write out length of each chunk (each a varint uint64)
|
||||
- write out the byte slice containing all the chunk data
|
||||
|
||||
NOTE: currently the meta header inside each chunk gives a clue to the location offsets and size of the data pertaining to a given docID and any
|
||||
read operation leverages that meta information to extract the document-specific data from the file.
|
||||
|
||||
## footer
|
||||
|
||||
- file writing phase
|
||||
- write number of docs (big endian uint64)
|
||||
- write stored field index location (big endian uint64)
|
||||
- write field index location (big endian uint64)
|
||||
- write field docValue location (big endian uint64)
|
||||
- write out chunk factor (big endian uint32)
|
||||
- write out version (big endian uint32)
|
||||
- write out file CRC of everything preceding this (big endian uint32)
|
149
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
generated
vendored
Normal file
149
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
generated
vendored
Normal file
@ -0,0 +1,149 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"math"
|
||||
"os"
|
||||
)
|
||||
|
||||
const (
	// Version is the version number of this zap implementation
	// (presumably the value recorded in the segment footer - confirm).
	Version uint32 = 11

	// Type is the name identifying this segment format.
	Type string = "zap"

	// fieldNotUninverted is the largest uint64, used as a sentinel
	// (presumably for fields that have no doc value data - confirm).
	fieldNotUninverted = math.MaxUint64
)
|
||||
|
||||
// PersistSegmentBase persists SegmentBase in the zap file format.
|
||||
func PersistSegmentBase(sb *SegmentBase, path string) error {
|
||||
flag := os.O_RDWR | os.O_CREATE
|
||||
|
||||
f, err := os.OpenFile(path, flag, 0600)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cleanup := func() {
|
||||
_ = f.Close()
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
|
||||
br := bufio.NewWriter(f)
|
||||
|
||||
_, err = br.Write(sb.mem)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset,
|
||||
sb.chunkFactor, sb.memCRC, br)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
err = br.Flush()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
err = f.Sync()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
err = f.Close()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func persistStoredFieldValues(fieldID int,
|
||||
storedFieldValues [][]byte, stf []byte, spf [][]uint64,
|
||||
curr int, metaEncode varintEncoder, data []byte) (
|
||||
int, []byte, error) {
|
||||
for i := 0; i < len(storedFieldValues); i++ {
|
||||
// encode field
|
||||
_, err := metaEncode(uint64(fieldID))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode type
|
||||
_, err = metaEncode(uint64(stf[i]))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode start offset
|
||||
_, err = metaEncode(uint64(curr))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// end len
|
||||
_, err = metaEncode(uint64(len(storedFieldValues[i])))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode number of array pos
|
||||
_, err = metaEncode(uint64(len(spf[i])))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// encode all array positions
|
||||
for _, pos := range spf[i] {
|
||||
_, err = metaEncode(pos)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
data = append(data, storedFieldValues[i]...)
|
||||
curr += len(storedFieldValues[i])
|
||||
}
|
||||
|
||||
return curr, data, nil
|
||||
}
|
||||
|
||||
func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
|
||||
storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
|
||||
dictLocs []uint64) (*SegmentBase, error) {
|
||||
sb := &SegmentBase{
|
||||
mem: mem,
|
||||
memCRC: memCRC,
|
||||
chunkFactor: chunkFactor,
|
||||
fieldsMap: fieldsMap,
|
||||
fieldsInv: fieldsInv,
|
||||
numDocs: numDocs,
|
||||
storedIndexOffset: storedIndexOffset,
|
||||
fieldsIndexOffset: fieldsIndexOffset,
|
||||
docValueOffset: docValueOffset,
|
||||
dictLocs: dictLocs,
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
}
|
||||
sb.updateSize()
|
||||
|
||||
err := sb.loadDvReaders()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return sb, nil
|
||||
}
|
230
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
generated
vendored
Normal file
230
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
generated
vendored
Normal file
@ -0,0 +1,230 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeMetaData int
|
||||
|
||||
func init() {
|
||||
var md MetaData
|
||||
reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
|
||||
}
|
||||
|
||||
var (
	// termSeparator is the byte value 0xff (presumably used to delimit
	// terms - confirm at the use sites).
	termSeparator byte = 0xff

	// termSeparatorSplitSlice is termSeparator as a one-element byte slice.
	termSeparatorSplitSlice = []byte{termSeparator}
)
|
||||
|
||||
type chunkedContentCoder struct {
|
||||
final []byte
|
||||
chunkSize uint64
|
||||
currChunk uint64
|
||||
chunkLens []uint64
|
||||
|
||||
w io.Writer
|
||||
progressiveWrite bool
|
||||
|
||||
chunkMetaBuf bytes.Buffer
|
||||
chunkBuf bytes.Buffer
|
||||
|
||||
chunkMeta []MetaData
|
||||
|
||||
compressed []byte // temp buf for snappy compression
|
||||
}
|
||||
|
||||
// MetaData is the per-document entry stored in the meta section of a chunk.
type MetaData struct {
	// DocNum is the doc number of the data inside the chunk.
	DocNum uint64
	// DocDvOffset is the offset, within the chunk's uncompressed data, at
	// which this document's data ends. The data begins where the previous
	// entry's DocDvOffset points, or at 0 for the first entry.
	DocDvOffset uint64
}
|
||||
|
||||
// newChunkedContentCoder returns a new chunk content coder which
|
||||
// packs data into chunks based on the provided chunkSize
|
||||
func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64,
|
||||
w io.Writer, progressiveWrite bool) *chunkedContentCoder {
|
||||
total := maxDocNum/chunkSize + 1
|
||||
rv := &chunkedContentCoder{
|
||||
chunkSize: chunkSize,
|
||||
chunkLens: make([]uint64, total),
|
||||
chunkMeta: make([]MetaData, 0, total),
|
||||
w: w,
|
||||
progressiveWrite: progressiveWrite,
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// Reset lets you reuse this chunked content coder. Buffers are reset
|
||||
// and re used. You cannot change the chunk size.
|
||||
func (c *chunkedContentCoder) Reset() {
|
||||
c.currChunk = 0
|
||||
c.final = c.final[:0]
|
||||
c.chunkBuf.Reset()
|
||||
c.chunkMetaBuf.Reset()
|
||||
for i := range c.chunkLens {
|
||||
c.chunkLens[i] = 0
|
||||
}
|
||||
c.chunkMeta = c.chunkMeta[:0]
|
||||
}
|
||||
|
||||
// Close indicates you are done calling Add() this allows
|
||||
// the final chunk to be encoded.
|
||||
func (c *chunkedContentCoder) Close() error {
|
||||
return c.flushContents()
|
||||
}
|
||||
|
||||
func (c *chunkedContentCoder) flushContents() error {
|
||||
// flush the contents, with meta information at first
|
||||
buf := make([]byte, binary.MaxVarintLen64)
|
||||
n := binary.PutUvarint(buf, uint64(len(c.chunkMeta)))
|
||||
_, err := c.chunkMetaBuf.Write(buf[:n])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// write out the metaData slice
|
||||
for _, meta := range c.chunkMeta {
|
||||
_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// write the metadata to final data
|
||||
metaData := c.chunkMetaBuf.Bytes()
|
||||
c.final = append(c.final, c.chunkMetaBuf.Bytes()...)
|
||||
// write the compressed data to the final data
|
||||
c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes())
|
||||
c.final = append(c.final, c.compressed...)
|
||||
|
||||
c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData))
|
||||
|
||||
if c.progressiveWrite {
|
||||
_, err := c.w.Write(c.final)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.final = c.final[:0]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add encodes the provided byte slice into the correct chunk for the provided
|
||||
// doc num. You MUST call Add() with increasing docNums.
|
||||
func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
|
||||
chunk := docNum / c.chunkSize
|
||||
if chunk != c.currChunk {
|
||||
// flush out the previous chunk details
|
||||
err := c.flushContents()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// clearing the chunk specific meta for next chunk
|
||||
c.chunkBuf.Reset()
|
||||
c.chunkMetaBuf.Reset()
|
||||
c.chunkMeta = c.chunkMeta[:0]
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
// get the starting offset for this doc
|
||||
dvOffset := c.chunkBuf.Len()
|
||||
dvSize, err := c.chunkBuf.Write(vals)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.chunkMeta = append(c.chunkMeta, MetaData{
|
||||
DocNum: docNum,
|
||||
DocDvOffset: uint64(dvOffset + dvSize),
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write commits all the encoded chunked contents to the provided writer.
|
||||
//
|
||||
// | ..... data ..... | chunk offsets (varints)
|
||||
// | position of chunk offsets (uint64) | number of offsets (uint64) |
|
||||
//
|
||||
func (c *chunkedContentCoder) Write() (int, error) {
|
||||
var tw int
|
||||
|
||||
if c.final != nil {
|
||||
// write out the data section first
|
||||
nw, err := c.w.Write(c.final)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
}
|
||||
|
||||
chunkOffsetsStart := uint64(tw)
|
||||
|
||||
if cap(c.final) < binary.MaxVarintLen64 {
|
||||
c.final = make([]byte, binary.MaxVarintLen64)
|
||||
} else {
|
||||
c.final = c.final[0:binary.MaxVarintLen64]
|
||||
}
|
||||
chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
|
||||
// write out the chunk offsets
|
||||
for _, chunkOffset := range chunkOffsets {
|
||||
n := binary.PutUvarint(c.final, chunkOffset)
|
||||
nw, err := c.w.Write(c.final[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
}
|
||||
|
||||
chunkOffsetsLen := uint64(tw) - chunkOffsetsStart
|
||||
|
||||
c.final = c.final[0:8]
|
||||
// write out the length of chunk offsets
|
||||
binary.BigEndian.PutUint64(c.final, chunkOffsetsLen)
|
||||
nw, err := c.w.Write(c.final)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
// write out the number of chunks
|
||||
binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens)))
|
||||
nw, err = c.w.Write(c.final)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
c.final = c.final[:0]
|
||||
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
// ReadDocValueBoundary elicits the start, end offsets from a
|
||||
// metaData header slice
|
||||
func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
|
||||
var start uint64
|
||||
if chunk > 0 {
|
||||
start = metaHeaders[chunk-1].DocDvOffset
|
||||
}
|
||||
return start, metaHeaders[chunk].DocDvOffset
|
||||
}
|
51
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
generated
vendored
Normal file
51
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
generated
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"hash/crc32"
|
||||
"io"
|
||||
)
|
||||
|
||||
// CountHashWriter wraps an io.Writer, keeping a running count of the bytes
// written through it and a running CRC-32 (IEEE) of those bytes.
type CountHashWriter struct {
	w   io.Writer
	crc uint32
	n   int
}

// NewCountHashWriter returns a CountHashWriter which wraps the provided
// Writer. Its count and checksum start at zero.
func NewCountHashWriter(w io.Writer) *CountHashWriter {
	chw := new(CountHashWriter)
	chw.w = w
	return chw
}

// Write passes b to the wrapped writer. Only the bytes the wrapped writer
// reports as written are added to the count and folded into the checksum.
// The wrapped writer's results are returned unchanged.
func (c *CountHashWriter) Write(b []byte) (int, error) {
	written, err := c.w.Write(b)
	accepted := b[:written]
	c.n += len(accepted)
	c.crc = crc32.Update(c.crc, crc32.IEEETable, accepted)
	return written, err
}

// Count returns the number of bytes written
func (c *CountHashWriter) Count() int {
	total := c.n
	return total
}

// Sum32 returns the CRC-32 hash of the content written to this writer
func (c *CountHashWriter) Sum32() uint32 {
	sum := c.crc
	return sum
}
|
289
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
generated
vendored
Normal file
289
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
generated
vendored
Normal file
@ -0,0 +1,289 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/couchbase/vellum"
|
||||
"github.com/couchbase/vellum/levenshtein"
|
||||
"github.com/couchbase/vellum/regexp"
|
||||
)
|
||||
|
||||
// Dictionary is the zap representation of the term dictionary
|
||||
type Dictionary struct {
|
||||
sb *SegmentBase
|
||||
field string
|
||||
fieldID uint16
|
||||
fst *vellum.FST
|
||||
fstReader *vellum.Reader
|
||||
}
|
||||
|
||||
// PostingsList returns the postings list for the specified term
|
||||
func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
|
||||
prealloc segment.PostingsList) (segment.PostingsList, error) {
|
||||
var preallocPL *PostingsList
|
||||
pl, ok := prealloc.(*PostingsList)
|
||||
if ok && pl != nil {
|
||||
preallocPL = pl
|
||||
}
|
||||
return d.postingsList(term, except, preallocPL)
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
|
||||
if d.fstReader == nil {
|
||||
if rv == nil || rv == emptyPostingsList {
|
||||
return emptyPostingsList, nil
|
||||
}
|
||||
return d.postingsListInit(rv, except), nil
|
||||
}
|
||||
|
||||
postingsOffset, exists, err := d.fstReader.Get(term)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vellum err: %v", err)
|
||||
}
|
||||
if !exists {
|
||||
if rv == nil || rv == emptyPostingsList {
|
||||
return emptyPostingsList, nil
|
||||
}
|
||||
return d.postingsListInit(rv, except), nil
|
||||
}
|
||||
|
||||
return d.postingsListFromOffset(postingsOffset, except, rv)
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
|
||||
rv = d.postingsListInit(rv, except)
|
||||
|
||||
err := rv.read(postingsOffset, d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
|
||||
if rv == nil || rv == emptyPostingsList {
|
||||
rv = &PostingsList{}
|
||||
} else {
|
||||
postings := rv.postings
|
||||
if postings != nil {
|
||||
postings.Clear()
|
||||
}
|
||||
|
||||
*rv = PostingsList{} // clear the struct
|
||||
|
||||
rv.postings = postings
|
||||
}
|
||||
rv.sb = d.sb
|
||||
rv.except = except
|
||||
return rv
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this dictionary
|
||||
func (d *Dictionary) Iterator() segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
itr, err := d.fst.Iterator(nil, nil)
|
||||
if err == nil {
|
||||
rv.itr = itr
|
||||
} else if err != nil && err != vellum.ErrIteratorDone {
|
||||
rv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// PrefixIterator returns an iterator which only visits terms having the
|
||||
// the specified prefix
|
||||
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
r, err := regexp.New(prefix + ".*")
|
||||
if err == nil {
|
||||
itr, err := d.fst.Search(r, nil, nil)
|
||||
if err == nil {
|
||||
rv.itr = itr
|
||||
} else if err != nil && err != vellum.ErrIteratorDone {
|
||||
rv.err = err
|
||||
}
|
||||
} else {
|
||||
rv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// RangeIterator returns an iterator which only visits terms between the
|
||||
// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
|
||||
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
}
|
||||
|
||||
// need to increment the end position to be inclusive
|
||||
endBytes := []byte(end)
|
||||
if endBytes[len(endBytes)-1] < 0xff {
|
||||
endBytes[len(endBytes)-1]++
|
||||
} else {
|
||||
endBytes = append(endBytes, 0xff)
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
itr, err := d.fst.Iterator([]byte(start), endBytes)
|
||||
if err == nil {
|
||||
rv.itr = itr
|
||||
} else if err != nil && err != vellum.ErrIteratorDone {
|
||||
rv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// RegexpIterator returns an iterator which only visits terms having the
|
||||
// the specified regex
|
||||
func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
r, err := regexp.New(regex)
|
||||
if err == nil {
|
||||
itr, err2 := d.fst.Search(r, nil, nil)
|
||||
if err2 == nil {
|
||||
rv.itr = itr
|
||||
} else if err2 != nil && err2 != vellum.ErrIteratorDone {
|
||||
rv.err = err2
|
||||
}
|
||||
} else {
|
||||
rv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// FuzzyIterator returns an iterator which only visits terms having the
|
||||
// the specified edit/levenshtein distance
|
||||
func (d *Dictionary) FuzzyIterator(term string,
|
||||
fuzziness int) segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
la, err := levenshtein.New(term, fuzziness)
|
||||
if err == nil {
|
||||
itr, err2 := d.fst.Search(la, nil, nil)
|
||||
if err2 == nil {
|
||||
rv.itr = itr
|
||||
} else if err2 != nil && err2 != vellum.ErrIteratorDone {
|
||||
rv.err = err2
|
||||
}
|
||||
} else {
|
||||
rv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
|
||||
includeCount bool) segment.DictionaryIterator {
|
||||
|
||||
rv := &DictionaryIterator{
|
||||
d: d,
|
||||
omitCount: !includeCount,
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
builder, err := vellum.New(&buf, nil)
|
||||
if err != nil {
|
||||
rv.err = err
|
||||
return rv
|
||||
}
|
||||
for _, term := range onlyTerms {
|
||||
err = builder.Insert(term, 0)
|
||||
if err != nil {
|
||||
rv.err = err
|
||||
return rv
|
||||
}
|
||||
}
|
||||
err = builder.Close()
|
||||
if err != nil {
|
||||
rv.err = err
|
||||
return rv
|
||||
}
|
||||
|
||||
onlyFST, err := vellum.Load(buf.Bytes())
|
||||
if err != nil {
|
||||
rv.err = err
|
||||
return rv
|
||||
}
|
||||
|
||||
itr, err := d.fst.Search(onlyFST, nil, nil)
|
||||
if err == nil {
|
||||
rv.itr = itr
|
||||
} else if err != nil && err != vellum.ErrIteratorDone {
|
||||
rv.err = err
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// DictionaryIterator is an iterator for term dictionary
|
||||
type DictionaryIterator struct {
|
||||
d *Dictionary
|
||||
itr vellum.Iterator
|
||||
err error
|
||||
tmp PostingsList
|
||||
entry index.DictEntry
|
||||
omitCount bool
|
||||
}
|
||||
|
||||
// Next returns the next entry in the dictionary
|
||||
func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
|
||||
if i.err != nil && i.err != vellum.ErrIteratorDone {
|
||||
return nil, i.err
|
||||
} else if i.itr == nil || i.err == vellum.ErrIteratorDone {
|
||||
return nil, nil
|
||||
}
|
||||
term, postingsOffset := i.itr.Current()
|
||||
i.entry.Term = string(term)
|
||||
if !i.omitCount {
|
||||
i.err = i.tmp.read(postingsOffset, i.d)
|
||||
if i.err != nil {
|
||||
return nil, i.err
|
||||
}
|
||||
i.entry.Count = i.tmp.Count()
|
||||
}
|
||||
i.err = i.itr.Next()
|
||||
return &i.entry, nil
|
||||
}
|
312
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
generated
vendored
Normal file
312
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
generated
vendored
Normal file
@ -0,0 +1,312 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizedocValueReader int
|
||||
|
||||
func init() {
|
||||
var dvi docValueReader
|
||||
reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size())
|
||||
}
|
||||
|
||||
// docNumTermsVisitor is called with a document number and the raw
// terms bytes stored for it; a non-nil error stops the iteration.
type docNumTermsVisitor func(docNum uint64, terms []byte) error
|
||||
|
||||
type docVisitState struct {
|
||||
dvrs map[uint16]*docValueReader
|
||||
segment *Segment
|
||||
}
|
||||
|
||||
type docValueReader struct {
|
||||
field string
|
||||
curChunkNum uint64
|
||||
chunkOffsets []uint64
|
||||
dvDataLoc uint64
|
||||
curChunkHeader []MetaData
|
||||
curChunkData []byte // compressed data cache
|
||||
uncompressed []byte // temp buf for snappy decompression
|
||||
}
|
||||
|
||||
func (di *docValueReader) size() int {
|
||||
return reflectStaticSizedocValueReader + size.SizeOfPtr +
|
||||
len(di.field) +
|
||||
len(di.chunkOffsets)*size.SizeOfUint64 +
|
||||
len(di.curChunkHeader)*reflectStaticSizeMetaData +
|
||||
len(di.curChunkData)
|
||||
}
|
||||
|
||||
func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader {
|
||||
if rv == nil {
|
||||
rv = &docValueReader{}
|
||||
}
|
||||
|
||||
rv.field = di.field
|
||||
rv.curChunkNum = math.MaxUint64
|
||||
rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable
|
||||
rv.dvDataLoc = di.dvDataLoc
|
||||
rv.curChunkHeader = nil
|
||||
rv.curChunkData = nil
|
||||
rv.uncompressed = nil
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (di *docValueReader) fieldName() string {
|
||||
return di.field
|
||||
}
|
||||
|
||||
func (di *docValueReader) curChunkNumber() uint64 {
|
||||
return di.curChunkNum
|
||||
}
|
||||
|
||||
func (s *SegmentBase) loadFieldDocValueReader(field string,
|
||||
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
|
||||
// get the docValue offset for the given fields
|
||||
if fieldDvLocStart == fieldNotUninverted {
|
||||
return nil, fmt.Errorf("loadFieldDocValueReader: "+
|
||||
"no docValues found for field: %s", field)
|
||||
}
|
||||
|
||||
// read the number of chunks, and chunk offsets position
|
||||
var numChunks, chunkOffsetsPosition uint64
|
||||
|
||||
if fieldDvLocEnd-fieldDvLocStart > 16 {
|
||||
numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
|
||||
// read the length of chunk offsets
|
||||
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
|
||||
// acquire position of chunk offsets
|
||||
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
|
||||
}
|
||||
|
||||
fdvIter := &docValueReader{
|
||||
curChunkNum: math.MaxUint64,
|
||||
field: field,
|
||||
chunkOffsets: make([]uint64, int(numChunks)),
|
||||
}
|
||||
|
||||
// read the chunk offsets
|
||||
var offset uint64
|
||||
for i := 0; i < int(numChunks); i++ {
|
||||
loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
|
||||
if read <= 0 {
|
||||
return nil, fmt.Errorf("corrupted chunk offset during segment load")
|
||||
}
|
||||
fdvIter.chunkOffsets[i] = loc
|
||||
offset += uint64(read)
|
||||
}
|
||||
|
||||
// set the data offset
|
||||
fdvIter.dvDataLoc = fieldDvLocStart
|
||||
|
||||
return fdvIter, nil
|
||||
}
|
||||
|
||||
func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error {
|
||||
// advance to the chunk where the docValues
|
||||
// reside for the given docNum
|
||||
destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
|
||||
start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
|
||||
if start >= end {
|
||||
di.curChunkHeader = di.curChunkHeader[:0]
|
||||
di.curChunkData = nil
|
||||
di.curChunkNum = chunkNumber
|
||||
di.uncompressed = di.uncompressed[:0]
|
||||
return nil
|
||||
}
|
||||
|
||||
destChunkDataLoc += start
|
||||
curChunkEnd += end
|
||||
|
||||
// read the number of docs reside in the chunk
|
||||
numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
|
||||
if read <= 0 {
|
||||
return fmt.Errorf("failed to read the chunk")
|
||||
}
|
||||
chunkMetaLoc := destChunkDataLoc + uint64(read)
|
||||
|
||||
offset := uint64(0)
|
||||
di.curChunkHeader = make([]MetaData, int(numDocs))
|
||||
for i := 0; i < int(numDocs); i++ {
|
||||
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
|
||||
offset += uint64(read)
|
||||
}
|
||||
|
||||
compressedDataLoc := chunkMetaLoc + offset
|
||||
dataLength := curChunkEnd - compressedDataLoc
|
||||
di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
|
||||
di.curChunkNum = chunkNumber
|
||||
di.uncompressed = di.uncompressed[:0]
|
||||
return nil
|
||||
}
|
||||
|
||||
func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error {
|
||||
for i := 0; i < len(di.chunkOffsets); i++ {
|
||||
err := di.loadDvChunk(uint64(i), s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if di.curChunkData == nil || len(di.curChunkHeader) <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// uncompress the already loaded data
|
||||
uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
di.uncompressed = uncompressed
|
||||
|
||||
start := uint64(0)
|
||||
for _, entry := range di.curChunkHeader {
|
||||
err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
start = entry.DocDvOffset
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (di *docValueReader) visitDocValues(docNum uint64,
|
||||
visitor index.DocumentFieldTermVisitor) error {
|
||||
// binary search the term locations for the docNum
|
||||
start, end := di.getDocValueLocs(docNum)
|
||||
if start == math.MaxUint64 || end == math.MaxUint64 || start == end {
|
||||
return nil
|
||||
}
|
||||
|
||||
var uncompressed []byte
|
||||
var err error
|
||||
// use the uncompressed copy if available
|
||||
if len(di.uncompressed) > 0 {
|
||||
uncompressed = di.uncompressed
|
||||
} else {
|
||||
// uncompress the already loaded data
|
||||
uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
di.uncompressed = uncompressed
|
||||
}
|
||||
|
||||
// pick the terms for the given docNum
|
||||
uncompressed = uncompressed[start:end]
|
||||
for {
|
||||
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
visitor(di.field, uncompressed[0:i])
|
||||
uncompressed = uncompressed[i+1:]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
||||
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
|
||||
return di.curChunkHeader[i].DocNum >= docNum
|
||||
})
|
||||
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
|
||||
return ReadDocValueBoundary(i, di.curChunkHeader)
|
||||
}
|
||||
return math.MaxUint64, math.MaxUint64
|
||||
}
|
||||
|
||||
// VisitDocumentFieldTerms is an implementation of the
|
||||
// DocumentFieldTermVisitable interface
|
||||
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
|
||||
segment.DocVisitState, error) {
|
||||
dvs, ok := dvsIn.(*docVisitState)
|
||||
if !ok || dvs == nil {
|
||||
dvs = &docVisitState{}
|
||||
} else {
|
||||
if dvs.segment != s {
|
||||
dvs.segment = s
|
||||
dvs.dvrs = nil
|
||||
}
|
||||
}
|
||||
|
||||
var fieldIDPlus1 uint16
|
||||
if dvs.dvrs == nil {
|
||||
dvs.dvrs = make(map[uint16]*docValueReader, len(fields))
|
||||
for _, field := range fields {
|
||||
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
|
||||
continue
|
||||
}
|
||||
fieldID := fieldIDPlus1 - 1
|
||||
if dvIter, exists := s.fieldDvReaders[fieldID]; exists &&
|
||||
dvIter != nil {
|
||||
dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find the chunkNumber where the docValues are stored
|
||||
docInChunk := localDocNum / uint64(s.chunkFactor)
|
||||
var dvr *docValueReader
|
||||
for _, field := range fields {
|
||||
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
|
||||
continue
|
||||
}
|
||||
fieldID := fieldIDPlus1 - 1
|
||||
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
|
||||
// check if the chunk is already loaded
|
||||
if docInChunk != dvr.curChunkNumber() {
|
||||
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
|
||||
if err != nil {
|
||||
return dvs, err
|
||||
}
|
||||
}
|
||||
|
||||
_ = dvr.visitDocValues(localDocNum, visitor)
|
||||
}
|
||||
}
|
||||
return dvs, nil
|
||||
}
|
||||
|
||||
// VisitableDocValueFields returns the list of fields with
|
||||
// persisted doc value terms ready to be visitable using the
|
||||
// VisitDocumentFieldTerms method.
|
||||
func (s *Segment) VisitableDocValueFields() ([]string, error) {
|
||||
rv := make([]string, 0, len(s.fieldDvReaders))
|
||||
for fieldID, field := range s.fieldsInv {
|
||||
if dvIter, ok := s.fieldDvReaders[uint16(fieldID)]; ok &&
|
||||
dvIter != nil {
|
||||
rv = append(rv, field)
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
124
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
generated
vendored
Normal file
124
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
generated
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/couchbase/vellum"
|
||||
)
|
||||
|
||||
// enumerator provides an ordered traversal of multiple vellum
|
||||
// iterators. Like JOIN of iterators, the enumerator produces a
|
||||
// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC,
|
||||
// then iteratorIndex ASC, where the same key might be seen or
|
||||
// repeated across multiple child iterators.
|
||||
type enumerator struct {
|
||||
itrs []vellum.Iterator
|
||||
currKs [][]byte
|
||||
currVs []uint64
|
||||
|
||||
lowK []byte
|
||||
lowIdxs []int
|
||||
lowCurr int
|
||||
}
|
||||
|
||||
// newEnumerator returns a new enumerator over the vellum Iterators
|
||||
func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) {
|
||||
rv := &enumerator{
|
||||
itrs: itrs,
|
||||
currKs: make([][]byte, len(itrs)),
|
||||
currVs: make([]uint64, len(itrs)),
|
||||
lowIdxs: make([]int, 0, len(itrs)),
|
||||
}
|
||||
for i, itr := range rv.itrs {
|
||||
rv.currKs[i], rv.currVs[i] = itr.Current()
|
||||
}
|
||||
rv.updateMatches()
|
||||
if rv.lowK == nil {
|
||||
return rv, vellum.ErrIteratorDone
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// updateMatches maintains the low key matches based on the currKs
|
||||
func (m *enumerator) updateMatches() {
|
||||
m.lowK = nil
|
||||
m.lowIdxs = m.lowIdxs[:0]
|
||||
m.lowCurr = 0
|
||||
|
||||
for i, key := range m.currKs {
|
||||
if key == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
cmp := bytes.Compare(key, m.lowK)
|
||||
if cmp < 0 || m.lowK == nil {
|
||||
// reached a new low
|
||||
m.lowK = key
|
||||
m.lowIdxs = m.lowIdxs[:0]
|
||||
m.lowIdxs = append(m.lowIdxs, i)
|
||||
} else if cmp == 0 {
|
||||
m.lowIdxs = append(m.lowIdxs, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Current returns the enumerator's current key, iterator-index, and
|
||||
// value. If the enumerator is not pointing at a valid value (because
|
||||
// Next returned an error previously), Current will return nil,0,0.
|
||||
func (m *enumerator) Current() ([]byte, int, uint64) {
|
||||
var i int
|
||||
var v uint64
|
||||
if m.lowCurr < len(m.lowIdxs) {
|
||||
i = m.lowIdxs[m.lowCurr]
|
||||
v = m.currVs[i]
|
||||
}
|
||||
return m.lowK, i, v
|
||||
}
|
||||
|
||||
// Next advances the enumerator to the next key/iterator/value result,
|
||||
// else vellum.ErrIteratorDone is returned.
|
||||
func (m *enumerator) Next() error {
|
||||
m.lowCurr += 1
|
||||
if m.lowCurr >= len(m.lowIdxs) {
|
||||
// move all the current low iterators forwards
|
||||
for _, vi := range m.lowIdxs {
|
||||
err := m.itrs[vi].Next()
|
||||
if err != nil && err != vellum.ErrIteratorDone {
|
||||
return err
|
||||
}
|
||||
m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current()
|
||||
}
|
||||
m.updateMatches()
|
||||
}
|
||||
if m.lowK == nil {
|
||||
return vellum.ErrIteratorDone
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close all the underlying Iterators. The first error, if any, will
|
||||
// be returned.
|
||||
func (m *enumerator) Close() error {
|
||||
var rv error
|
||||
for _, itr := range m.itrs {
|
||||
err := itr.Close()
|
||||
if rv == nil {
|
||||
rv = err
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
172
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
generated
vendored
Normal file
172
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
generated
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
// chunkedIntCoder encodes integers as uvarints grouped into chunks by
// document number.
type chunkedIntCoder struct {
	// final accumulates the bytes of all closed chunks
	final []byte
	// chunkSize is the number of document numbers covered by a chunk
	chunkSize uint64
	// chunkBuf collects the bytes of the chunk being built
	chunkBuf bytes.Buffer
	// chunkLens records the encoded length of every chunk
	chunkLens []uint64
	// currChunk is the index of the chunk being built
	currChunk uint64

	// buf is scratch space for varint encoding
	buf []byte
}
|
||||
|
||||
// newChunkedIntCoder returns a new chunk int coder which packs data into
|
||||
// chunks based on the provided chunkSize and supports up to the specified
|
||||
// maxDocNum
|
||||
func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder {
|
||||
total := maxDocNum/chunkSize + 1
|
||||
rv := &chunkedIntCoder{
|
||||
chunkSize: chunkSize,
|
||||
chunkLens: make([]uint64, total),
|
||||
final: make([]byte, 0, 64),
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// Reset lets you reuse this chunked int coder. buffers are reset and reused
|
||||
// from previous use. you cannot change the chunk size or max doc num.
|
||||
func (c *chunkedIntCoder) Reset() {
|
||||
c.final = c.final[:0]
|
||||
c.chunkBuf.Reset()
|
||||
c.currChunk = 0
|
||||
for i := range c.chunkLens {
|
||||
c.chunkLens[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
// Add encodes the provided integers into the correct chunk for the provided
|
||||
// doc num. You MUST call Add() with increasing docNums.
|
||||
func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
|
||||
chunk := docNum / c.chunkSize
|
||||
if chunk != c.currChunk {
|
||||
// starting a new chunk
|
||||
c.Close()
|
||||
c.chunkBuf.Reset()
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
if len(c.buf) < binary.MaxVarintLen64 {
|
||||
c.buf = make([]byte, binary.MaxVarintLen64)
|
||||
}
|
||||
|
||||
for _, val := range vals {
|
||||
wb := binary.PutUvarint(c.buf, val)
|
||||
_, err := c.chunkBuf.Write(c.buf[:wb])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
|
||||
chunk := docNum / c.chunkSize
|
||||
if chunk != c.currChunk {
|
||||
// starting a new chunk
|
||||
c.Close()
|
||||
c.chunkBuf.Reset()
|
||||
c.currChunk = chunk
|
||||
}
|
||||
|
||||
_, err := c.chunkBuf.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// Close indicates you are done calling Add() this allows the final chunk
|
||||
// to be encoded.
|
||||
func (c *chunkedIntCoder) Close() {
|
||||
encodingBytes := c.chunkBuf.Bytes()
|
||||
c.chunkLens[c.currChunk] = uint64(len(encodingBytes))
|
||||
c.final = append(c.final, encodingBytes...)
|
||||
c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close
|
||||
}
|
||||
|
||||
// Write commits all the encoded chunked integers to the provided writer.
|
||||
func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
|
||||
bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens))
|
||||
if len(c.buf) < bufNeeded {
|
||||
c.buf = make([]byte, bufNeeded)
|
||||
}
|
||||
buf := c.buf
|
||||
|
||||
// convert the chunk lengths into chunk offsets
|
||||
chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
|
||||
|
||||
// write out the number of chunks & each chunk offsets
|
||||
n := binary.PutUvarint(buf, uint64(len(chunkOffsets)))
|
||||
for _, chunkOffset := range chunkOffsets {
|
||||
n += binary.PutUvarint(buf[n:], chunkOffset)
|
||||
}
|
||||
|
||||
tw, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
// write out the data
|
||||
nw, err := w.Write(c.final)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
func (c *chunkedIntCoder) FinalSize() int {
|
||||
return len(c.final)
|
||||
}
|
||||
|
||||
// modifyLengthsToEndOffsets converts the chunk length array
// to a chunk offset array, in place: each position is overwritten with
// the running total of the lengths up to and including it, and the
// same slice is returned. readChunkBoundary figures out the start and
// end of every chunk from these offsets: the end offset of the i'th
// chunk is stored at position i, and its start offset is the value at
// position i-1 (always zero for the 0'th chunk).
// eg:
// Lens -> 5 5 5 5 => 5 10 15 20
// Lens -> 0 5 0 5 => 0 5 5 10
// Lens -> 0 0 0 5 => 0 0 0 5
// Lens -> 5 0 0 0 => 5 5 5 5
// Lens -> 0 5 0 0 => 0 5 5 5
// Lens -> 0 0 5 0 => 0 0 5 5
func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
	var total uint64
	for i, length := range lengths {
		total += length
		lengths[i] = total
	}
	return lengths
}
|
||||
|
||||
// readChunkBoundary returns the start and end offset of the given
// chunk from an array of chunk end offsets: the end is the chunk's own
// entry and the start is the previous chunk's entry, or zero for the
// first chunk.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	if chunk <= 0 {
		return 0, offsets[chunk]
	}
	return offsets[chunk-1], offsets[chunk]
}
|
801
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
generated
vendored
Normal file
801
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
generated
vendored
Normal file
@ -0,0 +1,801 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/couchbase/vellum"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// DefaultFileMergerBufferSize is the size in bytes of the write buffer
// placed in front of the merged segment file.
var DefaultFileMergerBufferSize = 1 << 20
|
||||
|
||||
// docDropped is the sentinel docNum to represent a deleted doc
const docDropped = math.MaxUint64
|
||||
|
||||
// Merge takes a slice of zap segments and bit masks describing which
|
||||
// documents may be dropped, and creates a new segment containing the
|
||||
// remaining data. This new segment is built at the specified path,
|
||||
// with the provided chunkFactor.
|
||||
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
||||
chunkFactor uint32) ([][]uint64, uint64, error) {
|
||||
segmentBases := make([]*SegmentBase, len(segments))
|
||||
for segmenti, segment := range segments {
|
||||
segmentBases[segmenti] = &segment.SegmentBase
|
||||
}
|
||||
|
||||
return MergeSegmentBases(segmentBases, drops, path, chunkFactor)
|
||||
}
|
||||
|
||||
func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string,
|
||||
chunkFactor uint32) ([][]uint64, uint64, error) {
|
||||
flag := os.O_RDWR | os.O_CREATE
|
||||
|
||||
f, err := os.OpenFile(path, flag, 0600)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
cleanup := func() {
|
||||
_ = f.Close()
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
|
||||
// buffer the output
|
||||
br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize)
|
||||
|
||||
// wrap it for counting (tracking offsets)
|
||||
cr := NewCountHashWriter(br)
|
||||
|
||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err :=
|
||||
MergeToWriter(segmentBases, drops, chunkFactor, cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset,
|
||||
docValueOffset, chunkFactor, cr.Sum32(), cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = br.Flush()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = f.Sync()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = f.Close()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
return newDocNums, uint64(cr.Count()), nil
|
||||
}
|
||||
|
||||
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
chunkFactor uint32, cr *CountHashWriter) (
|
||||
newDocNums [][]uint64,
|
||||
numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
|
||||
dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
|
||||
err error) {
|
||||
docValueOffset = uint64(fieldNotUninverted)
|
||||
|
||||
var fieldsSame bool
|
||||
fieldsSame, fieldsInv = mergeFields(segments)
|
||||
fieldsMap = mapFields(fieldsInv)
|
||||
|
||||
numDocs = computeNewDocCount(segments, drops)
|
||||
if numDocs > 0 {
|
||||
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
|
||||
fieldsMap, fieldsInv, fieldsSame, numDocs, cr)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
|
||||
dictLocs, docValueOffset, err = persistMergedRest(segments, drops,
|
||||
fieldsInv, fieldsMap, fieldsSame,
|
||||
newDocNums, numDocs, chunkFactor, cr)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
} else {
|
||||
dictLocs = make([]uint64, len(fieldsInv))
|
||||
}
|
||||
|
||||
fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, 0, nil, nil, nil, err
|
||||
}
|
||||
|
||||
return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil
|
||||
}
|
||||
|
||||
// mapFields takes the fieldsInv list and returns a map of fieldName
// to fieldID+1, where fieldID is the position of the name in the list
func mapFields(fields []string) map[string]uint16 {
	byName := make(map[string]uint16, len(fields))
	for id := range fields {
		byName[fields[id]] = uint16(id) + 1
	}
	return byName
}
|
||||
|
||||
// computeNewDocCount determines how many documents will be in the newly
|
||||
// merged segment when obsoleted docs are dropped
|
||||
func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
|
||||
var newDocCount uint64
|
||||
for segI, segment := range segments {
|
||||
newDocCount += segment.numDocs
|
||||
if drops[segI] != nil {
|
||||
newDocCount -= drops[segI].GetCardinality()
|
||||
}
|
||||
}
|
||||
return newDocCount
|
||||
}
|
||||
|
||||
func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||
fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool,
|
||||
newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32,
|
||||
w *CountHashWriter) ([]uint64, uint64, error) {
|
||||
|
||||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||
var bufLoc []uint64
|
||||
|
||||
var postings *PostingsList
|
||||
var postItr *PostingsIterator
|
||||
|
||||
rv := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocsStart := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocsEnd := make([]uint64, len(fieldsInv))
|
||||
|
||||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
|
||||
var vellumBuf bytes.Buffer
|
||||
newVellum, err := vellum.New(&vellumBuf, nil)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
newRoaring := roaring.NewBitmap()
|
||||
|
||||
// for each field
|
||||
for fieldID, fieldName := range fieldsInv {
|
||||
|
||||
// collect FST iterators from all active segments for this field
|
||||
var newDocNums [][]uint64
|
||||
var drops []*roaring.Bitmap
|
||||
var dicts []*Dictionary
|
||||
var itrs []vellum.Iterator
|
||||
|
||||
var segmentsInFocus []*SegmentBase
|
||||
|
||||
for segmentI, segment := range segments {
|
||||
dict, err2 := segment.dictionary(fieldName)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
if dict != nil && dict.fst != nil {
|
||||
itr, err2 := dict.fst.Iterator(nil, nil)
|
||||
if err2 != nil && err2 != vellum.ErrIteratorDone {
|
||||
return nil, 0, err2
|
||||
}
|
||||
if itr != nil {
|
||||
newDocNums = append(newDocNums, newDocNumsIn[segmentI])
|
||||
if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() {
|
||||
drops = append(drops, dropsIn[segmentI])
|
||||
} else {
|
||||
drops = append(drops, nil)
|
||||
}
|
||||
dicts = append(dicts, dict)
|
||||
itrs = append(itrs, itr)
|
||||
segmentsInFocus = append(segmentsInFocus, segment)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var prevTerm []byte
|
||||
|
||||
newRoaring.Clear()
|
||||
|
||||
var lastDocNum, lastFreq, lastNorm uint64
|
||||
|
||||
// determines whether to use "1-hit" encoding optimization
|
||||
// when a term appears in only 1 doc, with no loc info,
|
||||
// has freq of 1, and the docNum fits into 31-bits
|
||||
use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) {
|
||||
if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 {
|
||||
docNum := uint64(newRoaring.Minimum())
|
||||
if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 {
|
||||
return true, docNum, lastNorm
|
||||
}
|
||||
}
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
finishTerm := func(term []byte) error {
|
||||
if term == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
tfEncoder.Close()
|
||||
locEncoder.Close()
|
||||
|
||||
postingsOffset, err := writePostings(newRoaring,
|
||||
tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if postingsOffset > 0 {
|
||||
err = newVellum.Insert(term, postingsOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
newRoaring.Clear()
|
||||
|
||||
tfEncoder.Reset()
|
||||
locEncoder.Reset()
|
||||
|
||||
lastDocNum = 0
|
||||
lastFreq = 0
|
||||
lastNorm = 0
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
enumerator, err := newEnumerator(itrs)
|
||||
|
||||
for err == nil {
|
||||
term, itrI, postingsOffset := enumerator.Current()
|
||||
|
||||
if !bytes.Equal(prevTerm, term) {
|
||||
// if the term changed, write out the info collected
|
||||
// for the previous term
|
||||
err2 := finishTerm(prevTerm)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
}
|
||||
|
||||
var err2 error
|
||||
postings, err2 = dicts[itrI].postingsListFromOffset(
|
||||
postingsOffset, drops[itrI], postings)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
|
||||
postItr = postings.iterator(true, true, true, postItr)
|
||||
|
||||
if fieldsSame {
|
||||
// can optimize by copying freq/norm/loc bytes directly
|
||||
lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
|
||||
term, postItr, newDocNums[itrI], newRoaring,
|
||||
tfEncoder, locEncoder)
|
||||
} else {
|
||||
lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
|
||||
fieldsMap, term, postItr, newDocNums[itrI], newRoaring,
|
||||
tfEncoder, locEncoder, bufLoc)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem
|
||||
prevTerm = append(prevTerm, term...)
|
||||
|
||||
err = enumerator.Next()
|
||||
}
|
||||
if err != nil && err != vellum.ErrIteratorDone {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
err = finishTerm(prevTerm)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
dictOffset := uint64(w.Count())
|
||||
|
||||
err = newVellum.Close()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
vellumData := vellumBuf.Bytes()
|
||||
|
||||
// write out the length of the vellum data
|
||||
n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData)))
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// write this vellum to disk
|
||||
_, err = w.Write(vellumData)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
rv[fieldID] = dictOffset
|
||||
|
||||
// get the field doc value offset (start)
|
||||
fieldDvLocsStart[fieldID] = uint64(w.Count())
|
||||
|
||||
// update the field doc values
|
||||
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true)
|
||||
|
||||
fdvReadersAvailable := false
|
||||
var dvIterClone *docValueReader
|
||||
for segmentI, segment := range segmentsInFocus {
|
||||
fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
|
||||
if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists &&
|
||||
dvIter != nil {
|
||||
fdvReadersAvailable = true
|
||||
dvIterClone = dvIter.cloneInto(dvIterClone)
|
||||
err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error {
|
||||
if newDocNums[segmentI][docNum] == docDropped {
|
||||
return nil
|
||||
}
|
||||
err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if fdvReadersAvailable {
|
||||
err = fdvEncoder.Close()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// persist the doc value details for this field
|
||||
_, err = fdvEncoder.Write()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// get the field doc value offset (end)
|
||||
fieldDvLocsEnd[fieldID] = uint64(w.Count())
|
||||
} else {
|
||||
fieldDvLocsStart[fieldID] = fieldNotUninverted
|
||||
fieldDvLocsEnd[fieldID] = fieldNotUninverted
|
||||
}
|
||||
|
||||
// reset vellum buffer and vellum builder
|
||||
vellumBuf.Reset()
|
||||
err = newVellum.Reset(&vellumBuf)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
fieldDvLocsOffset := uint64(w.Count())
|
||||
|
||||
buf := bufMaxVarintLen64
|
||||
for i := 0; i < len(fieldDvLocsStart); i++ {
|
||||
n := binary.PutUvarint(buf, fieldDvLocsStart[i])
|
||||
_, err := w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
n = binary.PutUvarint(buf, fieldDvLocsEnd[i])
|
||||
_, err = w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, fieldDvLocsOffset, nil
|
||||
}
|
||||
|
||||
func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
|
||||
newDocNums []uint64, newRoaring *roaring.Bitmap,
|
||||
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) (
|
||||
lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
|
||||
next, err := postItr.Next()
|
||||
for next != nil && err == nil {
|
||||
hitNewDocNum := newDocNums[next.Number()]
|
||||
if hitNewDocNum == docDropped {
|
||||
return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum")
|
||||
}
|
||||
|
||||
newRoaring.Add(uint32(hitNewDocNum))
|
||||
|
||||
nextFreq := next.Frequency()
|
||||
nextNorm := uint64(math.Float32bits(float32(next.Norm())))
|
||||
|
||||
locs := next.Locations()
|
||||
|
||||
err = tfEncoder.Add(hitNewDocNum,
|
||||
encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
|
||||
if len(locs) > 0 {
|
||||
numBytesLocs := 0
|
||||
for _, loc := range locs {
|
||||
ap := loc.ArrayPositions()
|
||||
numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1),
|
||||
loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap)
|
||||
}
|
||||
|
||||
err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs))
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
|
||||
for _, loc := range locs {
|
||||
ap := loc.ArrayPositions()
|
||||
if cap(bufLoc) < 5+len(ap) {
|
||||
bufLoc = make([]uint64, 0, 5+len(ap))
|
||||
}
|
||||
args := bufLoc[0:5]
|
||||
args[0] = uint64(fieldsMap[loc.Field()] - 1)
|
||||
args[1] = loc.Pos()
|
||||
args[2] = loc.Start()
|
||||
args[3] = loc.End()
|
||||
args[4] = uint64(len(ap))
|
||||
args = append(args, ap...)
|
||||
err = locEncoder.Add(hitNewDocNum, args...)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lastDocNum = hitNewDocNum
|
||||
lastFreq = nextFreq
|
||||
lastNorm = nextNorm
|
||||
|
||||
next, err = postItr.Next()
|
||||
}
|
||||
|
||||
return lastDocNum, lastFreq, lastNorm, bufLoc, err
|
||||
}
|
||||
|
||||
func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
|
||||
newDocNums []uint64, newRoaring *roaring.Bitmap,
|
||||
tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) (
|
||||
lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
|
||||
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err :=
|
||||
postItr.nextBytes()
|
||||
for err == nil && len(nextFreqNormBytes) > 0 {
|
||||
hitNewDocNum := newDocNums[nextDocNum]
|
||||
if hitNewDocNum == docDropped {
|
||||
return 0, 0, 0, fmt.Errorf("see hit with dropped doc num")
|
||||
}
|
||||
|
||||
newRoaring.Add(uint32(hitNewDocNum))
|
||||
err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes)
|
||||
if err != nil {
|
||||
return 0, 0, 0, err
|
||||
}
|
||||
|
||||
if len(nextLocBytes) > 0 {
|
||||
err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
|
||||
if err != nil {
|
||||
return 0, 0, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
lastDocNum = hitNewDocNum
|
||||
lastFreq = nextFreq
|
||||
lastNorm = nextNorm
|
||||
|
||||
nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err =
|
||||
postItr.nextBytes()
|
||||
}
|
||||
|
||||
return lastDocNum, lastFreq, lastNorm, err
|
||||
}
|
||||
|
||||
func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder,
|
||||
use1HitEncoding func(uint64) (bool, uint64, uint64),
|
||||
w *CountHashWriter, bufMaxVarintLen64 []byte) (
|
||||
offset uint64, err error) {
|
||||
termCardinality := postings.GetCardinality()
|
||||
if termCardinality <= 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
if use1HitEncoding != nil {
|
||||
encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality)
|
||||
if encodeAs1Hit {
|
||||
return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil
|
||||
}
|
||||
}
|
||||
|
||||
tfOffset := uint64(w.Count())
|
||||
_, err = tfEncoder.Write(w)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
locOffset := uint64(w.Count())
|
||||
_, err = locEncoder.Write(w)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
postingsOffset := uint64(w.Count())
|
||||
|
||||
n := binary.PutUvarint(bufMaxVarintLen64, tfOffset)
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
n = binary.PutUvarint(bufMaxVarintLen64, locOffset)
|
||||
_, err = w.Write(bufMaxVarintLen64[:n])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
_, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return postingsOffset, nil
|
||||
}
|
||||
|
||||
// varintEncoder is the signature of a callback that takes a uint64 and
// returns an int and an error.
// NOTE(review): presumably it encodes the value as a varint and returns the
// number of bytes written — confirm against its users.
type varintEncoder func(uint64) (int, error)
|
||||
|
||||
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
|
||||
w *CountHashWriter) (uint64, [][]uint64, error) {
|
||||
var rv [][]uint64 // The remapped or newDocNums for each segment.
|
||||
|
||||
var newDocNum uint64
|
||||
|
||||
var curr int
|
||||
var data, compressed []byte
|
||||
var metaBuf bytes.Buffer
|
||||
varBuf := make([]byte, binary.MaxVarintLen64)
|
||||
metaEncode := func(val uint64) (int, error) {
|
||||
wb := binary.PutUvarint(varBuf, val)
|
||||
return metaBuf.Write(varBuf[:wb])
|
||||
}
|
||||
|
||||
vals := make([][][]byte, len(fieldsInv))
|
||||
typs := make([][]byte, len(fieldsInv))
|
||||
poss := make([][][]uint64, len(fieldsInv))
|
||||
|
||||
docNumOffsets := make([]uint64, newSegDocCount)
|
||||
|
||||
// for each segment
|
||||
for segI, segment := range segments {
|
||||
segNewDocNums := make([]uint64, segment.numDocs)
|
||||
|
||||
dropsI := drops[segI]
|
||||
|
||||
// optimize when the field mapping is the same across all
|
||||
// segments and there are no deletions, via byte-copying
|
||||
// of stored docs bytes directly to the writer
|
||||
if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) {
|
||||
err := segment.copyStoredDocs(newDocNum, docNumOffsets, w)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
for i := uint64(0); i < segment.numDocs; i++ {
|
||||
segNewDocNums[i] = newDocNum
|
||||
newDocNum++
|
||||
}
|
||||
rv = append(rv, segNewDocNums)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// for each doc num
|
||||
for docNum := uint64(0); docNum < segment.numDocs; docNum++ {
|
||||
// TODO: roaring's API limits docNums to 32-bits?
|
||||
if dropsI != nil && dropsI.Contains(uint32(docNum)) {
|
||||
segNewDocNums[docNum] = docDropped
|
||||
continue
|
||||
}
|
||||
|
||||
segNewDocNums[docNum] = newDocNum
|
||||
|
||||
curr = 0
|
||||
metaBuf.Reset()
|
||||
data = data[:0]
|
||||
|
||||
// collect all the data
|
||||
for i := 0; i < len(fieldsInv); i++ {
|
||||
vals[i] = vals[i][:0]
|
||||
typs[i] = typs[i][:0]
|
||||
poss[i] = poss[i][:0]
|
||||
}
|
||||
err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
|
||||
fieldID := int(fieldsMap[field]) - 1
|
||||
vals[fieldID] = append(vals[fieldID], value)
|
||||
typs[fieldID] = append(typs[fieldID], typ)
|
||||
poss[fieldID] = append(poss[fieldID], pos)
|
||||
return true
|
||||
})
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// _id field special case optimizes ExternalID() lookups
|
||||
idFieldVal := vals[uint16(0)][0]
|
||||
_, err = metaEncode(uint64(len(idFieldVal)))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// now walk the non-"_id" fields in order
|
||||
for fieldID := 1; fieldID < len(fieldsInv); fieldID++ {
|
||||
storedFieldValues := vals[fieldID]
|
||||
|
||||
stf := typs[fieldID]
|
||||
spf := poss[fieldID]
|
||||
|
||||
var err2 error
|
||||
curr, data, err2 = persistStoredFieldValues(fieldID,
|
||||
storedFieldValues, stf, spf, curr, metaEncode, data)
|
||||
if err2 != nil {
|
||||
return 0, nil, err2
|
||||
}
|
||||
}
|
||||
|
||||
metaBytes := metaBuf.Bytes()
|
||||
|
||||
compressed = snappy.Encode(compressed[:cap(compressed)], data)
|
||||
|
||||
// record where we're about to start writing
|
||||
docNumOffsets[newDocNum] = uint64(w.Count())
|
||||
|
||||
// write out the meta len and compressed data len
|
||||
_, err = writeUvarints(w,
|
||||
uint64(len(metaBytes)),
|
||||
uint64(len(idFieldVal)+len(compressed)))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// now write the meta
|
||||
_, err = w.Write(metaBytes)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// now write the _id field val (counted as part of the 'compressed' data)
|
||||
_, err = w.Write(idFieldVal)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
// now write the compressed data
|
||||
_, err = w.Write(compressed)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
newDocNum++
|
||||
}
|
||||
|
||||
rv = append(rv, segNewDocNums)
|
||||
}
|
||||
|
||||
// return value is the start of the stored index
|
||||
storedIndexOffset := uint64(w.Count())
|
||||
|
||||
// now write out the stored doc index
|
||||
for _, docNumOffset := range docNumOffsets {
|
||||
err := binary.Write(w, binary.BigEndian, docNumOffset)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return storedIndexOffset, rv, nil
|
||||
}
|
||||
|
||||
// copyStoredDocs writes out a segment's stored doc info, optimized by
|
||||
// using a single Write() call for the entire set of bytes. The
|
||||
// newDocNumOffsets is filled with the new offsets for each doc.
|
||||
func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
|
||||
w *CountHashWriter) error {
|
||||
if s.numDocs <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
indexOffset0, storedOffset0, _, _, _ :=
|
||||
s.getDocStoredOffsets(0) // the segment's first doc
|
||||
|
||||
indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN :=
|
||||
s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc
|
||||
|
||||
storedOffset0New := uint64(w.Count())
|
||||
|
||||
storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN]
|
||||
_, err := w.Write(storedBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// remap the storedOffset's for the docs into new offsets relative
|
||||
// to storedOffset0New, filling the given docNumOffsetsOut array
|
||||
for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 {
|
||||
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
|
||||
storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New
|
||||
newDocNumOffsets[newDocNum] = storedOffsetNew
|
||||
newDocNum += 1
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mergeFields builds a unified list of fields used across all the
|
||||
// input segments, and computes whether the fields are the same across
|
||||
// segments (which depends on fields to be sorted in the same way
|
||||
// across segments)
|
||||
func mergeFields(segments []*SegmentBase) (bool, []string) {
|
||||
fieldsSame := true
|
||||
|
||||
var segment0Fields []string
|
||||
if len(segments) > 0 {
|
||||
segment0Fields = segments[0].Fields()
|
||||
}
|
||||
|
||||
fieldsExist := map[string]struct{}{}
|
||||
for _, segment := range segments {
|
||||
fields := segment.Fields()
|
||||
for fieldi, field := range fields {
|
||||
fieldsExist[field] = struct{}{}
|
||||
if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field {
|
||||
fieldsSame = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rv := make([]string, 0, len(fieldsExist))
|
||||
// ensure _id stays first
|
||||
rv = append(rv, "_id")
|
||||
for k := range fieldsExist {
|
||||
if k != "_id" {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(rv[1:]) // leave _id as first
|
||||
|
||||
return fieldsSame, rv
|
||||
}
|
826
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
generated
vendored
Normal file
826
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
generated
vendored
Normal file
@ -0,0 +1,826 @@
|
||||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/couchbase/vellum"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// Tuning knobs for the initial output buffer size estimated by
// AnalysisResultsToSegmentBase.
var (
	// NewSegmentBufferNumResultsBump is added to the number of analysis
	// results when estimating the buffer size.
	NewSegmentBufferNumResultsBump int = 100

	// NewSegmentBufferNumResultsFactor is a multiplier used when
	// estimating the buffer size.
	NewSegmentBufferNumResultsFactor float64 = 1.0

	// NewSegmentBufferAvgBytesPerDocFactor is a multiplier used when
	// estimating the buffer size.
	NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
)
|
||||
|
||||
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
|
||||
// SegmentBase from analysis results
|
||||
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
|
||||
chunkFactor uint32) (*SegmentBase, uint64, error) {
|
||||
s := interimPool.Get().(*interim)
|
||||
|
||||
var br bytes.Buffer
|
||||
if s.lastNumDocs > 0 {
|
||||
// use previous results to initialize the buf with an estimate
|
||||
// size, but note that the interim instance comes from a
|
||||
// global interimPool, so multiple scorch instances indexing
|
||||
// different docs can lead to low quality estimates
|
||||
estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
|
||||
NewSegmentBufferNumResultsFactor)
|
||||
estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
|
||||
NewSegmentBufferAvgBytesPerDocFactor)
|
||||
br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
|
||||
}
|
||||
|
||||
s.results = results
|
||||
s.chunkFactor = chunkFactor
|
||||
s.w = NewCountHashWriter(&br)
|
||||
|
||||
storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
|
||||
err := s.convert()
|
||||
if err != nil {
|
||||
return nil, uint64(0), err
|
||||
}
|
||||
|
||||
sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
|
||||
s.FieldsMap, s.FieldsInv, uint64(len(results)),
|
||||
storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)
|
||||
|
||||
if err == nil && s.reset() == nil {
|
||||
s.lastNumDocs = len(results)
|
||||
s.lastOutSize = len(br.Bytes())
|
||||
interimPool.Put(s)
|
||||
}
|
||||
|
||||
return sb, uint64(len(br.Bytes())), err
|
||||
}
|
||||
|
||||
// interimPool is a sync.Pool of *interim values; New hands out a fresh,
// zero-valued interim.
var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}
|
||||
|
||||
// interim holds temporary working data used while converting from
|
||||
// analysis results to a zap-encoded segment
|
||||
type interim struct {
|
||||
results []*index.AnalysisResult
|
||||
|
||||
chunkFactor uint32
|
||||
|
||||
w *CountHashWriter
|
||||
|
||||
// FieldsMap adds 1 to field id to avoid zero value issues
|
||||
// name -> field id + 1
|
||||
FieldsMap map[string]uint16
|
||||
|
||||
// FieldsInv is the inverse of FieldsMap
|
||||
// field id -> name
|
||||
FieldsInv []string
|
||||
|
||||
// Term dictionaries for each field
|
||||
// field id -> term -> postings list id + 1
|
||||
Dicts []map[string]uint64
|
||||
|
||||
// Terms for each field, where terms are sorted ascending
|
||||
// field id -> []term
|
||||
DictKeys [][]string
|
||||
|
||||
// Fields whose IncludeDocValues is true
|
||||
// field id -> bool
|
||||
IncludeDocValues []bool
|
||||
|
||||
// postings id -> bitmap of docNums
|
||||
Postings []*roaring.Bitmap
|
||||
|
||||
// postings id -> freq/norm's, one for each docNum in postings
|
||||
FreqNorms [][]interimFreqNorm
|
||||
freqNormsBacking []interimFreqNorm
|
||||
|
||||
// postings id -> locs, one for each freq
|
||||
Locs [][]interimLoc
|
||||
locsBacking []interimLoc
|
||||
|
||||
numTermsPerPostingsList []int // key is postings list id
|
||||
numLocsPerPostingsList []int // key is postings list id
|
||||
|
||||
builder *vellum.Builder
|
||||
builderBuf bytes.Buffer
|
||||
|
||||
metaBuf bytes.Buffer
|
||||
|
||||
tmp0 []byte
|
||||
tmp1 []byte
|
||||
|
||||
lastNumDocs int
|
||||
lastOutSize int
|
||||
}
|
||||
|
||||
func (s *interim) reset() (err error) {
|
||||
s.results = nil
|
||||
s.chunkFactor = 0
|
||||
s.w = nil
|
||||
s.FieldsMap = nil
|
||||
s.FieldsInv = nil
|
||||
for i := range s.Dicts {
|
||||
s.Dicts[i] = nil
|
||||
}
|
||||
s.Dicts = s.Dicts[:0]
|
||||
for i := range s.DictKeys {
|
||||
s.DictKeys[i] = s.DictKeys[i][:0]
|
||||
}
|
||||
s.DictKeys = s.DictKeys[:0]
|
||||
for i := range s.IncludeDocValues {
|
||||
s.IncludeDocValues[i] = false
|
||||
}
|
||||
s.IncludeDocValues = s.IncludeDocValues[:0]
|
||||
for _, idn := range s.Postings {
|
||||
idn.Clear()
|
||||
}
|
||||
s.Postings = s.Postings[:0]
|
||||
s.FreqNorms = s.FreqNorms[:0]
|
||||
for i := range s.freqNormsBacking {
|
||||
s.freqNormsBacking[i] = interimFreqNorm{}
|
||||
}
|
||||
s.freqNormsBacking = s.freqNormsBacking[:0]
|
||||
s.Locs = s.Locs[:0]
|
||||
for i := range s.locsBacking {
|
||||
s.locsBacking[i] = interimLoc{}
|
||||
}
|
||||
s.locsBacking = s.locsBacking[:0]
|
||||
s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
|
||||
s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
|
||||
s.builderBuf.Reset()
|
||||
if s.builder != nil {
|
||||
err = s.builder.Reset(&s.builderBuf)
|
||||
}
|
||||
s.metaBuf.Reset()
|
||||
s.tmp0 = s.tmp0[:0]
|
||||
s.tmp1 = s.tmp1[:0]
|
||||
s.lastNumDocs = 0
|
||||
s.lastOutSize = 0
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *interim) grabBuf(size int) []byte {
|
||||
buf := s.tmp0
|
||||
if cap(buf) < size {
|
||||
buf = make([]byte, size)
|
||||
s.tmp0 = buf
|
||||
}
|
||||
return buf[0:size]
|
||||
}
|
||||
|
||||
// interimStoredField groups three parallel slices describing stored field
// values: the raw values, a type byte for each, and their array positions.
type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}
|
||||
|
||||
// interimFreqNorm is one freq/norm entry of a postings list: the term
// frequency, the norm, and the number of locations recorded for the entry.
type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}
|
||||
|
||||
// interimLoc is one location entry of a postings list: the field it
// belongs to, its pos/start/end values and its array positions.
type interimLoc struct {
	fieldID         uint16
	pos, start, end uint64
	arrayposs       []uint64
}
|
||||
|
||||
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
|
||||
s.FieldsMap = map[string]uint16{}
|
||||
|
||||
s.getOrDefineField("_id") // _id field is fieldID 0
|
||||
|
||||
for _, result := range s.results {
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
for _, field := range result.Document.Fields {
|
||||
s.getOrDefineField(field.Name())
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(s.FieldsInv[1:]) // keep _id as first field
|
||||
|
||||
for fieldID, fieldName := range s.FieldsInv {
|
||||
s.FieldsMap[fieldName] = uint16(fieldID + 1)
|
||||
}
|
||||
|
||||
if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
|
||||
s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
|
||||
} else {
|
||||
s.IncludeDocValues = make([]bool, len(s.FieldsInv))
|
||||
}
|
||||
|
||||
s.prepareDicts()
|
||||
|
||||
for _, dict := range s.DictKeys {
|
||||
sort.Strings(dict)
|
||||
}
|
||||
|
||||
s.processDocuments()
|
||||
|
||||
storedIndexOffset, err := s.writeStoredFields()
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
|
||||
var fdvIndexOffset uint64
|
||||
var dictOffsets []uint64
|
||||
|
||||
if len(s.results) > 0 {
|
||||
fdvIndexOffset, dictOffsets, err = s.writeDicts()
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
} else {
|
||||
dictOffsets = make([]uint64, len(s.FieldsInv))
|
||||
}
|
||||
|
||||
fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, err
|
||||
}
|
||||
|
||||
return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
|
||||
}
|
||||
|
||||
func (s *interim) getOrDefineField(fieldName string) int {
|
||||
fieldIDPlus1, exists := s.FieldsMap[fieldName]
|
||||
if !exists {
|
||||
fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
|
||||
s.FieldsMap[fieldName] = fieldIDPlus1
|
||||
s.FieldsInv = append(s.FieldsInv, fieldName)
|
||||
|
||||
s.Dicts = append(s.Dicts, make(map[string]uint64))
|
||||
|
||||
n := len(s.DictKeys)
|
||||
if n < cap(s.DictKeys) {
|
||||
s.DictKeys = s.DictKeys[:n+1]
|
||||
s.DictKeys[n] = s.DictKeys[n][:0]
|
||||
} else {
|
||||
s.DictKeys = append(s.DictKeys, []string(nil))
|
||||
}
|
||||
}
|
||||
|
||||
return int(fieldIDPlus1 - 1)
|
||||
}
|
||||
|
||||
// fill Dicts and DictKeys from analysis results
|
||||
func (s *interim) prepareDicts() {
|
||||
var pidNext int
|
||||
|
||||
var totTFs int
|
||||
var totLocs int
|
||||
|
||||
visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
|
||||
dict := s.Dicts[fieldID]
|
||||
dictKeys := s.DictKeys[fieldID]
|
||||
|
||||
for term, tf := range tfs {
|
||||
pidPlus1, exists := dict[term]
|
||||
if !exists {
|
||||
pidNext++
|
||||
pidPlus1 = uint64(pidNext)
|
||||
|
||||
dict[term] = pidPlus1
|
||||
dictKeys = append(dictKeys, term)
|
||||
|
||||
s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
|
||||
s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
|
||||
}
|
||||
|
||||
pid := pidPlus1 - 1
|
||||
|
||||
s.numTermsPerPostingsList[pid] += 1
|
||||
s.numLocsPerPostingsList[pid] += len(tf.Locations)
|
||||
|
||||
totLocs += len(tf.Locations)
|
||||
}
|
||||
|
||||
totTFs += len(tfs)
|
||||
|
||||
s.DictKeys[fieldID] = dictKeys
|
||||
}
|
||||
|
||||
for _, result := range s.results {
|
||||
// walk each composite field
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
_, tf := field.Analyze()
|
||||
visitField(fieldID, tf)
|
||||
}
|
||||
|
||||
// walk each field
|
||||
for i, field := range result.Document.Fields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
tf := result.Analyzed[i]
|
||||
visitField(fieldID, tf)
|
||||
}
|
||||
}
|
||||
|
||||
numPostingsLists := pidNext
|
||||
|
||||
if cap(s.Postings) >= numPostingsLists {
|
||||
s.Postings = s.Postings[:numPostingsLists]
|
||||
} else {
|
||||
postings := make([]*roaring.Bitmap, numPostingsLists)
|
||||
copy(postings, s.Postings[:cap(s.Postings)])
|
||||
for i := 0; i < numPostingsLists; i++ {
|
||||
if postings[i] == nil {
|
||||
postings[i] = roaring.New()
|
||||
}
|
||||
}
|
||||
s.Postings = postings
|
||||
}
|
||||
|
||||
if cap(s.FreqNorms) >= numPostingsLists {
|
||||
s.FreqNorms = s.FreqNorms[:numPostingsLists]
|
||||
} else {
|
||||
s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
|
||||
}
|
||||
|
||||
if cap(s.freqNormsBacking) >= totTFs {
|
||||
s.freqNormsBacking = s.freqNormsBacking[:totTFs]
|
||||
} else {
|
||||
s.freqNormsBacking = make([]interimFreqNorm, totTFs)
|
||||
}
|
||||
|
||||
freqNormsBacking := s.freqNormsBacking
|
||||
for pid, numTerms := range s.numTermsPerPostingsList {
|
||||
s.FreqNorms[pid] = freqNormsBacking[0:0]
|
||||
freqNormsBacking = freqNormsBacking[numTerms:]
|
||||
}
|
||||
|
||||
if cap(s.Locs) >= numPostingsLists {
|
||||
s.Locs = s.Locs[:numPostingsLists]
|
||||
} else {
|
||||
s.Locs = make([][]interimLoc, numPostingsLists)
|
||||
}
|
||||
|
||||
if cap(s.locsBacking) >= totLocs {
|
||||
s.locsBacking = s.locsBacking[:totLocs]
|
||||
} else {
|
||||
s.locsBacking = make([]interimLoc, totLocs)
|
||||
}
|
||||
|
||||
locsBacking := s.locsBacking
|
||||
for pid, numLocs := range s.numLocsPerPostingsList {
|
||||
s.Locs[pid] = locsBacking[0:0]
|
||||
locsBacking = locsBacking[numLocs:]
|
||||
}
|
||||
}
|
||||
|
||||
func (s *interim) processDocuments() {
|
||||
numFields := len(s.FieldsInv)
|
||||
reuseFieldLens := make([]int, numFields)
|
||||
reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)
|
||||
|
||||
for docNum, result := range s.results {
|
||||
for i := 0; i < numFields; i++ { // clear these for reuse
|
||||
reuseFieldLens[i] = 0
|
||||
reuseFieldTFs[i] = nil
|
||||
}
|
||||
|
||||
s.processDocument(uint64(docNum), result,
|
||||
reuseFieldLens, reuseFieldTFs)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *interim) processDocument(docNum uint64,
|
||||
result *index.AnalysisResult,
|
||||
fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
|
||||
visitField := func(fieldID uint16, fieldName string,
|
||||
ln int, tf analysis.TokenFrequencies) {
|
||||
fieldLens[fieldID] += ln
|
||||
|
||||
existingFreqs := fieldTFs[fieldID]
|
||||
if existingFreqs != nil {
|
||||
existingFreqs.MergeAll(fieldName, tf)
|
||||
} else {
|
||||
fieldTFs[fieldID] = tf
|
||||
}
|
||||
}
|
||||
|
||||
// walk each composite field
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
ln, tf := field.Analyze()
|
||||
visitField(fieldID, field.Name(), ln, tf)
|
||||
}
|
||||
|
||||
// walk each field
|
||||
for i, field := range result.Document.Fields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
ln := result.Length[i]
|
||||
tf := result.Analyzed[i]
|
||||
visitField(fieldID, field.Name(), ln, tf)
|
||||
}
|
||||
|
||||
// now that it's been rolled up into fieldTFs, walk that
|
||||
for fieldID, tfs := range fieldTFs {
|
||||
dict := s.Dicts[fieldID]
|
||||
norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
|
||||
|
||||
for term, tf := range tfs {
|
||||
pid := dict[term] - 1
|
||||
bs := s.Postings[pid]
|
||||
bs.Add(uint32(docNum))
|
||||
|
||||
s.FreqNorms[pid] = append(s.FreqNorms[pid],
|
||||
interimFreqNorm{
|
||||
freq: uint64(tf.Frequency()),
|
||||
norm: norm,
|
||||
numLocs: len(tf.Locations),
|
||||
})
|
||||
|
||||
if len(tf.Locations) > 0 {
|
||||
locs := s.Locs[pid]
|
||||
|
||||
for _, loc := range tf.Locations {
|
||||
var locf = uint16(fieldID)
|
||||
if loc.Field != "" {
|
||||
locf = uint16(s.getOrDefineField(loc.Field))
|
||||
}
|
||||
var arrayposs []uint64
|
||||
if len(loc.ArrayPositions) > 0 {
|
||||
arrayposs = loc.ArrayPositions
|
||||
}
|
||||
locs = append(locs, interimLoc{
|
||||
fieldID: locf,
|
||||
pos: uint64(loc.Position),
|
||||
start: uint64(loc.Start),
|
||||
end: uint64(loc.End),
|
||||
arrayposs: arrayposs,
|
||||
})
|
||||
}
|
||||
|
||||
s.Locs[pid] = locs
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *interim) writeStoredFields() (
|
||||
storedIndexOffset uint64, err error) {
|
||||
varBuf := make([]byte, binary.MaxVarintLen64)
|
||||
metaEncode := func(val uint64) (int, error) {
|
||||
wb := binary.PutUvarint(varBuf, val)
|
||||
return s.metaBuf.Write(varBuf[:wb])
|
||||
}
|
||||
|
||||
data, compressed := s.tmp0[:0], s.tmp1[:0]
|
||||
defer func() { s.tmp0, s.tmp1 = data, compressed }()
|
||||
|
||||
// keyed by docNum
|
||||
docStoredOffsets := make([]uint64, len(s.results))
|
||||
|
||||
// keyed by fieldID, for the current doc in the loop
|
||||
docStoredFields := map[uint16]interimStoredField{}
|
||||
|
||||
for docNum, result := range s.results {
|
||||
for fieldID := range docStoredFields { // reset for next doc
|
||||
delete(docStoredFields, fieldID)
|
||||
}
|
||||
|
||||
for _, field := range result.Document.Fields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
|
||||
opts := field.Options()
|
||||
|
||||
if opts.IsStored() {
|
||||
isf := docStoredFields[fieldID]
|
||||
isf.vals = append(isf.vals, field.Value())
|
||||
isf.typs = append(isf.typs, encodeFieldType(field))
|
||||
isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
|
||||
docStoredFields[fieldID] = isf
|
||||
}
|
||||
|
||||
if opts.IncludeDocValues() {
|
||||
s.IncludeDocValues[fieldID] = true
|
||||
}
|
||||
}
|
||||
|
||||
var curr int
|
||||
|
||||
s.metaBuf.Reset()
|
||||
data = data[:0]
|
||||
|
||||
// _id field special case optimizes ExternalID() lookups
|
||||
idFieldVal := docStoredFields[uint16(0)].vals[0]
|
||||
_, err = metaEncode(uint64(len(idFieldVal)))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// handle non-"_id" fields
|
||||
for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
|
||||
isf, exists := docStoredFields[uint16(fieldID)]
|
||||
if exists {
|
||||
curr, data, err = persistStoredFieldValues(
|
||||
fieldID, isf.vals, isf.typs, isf.arrayposs,
|
||||
curr, metaEncode, data)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metaBytes := s.metaBuf.Bytes()
|
||||
|
||||
compressed = snappy.Encode(compressed[:cap(compressed)], data)
|
||||
|
||||
docStoredOffsets[docNum] = uint64(s.w.Count())
|
||||
|
||||
_, err := writeUvarints(s.w,
|
||||
uint64(len(metaBytes)),
|
||||
uint64(len(idFieldVal)+len(compressed)))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
_, err = s.w.Write(metaBytes)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
_, err = s.w.Write(idFieldVal)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
_, err = s.w.Write(compressed)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
storedIndexOffset = uint64(s.w.Count())
|
||||
|
||||
for _, docStoredOffset := range docStoredOffsets {
|
||||
err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return storedIndexOffset, nil
|
||||
}
|
||||
|
||||
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
|
||||
dictOffsets = make([]uint64, len(s.FieldsInv))
|
||||
|
||||
fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
|
||||
fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))
|
||||
|
||||
buf := s.grabBuf(binary.MaxVarintLen64)
|
||||
|
||||
tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
|
||||
locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
|
||||
fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)
|
||||
|
||||
var docTermMap [][]byte
|
||||
|
||||
if s.builder == nil {
|
||||
s.builder, err = vellum.New(&s.builderBuf, nil)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
for fieldID, terms := range s.DictKeys {
|
||||
if cap(docTermMap) < len(s.results) {
|
||||
docTermMap = make([][]byte, len(s.results))
|
||||
} else {
|
||||
docTermMap = docTermMap[0:len(s.results)]
|
||||
for docNum := range docTermMap { // reset the docTermMap
|
||||
docTermMap[docNum] = docTermMap[docNum][:0]
|
||||
}
|
||||
}
|
||||
|
||||
dict := s.Dicts[fieldID]
|
||||
|
||||
for _, term := range terms { // terms are already sorted
|
||||
pid := dict[term] - 1
|
||||
|
||||
postingsBS := s.Postings[pid]
|
||||
|
||||
freqNorms := s.FreqNorms[pid]
|
||||
freqNormOffset := 0
|
||||
|
||||
locs := s.Locs[pid]
|
||||
locOffset := 0
|
||||
|
||||
postingsItr := postingsBS.Iterator()
|
||||
for postingsItr.HasNext() {
|
||||
docNum := uint64(postingsItr.Next())
|
||||
|
||||
freqNorm := freqNorms[freqNormOffset]
|
||||
|
||||
err = tfEncoder.Add(docNum,
|
||||
encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
|
||||
uint64(math.Float32bits(freqNorm.norm)))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
if freqNorm.numLocs > 0 {
|
||||
numBytesLocs := 0
|
||||
for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
|
||||
numBytesLocs += totalUvarintBytes(
|
||||
uint64(loc.fieldID), loc.pos, loc.start, loc.end,
|
||||
uint64(len(loc.arrayposs)), loc.arrayposs)
|
||||
}
|
||||
|
||||
err = locEncoder.Add(docNum, uint64(numBytesLocs))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
|
||||
err = locEncoder.Add(docNum,
|
||||
uint64(loc.fieldID), loc.pos, loc.start, loc.end,
|
||||
uint64(len(loc.arrayposs)))
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
err = locEncoder.Add(docNum, loc.arrayposs...)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
locOffset += freqNorm.numLocs
|
||||
}
|
||||
|
||||
freqNormOffset++
|
||||
|
||||
docTermMap[docNum] = append(
|
||||
append(docTermMap[docNum], term...),
|
||||
termSeparator)
|
||||
}
|
||||
|
||||
tfEncoder.Close()
|
||||
locEncoder.Close()
|
||||
|
||||
postingsOffset, err :=
|
||||
writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
if postingsOffset > uint64(0) {
|
||||
err = s.builder.Insert([]byte(term), postingsOffset)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
tfEncoder.Reset()
|
||||
locEncoder.Reset()
|
||||
}
|
||||
|
||||
err = s.builder.Close()
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// record where this dictionary starts
|
||||
dictOffsets[fieldID] = uint64(s.w.Count())
|
||||
|
||||
vellumData := s.builderBuf.Bytes()
|
||||
|
||||
// write out the length of the vellum data
|
||||
n := binary.PutUvarint(buf, uint64(len(vellumData)))
|
||||
_, err = s.w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// write this vellum to disk
|
||||
_, err = s.w.Write(vellumData)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// reset vellum for reuse
|
||||
s.builderBuf.Reset()
|
||||
|
||||
err = s.builder.Reset(&s.builderBuf)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
// write the field doc values
|
||||
if s.IncludeDocValues[fieldID] {
|
||||
for docNum, docTerms := range docTermMap {
|
||||
if len(docTerms) > 0 {
|
||||
err = fdvEncoder.Add(uint64(docNum), docTerms)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
err = fdvEncoder.Close()
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
fdvOffsetsStart[fieldID] = uint64(s.w.Count())
|
||||
|
||||
_, err = fdvEncoder.Write()
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
fdvOffsetsEnd[fieldID] = uint64(s.w.Count())
|
||||
|
||||
fdvEncoder.Reset()
|
||||
} else {
|
||||
fdvOffsetsStart[fieldID] = fieldNotUninverted
|
||||
fdvOffsetsEnd[fieldID] = fieldNotUninverted
|
||||
}
|
||||
}
|
||||
|
||||
fdvIndexOffset = uint64(s.w.Count())
|
||||
|
||||
for i := 0; i < len(fdvOffsetsStart); i++ {
|
||||
n := binary.PutUvarint(buf, fdvOffsetsStart[i])
|
||||
_, err := s.w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
|
||||
_, err = s.w.Write(buf[:n])
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return fdvIndexOffset, dictOffsets, nil
|
||||
}
|
||||
|
||||
func encodeFieldType(f document.Field) byte {
|
||||
fieldType := byte('x')
|
||||
switch f.(type) {
|
||||
case *document.TextField:
|
||||
fieldType = 't'
|
||||
case *document.NumericField:
|
||||
fieldType = 'n'
|
||||
case *document.DateTimeField:
|
||||
fieldType = 'd'
|
||||
case *document.BooleanField:
|
||||
fieldType = 'b'
|
||||
case *document.GeoPointField:
|
||||
fieldType = 'g'
|
||||
case *document.CompositeField:
|
||||
fieldType = 'c'
|
||||
}
|
||||
return fieldType
|
||||
}
|
||||
|
||||
// returns the total # of bytes needed to encode the given uint64's
|
||||
// into binary.PutUVarint() encoding
|
||||
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
|
||||
n = numUvarintBytes(a)
|
||||
n += numUvarintBytes(b)
|
||||
n += numUvarintBytes(c)
|
||||
n += numUvarintBytes(d)
|
||||
n += numUvarintBytes(e)
|
||||
for _, v := range more {
|
||||
n += numUvarintBytes(v)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// numUvarintBytes returns the number of bytes needed to encode x in
// binary.PutUvarint() encoding: one byte, plus one more for every
// additional group of 7 significant bits.
func numUvarintBytes(x uint64) (n int) {
	n = 1
	for ; x >= 0x80; x >>= 7 {
		n++
	}
	return n
}
|
790
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
generated
vendored
Normal file
790
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
generated
vendored
Normal file
@ -0,0 +1,790 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
// Static (reflect-reported) sizes in bytes of the types declared in this
// file; they are filled in once by init and used by the Size methods.
var (
	reflectStaticSizePostingsList     int
	reflectStaticSizePostingsIterator int
	reflectStaticSizePosting          int
	reflectStaticSizeLocation         int
)
|
||||
|
||||
func init() {
|
||||
var pl PostingsList
|
||||
reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
|
||||
var pi PostingsIterator
|
||||
reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
|
||||
var p Posting
|
||||
reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
|
||||
var l Location
|
||||
reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
|
||||
}
|
||||
|
||||
// FST or vellum value (uint64) encoding is determined by the top two
|
||||
// highest-order or most significant bits...
|
||||
//
|
||||
// encoding : MSB
|
||||
// name : 63 62 61...to...bit #0 (LSB)
|
||||
// ----------+---+---+---------------------------------------------------
|
||||
// general : 0 | 0 | 62-bits of postingsOffset.
|
||||
// ~ : 0 | 1 | reserved for future.
|
||||
// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
|
||||
// ~ : 1 | 1 | reserved for future.
|
||||
//
|
||||
// Encoding "general" is able to handle all cases, where the
|
||||
// postingsOffset points to more information about the postings for
|
||||
// the term.
|
||||
//
|
||||
// Encoding "1-hit" is used to optimize a commonly seen case when a
|
||||
// term has only a single hit. For example, a term in the _id field
|
||||
// will have only 1 hit. The "1-hit" encoding is used for a term
|
||||
// in a field when...
|
||||
//
|
||||
// - term vector info is disabled for that field;
|
||||
// - and, the term appears in only a single doc for that field;
|
||||
// - and, the term's freq is exactly 1 in that single doc for that field;
|
||||
// - and, the docNum must fit into 31-bits;
|
||||
//
|
||||
// Otherwise, the "general" encoding is used instead.
|
||||
//
|
||||
// In the "1-hit" encoding, the field in that single doc may have
|
||||
// other terms, which is supported in the "1-hit" encoding by the
|
||||
// positive float31 norm.
|
||||
|
||||
const (
	// FSTValEncodingMask selects the two most significant bits of an
	// FST value, which identify how the value is encoded.
	FSTValEncodingMask = uint64(0xc000000000000000)

	// FSTValEncodingGeneral marks the "general" encoding (top bits 00).
	FSTValEncodingGeneral = uint64(0x0000000000000000)

	// FSTValEncoding1Hit marks the "1-hit" encoding (top bits 10).
	FSTValEncoding1Hit = uint64(0x8000000000000000)
)
|
||||
|
||||
func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
|
||||
return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
|
||||
}
|
||||
|
||||
func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
|
||||
return (mask31Bits & v), (mask31Bits & (v >> 31))
|
||||
}
|
||||
|
||||
// mask31Bits has the low 31 bits set; it is the width of the docNum and
// norm components of a "1-hit" encoded FST value.
const mask31Bits = uint64(0x000000007fffffff)
|
||||
|
||||
func under32Bits(x uint64) bool {
|
||||
return x <= mask31Bits
|
||||
}
|
||||
|
||||
// docNum1HitFinished is the sentinel stored in an iterator's docNum1Hit
// once its single "1-hit" posting has been consumed or is excluded.
const docNum1HitFinished = math.MaxUint64
|
||||
|
||||
// PostingsList is an in-memory represenation of a postings list
|
||||
type PostingsList struct {
|
||||
sb *SegmentBase
|
||||
postingsOffset uint64
|
||||
freqOffset uint64
|
||||
locOffset uint64
|
||||
postings *roaring.Bitmap
|
||||
except *roaring.Bitmap
|
||||
|
||||
// when normBits1Hit != 0, then this postings list came from a
|
||||
// 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
|
||||
docNum1Hit uint64
|
||||
normBits1Hit uint64
|
||||
}
|
||||
|
||||
// represents an immutable, empty postings list
|
||||
var emptyPostingsList = &PostingsList{}
|
||||
|
||||
func (p *PostingsList) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
|
||||
|
||||
if p.except != nil {
|
||||
sizeInBytes += int(p.except.GetSizeInBytes())
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
|
||||
if p.normBits1Hit != 0 {
|
||||
receiver.Add(uint32(p.docNum1Hit))
|
||||
return
|
||||
}
|
||||
|
||||
if p.postings != nil {
|
||||
receiver.Or(p.postings)
|
||||
}
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this postings list
|
||||
func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
|
||||
prealloc segment.PostingsIterator) segment.PostingsIterator {
|
||||
if p.normBits1Hit == 0 && p.postings == nil {
|
||||
return emptyPostingsIterator
|
||||
}
|
||||
|
||||
var preallocPI *PostingsIterator
|
||||
pi, ok := prealloc.(*PostingsIterator)
|
||||
if ok && pi != nil {
|
||||
preallocPI = pi
|
||||
}
|
||||
if preallocPI == emptyPostingsIterator {
|
||||
preallocPI = nil
|
||||
}
|
||||
|
||||
return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
|
||||
}
|
||||
|
||||
func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
|
||||
rv *PostingsIterator) *PostingsIterator {
|
||||
if rv == nil {
|
||||
rv = &PostingsIterator{}
|
||||
} else {
|
||||
freqNormReader := rv.freqNormReader
|
||||
if freqNormReader != nil {
|
||||
freqNormReader.Reset([]byte(nil))
|
||||
}
|
||||
|
||||
locReader := rv.locReader
|
||||
if locReader != nil {
|
||||
locReader.Reset([]byte(nil))
|
||||
}
|
||||
|
||||
freqChunkOffsets := rv.freqChunkOffsets[:0]
|
||||
locChunkOffsets := rv.locChunkOffsets[:0]
|
||||
|
||||
nextLocs := rv.nextLocs[:0]
|
||||
nextSegmentLocs := rv.nextSegmentLocs[:0]
|
||||
|
||||
buf := rv.buf
|
||||
|
||||
*rv = PostingsIterator{} // clear the struct
|
||||
|
||||
rv.freqNormReader = freqNormReader
|
||||
rv.locReader = locReader
|
||||
|
||||
rv.freqChunkOffsets = freqChunkOffsets
|
||||
rv.locChunkOffsets = locChunkOffsets
|
||||
|
||||
rv.nextLocs = nextLocs
|
||||
rv.nextSegmentLocs = nextSegmentLocs
|
||||
|
||||
rv.buf = buf
|
||||
}
|
||||
rv.postings = p
|
||||
|
||||
if p.normBits1Hit != 0 {
|
||||
// "1-hit" encoding
|
||||
rv.docNum1Hit = p.docNum1Hit
|
||||
rv.normBits1Hit = p.normBits1Hit
|
||||
|
||||
if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
|
||||
rv.docNum1Hit = docNum1HitFinished
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// "general" encoding, check if empty
|
||||
if p.postings == nil {
|
||||
return rv
|
||||
}
|
||||
|
||||
var n uint64
|
||||
var read int
|
||||
|
||||
// prepare the freq chunk details
|
||||
rv.includeFreqNorm = includeFreq || includeNorm
|
||||
if rv.includeFreqNorm {
|
||||
var numFreqChunks uint64
|
||||
numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
|
||||
rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
|
||||
} else {
|
||||
rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
|
||||
}
|
||||
for i := 0; i < int(numFreqChunks); i++ {
|
||||
rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
}
|
||||
rv.freqChunkStart = p.freqOffset + n
|
||||
}
|
||||
|
||||
// prepare the loc chunk details
|
||||
rv.includeLocs = includeLocs
|
||||
if rv.includeLocs {
|
||||
n = 0
|
||||
var numLocChunks uint64
|
||||
numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
if cap(rv.locChunkOffsets) >= int(numLocChunks) {
|
||||
rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
|
||||
} else {
|
||||
rv.locChunkOffsets = make([]uint64, int(numLocChunks))
|
||||
}
|
||||
for i := 0; i < int(numLocChunks); i++ {
|
||||
rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
}
|
||||
rv.locChunkStart = p.locOffset + n
|
||||
}
|
||||
|
||||
rv.all = p.postings.Iterator()
|
||||
if p.except != nil {
|
||||
rv.ActualBM = roaring.AndNot(p.postings, p.except)
|
||||
rv.Actual = rv.ActualBM.Iterator()
|
||||
} else {
|
||||
rv.ActualBM = p.postings
|
||||
rv.Actual = p.postings.Iterator()
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// Count returns the number of items on this postings list
|
||||
func (p *PostingsList) Count() uint64 {
|
||||
var n uint64
|
||||
if p.normBits1Hit != 0 {
|
||||
n = 1
|
||||
} else if p.postings != nil {
|
||||
n = p.postings.GetCardinality()
|
||||
}
|
||||
var e uint64
|
||||
if p.except != nil {
|
||||
e = p.except.GetCardinality()
|
||||
}
|
||||
if n <= e {
|
||||
return 0
|
||||
}
|
||||
return n - e
|
||||
}
|
||||
|
||||
func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
|
||||
rv.postingsOffset = postingsOffset
|
||||
|
||||
// handle "1-hit" encoding special case
|
||||
if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
|
||||
return rv.init1Hit(postingsOffset)
|
||||
}
|
||||
|
||||
// read the location of the freq/norm details
|
||||
var n uint64
|
||||
var read int
|
||||
|
||||
rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
var postingsLen uint64
|
||||
postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
|
||||
|
||||
if rv.postings == nil {
|
||||
rv.postings = roaring.NewBitmap()
|
||||
}
|
||||
_, err := rv.postings.FromBuffer(roaringBytes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading roaring bitmap: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rv *PostingsList) init1Hit(fstVal uint64) error {
|
||||
docNum, normBits := FSTValDecode1Hit(fstVal)
|
||||
|
||||
rv.docNum1Hit = docNum
|
||||
rv.normBits1Hit = normBits
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PostingsIterator provides a way to iterate through the postings list
|
||||
type PostingsIterator struct {
|
||||
postings *PostingsList
|
||||
all roaring.IntIterable
|
||||
Actual roaring.IntIterable
|
||||
ActualBM *roaring.Bitmap
|
||||
|
||||
currChunk uint32
|
||||
currChunkFreqNorm []byte
|
||||
currChunkLoc []byte
|
||||
|
||||
freqNormReader *bytes.Reader
|
||||
locReader *bytes.Reader
|
||||
|
||||
freqChunkOffsets []uint64
|
||||
freqChunkStart uint64
|
||||
|
||||
locChunkOffsets []uint64
|
||||
locChunkStart uint64
|
||||
|
||||
next Posting // reused across Next() calls
|
||||
nextLocs []Location // reused across Next() calls
|
||||
nextSegmentLocs []segment.Location // reused across Next() calls
|
||||
|
||||
docNum1Hit uint64
|
||||
normBits1Hit uint64
|
||||
|
||||
buf []byte
|
||||
|
||||
includeFreqNorm bool
|
||||
includeLocs bool
|
||||
}
|
||||
|
||||
// emptyPostingsIterator is the shared iterator handed out for postings
// lists without any postings; it is never reused as a preallocated
// iterator.
var emptyPostingsIterator = &PostingsIterator{}
|
||||
|
||||
func (i *PostingsIterator) Size() int {
|
||||
sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
|
||||
len(i.currChunkFreqNorm) +
|
||||
len(i.currChunkLoc) +
|
||||
len(i.freqChunkOffsets)*size.SizeOfUint64 +
|
||||
len(i.locChunkOffsets)*size.SizeOfUint64 +
|
||||
i.next.Size()
|
||||
|
||||
for _, entry := range i.nextLocs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) loadChunk(chunk int) error {
|
||||
if i.includeFreqNorm {
|
||||
if chunk >= len(i.freqChunkOffsets) {
|
||||
return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
|
||||
chunk, len(i.freqChunkOffsets))
|
||||
}
|
||||
|
||||
end, start := i.freqChunkStart, i.freqChunkStart
|
||||
s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
|
||||
start += s
|
||||
end += e
|
||||
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
|
||||
if i.freqNormReader == nil {
|
||||
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
|
||||
} else {
|
||||
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
||||
}
|
||||
}
|
||||
|
||||
if i.includeLocs {
|
||||
if chunk >= len(i.locChunkOffsets) {
|
||||
return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
|
||||
chunk, len(i.locChunkOffsets))
|
||||
}
|
||||
|
||||
end, start := i.locChunkStart, i.locChunkStart
|
||||
s, e := readChunkBoundary(chunk, i.locChunkOffsets)
|
||||
start += s
|
||||
end += e
|
||||
i.currChunkLoc = i.postings.sb.mem[start:end]
|
||||
if i.locReader == nil {
|
||||
i.locReader = bytes.NewReader(i.currChunkLoc)
|
||||
} else {
|
||||
i.locReader.Reset(i.currChunkLoc)
|
||||
}
|
||||
}
|
||||
|
||||
i.currChunk = uint32(chunk)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
|
||||
if i.normBits1Hit != 0 {
|
||||
return 1, i.normBits1Hit, false, nil
|
||||
}
|
||||
|
||||
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
|
||||
}
|
||||
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
|
||||
|
||||
normBits, err := binary.ReadUvarint(i.freqNormReader)
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
|
||||
}
|
||||
|
||||
return freq, normBits, hasLocs, err
|
||||
}
|
||||
|
||||
// encodeFreqHasLocs combines freq and hasLocs into a single value: freq
// is shifted up by one bit and the lowest bit records whether there are
// locations.  The top bit of freq is lost by the shift.
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
	if hasLocs {
		return freq<<1 | 0x01
	}
	return freq << 1
}
|
||||
|
||||
// decodeFreqHasLocs splits a value produced by encodeFreqHasLocs into
// the freq (all bits above the lowest) and the has-locations flag (the
// lowest bit).
func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
	return freqHasLocs >> 1, freqHasLocs&0x01 == 0x01
}
|
||||
|
||||
// readLocation processes all the integers on the stream representing a single
|
||||
// location. if you care about it, pass in a non-nil location struct, and we
|
||||
// will fill it. if you don't care about it, pass in nil and we safely consume
|
||||
// the contents.
|
||||
func (i *PostingsIterator) readLocation(l *Location) error {
|
||||
// read off field
|
||||
fieldID, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location field: %v", err)
|
||||
}
|
||||
// read off pos
|
||||
pos, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location pos: %v", err)
|
||||
}
|
||||
// read off start
|
||||
start, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location start: %v", err)
|
||||
}
|
||||
// read off end
|
||||
end, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location end: %v", err)
|
||||
}
|
||||
// read off num array pos
|
||||
numArrayPos, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location num array pos: %v", err)
|
||||
}
|
||||
|
||||
// group these together for less branching
|
||||
if l != nil {
|
||||
l.field = i.postings.sb.fieldsInv[fieldID]
|
||||
l.pos = pos
|
||||
l.start = start
|
||||
l.end = end
|
||||
if cap(l.ap) < int(numArrayPos) {
|
||||
l.ap = make([]uint64, int(numArrayPos))
|
||||
} else {
|
||||
l.ap = l.ap[:int(numArrayPos)]
|
||||
}
|
||||
}
|
||||
|
||||
// read off array positions
|
||||
for k := 0; k < int(numArrayPos); k++ {
|
||||
ap, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading array position: %v", err)
|
||||
}
|
||||
if l != nil {
|
||||
l.ap[k] = ap
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
func (i *PostingsIterator) Next() (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(0)
|
||||
}
|
||||
|
||||
// Advance returns the posting at the specified docNum or it is not present
|
||||
// the next posting, or if the end is reached, nil
|
||||
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(docNum)
|
||||
}
|
||||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
|
||||
docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
|
||||
if err != nil || !exists {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
i.next = Posting{} // clear the struct
|
||||
rv := &i.next
|
||||
rv.docNum = docNum
|
||||
|
||||
if !i.includeFreqNorm {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
var normBits uint64
|
||||
var hasLocs bool
|
||||
|
||||
rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv.norm = math.Float32frombits(uint32(normBits))
|
||||
|
||||
if i.includeLocs && hasLocs {
|
||||
// prepare locations into reused slices, where we assume
|
||||
// rv.freq >= "number of locs", since in a composite field,
|
||||
// some component fields might have their IncludeTermVector
|
||||
// flags disabled while other component fields are enabled
|
||||
if cap(i.nextLocs) >= int(rv.freq) {
|
||||
i.nextLocs = i.nextLocs[0:rv.freq]
|
||||
} else {
|
||||
i.nextLocs = make([]Location, rv.freq, rv.freq*2)
|
||||
}
|
||||
if cap(i.nextSegmentLocs) < int(rv.freq) {
|
||||
i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
|
||||
}
|
||||
rv.locs = i.nextSegmentLocs[:0]
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
||||
j := 0
|
||||
startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
|
||||
for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
|
||||
err := i.readLocation(&i.nextLocs[j])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.locs = append(rv.locs, &i.nextLocs[j])
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// freqHasLocs1Hit is the encoded freq/has-locations value of a "1-hit"
// posting: a freq of 1 and no locations.
var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
|
||||
|
||||
// nextBytes returns the docNum and the encoded freq & loc bytes for
|
||||
// the next posting
|
||||
func (i *PostingsIterator) nextBytes() (
|
||||
docNumOut uint64, freq uint64, normBits uint64,
|
||||
bytesFreqNorm []byte, bytesLoc []byte, err error) {
|
||||
docNum, exists, err := i.nextDocNumAtOrAfter(0)
|
||||
if err != nil || !exists {
|
||||
return 0, 0, 0, nil, nil, err
|
||||
}
|
||||
|
||||
if i.normBits1Hit != 0 {
|
||||
if i.buf == nil {
|
||||
i.buf = make([]byte, binary.MaxVarintLen64*2)
|
||||
}
|
||||
n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
|
||||
n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
|
||||
return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
|
||||
}
|
||||
|
||||
startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||
|
||||
var hasLocs bool
|
||||
|
||||
freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil, err
|
||||
}
|
||||
|
||||
endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
|
||||
bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
|
||||
|
||||
if hasLocs {
|
||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil,
|
||||
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil, err
|
||||
}
|
||||
|
||||
endLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||
}
|
||||
|
||||
return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
|
||||
}
|
||||
|
||||
// nextDocNum returns the next docNum on the postings list, and also
|
||||
// sets up the currChunk / loc related fields of the iterator.
|
||||
func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
|
||||
if i.normBits1Hit != 0 {
|
||||
if i.docNum1Hit == docNum1HitFinished {
|
||||
return 0, false, nil
|
||||
}
|
||||
if i.docNum1Hit < atOrAfter {
|
||||
// advanced past our 1-hit
|
||||
i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum
|
||||
return 0, false, nil
|
||||
}
|
||||
docNum := i.docNum1Hit
|
||||
i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum
|
||||
return docNum, true, nil
|
||||
}
|
||||
|
||||
if i.Actual == nil || !i.Actual.HasNext() {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
n := i.Actual.Next()
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
n = i.Actual.Next()
|
||||
}
|
||||
if uint64(n) < atOrAfter {
|
||||
// couldn't find anything
|
||||
return 0, false, nil
|
||||
}
|
||||
allN := i.all.Next()
|
||||
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
allNChunk := allN / i.postings.sb.chunkFactor
|
||||
|
||||
// n is the next actual hit (excluding some postings), and
|
||||
// allN is the next hit in the full postings, and
|
||||
// if they don't match, move 'all' forwards until they do
|
||||
for allN != n {
|
||||
// in the same chunk, so move the freq/norm/loc decoders forward
|
||||
if i.includeFreqNorm && allNChunk == nChunk {
|
||||
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// read off freq/offsets even though we don't care about them
|
||||
_, _, hasLocs, err := i.readFreqNormHasLocs()
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
|
||||
if i.includeLocs && hasLocs {
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
if err != nil {
|
||||
return 0, false, fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allN = i.all.Next()
|
||||
allNChunk = allN / i.postings.sb.chunkFactor
|
||||
}
|
||||
|
||||
if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
|
||||
err := i.loadChunk(int(nChunk))
|
||||
if err != nil {
|
||||
return 0, false, fmt.Errorf("error loading chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return uint64(n), true, nil
|
||||
}
|
||||
|
||||
// Posting is a single entry in a postings list
|
||||
type Posting struct {
|
||||
docNum uint64
|
||||
freq uint64
|
||||
norm float32
|
||||
locs []segment.Location
|
||||
}
|
||||
|
||||
func (p *Posting) Size() int {
|
||||
sizeInBytes := reflectStaticSizePosting
|
||||
|
||||
for _, entry := range p.locs {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Number returns the document number of this posting in this segment
|
||||
func (p *Posting) Number() uint64 {
|
||||
return p.docNum
|
||||
}
|
||||
|
||||
// Frequency returns the frequence of occurance of this term in this doc/field
|
||||
func (p *Posting) Frequency() uint64 {
|
||||
return p.freq
|
||||
}
|
||||
|
||||
// Norm returns the normalization factor for this posting
|
||||
func (p *Posting) Norm() float64 {
|
||||
return float64(p.norm)
|
||||
}
|
||||
|
||||
// Locations returns the location information for each occurance
|
||||
func (p *Posting) Locations() []segment.Location {
|
||||
return p.locs
|
||||
}
|
||||
|
||||
// Location represents the location of a single occurrence.
type Location struct {
	field           string   // exposed via Field()
	pos, start, end uint64   // exposed via Pos(), Start() and End()
	ap              []uint64 // array positions, exposed via ArrayPositions()
}
|
||||
|
||||
func (l *Location) Size() int {
|
||||
return reflectStaticSizeLocation +
|
||||
len(l.field) +
|
||||
len(l.ap)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
// Field returns the name of the field (useful in composite fields to know
|
||||
// which original field the value came from)
|
||||
func (l *Location) Field() string {
|
||||
return l.field
|
||||
}
|
||||
|
||||
// Start returns the start byte offset of this occurance
|
||||
func (l *Location) Start() uint64 {
|
||||
return l.start
|
||||
}
|
||||
|
||||
// End returns the end byte offset of this occurance
|
||||
func (l *Location) End() uint64 {
|
||||
return l.end
|
||||
}
|
||||
|
||||
// Pos returns the 1-based phrase position of this occurance
|
||||
func (l *Location) Pos() uint64 {
|
||||
return l.pos
|
||||
}
|
||||
|
||||
// ArrayPositions returns the array position vector associated with this occurance
|
||||
func (l *Location) ArrayPositions() []uint64 {
|
||||
return l.ap
|
||||
}
|
43
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go
generated
vendored
Normal file
43
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go
generated
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
|
||||
_, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum)
|
||||
|
||||
meta := s.mem[storedOffset+n : storedOffset+n+metaLen]
|
||||
data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
|
||||
|
||||
return meta, data
|
||||
}
|
||||
|
||||
func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
|
||||
uint64, uint64, uint64, uint64, uint64) {
|
||||
indexOffset := s.storedIndexOffset + (8 * docNum)
|
||||
|
||||
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
|
||||
|
||||
var n uint64
|
||||
|
||||
metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
|
||||
n += uint64(read)
|
||||
|
||||
return indexOffset, storedOffset, n, metaLen, dataLen
|
||||
}
|
534
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
generated
vendored
Normal file
534
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
generated
vendored
Normal file
@ -0,0 +1,534 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/couchbase/vellum"
|
||||
mmap "github.com/edsrzf/mmap-go"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSegmentBase int
|
||||
|
||||
func init() {
|
||||
var sb SegmentBase
|
||||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||
}
|
||||
|
||||
// Open returns a zap impl of a segment
|
||||
func Open(path string) (segment.Segment, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mm, err := mmap.Map(f, mmap.RDONLY, 0)
|
||||
if err != nil {
|
||||
// mmap failed, try to close the file
|
||||
_ = f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := &Segment{
|
||||
SegmentBase: SegmentBase{
|
||||
mem: mm[0 : len(mm)-FooterSize],
|
||||
fieldsMap: make(map[string]uint16),
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
},
|
||||
f: f,
|
||||
mm: mm,
|
||||
path: path,
|
||||
refs: 1,
|
||||
}
|
||||
rv.SegmentBase.updateSize()
|
||||
|
||||
err = rv.loadConfig()
|
||||
if err != nil {
|
||||
_ = rv.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = rv.loadFields()
|
||||
if err != nil {
|
||||
_ = rv.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = rv.loadDvReaders()
|
||||
if err != nil {
|
||||
_ = rv.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// SegmentBase is a memory only, read-only implementation of the
|
||||
// segment.Segment interface, using zap's data representation.
|
||||
type SegmentBase struct {
|
||||
mem []byte
|
||||
memCRC uint32
|
||||
chunkFactor uint32
|
||||
fieldsMap map[string]uint16 // fieldName -> fieldID+1
|
||||
fieldsInv []string // fieldID -> fieldName
|
||||
numDocs uint64
|
||||
storedIndexOffset uint64
|
||||
fieldsIndexOffset uint64
|
||||
docValueOffset uint64
|
||||
dictLocs []uint64
|
||||
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) Size() int {
|
||||
return int(sb.size)
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) updateSize() {
|
||||
sizeInBytes := reflectStaticSizeSegmentBase +
|
||||
cap(sb.mem)
|
||||
|
||||
// fieldsMap
|
||||
for k, _ := range sb.fieldsMap {
|
||||
sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
|
||||
}
|
||||
|
||||
// fieldsInv, dictLocs
|
||||
for _, entry := range sb.fieldsInv {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
|
||||
|
||||
// fieldDvReaders
|
||||
for _, v := range sb.fieldDvReaders {
|
||||
sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
|
||||
if v != nil {
|
||||
sizeInBytes += v.size()
|
||||
}
|
||||
}
|
||||
|
||||
sb.size = uint64(sizeInBytes)
|
||||
}
|
||||
|
||||
// AddRef is a no-op: SegmentBase itself keeps no reference count.
func (sb *SegmentBase) AddRef() {}
|
||||
// DecRef is a no-op that always returns nil.
func (sb *SegmentBase) DecRef() (err error) { return nil }
|
||||
// Close is a no-op that always returns nil.
func (sb *SegmentBase) Close() (err error) { return nil }
|
||||
|
||||
// Segment implements a persisted segment.Segment interface, by
|
||||
// embedding an mmap()'ed SegmentBase.
|
||||
type Segment struct {
|
||||
SegmentBase
|
||||
|
||||
f *os.File
|
||||
mm mmap.MMap
|
||||
path string
|
||||
version uint32
|
||||
crc uint32
|
||||
|
||||
m sync.Mutex // Protects the fields that follow.
|
||||
refs int64
|
||||
}
|
||||
|
||||
func (s *Segment) Size() int {
|
||||
// 8 /* size of file pointer */
|
||||
// 4 /* size of version -> uint32 */
|
||||
// 4 /* size of crc -> uint32 */
|
||||
sizeOfUints := 16
|
||||
|
||||
sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
|
||||
|
||||
// mutex, refs -> int64
|
||||
sizeInBytes += 16
|
||||
|
||||
// do not include the mmap'ed part
|
||||
return sizeInBytes + s.SegmentBase.Size() - cap(s.mem)
|
||||
}
|
||||
|
||||
func (s *Segment) AddRef() {
|
||||
s.m.Lock()
|
||||
s.refs++
|
||||
s.m.Unlock()
|
||||
}
|
||||
|
||||
func (s *Segment) DecRef() (err error) {
|
||||
s.m.Lock()
|
||||
s.refs--
|
||||
if s.refs == 0 {
|
||||
err = s.closeActual()
|
||||
}
|
||||
s.m.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Segment) loadConfig() error {
|
||||
crcOffset := len(s.mm) - 4
|
||||
s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4])
|
||||
|
||||
verOffset := crcOffset - 4
|
||||
s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4])
|
||||
if s.version != Version {
|
||||
return fmt.Errorf("unsupported version %d", s.version)
|
||||
}
|
||||
|
||||
chunkOffset := verOffset - 4
|
||||
s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4])
|
||||
|
||||
docValueOffset := chunkOffset - 8
|
||||
s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8])
|
||||
|
||||
fieldsIndexOffset := docValueOffset - 8
|
||||
s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsIndexOffset : fieldsIndexOffset+8])
|
||||
|
||||
storedIndexOffset := fieldsIndexOffset - 8
|
||||
s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedIndexOffset : storedIndexOffset+8])
|
||||
|
||||
numDocsOffset := storedIndexOffset - 8
|
||||
s.numDocs = binary.BigEndian.Uint64(s.mm[numDocsOffset : numDocsOffset+8])
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SegmentBase) loadFields() error {
|
||||
// NOTE for now we assume the fields index immediately preceeds
|
||||
// the footer, and if this changes, need to adjust accordingly (or
|
||||
// store explicit length), where s.mem was sliced from s.mm in Open().
|
||||
fieldsIndexEnd := uint64(len(s.mem))
|
||||
|
||||
// iterate through fields index
|
||||
var fieldID uint64
|
||||
for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
|
||||
addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8])
|
||||
|
||||
dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd])
|
||||
n := uint64(read)
|
||||
s.dictLocs = append(s.dictLocs, dictLoc)
|
||||
|
||||
var nameLen uint64
|
||||
nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd])
|
||||
n += uint64(read)
|
||||
|
||||
name := string(s.mem[addr+n : addr+n+nameLen])
|
||||
s.fieldsInv = append(s.fieldsInv, name)
|
||||
s.fieldsMap[name] = uint16(fieldID + 1)
|
||||
|
||||
fieldID++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Dictionary returns the term dictionary for the specified field
|
||||
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) {
|
||||
dict, err := s.dictionary(field)
|
||||
if err == nil && dict == nil {
|
||||
return &segment.EmptyDictionary{}, nil
|
||||
}
|
||||
return dict, err
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
|
||||
fieldIDPlus1 := sb.fieldsMap[field]
|
||||
if fieldIDPlus1 > 0 {
|
||||
rv = &Dictionary{
|
||||
sb: sb,
|
||||
field: field,
|
||||
fieldID: fieldIDPlus1 - 1,
|
||||
}
|
||||
|
||||
dictStart := sb.dictLocs[rv.fieldID]
|
||||
if dictStart > 0 {
|
||||
// read the length of the vellum data
|
||||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
|
||||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
|
||||
if fstBytes != nil {
|
||||
rv.fst, err = vellum.Load(fstBytes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
|
||||
}
|
||||
rv.fstReader, err = rv.fst.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum Reader err: %v", field, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// visitDocumentCtx holds data structures that are reusable across
// multiple VisitDocument() calls to avoid memory allocations.
type visitDocumentCtx struct {
	buf      []byte       // destination buffer handed to snappy.Decode
	reader   bytes.Reader // reset onto each document's stored-field metadata
	arrayPos []uint64     // scratch space for a field's array positions
}
|
||||
|
||||
var visitDocumentCtxPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
reuse := &visitDocumentCtx{}
|
||||
return reuse
|
||||
},
|
||||
}
|
||||
|
||||
// VisitDocument invokes the DocFieldValueVistor for each stored field
|
||||
// for the specified doc number
|
||||
func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
|
||||
// first make sure this is a valid number in this segment
|
||||
if num < s.numDocs {
|
||||
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
|
||||
|
||||
meta, compressed := s.getDocStoredMetaAndCompressed(num)
|
||||
|
||||
vdc.reader.Reset(meta)
|
||||
|
||||
// handle _id field special case
|
||||
idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
idFieldVal := compressed[:idFieldValLen]
|
||||
|
||||
keepGoing := visitor("_id", byte('t'), idFieldVal, nil)
|
||||
if !keepGoing {
|
||||
visitDocumentCtxPool.Put(vdc)
|
||||
return nil
|
||||
}
|
||||
|
||||
// handle non-"_id" fields
|
||||
compressed = compressed[idFieldValLen:]
|
||||
|
||||
uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for keepGoing {
|
||||
field, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
typ, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
offset, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
l, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
numap, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var arrayPos []uint64
|
||||
if numap > 0 {
|
||||
if cap(vdc.arrayPos) < int(numap) {
|
||||
vdc.arrayPos = make([]uint64, numap)
|
||||
}
|
||||
arrayPos = vdc.arrayPos[:numap]
|
||||
for i := 0; i < int(numap); i++ {
|
||||
ap, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
arrayPos[i] = ap
|
||||
}
|
||||
}
|
||||
|
||||
value := uncompressed[offset : offset+l]
|
||||
keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
|
||||
}
|
||||
|
||||
vdc.buf = uncompressed
|
||||
visitDocumentCtxPool.Put(vdc)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DocID returns the value of the _id field for the given docNum
|
||||
func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
|
||||
if num >= s.numDocs {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
|
||||
|
||||
meta, compressed := s.getDocStoredMetaAndCompressed(num)
|
||||
|
||||
vdc.reader.Reset(meta)
|
||||
|
||||
// handle _id field special case
|
||||
idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
idFieldVal := compressed[:idFieldValLen]
|
||||
|
||||
visitDocumentCtxPool.Put(vdc)
|
||||
|
||||
return idFieldVal, nil
|
||||
}
|
||||
|
||||
// Count returns the number of documents in this segment.
|
||||
func (s *SegmentBase) Count() uint64 {
|
||||
return s.numDocs
|
||||
}
|
||||
|
||||
// DocNumbers returns a bitset corresponding to the doc numbers of all the
|
||||
// provided _id strings
|
||||
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
|
||||
rv := roaring.New()
|
||||
|
||||
if len(s.fieldsMap) > 0 {
|
||||
idDict, err := s.dictionary("_id")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
postingsList := emptyPostingsList
|
||||
for _, id := range ids {
|
||||
postingsList, err = idDict.postingsList([]byte(id), nil, postingsList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
postingsList.OrInto(rv)
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Fields returns the field names used in this segment
|
||||
func (s *SegmentBase) Fields() []string {
|
||||
return s.fieldsInv
|
||||
}
|
||||
|
||||
// Path returns the path of this segment on disk
|
||||
func (s *Segment) Path() string {
|
||||
return s.path
|
||||
}
|
||||
|
||||
// Close releases all resources associated with this segment
|
||||
func (s *Segment) Close() (err error) {
|
||||
return s.DecRef()
|
||||
}
|
||||
|
||||
func (s *Segment) closeActual() (err error) {
|
||||
if s.mm != nil {
|
||||
err = s.mm.Unmap()
|
||||
}
|
||||
// try to close file even if unmap failed
|
||||
if s.f != nil {
|
||||
err2 := s.f.Close()
|
||||
if err == nil {
|
||||
// try to return first error
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Some helpers added for the command-line utility.
|
||||
|
||||
// Data returns the underlying mmaped data slice
|
||||
func (s *Segment) Data() []byte {
|
||||
return s.mm
|
||||
}
|
||||
|
||||
// CRC returns the CRC value stored in the file footer
|
||||
func (s *Segment) CRC() uint32 {
|
||||
return s.crc
|
||||
}
|
||||
|
||||
// Version returns the file version in the file footer
|
||||
func (s *Segment) Version() uint32 {
|
||||
return s.version
|
||||
}
|
||||
|
||||
// ChunkFactor returns the chunk factor in the file footer
|
||||
func (s *Segment) ChunkFactor() uint32 {
|
||||
return s.chunkFactor
|
||||
}
|
||||
|
||||
// FieldsIndexOffset returns the fields index offset in the file footer
|
||||
func (s *Segment) FieldsIndexOffset() uint64 {
|
||||
return s.fieldsIndexOffset
|
||||
}
|
||||
|
||||
// StoredIndexOffset returns the stored value index offset in the file footer
|
||||
func (s *Segment) StoredIndexOffset() uint64 {
|
||||
return s.storedIndexOffset
|
||||
}
|
||||
|
||||
// DocValueOffset returns the docValue offset in the file footer
|
||||
func (s *Segment) DocValueOffset() uint64 {
|
||||
return s.docValueOffset
|
||||
}
|
||||
|
||||
// NumDocs returns the number of documents in the file footer
|
||||
func (s *Segment) NumDocs() uint64 {
|
||||
return s.numDocs
|
||||
}
|
||||
|
||||
// DictAddr is a helper function to compute the file offset where the
|
||||
// dictionary is stored for the specified field.
|
||||
func (s *Segment) DictAddr(field string) (uint64, error) {
|
||||
fieldIDPlus1, ok := s.fieldsMap[field]
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("no such field '%s'", field)
|
||||
}
|
||||
|
||||
return s.dictLocs[fieldIDPlus1-1], nil
|
||||
}
|
||||
|
||||
func (s *SegmentBase) loadDvReaders() error {
|
||||
if s.docValueOffset == fieldNotUninverted {
|
||||
return nil
|
||||
}
|
||||
|
||||
var read uint64
|
||||
for fieldID, field := range s.fieldsInv {
|
||||
var fieldLocStart, fieldLocEnd uint64
|
||||
var n int
|
||||
fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID)
|
||||
}
|
||||
read += uint64(n)
|
||||
fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID)
|
||||
}
|
||||
read += uint64(n)
|
||||
|
||||
s.fieldDvReaders[uint16(fieldID)], _ = s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
|
||||
}
|
||||
return nil
|
||||
}
|
145
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
generated
vendored
Normal file
145
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zap
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
)
|
||||
|
||||
// writes out the length of the roaring bitmap in bytes as varint
|
||||
// then writes out the roaring bitmap itself
|
||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
|
||||
reuseBufVarint []byte) (int, error) {
|
||||
buf, err := r.ToBytes()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var tw int
|
||||
|
||||
// write out the length
|
||||
n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
|
||||
nw, err := w.Write(reuseBufVarint[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
// write out the roaring bytes
|
||||
nw, err = w.Write(buf)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
|
||||
var rv uint64
|
||||
var fieldsOffsets []uint64
|
||||
|
||||
for fieldID, fieldName := range fieldsInv {
|
||||
// record start of this field
|
||||
fieldsOffsets = append(fieldsOffsets, uint64(w.Count()))
|
||||
|
||||
// write out the dict location and field name length
|
||||
_, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName)))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write out the field name
|
||||
_, err = w.Write([]byte(fieldName))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// now write out the fields index
|
||||
rv = uint64(w.Count())
|
||||
for fieldID := range fieldsInv {
|
||||
err := binary.Write(w, binary.BigEndian, fieldsOffsets[fieldID])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// FooterSize is the size of the footer record in bytes. In the order
// written by persistFooter: num docs (8) + stored index offset (8) +
// fields index offset (8) + docValueOffset (8) + chunk factor (4) +
// version (4) + crc (4).
const FooterSize = 8 + 8 + 8 + 8 + 4 + 4 + 4
|
||||
|
||||
func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
|
||||
chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error {
|
||||
w := NewCountHashWriter(writerIn)
|
||||
w.crc = crcBeforeFooter
|
||||
|
||||
// write out the number of docs
|
||||
err := binary.Write(w, binary.BigEndian, numDocs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out the stored field index location:
|
||||
err = binary.Write(w, binary.BigEndian, storedIndexOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out the field index location
|
||||
err = binary.Write(w, binary.BigEndian, fieldsIndexOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out the fieldDocValue location
|
||||
err = binary.Write(w, binary.BigEndian, docValueOffset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out 32-bit chunk factor
|
||||
err = binary.Write(w, binary.BigEndian, chunkFactor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out 32-bit version
|
||||
err = binary.Write(w, binary.BigEndian, Version)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write out CRC-32 of everything upto but not including this CRC
|
||||
err = binary.Write(w, binary.BigEndian, w.crc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeUvarints writes each value to w as a uvarint, in order. It returns
// the total number of bytes written and stops at the first write error.
func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) {
	scratch := make([]byte, binary.MaxVarintLen64)
	for idx := range vals {
		width := binary.PutUvarint(scratch, vals[idx])
		written, werr := w.Write(scratch[:width])
		tw += written
		if werr != nil {
			return tw, werr
		}
	}
	return tw, nil
}
|
638
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
Normal file
638
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
Normal file
@ -0,0 +1,638 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"container/heap"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
index int
|
||||
docs *roaring.Bitmap
|
||||
|
||||
postings segment.PostingsList
|
||||
|
||||
err error
|
||||
}
|
||||
|
||||
var reflectStaticSizeIndexSnapshot int
|
||||
|
||||
func init() {
|
||||
var is interface{} = IndexSnapshot{}
|
||||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshot struct {
|
||||
parent *Scorch
|
||||
segment []*SegmentSnapshot
|
||||
offsets []uint64
|
||||
internal map[string][]byte
|
||||
epoch uint64
|
||||
size uint64
|
||||
creator string
|
||||
|
||||
m sync.Mutex // Protects the fields that follow.
|
||||
refs int64
|
||||
|
||||
m2 sync.Mutex // Protects the fields that follow.
|
||||
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
|
||||
fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
|
||||
return i.segment
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Internal() map[string][]byte {
|
||||
return i.internal
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) AddRef() {
|
||||
i.m.Lock()
|
||||
i.refs++
|
||||
i.m.Unlock()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DecRef() (err error) {
|
||||
i.m.Lock()
|
||||
i.refs--
|
||||
if i.refs == 0 {
|
||||
for _, s := range i.segment {
|
||||
if s != nil {
|
||||
err2 := s.segment.DecRef()
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
}
|
||||
if i.parent != nil {
|
||||
go i.parent.AddEligibleForRemoval(i.epoch)
|
||||
}
|
||||
}
|
||||
i.m.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Close() error {
|
||||
return i.DecRef()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Size() int {
|
||||
return int(i.size)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) updateSize() {
|
||||
i.size += uint64(reflectStaticSizeIndexSnapshot)
|
||||
for _, s := range i.segment {
|
||||
i.size += uint64(s.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
dict, err := segment.Dictionary(field)
|
||||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
var err error
|
||||
rv := &IndexSnapshotFieldDict{
|
||||
snapshot: i,
|
||||
cursors: make([]*segmentDictCursor, 0, len(i.segment)),
|
||||
}
|
||||
for count := 0; count < len(i.segment); count++ {
|
||||
asr := <-results
|
||||
if asr.err != nil && err == nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
// after ensuring we've read all items on channel
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.Iterator()
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
||||
endTerm []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RangeIterator(string(startTerm), string(endTerm))
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
||||
termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.PrefixIterator(string(termPrefix))
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRegexp(field string,
|
||||
termRegex []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RegexpIterator(string(termRegex))
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictFuzzy(field string,
|
||||
term []byte, fuzziness int) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.FuzzyIterator(string(term), fuzziness)
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictOnly(field string,
|
||||
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.OnlyIterator(onlyTerms, includeCount)
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
results <- &asynchSegmentResult{
|
||||
index: index,
|
||||
docs: segment.DocNumbersLive(),
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
return i.newDocIDReader(results)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
docs, err := segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{
|
||||
index: index,
|
||||
docs: docs,
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
return i.newDocIDReader(results)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
|
||||
rv := &IndexSnapshotDocIDReader{
|
||||
snapshot: i,
|
||||
iterators: make([]roaring.IntIterable, len(i.segment)),
|
||||
}
|
||||
var err error
|
||||
for count := 0; count < len(i.segment); count++ {
|
||||
asr := <-results
|
||||
if asr.err != nil && err != nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
rv.iterators[asr.index] = asr.docs.Iterator()
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Fields() ([]string, error) {
|
||||
// FIXME not making this concurrent for now as it's not used in hot path
|
||||
// of any searches at the moment (just a debug aid)
|
||||
fieldsMap := map[string]struct{}{}
|
||||
for _, segment := range i.segment {
|
||||
fields := segment.Fields()
|
||||
for _, field := range fields {
|
||||
fieldsMap[field] = struct{}{}
|
||||
}
|
||||
}
|
||||
rv := make([]string, 0, len(fieldsMap))
|
||||
for k := range fieldsMap {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
|
||||
return i.internal[string(key)], nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocCount() (uint64, error) {
|
||||
var rv uint64
|
||||
for _, segment := range i.segment {
|
||||
rv += segment.Count()
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) {
|
||||
// FIXME could be done more efficiently directly, but reusing for simplicity
|
||||
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := tfr.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
next, err := tfr.Next(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if next == nil {
|
||||
// no such doc exists
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, err := docInternalToNumber(next.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
|
||||
rv = document.NewDocument(id)
|
||||
err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool {
|
||||
if name == "_id" {
|
||||
return true
|
||||
}
|
||||
switch typ {
|
||||
case 't':
|
||||
rv.AddField(document.NewTextField(name, pos, value))
|
||||
case 'n':
|
||||
rv.AddField(document.NewNumericFieldFromBytes(name, pos, value))
|
||||
case 'd':
|
||||
rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value))
|
||||
case 'b':
|
||||
rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value))
|
||||
case 'g':
|
||||
rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value))
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
|
||||
segmentIndex := sort.Search(len(i.offsets),
|
||||
func(x int) bool {
|
||||
return i.offsets[x] > docNum
|
||||
}) - 1
|
||||
|
||||
localDocNum := docNum - i.offsets[segmentIndex]
|
||||
return int(segmentIndex), localDocNum
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
|
||||
v, err := i.segment[segmentIndex].DocID(localDocNum)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if v == nil {
|
||||
return "", fmt.Errorf("document number %d not found", docNum)
|
||||
}
|
||||
|
||||
return string(v), nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
|
||||
// FIXME could be done more efficiently directly, but reusing for simplicity
|
||||
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := tfr.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
next, err := tfr.Next(nil)
|
||||
if err != nil || next == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return next.ID, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
||||
includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) {
|
||||
rv, dicts := i.allocTermFieldReaderDicts(field)
|
||||
|
||||
rv.term = term
|
||||
rv.field = field
|
||||
rv.snapshot = i
|
||||
if rv.postings == nil {
|
||||
rv.postings = make([]segment.PostingsList, len(i.segment))
|
||||
}
|
||||
if rv.iterators == nil {
|
||||
rv.iterators = make([]segment.PostingsIterator, len(i.segment))
|
||||
}
|
||||
rv.segmentOffset = 0
|
||||
rv.includeFreq = includeFreq
|
||||
rv.includeNorm = includeNorm
|
||||
rv.includeTermVectors = includeTermVectors
|
||||
rv.currPosting = nil
|
||||
rv.currID = rv.currID[:0]
|
||||
|
||||
if dicts == nil {
|
||||
dicts = make([]segment.TermDictionary, len(i.segment))
|
||||
for i, segment := range i.segment {
|
||||
dict, err := segment.Dictionary(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dicts[i] = dict
|
||||
}
|
||||
}
|
||||
rv.dicts = dicts
|
||||
|
||||
for i := range i.segment {
|
||||
pl, err := dicts[i].PostingsList(term, nil, rv.postings[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.postings[i] = pl
|
||||
rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i])
|
||||
}
|
||||
atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (
|
||||
tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) {
|
||||
i.m2.Lock()
|
||||
if i.fieldDicts != nil {
|
||||
dicts = i.fieldDicts[field]
|
||||
}
|
||||
if i.fieldTFRs != nil {
|
||||
tfrs := i.fieldTFRs[field]
|
||||
last := len(tfrs) - 1
|
||||
if last >= 0 {
|
||||
rv := tfrs[last]
|
||||
tfrs[last] = nil
|
||||
i.fieldTFRs[field] = tfrs[:last]
|
||||
i.m2.Unlock()
|
||||
return rv, dicts
|
||||
}
|
||||
}
|
||||
i.m2.Unlock()
|
||||
return &IndexSnapshotTermFieldReader{}, dicts
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
|
||||
i.m2.Lock()
|
||||
if i.fieldTFRs == nil {
|
||||
i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
|
||||
}
|
||||
i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
|
||||
if i.fieldDicts == nil {
|
||||
i.fieldDicts = map[string][]segment.TermDictionary{}
|
||||
}
|
||||
i.fieldDicts[tfr.field] = tfr.dicts
|
||||
i.m2.Unlock()
|
||||
}
|
||||
|
||||
// docNumberToBytes encodes in as 8 big-endian bytes and returns them.
//
// buf is reused when possible: a buffer that already has length 8 is written
// in place, one with capacity of at least 8 is resliced to length 8, and in
// any other case a new 8-byte slice is allocated.
func docNumberToBytes(buf []byte, in uint64) []byte {
	switch {
	case len(buf) == 8:
		// already the right length, write in place
	case cap(buf) >= 8:
		buf = buf[:8]
	default:
		buf = make([]byte, 8)
	}
	binary.BigEndian.PutUint64(buf, in)
	return buf
}
|
||||
|
||||
func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
|
||||
if len(in) != 8 {
|
||||
return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in)
|
||||
}
|
||||
return binary.BigEndian.Uint64(in), nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
||||
fields []string, visitor index.DocumentFieldTermVisitor) error {
|
||||
_, err := i.documentVisitFieldTerms(id, fields, visitor, nil)
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID,
|
||||
fields []string, visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) (
|
||||
segment.DocVisitState, error) {
|
||||
|
||||
docNum, err := docInternalToNumber(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
if segmentIndex >= len(i.segment) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
ss := i.segment[segmentIndex]
|
||||
|
||||
if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok {
|
||||
// get the list of doc value persisted fields
|
||||
pFields, err := zaps.VisitableDocValueFields()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// assort the fields for which terms look up have to
|
||||
// be performed runtime
|
||||
dvPendingFields := extractDvPendingFields(fields, pFields)
|
||||
// all fields are doc value persisted
|
||||
if len(dvPendingFields) == 0 {
|
||||
return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs)
|
||||
}
|
||||
|
||||
// concurrently trigger the runtime doc value preparations for
|
||||
// pending fields as well as the visit of the persisted doc values
|
||||
errCh := make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
defer close(errCh)
|
||||
err := ss.cachedDocs.prepareFields(dvPendingFields, ss)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
}
|
||||
}()
|
||||
|
||||
// visit the requested persisted dv while the cache preparation in progress
|
||||
dvs, err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// err out if fieldCache preparation failed
|
||||
err = <-errCh
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor)
|
||||
return dvs, nil
|
||||
}
|
||||
|
||||
return dvs, prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor)
|
||||
}
|
||||
|
||||
func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error {
|
||||
err := ss.cachedDocs.prepareFields(fields, ss)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor)
|
||||
return nil
|
||||
}
|
||||
|
||||
func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string,
|
||||
ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) {
|
||||
|
||||
for _, field := range fields {
|
||||
if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
|
||||
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
||||
for {
|
||||
i := bytes.Index(tlist, TermSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
visitor(field, tlist[0:i])
|
||||
tlist = tlist[i+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// extractDvPendingFields returns the entries of requestedFields that do not
// appear in persistedFields, preserving their order (duplicates included).
// The result is always a non-nil slice.
func extractDvPendingFields(requestedFields, persistedFields []string) []string {
	persisted := make(map[string]struct{}, len(persistedFields))
	for idx := range persistedFields {
		persisted[persistedFields[idx]] = struct{}{}
	}

	pending := make([]string, 0, len(requestedFields))
	for _, name := range requestedFields {
		if _, found := persisted[name]; found {
			continue
		}
		pending = append(pending, name)
	}
	return pending
}
|
||||
|
||||
func (i *IndexSnapshot) DocValueReader(fields []string) (index.DocValueReader, error) {
|
||||
return &DocValueReader{i: i, fields: fields}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *IndexSnapshot
|
||||
fields []string
|
||||
dvs segment.DocVisitState
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocumentFieldTermVisitor) (err error) {
|
||||
dvr.dvs, err = dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor, dvr.dvs)
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpAll() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpDoc(id string) chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpFields() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
93
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
Normal file
93
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
)
|
||||
|
||||
type segmentDictCursor struct {
|
||||
itr segment.DictionaryIterator
|
||||
curr index.DictEntry
|
||||
}
|
||||
|
||||
type IndexSnapshotFieldDict struct {
|
||||
snapshot *IndexSnapshot
|
||||
cursors []*segmentDictCursor
|
||||
entry index.DictEntry
|
||||
}
|
||||
|
||||
// Len reports the number of cursors (heap.Interface).
func (i *IndexSnapshotFieldDict) Len() int {
	return len(i.cursors)
}
|
||||
func (i *IndexSnapshotFieldDict) Less(a, b int) bool {
|
||||
return i.cursors[a].curr.Term < i.cursors[b].curr.Term
|
||||
}
|
||||
func (i *IndexSnapshotFieldDict) Swap(a, b int) {
|
||||
i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a]
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Push(x interface{}) {
|
||||
i.cursors = append(i.cursors, x.(*segmentDictCursor))
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Pop() interface{} {
|
||||
n := len(i.cursors)
|
||||
x := i.cursors[n-1]
|
||||
i.cursors = i.cursors[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
||||
if len(i.cursors) <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
i.entry = i.cursors[0].curr
|
||||
next, err := i.cursors[0].itr.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// at end of this cursor, remove it
|
||||
heap.Pop(i)
|
||||
} else {
|
||||
// modified heap, fix it
|
||||
i.cursors[0].curr = *next
|
||||
heap.Fix(i, 0)
|
||||
}
|
||||
// look for any other entries with the exact same term
|
||||
for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
|
||||
i.entry.Count += i.cursors[0].curr.Count
|
||||
next, err := i.cursors[0].itr.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// at end of this cursor, remove it
|
||||
heap.Pop(i)
|
||||
} else {
|
||||
// modified heap, fix it
|
||||
i.cursors[0].curr = *next
|
||||
heap.Fix(i, 0)
|
||||
}
|
||||
}
|
||||
|
||||
return &i.entry, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Close() error {
|
||||
return nil
|
||||
}
|
80
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go
generated
vendored
Normal file
80
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go
generated
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotDocIDReader int
|
||||
|
||||
func init() {
|
||||
var isdr IndexSnapshotDocIDReader
|
||||
reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotDocIDReader struct {
|
||||
snapshot *IndexSnapshot
|
||||
iterators []roaring.IntIterable
|
||||
segmentOffset int
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Size() int {
|
||||
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
if !i.iterators[i.segmentOffset].HasNext() {
|
||||
i.segmentOffset++
|
||||
continue
|
||||
}
|
||||
next := i.iterators[i.segmentOffset].Next()
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
return docNumberToBytes(nil, uint64(next)+globalOffset), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) {
|
||||
// FIXME do something better
|
||||
next, err := i.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
return nil, nil
|
||||
}
|
||||
for bytes.Compare(next, ID) < 0 {
|
||||
next, err = i.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
return next, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Close() error {
|
||||
return nil
|
||||
}
|
185
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
generated
vendored
Normal file
185
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
generated
vendored
Normal file
@ -0,0 +1,185 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
||||
|
||||
func init() {
|
||||
var istfr IndexSnapshotTermFieldReader
|
||||
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotTermFieldReader struct {
|
||||
term []byte
|
||||
field string
|
||||
snapshot *IndexSnapshot
|
||||
dicts []segment.TermDictionary
|
||||
postings []segment.PostingsList
|
||||
iterators []segment.PostingsIterator
|
||||
segmentOffset int
|
||||
includeFreq bool
|
||||
includeNorm bool
|
||||
includeTermVectors bool
|
||||
currPosting segment.Posting
|
||||
currID index.IndexInternalID
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
||||
len(i.term) +
|
||||
len(i.field) +
|
||||
len(i.currID)
|
||||
|
||||
for _, entry := range i.postings {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range i.iterators {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
if i.currPosting != nil {
|
||||
sizeInBytes += i.currPosting.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
// find the next hit
|
||||
for i.segmentOffset < len(i.postings) {
|
||||
next, err := i.iterators[i.segmentOffset].Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next != nil {
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
nnum := next.Number()
|
||||
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
|
||||
i.postingToTermFieldDoc(next, rv)
|
||||
|
||||
i.currID = rv.ID
|
||||
i.currPosting = next
|
||||
return rv, nil
|
||||
}
|
||||
i.segmentOffset++
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
|
||||
if i.includeFreq {
|
||||
rv.Freq = next.Frequency()
|
||||
}
|
||||
if i.includeNorm {
|
||||
rv.Norm = next.Norm()
|
||||
}
|
||||
if i.includeTermVectors {
|
||||
locs := next.Locations()
|
||||
if cap(rv.Vectors) < len(locs) {
|
||||
rv.Vectors = make([]*index.TermFieldVector, len(locs))
|
||||
backing := make([]index.TermFieldVector, len(locs))
|
||||
for i := range backing {
|
||||
rv.Vectors[i] = &backing[i]
|
||||
}
|
||||
}
|
||||
rv.Vectors = rv.Vectors[:len(locs)]
|
||||
for i, loc := range locs {
|
||||
*rv.Vectors[i] = index.TermFieldVector{
|
||||
Start: loc.Start(),
|
||||
End: loc.End(),
|
||||
Pos: loc.Pos(),
|
||||
ArrayPositions: loc.ArrayPositions(),
|
||||
Field: loc.Field(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
// FIXME do something better
|
||||
// for now, if we need to seek backwards, then restart from the beginning
|
||||
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
|
||||
i2, err := i.snapshot.TermFieldReader(i.term, i.field,
|
||||
i.includeFreq, i.includeNorm, i.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*i = *(i2.(*IndexSnapshotTermFieldReader))
|
||||
}
|
||||
num, err := docInternalToNumber(ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
|
||||
}
|
||||
segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
|
||||
if segIndex >= len(i.snapshot.segment) {
|
||||
return nil, fmt.Errorf("computed segment index %d out of bounds %d",
|
||||
segIndex, len(i.snapshot.segment))
|
||||
}
|
||||
// skip directly to the target segment
|
||||
i.segmentOffset = segIndex
|
||||
next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// we jumped directly to the segment that should have contained it
|
||||
// but it wasn't there, so reuse Next() which should correctly
|
||||
// get the next hit after it (we moved i.segmentOffset)
|
||||
return i.Next(preAlloced)
|
||||
}
|
||||
|
||||
if preAlloced == nil {
|
||||
preAlloced = &index.TermFieldDoc{}
|
||||
}
|
||||
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
|
||||
i.snapshot.offsets[segIndex])
|
||||
i.postingToTermFieldDoc(next, preAlloced)
|
||||
i.currID = preAlloced.ID
|
||||
i.currPosting = next
|
||||
return preAlloced, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Count() uint64 {
|
||||
var rv uint64
|
||||
for _, posting := range i.postings {
|
||||
rv += posting.Count()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Close() error {
|
||||
if i.snapshot != nil {
|
||||
atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
|
||||
i.snapshot.recycleTermFieldReader(i)
|
||||
}
|
||||
return nil
|
||||
}
|
173
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
generated
vendored
Normal file
173
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
generated
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/boltdb/bolt"
|
||||
)
|
||||
|
||||
// RollbackPoint identifies a persisted snapshot by its epoch, together with
// a copy of the internal key/value data stored with that snapshot.
type RollbackPoint struct {
	// epoch is the snapshot epoch decoded from the snapshot's key.
	epoch uint64
	// meta holds the internal key/value pairs read from the snapshot.
	meta map[string][]byte
}
|
||||
|
||||
func (r *RollbackPoint) GetInternal(key []byte) []byte {
|
||||
return r.meta[string(key)]
|
||||
}
|
||||
|
||||
// RollbackPoints returns an array of rollback points available for
|
||||
// the application to rollback to, with more recent rollback points
|
||||
// (higher epochs) coming first.
|
||||
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
|
||||
if s.rootBolt == nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: root is nil")
|
||||
}
|
||||
|
||||
// start a read-only bolt transaction
|
||||
tx, err := s.rootBolt.Begin(false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: failed to start" +
|
||||
" read-only transaction")
|
||||
}
|
||||
|
||||
// read-only bolt transactions to be rolled back
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rollbackPoints := []*RollbackPoint{}
|
||||
|
||||
c1 := snapshots.Cursor()
|
||||
for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() {
|
||||
_, snapshotEpoch, err := segment.DecodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" unable to parse segment epoch %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
|
||||
snapshot := snapshots.Bucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" snapshot key, but bucket missing %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
|
||||
meta := map[string][]byte{}
|
||||
c2 := snapshot.Cursor()
|
||||
for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
|
||||
if j[0] == boltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(j)
|
||||
err = internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
meta[string(key)] = copiedVal
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" failed in fetching internal data: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
rollbackPoints = append(rollbackPoints, &RollbackPoint{
|
||||
epoch: snapshotEpoch,
|
||||
meta: meta,
|
||||
})
|
||||
}
|
||||
|
||||
return rollbackPoints, nil
|
||||
}
|
||||
|
||||
// Rollback atomically and durably (if unsafeBatch is unset) brings
|
||||
// the store back to the point in time as represented by the
|
||||
// RollbackPoint. Rollback() should only be passed a RollbackPoint
|
||||
// that came from the same store using the RollbackPoints() API.
|
||||
func (s *Scorch) Rollback(to *RollbackPoint) error {
|
||||
if to == nil {
|
||||
return fmt.Errorf("Rollback: RollbackPoint is nil")
|
||||
}
|
||||
|
||||
if s.rootBolt == nil {
|
||||
return fmt.Errorf("Rollback: root is nil")
|
||||
}
|
||||
|
||||
revert := &snapshotReversion{}
|
||||
|
||||
s.rootLock.Lock()
|
||||
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return fmt.Errorf("Rollback: no snapshots available")
|
||||
}
|
||||
|
||||
pos := segment.EncodeUvarintAscending(nil, to.epoch)
|
||||
|
||||
snapshot := snapshots.Bucket(pos)
|
||||
if snapshot == nil {
|
||||
return fmt.Errorf("Rollback: snapshot not found")
|
||||
}
|
||||
|
||||
indexSnapshot, err := s.loadSnapshot(snapshot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Rollback: unable to load snapshot: %v", err)
|
||||
}
|
||||
|
||||
// add segments referenced by loaded index snapshot to the
|
||||
// ineligibleForRemoval map
|
||||
for _, segSnap := range indexSnapshot.segment {
|
||||
filename := zapFileName(segSnap.id)
|
||||
s.ineligibleForRemoval[filename] = true
|
||||
}
|
||||
|
||||
revert.snapshot = indexSnapshot
|
||||
revert.applied = make(chan error)
|
||||
revert.persisted = make(chan error)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// introduce the reversion
|
||||
s.revertToSnapshots <- revert
|
||||
|
||||
// block until this snapshot is applied
|
||||
err = <-revert.applied
|
||||
if err != nil {
|
||||
return fmt.Errorf("Rollback: failed with err: %v", err)
|
||||
}
|
||||
|
||||
return <-revert.persisted
|
||||
}
|
272
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
Normal file
272
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
Normal file
@ -0,0 +1,272 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var (
	// TermSeparator is the byte appended after each term when terms are
	// concatenated into a per-document byte slice.
	TermSeparator byte = 0xff

	// TermSeparatorSplitSlice is TermSeparator wrapped in a one-element
	// byte slice.
	TermSeparatorSplitSlice = []byte{TermSeparator}
)
|
||||
|
||||
type SegmentDictionarySnapshot struct {
|
||||
s *SegmentSnapshot
|
||||
d segment.TermDictionary
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap,
|
||||
prealloc segment.PostingsList) (segment.PostingsList, error) {
|
||||
// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
|
||||
return s.d.PostingsList(term, s.s.deleted, prealloc)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
|
||||
return s.d.Iterator()
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
|
||||
return s.d.PrefixIterator(prefix)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
|
||||
return s.d.RangeIterator(start, end)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
|
||||
return s.d.RegexpIterator(regex)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) FuzzyIterator(term string,
|
||||
fuzziness int) segment.DictionaryIterator {
|
||||
return s.d.FuzzyIterator(term, fuzziness)
|
||||
}
|
||||
|
||||
func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte,
|
||||
includeCount bool) segment.DictionaryIterator {
|
||||
return s.d.OnlyIterator(onlyTerms, includeCount)
|
||||
}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
id uint64
|
||||
segment segment.Segment
|
||||
deleted *roaring.Bitmap
|
||||
creator string
|
||||
|
||||
cachedDocs *cachedDocs
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Segment() segment.Segment {
|
||||
return s.segment
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Deleted() *roaring.Bitmap {
|
||||
return s.deleted
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Id() uint64 {
|
||||
return s.id
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) FullSize() int64 {
|
||||
return int64(s.segment.Count())
|
||||
}
|
||||
|
||||
func (s SegmentSnapshot) LiveSize() int64 {
|
||||
return int64(s.Count())
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Close() error {
|
||||
return s.segment.Close()
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
|
||||
return s.segment.VisitDocument(num, visitor)
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) {
|
||||
return s.segment.DocID(num)
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Count() uint64 {
|
||||
|
||||
rv := s.segment.Count()
|
||||
if s.deleted != nil {
|
||||
rv -= s.deleted.GetCardinality()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
|
||||
d, err := s.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &SegmentDictionarySnapshot{
|
||||
s: s,
|
||||
d: d,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
||||
rv, err := s.segment.DocNumbers(docIDs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.deleted != nil {
|
||||
rv.AndNot(s.deleted)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// DocNumbersLive returns bitsit containing doc numbers for all live docs
|
||||
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
|
||||
rv := roaring.NewBitmap()
|
||||
rv.AddRange(0, s.segment.Count())
|
||||
if s.deleted != nil {
|
||||
rv.AndNot(s.deleted)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Fields() []string {
|
||||
return s.segment.Fields()
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Size() (rv int) {
|
||||
rv = s.segment.Size()
|
||||
if s.deleted != nil {
|
||||
rv += int(s.deleted.GetSizeInBytes())
|
||||
}
|
||||
rv += s.cachedDocs.Size()
|
||||
return
|
||||
}
|
||||
|
||||
// cachedFieldDocs is the cached per-document term data for a single field.
// It is filled in by prepareFields, which closes readyCh when it finishes.
type cachedFieldDocs struct {
	// readyCh is closed when docs is ready to be used.
	readyCh chan struct{}
	// err is non-nil if preparing this cachedFieldDocs failed.
	err error
	// docs is keyed by localDocNum; each value is a list of terms
	// delimited by 0xFF.
	docs map[uint64][]byte
	// size is a running byte estimate accumulated while docs is built.
	size uint64
}
|
||||
|
||||
func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
|
||||
defer close(cfd.readyCh)
|
||||
|
||||
cfd.size += uint64(size.SizeOfUint64) /* size field */
|
||||
dict, err := ss.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
cfd.err = err
|
||||
return
|
||||
}
|
||||
|
||||
var postings segment.PostingsList
|
||||
var postingsItr segment.PostingsIterator
|
||||
|
||||
dictItr := dict.Iterator()
|
||||
next, err := dictItr.Next()
|
||||
for err == nil && next != nil {
|
||||
var err1 error
|
||||
postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings)
|
||||
if err1 != nil {
|
||||
cfd.err = err1
|
||||
return
|
||||
}
|
||||
|
||||
cfd.size += uint64(size.SizeOfUint64) /* map key */
|
||||
postingsItr = postings.Iterator(false, false, false, postingsItr)
|
||||
nextPosting, err2 := postingsItr.Next()
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
||||
cfd.size += uint64(len(next.Term) + 1) // map value
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
|
||||
if err2 != nil {
|
||||
cfd.err = err2
|
||||
return
|
||||
}
|
||||
|
||||
next, err = dictItr.Next()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cfd.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
type cachedDocs struct {
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
c.m.Lock()
|
||||
if c.cache == nil {
|
||||
c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
|
||||
}
|
||||
|
||||
for _, field := range wantedFields {
|
||||
_, exists := c.cache[field]
|
||||
if !exists {
|
||||
c.cache[field] = &cachedFieldDocs{
|
||||
readyCh: make(chan struct{}),
|
||||
docs: make(map[uint64][]byte),
|
||||
}
|
||||
|
||||
go c.cache[field].prepareFields(field, ss)
|
||||
}
|
||||
}
|
||||
|
||||
for _, field := range wantedFields {
|
||||
cachedFieldDocs := c.cache[field]
|
||||
c.m.Unlock()
|
||||
<-cachedFieldDocs.readyCh
|
||||
|
||||
if cachedFieldDocs.err != nil {
|
||||
return cachedFieldDocs.err
|
||||
}
|
||||
c.m.Lock()
|
||||
}
|
||||
c.updateSizeLOCKED()
|
||||
|
||||
c.m.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *cachedDocs) Size() int {
|
||||
return int(atomic.LoadUint64(&c.size))
|
||||
}
|
||||
|
||||
func (c *cachedDocs) updateSizeLOCKED() {
|
||||
sizeInBytes := 0
|
||||
for k, v := range c.cache { // cachedFieldDocs
|
||||
sizeInBytes += len(k)
|
||||
if v != nil {
|
||||
for _, entry := range v.docs { // docs
|
||||
sizeInBytes += 8 /* size of uint64 */ + len(entry)
|
||||
}
|
||||
}
|
||||
}
|
||||
atomic.StoreUint64(&c.size, uint64(sizeInBytes))
|
||||
}
|
129
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
Normal file
129
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Stats tracks statistics about the index. Fields prefixed like CurXxxx
// are gauges (can go up and down), and fields prefixed like TotXxxx are
// monotonically increasing counters.
//
// Every field is a uint64; ToMap relies on that when it loads each field
// atomically.
type Stats struct {
	// updates and deletes
	TotUpdates uint64
	TotDeletes uint64

	// batches
	TotBatches        uint64
	TotBatchesEmpty   uint64
	TotBatchIntroTime uint64
	MaxBatchIntroTime uint64

	// errors
	TotOnErrors uint64

	// analysis and indexing time
	TotAnalysisTime uint64
	TotIndexTime    uint64

	// indexed bytes
	TotIndexedPlainTextBytes uint64

	// term searchers
	TotTermSearchersStarted  uint64
	TotTermSearchersFinished uint64

	// introducer loop
	TotIntroduceLoop       uint64
	TotIntroduceSegmentBeg uint64
	TotIntroduceSegmentEnd uint64
	TotIntroducePersistBeg uint64
	TotIntroducePersistEnd uint64
	TotIntroduceMergeBeg   uint64
	TotIntroduceMergeEnd   uint64
	TotIntroduceRevertBeg  uint64
	TotIntroduceRevertEnd  uint64

	// introduced items and segments
	TotIntroducedItems         uint64
	TotIntroducedSegmentsBatch uint64
	TotIntroducedSegmentsMerge uint64

	// persister loop
	TotPersistLoopBeg          uint64
	TotPersistLoopErr          uint64
	TotPersistLoopProgress     uint64
	TotPersistLoopWait         uint64
	TotPersistLoopWaitNotified uint64
	TotPersistLoopEnd          uint64

	// persisted items and segments
	TotPersistedItems    uint64
	TotPersistedSegments uint64

	// persister pausing for a slow merger
	TotPersisterSlowMergerPause  uint64
	TotPersisterSlowMergerResume uint64

	// file merge loop
	TotFileMergeLoopBeg uint64
	TotFileMergeLoopErr uint64
	TotFileMergeLoopEnd uint64

	// file merge planning
	TotFileMergePlan     uint64
	TotFileMergePlanErr  uint64
	TotFileMergePlanNone uint64
	TotFileMergePlanOk   uint64

	// file merge plan tasks
	TotFileMergePlanTasks              uint64
	TotFileMergePlanTasksDone          uint64
	TotFileMergePlanTasksErr           uint64
	TotFileMergePlanTasksSegments      uint64
	TotFileMergePlanTasksSegmentsEmpty uint64

	// file merge segments and bytes
	TotFileMergeSegmentsEmpty uint64
	TotFileMergeSegments      uint64
	TotFileMergeWrittenBytes  uint64

	// file merge zap
	TotFileMergeZapBeg  uint64
	TotFileMergeZapEnd  uint64
	TotFileMergeZapTime uint64
	MaxFileMergeZapTime uint64

	// file merge introductions
	TotFileMergeIntroductions     uint64
	TotFileMergeIntroductionsDone uint64

	// in-memory merge
	TotMemMergeBeg      uint64
	TotMemMergeErr      uint64
	TotMemMergeDone     uint64
	TotMemMergeZapBeg   uint64
	TotMemMergeZapEnd   uint64
	TotMemMergeZapTime  uint64
	MaxMemMergeZapTime  uint64
	TotMemMergeSegments uint64
}
|
||||
|
||||
// atomically populates the returned map
|
||||
func (s *Stats) ToMap() map[string]interface{} {
|
||||
m := map[string]interface{}{}
|
||||
sve := reflect.ValueOf(s).Elem()
|
||||
svet := sve.Type()
|
||||
for i := 0; i < svet.NumField(); i++ {
|
||||
svef := sve.Field(i)
|
||||
if svef.CanAddr() {
|
||||
svefp := svef.Addr().Interface()
|
||||
m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64))
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler, and in contrast to standard
|
||||
// json marshaling provides atomic safety
|
||||
func (s *Stats) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(s.ToMap())
|
||||
}
|
23
vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
generated
vendored
23
vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
generated
vendored
@ -15,11 +15,20 @@
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
||||
func init() {
|
||||
var ir IndexReader
|
||||
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||
}
|
||||
|
||||
type IndexReader struct {
|
||||
index *UpsideDownCouch
|
||||
kvreader store.KVReader
|
||||
@ -201,3 +210,17 @@ func incrementBytes(in []byte) []byte {
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) {
|
||||
return &DocValueReader{i: i, fields: fields}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *IndexReader
|
||||
fields []string
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocumentFieldTermVisitor) error {
|
||||
return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor)
|
||||
}
|
||||
|
54
vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
generated
vendored
54
vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
generated
vendored
@ -16,13 +16,27 @@ package upsidedown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||
|
||||
func init() {
|
||||
var tfr UpsideDownCouchTermFieldReader
|
||||
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||
int(reflect.TypeOf(tfr).Size())
|
||||
var cdr UpsideDownCouchDocIDReader
|
||||
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||
int(reflect.TypeOf(cdr).Size())
|
||||
}
|
||||
|
||||
type UpsideDownCouchTermFieldReader struct {
|
||||
count uint64
|
||||
indexReader *IndexReader
|
||||
@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
|
||||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||
len(r.term) +
|
||||
r.tfrPrealloc.Size() +
|
||||
len(r.keyBuf)
|
||||
|
||||
if r.tfrNext != nil {
|
||||
sizeInBytes += r.tfrNext.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
|
||||
onlyMode bool
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
startBytes := []byte{0x0}
|
||||
endBytes := []byte{0xff}
|
||||
|
||||
@ -190,15 +227,18 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDo
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
|
||||
// we don't actually own the list of ids, so if before we sort we must copy
|
||||
idsCopy := make([]string, len(ids))
|
||||
copy(idsCopy, ids)
|
||||
// ensure ids are sorted
|
||||
sort.Strings(ids)
|
||||
sort.Strings(idsCopy)
|
||||
startBytes := []byte{0x0}
|
||||
if len(ids) > 0 {
|
||||
startBytes = []byte(ids[0])
|
||||
if len(idsCopy) > 0 {
|
||||
startBytes = []byte(idsCopy[0])
|
||||
}
|
||||
endBytes := []byte{0xff}
|
||||
if len(ids) > 0 {
|
||||
endBytes = incrementBytes([]byte(ids[len(ids)-1]))
|
||||
if len(idsCopy) > 0 {
|
||||
endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1]))
|
||||
}
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||
@ -207,7 +247,7 @@ func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (
|
||||
return &UpsideDownCouchDocIDReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
only: ids,
|
||||
only: idsCopy,
|
||||
onlyMode: true,
|
||||
}, nil
|
||||
}
|
||||
|
29
vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
generated
vendored
29
vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
generated
vendored
@ -20,10 +20,22 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/size"
|
||||
"github.com/golang/protobuf/proto"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermFrequencyRow int
|
||||
var reflectStaticSizeTermVector int
|
||||
|
||||
func init() {
|
||||
var tfr TermFrequencyRow
|
||||
reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
|
||||
var tv TermVector
|
||||
reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
|
||||
}
|
||||
|
||||
const ByteSeparator byte = 0xff
|
||||
|
||||
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
||||
@ -358,6 +370,11 @@ type TermVector struct {
|
||||
end uint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) Size() int {
|
||||
return reflectStaticSizeTermVector + size.SizeOfPtr +
|
||||
len(tv.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (tv *TermVector) String() string {
|
||||
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
|
||||
}
|
||||
@ -371,6 +388,18 @@ type TermFrequencyRow struct {
|
||||
field uint16
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermFrequencyRow +
|
||||
len(tfr.term) +
|
||||
len(tfr.doc)
|
||||
|
||||
for _, entry := range tfr.vectors {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Term() []byte {
|
||||
return tfr.term
|
||||
}
|
||||
|
13
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
13
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
@ -293,7 +293,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Open() (err error) {
|
||||
//acquire the write mutex for the duratin of Open()
|
||||
// acquire the write mutex for the duration of Open()
|
||||
udc.writeMutex.Lock()
|
||||
defer udc.writeMutex.Unlock()
|
||||
|
||||
@ -837,6 +837,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
||||
docBackIndexRowErr = err
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if cerr := kvreader.Close(); err == nil && cerr != nil {
|
||||
docBackIndexRowErr = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
for docID, doc := range batch.IndexOps {
|
||||
backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
|
||||
@ -847,12 +852,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
||||
|
||||
docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
|
||||
}
|
||||
|
||||
err = kvreader.Close()
|
||||
if err != nil {
|
||||
docBackIndexRowErr = err
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
// wait for analysis result
|
||||
|
3
vendor/github.com/blevesearch/bleve/index_alias_impl.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/index_alias_impl.go
generated
vendored
@ -15,12 +15,11 @@
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
|
127
vendor/github.com/blevesearch/bleve/index_impl.go
generated
vendored
127
vendor/github.com/blevesearch/bleve/index_impl.go
generated
vendored
@ -15,6 +15,7 @@
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
@ -22,8 +23,6 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
@ -51,6 +50,12 @@ const storePath = "store"
|
||||
|
||||
var mappingInternalKey = []byte("_mapping")
|
||||
|
||||
// Context keys under which a search caller may register callbacks.
const (
	SearchQueryStartCallbackKey = "_search_query_start_callback_key"
	SearchQueryEndCallbackKey   = "_search_query_end_callback_key"
)

// Callback signatures for the context keys above; each receives a size as
// a uint64 and may return an error.
type (
	SearchQueryStartCallbackFn func(size uint64) error
	SearchQueryEndCallbackFn   func(size uint64) error
)
|
||||
|
||||
func indexStorePath(path string) string {
|
||||
return path + string(os.PathSeparator) + storePath
|
||||
}
|
||||
@ -253,6 +258,24 @@ func (i *indexImpl) Index(id string, data interface{}) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// IndexAdvanced takes a document.Document object
|
||||
// skips the mapping and indexes it.
|
||||
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
|
||||
if doc.ID == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err = i.i.Update(doc)
|
||||
return
|
||||
}
|
||||
|
||||
// Delete entries for the specified identifier from
|
||||
// the index.
|
||||
func (i *indexImpl) Delete(id string) (err error) {
|
||||
@ -345,8 +368,70 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
||||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
var documentMatchEmptySize int
|
||||
var searchContextEmptySize int
|
||||
var facetResultEmptySize int
|
||||
var documentEmptySize int
|
||||
|
||||
func init() {
|
||||
var dm search.DocumentMatch
|
||||
documentMatchEmptySize = dm.Size()
|
||||
|
||||
var sc search.SearchContext
|
||||
searchContextEmptySize = sc.Size()
|
||||
|
||||
var fr search.FacetResult
|
||||
facetResultEmptySize = fr.Size()
|
||||
|
||||
var d document.Document
|
||||
documentEmptySize = d.Size()
|
||||
}
|
||||
|
||||
// memNeededForSearch is a helper function that returns an estimate of RAM
|
||||
// needed to execute a search request.
|
||||
func memNeededForSearch(req *SearchRequest,
|
||||
searcher search.Searcher,
|
||||
topnCollector *collector.TopNCollector) uint64 {
|
||||
|
||||
backingSize := req.Size + req.From + 1
|
||||
if req.Size+req.From > collector.PreAllocSizeSkipCap {
|
||||
backingSize = collector.PreAllocSizeSkipCap + 1
|
||||
}
|
||||
numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
|
||||
|
||||
estimate := 0
|
||||
|
||||
// overhead, size in bytes from collector
|
||||
estimate += topnCollector.Size()
|
||||
|
||||
// pre-allocing DocumentMatchPool
|
||||
estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize
|
||||
|
||||
// searcher overhead
|
||||
estimate += searcher.Size()
|
||||
|
||||
// overhead from results, lowestMatchOutsideResults
|
||||
estimate += (numDocMatches + 1) * documentMatchEmptySize
|
||||
|
||||
// additional overhead from SearchResult
|
||||
estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus
|
||||
|
||||
// overhead from facet results
|
||||
if req.Facets != nil {
|
||||
estimate += len(req.Facets) * facetResultEmptySize
|
||||
}
|
||||
|
||||
// highlighting, store
|
||||
if len(req.Fields) > 0 || req.Highlight != nil {
|
||||
// Size + From => number of hits
|
||||
estimate += (req.Size + req.From) * documentEmptySize
|
||||
}
|
||||
|
||||
return uint64(estimate)
|
||||
}
|
||||
|
||||
// SearchInContext executes a search request operation within the provided
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
@ -411,6 +496,24 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
collector.SetFacetsBuilder(facetsBuilder)
|
||||
}
|
||||
|
||||
memNeeded := memNeededForSearch(req, searcher, collector)
|
||||
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
|
||||
err = cbF(memNeeded)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryEndCallbackFn); ok {
|
||||
defer func() {
|
||||
_ = cbF(memNeeded)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
err = collector.Collect(ctx, searcher, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -442,7 +545,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
doc, err := indexReader.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
for _, f := range req.Fields {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
for _, docF := range doc.Fields {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
@ -516,9 +620,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||
return &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Total: 1,
|
||||
Failed: 0,
|
||||
Successful: 1,
|
||||
Errors: make(map[string]error),
|
||||
},
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
@ -738,3 +840,16 @@ func (f *indexImplFieldDict) Close() error {
|
||||
}
|
||||
return f.indexReader.Close()
|
||||
}
|
||||
|
||||
// deDuplicate returns the entries of fields with duplicates removed,
// keeping the first occurrence of each entry in its original order. The
// input slice is not modified and the result is never nil, even for a nil
// or empty input.
func deDuplicate(fields []string) []string {
	// size both containers up front; the result can never hold more
	// entries than the input
	seen := make(map[string]struct{}, len(fields))
	ret := make([]string, 0, len(fields))
	for _, entry := range fields {
		if _, exists := seen[entry]; exists {
			continue
		}
		seen[entry] = struct{}{}
		ret = append(ret, entry)
	}
	return ret
}
|
||||
|
36
vendor/github.com/blevesearch/bleve/mapping/document.go
generated
vendored
36
vendor/github.com/blevesearch/bleve/mapping/document.go
generated
vendored
@ -15,6 +15,7 @@
|
||||
package mapping
|
||||
|
||||
import (
|
||||
"encoding"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
@ -178,6 +179,7 @@ OUTER:
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return current
|
||||
}
|
||||
@ -481,6 +483,17 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
||||
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
|
||||
fieldMapping.processTime(property, pathString, path, indexes, context)
|
||||
}
|
||||
case encoding.TextMarshaler:
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if fieldMapping.Type == "text" {
|
||||
fieldMapping.processString(string(txt), pathString, path, indexes, context)
|
||||
}
|
||||
}
|
||||
}
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
default:
|
||||
if subDocMapping != nil {
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
@ -491,7 +504,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
||||
}
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
case reflect.Map:
|
||||
case reflect.Map, reflect.Slice:
|
||||
if subDocMapping != nil {
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if fieldMapping.Type == "geopoint" {
|
||||
@ -500,6 +513,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
||||
}
|
||||
}
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
case reflect.Ptr:
|
||||
if !propertyValue.IsNil() {
|
||||
switch property := property.(type) {
|
||||
case encoding.TextMarshaler:
|
||||
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if fieldMapping.Type == "text" {
|
||||
fieldMapping.processString(string(txt), pathString, path, indexes, context)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
|
||||
default:
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
}
|
||||
default:
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
|
26
vendor/github.com/blevesearch/bleve/mapping/field.go
generated
vendored
26
vendor/github.com/blevesearch/bleve/mapping/field.go
generated
vendored
@ -26,8 +26,9 @@ import (
|
||||
|
||||
// control the default behavior for dynamic fields (those not explicitly mapped)
|
||||
var (
|
||||
IndexDynamic = true
|
||||
StoreDynamic = true
|
||||
IndexDynamic = true
|
||||
StoreDynamic = true
|
||||
DocValuesDynamic = true // TODO revisit default?
|
||||
)
|
||||
|
||||
// A FieldMapping describes how a specific item
|
||||
@ -54,6 +55,10 @@ type FieldMapping struct {
|
||||
IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
|
||||
IncludeInAll bool `json:"include_in_all,omitempty"`
|
||||
DateFormat string `json:"date_format,omitempty"`
|
||||
|
||||
// DocValues, if true makes the index uninverting possible for this field
|
||||
// It is useful for faceting and sorting queries.
|
||||
DocValues bool `json:"docvalues,omitempty"`
|
||||
}
|
||||
|
||||
// NewTextFieldMapping returns a default field mapping for text
|
||||
@ -64,6 +69,7 @@ func NewTextFieldMapping() *FieldMapping {
|
||||
Index: true,
|
||||
IncludeTermVectors: true,
|
||||
IncludeInAll: true,
|
||||
DocValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,6 +77,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||
rv := NewTextFieldMapping()
|
||||
rv.Store = im.StoreDynamic
|
||||
rv.Index = im.IndexDynamic
|
||||
rv.DocValues = im.DocValuesDynamic
|
||||
return rv
|
||||
}
|
||||
|
||||
@ -81,6 +88,7 @@ func NewNumericFieldMapping() *FieldMapping {
|
||||
Store: true,
|
||||
Index: true,
|
||||
IncludeInAll: true,
|
||||
DocValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,6 +96,7 @@ func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||
rv := NewNumericFieldMapping()
|
||||
rv.Store = im.StoreDynamic
|
||||
rv.Index = im.IndexDynamic
|
||||
rv.DocValues = im.DocValuesDynamic
|
||||
return rv
|
||||
}
|
||||
|
||||
@ -98,6 +107,7 @@ func NewDateTimeFieldMapping() *FieldMapping {
|
||||
Store: true,
|
||||
Index: true,
|
||||
IncludeInAll: true,
|
||||
DocValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,6 +115,7 @@ func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||
rv := NewDateTimeFieldMapping()
|
||||
rv.Store = im.StoreDynamic
|
||||
rv.Index = im.IndexDynamic
|
||||
rv.DocValues = im.DocValuesDynamic
|
||||
return rv
|
||||
}
|
||||
|
||||
@ -115,6 +126,7 @@ func NewBooleanFieldMapping() *FieldMapping {
|
||||
Store: true,
|
||||
Index: true,
|
||||
IncludeInAll: true,
|
||||
DocValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,6 +134,7 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||
rv := NewBooleanFieldMapping()
|
||||
rv.Store = im.StoreDynamic
|
||||
rv.Index = im.IndexDynamic
|
||||
rv.DocValues = im.DocValuesDynamic
|
||||
return rv
|
||||
}
|
||||
|
||||
@ -132,6 +145,7 @@ func NewGeoPointFieldMapping() *FieldMapping {
|
||||
Store: true,
|
||||
Index: true,
|
||||
IncludeInAll: true,
|
||||
DocValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -147,6 +161,9 @@ func (fm *FieldMapping) Options() document.IndexingOptions {
|
||||
if fm.IncludeTermVectors {
|
||||
rv |= document.IncludeTermVectors
|
||||
}
|
||||
if fm.DocValues {
|
||||
rv |= document.DocValues
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
@ -308,6 +325,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "docvalues":
|
||||
err := json.Unmarshal(v, &fm.DocValues)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
invalidKeys = append(invalidKeys, k)
|
||||
}
|
||||
|
19
vendor/github.com/blevesearch/bleve/mapping/index.go
generated
vendored
19
vendor/github.com/blevesearch/bleve/mapping/index.go
generated
vendored
@ -50,6 +50,7 @@ type IndexMappingImpl struct {
|
||||
DefaultField string `json:"default_field"`
|
||||
StoreDynamic bool `json:"store_dynamic"`
|
||||
IndexDynamic bool `json:"index_dynamic"`
|
||||
DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"`
|
||||
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
|
||||
cache *registry.Cache
|
||||
}
|
||||
@ -154,6 +155,7 @@ func NewIndexMapping() *IndexMappingImpl {
|
||||
DefaultField: defaultField,
|
||||
IndexDynamic: IndexDynamic,
|
||||
StoreDynamic: StoreDynamic,
|
||||
DocValuesDynamic: DocValuesDynamic,
|
||||
CustomAnalysis: newCustomAnalysis(),
|
||||
cache: registry.NewCache(),
|
||||
}
|
||||
@ -217,6 +219,7 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
|
||||
im.TypeMapping = make(map[string]*DocumentMapping)
|
||||
im.StoreDynamic = StoreDynamic
|
||||
im.IndexDynamic = IndexDynamic
|
||||
im.DocValuesDynamic = DocValuesDynamic
|
||||
|
||||
var invalidKeys []string
|
||||
for k, v := range tmp {
|
||||
@ -271,6 +274,11 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "docvalues_dynamic":
|
||||
err := json.Unmarshal(v, &im.DocValuesDynamic)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
invalidKeys = append(invalidKeys, k)
|
||||
}
|
||||
@ -289,7 +297,12 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
|
||||
}
|
||||
|
||||
func (im *IndexMappingImpl) determineType(data interface{}) string {
|
||||
// first see if the object implements Classifier
|
||||
// first see if the object implements bleveClassifier
|
||||
bleveClassifier, ok := data.(bleveClassifier)
|
||||
if ok {
|
||||
return bleveClassifier.BleveType()
|
||||
}
|
||||
// next see if the object implements Classifier
|
||||
classifier, ok := data.(Classifier)
|
||||
if ok {
|
||||
return classifier.Type()
|
||||
@ -313,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
|
||||
|
||||
// see if the _all field was disabled
|
||||
allMapping := docMapping.documentMappingForPath("_all")
|
||||
if allMapping == nil || (allMapping.Enabled != false) {
|
||||
if allMapping == nil || allMapping.Enabled {
|
||||
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
|
||||
doc.AddField(field)
|
||||
}
|
||||
@ -334,7 +347,7 @@ func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentM
|
||||
doc: doc,
|
||||
im: im,
|
||||
dm: dm,
|
||||
excludedFromAll: []string{},
|
||||
excludedFromAll: []string{"_id"},
|
||||
}
|
||||
}
|
||||
|
||||
|
13
vendor/github.com/blevesearch/bleve/mapping/mapping.go
generated
vendored
13
vendor/github.com/blevesearch/bleve/mapping/mapping.go
generated
vendored
@ -22,12 +22,21 @@ import (
|
||||
"github.com/blevesearch/bleve/document"
|
||||
)
|
||||
|
||||
// A Classifier is an interface describing any object
|
||||
// which knows how to identify its own type.
|
||||
// A Classifier is an interface describing any object which knows how to
|
||||
// identify its own type. Alternatively, if a struct already has a Type
|
||||
// field or method in conflict, one can use BleveType instead.
|
||||
type Classifier interface {
|
||||
Type() string
|
||||
}
|
||||
|
||||
// A bleveClassifier is an interface describing any object which knows how
|
||||
// to identify its own type. This is introduced as an alternative to the
|
||||
// Classifier interface which often has naming conflicts with existing
|
||||
// structures.
|
||||
type bleveClassifier interface {
|
||||
BleveType() string
|
||||
}
|
||||
|
||||
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
|
||||
|
||||
// SetLog sets the logger used for logging
|
||||
|
4
vendor/github.com/blevesearch/bleve/query.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/query.go
generated
vendored
@ -209,8 +209,8 @@ func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightL
|
||||
return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat)
|
||||
}
|
||||
|
||||
// NewGeoDistanceQuery creates a new Query for performing geo bounding
|
||||
// box searches. The arguments describe a position and a distance. Documents
|
||||
// NewGeoDistanceQuery creates a new Query for performing geo distance
|
||||
// searches. The arguments describe a position and a distance. Documents
|
||||
// which have an indexed geo point which is less than or equal to the provided
|
||||
// distance from the given position will be returned.
|
||||
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user