Update server dependencies
This commit is contained in:
parent
fb8fec38ff
commit
de36fe682a
883 changed files with 147940 additions and 68404 deletions
206
vendor/github.com/couchbase/vellum/levenshtein/dfa.go
generated
vendored
Normal file
206
vendor/github.com/couchbase/vellum/levenshtein/dfa.go
generated
vendored
Normal file
|
@ -0,0 +1,206 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"github.com/couchbase/vellum/utf8"
|
||||
)
|
||||
|
||||
type dfa struct {
|
||||
states statesStack
|
||||
}
|
||||
|
||||
// state is a single node of the dfa.
type state struct {
	// next holds, per input value, the index of the following state.
	// A zero entry is printed as a dash by String.
	next []int
	// match reports whether this state is a matching state.
	match bool
}

// String renders the transition table as a grid with 16 columns: a header
// line with the column numbers in hex, then one line per group of 16
// entries prefixed with the row number in hex. Zero entries are shown as
// a dash, all other entries as the decimal target state.
func (s *state) String() string {
	const perRow = 16

	out := " |"
	for col := 0; col < perRow; col++ {
		out += fmt.Sprintf("% 5x", col)
	}
	out += "\n"

	for idx, target := range s.next {
		col := idx % perRow
		if col == 0 {
			// first cell of a row: emit the row label
			out += fmt.Sprintf("%x |", idx/perRow)
		}
		if target == 0 {
			out += " -"
		} else {
			out += fmt.Sprintf("% 5d", target)
		}
		if col == perRow-1 {
			// last cell of a row: terminate the line
			out += "\n"
		}
	}
	return out
}
|
||||
|
||||
type dfaBuilder struct {
|
||||
dfa *dfa
|
||||
lev *dynamicLevenshtein
|
||||
cache map[string]int
|
||||
keyBuf []byte
|
||||
}
|
||||
|
||||
func newDfaBuilder(lev *dynamicLevenshtein) *dfaBuilder {
|
||||
dfab := &dfaBuilder{
|
||||
dfa: &dfa{
|
||||
states: make([]*state, 0, 16),
|
||||
},
|
||||
lev: lev,
|
||||
cache: make(map[string]int, 1024),
|
||||
}
|
||||
dfab.newState(false) // create state 0, invalid
|
||||
return dfab
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) build() (*dfa, error) {
|
||||
var stack intsStack
|
||||
stack = stack.Push(b.lev.start())
|
||||
seen := make(map[int]struct{})
|
||||
|
||||
var levState []int
|
||||
stack, levState = stack.Pop()
|
||||
for levState != nil {
|
||||
dfaSi := b.cachedState(levState)
|
||||
mmToSi, mmMismatchState, err := b.addMismatchUtf8States(dfaSi, levState)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if mmToSi != 0 {
|
||||
if _, ok := seen[mmToSi]; !ok {
|
||||
seen[mmToSi] = struct{}{}
|
||||
stack = stack.Push(mmMismatchState)
|
||||
}
|
||||
}
|
||||
|
||||
i := 0
|
||||
for _, r := range b.lev.query {
|
||||
if uint(levState[i]) > b.lev.distance {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
levNext := b.lev.accept(levState, &r)
|
||||
nextSi := b.cachedState(levNext)
|
||||
if nextSi != 0 {
|
||||
err = b.addUtf8Sequences(true, dfaSi, nextSi, r, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, ok := seen[nextSi]; !ok {
|
||||
seen[nextSi] = struct{}{}
|
||||
stack = stack.Push(levNext)
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
if len(b.dfa.states) > StateLimit {
|
||||
return nil, ErrTooManyStates
|
||||
}
|
||||
|
||||
stack, levState = stack.Pop()
|
||||
}
|
||||
|
||||
return b.dfa, nil
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) cachedState(levState []int) int {
|
||||
rv, _ := b.cached(levState)
|
||||
return rv
|
||||
}
|
||||
|
||||
// levStateKey serializes levState into a byte key, writing each value as
// an 8-byte little-endian integer. buf is reused when its capacity is
// sufficient; otherwise a new slice is allocated. The returned slice has
// length 8*len(levState).
func levStateKey(levState []int, buf []byte) []byte {
	need := 8 * len(levState)
	if need > cap(buf) {
		buf = make([]byte, need)
	}
	buf = buf[:need]

	off := 0
	for _, v := range levState {
		binary.LittleEndian.PutUint64(buf[off:], uint64(v))
		off += 8
	}
	return buf
}
|
||||
|
||||
func (b *dfaBuilder) cached(levState []int) (int, bool) {
|
||||
if !b.lev.canMatch(levState) {
|
||||
return 0, true
|
||||
}
|
||||
b.keyBuf = levStateKey(levState, b.keyBuf)
|
||||
v, ok := b.cache[string(b.keyBuf)]
|
||||
if ok {
|
||||
return v, true
|
||||
}
|
||||
match := b.lev.isMatch(levState)
|
||||
b.dfa.states = b.dfa.states.Push(&state{
|
||||
next: make([]int, 256),
|
||||
match: match,
|
||||
})
|
||||
newV := len(b.dfa.states) - 1
|
||||
b.cache[string(b.keyBuf)] = newV
|
||||
return newV, false
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) addMismatchUtf8States(fromSi int, levState []int) (int, []int, error) {
|
||||
mmState := b.lev.accept(levState, nil)
|
||||
toSi, _ := b.cached(mmState)
|
||||
if toSi == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
err := b.addUtf8Sequences(false, fromSi, toSi, 0, unicode.MaxRune)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
return toSi, mmState, nil
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) addUtf8Sequences(overwrite bool, fromSi, toSi int, fromChar, toChar rune) error {
|
||||
sequences, err := utf8.NewSequences(fromChar, toChar)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range sequences {
|
||||
fsi := fromSi
|
||||
for _, utf8r := range seq[:len(seq)-1] {
|
||||
tsi := b.newState(false)
|
||||
b.addUtf8Range(overwrite, fsi, tsi, utf8r)
|
||||
fsi = tsi
|
||||
}
|
||||
b.addUtf8Range(overwrite, fsi, toSi, seq[len(seq)-1])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) addUtf8Range(overwrite bool, from, to int, rang *utf8.Range) {
|
||||
for by := rang.Start; by <= rang.End; by++ {
|
||||
if overwrite || b.dfa.states[from].next[by] == 0 {
|
||||
b.dfa.states[from].next[by] = to
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *dfaBuilder) newState(match bool) int {
|
||||
b.dfa.states = append(b.dfa.states, &state{
|
||||
next: make([]int, 256),
|
||||
match: match,
|
||||
})
|
||||
return len(b.dfa.states) - 1
|
||||
}
|
90
vendor/github.com/couchbase/vellum/levenshtein/levenshtein.go
generated
vendored
Normal file
90
vendor/github.com/couchbase/vellum/levenshtein/levenshtein.go
generated
vendored
Normal file
|
@ -0,0 +1,90 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// StateLimit is the maximum number of states a Levenshtein automaton
// may contain.
const StateLimit = 10000

// ErrTooManyStates is returned if you attempt to build a Levenshtein
// automaton which requires too many states.
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states", StateLimit)
|
||||
|
||||
// Levenshtein implements the vellum.Automaton interface for matching
|
||||
// terms within the specified Levenshtein edit-distance of the queried
|
||||
// term. This automaton recognizes utf-8 encoded bytes and computes
|
||||
// the edit distance on the result code-points, not on the raw bytes.
|
||||
type Levenshtein struct {
|
||||
prog *dynamicLevenshtein
|
||||
dfa *dfa
|
||||
}
|
||||
|
||||
// New creates a new Levenshtein automaton for the specified
|
||||
// query string and edit distance.
|
||||
func New(query string, distance int) (*Levenshtein, error) {
|
||||
lev := &dynamicLevenshtein{
|
||||
query: query,
|
||||
distance: uint(distance),
|
||||
}
|
||||
dfabuilder := newDfaBuilder(lev)
|
||||
dfa, err := dfabuilder.build()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Levenshtein{
|
||||
prog: lev,
|
||||
dfa: dfa,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start returns the start state of this automaton.
|
||||
func (l *Levenshtein) Start() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// IsMatch returns if the specified state is a matching state.
|
||||
func (l *Levenshtein) IsMatch(s int) bool {
|
||||
if s < len(l.dfa.states) {
|
||||
return l.dfa.states[s].match
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// CanMatch returns if the specified state can ever transition to a matching
|
||||
// state.
|
||||
func (l *Levenshtein) CanMatch(s int) bool {
|
||||
if s < len(l.dfa.states) && s > 0 {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// WillAlwaysMatch returns if the specified state will always end in a
|
||||
// matching state.
|
||||
func (l *Levenshtein) WillAlwaysMatch(s int) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Accept returns the new state, resulting from the transite byte b
|
||||
// when currently in the state s.
|
||||
func (l *Levenshtein) Accept(s int, b byte) int {
|
||||
if s < len(l.dfa.states) {
|
||||
return l.dfa.states[s].next[b]
|
||||
}
|
||||
return 0
|
||||
}
|
78
vendor/github.com/couchbase/vellum/levenshtein/rune.go
generated
vendored
Normal file
78
vendor/github.com/couchbase/vellum/levenshtein/rune.go
generated
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// dynamicLevenshtein is the rune-based automaton, which is used
// during the building of the utf8-aware byte-based automaton.
//
// A state is a slice with one entry per rune of query plus one.
type dynamicLevenshtein struct {
	query    string
	distance uint
}

// start returns the initial state: entry i holds the value i, for i from 0
// up to and including the number of runes in the query.
func (d *dynamicLevenshtein) start() []int {
	row := make([]int, utf8.RuneCountInString(d.query)+1)
	for i := range row {
		row[i] = i
	}
	return row
}

// isMatch reports whether the last entry of state is within the allowed
// distance.
func (d *dynamicLevenshtein) isMatch(state []int) bool {
	return uint(state[len(state)-1]) <= d.distance
}

// canMatch reports whether the smallest entry of state is within the
// allowed distance. An empty state cannot match.
func (d *dynamicLevenshtein) canMatch(state []int) bool {
	if len(state) == 0 {
		return false
	}
	lowest := state[0]
	for _, v := range state[1:] {
		if v < lowest {
			lowest = v
		}
	}
	return uint(lowest) <= d.distance
}

// accept returns the state that follows state after consuming rune r. A
// nil r stands for a rune that equals no rune of the query. Every entry
// except the first is capped at distance+1.
func (d *dynamicLevenshtein) accept(state []int, r *rune) []int {
	limit := int(d.distance) + 1
	next := []int{state[0] + 1}

	// col counts runes; the range index would count bytes instead.
	col := 0
	for _, c := range d.query {
		diagonal := state[col]
		if r == nil || *r != c {
			diagonal++
		}
		best := min(min(next[col]+1, state[col+1]+1), diagonal)
		next = append(next, min(best, limit))
		col++
	}
	return next
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
49
vendor/github.com/couchbase/vellum/levenshtein/stack.go
generated
vendored
Normal file
49
vendor/github.com/couchbase/vellum/levenshtein/stack.go
generated
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein
|
||||
|
||||
import "fmt"
|
||||
|
||||
type statesStack []*state
|
||||
|
||||
func (s statesStack) String() string {
|
||||
rv := ""
|
||||
for i := 0; i < len(s); i++ {
|
||||
matchStr := ""
|
||||
if s[i].match {
|
||||
matchStr = " (MATCH) "
|
||||
}
|
||||
rv += fmt.Sprintf("state %d%s:\n%v\n", i, matchStr, s[i])
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s statesStack) Push(v *state) statesStack {
|
||||
return append(s, v)
|
||||
}
|
||||
|
||||
// intsStack is a LIFO stack of int slices.
type intsStack [][]int

// Push places v on top of the stack and returns the resulting stack.
func (s intsStack) Push(v []int) intsStack {
	return append(s, v)
}

// Pop removes the top element and returns the shortened stack together
// with that element. Popping an empty stack returns the stack unchanged
// and a nil element.
func (s intsStack) Pop() (intsStack, []int) {
	if len(s) == 0 {
		return s, nil
	}
	top := len(s) - 1
	return s[:top], s[top]
}
|
Loading…
Add table
Add a link
Reference in a new issue