Switch from Godep to go vendoring

This commit is contained in:
Ken-Håvard Lieng 2016-03-01 01:51:26 +01:00
parent 6b37713bc0
commit cd317761c5
1504 changed files with 263076 additions and 34441 deletions

View file

@ -0,0 +1,57 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
)
// AnalyzerName is the name under which the Italian analyzer is registered
// with the registry.
const AnalyzerName = "it"
// AnalyzerConstructor assembles the Italian analyzer from components looked up
// in cache: the unicode tokenizer followed by the elision, lower-case, stop
// word and light stemmer token filters.
//
// config is not consulted. The first lookup that fails aborts construction and
// its error is returned unchanged with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are looked up, and stored on the analyzer, in exactly this order.
	filterNames := []string{
		ElisionName,
		lower_case_filter.Name,
		StopName,
		LightStemmerName,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, lookupErr := cache.TokenFilterNamed(name)
		if lookupErr != nil {
			return nil, lookupErr
		}
		filters = append(filters, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor with the registry under AnalyzerName.
func init() {
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}

View file

@ -0,0 +1,82 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// TestItalianAnalyzer runs sample inputs through the analyzer registered as
// AnalyzerName and compares the resulting terms (and only the terms) with the
// expected ones.
func TestItalianAnalyzer(t *testing.T) {
	// single builds the expected stream for an input that yields one term.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("abbandonata"), output: single("abbandonat")},
		{input: []byte("abbandonati"), output: single("abbandonat")},
		// stop word
		{input: []byte("dallo"), output: analysis.TokenStream{}},
		// contractions
		{input: []byte("dell'Italia"), output: single("ital")},
		{input: []byte("l'Italiano"), output: single("italian")},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		// A length mismatch is fatal: the per-term comparison below indexes
		// the expected stream by position.
		if want, have := len(tc.output), len(got); have != want {
			t.Fatalf("expected length: %d, got %d", want, have)
		}
		for i := range got {
			wantTerm, gotTerm := tc.output[i].Term, got[i].Term
			if reflect.DeepEqual(gotTerm, wantTerm) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
		}
	}
}

View file

@ -0,0 +1,45 @@
package it
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// ArticlesName is the name under which the Italian articles token map is
// registered with the registry.
const ArticlesName = "articles_it"
// ItalianArticles holds the entries, one per line, that
// ArticlesTokenMapConstructor loads into the token map registered as
// ArticlesName.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var ItalianArticles = []byte(`
c
l
all
dall
dell
nell
sull
coll
pell
gl
agl
dagl
degl
negl
sugl
un
m
t
s
v
d
`)
// ArticlesTokenMapConstructor creates a new token map and fills it from
// ItalianArticles. Neither config nor cache is consulted.
//
// The map is returned together with any error reported by LoadBytes, so on
// failure callers receive both the (possibly partially filled) map and the
// error.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	if loadErr := articles.LoadBytes(ItalianArticles); loadErr != nil {
		return articles, loadErr
	}
	return articles, nil
}
// init registers ArticlesTokenMapConstructor with the registry under
// ArticlesName.
func init() {
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
}

View file

@ -0,0 +1,32 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token_filters/elision_filter"
"github.com/blevesearch/bleve/registry"
)
// ElisionName is the name under which the Italian elision token filter is
// registered with the registry.
const ElisionName = "elision_it"
// ElisionFilterConstructor builds an elision filter driven by the token map
// registered as ArticlesName. config is not consulted.
//
// If the token map cannot be obtained from cache, a nil filter is returned
// along with an error that embeds the lookup failure's message.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articles, lookupErr := cache.TokenMapNamed(ArticlesName)
	if lookupErr == nil {
		return elision_filter.NewElisionFilter(articles), nil
	}
	return nil, fmt.Errorf("error building elision filter: %v", lookupErr)
}
// init registers ElisionFilterConstructor with the registry under ElisionName.
func init() {
registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
}

View file

@ -0,0 +1,50 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// TestItalianElision checks that the token filter registered as ElisionName
// turns the term "dell'Italia" into "Italia".
func TestItalianElision(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("dell'Italia"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("Italia"),
				},
			},
		},
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := elisionFilter.Filter(test.input)
		if reflect.DeepEqual(actual, test.output) {
			continue
		}
		// Compare lengths before indexing: if the filter returns fewer tokens
		// than expected, reporting element 0 directly would panic and hide
		// the real failure. Two empty streams are treated as equal.
		if len(actual) != len(test.output) {
			t.Errorf("expected %d tokens, got %d", len(test.output), len(actual))
			continue
		}
		for i, tok := range actual {
			if !reflect.DeepEqual(tok, test.output[i]) {
				t.Errorf("token %d: expected %s, got %s", i, test.output[i].Term, tok.Term)
			}
		}
	}
}

View file

@ -0,0 +1,96 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"bytes"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// LightStemmerName is the name under which the Italian light stemmer token
// filter is registered with the registry.
const LightStemmerName = "stemmer_it_light"
// ItalianLightStemmerFilter is a token filter whose Filter method rewrites
// each token's term using the light Italian stemming rules implemented by
// stem. It has no fields.
type ItalianLightStemmerFilter struct {
}
// NewItalianLightStemmerFilterFilter returns a pointer to a new
// ItalianLightStemmerFilter. (The doubled "Filter" in the name is part of the
// exported API and is kept for compatibility.)
func NewItalianLightStemmerFilterFilter() *ItalianLightStemmerFilter {
	return new(ItalianLightStemmerFilter)
}
// Filter stems the term of every token in input. Each term is decoded into
// runes, passed through stem, and re-encoded with analysis.BuildTermFromRunes.
// Tokens are updated in place and the same stream is returned.
func (s *ItalianLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for idx := range input {
		tok := input[idx]
		stemmed := stem(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// stem applies the light Italian stemming rules to input.
//
// Terms shorter than six runes are returned untouched. Otherwise every
// accented vowel (grave, acute, circumflex or diaeresis) is replaced in place
// by its plain form, and then a trailing vowel is stripped:
//
//   - final 'e' or 'i': dropped, together with a preceding 'i' or 'h';
//   - final 'a' or 'o': dropped, together with a preceding 'i'.
//
// Terms ending in any other rune keep their length. The accent replacement
// mutates input, and the result is a prefix of input sharing its backing
// array.
func stem(input []rune) []rune {
	n := len(input)
	if n < 6 {
		return input
	}

	// Fold accented vowels onto their unaccented counterparts.
	for idx, r := range input {
		switch r {
		case 'à', 'á', 'â', 'ä':
			input[idx] = 'a'
		case 'ò', 'ó', 'ô', 'ö':
			input[idx] = 'o'
		case 'è', 'é', 'ê', 'ë':
			input[idx] = 'e'
		case 'ù', 'ú', 'û', 'ü':
			input[idx] = 'u'
		case 'ì', 'í', 'î', 'ï':
			input[idx] = 'i'
		}
	}

	// Read the suffix only after folding, so that e.g. a final 'é' counts as 'e'.
	last, prev := input[n-1], input[n-2]
	switch last {
	case 'e', 'i':
		if prev == 'i' || prev == 'h' {
			return input[:n-2]
		}
		return input[:n-1]
	case 'a', 'o':
		if prev == 'i' {
			return input[:n-2]
		}
		return input[:n-1]
	}
	return input
}
// ItalianLightStemmerFilterConstructor returns a new ItalianLightStemmerFilter
// as an analysis.TokenFilter. Neither config nor cache is consulted, and the
// returned error is always nil.
func ItalianLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewItalianLightStemmerFilterFilter()
	return filter, nil
}
// init registers ItalianLightStemmerFilterConstructor with the registry under
// LightStemmerName.
func init() {
registry.RegisterTokenFilter(LightStemmerName, ItalianLightStemmerFilterConstructor)
}

View file

@ -0,0 +1,62 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// TestItalianLightStemmer checks that the token filter registered as
// LightStemmerName reduces "ragazzo" and "ragazzi" to the same stem, "ragazz".
func TestItalianLightStemmer(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazzo"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazz"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazzi"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ragazz"),
				},
			},
		},
	}

	cache := registry.NewCache()
	filter, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := filter.Filter(test.input)
		if reflect.DeepEqual(actual, test.output) {
			continue
		}
		// Compare lengths before indexing: if the filter returns fewer tokens
		// than expected, reporting element 0 directly would panic and hide
		// the real failure. Two empty streams are treated as equal.
		if len(actual) != len(test.output) {
			t.Errorf("expected %d tokens, got %d", len(test.output), len(actual))
			continue
		}
		for i, tok := range actual {
			if !reflect.DeepEqual(tok, test.output[i]) {
				t.Errorf("token %d: expected %s, got %s", i, test.output[i].Term, tok.Term)
			}
		}
	}
}

View file

@ -0,0 +1,28 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// +build libstemmer full
package it
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter"
"github.com/blevesearch/bleve/registry"
)
// StemmerName is the name under which the stemmer_filter-based Italian stemmer
// token filter is registered with the registry.
const StemmerName = "stemmer_it"
// StemmerFilterConstructor returns whatever stemmer_filter.NewStemmerFilter
// produces for the argument "it" (presumably a language code; confirm against
// stemmer_filter). Neither config nor cache is consulted; both the filter and
// the error are passed through unchanged.
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter, err := stemmer_filter.NewStemmerFilter("it")
	return filter, err
}
// init registers StemmerFilterConstructor with the registry under StemmerName.
func init() {
registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}

View file

@ -0,0 +1,28 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package it
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
"github.com/blevesearch/bleve/registry"
)
// StopTokenFilterConstructor builds a stop token filter backed by the token
// map registered as StopName. config is not consulted.
//
// If the token map cannot be obtained from cache, the lookup error is returned
// unchanged with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr == nil {
		return stop_tokens_filter.NewStopTokensFilter(stopWords), nil
	}
	return nil, lookupErr
}
// init registers StopTokenFilterConstructor with the registry as a token
// filter under StopName.
func init() {
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}

View file

@ -0,0 +1,327 @@
package it
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// StopName is the name used to register both the Italian stop word token map
// and the Italian stop token filter with the registry.
const StopName = "stop_it"
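// ItalianStopWords is the Italian stop word list in Snowball format: each
// stop word starts a line and comments begin with a vertical bar.
// The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backquotes (`) were changed to apostrophes (') so that the list can be
// embedded in a raw string literal.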
var ItalianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
ad | a (to) before vowel
al | a + il
allo | a + lo
ai | a + i
agli | a + gli
all | a + l'
agl | a + gl'
alla | a + la
alle | a + le
con | with
col | con + il
coi | con + i (forms collo, cogli etc are now very rare)
da | from
dal | da + il
dallo | da + lo
dai | da + i
dagli | da + gli
dall | da + l'
dagl | da + gll'
dalla | da + la
dalle | da + le
di | of
del | di + il
dello | di + lo
dei | di + i
degli | di + gli
dell | di + l'
degl | di + gl'
della | di + la
delle | di + le
in | in
nel | in + el
nello | in + lo
nei | in + i
negli | in + gli
nell | in + l'
negl | in + gl'
nella | in + la
nelle | in + le
su | on
sul | su + il
sullo | su + lo
sui | su + i
sugli | su + gli
sull | su + l'
sugl | su + gl'
sulla | su + la
sulle | su + le
per | through, by
tra | among
contro | against
io | I
tu | thou
lui | he
lei | she
noi | we
voi | you
loro | they
mio | my
mia |
miei |
mie |
tuo |
tua |
tuoi | thy
tue |
suo |
sua |
suoi | his, her
sue |
nostro | our
nostra |
nostri |
nostre |
vostro | your
vostra |
vostri |
vostre |
mi | me
ti | thee
ci | us, there
vi | you, there
lo | him, the
la | her, the
li | them
le | them, the
gli | to him, the
ne | from there etc
il | the
un | a
uno | a
una | a
ma | but
ed | and
se | if
perché | why, because
anche | also
come | how
dov | where (as dov')
dove | where
che | who, that
chi | who
cui | whom
non | not
più | more
quale | who, that
quanto | how much
quanti |
quanta |
quante |
quello | that
quelli |
quella |
quelle |
questo | this
questi |
questa |
queste |
si | yes
tutto | all
tutti | all
| single letter forms:
a | at
c | as c' for ce or ci
e | and
i | the
l | as l'
o | or
| forms of avere, to have (not including the infinitive):
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avrò
avrai
avrà
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
| forms of essere, to be (not including the infinitive):
sono
sei
è
siamo
siete
sia
siate
siano
sarò
sarai
sarà
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
| forms of fare, to do (not including the infinitive, fa, fat-):
faccio
fai
facciamo
fanno
faccia
facciate
facciano
farò
farai
farà
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
| forms of stare, to be (not including the infinitive):
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
starò
starai
starà
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando
`)
// TokenMapConstructor creates a new token map and fills it from
// ItalianStopWords. Neither config nor cache is consulted.
//
// The map is returned together with any error reported by LoadBytes, so on
// failure callers receive both the (possibly partially filled) map and the
// error.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(ItalianStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor with the registry as a token map under
// StopName.
func init() {
registry.RegisterTokenMap(StopName, TokenMapConstructor)
}