Switch from Godep to go vendoring

2016-03-01 01:51:26 +01:00 · 2016-03-01 01:51:26 +01:00 · cd317761c5
commit cd317761c5
parent 6b37713bc0
1504 changed files with 263076 additions and 34441 deletions
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar.go
@ -0,0 +1,60 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+
+	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
+	"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
+	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
+)
+
+// AnalyzerName is the name under which the Arabic analyzer is registered.
+const AnalyzerName = "ar"
+
+// AnalyzerConstructor assembles the Arabic analyzer. Input is split by the
+// unicode tokenizer and every token then passes through, in order: the
+// lower-case filter, an NFKC unicode normalization filter, the Arabic stop
+// word filter, the Arabic normalization filter and the Arabic stemmer.
+//
+// The tokenizer and all filters except the NFKC one are looked up by name in
+// cache; the first lookup that fails aborts construction and its error is
+// returned unchanged. config is not consulted.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
+	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	lowerCase, err := cache.TokenFilterNamed(lower_case_filter.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	// Constructed directly instead of being fetched from the cache.
+	nfkc := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKC)
+
+	stopWords, err := cache.TokenFilterNamed(StopName)
+	if err != nil {
+		return nil, err
+	}
+
+	arabicNormalize, err := cache.TokenFilterNamed(NormalizeName)
+	if err != nil {
+		return nil, err
+	}
+
+	arabicStemmer, err := cache.TokenFilterNamed(StemmerName)
+	if err != nil {
+		return nil, err
+	}
+
+	// Filters run in slice order; stop words are removed before the Arabic
+	// normalization and stemming steps rewrite the terms.
+	chain := []analysis.TokenFilter{
+		lowerCase,
+		nfkc,
+		stopWords,
+		arabicNormalize,
+		arabicStemmer,
+	}
+	return &analysis.Analyzer{
+		Tokenizer:    unicodeTokenizer,
+		TokenFilters: chain,
+	}, nil
+}
+
+// init registers AnalyzerConstructor with the registry under AnalyzerName.
+func init() {
+	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar_test.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar_test.go
@ -0,0 +1,179 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// TestArabicAnalyzer runs the analyzer registered under AnalyzerName over a
+// table of inputs and compares the complete resulting token stream (term,
+// position and byte offsets) with the expected one.
+func TestArabicAnalyzer(t *testing.T) {
+	tests := []struct {
+		input  []byte
+		output analysis.TokenStream
+	}{
+		{
+			input: []byte("كبير"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("كبير"),
+					Position: 1,
+					Start:    0,
+					End:      8,
+				},
+			},
+		},
+		// feminine marker
+		{
+			input: []byte("كبيرة"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("كبير"),
+					Position: 1,
+					Start:    0,
+					End:      10,
+				},
+			},
+		},
+		{
+			input: []byte("مشروب"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("مشروب"),
+					Position: 1,
+					Start:    0,
+					End:      10,
+				},
+			},
+		},
+		// plural -at
+		{
+			input: []byte("مشروبات"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("مشروب"),
+					Position: 1,
+					Start:    0,
+					End:      14,
+				},
+			},
+		},
+		// plural -in
+		{
+			input: []byte("أمريكيين"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("امريك"),
+					Position: 1,
+					Start:    0,
+					End:      16,
+				},
+			},
+		},
+		// singular with bare alif
+		{
+			input: []byte("امريكي"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("امريك"),
+					Position: 1,
+					Start:    0,
+					End:      12,
+				},
+			},
+		},
+		{
+			input: []byte("كتاب"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("كتاب"),
+					Position: 1,
+					Start:    0,
+					End:      8,
+				},
+			},
+		},
+		// definite article
+		{
+			input: []byte("الكتاب"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("كتاب"),
+					Position: 1,
+					Start:    0,
+					End:      12,
+				},
+			},
+		},
+		{
+			input: []byte("ما ملكت أيمانكم"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("ملكت"),
+					Position: 2,
+					Start:    5,
+					End:      13,
+				},
+				&analysis.Token{
+					Term:     []byte("ايمانكم"),
+					Position: 3,
+					Start:    14,
+					End:      28,
+				},
+			},
+		},
+		// stopwords
+		{
+			input: []byte("الذين ملكت أيمانكم"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("ملكت"),
+					Position: 2,
+					Start:    11,
+					End:      19,
+				},
+				&analysis.Token{
+					Term:     []byte("ايمانكم"),
+					Position: 3,
+					Start:    20,
+					End:      34,
+				},
+			},
+		},
+		// presentation form normalization
+		{
+			input: []byte("ﺍﻟﺴﻼﻢ"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term:     []byte("سلام"),
+					Position: 1,
+					Start:    0,
+					End:      15,
+				},
+			},
+		},
+	}
+
+	cache := registry.NewCache()
+	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, test := range tests {
+		actual := analyzer.Analyze(test.input)
+		if !reflect.DeepEqual(actual, test.output) {
+			t.Errorf("expected %v, got %v", test.output, actual)
+			// Hex-dump the first terms only when both streams have one:
+			// indexing an empty stream here would panic and abort the
+			// remaining cases instead of reporting the mismatch.
+			if len(test.output) > 0 && len(actual) > 0 {
+				t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
+			}
+		}
+	}
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize.go
@ -0,0 +1,80 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"bytes"
+
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// NormalizeName is the name under which the Arabic normalization token filter
+// is registered.
+const NormalizeName = "normalize_ar"
+
+// Arabic code points inspected by normalize: the alef forms (AlefMadda,
+// AlefHamzaAbove and AlefHamzaBelow are rewritten to Alef), Yeh and DotlessYeh
+// (DotlessYeh is rewritten to Yeh), TehMarbuta (rewritten to Heh), and Tatweel
+// plus the marks Fathatan through Sukun, all of which are deleted.
+const (
+	Alef           = '\u0627'
+	AlefMadda      = '\u0622'
+	AlefHamzaAbove = '\u0623'
+	AlefHamzaBelow = '\u0625'
+	Yeh            = '\u064A'
+	DotlessYeh     = '\u0649'
+	TehMarbuta     = '\u0629'
+	Heh            = '\u0647'
+	Tatweel        = '\u0640'
+	Fathatan       = '\u064B'
+	Dammatan       = '\u064C'
+	Kasratan       = '\u064D'
+	Fatha          = '\u064E'
+	Damma          = '\u064F'
+	Kasra          = '\u0650'
+	Shadda         = '\u0651'
+	Sukun          = '\u0652'
+)
+
+// ArabicNormalizeFilter is a token filter that rewrites each token's term with
+// normalize. It has no fields.
+type ArabicNormalizeFilter struct {
+}
+
+// NewArabicNormalizeFilter returns a new ArabicNormalizeFilter.
+func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
+	return &ArabicNormalizeFilter{}
+}
+
+// Filter replaces the term of every token in input with its normalized form.
+// The tokens are updated in place and the same stream is handed back.
+func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for i := range input {
+		input[i].Term = normalize(input[i].Term)
+	}
+	return input
+}
+
+// normalize returns the Arabic-normalized form of the term in input: alef with
+// madda or hamza becomes plain alef, dotless yeh becomes yeh, teh marbuta
+// becomes heh, and tatweel and the listed diacritic marks are removed. All
+// other runes are kept as they are.
+func normalize(input []byte) []byte {
+	runes := bytes.Runes(input)
+	pos := 0
+	for pos < len(runes) {
+		switch runes[pos] {
+		case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
+			runes[pos] = Alef
+		case DotlessYeh:
+			runes[pos] = Yeh
+		case TehMarbuta:
+			runes[pos] = Heh
+		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
+			runes = analysis.DeleteRune(runes, pos)
+			// Do not advance: the next rune now sits at pos.
+			continue
+		}
+		pos++
+	}
+	return analysis.BuildTermFromRunes(runes)
+}
+
+// NormalizerFilterConstructor returns a new ArabicNormalizeFilter. Neither
+// config nor cache is used, and the returned error is always nil.
+func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	return NewArabicNormalizeFilter(), nil
+}
+
+// init registers NormalizerFilterConstructor with the registry under
+// NormalizeName.
+func init() {
+	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize_test.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize_test.go
@ -0,0 +1,229 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/blevesearch/bleve/analysis"
+)
+
+// TestArabicNormalizeFilter feeds single-token streams through an
+// ArabicNormalizeFilter and compares the result with the expected stream. Each
+// case is labelled with the character or mark it exercises.
+func TestArabicNormalizeFilter(t *testing.T) {
+	tests := []struct {
+		input  analysis.TokenStream
+		output analysis.TokenStream
+	}{
+		// AlifMadda
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("آجن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("اجن"),
+				},
+			},
+		},
+		// AlifHamzaAbove
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("أحمد"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("احمد"),
+				},
+			},
+		},
+		// AlifHamzaBelow
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("إعاذ"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("اعاذ"),
+				},
+			},
+		},
+		// AlifMaksura
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("بنى"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("بني"),
+				},
+			},
+		},
+		// TehMarbuta
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("فاطمة"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("فاطمه"),
+				},
+			},
+		},
+		// Tatweel
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("روبرـــــت"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("روبرت"),
+				},
+			},
+		},
+		// Fatha
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("مَبنا"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("مبنا"),
+				},
+			},
+		},
+		// Kasra
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("علِي"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("علي"),
+				},
+			},
+		},
+		// Damma
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("بُوات"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("بوات"),
+				},
+			},
+		},
+		// Fathatan
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولداً"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولدا"),
+				},
+			},
+		},
+		// Kasratan
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولدٍ"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولد"),
+				},
+			},
+		},
+		// Dammatan
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولدٌ"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ولد"),
+				},
+			},
+		},
+		// Sukun
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("نلْسون"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("نلسون"),
+				},
+			},
+		},
+		// Shaddah
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("هتميّ"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("هتمي"),
+				},
+			},
+		},
+		// empty
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte(""),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte(""),
+				},
+			},
+		},
+	}
+
+	arabicNormalizeFilter := NewArabicNormalizeFilter()
+	for _, test := range tests {
+		actual := arabicNormalizeFilter.Filter(test.input)
+		if !reflect.DeepEqual(actual, test.output) {
+			t.Errorf("expected %#v, got %#v", test.output, actual)
+			// Second message shows the first terms as hex bytes.
+			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
+		}
+	}
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar.go
@ -0,0 +1,113 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"bytes"
+
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// StemmerName is the name under which the Arabic stemmer token filter is
+// registered.
+const StemmerName = "stemmer_ar"
+
+// These were obtained from org.apache.lucene.analysis.ar.ArabicStemmer
+//
+// prefixes lists the leading rune sequences stem may strip. stem removes only
+// the first entry that applies, so the order of this list matters.
+var prefixes = [][]rune{
+	[]rune("ال"),
+	[]rune("وال"),
+	[]rune("بال"),
+	[]rune("كال"),
+	[]rune("فال"),
+	[]rune("لل"),
+	[]rune("و"),
+}
+
+// suffixes lists the trailing rune sequences stem may strip. stem tries every
+// entry once, in this order, and removes each one that applies.
+var suffixes = [][]rune{
+	[]rune("ها"),
+	[]rune("ان"),
+	[]rune("ات"),
+	[]rune("ون"),
+	[]rune("ين"),
+	[]rune("يه"),
+	[]rune("ية"),
+	[]rune("ه"),
+	[]rune("ة"),
+	[]rune("ي"),
+}
+
+// ArabicStemmerFilter is a token filter that rewrites each token's term with
+// stem. It has no fields.
+type ArabicStemmerFilter struct{}
+
+// NewArabicStemmerFilter returns a new ArabicStemmerFilter.
+func NewArabicStemmerFilter() *ArabicStemmerFilter {
+	return &ArabicStemmerFilter{}
+}
+
+// Filter replaces the term of every token in input with its stem. The tokens
+// are updated in place and the same stream is handed back.
+func (s *ArabicStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for i := range input {
+		input[i].Term = stem(input[i].Term)
+	}
+	return input
+}
+
+// canStemPrefix reports whether prefix may be stripped from the front of
+// input: input must begin with prefix and enough runes must be left over once
+// it is removed.
+func canStemPrefix(input, prefix []rune) bool {
+	// A one-rune prefix (wa-) must leave at least 3 runes behind; every
+	// other prefix must leave at least 2.
+	minRemaining := 2
+	if len(prefix) == 1 {
+		minRemaining = 3
+	}
+	if len(input)-len(prefix) < minRemaining {
+		return false
+	}
+	head := input[:len(prefix)]
+	for i, r := range prefix {
+		if head[i] != r {
+			return false
+		}
+	}
+	return true
+}
+
+// canStemSuffix reports whether suffix may be stripped from the end of input:
+// input must end with suffix and at least 2 runes must be left over once it
+// is removed.
+func canStemSuffix(input, suffix []rune) bool {
+	keep := len(input) - len(suffix)
+	if keep < 2 {
+		return false
+	}
+	tail := input[keep:]
+	for i, r := range suffix {
+		if tail[i] != r {
+			return false
+		}
+	}
+	return true
+}
+
+// stem returns the stemmed form of the term in input. At most one prefix is
+// removed, then every suffix that applies is removed in list order.
+func stem(input []byte) []byte {
+	word := bytes.Runes(input)
+
+	// Only the first applicable entry of prefixes is stripped.
+	for i := 0; i < len(prefixes); i++ {
+		if canStemPrefix(word, prefixes[i]) {
+			word = word[len(prefixes[i]):]
+			break
+		}
+	}
+
+	// Each suffix is tried once against the word as shortened so far, so
+	// several suffixes can be stripped from the same term.
+	for i := range suffixes {
+		suffix := suffixes[i]
+		if !canStemSuffix(word, suffix) {
+			continue
+		}
+		word = word[:len(word)-len(suffix)]
+	}
+	return analysis.BuildTermFromRunes(word)
+}
+
+// StemmerFilterConstructor returns a new ArabicStemmerFilter. Neither config
+// nor cache is used, and the returned error is always nil.
+func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	return NewArabicStemmerFilter(), nil
+}
+
+// init registers StemmerFilterConstructor with the registry under StemmerName.
+func init() {
+	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar_test.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar_test.go
@ -0,0 +1,392 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/blevesearch/bleve/analysis"
+)
+
+// TestArabicStemmerFilter feeds single-token streams through an
+// ArabicStemmerFilter and compares the result with the expected stream. Cases
+// cover individual prefixes and suffixes, combinations, and terms that are
+// expected to come back unchanged.
+func TestArabicStemmerFilter(t *testing.T) {
+	tests := []struct {
+		input  analysis.TokenStream
+		output analysis.TokenStream
+	}{
+		// AlPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("الحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// WalPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("والحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// BalPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("بالحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// KalPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("كالحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// FalPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("فالحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// LlPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("للاخر"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("اخر"),
+				},
+			},
+		},
+		// WaPrefix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("وحسن"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("حسن"),
+				},
+			},
+		},
+		// AhSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("زوجها"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("زوج"),
+				},
+			},
+		},
+		// AnSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدان"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// AtSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدات"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// WnSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدون"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// YnSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدين"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// YhSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهديه"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// YpSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدية"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// HSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهده"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// PSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدة"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// YSuffix
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدي"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// ComboPrefSuf
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("وساهدون"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// ComboSuf
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهدهات"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("ساهد"),
+				},
+			},
+		},
+		// ShouldntStem
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("الو"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("الو"),
+				},
+			},
+		},
+		// NonArabic
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("English"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("English"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلام"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلام"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("السلام"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلام"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلامة"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلام"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("السلامة"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("سلام"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("الوصل"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("وصل"),
+				},
+			},
+		},
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("والصل"),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("صل"),
+				},
+			},
+		},
+		// Empty
+		{
+			input: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte(""),
+				},
+			},
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte(""),
+				},
+			},
+		},
+	}
+
+	arabicStemmerFilter := NewArabicStemmerFilter()
+	for _, test := range tests {
+		actual := arabicStemmerFilter.Filter(test.input)
+		if !reflect.DeepEqual(actual, test.output) {
+			t.Errorf("expected %#v, got %#v", test.output, actual)
+			// Second message shows the first terms as hex bytes.
+			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
+		}
+	}
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_filter_ar.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_filter_ar.go
@ -0,0 +1,28 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+//  except in compliance with the License. You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+//  Unless required by applicable law or agreed to in writing, software distributed under the
+//  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+//  either express or implied. See the License for the specific language governing permissions
+//  and limitations under the License.
+
+package ar
+
+import (
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// StopTokenFilterConstructor builds the Arabic stop word token filter. It
+// fetches the token map registered under StopName from cache and wraps it in a
+// stop tokens filter; a failed lookup is returned as the error. config is not
+// consulted.
+func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	stopWords, err := cache.TokenMapNamed(StopName)
+	if err != nil {
+		return nil, err
+	}
+	stopFilter := stop_tokens_filter.NewStopTokensFilter(stopWords)
+	return stopFilter, nil
+}
+
+// init registers StopTokenFilterConstructor with the registry under StopName.
+func init() {
+	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_words_ar.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_words_ar.go
@ -0,0 +1,149 @@
+package ar
+
+import (
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// StopName is the name under which both the Arabic stop word token map and
+// the Arabic stop word token filter are registered.
+const StopName = "stop_ar"
+
+// this content was obtained from:
+// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
+// ` was changed to ' to allow for literal string
+
+// ArabicStopWords holds the raw Arabic stop word list, one entry per line.
+// The leading '#' lines are the list's own header; presumably they are
+// skipped as comments by TokenMap.LoadBytes (confirm against that method).
+var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some 
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+في
+وفي
+فيها
+فيه
+و
+ف
+ثم
+او
+أو
+ب
+بها
+به
+ا
+أ
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+فما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+فان
+فأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+فهى
+فهي
+فهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+نحو
+بين
+بينما
+منذ
+ضمن
+حيث
+الان
+الآن
+خلال
+بعد
+قبل
+حتى
+عند
+عندما
+لدى
+جميع
+`)
+
+// TokenMapConstructor builds the Arabic stop word token map by loading
+// ArabicStopWords into a fresh analysis.TokenMap. Neither config nor cache is
+// used.
+//
+// If loading fails, the error is returned together with a nil map so that a
+// caller can never pick up a partially populated stop list.
+func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
+	rv := analysis.NewTokenMap()
+	if err := rv.LoadBytes(ArabicStopWords); err != nil {
+		return nil, err
+	}
+	return rv, nil
+}
+
+// init registers TokenMapConstructor with the registry under StopName.
+func init() {
+	registry.RegisterTokenMap(StopName, TokenMapConstructor)
+}