Upgrade server dependencies, manage them with govendor

parent ebee2746d6
commit 971278e7e5
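The changes below fall into two parts: API migrations in the server's own code (lego's acme package, jwt-go, and bleve), followed by a rewrite of the vendor/ tree; this excerpt shows the old checked-in copies being deleted. With govendor, the usual workflow is `govendor init` to create vendor/vendor.json and `govendor add +external` to copy dependencies in; the exact commands used here are not recorded in the commit, so treat that as the typical workflow rather than a transcript.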
@@ -24,8 +24,8 @@ func Run(dir, domain, email, port string) (*state, error) {
 		return nil, err
 	}
 
-	client, err := acme.NewClient(URL, &user, KeySize)
-	client.ExcludeChallenges([]string{"tls-sni-01"})
+	client, err := acme.NewClient(URL, &user, acme.RSA2048)
+	client.ExcludeChallenges([]acme.Challenge{acme.TLSSNI01})
 	client.SetHTTPAddress(port)
 
 	if user.Registration == nil {
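For context, the upgraded lego API replaces the key-size integer (`KeySize`) with a typed key-type constant, and challenge-name strings with `acme.Challenge` constants. A minimal sketch of the new setup (`newClient` is a hypothetical wrapper, not part of the commit; `User` is this package's acme user type):

```go
// Hypothetical wrapper mirroring the hunk above.
func newClient(URL string, user User, port string) (*acme.Client, error) {
	client, err := acme.NewClient(URL, &user, acme.RSA2048) // typed key constant replaces a bit size
	if err != nil {
		return nil, err
	}
	// Typed challenge constants replace raw strings like "tls-sni-01".
	client.ExcludeChallenges([]acme.Challenge{acme.TLSSNI01})
	client.SetHTTPAddress(port)
	return client, nil
}
```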
@@ -123,7 +123,7 @@ func (s *state) setOCSP(ocsp []byte) {
 }
 
 func (s *state) obtain() error {
-	cert, errors := s.client.ObtainCertificate([]string{s.domain}, true, nil)
+	cert, errors := s.client.ObtainCertificate([]string{s.domain}, true, nil, false)
 	if err := errors[s.domain]; err != nil {
 		if _, ok := err.(acme.TOSError); ok {
 			err := s.client.AgreeToTOS()
@@ -180,7 +180,7 @@ func (s *state) renew() bool {
 	meta.PrivateKey = key
 
 Renew:
-	newMeta, err := s.client.RenewCertificate(meta, true)
+	newMeta, err := s.client.RenewCertificate(meta, true, false)
 	if err != nil {
 		if _, ok := err.(acme.TOSError); ok {
 			err := s.client.AgreeToTOS()
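Both certificate calls gain a trailing boolean in this lego version, believed to be mustStaple (request the OCSP Must-Staple extension); passing false preserves the old behavior. A hedged sketch of the two call shapes, wrapped in a hypothetical helper:

```go
// Hypothetical helper; the trailing false is assumed to be mustStaple.
func obtainThenRenew(client *acme.Client, domain string) error {
	// ObtainCertificate returns per-domain failures as a map in this release.
	cert, failures := client.ObtainCertificate([]string{domain}, true, nil, false)
	if err := failures[domain]; err != nil {
		return err
	}
	_, err := client.RenewCertificate(cert, true, false)
	return err
}
```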
@@ -1,6 +1,7 @@
 package letsencrypt
 
 import (
+	"crypto"
 	"crypto/rand"
 	"crypto/rsa"
 	"crypto/x509"
@@ -17,7 +18,7 @@ const defaultUser = "default"
 type User struct {
 	Email        string
 	Registration *acme.RegistrationResource
-	key          *rsa.PrivateKey
+	key          crypto.PrivateKey
 }
 
 func (u User) GetEmail() string {
@@ -28,7 +29,7 @@ func (u User) GetRegistration() *acme.RegistrationResource {
 	return u.Registration
 }
 
-func (u User) GetPrivateKey() *rsa.PrivateKey {
+func (u User) GetPrivateKey() crypto.PrivateKey {
 	return u.key
 }
 
@@ -86,7 +87,7 @@ func saveUser(user User) error {
 	return ioutil.WriteFile(directory.UserRegistration(user.Email), jsonBytes, 0600)
 }
 
-func loadRSAPrivateKey(file string) (*rsa.PrivateKey, error) {
+func loadRSAPrivateKey(file string) (crypto.PrivateKey, error) {
 	keyBytes, err := ioutil.ReadFile(file)
 	if err != nil {
 		return nil, err
@@ -95,8 +96,10 @@ func loadRSAPrivateKey(file string) (*rsa.PrivateKey, error) {
 	return x509.ParsePKCS1PrivateKey(keyBlock.Bytes)
 }
 
-func saveRSAPrivateKey(key *rsa.PrivateKey, file string) error {
-	pemKey := pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)}
+func saveRSAPrivateKey(key crypto.PrivateKey, file string) error {
+	pemKey := pem.Block{
+		Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key.(*rsa.PrivateKey)),
+	}
 	keyOut, err := os.Create(file)
 	if err != nil {
 		return err
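The key field widens from `*rsa.PrivateKey` to `crypto.PrivateKey` to match lego's updated user interface, so `saveRSAPrivateKey` now asserts back to `*rsa.PrivateKey`; that unchecked assertion panics for any non-RSA key. A hedged sketch of a defensive variant (`marshalRSAKey` is a hypothetical helper, not part of this commit; assumes the usual crypto, crypto/rsa, crypto/x509, encoding/pem, and fmt imports):

```go
// Hypothetical safer helper: fails loudly instead of panicking on non-RSA keys.
func marshalRSAKey(key crypto.PrivateKey) (*pem.Block, error) {
	rsaKey, ok := key.(*rsa.PrivateKey)
	if !ok {
		return nil, fmt.Errorf("unsupported private key type %T", key)
	}
	return &pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(rsaKey),
	}, nil
}
```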
@@ -5,7 +5,6 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
-	"github.com/xenolf/lego/acme"
 )
 
 func tempdir() string {
@@ -14,18 +13,10 @@ func tempdir() string {
 }
 
 func testUser(t *testing.T, email string) {
-	reg := &acme.RegistrationResource{
-		URI: "test.com",
-		Body: acme.Registration{
-			Agreement: "agree?",
-		},
-	}
-
 	user, err := newUser(email)
 	assert.Nil(t, err)
 	key := user.GetPrivateKey()
 	assert.NotNil(t, key)
-	user.Registration = reg
 
 	err = saveUser(user)
 	assert.Nil(t, err)
@@ -34,7 +25,6 @@ func testUser(t *testing.T, email string) {
 	assert.Nil(t, err)
 	assert.Equal(t, email, user.GetEmail())
 	assert.Equal(t, key, user.GetPrivateKey())
-	assert.Equal(t, reg, user.GetRegistration())
 }
 
 func TestUser(t *testing.T) {
@@ -55,7 +55,8 @@ func handleAuth(w http.ResponseWriter, r *http.Request) *Session {
 	token, err := parseToken(cookie.Value)
 
 	if err == nil && token.Valid {
-		userID := uint64(token.Claims["UserID"].(float64))
+		claims := token.Claims.(jwt.MapClaims)
+		userID := uint64(claims["UserID"].(float64))
 
 		log.Println(r.RemoteAddr, "[Auth] GET", r.URL.Path, "| Valid token | User ID:", userID)
 
@@ -91,7 +92,8 @@ func newUser(w http.ResponseWriter, r *http.Request) *Session {
 	go session.run()
 
 	token := jwt.New(jwt.SigningMethodHS256)
-	token.Claims["UserID"] = user.ID
+	claims := token.Claims.(jwt.MapClaims)
+	claims["UserID"] = user.ID
 	tokenString, err := token.SignedString(hmacKey)
 	if err != nil {
 		return nil
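Both hunks track jwt-go's v3 API, where `token.Claims` became an interface and map-style claims require a `jwt.MapClaims` type assertion. A minimal sketch of issuing and reading a claim under that API (`issueAndVerify` and its hmacKey parameter are stand-ins for this codebase's own names):

```go
// Sketch of jwt-go v3 claims handling; hmacKey is a stand-in secret.
func issueAndVerify(hmacKey []byte) (uint64, error) {
	token := jwt.New(jwt.SigningMethodHS256)
	claims := token.Claims.(jwt.MapClaims) // v3: Claims is an interface
	claims["UserID"] = 42
	signed, err := token.SignedString(hmacKey)
	if err != nil {
		return 0, err
	}
	parsed, err := jwt.Parse(signed, func(*jwt.Token) (interface{}, error) { return hmacKey, nil })
	if err != nil || !parsed.Valid {
		return 0, err
	}
	// JSON numbers decode as float64, hence the conversion seen in the diff.
	return uint64(parsed.Claims.(jwt.MapClaims)["UserID"].(float64)), nil
}
```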
@@ -118,7 +118,8 @@ func (u *User) SearchMessages(server, channel, q string) ([]Message, error) {
 	contentQuery.SetField("content")
 	contentQuery.SetFuzziness(2)
 
-	query := bleve.NewBooleanQuery([]bleve.Query{serverQuery, channelQuery, contentQuery}, nil, nil)
+	query := bleve.NewBooleanQuery()
+	query.AddMust(serverQuery, channelQuery, contentQuery)
 
 	search := bleve.NewSearchRequest(query)
 	searchResults, err := u.messageIndex.Search(search)
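The bleve upgrade moves from positional boolean-query constructors to a builder style. A sketch of the whole search path under the new API (field names mirror the hunk; `searchMessages` and `index` are stand-ins for the method and u.messageIndex):

```go
// Sketch of the builder-style bleve API.
func searchMessages(index bleve.Index, server, channel, q string) (*bleve.SearchResult, error) {
	// One match clause per field; the content clause stays fuzzy.
	serverQuery := bleve.NewMatchQuery(server)
	serverQuery.SetField("server")
	channelQuery := bleve.NewMatchQuery(channel)
	channelQuery.SetField("channel")
	contentQuery := bleve.NewMatchQuery(q)
	contentQuery.SetField("content")
	contentQuery.SetFuzziness(2)

	query := bleve.NewBooleanQuery() // positional must/should/mustNot args are gone
	query.AddMust(serverQuery, channelQuery, contentQuery)
	return index.Search(bleve.NewSearchRequest(query))
}
```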
3 vendor/github.com/BurntSushi/toml/COMPATIBLE generated vendored
@@ -1,3 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/COPYING generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
19 vendor/github.com/BurntSushi/toml/Makefile generated vendored
@@ -1,19 +0,0 @@
(vendored file deleted; contents omitted)
220 vendor/github.com/BurntSushi/toml/README.md generated vendored
@@ -1,220 +0,0 @@
(vendored file deleted; contents omitted)
61 vendor/github.com/BurntSushi/toml/_examples/example.go generated vendored
@@ -1,61 +0,0 @@
(vendored file deleted; contents omitted)
22 vendor/github.com/BurntSushi/toml/_examples/hard.toml generated vendored
@@ -1,22 +0,0 @@
(vendored file deleted; contents omitted)
4 vendor/github.com/BurntSushi/toml/_examples/implicit.toml generated vendored
@@ -1,4 +0,0 @@
(vendored file deleted; contents omitted)
6 vendor/github.com/BurntSushi/toml/_examples/invalid-apples.toml generated vendored
@@ -1,6 +0,0 @@
(vendored file deleted; contents omitted)
5 vendor/github.com/BurntSushi/toml/_examples/readme1.toml generated vendored
@@ -1,5 +0,0 @@
(vendored file deleted; contents omitted)
1 vendor/github.com/BurntSushi/toml/_examples/readme2.toml generated vendored
@@ -1 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/COPYING generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/README.md generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
90 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/main.go generated vendored
@@ -1,90 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/COPYING generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/README.md generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
131 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/main.go generated vendored
@@ -1,131 +0,0 @@
(vendored file deleted; contents omitted)
14 vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING generated vendored
@@ -1,14 +0,0 @@
(vendored file deleted; contents omitted)
22 vendor/github.com/BurntSushi/toml/cmd/tomlv/README.md generated vendored
@@ -1,22 +0,0 @@
(vendored file deleted; contents omitted)
61 vendor/github.com/BurntSushi/toml/cmd/tomlv/main.go generated vendored
@@ -1,61 +0,0 @@
(vendored file deleted; contents omitted)
493 vendor/github.com/BurntSushi/toml/decode.go generated vendored
@@ -1,493 +0,0 @@
(vendored file deleted; contents omitted)
122 vendor/github.com/BurntSushi/toml/decode_meta.go generated vendored
@@ -1,122 +0,0 @@
(vendored file deleted; contents omitted)
1018 vendor/github.com/BurntSushi/toml/decode_test.go generated vendored
File diff suppressed because it is too large
27 vendor/github.com/BurntSushi/toml/doc.go generated vendored
@@ -1,27 +0,0 @@
(vendored file deleted; contents omitted)
562
vendor/github.com/BurntSushi/toml/encode.go
generated
vendored
562
vendor/github.com/BurntSushi/toml/encode.go
generated
vendored
@ -1,562 +0,0 @@
|
||||
package toml
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type tomlEncodeError struct{ error }
|
||||
|
||||
var (
|
||||
errArrayMixedElementTypes = errors.New(
|
||||
"can't encode array with mixed element types")
|
||||
errArrayNilElement = errors.New(
|
||||
"can't encode array with nil element")
|
||||
errNonString = errors.New(
|
||||
"can't encode a map with non-string key type")
|
||||
errAnonNonStruct = errors.New(
|
||||
"can't encode an anonymous field that is not a struct")
|
||||
errArrayNoTable = errors.New(
|
||||
"TOML array element can't contain a table")
|
||||
errNoKey = errors.New(
|
||||
"top-level values must be a Go map or struct")
|
||||
errAnything = errors.New("") // used in testing
|
||||
)
|
||||
|
||||
var quotedReplacer = strings.NewReplacer(
|
||||
"\t", "\\t",
|
||||
"\n", "\\n",
|
||||
"\r", "\\r",
|
||||
"\"", "\\\"",
|
||||
"\\", "\\\\",
|
||||
)
|
||||
|
||||
// Encoder controls the encoding of Go values to a TOML document to some
|
||||
// io.Writer.
|
||||
//
|
||||
// The indentation level can be controlled with the Indent field.
|
||||
type Encoder struct {
|
||||
// A single indentation level. By default it is two spaces.
|
||||
Indent string
|
||||
|
||||
// hasWritten is whether we have written any output to w yet.
|
||||
hasWritten bool
|
||||
w *bufio.Writer
|
||||
}
|
||||
|
||||
// NewEncoder returns a TOML encoder that encodes Go values to the io.Writer
|
||||
// given. By default, a single indentation level is 2 spaces.
|
||||
func NewEncoder(w io.Writer) *Encoder {
|
||||
return &Encoder{
|
||||
w: bufio.NewWriter(w),
|
||||
Indent: " ",
|
||||
}
|
||||
}
|
||||
|
||||
// Encode writes a TOML representation of the Go value to the underlying
|
||||
// io.Writer. If the value given cannot be encoded to a valid TOML document,
|
||||
// then an error is returned.
|
||||
//
|
||||
// The mapping between Go values and TOML values should be precisely the same
|
||||
// as for the Decode* functions. Similarly, the TextMarshaler interface is
|
||||
// supported by encoding the resulting bytes as strings. (If you want to write
|
||||
// arbitrary binary data then you will need to use something like base64 since
|
||||
// TOML does not have any binary types.)
|
||||
//
|
||||
// When encoding TOML hashes (i.e., Go maps or structs), keys without any
|
||||
// sub-hashes are encoded first.
|
||||
//
|
||||
// If a Go map is encoded, then its keys are sorted alphabetically for
|
||||
// deterministic output. More control over this behavior may be provided if
|
||||
// there is demand for it.
|
||||
//
|
||||
// Encoding Go values without a corresponding TOML representation---like map
|
||||
// types with non-string keys---will cause an error to be returned. Similarly
|
||||
// for mixed arrays/slices, arrays/slices with nil elements, embedded
|
||||
// non-struct types and nested slices containing maps or structs.
|
||||
// (e.g., [][]map[string]string is not allowed but []map[string]string is OK
|
||||
// and so is []map[string][]string.)
|
||||
func (enc *Encoder) Encode(v interface{}) error {
|
||||
rv := eindirect(reflect.ValueOf(v))
|
||||
if err := enc.safeEncode(Key([]string{}), rv); err != nil {
|
||||
return err
|
||||
}
|
||||
return enc.w.Flush()
|
||||
}
|
||||
|
||||
func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if terr, ok := r.(tomlEncodeError); ok {
|
||||
err = terr.error
|
||||
return
|
||||
}
|
||||
panic(r)
|
||||
}
|
||||
}()
|
||||
enc.encode(key, rv)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (enc *Encoder) encode(key Key, rv reflect.Value) {
	// Special case. Time needs to be in ISO8601 format.
	// Special case. If we can marshal the type to text, then we use that.
	// Basically, this prevents the encoder from handling these types as
	// generic structs (or whatever the underlying type of a TextMarshaler is).
	switch rv.Interface().(type) {
	case time.Time, TextMarshaler:
		enc.keyEqElement(key, rv)
		return
	}

	k := rv.Kind()
	switch k {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
		reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
		reflect.Uint64,
		reflect.Float32, reflect.Float64, reflect.String, reflect.Bool:
		enc.keyEqElement(key, rv)
	case reflect.Array, reflect.Slice:
		if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) {
			enc.eArrayOfTables(key, rv)
		} else {
			enc.keyEqElement(key, rv)
		}
	case reflect.Interface:
		if rv.IsNil() {
			return
		}
		enc.encode(key, rv.Elem())
	case reflect.Map:
		if rv.IsNil() {
			return
		}
		enc.eTable(key, rv)
	case reflect.Ptr:
		if rv.IsNil() {
			return
		}
		enc.encode(key, rv.Elem())
	case reflect.Struct:
		enc.eTable(key, rv)
	default:
		panic(e("Unsupported type for key '%s': %s", key, k))
	}
}

// eElement encodes any value that can be an array element (primitives and
// arrays).
func (enc *Encoder) eElement(rv reflect.Value) {
	switch v := rv.Interface().(type) {
	case time.Time:
		// Special case time.Time as a primitive. Has to come before
		// TextMarshaler below because time.Time implements
		// encoding.TextMarshaler, but we need to always use UTC.
		enc.wf(v.In(time.FixedZone("UTC", 0)).Format("2006-01-02T15:04:05Z"))
		return
	case TextMarshaler:
		// Special case. Use text marshaler if it's available for this value.
		if s, err := v.MarshalText(); err != nil {
			encPanic(err)
		} else {
			enc.writeQuoted(string(s))
		}
		return
	}
	switch rv.Kind() {
	case reflect.Bool:
		enc.wf(strconv.FormatBool(rv.Bool()))
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
		reflect.Int64:
		enc.wf(strconv.FormatInt(rv.Int(), 10))
	case reflect.Uint, reflect.Uint8, reflect.Uint16,
		reflect.Uint32, reflect.Uint64:
		enc.wf(strconv.FormatUint(rv.Uint(), 10))
	case reflect.Float32:
		enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 32)))
	case reflect.Float64:
		enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 64)))
	case reflect.Array, reflect.Slice:
		enc.eArrayOrSliceElement(rv)
	case reflect.Interface:
		enc.eElement(rv.Elem())
	case reflect.String:
		enc.writeQuoted(rv.String())
	default:
		panic(e("Unexpected primitive type: %s", rv.Kind()))
	}
}

// By the TOML spec, all floats must have a decimal with at least one
// number on either side.
func floatAddDecimal(fstr string) string {
	if !strings.Contains(fstr, ".") {
		return fstr + ".0"
	}
	return fstr
}

func (enc *Encoder) writeQuoted(s string) {
	enc.wf("\"%s\"", quotedReplacer.Replace(s))
}

func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) {
	length := rv.Len()
	enc.wf("[")
	for i := 0; i < length; i++ {
		elem := rv.Index(i)
		enc.eElement(elem)
		if i != length-1 {
			enc.wf(", ")
		}
	}
	enc.wf("]")
}

func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) {
	if len(key) == 0 {
		encPanic(errNoKey)
	}
	for i := 0; i < rv.Len(); i++ {
		trv := rv.Index(i)
		if isNil(trv) {
			continue
		}
		panicIfInvalidKey(key)
		enc.newline()
		enc.wf("%s[[%s]]", enc.indentStr(key), key.maybeQuotedAll())
		enc.newline()
		enc.eMapOrStruct(key, trv)
	}
}

func (enc *Encoder) eTable(key Key, rv reflect.Value) {
	panicIfInvalidKey(key)
	if len(key) == 1 {
		// Output an extra new line between top-level tables.
		// (The newline isn't written if nothing else has been written though.)
		enc.newline()
	}
	if len(key) > 0 {
		enc.wf("%s[%s]", enc.indentStr(key), key.maybeQuotedAll())
		enc.newline()
	}
	enc.eMapOrStruct(key, rv)
}

func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value) {
	switch rv := eindirect(rv); rv.Kind() {
	case reflect.Map:
		enc.eMap(key, rv)
	case reflect.Struct:
		enc.eStruct(key, rv)
	default:
		panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String())
	}
}

func (enc *Encoder) eMap(key Key, rv reflect.Value) {
	rt := rv.Type()
	if rt.Key().Kind() != reflect.String {
		encPanic(errNonString)
	}

	// Sort keys so that we have deterministic output. And write keys directly
	// underneath this key first, before writing sub-structs or sub-maps.
	var mapKeysDirect, mapKeysSub []string
	for _, mapKey := range rv.MapKeys() {
		k := mapKey.String()
		if typeIsHash(tomlTypeOfGo(rv.MapIndex(mapKey))) {
			mapKeysSub = append(mapKeysSub, k)
		} else {
			mapKeysDirect = append(mapKeysDirect, k)
		}
	}

	var writeMapKeys = func(mapKeys []string) {
		sort.Strings(mapKeys)
		for _, mapKey := range mapKeys {
			mrv := rv.MapIndex(reflect.ValueOf(mapKey))
			if isNil(mrv) {
				// Don't write anything for nil fields.
				continue
			}
			enc.encode(key.add(mapKey), mrv)
		}
	}
	writeMapKeys(mapKeysDirect)
	writeMapKeys(mapKeysSub)
}

func (enc *Encoder) eStruct(key Key, rv reflect.Value) {
	// Write keys for fields directly under this key first, because if we write
	// a field that creates a new table, then all keys under it will be in that
	// table (not the one we're writing here).
	rt := rv.Type()
	var fieldsDirect, fieldsSub [][]int
	var addFields func(rt reflect.Type, rv reflect.Value, start []int)
	addFields = func(rt reflect.Type, rv reflect.Value, start []int) {
		for i := 0; i < rt.NumField(); i++ {
			f := rt.Field(i)
			// skip unexported fields
			if f.PkgPath != "" && !f.Anonymous {
				continue
			}
			frv := rv.Field(i)
			if f.Anonymous {
				t := f.Type
				switch t.Kind() {
				case reflect.Struct:
					addFields(t, frv, f.Index)
					continue
				case reflect.Ptr:
					if t.Elem().Kind() == reflect.Struct {
						if !frv.IsNil() {
							addFields(t.Elem(), frv.Elem(), f.Index)
						}
						continue
					}
					// Fall through to the normal field encoding logic below
					// for non-struct anonymous fields.
				}
			}

			if typeIsHash(tomlTypeOfGo(frv)) {
				fieldsSub = append(fieldsSub, append(start, f.Index...))
			} else {
				fieldsDirect = append(fieldsDirect, append(start, f.Index...))
			}
		}
	}
	addFields(rt, rv, nil)

	var writeFields = func(fields [][]int) {
		for _, fieldIndex := range fields {
			sft := rt.FieldByIndex(fieldIndex)
			sf := rv.FieldByIndex(fieldIndex)
			if isNil(sf) {
				// Don't write anything for nil fields.
				continue
			}

			keyName := sft.Tag.Get("toml")
			if keyName == "-" {
				continue
			}
			if keyName == "" {
				keyName = sft.Name
			}

			keyName, opts := getOptions(keyName)
			if _, ok := opts["omitempty"]; ok && isEmpty(sf) {
				continue
			} else if _, ok := opts["omitzero"]; ok && isZero(sf) {
				continue
			}

			enc.encode(key.add(keyName), sf)
		}
	}
	writeFields(fieldsDirect)
	writeFields(fieldsSub)
}

// tomlTypeOfGo returns the TOML type of a Go value. The type may be `nil`,
// which means no concrete TOML type could be found.
func tomlTypeOfGo(rv reflect.Value) tomlType {
	if isNil(rv) || !rv.IsValid() {
		return nil
	}
	switch rv.Kind() {
	case reflect.Bool:
		return tomlBool
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
		reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
		reflect.Uint64:
		return tomlInteger
	case reflect.Float32, reflect.Float64:
		return tomlFloat
	case reflect.Array, reflect.Slice:
		if typeEqual(tomlHash, tomlArrayType(rv)) {
			return tomlArrayHash
		} else {
			return tomlArray
		}
	case reflect.Ptr, reflect.Interface:
		return tomlTypeOfGo(rv.Elem())
	case reflect.String:
		return tomlString
	case reflect.Map:
		return tomlHash
	case reflect.Struct:
		switch rv.Interface().(type) {
		case time.Time:
			return tomlDatetime
		case TextMarshaler:
			return tomlString
		default:
			return tomlHash
		}
	default:
		panic("unexpected reflect.Kind: " + rv.Kind().String())
	}
}

// tomlArrayType returns the element type of a TOML array. The type returned
// may be nil if it cannot be determined (e.g., a nil slice or a zero length
// slice). This function may also panic if it finds a type that cannot be
// expressed in TOML (such as nil elements, heterogeneous arrays or directly
// nested arrays of tables).
func tomlArrayType(rv reflect.Value) tomlType {
	if isNil(rv) || !rv.IsValid() || rv.Len() == 0 {
		return nil
	}
	firstType := tomlTypeOfGo(rv.Index(0))
	if firstType == nil {
		encPanic(errArrayNilElement)
	}

	rvlen := rv.Len()
	for i := 1; i < rvlen; i++ {
		elem := rv.Index(i)
		switch elemType := tomlTypeOfGo(elem); {
		case elemType == nil:
			encPanic(errArrayNilElement)
		case !typeEqual(firstType, elemType):
			encPanic(errArrayMixedElementTypes)
		}
	}
	// If we have a nested array, then we must make sure that the nested
	// array contains ONLY primitives.
	// This checks arbitrarily nested arrays.
	if typeEqual(firstType, tomlArray) || typeEqual(firstType, tomlArrayHash) {
		nest := tomlArrayType(eindirect(rv.Index(0)))
		if typeEqual(nest, tomlHash) || typeEqual(nest, tomlArrayHash) {
			encPanic(errArrayNoTable)
		}
	}
	return firstType
}

func getOptions(keyName string) (string, map[string]struct{}) {
	opts := make(map[string]struct{})
	ss := strings.Split(keyName, ",")
	name := ss[0]
	if len(ss) > 1 {
		// Only the entries after the key name are options
		// ("omitempty", "omitzero"); the name itself is not one.
		for _, opt := range ss[1:] {
			opts[opt] = struct{}{}
		}
	}

	return name, opts
}

func isZero(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		if rv.Int() == 0 {
			return true
		}
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		if rv.Uint() == 0 {
			return true
		}
	case reflect.Float32, reflect.Float64:
		if rv.Float() == 0.0 {
			return true
		}
	}

	return false
}

func isEmpty(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.String:
		if len(strings.TrimSpace(rv.String())) == 0 {
			return true
		}
	case reflect.Array, reflect.Slice, reflect.Map:
		if rv.Len() == 0 {
			return true
		}
	}

	return false
}

func (enc *Encoder) newline() {
	if enc.hasWritten {
		enc.wf("\n")
	}
}

func (enc *Encoder) keyEqElement(key Key, val reflect.Value) {
	if len(key) == 0 {
		encPanic(errNoKey)
	}
	panicIfInvalidKey(key)
	enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1))
	enc.eElement(val)
	enc.newline()
}

func (enc *Encoder) wf(format string, v ...interface{}) {
	if _, err := fmt.Fprintf(enc.w, format, v...); err != nil {
		encPanic(err)
	}
	enc.hasWritten = true
}

func (enc *Encoder) indentStr(key Key) string {
	return strings.Repeat(enc.Indent, len(key)-1)
}

func encPanic(err error) {
	panic(tomlEncodeError{err})
}

func eindirect(v reflect.Value) reflect.Value {
	switch v.Kind() {
	case reflect.Ptr, reflect.Interface:
		return eindirect(v.Elem())
	default:
		return v
	}
}

func isNil(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
		return rv.IsNil()
	default:
		return false
	}
}

func panicIfInvalidKey(key Key) {
	for _, k := range key {
		if len(k) == 0 {
			encPanic(e("Key '%s' is not a valid table name. Key names "+
				"cannot be empty.", key.maybeQuotedAll()))
		}
	}
}

func isValidKeyName(s string) bool {
	return len(s) != 0
}

566 vendor/github.com/BurntSushi/toml/encode_test.go generated vendored
@ -1,566 +0,0 @@
package toml

import (
	"bytes"
	"fmt"
	"log"
	"net"
	"testing"
	"time"
)

func TestEncodeRoundTrip(t *testing.T) {
	type Config struct {
		Age        int
		Cats       []string
		Pi         float64
		Perfection []int
		DOB        time.Time
		Ipaddress  net.IP
	}

	var inputs = Config{
		13,
		[]string{"one", "two", "three"},
		3.145,
		[]int{11, 2, 3, 4},
		time.Now(),
		net.ParseIP("192.168.59.254"),
	}

	var firstBuffer bytes.Buffer
	e := NewEncoder(&firstBuffer)
	err := e.Encode(inputs)
	if err != nil {
		t.Fatal(err)
	}
	var outputs Config
	if _, err := Decode(firstBuffer.String(), &outputs); err != nil {
		log.Printf("Could not decode:\n-----\n%s\n-----\n",
			firstBuffer.String())
		t.Fatal(err)
	}

	// could test each value individually, but I'm lazy
	var secondBuffer bytes.Buffer
	e2 := NewEncoder(&secondBuffer)
	err = e2.Encode(outputs)
	if err != nil {
		t.Fatal(err)
	}
	if firstBuffer.String() != secondBuffer.String() {
		t.Error(
			firstBuffer.String(),
			"\n\n is not identical to\n\n",
			secondBuffer.String())
	}
}

// XXX(burntsushi)
// I think these tests probably should be removed. They are good, but they
// ought to be obsolete by toml-test.
func TestEncode(t *testing.T) {
	type Embedded struct {
		Int int `toml:"_int"`
	}
	type NonStruct int

	date := time.Date(2014, 5, 11, 20, 30, 40, 0, time.FixedZone("IST", 3600))
	dateStr := "2014-05-11T19:30:40Z"

	tests := map[string]struct {
		input      interface{}
		wantOutput string
		wantError  error
	}{
		"bool field": {
			input: struct {
				BoolTrue  bool
				BoolFalse bool
			}{true, false},
			wantOutput: "BoolTrue = true\nBoolFalse = false\n",
		},
		"int fields": {
			input: struct {
				Int   int
				Int8  int8
				Int16 int16
				Int32 int32
				Int64 int64
			}{1, 2, 3, 4, 5},
			wantOutput: "Int = 1\nInt8 = 2\nInt16 = 3\nInt32 = 4\nInt64 = 5\n",
		},
		"uint fields": {
			input: struct {
				Uint   uint
				Uint8  uint8
				Uint16 uint16
				Uint32 uint32
				Uint64 uint64
			}{1, 2, 3, 4, 5},
			wantOutput: "Uint = 1\nUint8 = 2\nUint16 = 3\nUint32 = 4" +
				"\nUint64 = 5\n",
		},
		"float fields": {
			input: struct {
				Float32 float32
				Float64 float64
			}{1.5, 2.5},
			wantOutput: "Float32 = 1.5\nFloat64 = 2.5\n",
		},
		"string field": {
			input:      struct{ String string }{"foo"},
			wantOutput: "String = \"foo\"\n",
		},
		"string field and unexported field": {
			input: struct {
				String     string
				unexported int
			}{"foo", 0},
			wantOutput: "String = \"foo\"\n",
		},
		"datetime field in UTC": {
			input:      struct{ Date time.Time }{date},
			wantOutput: fmt.Sprintf("Date = %s\n", dateStr),
		},
		"datetime field as primitive": {
			// Using a map here to fail if isStructOrMap() returns true for
			// time.Time.
			input: map[string]interface{}{
				"Date": date,
				"Int":  1,
			},
			wantOutput: fmt.Sprintf("Date = %s\nInt = 1\n", dateStr),
		},
		"array fields": {
			input: struct {
				IntArray0 [0]int
				IntArray3 [3]int
			}{[0]int{}, [3]int{1, 2, 3}},
			wantOutput: "IntArray0 = []\nIntArray3 = [1, 2, 3]\n",
		},
		"slice fields": {
			input: struct{ IntSliceNil, IntSlice0, IntSlice3 []int }{
				nil, []int{}, []int{1, 2, 3},
			},
			wantOutput: "IntSlice0 = []\nIntSlice3 = [1, 2, 3]\n",
		},
		"datetime slices": {
			input: struct{ DatetimeSlice []time.Time }{
				[]time.Time{date, date},
			},
			wantOutput: fmt.Sprintf("DatetimeSlice = [%s, %s]\n",
				dateStr, dateStr),
		},
		"nested arrays and slices": {
			input: struct {
				SliceOfArrays         [][2]int
				ArrayOfSlices         [2][]int
				SliceOfArraysOfSlices [][2][]int
				ArrayOfSlicesOfArrays [2][][2]int
				SliceOfMixedArrays    [][2]interface{}
				ArrayOfMixedSlices    [2][]interface{}
			}{
				[][2]int{{1, 2}, {3, 4}},
				[2][]int{{1, 2}, {3, 4}},
				[][2][]int{
					{
						{1, 2}, {3, 4},
					},
					{
						{5, 6}, {7, 8},
					},
				},
				[2][][2]int{
					{
						{1, 2}, {3, 4},
					},
					{
						{5, 6}, {7, 8},
					},
				},
				[][2]interface{}{
					{1, 2}, {"a", "b"},
				},
				[2][]interface{}{
					{1, 2}, {"a", "b"},
				},
			},
			wantOutput: `SliceOfArrays = [[1, 2], [3, 4]]
ArrayOfSlices = [[1, 2], [3, 4]]
SliceOfArraysOfSlices = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
ArrayOfSlicesOfArrays = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
SliceOfMixedArrays = [[1, 2], ["a", "b"]]
ArrayOfMixedSlices = [[1, 2], ["a", "b"]]
`,
		},
		"empty slice": {
			input:      struct{ Empty []interface{} }{[]interface{}{}},
			wantOutput: "Empty = []\n",
		},
		"(error) slice with element type mismatch (string and integer)": {
			input:     struct{ Mixed []interface{} }{[]interface{}{1, "a"}},
			wantError: errArrayMixedElementTypes,
		},
		"(error) slice with element type mismatch (integer and float)": {
			input:     struct{ Mixed []interface{} }{[]interface{}{1, 2.5}},
			wantError: errArrayMixedElementTypes,
		},
		"slice with elems of differing Go types, same TOML types": {
			input: struct {
				MixedInts   []interface{}
				MixedFloats []interface{}
			}{
				[]interface{}{
					int(1), int8(2), int16(3), int32(4), int64(5),
					uint(1), uint8(2), uint16(3), uint32(4), uint64(5),
				},
				[]interface{}{float32(1.5), float64(2.5)},
			},
			wantOutput: "MixedInts = [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]\n" +
				"MixedFloats = [1.5, 2.5]\n",
		},
		"(error) slice w/ element type mismatch (one is nested array)": {
			input: struct{ Mixed []interface{} }{
				[]interface{}{1, []interface{}{2}},
			},
			wantError: errArrayMixedElementTypes,
		},
		"(error) slice with 1 nil element": {
			input:     struct{ NilElement1 []interface{} }{[]interface{}{nil}},
			wantError: errArrayNilElement,
		},
		"(error) slice with 1 nil element (and other non-nil elements)": {
			input: struct{ NilElement []interface{} }{
				[]interface{}{1, nil},
			},
			wantError: errArrayNilElement,
		},
		"simple map": {
			input:      map[string]int{"a": 1, "b": 2},
			wantOutput: "a = 1\nb = 2\n",
		},
		"map with interface{} value type": {
			input:      map[string]interface{}{"a": 1, "b": "c"},
			wantOutput: "a = 1\nb = \"c\"\n",
		},
		"map with interface{} value type, some of which are structs": {
			input: map[string]interface{}{
				"a": struct{ Int int }{2},
				"b": 1,
			},
			wantOutput: "b = 1\n\n[a]\n  Int = 2\n",
		},
		"nested map": {
			input: map[string]map[string]int{
				"a": {"b": 1},
				"c": {"d": 2},
			},
			wantOutput: "[a]\n  b = 1\n\n[c]\n  d = 2\n",
		},
		"nested struct": {
			input: struct{ Struct struct{ Int int } }{
				struct{ Int int }{1},
			},
			wantOutput: "[Struct]\n  Int = 1\n",
		},
		"nested struct and non-struct field": {
			input: struct {
				Struct struct{ Int int }
				Bool   bool
			}{struct{ Int int }{1}, true},
			wantOutput: "Bool = true\n\n[Struct]\n  Int = 1\n",
		},
		"2 nested structs": {
			input: struct{ Struct1, Struct2 struct{ Int int } }{
				struct{ Int int }{1}, struct{ Int int }{2},
			},
			wantOutput: "[Struct1]\n  Int = 1\n\n[Struct2]\n  Int = 2\n",
		},
		"deeply nested structs": {
			input: struct {
				Struct1, Struct2 struct{ Struct3 *struct{ Int int } }
			}{
				struct{ Struct3 *struct{ Int int } }{&struct{ Int int }{1}},
				struct{ Struct3 *struct{ Int int } }{nil},
			},
			wantOutput: "[Struct1]\n  [Struct1.Struct3]\n    Int = 1" +
				"\n\n[Struct2]\n",
		},
		"nested struct with nil struct elem": {
			input: struct {
				Struct struct{ Inner *struct{ Int int } }
			}{
				struct{ Inner *struct{ Int int } }{nil},
			},
			wantOutput: "[Struct]\n",
		},
		"nested struct with no fields": {
			input: struct {
				Struct struct{ Inner struct{} }
			}{
				struct{ Inner struct{} }{struct{}{}},
			},
			wantOutput: "[Struct]\n  [Struct.Inner]\n",
		},
		"struct with tags": {
			input: struct {
				Struct struct {
					Int int `toml:"_int"`
				} `toml:"_struct"`
				Bool bool `toml:"_bool"`
			}{
				struct {
					Int int `toml:"_int"`
				}{1}, true,
			},
			wantOutput: "_bool = true\n\n[_struct]\n  _int = 1\n",
		},
		"embedded struct": {
			input:      struct{ Embedded }{Embedded{1}},
			wantOutput: "_int = 1\n",
		},
		"embedded *struct": {
			input:      struct{ *Embedded }{&Embedded{1}},
			wantOutput: "_int = 1\n",
		},
		"nested embedded struct": {
			input: struct {
				Struct struct{ Embedded } `toml:"_struct"`
			}{struct{ Embedded }{Embedded{1}}},
			wantOutput: "[_struct]\n  _int = 1\n",
		},
		"nested embedded *struct": {
			input: struct {
				Struct struct{ *Embedded } `toml:"_struct"`
			}{struct{ *Embedded }{&Embedded{1}}},
			wantOutput: "[_struct]\n  _int = 1\n",
		},
		"embedded non-struct": {
			input:      struct{ NonStruct }{5},
			wantOutput: "NonStruct = 5\n",
		},
		"array of tables": {
			input: struct {
				Structs []*struct{ Int int } `toml:"struct"`
			}{
				[]*struct{ Int int }{{1}, {3}},
			},
			wantOutput: "[[struct]]\n  Int = 1\n\n[[struct]]\n  Int = 3\n",
		},
		"array of tables order": {
			input: map[string]interface{}{
				"map": map[string]interface{}{
					"zero": 5,
					"arr": []map[string]int{
						{
							"friend": 5,
						},
					},
				},
			},
			wantOutput: "[map]\n  zero = 5\n\n  [[map.arr]]\n    friend = 5\n",
		},
		"(error) top-level slice": {
			input:     []struct{ Int int }{{1}, {2}, {3}},
			wantError: errNoKey,
		},
		"(error) slice of slice": {
			input: struct {
				Slices [][]struct{ Int int }
			}{
				[][]struct{ Int int }{{{1}}, {{2}}, {{3}}},
			},
			wantError: errArrayNoTable,
		},
		"(error) map no string key": {
			input:     map[int]string{1: ""},
			wantError: errNonString,
		},
		"(error) empty key name": {
			input:     map[string]int{"": 1},
			wantError: errAnything,
		},
		"(error) empty map name": {
			input: map[string]interface{}{
				"": map[string]int{"v": 1},
			},
			wantError: errAnything,
		},
	}
	for label, test := range tests {
		encodeExpected(t, label, test.input, test.wantOutput, test.wantError)
	}
}

func TestEncodeNestedTableArrays(t *testing.T) {
	type song struct {
		Name string `toml:"name"`
	}
	type album struct {
		Name  string `toml:"name"`
		Songs []song `toml:"songs"`
	}
	type springsteen struct {
		Albums []album `toml:"albums"`
	}
	value := springsteen{
		[]album{
			{"Born to Run",
				[]song{{"Jungleland"}, {"Meeting Across the River"}}},
			{"Born in the USA",
				[]song{{"Glory Days"}, {"Dancing in the Dark"}}},
		},
	}
	expected := `[[albums]]
  name = "Born to Run"

  [[albums.songs]]
    name = "Jungleland"

  [[albums.songs]]
    name = "Meeting Across the River"

[[albums]]
  name = "Born in the USA"

  [[albums.songs]]
    name = "Glory Days"

  [[albums.songs]]
    name = "Dancing in the Dark"
`
	encodeExpected(t, "nested table arrays", value, expected, nil)
}

func TestEncodeArrayHashWithNormalHashOrder(t *testing.T) {
	type Alpha struct {
		V int
	}
	type Beta struct {
		V int
	}
	type Conf struct {
		V int
		A Alpha
		B []Beta
	}

	val := Conf{
		V: 1,
		A: Alpha{2},
		B: []Beta{{3}},
	}
	expected := "V = 1\n\n[A]\n  V = 2\n\n[[B]]\n  V = 3\n"
	encodeExpected(t, "array hash with normal hash order", val, expected, nil)
}

func TestEncodeWithOmitEmpty(t *testing.T) {
	type simple struct {
		User string `toml:"user"`
		Pass string `toml:"password,omitempty"`
	}

	value := simple{"Testing", ""}
	expected := fmt.Sprintf("user = %q\n", value.User)
	encodeExpected(t, "simple with omitempty, is empty", value, expected, nil)
	value.Pass = "some password"
	expected = fmt.Sprintf("user = %q\npassword = %q\n", value.User, value.Pass)
	encodeExpected(t, "simple with omitempty, not empty", value, expected, nil)
}

func TestEncodeWithOmitZero(t *testing.T) {
	type simple struct {
		Number   int     `toml:"number,omitzero"`
		Real     float64 `toml:"real,omitzero"`
		Unsigned uint    `toml:"unsigned,omitzero"`
	}

	value := simple{0, 0.0, uint(0)}
	expected := ""

	encodeExpected(t, "simple with omitzero, all zero", value, expected, nil)

	value.Number = 10
	value.Real = 20
	value.Unsigned = 5
	expected = `number = 10
real = 20.0
unsigned = 5
`
	encodeExpected(t, "simple with omitzero, non-zero", value, expected, nil)
}

func TestEncodeAnonymousStructPointerField(t *testing.T) {
	type Sub struct{}
	type simple struct {
		*Sub
	}

	value := simple{}
	expected := ""
	encodeExpected(t, "nil anonymous struct pointer field", value, expected, nil)

	value = simple{Sub: &Sub{}}
	expected = ""
	encodeExpected(t, "non-nil anonymous struct pointer field", value, expected, nil)
}

func TestEncodeIgnoredFields(t *testing.T) {
	type simple struct {
		Number int `toml:"-"`
	}
	value := simple{}
	expected := ""
	encodeExpected(t, "ignored field", value, expected, nil)
}

func encodeExpected(
	t *testing.T, label string, val interface{}, wantStr string, wantErr error,
) {
	var buf bytes.Buffer
	enc := NewEncoder(&buf)
	err := enc.Encode(val)
	if err != wantErr {
		if wantErr != nil {
			if wantErr == errAnything && err != nil {
				return
			}
			t.Errorf("%s: want Encode error %v, got %v", label, wantErr, err)
		} else {
			t.Errorf("%s: Encode failed: %s", label, err)
		}
	}
	if err != nil {
		return
	}
	if got := buf.String(); wantStr != got {
		t.Errorf("%s: want\n-----\n%q\n-----\nbut got\n-----\n%q\n-----\n",
			label, wantStr, got)
	}
}

func ExampleEncoder_Encode() {
	date, _ := time.Parse(time.RFC822, "14 Mar 10 18:00 UTC")
	var config = map[string]interface{}{
		"date":   date,
		"counts": []int{1, 1, 2, 3, 5, 8},
		"hash": map[string]string{
			"key1": "val1",
			"key2": "val2",
		},
	}
	buf := new(bytes.Buffer)
	if err := NewEncoder(buf).Encode(config); err != nil {
		log.Fatal(err)
	}
	fmt.Println(buf.String())

	// Output:
	// counts = [1, 1, 2, 3, 5, 8]
	// date = 2010-03-14T18:00:00Z
	//
	// [hash]
	//   key1 = "val1"
	//   key2 = "val2"
}

19 vendor/github.com/BurntSushi/toml/encoding_types.go generated vendored
@ -1,19 +0,0 @@
// +build go1.2

package toml

// In order to support Go 1.1, we define our own TextMarshaler and
// TextUnmarshaler types. For Go 1.2+, we just alias them with the
// standard library interfaces.

import (
	"encoding"
)

// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler encoding.TextMarshaler

// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler encoding.TextUnmarshaler

18 vendor/github.com/BurntSushi/toml/encoding_types_1.1.go generated vendored
@ -1,18 +0,0 @@
// +build !go1.2

package toml

// These interfaces were introduced in Go 1.2, so we add them manually when
// compiling for Go 1.1.

// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler interface {
	MarshalText() (text []byte, err error)
}

// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler interface {
	UnmarshalText(text []byte) error
}

871 vendor/github.com/BurntSushi/toml/lex.go generated vendored
@ -1,871 +0,0 @@
package toml

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

type itemType int

const (
	itemError itemType = iota
	itemNIL   // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemCommentStart
)

const (
	eof             = 0
	tableStart      = '['
	tableEnd        = ']'
	arrayTableStart = '['
	arrayTableEnd   = ']'
	tableSep        = '.'
	keySep          = '='
	arrayStart      = '['
	arrayEnd        = ']'
	arrayValTerm    = ','
	commentStart    = '#'
	stringStart     = '"'
	stringEnd       = '"'
	rawStringStart  = '\''
	rawStringEnd    = '\''
)

type stateFn func(lx *lexer) stateFn

type lexer struct {
	input string
	start int
	pos   int
	width int
	line  int
	state stateFn
	items chan item

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn
}

type item struct {
	typ  itemType
	val  string
	line int
}

func (lx *lexer) nextItem() item {
	for {
		select {
		case item := <-lx.items:
			return item
		default:
			lx.state = lx.state(lx)
		}
	}
}

func lex(input string) *lexer {
	lx := &lexer{
		input: input + "\n",
		state: lexTop,
		line:  1,
		items: make(chan item, 10),
		stack: make([]stateFn, 0, 10),
	}
	return lx
}

func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return lx.errorf("BUG in lexer: no states to pop.")
	}
	last := lx.stack[len(lx.stack)-1]
	lx.stack = lx.stack[0 : len(lx.stack)-1]
	return last
}

func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}

func (lx *lexer) emit(typ itemType) {
	lx.items <- item{typ, lx.current(), lx.line}
	lx.start = lx.pos
}

func (lx *lexer) emitTrim(typ itemType) {
	lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
	lx.start = lx.pos
}

func (lx *lexer) next() (r rune) {
	if lx.pos >= len(lx.input) {
		lx.width = 0
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.pos += lx.width
	return r
}

// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}

// backup steps back one rune. Can be called only once per call of next.
func (lx *lexer) backup() {
	lx.pos -= lx.width
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
	if lx.next() == valid {
		return true
	}
	lx.backup()
	return false
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (new lines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
	}
	return nil
}

// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) || isNL(r) {
		return lexSkip(lx, lexTop)
	}

	switch r {
	case commentStart:
		lx.push(lexTop)
		return lexCommentStart
	case tableStart:
		return lexTableStart
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopEnd)
	return lexKeyStart
}

// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a new line. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentStart:
		// a comment will read to a new line for us.
		lx.push(lexTop)
		return lexCommentStart
	case isWhitespace(r):
		return lexTopEnd
	case isNL(r):
		lx.ignore()
		return lexTop
	case r == eof:
		lx.ignore()
		return lexTop
	}
	return lx.errorf("Expected a top-level item to end with a new line, "+
		"comment or EOF, but got %q instead.", r)
}

// lexTableStart lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
	if lx.peek() == arrayTableStart {
		lx.next()
		lx.emit(itemArrayTableStart)
		lx.push(lexArrayTableEnd)
	} else {
		lx.emit(itemTableStart)
		lx.push(lexTableEnd)
	}
	return lexTableNameStart
}

func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}

func lexArrayTableEnd(lx *lexer) stateFn {
	if r := lx.next(); r != arrayTableEnd {
		return lx.errorf("Expected end of table array name delimiter %q, "+
			"but got %q instead.", arrayTableEnd, r)
	}
	lx.emit(itemArrayTableEnd)
	return lexTopEnd
}

func lexTableNameStart(lx *lexer) stateFn {
	switch r := lx.peek(); {
	case r == tableEnd || r == eof:
		return lx.errorf("Unexpected end of table name. (Table names cannot " +
			"be empty.)")
	case r == tableSep:
		return lx.errorf("Unexpected table separator. (Table names cannot " +
			"be empty.)")
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.push(lexTableNameEnd)
		return lexValue // reuse string lexing
	default:
		return lexBareTableName
	}
}

// lexBareTableName lexes the name of a table. It assumes that at least one
// valid character for the table has already been read.
func lexBareTableName(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isBareKeyChar(r):
		return lexBareTableName
	case r == tableSep || r == tableEnd:
		lx.backup()
		lx.emitTrim(itemText)
		return lexTableNameEnd
	default:
		return lx.errorf("Bare keys cannot contain %q.", r)
	}
}

// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isWhitespace(r):
		return lexTableNameEnd
	case r == tableSep:
		lx.ignore()
		return lexTableNameStart
	case r == tableEnd:
		return lx.pop()
	default:
		return lx.errorf("Expected '.' or ']' to end table name, but got %q "+
			"instead.", r)
	}
}

// lexKeyStart consumes a key name up until the first non-whitespace character.
// lexKeyStart will ignore whitespace.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case r == keySep:
		return lx.errorf("Unexpected key separator %q.", keySep)
	case isWhitespace(r) || isNL(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.emit(itemKeyStart)
		lx.push(lexKeyEnd)
		return lexValue // reuse string lexing
	default:
		lx.ignore()
		lx.emit(itemKeyStart)
		return lexBareKey
	}
}

// lexBareKey consumes the text of a bare key. Assumes that the first character
// (which is not whitespace) has not yet been consumed.
func lexBareKey(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isBareKeyChar(r):
		return lexBareKey
	case isWhitespace(r):
		lx.emitTrim(itemText)
		return lexKeyEnd
	case r == keySep:
		lx.backup()
		lx.emitTrim(itemText)
		return lexKeyEnd
	default:
		return lx.errorf("Bare keys cannot contain %q.", r)
	}
}

// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == keySep:
		return lexSkip(lx, lexValue)
	case isWhitespace(r):
		return lexSkip(lx, lexKeyEnd)
	default:
		return lx.errorf("Expected key separator %q, but got %q instead.",
			keySep, r)
	}
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT new lines.
	// In array syntax, the array states are responsible for ignoring new
	// lines.
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexValue)
	}

	switch {
	case r == arrayStart:
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case r == stringStart:
		if lx.accept(stringStart) {
			if lx.accept(stringStart) {
				lx.ignore() // Ignore """
				return lexMultilineString
			}
			lx.backup()
		}
		lx.ignore() // ignore the '"'
		return lexString
	case r == rawStringStart:
		if lx.accept(rawStringStart) {
			if lx.accept(rawStringStart) {
				lx.ignore() // Ignore '''
				return lexMultilineRawString
			}
			lx.backup()
		}
		lx.ignore() // ignore the "'"
		return lexRawString
	case r == 't':
		return lexTrue
	case r == 'f':
		return lexFalse
	case r == '-':
		return lexNumberStart
	case isDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateStart
	case r == '.': // special error case, be kind to users
		return lx.errorf("Floats must start with a digit, not '.'.")
	}
	return lx.errorf("Expected value but found %q instead.", r)
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == arrayValTerm:
		return lx.errorf("Unexpected array value terminator %q.",
			arrayValTerm)
	case r == arrayEnd:
		return lexArrayEnd
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}

// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == arrayValTerm:
		lx.ignore()
		return lexArrayValue // move on to the next value
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf("Expected an array value terminator %q or an array "+
		"terminator %q, but got %q instead.", arrayValTerm, arrayEnd, r)
}

// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isNL(r):
		return lx.errorf("Strings cannot contain new lines.")
	case r == '\\':
		lx.push(lexString)
		return lexStringEscape
	case r == stringEnd:
		lx.backup()
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}

// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		return lexMultilineStringEscape
	case r == stringEnd:
		if lx.accept(stringEnd) {
			if lx.accept(stringEnd) {
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineString
}

// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isNL(r):
		return lx.errorf("Strings cannot contain new lines.")
	case r == rawStringEnd:
		lx.backup()
		lx.emit(itemRawString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexRawString
}

// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == rawStringEnd:
		if lx.accept(rawStringEnd) {
			if lx.accept(rawStringEnd) {
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineRawString
}

// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
	// Handle the special case first:
	if isNL(lx.next()) {
		return lexMultilineString
	} else {
		lx.backup()
		lx.push(lexMultilineString)
		return lexStringEscape(lx)
	}
}

func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'b':
		fallthrough
	case 't':
		fallthrough
	case 'n':
		fallthrough
	case 'f':
		fallthrough
	case 'r':
		fallthrough
	case '"':
		fallthrough
	case '\\':
		return lx.pop()
	case 'u':
		return lexShortUnicodeEscape
	case 'U':
		return lexLongUnicodeEscape
	}
	return lx.errorf("Invalid escape character %q. Only the following "+
		"escape characters are allowed: "+
		"\\b, \\t, \\n, \\f, \\r, \\\", \\/, \\\\, "+
		"\\uXXXX and \\UXXXXXXXX.", r)
}

func lexShortUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 4; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf("Expected four hexadecimal digits after '\\u', "+
				"but got '%s' instead.", lx.current())
		}
	}
	return lx.pop()
}

func lexLongUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 8; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf("Expected eight hexadecimal digits after '\\U', "+
				"but got '%s' instead.", lx.current())
		}
	}
	return lx.pop()
}

// lexNumberOrDateStart consumes either a (positive) integer, float or
// datetime. It assumes that NO negative sign has been consumed.
func lexNumberOrDateStart(lx *lexer) stateFn {
	r := lx.next()
	if !isDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		} else {
			return lx.errorf("Expected a digit but got %q.", r)
		}
	}
	return lexNumberOrDate
}

// lexNumberOrDate consumes either a (positive) integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '-':
		if lx.pos-lx.start != 5 {
			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
		}
		return lexDateAfterYear
	case isDigit(r):
		return lexNumberOrDate
	case r == '.':
		return lexFloatStart
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
// It assumes that "YYYY-" has already been consumed.
func lexDateAfterYear(lx *lexer) stateFn {
	formats := []rune{
		// digits are '0'.
		// everything else is direct equality.
		'0', '0', '-', '0', '0',
		'T',
		'0', '0', ':', '0', '0', ':', '0', '0',
		'Z',
	}
	for _, f := range formats {
		r := lx.next()
		if f == '0' {
			if !isDigit(r) {
				return lx.errorf("Expected digit in ISO8601 datetime, "+
					"but found %q instead.", r)
			}
		} else if f != r {
			return lx.errorf("Expected %q in ISO8601 datetime, "+
				"but found %q instead.", f, r)
		}
	}
	lx.emit(itemDatetime)
	return lx.pop()
}

// lexNumberStart consumes either an integer or a float. It assumes that
// a negative sign has already been read, but that *no* digits have been
// consumed. lexNumberStart will move to the appropriate integer or float
// states.
func lexNumberStart(lx *lexer) stateFn {
	// we MUST see a digit. Even floats have to start with a digit.
	r := lx.next()
	if !isDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		} else {
			return lx.errorf("Expected a digit but got %q.", r)
		}
	}
	return lexNumber
}

// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isDigit(r):
		return lexNumber
	case r == '.':
		return lexFloatStart
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloatStart starts the consumption of digits of a float after a '.'.
// Namely, at least one digit is required.
func lexFloatStart(lx *lexer) stateFn {
	r := lx.next()
	if !isDigit(r) {
		return lx.errorf("Floats must have a digit after the '.', but got "+
			"%q instead.", r)
	}
	return lexFloat
}

// lexFloat consumes the digits of a float after a '.'.
// Assumes that one digit has been consumed after a '.' already.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexFloat
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexConst consumes the s[1:] in s. It assumes that s[0] has already been
// consumed.
func lexConst(lx *lexer, s string) stateFn {
	for i := range s[1:] {
		if r := lx.next(); r != rune(s[i+1]) {
			return lx.errorf("Expected %q, but found %q instead.", s[:i+1],
				s[:i]+string(r))
		}
	}
	return nil
}

// lexTrue consumes the "rue" in "true". It assumes that 't' has already
// been consumed.
func lexTrue(lx *lexer) stateFn {
	if fn := lexConst(lx, "true"); fn != nil {
		return fn
	}
	lx.emit(itemBool)
	return lx.pop()
}

// lexFalse consumes the "alse" in "false". It assumes that 'f' has already
// been consumed.
func lexFalse(lx *lexer) stateFn {
	if fn := lexConst(lx, "false"); fn != nil {
		return fn
	}
	lx.emit(itemBool)
	return lx.pop()
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first new line character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	r := lx.peek()
	if isNL(r) || r == eof {
		lx.emit(itemText)
		return lx.pop()
	}
	lx.next()
	return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
	return func(lx *lexer) stateFn {
		lx.ignore()
		return nextState
	}
}

// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
	return r == '\t' || r == ' '
}

func isNL(r rune) bool {
	return r == '\n' || r == '\r'
}

func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

func isHexadecimal(r rune) bool {
	return (r >= '0' && r <= '9') ||
		(r >= 'a' && r <= 'f') ||
		(r >= 'A' && r <= 'F')
}

func isBareKeyChar(r rune) bool {
	return (r >= 'A' && r <= 'Z') ||
		(r >= 'a' && r <= 'z') ||
		(r >= '0' && r <= '9') ||
		r == '_' ||
		r == '-'
}

func (itype itemType) String() string {
	switch itype {
	case itemError:
		return "Error"
	case itemNIL:
		return "NIL"
	case itemEOF:
		return "EOF"
	case itemText:
		return "Text"
	case itemString:
		return "String"
	case itemRawString:
		return "String"
	case itemMultilineString:
		return "String"
	case itemRawMultilineString:
		return "String"
	case itemBool:
		return "Bool"
	case itemInteger:
		return "Integer"
	case itemFloat:
		return "Float"
	case itemDatetime:
		return "DateTime"
	case itemTableStart:
		return "TableStart"
	case itemTableEnd:
		return "TableEnd"
	case itemKeyStart:
		return "KeyStart"
	case itemArray:
		return "Array"
	case itemArrayEnd:
		return "ArrayEnd"
	case itemCommentStart:
		return "CommentStart"
	}
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

func (item item) String() string {
	return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}

493 vendor/github.com/BurntSushi/toml/parse.go generated vendored
@ -1,493 +0,0 @@
package toml

import (
	"fmt"
	"log"
	"strconv"
	"strings"
	"time"
	"unicode"
	"unicode/utf8"
)

type parser struct {
	mapping map[string]interface{}
	types   map[string]tomlType
	lx      *lexer

	// A list of keys in the order that they appear in the TOML data.
	ordered []Key

	// the full key for the current hash in scope
	context Key

	// the base key name for everything except hashes
	currentKey string

	// rough approximation of line number
	approxLine int

	// A map of 'key.group.names' to whether they were created implicitly.
	implicits map[string]bool
}

type parseError string

func (pe parseError) Error() string {
	return string(pe)
}

func parse(data string) (p *parser, err error) {
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			if err, ok = r.(parseError); ok {
				return
			}
			panic(r)
		}
	}()

	p = &parser{
		mapping:   make(map[string]interface{}),
		types:     make(map[string]tomlType),
		lx:        lex(data),
		ordered:   make([]Key, 0),
		implicits: make(map[string]bool),
	}
	for {
		item := p.next()
		if item.typ == itemEOF {
			break
		}
		p.topLevel(item)
	}

	return p, nil
}

func (p *parser) panicf(format string, v ...interface{}) {
	msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
		p.approxLine, p.current(), fmt.Sprintf(format, v...))
	panic(parseError(msg))
}

func (p *parser) next() item {
	it := p.lx.nextItem()
	if it.typ == itemError {
		p.panicf("%s", it.val)
	}
	return it
}

func (p *parser) bug(format string, v ...interface{}) {
	log.Panicf("BUG: %s\n\n", fmt.Sprintf(format, v...))
}

func (p *parser) expect(typ itemType) item {
	it := p.next()
	p.assertEqual(typ, it.typ)
	return it
}

func (p *parser) assertEqual(expected, got itemType) {
	if expected != got {
		p.bug("Expected '%s' but got '%s'.", expected, got)
	}
}

func (p *parser) topLevel(item item) {
	switch item.typ {
	case itemCommentStart:
		p.approxLine = item.line
		p.expect(itemText)
	case itemTableStart:
		kg := p.next()
		p.approxLine = kg.line

		var key Key
		for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
			key = append(key, p.keyString(kg))
		}
		p.assertEqual(itemTableEnd, kg.typ)

		p.establishContext(key, false)
		p.setType("", tomlHash)
		p.ordered = append(p.ordered, key)
	case itemArrayTableStart:
		kg := p.next()
		p.approxLine = kg.line

		var key Key
		for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
			key = append(key, p.keyString(kg))
		}
		p.assertEqual(itemArrayTableEnd, kg.typ)

		p.establishContext(key, true)
		p.setType("", tomlArrayHash)
		p.ordered = append(p.ordered, key)
	case itemKeyStart:
		kname := p.next()
		p.approxLine = kname.line
		p.currentKey = p.keyString(kname)

		val, typ := p.value(p.next())
		p.setValue(p.currentKey, val)
		p.setType(p.currentKey, typ)
		p.ordered = append(p.ordered, p.context.add(p.currentKey))
		p.currentKey = ""
	default:
		p.bug("Unexpected type at top level: %s", item.typ)
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a string for a key (or part of a key in a table name).
|
||||
func (p *parser) keyString(it item) string {
|
||||
switch it.typ {
|
||||
case itemText:
|
||||
return it.val
|
||||
case itemString, itemMultilineString,
|
||||
itemRawString, itemRawMultilineString:
|
||||
s, _ := p.value(it)
|
||||
return s.(string)
|
||||
default:
|
||||
p.bug("Unexpected key type: %s", it.typ)
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// value translates an expected value from the lexer into a Go value wrapped
|
||||
// as an empty interface.
|
||||
func (p *parser) value(it item) (interface{}, tomlType) {
|
||||
switch it.typ {
|
||||
case itemString:
|
||||
return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
|
||||
case itemMultilineString:
|
||||
trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
|
||||
return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
|
||||
case itemRawString:
|
||||
return it.val, p.typeOfPrimitive(it)
|
||||
case itemRawMultilineString:
|
||||
return stripFirstNewline(it.val), p.typeOfPrimitive(it)
|
||||
case itemBool:
|
||||
switch it.val {
|
||||
case "true":
|
||||
return true, p.typeOfPrimitive(it)
|
||||
case "false":
|
||||
return false, p.typeOfPrimitive(it)
|
||||
}
|
||||
p.bug("Expected boolean value, but got '%s'.", it.val)
|
||||
case itemInteger:
|
||||
num, err := strconv.ParseInt(it.val, 10, 64)
|
||||
if err != nil {
|
||||
// See comment below for floats describing why we make a
|
||||
// distinction between a bug and a user error.
|
||||
if e, ok := err.(*strconv.NumError); ok &&
|
||||
e.Err == strconv.ErrRange {
|
||||
|
||||
p.panicf("Integer '%s' is out of the range of 64-bit "+
|
||||
"signed integers.", it.val)
|
||||
} else {
|
||||
p.bug("Expected integer value, but got '%s'.", it.val)
|
||||
}
|
||||
}
|
||||
return num, p.typeOfPrimitive(it)
|
||||
case itemFloat:
|
||||
num, err := strconv.ParseFloat(it.val, 64)
|
||||
if err != nil {
|
||||
// Distinguish float values. Normally, it'd be a bug if the lexer
|
||||
// provides an invalid float, but it's possible that the float is
|
||||
// out of range of valid values (which the lexer cannot determine).
|
||||
// So mark the former as a bug but the latter as a legitimate user
|
||||
// error.
|
||||
//
|
||||
// This is also true for integers.
|
||||
if e, ok := err.(*strconv.NumError); ok &&
|
||||
e.Err == strconv.ErrRange {
|
||||
|
||||
p.panicf("Float '%s' is out of the range of 64-bit "+
|
||||
"IEEE-754 floating-point numbers.", it.val)
|
||||
} else {
|
||||
p.bug("Expected float value, but got '%s'.", it.val)
|
||||
}
|
||||
}
|
||||
return num, p.typeOfPrimitive(it)
|
||||
case itemDatetime:
|
||||
t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
|
||||
if err != nil {
|
||||
p.panicf("Invalid RFC3339 Zulu DateTime: '%s'.", it.val)
|
||||
}
|
||||
return t, p.typeOfPrimitive(it)
|
||||
case itemArray:
|
||||
array := make([]interface{}, 0)
|
||||
types := make([]tomlType, 0)
|
||||
|
||||
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
|
||||
if it.typ == itemCommentStart {
|
||||
p.expect(itemText)
|
||||
continue
|
||||
}
|
||||
|
||||
val, typ := p.value(it)
|
||||
array = append(array, val)
|
||||
types = append(types, typ)
|
||||
}
|
||||
return array, p.typeOfArray(types)
|
||||
}
|
||||
p.bug("Unexpected value type: %s", it.typ)
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// establishContext sets the current context of the parser,
|
||||
// where the context is either a hash or an array of hashes. Which one is
|
||||
// set depends on the value of the `array` parameter.
|
||||
//
|
||||
// Establishing the context also makes sure that the key isn't a duplicate, and
|
||||
// will create implicit hashes automatically.
|
||||
func (p *parser) establishContext(key Key, array bool) {
|
||||
var ok bool
|
||||
|
||||
// Always start at the top level and drill down for our context.
|
||||
hashContext := p.mapping
|
||||
keyContext := make(Key, 0)
|
||||
|
||||
// We only need implicit hashes for key[0:-1]
|
||||
for _, k := range key[0 : len(key)-1] {
|
||||
_, ok = hashContext[k]
|
||||
keyContext = append(keyContext, k)
|
||||
|
||||
// No key? Make an implicit hash and move on.
|
||||
if !ok {
|
||||
p.addImplicit(keyContext)
|
||||
hashContext[k] = make(map[string]interface{})
|
||||
}
|
||||
|
||||
// If the hash context is actually an array of tables, then set
|
||||
// the hash context to the last element in that array.
|
||||
//
|
||||
// Otherwise, it better be a table, since this MUST be a key group (by
|
||||
// virtue of it not being the last element in a key).
|
||||
switch t := hashContext[k].(type) {
|
||||
case []map[string]interface{}:
|
||||
hashContext = t[len(t)-1]
|
||||
case map[string]interface{}:
|
||||
hashContext = t
|
||||
default:
|
||||
p.panicf("Key '%s' was already created as a hash.", keyContext)
|
||||
}
|
||||
}
|
||||
|
||||
p.context = keyContext
|
||||
if array {
|
||||
// If this is the first element for this array, then allocate a new
|
||||
// list of tables for it.
|
||||
k := key[len(key)-1]
|
||||
if _, ok := hashContext[k]; !ok {
|
||||
hashContext[k] = make([]map[string]interface{}, 0, 5)
|
||||
}
|
||||
|
||||
// Add a new table. But make sure the key hasn't already been used
|
||||
// for something else.
|
||||
if hash, ok := hashContext[k].([]map[string]interface{}); ok {
|
||||
hashContext[k] = append(hash, make(map[string]interface{}))
|
||||
} else {
|
||||
p.panicf("Key '%s' was already created and cannot be used as "+
|
||||
"an array.", keyContext)
|
||||
}
|
||||
} else {
|
||||
p.setValue(key[len(key)-1], make(map[string]interface{}))
|
||||
}
|
||||
p.context = append(p.context, key[len(key)-1])
|
||||
}
|
||||
|
||||
// setValue sets the given key to the given value in the current context.
|
||||
// It will make sure that the key hasn't already been defined, account for
|
||||
// implicit key groups.
|
||||
func (p *parser) setValue(key string, value interface{}) {
|
||||
var tmpHash interface{}
|
||||
var ok bool
|
||||
|
||||
hash := p.mapping
|
||||
keyContext := make(Key, 0)
|
||||
for _, k := range p.context {
|
||||
keyContext = append(keyContext, k)
|
||||
if tmpHash, ok = hash[k]; !ok {
|
||||
p.bug("Context for key '%s' has not been established.", keyContext)
|
||||
}
|
||||
switch t := tmpHash.(type) {
|
||||
case []map[string]interface{}:
|
||||
// The context is a table of hashes. Pick the most recent table
|
||||
// defined as the current hash.
|
||||
hash = t[len(t)-1]
|
||||
case map[string]interface{}:
|
||||
hash = t
|
||||
default:
|
||||
p.bug("Expected hash to have type 'map[string]interface{}', but "+
|
||||
"it has '%T' instead.", tmpHash)
|
||||
}
|
||||
}
|
||||
keyContext = append(keyContext, key)
|
||||
|
||||
if _, ok := hash[key]; ok {
|
||||
// Typically, if the given key has already been set, then we have
|
||||
// to raise an error since duplicate keys are disallowed. However,
|
||||
// it's possible that a key was previously defined implicitly. In this
|
||||
// case, it is allowed to be redefined concretely. (See the
|
||||
// `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
|
||||
//
|
||||
// But we have to make sure to stop marking it as an implicit. (So that
|
||||
// another redefinition provokes an error.)
|
||||
//
|
||||
// Note that since it has already been defined (as a hash), we don't
|
||||
// want to overwrite it. So our business is done.
|
||||
if p.isImplicit(keyContext) {
|
||||
p.removeImplicit(keyContext)
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, we have a concrete key trying to override a previous
|
||||
// key, which is *always* wrong.
|
||||
p.panicf("Key '%s' has already been defined.", keyContext)
|
||||
}
|
||||
hash[key] = value
|
||||
}
|
||||
|
||||
// setType sets the type of a particular value at a given key.
|
||||
// It should be called immediately AFTER setValue.
|
||||
//
|
||||
// Note that if `key` is empty, then the type given will be applied to the
|
||||
// current context (which is either a table or an array of tables).
|
||||
func (p *parser) setType(key string, typ tomlType) {
|
||||
keyContext := make(Key, 0, len(p.context)+1)
|
||||
for _, k := range p.context {
|
||||
keyContext = append(keyContext, k)
|
||||
}
|
||||
if len(key) > 0 { // allow type setting for hashes
|
||||
keyContext = append(keyContext, key)
|
||||
}
|
||||
p.types[keyContext.String()] = typ
|
||||
}
|
||||
|
||||
// addImplicit sets the given Key as having been created implicitly.
|
||||
func (p *parser) addImplicit(key Key) {
|
||||
p.implicits[key.String()] = true
|
||||
}
|
||||
|
||||
// removeImplicit stops tagging the given key as having been implicitly
|
||||
// created.
|
||||
func (p *parser) removeImplicit(key Key) {
|
||||
p.implicits[key.String()] = false
|
||||
}
|
||||
|
||||
// isImplicit returns true if the key group pointed to by the key was created
|
||||
// implicitly.
|
||||
func (p *parser) isImplicit(key Key) bool {
|
||||
return p.implicits[key.String()]
|
||||
}
|
||||
|
||||
// current returns the full key name of the current context.
|
||||
func (p *parser) current() string {
|
||||
if len(p.currentKey) == 0 {
|
||||
return p.context.String()
|
||||
}
|
||||
if len(p.context) == 0 {
|
||||
return p.currentKey
|
||||
}
|
||||
return fmt.Sprintf("%s.%s", p.context, p.currentKey)
|
||||
}
|
||||
|
||||
func stripFirstNewline(s string) string {
|
||||
if len(s) == 0 || s[0] != '\n' {
|
||||
return s
|
||||
}
|
||||
return s[1:]
|
||||
}
|
||||
|
||||
func stripEscapedWhitespace(s string) string {
|
||||
esc := strings.Split(s, "\\\n")
|
||||
if len(esc) > 1 {
|
||||
for i := 1; i < len(esc); i++ {
|
||||
esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
|
||||
}
|
||||
}
|
||||
return strings.Join(esc, "")
|
||||
}
|
||||
|
||||
func (p *parser) replaceEscapes(str string) string {
|
||||
var replaced []rune
|
||||
s := []byte(str)
|
||||
r := 0
|
||||
for r < len(s) {
|
||||
if s[r] != '\\' {
|
||||
c, size := utf8.DecodeRune(s[r:])
|
||||
r += size
|
||||
replaced = append(replaced, c)
|
||||
continue
|
||||
}
|
||||
r += 1
|
||||
if r >= len(s) {
|
||||
p.bug("Escape sequence at end of string.")
|
||||
return ""
|
||||
}
|
||||
switch s[r] {
|
||||
default:
|
||||
p.bug("Expected valid escape code after \\, but got %q.", s[r])
|
||||
return ""
|
||||
case 'b':
|
||||
replaced = append(replaced, rune(0x0008))
|
||||
r += 1
|
||||
case 't':
|
||||
replaced = append(replaced, rune(0x0009))
|
||||
r += 1
|
||||
case 'n':
|
||||
replaced = append(replaced, rune(0x000A))
|
||||
r += 1
|
||||
case 'f':
|
||||
replaced = append(replaced, rune(0x000C))
|
||||
r += 1
|
||||
case 'r':
|
||||
replaced = append(replaced, rune(0x000D))
|
||||
r += 1
|
||||
case '"':
|
||||
replaced = append(replaced, rune(0x0022))
|
||||
r += 1
|
||||
case '\\':
|
||||
replaced = append(replaced, rune(0x005C))
|
||||
r += 1
|
||||
case 'u':
|
||||
// At this point, we know we have a Unicode escape of the form
|
||||
// `uXXXX` at [r, r+5). (Because the lexer guarantees this
|
||||
// for us.)
|
||||
escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
|
||||
replaced = append(replaced, escaped)
|
||||
r += 5
|
||||
case 'U':
|
||||
// At this point, we know we have a Unicode escape of the form
|
||||
// `uXXXX` at [r, r+9). (Because the lexer guarantees this
|
||||
// for us.)
|
||||
escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
|
||||
replaced = append(replaced, escaped)
|
||||
r += 9
|
||||
}
|
||||
}
|
||||
return string(replaced)
|
||||
}
|
||||
|
||||
func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
|
||||
s := string(bs)
|
||||
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
|
||||
if err != nil {
|
||||
p.bug("Could not parse '%s' as a hexadecimal number, but the "+
|
||||
"lexer claims it's OK: %s", s, err)
|
||||
}
|
||||
if !utf8.ValidRune(rune(hex)) {
|
||||
p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
|
||||
}
|
||||
return rune(hex)
|
||||
}
|
||||
|
||||
func isStringType(ty itemType) bool {
|
||||
return ty == itemString || ty == itemMultilineString ||
|
||||
ty == itemRawString || ty == itemRawMultilineString
|
||||
}
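parse is the only place panics escape to: helper methods signal user-level errors by panicking with parseError, and the deferred recover converts exactly those panics back into ordinary errors while re-raising anything else as a genuine bug. A standalone sketch of that panic-to-error idiom (hypothetical parseNumberish, not the vendored API):

```go
package main

import "fmt"

type parseError string

func (pe parseError) Error() string { return string(pe) }

// parseNumberish mimics the vendored parser's control flow: helpers panic
// with a parseError, and the exported entry point recovers it into an error.
func parseNumberish(s string) (n int, err error) {
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			if err, ok = r.(parseError); ok {
				return // a user-level parse error, not a bug
			}
			panic(r) // anything else really is a bug: re-raise
		}
	}()
	for _, c := range s {
		if c < '0' || c > '9' {
			panic(parseError(fmt.Sprintf("invalid digit %q", c)))
		}
		n = n*10 + int(c-'0')
	}
	return n, nil
}

func main() {
	fmt.Println(parseNumberish("123")) // 123 <nil>
	fmt.Println(parseNumberish("12x")) // 0 invalid digit 'x'
}
```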
1 vendor/github.com/BurntSushi/toml/session.vim (generated, vendored)
@ -1 +0,0 @@
au BufWritePost *.go silent!make tags > /dev/null 2>&1

91 vendor/github.com/BurntSushi/toml/type_check.go (generated, vendored)
@ -1,91 +0,0 @@
package toml

// tomlType represents any Go type that corresponds to a TOML type.
// While the first draft of the TOML spec has a simplistic type system that
// probably doesn't need this level of sophistication, we seem to be militating
// toward adding real composite types.
type tomlType interface {
	typeString() string
}

// typeEqual accepts any two types and returns true if they are equal.
func typeEqual(t1, t2 tomlType) bool {
	if t1 == nil || t2 == nil {
		return false
	}
	return t1.typeString() == t2.typeString()
}

func typeIsHash(t tomlType) bool {
	return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash)
}

type tomlBaseType string

func (btype tomlBaseType) typeString() string {
	return string(btype)
}

func (btype tomlBaseType) String() string {
	return btype.typeString()
}

var (
	tomlInteger   tomlBaseType = "Integer"
	tomlFloat     tomlBaseType = "Float"
	tomlDatetime  tomlBaseType = "Datetime"
	tomlString    tomlBaseType = "String"
	tomlBool      tomlBaseType = "Bool"
	tomlArray     tomlBaseType = "Array"
	tomlHash      tomlBaseType = "Hash"
	tomlArrayHash tomlBaseType = "ArrayHash"
)

// typeOfPrimitive returns a tomlType of any primitive value in TOML.
// Primitive values are: Integer, Float, Datetime, String and Bool.
//
// Passing a lexer item other than the following will cause a BUG message
// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime.
func (p *parser) typeOfPrimitive(lexItem item) tomlType {
	switch lexItem.typ {
	case itemInteger:
		return tomlInteger
	case itemFloat:
		return tomlFloat
	case itemDatetime:
		return tomlDatetime
	case itemString:
		return tomlString
	case itemMultilineString:
		return tomlString
	case itemRawString:
		return tomlString
	case itemRawMultilineString:
		return tomlString
	case itemBool:
		return tomlBool
	}
	p.bug("Cannot infer primitive type of lex item '%s'.", lexItem)
	panic("unreachable")
}

// typeOfArray returns a tomlType for an array given a list of types of its
// values.
//
// In the current spec, if an array is homogeneous, then its type is always
// "Array". If the array is not homogeneous, an error is generated.
func (p *parser) typeOfArray(types []tomlType) tomlType {
	// Empty arrays are cool.
	if len(types) == 0 {
		return tomlArray
	}

	theType := types[0]
	for _, t := range types[1:] {
		if !typeEqual(theType, t) {
			p.panicf("Array contains values of type '%s' and '%s', but "+
				"arrays must be homogeneous.", theType, t)
		}
	}
	return tomlArray
}
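typeOfArray enforces the homogeneous-array rule of early TOML by comparing each element's type against the first. The same check in isolation, as a hedged sketch with a hypothetical helper:

```go
package main

import "fmt"

// homogeneous reports whether every element of types matches the first,
// mirroring the comparison loop in typeOfArray.
func homogeneous(types []string) error {
	if len(types) == 0 {
		return nil // empty arrays are always fine
	}
	first := types[0]
	for _, t := range types[1:] {
		if t != first {
			return fmt.Errorf("array mixes %s and %s", first, t)
		}
	}
	return nil
}

func main() {
	fmt.Println(homogeneous([]string{"Integer", "Integer"})) // <nil>
	fmt.Println(homogeneous([]string{"Integer", "String"}))  // error
}
```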
241 vendor/github.com/BurntSushi/toml/type_fields.go (generated, vendored)
@ -1,241 +0,0 @@
package toml

// Struct field handling is adapted from code in encoding/json:
//
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the Go distribution.

import (
	"reflect"
	"sort"
	"sync"
)

// A field represents a single field found in a struct.
type field struct {
	name  string       // the name of the field (`toml` tag included)
	tag   bool         // whether field has a `toml` tag
	index []int        // represents the depth of an anonymous field
	typ   reflect.Type // the type of the field
}

// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from toml tag", then
// breaking ties with index sequence.
type byName []field

func (x byName) Len() int { return len(x) }

func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

func (x byName) Less(i, j int) bool {
	if x[i].name != x[j].name {
		return x[i].name < x[j].name
	}
	if len(x[i].index) != len(x[j].index) {
		return len(x[i].index) < len(x[j].index)
	}
	if x[i].tag != x[j].tag {
		return x[i].tag
	}
	return byIndex(x).Less(i, j)
}

// byIndex sorts field by index sequence.
type byIndex []field

func (x byIndex) Len() int { return len(x) }

func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

func (x byIndex) Less(i, j int) bool {
	for k, xik := range x[i].index {
		if k >= len(x[j].index) {
			return false
		}
		if xik != x[j].index[k] {
			return xik < x[j].index[k]
		}
	}
	return len(x[i].index) < len(x[j].index)
}

// typeFields returns a list of fields that TOML should recognize for the given
// type. The algorithm is breadth-first search over the set of structs to
// include - the top struct and then any reachable anonymous structs.
func typeFields(t reflect.Type) []field {
	// Anonymous fields to explore at the current level and the next.
	current := []field{}
	next := []field{{typ: t}}

	// Count of queued names for current level and the next.
	count := map[reflect.Type]int{}
	nextCount := map[reflect.Type]int{}

	// Types already visited at an earlier level.
	visited := map[reflect.Type]bool{}

	// Fields found.
	var fields []field

	for len(next) > 0 {
		current, next = next, current[:0]
		count, nextCount = nextCount, map[reflect.Type]int{}

		for _, f := range current {
			if visited[f.typ] {
				continue
			}
			visited[f.typ] = true

			// Scan f.typ for fields to include.
			for i := 0; i < f.typ.NumField(); i++ {
				sf := f.typ.Field(i)
				if sf.PkgPath != "" && !sf.Anonymous { // unexported
					continue
				}
				name, _ := getOptions(sf.Tag.Get("toml"))
				if name == "-" {
					continue
				}
				index := make([]int, len(f.index)+1)
				copy(index, f.index)
				index[len(f.index)] = i

				ft := sf.Type
				if ft.Name() == "" && ft.Kind() == reflect.Ptr {
					// Follow pointer.
					ft = ft.Elem()
				}

				// Record found field and index sequence.
				if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
					tagged := name != ""
					if name == "" {
						name = sf.Name
					}
					fields = append(fields, field{name, tagged, index, ft})
					if count[f.typ] > 1 {
						// If there were multiple instances, add a second,
						// so that the annihilation code will see a duplicate.
						// It only cares about the distinction between 1 or 2,
						// so don't bother generating any more copies.
						fields = append(fields, fields[len(fields)-1])
					}
					continue
				}

				// Record new anonymous struct to explore in next round.
				nextCount[ft]++
				if nextCount[ft] == 1 {
					f := field{name: ft.Name(), index: index, typ: ft}
					next = append(next, f)
				}
			}
		}
	}

	sort.Sort(byName(fields))

	// Delete all fields that are hidden by the Go rules for embedded fields,
	// except that fields with TOML tags are promoted.

	// The fields are sorted in primary order of name, secondary order
	// of field index length. Loop over names; for each name, delete
	// hidden fields by choosing the one dominant field that survives.
	out := fields[:0]
	for advance, i := 0, 0; i < len(fields); i += advance {
		// One iteration per name.
		// Find the sequence of fields with the name of this first field.
		fi := fields[i]
		name := fi.name
		for advance = 1; i+advance < len(fields); advance++ {
			fj := fields[i+advance]
			if fj.name != name {
				break
			}
		}
		if advance == 1 { // Only one field with this name
			out = append(out, fi)
			continue
		}
		dominant, ok := dominantField(fields[i : i+advance])
		if ok {
			out = append(out, dominant)
		}
	}

	fields = out
	sort.Sort(byIndex(fields))

	return fields
}

// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
// TOML tags. If there are multiple top-level fields, the boolean
// will be false: This condition is an error in Go and we skip all
// the fields.
func dominantField(fields []field) (field, bool) {
	// The fields are sorted in increasing index-length order. The winner
	// must therefore be one with the shortest index length. Drop all
	// longer entries, which is easy: just truncate the slice.
	length := len(fields[0].index)
	tagged := -1 // Index of first tagged field.
	for i, f := range fields {
		if len(f.index) > length {
			fields = fields[:i]
			break
		}
		if f.tag {
			if tagged >= 0 {
				// Multiple tagged fields at the same level: conflict.
				// Return no field.
				return field{}, false
			}
			tagged = i
		}
	}
	if tagged >= 0 {
		return fields[tagged], true
	}
	// All remaining fields have the same length. If there's more than one,
	// we have a conflict (two fields named "X" at the same level) and we
	// return no field.
	if len(fields) > 1 {
		return field{}, false
	}
	return fields[0], true
}

var fieldCache struct {
	sync.RWMutex
	m map[reflect.Type][]field
}

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
func cachedTypeFields(t reflect.Type) []field {
	fieldCache.RLock()
	f := fieldCache.m[t]
	fieldCache.RUnlock()
	if f != nil {
		return f
	}

	// Compute fields without lock.
	// Might duplicate effort but won't hold other computations back.
	f = typeFields(t)
	if f == nil {
		f = []field{}
	}

	fieldCache.Lock()
	if fieldCache.m == nil {
		fieldCache.m = map[reflect.Type][]field{}
	}
	fieldCache.m[t] = f
	fieldCache.Unlock()
	return f
}
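cachedTypeFields is the read-lock / compute-unlocked / write-lock caching idiom borrowed from encoding/json: duplicate computation is tolerated in exchange for never blocking readers. A minimal standalone sketch of the idiom (hypothetical cache, stdlib only):

```go
package main

import (
	"fmt"
	"sync"
)

var squareCache struct {
	sync.RWMutex
	m map[int]int
}

// cachedSquare mirrors cachedTypeFields: fast read path under RLock,
// compute without holding any lock, then publish under the full Lock.
func cachedSquare(n int) int {
	squareCache.RLock()
	v, ok := squareCache.m[n]
	squareCache.RUnlock()
	if ok {
		return v
	}

	v = n * n // possibly duplicated across goroutines, but never blocking

	squareCache.Lock()
	if squareCache.m == nil {
		squareCache.m = map[int]int{}
	}
	squareCache.m[n] = v
	squareCache.Unlock()
	return v
}

func main() {
	fmt.Println(cachedSquare(12), cachedSquare(12)) // 144 144
}
```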
16 vendor/github.com/blevesearch/bleve/CONTRIBUTING.md (generated, vendored, new file)
@ -0,0 +1,16 @@
# Contributing to Bleve

We look forward to your contributions, but ask that you first review these guidelines.

### Sign the CLA

As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.

### Submitting a Pull Request

All types of contributions are welcome, but please keep the following in mind:

- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
- Existing tests should continue to pass, new tests for the contribution are nice to have.
- All code should have gone through `go fmt`
- All code should pass `go vet`
54 vendor/github.com/blevesearch/bleve/README.md (generated, vendored)
@ -1,10 +1,14 @@
# ![bleve](docs/bleve.png) bleve

[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)

Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/search/).
Try out bleve live by [searching the bleve website](http://www.blevesearch.com/search/?q=bleve).

## Features

@ -16,7 +20,7 @@ Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/sea
* Term, Phrase, Match, Match Phrase, Prefix
* Conjunction, Disjunction, Boolean
* Numeric Range, Date Range
* Simple query [syntax](https://github.com/blevesearch/bleve/wiki/Query-String-Query) for human entry
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* tf-idf Scoring
* Search result match highlighting
* Supports Aggregating Facets:

@ -30,32 +34,34 @@ Discuss usage and development of bleve in the [google group](https://groups.goog

## Indexing

    message := struct{
        Id   string
        From string
        Body string
    }{
        Id:   "example",
        From: "marty.schoch@gmail.com",
        Body: "bleve indexing is easy",
    }

    mapping := bleve.NewIndexMapping()
    index, err := bleve.New("example.bleve", mapping)
    if err != nil {
        panic(err)
    }
    index.Index(message.Id, message)

```go
message := struct{
	Id   string
	From string
	Body string
}{
	Id:   "example",
	From: "marty.schoch@gmail.com",
	Body: "bleve indexing is easy",
}

mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
	panic(err)
}
index.Index(message.Id, message)
```

## Querying

    index, _ := bleve.Open("example.bleve")
    query := bleve.NewQueryStringQuery("bleve")
    searchRequest := bleve.NewSearchRequest(query)
    searchResult, _ := index.Search(searchRequest)

```go
index, _ := bleve.Open("example.bleve")
query := bleve.NewQueryStringQuery("bleve")
searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```

## License

Apache License Version 2.0
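The two README snippets compose into one program; a slightly fuller sketch with error handling, assuming the same public bleve API shown above (the file name and the expected hit count are illustrative):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
)

func main() {
	// Index a document, then query it back, checking errors throughout.
	mapping := bleve.NewIndexMapping()
	index, err := bleve.New("example.bleve", mapping)
	if err != nil {
		panic(err)
	}
	defer index.Close()

	doc := struct{ Body string }{Body: "bleve indexing is easy"}
	if err := index.Index("example", doc); err != nil {
		panic(err)
	}

	query := bleve.NewQueryStringQuery("bleve")
	searchRequest := bleve.NewSearchRequest(query)
	searchResult, err := index.Search(searchRequest)
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult.Total) // expect 1
}
```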
52 vendor/github.com/blevesearch/bleve/analysis/analyzer/standard/standard.go (generated, vendored, new file)
@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package standard

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "standard"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
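An analyzer here is just a tokenizer plus an ordered list of token filters applied to its output. A minimal sketch of that pipeline shape without bleve's registry (hypothetical types, stdlib only):

```go
package main

import (
	"fmt"
	"strings"
)

// A tokenizer splits text; a token filter rewrites or drops tokens.
type tokenizer func(string) []string
type tokenFilter func([]string) []string

type analyzer struct {
	tokenize tokenizer
	filters  []tokenFilter
}

// analyze runs the tokenizer, then each filter in order.
func (a analyzer) analyze(text string) []string {
	tokens := a.tokenize(text)
	for _, f := range a.filters {
		tokens = f(tokens)
	}
	return tokens
}

func main() {
	lower := func(ts []string) []string {
		for i := range ts {
			ts[i] = strings.ToLower(ts[i])
		}
		return ts
	}
	stopThe := func(ts []string) []string {
		out := ts[:0]
		for _, t := range ts {
			if t != "the" {
				out = append(out, t)
			}
		}
		return out
	}
	a := analyzer{tokenize: strings.Fields, filters: []tokenFilter{lower, stopThe}}
	fmt.Println(a.analyze("The Quick Fox")) // [quick fox]
}
```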
130 vendor/github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer/custom_analyzer.go (generated, vendored)
@ -1,130 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package standard_analyzer

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "custom"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {

	var err error
	var charFilters []analysis.CharFilter
	charFiltersNames, ok := config["char_filters"].([]string)
	if ok {
		charFilters, err = getCharFilters(charFiltersNames, cache)
		if err != nil {
			return nil, err
		}
	} else {
		charFiltersNamesInterfaceSlice, ok := config["char_filters"].([]interface{})
		if ok {
			charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersNamesInterfaceSlice, "char filter")
			if err != nil {
				return nil, err
			}
			charFilters, err = getCharFilters(charFiltersNames, cache)
			if err != nil {
				return nil, err
			}
		}
	}

	tokenizerName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer")
	}

	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	var tokenFilters []analysis.TokenFilter
	tokenFiltersNames, ok := config["token_filters"].([]string)
	if ok {
		tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
		if err != nil {
			return nil, err
		}
	} else {
		tokenFiltersNamesInterfaceSlice, ok := config["token_filters"].([]interface{})
		if ok {
			tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersNamesInterfaceSlice, "token filter")
			if err != nil {
				return nil, err
			}
			tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
			if err != nil {
				return nil, err
			}
		}
	}

	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
	}
	if charFilters != nil {
		rv.CharFilters = charFilters
	}
	if tokenFilters != nil {
		rv.TokenFilters = tokenFilters
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}

func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
	charFilters := make([]analysis.CharFilter, len(charFilterNames))
	for i, charFilterName := range charFilterNames {
		charFilter, err := cache.CharFilterNamed(charFilterName)
		if err != nil {
			return nil, err
		}
		charFilters[i] = charFilter
	}

	return charFilters, nil
}

func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
	for i, tokenFilterName := range tokenFilterNames {
		tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
		if err != nil {
			return nil, err
		}
		tokenFilters[i] = tokenFilter
	}

	return tokenFilters, nil
}

func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	stringSlice := make([]string, len(interfaceSlice))
	for i, interfaceObj := range interfaceSlice {
		stringObj, ok := interfaceObj.(string)
		if ok {
			stringSlice[i] = stringObj
		} else {
			return nil, fmt.Errorf(objType + " name must be a string")
		}
	}

	return stringSlice, nil
}
@ -1,49 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build cld2 full

package detect_lang_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/cld2"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
	"github.com/blevesearch/bleve/registry"
)

const Name = "detect_lang"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	detectLangFilter, err := cache.TokenFilterNamed(cld2.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: keywordTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			detectLangFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package keyword_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
	"github.com/blevesearch/bleve/registry"
)

const Name = "keyword"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: keywordTokenizer,
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,41 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package simple_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "simple"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,47 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package standard_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/en"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "standard"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ignore_byte_array_converter

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type IgnoreByteArrayConverter struct{}

func NewIgnoreByteArrayConverter() *IgnoreByteArrayConverter {
	return &IgnoreByteArrayConverter{}
}

func (c *IgnoreByteArrayConverter) Convert(in []byte) (interface{}, error) {
	return nil, nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewIgnoreByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("ignore", Constructor)
}
@ -1,40 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package json_byte_array_converter

import (
	"encoding/json"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type JSONByteArrayConverter struct{}

func NewJSONByteArrayConverter() *JSONByteArrayConverter {
	return &JSONByteArrayConverter{}
}

func (c *JSONByteArrayConverter) Convert(in []byte) (interface{}, error) {
	var rv map[string]interface{}
	err := json.Unmarshal(in, &rv)
	if err != nil {
		return nil, err
	}
	return rv, nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewJSONByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("json", Constructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package string_byte_array_converter

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type StringByteArrayConverter struct{}

func NewStringByteArrayConverter() *StringByteArrayConverter {
	return &StringByteArrayConverter{}
}

func (c *StringByteArrayConverter) Convert(in []byte) (interface{}, error) {
	return string(in), nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewStringByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("string", Constructor)
}
@ -1,31 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package html_char_filter

import (
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter"
	"github.com/blevesearch/bleve/registry"
)

const Name = "html"

var htmlCharFilterRegexp = regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)

func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	replaceBytes := []byte(" ")
	return regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, CharFilterConstructor)
}
@ -1,58 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package regexp_char_filter

import (
	"bytes"
	"fmt"
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "regexp"

type RegexpCharFilter struct {
	r           *regexp.Regexp
	replacement []byte
}

func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter {
	return &RegexpCharFilter{
		r:           r,
		replacement: replacement,
	}
}

func (s *RegexpCharFilter) Filter(input []byte) []byte {
	return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
}

func RegexpCharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	regexpStr, ok := config["regexp"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify regexp")
	}
	r, err := regexp.Compile(regexpStr)
	if err != nil {
		return nil, fmt.Errorf("unable to build regexp char filter: %v", err)
	}
	replaceBytes := []byte(" ")
	replaceStr, ok := config["replace"].(string)
	if ok {
		replaceBytes = []byte(replaceStr)
	}
	return NewRegexpCharFilter(r, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, RegexpCharFilterConstructor)
}
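Note that Filter repeats the replacement to the byte length of each match, so the filtered text keeps its original length and downstream token positions still line up. A standalone sketch of that behavior using only the standard library:

```go
package main

import (
	"bytes"
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`<[^>]*>`)
	input := []byte("<b>hi</b> there")
	// Replace every tag with spaces of equal length, preserving offsets.
	out := re.ReplaceAllFunc(input, func(m []byte) []byte {
		return bytes.Repeat([]byte(" "), len(m))
	})
	fmt.Printf("%q\n", out) // "   hi     there" — same length as the input
}
```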
@ -1,82 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package regexp_char_filter

import (
	"reflect"
	"regexp"
	"testing"
)

func TestRegexpCharFilter(t *testing.T) {

	htmlTagPattern := `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
	htmlRegex := regexp.MustCompile(htmlTagPattern)

	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			input: []byte(`<!DOCTYPE html>
<html>
<body>

<h1>My First Heading</h1>

<p>My first paragraph.</p>

</body>
</html>`),
			output: []byte(`


My First Heading

My first paragraph.


`),
		},
	}

	for _, test := range tests {
		filter := NewRegexpCharFilter(htmlRegex, []byte{' '})
		output := filter.Filter(test.input)
		if !reflect.DeepEqual(output, test.output) {
			t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
		}
	}
}

func TestZeroWidthNonJoinerCharFilter(t *testing.T) {

	zeroWidthNonJoinerPattern := `\x{200C}`
	zeroWidthNonJoinerRegex := regexp.MustCompile(zeroWidthNonJoinerPattern)

	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			input:  []byte("water\u200Cunder\u200Cthe\u200Cbridge"),
			output: []byte("water under the bridge"),
		},
	}

	for _, test := range tests {
		filter := NewRegexpCharFilter(zeroWidthNonJoinerRegex, []byte{' '})
		output := filter.Filter(test.input)
		if !reflect.DeepEqual(output, test.output) {
			t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
		}
	}
}
@ -1,31 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package zero_width_non_joiner

import (
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter"
	"github.com/blevesearch/bleve/registry"
)

const Name = "zero_width_spaces"

var zeroWidthNonJoinerRegexp = regexp.MustCompile(`\x{200C}`)

func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	replaceBytes := []byte(" ")
	return regexp_char_filter.NewRegexpCharFilter(zeroWidthNonJoinerRegexp, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, CharFilterConstructor)
}
64 vendor/github.com/blevesearch/bleve/analysis/datetime/flexible/flexible.go generated vendored Normal file
@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flexible

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "flexiblego"

type DateTimeParser struct {
	layouts []string
}

func New(layouts []string) *DateTimeParser {
	return &DateTimeParser{
		layouts: layouts,
	}
}

func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	var layoutStrs []string
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return New(layoutStrs), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
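A standalone sketch of the ParseDateTime loop above, using only the standard library; the plain error here stands in for analysis.ErrInvalidDateTime, and parseFlexible is an illustrative name, not part of the vendored API.

package main

import (
	"fmt"
	"time"
)

// parseFlexible mirrors DateTimeParser.ParseDateTime above: try each
// layout in order and return the first successful parse.
func parseFlexible(layouts []string, input string) (time.Time, error) {
	for _, layout := range layouts {
		if rv, err := time.Parse(layout, input); err == nil {
			return rv, nil
		}
	}
	return time.Time{}, fmt.Errorf("invalid date time")
}

func main() {
	layouts := []string{time.RFC3339, "2006-01-02 15:04:05", "2006-01-02"}
	t, err := parseFlexible(layouts, "2014-08-03")
	fmt.Println(t, err) // 2014-08-03 00:00:00 +0000 UTC <nil>
}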
45 vendor/github.com/blevesearch/bleve/analysis/datetime/optional/optional.go generated vendored Normal file
@ -0,0 +1,45 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package optional

import (
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/datetime/flexible"
	"github.com/blevesearch/bleve/registry"
)

const Name = "dateTimeOptional"

const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible.New(layouts), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
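To illustrate the layout list above: ordering matters, with the most specific layouts first so timezone offsets and fractional seconds win when present. A standard-library-only sketch, with inputs taken from the flexible_go tests removed later in this diff:

package main

import (
	"fmt"
	"time"
)

func main() {
	// The layout list from the "dateTimeOptional" parser above.
	layouts := []string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02T15:04:05", // rfc3339NoTimezone
		"2006-01-02 15:04:05", // rfc3339NoTimezoneNoT
		"2006-01-02",          // rfc3339NoTime
	}
	inputs := []string{"2014-08-03T15:59:30-08:00", "2014-08-03 15:59:30", "2014-08-03"}
	for _, in := range inputs {
		for _, layout := range layouts {
			if t, err := time.Parse(layout, in); err == nil {
				fmt.Printf("%-30s -> %v\n", in, t)
				break
			}
		}
	}
}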
@ -1,40 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package datetime_optional

import (
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go"
	"github.com/blevesearch/bleve/registry"
)

const Name = "dateTimeOptional"

const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible_go.NewFlexibleGoDateTimeParser(layouts), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
59 vendor/github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go/flexible_go.go generated vendored
@ -1,59 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package flexible_go

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "flexiblego"

type FlexibleGoDateTimeParser struct {
	layouts []string
}

func NewFlexibleGoDateTimeParser(layouts []string) *FlexibleGoDateTimeParser {
	return &FlexibleGoDateTimeParser{
		layouts: layouts,
	}
}

func (p *FlexibleGoDateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

func FlexibleGoDateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	layoutStrs := make([]string, 0)
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return NewFlexibleGoDateTimeParser(layoutStrs), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, FlexibleGoDateTimeParserConstructor)
}
@ -1,84 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package flexible_go

import (
	"reflect"
	"testing"
	"time"

	"github.com/blevesearch/bleve/analysis"
)

func TestFlexibleDateTimeParser(t *testing.T) {
	testLocation := time.FixedZone("", -8*60*60)

	tests := []struct {
		input         string
		expectedTime  time.Time
		expectedError error
	}{
		{
			input:         "2014-08-03",
			expectedTime:  time.Date(2014, 8, 3, 0, 0, 0, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03 15:59:30",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30-08:00",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, testLocation),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30.999999999-08:00",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 999999999, testLocation),
			expectedError: nil,
		},
		{
			input:         "not a date time",
			expectedTime:  time.Time{},
			expectedError: analysis.ErrInvalidDateTime,
		},
	}

	rfc3339NoTimezone := "2006-01-02T15:04:05"
	rfc3339NoTimezoneNoT := "2006-01-02 15:04:05"
	rfc3339NoTime := "2006-01-02"

	dateOptionalTimeParser := NewFlexibleGoDateTimeParser(
		[]string{
			time.RFC3339Nano,
			time.RFC3339,
			rfc3339NoTimezone,
			rfc3339NoTimezoneNoT,
			rfc3339NoTime,
		})

	for _, test := range tests {
		actualTime, actualErr := dateOptionalTimeParser.ParseDateTime(test.input)
		if actualErr != test.expectedError {
			t.Errorf("expected error %#v, got %#v", test.expectedError, actualErr)
			continue
		}
		if !reflect.DeepEqual(actualTime, test.expectedTime) {
			t.Errorf("expected time %#v, got %#v", test.expectedTime, actualTime)
			t.Errorf("expected location %#v,\n got %#v", test.expectedTime.Location(), actualTime.Location())
		}
	}
}
135 vendor/github.com/blevesearch/bleve/analysis/freq.go generated vendored
@ -1,88 +1,111 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analysis

// TokenLocation represents one occurrence of a term at a particular location in
// a field. Start, End and Position have the same meaning as in analysis.Token.
// Field and ArrayPositions identify the field value in the source document.
// See document.Field for details.
type TokenLocation struct {
	Field    string
	Start    int
	End      int
	Position int
	Field          string
	ArrayPositions []uint64
	Start          int
	End            int
	Position       int
}

// TokenFreq represents all the occurrences of a term in all fields of a
// document.
type TokenFreq struct {
	Term      []byte
	Locations []*TokenLocation
	frequency int
}

type TokenFrequencies []*TokenFreq
func (tf *TokenFreq) Frequency() int {
	return tf.frequency
}

func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) TokenFrequencies {
	// put existing tokens into a map
	index := make(map[string]*TokenFreq)
	for _, tf := range tfs {
		index[string(tf.Term)] = tf
	}
// TokenFrequencies maps document terms to their combined frequencies from all
// fields.
type TokenFrequencies map[string]*TokenFreq

func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
	// walk the new token frequencies
	for _, tf := range other {
	for tfk, tf := range other {
		// set the remoteField value in incoming token freqs
		for _, l := range tf.Locations {
			l.Field = remoteField
		}
		existingTf, exists := index[string(tf.Term)]
		existingTf, exists := tfs[tfk]
		if exists {
			existingTf.Locations = append(existingTf.Locations, tf.Locations...)
			existingTf.frequency = existingTf.frequency + tf.frequency
		} else {
			index[string(tf.Term)] = tf
			tfs[tfk] = &TokenFreq{
				Term:      tf.Term,
				frequency: tf.frequency,
				Locations: make([]*TokenLocation, len(tf.Locations)),
			}
			copy(tfs[tfk].Locations, tf.Locations)
		}
	}
	// flatten map back to array
	rv := make(TokenFrequencies, len(index))
	i := 0
	for _, tf := range index {
		rv[i] = tf
		i++
	}
	return rv
}

func TokenFrequency(tokens TokenStream) TokenFrequencies {
	index := make(map[string]*TokenFreq)
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies {
	rv := make(map[string]*TokenFreq, len(tokens))

	for _, token := range tokens {
		curr, ok := index[string(token.Term)]
		if ok {
			curr.Locations = append(curr.Locations, &TokenLocation{
				Start:    token.Start,
				End:      token.End,
				Position: token.Position,
			})
		} else {
			index[string(token.Term)] = &TokenFreq{
				Term: token.Term,
				Locations: []*TokenLocation{
					&TokenLocation{
						Start:    token.Start,
						End:      token.End,
						Position: token.Position,
					},
				},
	if includeTermVectors {
		tls := make([]TokenLocation, len(tokens))
		tlNext := 0

		for _, token := range tokens {
			tls[tlNext] = TokenLocation{
				ArrayPositions: arrayPositions,
				Start:          token.Start,
				End:            token.End,
				Position:       token.Position,
			}

			curr, ok := rv[string(token.Term)]
			if ok {
				curr.Locations = append(curr.Locations, &tls[tlNext])
				curr.frequency++
			} else {
				rv[string(token.Term)] = &TokenFreq{
					Term:      token.Term,
					Locations: []*TokenLocation{&tls[tlNext]},
					frequency: 1,
				}
			}

			tlNext++
		}
	} else {
		for _, token := range tokens {
			curr, exists := rv[string(token.Term)]
			if exists {
				curr.frequency++
			} else {
				rv[string(token.Term)] = &TokenFreq{
					Term:      token.Term,
					frequency: 1,
				}
			}
		}
	}

	rv := make(TokenFrequencies, len(index))
	i := 0
	for _, tf := range index {
		rv[i] = tf
		i++
	}

	return rv
}
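The hunk above changes TokenFrequencies from a slice to a map keyed by term, and adds an explicit frequency counter. A self-contained sketch of the counting semantics, with local stand-in types rather than the vendored ones:

package main

import "fmt"

// Minimal stand-ins for the vendored types, just to illustrate the new
// map-based TokenFrequencies and frequency bookkeeping above.
type tokenFreq struct {
	term      string
	frequency int
}

type tokenFrequencies map[string]*tokenFreq

// tokenFrequency mirrors the includeTermVectors == false branch of
// TokenFrequency: count occurrences per distinct term.
func tokenFrequency(terms []string) tokenFrequencies {
	rv := make(tokenFrequencies, len(terms))
	for _, term := range terms {
		if curr, exists := rv[term]; exists {
			curr.frequency++
		} else {
			rv[term] = &tokenFreq{term: term, frequency: 1}
		}
	}
	return rv
}

func main() {
	tfs := tokenFrequency([]string{"water", "water", "bridge"})
	fmt.Println(tfs["water"].frequency, tfs["bridge"].frequency) // 2 1
}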
167 vendor/github.com/blevesearch/bleve/analysis/freq_test.go generated vendored
@ -1,167 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package analysis

import (
	"reflect"
	"testing"
)

func TestTokenFrequency(t *testing.T) {
	tokens := TokenStream{
		&Token{
			Term:     []byte("water"),
			Position: 1,
			Start:    0,
			End:      5,
		},
		&Token{
			Term:     []byte("water"),
			Position: 2,
			Start:    6,
			End:      11,
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	result := TokenFrequency(tokens)
	if !reflect.DeepEqual(result, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, result)
	}
}

func TestTokenFrequenciesMergeAll(t *testing.T) {
	tf1 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	tf2 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	tf1.MergeAll("tf2", tf2)
	if !reflect.DeepEqual(tf1, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, tf1)
	}
}

func TestTokenFrequenciesMergeAllLeftEmpty(t *testing.T) {
	tf1 := TokenFrequencies{}
	tf2 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Field:    "tf2",
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	result := tf1.MergeAll("tf2", tf2)
	if !reflect.DeepEqual(result, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, result)
	}
}
70 vendor/github.com/blevesearch/bleve/analysis/lang/en/analyzer_en.go generated vendored Normal file
@ -0,0 +1,70 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package en implements an analyzer with reasonable defaults for processing
// English text.
//
// It strips possessive suffixes ('s), transforms tokens to lower case,
// removes stopwords from a built-in list, and applies porter stemming.
//
// The built-in stopwords list is defined in EnglishStopWords.
package en

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/token/porter"
	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

const AnalyzerName = "en"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			possEnFilter,
			toLowerFilter,
			stopEnFilter,
			stemmerEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
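A usage sketch for the analyzer above, assuming the vendored bleve packages as laid out in this commit; registry.NewCache, AnalyzerNamed, and Analyze are used exactly as the removed Arabic analyzer test below uses them.

package main

import (
	"fmt"

	_ "github.com/blevesearch/bleve/analysis/lang/en" // registers "en" via init()
	"github.com/blevesearch/bleve/registry"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("en")
	if err != nil {
		panic(err)
	}
	// Possessives stripped, tokens lowercased, stop words ("the") removed,
	// and the remainder porter-stemmed; exact terms depend on the stemmer.
	for _, token := range analyzer.Analyze([]byte("The Walrus's Hunters")) {
		fmt.Printf("%s\n", token.Term)
	}
}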
67 vendor/github.com/blevesearch/bleve/analysis/lang/en/possessive_filter_en.go generated vendored Normal file
@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package en

import (
	"unicode/utf8"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"

const rightSingleQuotationMark = '’'
const apostrophe = '\''
const fullWidthApostrophe = '＇'

const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe

// PossessiveFilter implements a TokenFilter which
// strips the English possessive suffix ('s) from tokens.
// It handles a variety of apostrophe types, is case-insensitive
// and doesn't distinguish between possessive and contraction.
// (i.e. "She's So Rad" becomes "She So Rad")
type PossessiveFilter struct {
}

func NewPossessiveFilter() *PossessiveFilter {
	return &PossessiveFilter{}
}

func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term)
		if lastRune == 's' || lastRune == 'S' {
			nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize])
			if nextLastRune == rightSingleQuotationMark ||
				nextLastRune == apostrophe ||
				nextLastRune == fullWidthApostrophe {
				token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize]
			}
		}
	}
	return input
}

func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewPossessiveFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
}
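A standalone sketch of the suffix-stripping logic above, operating on a single term; stripPossessive is an illustrative helper name, not part of the vendored API.

package main

import (
	"fmt"
	"unicode/utf8"
)

// stripPossessive mirrors PossessiveFilter.Filter above for one term:
// if the term ends in 's' or 'S' preceded by any recognized apostrophe,
// drop both trailing runes.
func stripPossessive(term []byte) []byte {
	lastRune, lastRuneSize := utf8.DecodeLastRune(term)
	if lastRune == 's' || lastRune == 'S' {
		nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(term[:len(term)-lastRuneSize])
		switch nextLastRune {
		case '’', '\'', '＇':
			return term[:len(term)-lastRuneSize-nextLastRuneSize]
		}
	}
	return term
}

func main() {
	fmt.Printf("%s\n", stripPossessive([]byte("She's"))) // She
	fmt.Printf("%s\n", stripPossessive([]byte("Rad")))   // Rad
}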
33 vendor/github.com/blevesearch/bleve/analysis/lang/en/stop_filter_en.go generated vendored Normal file
@ -0,0 +1,33 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package en

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/stop"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
@ -7,10 +7,11 @@ import (

const StopName = "stop_en"

// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string

var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
60 vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar.go generated vendored
@ -1,60 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
)

const AnalyzerName = "ar"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKC)
	stopArFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerArFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			normalizeFilter,
			stopArFilter,
			normalizeArFilter,
			stemmerArFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
179 vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar_test.go generated vendored
@ -1,179 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestArabicAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("كبير"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("كبير"),
					Position: 1,
					Start:    0,
					End:      8,
				},
			},
		},
		// feminine marker
		{
			input: []byte("كبيرة"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("كبير"),
					Position: 1,
					Start:    0,
					End:      10,
				},
			},
		},
		{
			input: []byte("مشروب"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("مشروب"),
					Position: 1,
					Start:    0,
					End:      10,
				},
			},
		},
		// plural -at
		{
			input: []byte("مشروبات"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("مشروب"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		// plural -in
		{
			input: []byte("أمريكيين"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("امريك"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		// singular with bare alif
		{
			input: []byte("امريكي"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("امريك"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{
			input: []byte("كتاب"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("كتاب"),
					Position: 1,
					Start:    0,
					End:      8,
				},
			},
		},
		// definite article
		{
			input: []byte("الكتاب"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("كتاب"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{
			input: []byte("ما ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("ملكت"),
					Position: 2,
					Start:    5,
					End:      13,
				},
				&analysis.Token{
					Term:     []byte("ايمانكم"),
					Position: 3,
					Start:    14,
					End:      28,
				},
			},
		},
		// stopwords
		{
			input: []byte("الذين ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("ملكت"),
					Position: 2,
					Start:    11,
					End:      19,
				},
				&analysis.Token{
					Term:     []byte("ايمانكم"),
					Position: 3,
					Start:    20,
					End:      34,
				},
			},
		},
		// presentation form normalization
		{
			input: []byte("ﺍﻟﺴﻼﻢ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("سلام"),
					Position: 1,
					Start:    0,
					End:      15,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
80 vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize.go generated vendored
@ -1,80 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_ar"

const (
	Alef           = '\u0627'
	AlefMadda      = '\u0622'
	AlefHamzaAbove = '\u0623'
	AlefHamzaBelow = '\u0625'
	Yeh            = '\u064A'
	DotlessYeh     = '\u0649'
	TehMarbuta     = '\u0629'
	Heh            = '\u0647'
	Tatweel        = '\u0640'
	Fathatan       = '\u064B'
	Dammatan       = '\u064C'
	Kasratan       = '\u064D'
	Fatha          = '\u064E'
	Damma          = '\u064F'
	Kasra          = '\u0650'
	Shadda         = '\u0651'
	Sukun          = '\u0652'
)

type ArabicNormalizeFilter struct {
}

func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
	return &ArabicNormalizeFilter{}
}

func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
			runes[i] = Alef
		case DotlessYeh:
			runes[i] = Yeh
		case TehMarbuta:
			runes[i] = Heh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			i--
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewArabicNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
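A standard-library-only sketch of the normalization logic in the deleted file above: fold alef variants to bare alef, dotless yeh to yeh, teh marbuta to heh, and drop tatweel and the harakat (diacritics). normalizeAr is an illustrative name; the sample input and output come from the normalize tests removed below.

package main

import (
	"bytes"
	"fmt"
)

// normalizeAr mirrors the deleted normalize function above.
func normalizeAr(input []byte) []byte {
	var out []rune
	for _, r := range bytes.Runes(input) {
		switch r {
		case '\u0622', '\u0623', '\u0625': // alef madda / hamza above / hamza below
			out = append(out, '\u0627') // bare alef
		case '\u0649': // dotless yeh
			out = append(out, '\u064A') // yeh
		case '\u0629': // teh marbuta
			out = append(out, '\u0647') // heh
		case '\u0640', '\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652':
			// tatweel and diacritics: drop the rune entirely
		default:
			out = append(out, r)
		}
	}
	return []byte(string(out))
}

func main() {
	fmt.Printf("%s\n", normalizeAr([]byte("أحمد"))) // احمد
}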
229 vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize_test.go generated vendored
@ -1,229 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestArabicNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// AlifMadda
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("آجن"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("اجن"),
				},
			},
		},
		// AlifHamzaAbove
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("أحمد"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("احمد"),
				},
			},
		},
		// AlifHamzaBelow
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("إعاذ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("اعاذ"),
				},
			},
		},
		// AlifMaksura
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بنى"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بني"),
				},
			},
		},
		// TehMarbuta
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("فاطمة"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("فاطمه"),
				},
			},
		},
		// Tatweel
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("روبرـــــت"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("روبرت"),
				},
			},
		},
		// Fatha
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("مَبنا"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("مبنا"),
				},
			},
		},
		// Kasra
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("علِي"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("علي"),
				},
			},
		},
		// Damma
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بُوات"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بوات"),
				},
			},
		},
		// Fathatan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولداً"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدا"),
				},
			},
		},
		// Kasratan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدٍ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولد"),
				},
			},
		},
		// Dammatan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدٌ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولد"),
				},
			},
		},
		// Sukun
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("نلْسون"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("نلسون"),
				},
			},
		},
		// Shaddah
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("هتميّ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("هتمي"),
				},
			},
		},
		// empty
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	arabicNormalizeFilter := NewArabicNormalizeFilter()
	for _, test := range tests {
		actual := arabicNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
113 vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar.go generated vendored
@ -1,113 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_ar"

// These were obtained from org.apache.lucene.analysis.ar.ArabicStemmer
var prefixes = [][]rune{
	[]rune("ال"),
	[]rune("وال"),
	[]rune("بال"),
	[]rune("كال"),
	[]rune("فال"),
	[]rune("لل"),
	[]rune("و"),
}
var suffixes = [][]rune{
	[]rune("ها"),
	[]rune("ان"),
	[]rune("ات"),
	[]rune("ون"),
	[]rune("ين"),
	[]rune("يه"),
	[]rune("ية"),
	[]rune("ه"),
	[]rune("ة"),
	[]rune("ي"),
}

type ArabicStemmerFilter struct{}

func NewArabicStemmerFilter() *ArabicStemmerFilter {
	return &ArabicStemmerFilter{}
}

func (s *ArabicStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := stem(token.Term)
		token.Term = term
	}
	return input
}

func canStemPrefix(input, prefix []rune) bool {
	// Wa- prefix requires at least 3 characters.
	if len(prefix) == 1 && len(input) < 4 {
		return false
	}
	// Other prefixes require only 2.
	if len(input)-len(prefix) < 2 {
		return false
	}
	for i := range prefix {
		if prefix[i] != input[i] {
			return false
		}
	}
	return true
}

func canStemSuffix(input, suffix []rune) bool {
	// All suffixes require at least 2 characters after stemming.
	if len(input)-len(suffix) < 2 {
		return false
	}
	stemEnd := len(input) - len(suffix)
	for i := range suffix {
		if suffix[i] != input[stemEnd+i] {
			return false
		}
	}
	return true
}

func stem(input []byte) []byte {
	runes := bytes.Runes(input)
	// Strip a single prefix.
	for _, p := range prefixes {
		if canStemPrefix(runes, p) {
			runes = runes[len(p):]
			break
		}
	}
	// Strip off multiple suffixes, in their order in the suffixes array.
	for _, s := range suffixes {
		if canStemSuffix(runes, s) {
			runes = runes[:len(runes)-len(s)]
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewArabicStemmerFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
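A self-contained sketch of the prefix guard in the deleted stemmer above, with the length rules its comments describe; the example word and result match the AlPrefix case in the stemmer tests below.

package main

import "fmt"

// canStemPrefix mirrors the deleted function above: the single-rune "و"
// prefix needs at least 4 runes of input, and any stripped prefix must
// leave at least 2 runes behind.
func canStemPrefix(input, prefix []rune) bool {
	if len(prefix) == 1 && len(input) < 4 {
		return false
	}
	if len(input)-len(prefix) < 2 {
		return false
	}
	for i := range prefix {
		if prefix[i] != input[i] {
			return false
		}
	}
	return true
}

func main() {
	word := []rune("الحسن")
	prefix := []rune("ال")
	if canStemPrefix(word, prefix) {
		fmt.Println(string(word[len(prefix):])) // حسن
	}
}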
392
vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar_test.go
generated
vendored
392
vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar_test.go
generated
vendored
@ -1,392 +0,0 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package ar
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestArabicStemmerFilter(t *testing.T) {
|
||||
tests := []struct {
|
||||
input analysis.TokenStream
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
// AlPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("الحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// WalPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("والحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// BalPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("بالحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// KalPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("كالحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// FalPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("فالحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// LlPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("للاخر"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("اخر"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// WaPrefix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("وحسن"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("حسن"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// AhSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("زوجها"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("زوج"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// AnSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهدان"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// AtSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهدات"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// WnSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهدون"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// YnSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهدين"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// YhSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهديه"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// YpSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهدية"),
|
||||
},
|
||||
},
|
||||
output: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ساهد"),
|
||||
},
|
||||
},
|
||||
},
|
||||
// HSuffix
|
||||
{
|
||||
input: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
					Term: []byte("ساهده"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهد"),
				},
			},
		},
		// PSuffix
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهدة"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهد"),
				},
			},
		},
		// YSuffix
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهدي"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهد"),
				},
			},
		},
		// ComboPrefSuf
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("وساهدون"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهد"),
				},
			},
		},
		// ComboSuf
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهدهات"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ساهد"),
				},
			},
		},
		// ShouldntStem
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("الو"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("الو"),
				},
			},
		},
		// NonArabic
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("English"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("English"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلام"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلام"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("السلام"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلام"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلامة"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلام"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("السلامة"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("سلام"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("الوصل"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("وصل"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("والصل"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("صل"),
				},
			},
		},
		// Empty
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	arabicStemmerFilter := NewArabicStemmerFilter()
	for _, test := range tests {
		actual := arabicStemmerFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
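For context, a minimal sketch of driving the stemmer filter above directly; this is not part of the diff, and it assumes the pre-upgrade bleve package layout and only the NewArabicStemmerFilter constructor and Filter method exercised by the removed test:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/ar"
)

func main() {
	// Build the stemmer directly, as the removed test above does.
	filter := ar.NewArabicStemmerFilter()
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("السلامة")},
	}
	for _, tok := range filter.Filter(stream) {
		fmt.Printf("%s\n", tok.Term) // prints سلام, per the table above
	}
}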
28  vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_filter_ar.go  generated  vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
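A small sketch of how this constructor is reached (assumed usage, not from the commit): the init above registers it under StopName, and a registry.Cache resolves it by that name, pulling in the token map defined in the next file:

package main

import (
	"fmt"

	_ "github.com/blevesearch/bleve/analysis/language/ar" // runs the init registrations above
	"github.com/blevesearch/bleve/registry"
)

func main() {
	cache := registry.NewCache()
	stopFilter, err := cache.TokenFilterNamed("stop_ar") // StopName from stop_words_ar.go
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", stopFilter)
}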
149  vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_words_ar.go  generated  vendored
@ -1,149 +0,0 @@
package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ar"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
// ` was changed to ' to allow for literal string

var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(ArabicStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
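The payload above is consumed by analysis.NewTokenMap().LoadBytes. Judging by its shape, the format is one entry per line with #-prefixed comment lines; that reading is inferred from the list, not a documented contract. A tiny hedged sketch:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

// tinyStopWords mimics the shape of ArabicStopWords above: one entry per
// line, with #-prefixed comment lines (an inferred format, not a documented
// contract).
var tinyStopWords = []byte(`# a comment, as in the list above
من
في
`)

func main() {
	rv := analysis.NewTokenMap()
	if err := rv.LoadBytes(tinyStopWords); err != nil {
		panic(err)
	}
	fmt.Println(len(rv)) // 2 entries loaded
}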
28  vendor/github.com/blevesearch/bleve/analysis/language/bg/stop_filter_bg.go  generated  vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package bg

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
217  vendor/github.com/blevesearch/bleve/analysis/language/bg/stop_words_bg.go  generated  vendored
@ -1,217 +0,0 @@
package bg

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_bg"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var BulgarianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(BulgarianStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
30  vendor/github.com/blevesearch/bleve/analysis/language/ca/articles_ca.go  generated  vendored
@ -1,30 +0,0 @@
package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const ArticlesName = "articles_ca"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis

var CatalanArticles = []byte(`
d
l
m
n
s
t
`)

func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CatalanArticles)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
}
32  vendor/github.com/blevesearch/bleve/analysis/language/ca/elision_ca.go  generated  vendored
@ -1,32 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/elision_filter"
	"github.com/blevesearch/bleve/registry"
)

const ElisionName = "elision_ca"

func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %v", err)
	}
	return elision_filter.NewElisionFilter(articlesTokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
}
56  vendor/github.com/blevesearch/bleve/analysis/language/ca/elision_ca_test.go  generated  vendored
@ -1,56 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestFrenchElision(t *testing.T) {
	tests := []struct {
		input analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("l'Institut"),
				},
				&analysis.Token{
					Term: []byte("d'Estudis"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("Institut"),
				},
				&analysis.Token{
					Term: []byte("Estudis"),
				},
			},
		},
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := elisionFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
		}
	}
}
28  vendor/github.com/blevesearch/bleve/analysis/language/ca/stop_filter_ca.go  generated  vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
244  vendor/github.com/blevesearch/bleve/analysis/language/ca/stop_words_ca.go  generated  vendored
@ -1,244 +0,0 @@
package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ca"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var CatalanStopWords = []byte(`# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CatalanStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
50  vendor/github.com/blevesearch/bleve/analysis/language/cjk/analyzer_cjk.go  generated  vendored
@ -1,50 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
	"github.com/blevesearch/bleve/analysis/tokenizers/whitespace_tokenizer"
)

const AnalyzerName = "cjk"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	whitespaceTokenizer, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	bigramFilter, err := cache.TokenFilterNamed(BigramName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: whitespaceTokenizer,
		TokenFilters: []analysis.TokenFilter{
			normalizeFilter,
			toLowerFilter,
			bigramFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
620  vendor/github.com/blevesearch/bleve/analysis/language/cjk/analyzer_cjk_test.go  generated  vendored
@ -1,620 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestCJKAnalyzer(t *testing.T) {
	tests := []struct {
		input []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("こんにちは世界"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こん"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("んに"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("にち"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("ちは"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("は世"),
					Type: analysis.Double,
					Position: 5,
					Start: 12,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("世界"),
					Type: analysis.Double,
					Position: 6,
					Start: 15,
					End: 21,
				},
			},
		},
		{
			input: []byte("一二三四五六七八九十"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一二"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("二三"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("三四"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("四五"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("五六"),
					Type: analysis.Double,
					Position: 5,
					Start: 12,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("六七"),
					Type: analysis.Double,
					Position: 6,
					Start: 15,
					End: 21,
				},
				&analysis.Token{
					Term: []byte("七八"),
					Type: analysis.Double,
					Position: 7,
					Start: 18,
					End: 24,
				},
				&analysis.Token{
					Term: []byte("八九"),
					Type: analysis.Double,
					Position: 8,
					Start: 21,
					End: 27,
				},
				&analysis.Token{
					Term: []byte("九十"),
					Type: analysis.Double,
					Position: 9,
					Start: 24,
					End: 30,
				},
			},
		},
		{
			input: []byte("一 二三四 五六七八九 十"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("二三"),
					Type: analysis.Double,
					Position: 2,
					Start: 4,
					End: 10,
				},
				&analysis.Token{
					Term: []byte("三四"),
					Type: analysis.Double,
					Position: 3,
					Start: 7,
					End: 13,
				},
				&analysis.Token{
					Term: []byte("五六"),
					Type: analysis.Double,
					Position: 5,
					Start: 14,
					End: 20,
				},
				&analysis.Token{
					Term: []byte("六七"),
					Type: analysis.Double,
					Position: 6,
					Start: 17,
					End: 23,
				},
				&analysis.Token{
					Term: []byte("七八"),
					Type: analysis.Double,
					Position: 7,
					Start: 20,
					End: 26,
				},
				&analysis.Token{
					Term: []byte("八九"),
					Type: analysis.Double,
					Position: 8,
					Start: 23,
					End: 29,
				},
				&analysis.Token{
					Term: []byte("十"),
					Type: analysis.Single,
					Position: 10,
					Start: 30,
					End: 33,
				},
			},
		},
		{
			input: []byte("abc defgh ijklmn opqrstu vwxy z"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("abc"),
					Type: analysis.AlphaNumeric,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("defgh"),
					Type: analysis.AlphaNumeric,
					Position: 2,
					Start: 4,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("ijklmn"),
					Type: analysis.AlphaNumeric,
					Position: 3,
					Start: 10,
					End: 16,
				},
				&analysis.Token{
					Term: []byte("opqrstu"),
					Type: analysis.AlphaNumeric,
					Position: 4,
					Start: 17,
					End: 24,
				},
				&analysis.Token{
					Term: []byte("vwxy"),
					Type: analysis.AlphaNumeric,
					Position: 5,
					Start: 25,
					End: 29,
				},
				&analysis.Token{
					Term: []byte("z"),
					Type: analysis.AlphaNumeric,
					Position: 6,
					Start: 30,
					End: 31,
				},
			},
		},
		{
			input: []byte("あい"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
			},
		},
		{
			input: []byte("あい "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
			},
		},
		{
			input: []byte("test"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("test"),
					Type: analysis.AlphaNumeric,
					Position: 1,
					Start: 0,
					End: 4,
				},
			},
		},
		{
			input: []byte("test "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("test"),
					Type: analysis.AlphaNumeric,
					Position: 1,
					Start: 0,
					End: 4,
				},
			},
		},
		{
			input: []byte("あいtest"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("test"),
					Type: analysis.AlphaNumeric,
					Position: 3,
					Start: 6,
					End: 10,
				},
			},
		},
		{
			input: []byte("testあい "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("test"),
					Type: analysis.AlphaNumeric,
					Position: 1,
					Start: 0,
					End: 4,
				},
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 2,
					Start: 4,
					End: 10,
				},
			},
		},
		{
			input: []byte("あいうえおabcかきくけこ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("いう"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("うえ"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("えお"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("abc"),
					Type: analysis.AlphaNumeric,
					Position: 6,
					Start: 15,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("かき"),
					Type: analysis.Double,
					Position: 7,
					Start: 18,
					End: 24,
				},
				&analysis.Token{
					Term: []byte("きく"),
					Type: analysis.Double,
					Position: 8,
					Start: 21,
					End: 27,
				},
				&analysis.Token{
					Term: []byte("くけ"),
					Type: analysis.Double,
					Position: 9,
					Start: 24,
					End: 30,
				},
				&analysis.Token{
					Term: []byte("けこ"),
					Type: analysis.Double,
					Position: 10,
					Start: 27,
					End: 33,
				},
			},
		},
		{
			input: []byte("あいうえおabんcかきくけ こ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("あい"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("いう"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("うえ"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("えお"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("ab"),
					Type: analysis.AlphaNumeric,
					Position: 6,
					Start: 15,
					End: 17,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Single,
					Position: 7,
					Start: 17,
					End: 20,
				},
				&analysis.Token{
					Term: []byte("c"),
					Type: analysis.AlphaNumeric,
					Position: 8,
					Start: 20,
					End: 21,
				},
				&analysis.Token{
					Term: []byte("かき"),
					Type: analysis.Double,
					Position: 9,
					Start: 21,
					End: 27,
				},
				&analysis.Token{
					Term: []byte("きく"),
					Type: analysis.Double,
					Position: 10,
					Start: 24,
					End: 30,
				},
				&analysis.Token{
					Term: []byte("くけ"),
					Type: analysis.Double,
					Position: 11,
					Start: 27,
					End: 33,
				},
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Single,
					Position: 13,
					Start: 34,
					End: 37,
				},
			},
		},
		{
			input: []byte("一 روبرت موير"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("روبرت"),
					Type: analysis.AlphaNumeric,
					Position: 2,
					Start: 4,
					End: 14,
				},
				&analysis.Token{
					Term: []byte("موير"),
					Type: analysis.AlphaNumeric,
					Position: 3,
					Start: 15,
					End: 23,
				},
			},
		},
		{
			input: []byte("一 رُوبرت موير"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("رُوبرت"),
					Type: analysis.AlphaNumeric,
					Position: 2,
					Start: 4,
					End: 16,
				},
				&analysis.Token{
					Term: []byte("موير"),
					Type: analysis.AlphaNumeric,
					Position: 3,
					Start: 17,
					End: 25,
				},
			},
		},
		{
			input: []byte("𩬅艱鍟䇹愯瀛"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("𩬅艱"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 7,
				},
				&analysis.Token{
					Term: []byte("艱鍟"),
					Type: analysis.Double,
					Position: 2,
					Start: 4,
					End: 10,
				},
				&analysis.Token{
					Term: []byte("鍟䇹"),
					Type: analysis.Double,
					Position: 3,
					Start: 7,
					End: 13,
				},
				&analysis.Token{
					Term: []byte("䇹愯"),
					Type: analysis.Double,
					Position: 4,
					Start: 10,
					End: 16,
				},
				&analysis.Token{
					Term: []byte("愯瀛"),
					Type: analysis.Double,
					Position: 5,
					Start: 13,
					End: 19,
				},
			},
		},
		{
			input: []byte("一"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
			},
		},
		{
			input: []byte("一丁丂"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("一丁"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("丁丂"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
			},
		},
	}

	cache := registry.NewCache()
	for _, test := range tests {
		analyzer, err := cache.AnalyzerNamed(AnalyzerName)
		if err != nil {
			t.Fatal(err)
		}
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
166  vendor/github.com/blevesearch/bleve/analysis/language/cjk/cjk_bigram.go  generated  vendored
@ -1,166 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"container/ring"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const BigramName = "cjk_bigram"

type CJKBigramFilter struct {
	outputUnigram bool
}

func NewCJKBigramFilter(outputUnigram bool) *CJKBigramFilter {
	return &CJKBigramFilter{
		outputUnigram: outputUnigram,
	}
}

func (s *CJKBigramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	r := ring.New(2)
	itemsInRing := 0

	rv := make(analysis.TokenStream, 0, len(input))

	for _, token := range input {
		if token.Type == analysis.Ideographic {
			if itemsInRing > 0 {
				// if items already buffered
				// check to see if this is aligned
				curr := r.Value.(*analysis.Token)
				if token.Start-curr.End != 0 {
					// not aligned flush
					flushToken := s.flush(r, &itemsInRing)
					if flushToken != nil {
						rv = append(rv, flushToken)
					}
				}
			}
			// now we can add this token to the buffer
			r = r.Next()
			r.Value = token
			if itemsInRing < 2 {
				itemsInRing++
			}
			if itemsInRing > 1 && s.outputUnigram {
				unigram := s.buildUnigram(r, &itemsInRing)
				if unigram != nil {
					rv = append(rv, unigram)
				}
			}
			bigramToken := s.outputBigram(r, &itemsInRing)
			if bigramToken != nil {
				rv = append(rv, bigramToken)
			}
		} else {
			// flush anything already buffered
			flushToken := s.flush(r, &itemsInRing)
			if flushToken != nil {
				rv = append(rv, flushToken)
			}
			// output this token as is
			rv = append(rv, token)
		}
	}

	// deal with possible trailing unigram
	if itemsInRing == 1 || s.outputUnigram {
		if itemsInRing == 2 {
			r = r.Next()
		}
		unigram := s.buildUnigram(r, &itemsInRing)
		if unigram != nil {
			rv = append(rv, unigram)
		}
	}
	return rv
}

func (s *CJKBigramFilter) flush(r *ring.Ring, itemsInRing *int) *analysis.Token {
	var rv *analysis.Token
	if *itemsInRing == 1 {
		rv = s.buildUnigram(r, itemsInRing)
	}
	r.Value = nil
	*itemsInRing = 0
	return rv
}

func (s *CJKBigramFilter) outputBigram(r *ring.Ring, itemsInRing *int) *analysis.Token {
	if *itemsInRing == 2 {
		thisShingleRing := r.Move(-1)
		shingledBytes := make([]byte, 0)

		// do first token
		prev := thisShingleRing.Value.(*analysis.Token)
		shingledBytes = append(shingledBytes, prev.Term...)

		// do second token
		thisShingleRing = thisShingleRing.Next()
		curr := thisShingleRing.Value.(*analysis.Token)
		shingledBytes = append(shingledBytes, curr.Term...)

		token := analysis.Token{
			Type: analysis.Double,
			Term: shingledBytes,
			Position: prev.Position,
			Start: prev.Start,
			End: curr.End,
		}
		return &token
	}
	return nil
}

func (s *CJKBigramFilter) buildUnigram(r *ring.Ring, itemsInRing *int) *analysis.Token {
	if *itemsInRing == 2 {
		thisShingleRing := r.Move(-1)
		// do first token
		prev := thisShingleRing.Value.(*analysis.Token)
		token := analysis.Token{
			Type: analysis.Single,
			Term: prev.Term,
			Position: prev.Position,
			Start: prev.Start,
			End: prev.End,
		}
		return &token
	} else if *itemsInRing == 1 {
		// do first token
		prev := r.Value.(*analysis.Token)
		token := analysis.Token{
			Type: analysis.Single,
			Term: prev.Term,
			Position: prev.Position,
			Start: prev.Start,
			End: prev.End,
		}
		return &token
	}
	return nil
}

func CJKBigramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	outputUnigram := false
	outVal, ok := config["output_unigram"].(bool)
	if ok {
		outputUnigram = outVal
	}
	return NewCJKBigramFilter(outputUnigram), nil
}

func init() {
	registry.RegisterTokenFilter(BigramName, CJKBigramFilterConstructor)
}
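To make the ring-buffer logic above concrete, here is a hedged sketch (not from the commit) of what the filter emits for three position-aligned ideographic tokens with unigram output disabled; the token values are illustrative, but the construction mirrors the removed tests that follow:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/cjk"
)

func main() {
	in := analysis.TokenStream{
		&analysis.Token{Term: []byte("世"), Type: analysis.Ideographic, Position: 1, Start: 0, End: 3},
		&analysis.Token{Term: []byte("界"), Type: analysis.Ideographic, Position: 2, Start: 3, End: 6},
		&analysis.Token{Term: []byte("人"), Type: analysis.Ideographic, Position: 3, Start: 6, End: 9},
	}
	// With outputUnigram false, adjacent aligned ideographs become
	// overlapping bigrams: 世界, then 界人.
	for _, tok := range cjk.NewCJKBigramFilter(false).Filter(in) {
		fmt.Println(string(tok.Term))
	}
}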
420  vendor/github.com/blevesearch/bleve/analysis/language/cjk/cjk_bigram_test.go  generated  vendored
@ -1,420 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestCJKBigramFilter(t *testing.T) {

	tests := []struct {
		outputUnigram bool
		input analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Ideographic,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Ideographic,
					Position: 2,
					Start: 5,
					End: 7,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Single,
					Position: 2,
					Start: 5,
					End: 7,
				},
			},
		},
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Ideographic,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Ideographic,
					Position: 2,
					Start: 3,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("に"),
					Type: analysis.Ideographic,
					Position: 3,
					Start: 6,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("ち"),
					Type: analysis.Ideographic,
					Position: 4,
					Start: 9,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("は"),
					Type: analysis.Ideographic,
					Position: 5,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("世"),
					Type: analysis.Ideographic,
					Position: 6,
					Start: 15,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("界"),
					Type: analysis.Ideographic,
					Position: 7,
					Start: 18,
					End: 21,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こん"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("んに"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("にち"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("ちは"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("は世"),
					Type: analysis.Double,
					Position: 5,
					Start: 12,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("世界"),
					Type: analysis.Double,
					Position: 6,
					Start: 15,
					End: 21,
				},
			},
		},
		{
			outputUnigram: true,
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Ideographic,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Ideographic,
					Position: 2,
					Start: 3,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("に"),
					Type: analysis.Ideographic,
					Position: 3,
					Start: 6,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("ち"),
					Type: analysis.Ideographic,
					Position: 4,
					Start: 9,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("は"),
					Type: analysis.Ideographic,
					Position: 5,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("世"),
					Type: analysis.Ideographic,
					Position: 6,
					Start: 15,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("界"),
					Type: analysis.Ideographic,
					Position: 7,
					Start: 18,
					End: 21,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Single,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("こん"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Single,
					Position: 2,
					Start: 3,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("んに"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("に"),
					Type: analysis.Single,
					Position: 3,
					Start: 6,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("にち"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("ち"),
					Type: analysis.Single,
					Position: 4,
					Start: 9,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("ちは"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("は"),
					Type: analysis.Single,
					Position: 5,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("は世"),
					Type: analysis.Double,
					Position: 5,
					Start: 12,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("世"),
					Type: analysis.Single,
					Position: 6,
					Start: 15,
					End: 18,
				},
				&analysis.Token{
					Term: []byte("世界"),
					Type: analysis.Double,
					Position: 6,
					Start: 15,
					End: 21,
				},
				&analysis.Token{
					Term: []byte("界"),
					Type: analysis.Single,
					Position: 7,
					Start: 18,
					End: 21,
				},
			},
		},
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こ"),
					Type: analysis.Ideographic,
					Position: 1,
					Start: 0,
					End: 3,
				},
				&analysis.Token{
					Term: []byte("ん"),
					Type: analysis.Ideographic,
					Position: 2,
					Start: 3,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("に"),
					Type: analysis.Ideographic,
					Position: 3,
					Start: 6,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("ち"),
					Type: analysis.Ideographic,
					Position: 4,
					Start: 9,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("は"),
					Type: analysis.Ideographic,
					Position: 5,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("cat"),
					Type: analysis.AlphaNumeric,
					Position: 6,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("世"),
					Type: analysis.Ideographic,
					Position: 7,
					Start: 18,
					End: 21,
				},
				&analysis.Token{
					Term: []byte("界"),
					Type: analysis.Ideographic,
					Position: 8,
					Start: 21,
					End: 24,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("こん"),
					Type: analysis.Double,
					Position: 1,
					Start: 0,
					End: 6,
				},
				&analysis.Token{
					Term: []byte("んに"),
					Type: analysis.Double,
					Position: 2,
					Start: 3,
					End: 9,
				},
				&analysis.Token{
					Term: []byte("にち"),
					Type: analysis.Double,
					Position: 3,
					Start: 6,
					End: 12,
				},
				&analysis.Token{
					Term: []byte("ちは"),
					Type: analysis.Double,
					Position: 4,
					Start: 9,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("cat"),
					Type: analysis.AlphaNumeric,
					Position: 6,
					Start: 12,
					End: 15,
				},
				&analysis.Token{
					Term: []byte("世界"),
					Type: analysis.Double,
					Position: 7,
					Start: 18,
					End: 24,
				},
			},
		},
	}

	for _, test := range tests {
		cjkBigramFilter := NewCJKBigramFilter(test.outputUnigram)
		actual := cjkBigramFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output, actual)
		}
	}
}
58  vendor/github.com/blevesearch/bleve/analysis/language/ckb/analyzer_ckb.go  generated  vendored
@ -1,58 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build icu full

package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "ckb"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	normCkbFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopCkbFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerCkbFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			normCkbFilter,
			toLowerFilter,
			stopCkbFilter,
			stemmerCkbFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
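Note the // +build icu full constraint above: this analyzer is compiled only when the icu or full build tag is set (for example, go build -tags icu), which keeps the cgo-based ICU tokenizer dependency out of default builds.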
74  vendor/github.com/blevesearch/bleve/analysis/language/ckb/analyzer_ckb_test.go  generated  vendored
@ -1,74 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build icu full

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestSoraniAnalyzer(t *testing.T) {
	tests := []struct {
		input []byte
		output analysis.TokenStream
	}{
		// stop word removal
		{
			input: []byte("ئەم پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("پیاو"),
					Position: 2,
					Start: 7,
					End: 17,
				},
			},
		},
		{
			input: []byte("پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("پیاو"),
					Position: 1,
					Start: 0,
					End: 10,
				},
			},
		},
		{
			input: []byte("پیاو"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("پیاو"),
					Position: 1,
					Start: 0,
					End: 8,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
113	vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_normalize.go (generated, vendored)
@@ -1,113 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"bytes"
	"unicode"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_ckb"

const (
	Yeh        = '\u064A'
	DotlessYeh = '\u0649'
	FarsiYeh   = '\u06CC'

	Kaf   = '\u0643'
	Keheh = '\u06A9'

	Heh            = '\u0647'
	Ae             = '\u06D5'
	Zwnj           = '\u200C'
	HehDoachashmee = '\u06BE'
	TehMarbuta     = '\u0629'

	Reh       = '\u0631'
	Rreh      = '\u0695'
	RrehAbove = '\u0692'

	Tatweel  = '\u0640'
	Fathatan = '\u064B'
	Dammatan = '\u064C'
	Kasratan = '\u064D'
	Fatha    = '\u064E'
	Damma    = '\u064F'
	Kasra    = '\u0650'
	Shadda   = '\u0651'
	Sukun    = '\u0652'
)

type SoraniNormalizeFilter struct {
}

func NewSoraniNormalizeFilter() *SoraniNormalizeFilter {
	return &SoraniNormalizeFilter{}
}

func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case Yeh, DotlessYeh:
			runes[i] = FarsiYeh
		case Kaf:
			runes[i] = Keheh
		case Zwnj:
			if i > 0 && runes[i-1] == Heh {
				runes[i-1] = Ae
			}
			runes = analysis.DeleteRune(runes, i)
			i--
		case Heh:
			if i == len(runes)-1 {
				runes[i] = Ae
			}
		case TehMarbuta:
			runes[i] = Ae
		case HehDoachashmee:
			runes[i] = Heh
		case Reh:
			if i == 0 {
				runes[i] = Rreh
			}
		case RrehAbove:
			runes[i] = Rreh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			i--
		default:
			if unicode.In(runes[i], unicode.Cf) {
				runes = analysis.DeleteRune(runes, i)
				i--
			}
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewSoraniNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
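A minimal usage sketch for the normalizer above — hypothetical, assuming bleve's analysis API exactly as it appears in the deleted file (the sample term is illustrative):

	// hypothetical example: run the Sorani normalizer over one token
	token := &analysis.Token{Term: []byte("\u0643\u0649")} // Kaf + DotlessYeh
	out := NewSoraniNormalizeFilter().Filter(analysis.TokenStream{token})
	// out[0].Term is now "\u06A9\u06CC" (Keheh + FarsiYeh), per the switch above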
318	vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_normalize_test.go (generated, vendored)
@@ -1,318 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestSoraniNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// test Y
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064A")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06CC")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0649")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06CC")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u06CC")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06CC")}},
		},
		// test K
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0643")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06A9")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u06A9")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06A9")}},
		},
		// test H
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0647\u200C")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06D5")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0647\u200C\u06A9")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06D5\u06A9")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u06BE")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u0647")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0629")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u06D5")}},
		},
		// test final H
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0647\u0647\u0647")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u0647\u0647\u06D5")}},
		},
		// test RR
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0692")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u0695")}},
		},
		// test initial RR
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0631\u0631\u0631")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("\u0695\u0631\u0631")}},
		},
		// test remove
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0640")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064B")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064C")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064D")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064E")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u064F")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0650")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0651")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u0652")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("\u200C")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
		// empty
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
	}

	soraniNormalizeFilter := NewSoraniNormalizeFilter()
	for _, test := range tests {
		actual := soraniNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
143	vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_stemmer_filter.go (generated, vendored)
@@ -1,143 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"bytes"
	"unicode/utf8"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_ckb"

type SoraniStemmerFilter struct {
}

func NewSoraniStemmerFilter() *SoraniStemmerFilter {
	return &SoraniStemmerFilter{}
}

func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		// if not protected keyword, stem it
		if !token.KeyWord {
			stemmed := stem(token.Term)
			token.Term = stemmed
		}
	}
	return input
}

func stem(input []byte) []byte {
	inputLen := utf8.RuneCount(input)

	// postposition
	if inputLen > 5 && bytes.HasSuffix(input, []byte("دا")) {
		input = truncateRunes(input, 2)
		inputLen = utf8.RuneCount(input)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("نا")) {
		input = truncateRunes(input, 1)
		inputLen = utf8.RuneCount(input)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەوە")) {
		input = truncateRunes(input, 3)
		inputLen = utf8.RuneCount(input)
	}

	// possessive pronoun
	if inputLen > 6 &&
		(bytes.HasSuffix(input, []byte("مان")) ||
			bytes.HasSuffix(input, []byte("یان")) ||
			bytes.HasSuffix(input, []byte("تان"))) {
		input = truncateRunes(input, 3)
		inputLen = utf8.RuneCount(input)
	}

	// indefinite singular ezafe
	if inputLen > 6 && bytes.HasSuffix(input, []byte("ێکی")) {
		return truncateRunes(input, 3)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یەکی")) {
		return truncateRunes(input, 4)
	}

	if inputLen > 5 && bytes.HasSuffix(input, []byte("ێک")) {
		// indefinite singular
		return truncateRunes(input, 2)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یەک")) {
		// indefinite singular
		return truncateRunes(input, 3)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەکە")) {
		// definite singular
		return truncateRunes(input, 3)
	} else if inputLen > 5 && bytes.HasSuffix(input, []byte("کە")) {
		// definite singular
		return truncateRunes(input, 2)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("ەکان")) {
		// definite plural
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("کان")) {
		// definite plural
		return truncateRunes(input, 3)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانی")) {
		// indefinite plural ezafe
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انی")) {
		// indefinite plural ezafe
		return truncateRunes(input, 3)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یان")) {
		// indefinite plural
		return truncateRunes(input, 3)
	} else if inputLen > 5 && bytes.HasSuffix(input, []byte("ان")) {
		// indefinite plural
		return truncateRunes(input, 2)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانە")) {
		// demonstrative plural
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انە")) {
		// demonstrative plural
		return truncateRunes(input, 3)
	} else if inputLen > 5 && (bytes.HasSuffix(input, []byte("ایە")) || bytes.HasSuffix(input, []byte("ەیە"))) {
		// demonstrative singular
		return truncateRunes(input, 2)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ە")) {
		// demonstrative singular
		return truncateRunes(input, 1)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ی")) {
		// absolute singular ezafe
		return truncateRunes(input, 1)
	}
	return input
}

func truncateRunes(input []byte, num int) []byte {
	runes := bytes.Runes(input)
	runes = runes[:len(runes)-num]
	out := buildTermFromRunes(runes)
	return out
}

func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*4)
	for _, r := range runes {
		runeBytes := make([]byte, utf8.RuneLen(r))
		utf8.EncodeRune(runeBytes, r)
		rv = append(rv, runeBytes...)
	}
	return rv
}

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewSoraniStemmerFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
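A short sketch of the stemmer above in use — hypothetical, using only the names defined in the deleted file (the sample word and its result come from the test file that follows):

	// hypothetical example: strip the definite-plural suffix ەکان
	token := &analysis.Token{Term: []byte("پیاوەکان")}
	out := NewSoraniStemmerFilter().Filter(analysis.TokenStream{token})
	// out[0].Term is now "پیاو"; tokens with KeyWord set are left untouched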
294	vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_stemmer_filter_test.go (generated, vendored)
@@ -1,294 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
)

func TestSoraniStemmerFilter(t *testing.T) {

	// in order to match the lucene tests
	// we will test with an analyzer, not just the stemmer
	analyzer := analysis.Analyzer{
		Tokenizer: single_token.NewSingleTokenTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			NewSoraniNormalizeFilter(),
			NewSoraniStemmerFilter(),
		},
	}

	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{ // -ek
			input: []byte("پیاوێک"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پیاو"), Position: 1, Start: 0, End: 12},
			},
		},
		{ // -yek
			input: []byte("دەرگایەک"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // -aka
			input: []byte("پیاوەكە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پیاو"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -ka
			input: []byte("دەرگاكە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -a
			input: []byte("کتاویە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("کتاوی"), Position: 1, Start: 0, End: 12},
			},
		},
		{ // -ya
			input: []byte("دەرگایە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -An
			input: []byte("پیاوان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پیاو"), Position: 1, Start: 0, End: 12},
			},
		},
		{ // -yAn
			input: []byte("دەرگایان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // -akAn
			input: []byte("پیاوەکان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پیاو"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // -kAn
			input: []byte("دەرگاکان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // -Ana
			input: []byte("پیاوانە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پیاو"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -yAna
			input: []byte("دەرگایانە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دەرگا"), Position: 1, Start: 0, End: 18},
			},
		},
		{ // Ezafe singular
			input: []byte("هۆتیلی"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("هۆتیل"), Position: 1, Start: 0, End: 12},
			},
		},
		{ // Ezafe indefinite
			input: []byte("هۆتیلێکی"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("هۆتیل"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // Ezafe plural
			input: []byte("هۆتیلانی"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("هۆتیل"), Position: 1, Start: 0, End: 16},
			},
		},
		{ // -awa
			input: []byte("دوورەوە"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("دوور"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -dA
			input: []byte("نیوەشەودا"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("نیوەشەو"), Position: 1, Start: 0, End: 18},
			},
		},
		{ // -A
			input: []byte("سۆرانا"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("سۆران"), Position: 1, Start: 0, End: 12},
			},
		},
		{ // -mAn
			input: []byte("پارەمان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پارە"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -tAn
			input: []byte("پارەتان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پارە"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // -yAn
			input: []byte("پارەیان"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("پارە"), Position: 1, Start: 0, End: 14},
			},
		},
		{ // empty
			input: []byte(""),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte(""), Position: 1, Start: 0, End: 0},
			},
		},
	}

	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("for input %s(% x)", test.input, test.input)
			t.Errorf("\texpected:")
			for _, token := range test.output {
				t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
			}
			t.Errorf("\tactual:")
			for _, token := range actual {
				t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
			}
		}
	}
}
28	vendor/github.com/blevesearch/bleve/analysis/language/ckb/stop_filter_ckb.go (generated, vendored)
@@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
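A minimal sketch of how this constructor is reached at runtime — hedged, assuming the registry API used throughout these files; error handling is illustrative only:

	// hypothetical example: resolve the Sorani stop filter registered above
	cache := registry.NewCache()
	stopFilter, err := cache.TokenFilterNamed(StopName) // "stop_ckb"
	if err != nil {
		panic(err) // illustrative; the constructor looks up the token map registered under the same name
	}
	_ = stopFilter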
160	vendor/github.com/blevesearch/bleve/analysis/language/ckb/stop_words_ckb.go (generated, vendored)
@@ -1,160 +0,0 @@
package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ckb"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc

# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر

# demonstratives
# that
ئەو
# this
ئەم

# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان

# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(SoraniStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
28	vendor/github.com/blevesearch/bleve/analysis/language/cs/stop_filter_cs.go (generated, vendored)
@@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cs

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
196	vendor/github.com/blevesearch/bleve/analysis/language/cs/stop_words_cs.go (generated, vendored)
@@ -1,196 +0,0 @@
package cs

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_cs"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var CzechStopWords = []byte(`a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
až
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
už
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
má
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
já
on
ona
ono
oni
ony
my
vy
jí
ji
mě
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CzechStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
54	vendor/github.com/blevesearch/bleve/analysis/language/da/analyzer_da.go (generated, vendored)
@@ -1,54 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "da"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopDaFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerDaFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopDaFilter,
			stemmerDaFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
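Note the two +build lines above: this analyzer only compiles when both the libstemmer and icu tags (or full) are set. A hedged sketch of resolving it by name, assuming such a build:

	// hypothetical example: only available when built with e.g. `go build -tags full`
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName) // "da"
	if err != nil {
		panic(err) // the analyzer is absent without the build tags
	}
	tokens := analyzer.Analyze([]byte("undersøgelse"))
	_ = tokens // per the test below, the single stemmed term is "undersøg"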
69	vendor/github.com/blevesearch/bleve/analysis/language/da/analyzer_da_test.go (generated, vendored)
@@ -1,69 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package da

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestDanishAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{
			input: []byte("undersøg"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("undersøg"), Position: 1, Start: 0, End: 9},
			},
		},
		{
			input: []byte("undersøgelse"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("undersøg"), Position: 1, Start: 0, End: 13},
			},
		},
		// stop word
		{
			input:  []byte("på"),
			output: analysis.TokenStream{},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
28	vendor/github.com/blevesearch/bleve/analysis/language/da/stemmer_da.go (generated, vendored)
@@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_da"

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return stemmer_filter.NewStemmerFilter("da")
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
28	vendor/github.com/blevesearch/bleve/analysis/language/da/stop_filter_da.go (generated, vendored)
@@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
134	vendor/github.com/blevesearch/bleve/analysis/language/da/stop_words_da.go (generated, vendored)
@@ -1,134 +0,0 @@
package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_da"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string

var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
 | This file is distributed under the BSD License.
 | See http://snowball.tartarus.org/license.php
 | Also see http://www.opensource.org/licenses/bsd-license.html
 |  - Encoding was converted to UTF-8.
 |  - This notice was added.
 |
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 |
 | A Danish stop word list. Comments begin with vertical bar. Each stop
 | word is at the start of a line.
 |
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | a large text sample.

og           | and
i            | in
jeg          | I
det          | that (dem. pronoun)/it (pers. pronoun)
at           | that (in front of a sentence)/to (with infinitive)
en           | a/an
den          | it (pers. pronoun)/that (dem. pronoun)
til          | to/at/for/until/against/by/of/into, more
er           | present tense of "to be"
som          | who, as
på           | on/upon/in/on/at/to/after/of/with/for, on
de           | they
med          | with/by/in, along
han          | he
af           | of/by/from/off/for/in/with/on, off
for          | at/for/to/from/by/of/ago, in front/before, because
ikke         | not
der          | who/which, there/those
var          | past tense of "to be"
mig          | me/myself
sig          | oneself/himself/herself/itself/themselves
men          | but
et           | a/an/one, one (number), someone/somebody/one
har          | present tense of "to have"
om           | round/about/for/in/a, about/around/down, if
vi           | we
min          | my
havde        | past tense of "to have"
ham          | him
hun          | she
nu           | now
over         | over/above/across/by/beyond/past/on/about, over/past
da           | then, when/as/since
fra          | from/off/since, off, since
du           | you
ud           | out
sin          | his/her/its/one's
dem          | them
os           | us/ourselves
op           | up
man          | you/one
hans         | his
hvor         | where
eller        | or
hvad         | what
skal         | must/shall etc.
selv         | myself/youself/herself/ourselves etc., even
her          | here
alle         | all/everyone/everybody etc.
vil          | will (verb)
blev         | past tense of "to stay/to remain/to get/to become"
kunne        | could
ind          | in
når          | when
være         | present tense of "to be"
dog          | however/yet/after all
noget        | something
ville        | would
jo           | you know/you see (adv), yes
deres        | their/theirs
efter        | after/behind/according to/for/by/from, later/afterwards
ned          | down
skulle       | should
denne        | this
end          | than
dette        | this
mit          | my/mine
også         | also
under        | under/beneath/below/during, below/underneath
have         | have
dig          | you
anden        | other
hende        | her
mine         | my
alt          | everything
meget        | much/very, plenty of
sit          | his, her, its, one's
sine         | his, her, its, one's
vor          | our
mod          | against
disse        | these
hvis         | if
din          | your/yours
nogle        | some
hos          | by/at
blive        | be/become
mange        | many
ad           | by/through
bliver       | present tense of "to be/to become"
hendes       | her/hers
været        | be
thi          | for (conj)
jer          | you
sådan        | such, like this/like that
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(DanishStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
59	vendor/github.com/blevesearch/bleve/analysis/language/de/analyzer_de.go (generated, vendored)
@@ -1,59 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package de

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "de"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopDeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerDeFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopDeFilter,
			normalizeDeFilter,
			stemmerDeFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
97	vendor/github.com/blevesearch/bleve/analysis/language/de/analyzer_de_test.go (generated, vendored)
@@ -1,97 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package de

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestGermanAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("Tisch"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("tisch"), Position: 1, Start: 0, End: 5},
			},
		},
		{
			input: []byte("Tische"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("tisch"), Position: 1, Start: 0, End: 6},
			},
		},
		{
			input: []byte("Tischen"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("tisch"), Position: 1, Start: 0, End: 7},
			},
		},
		// german specials
		{
			input: []byte("Schaltflächen"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("schaltflach"), Position: 1, Start: 0, End: 14},
			},
		},
		{
			input: []byte("Schaltflaechen"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("schaltflach"), Position: 1, Start: 0, End: 14},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
94	vendor/github.com/blevesearch/bleve/analysis/language/de/german_normalize.go (generated, vendored)
@@ -1,94 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package de

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_de"

const (
	N = 0 /* ordinary state */
	V = 1 /* stops 'u' from entering umlaut state */
	U = 2 /* umlaut state, allows e-deletion */
)

type GermanNormalizeFilter struct {
}

func NewGermanNormalizeFilter() *GermanNormalizeFilter {
	return &GermanNormalizeFilter{}
}

func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	state := N
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case 'a', 'o':
			state = U
		case 'u':
			if state == N {
				state = U
			} else {
				state = V
			}
		case 'e':
			if state == U {
				runes = analysis.DeleteRune(runes, i)
				i--
			}
			state = V
		case 'i', 'q', 'y':
			state = V
		case 'ä':
			runes[i] = 'a'
			state = V
		case 'ö':
			runes[i] = 'o'
			state = V
		case 'ü':
			runes[i] = 'u'
			state = V
		case 'ß':
			runes[i] = 's'
			i++
			// newrunes := make([]rune, len(runes)+1)
			// copy(newrunes, runes)
			// runes = newrunes
			// runes[i] = 's'
			runes = analysis.InsertRune(runes, i, 's')
			state = N
		default:
			state = N
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewGermanNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
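A small sketch of the state machine above — hedged: normalize is unexported, so this assumes code inside package de, with "fmt" imported; the expected outputs follow from the switch and match the analyzer tests earlier in this diff once stemming is applied:

	// hypothetical example: umlaut folding and e-deletion converge on one form
	fmt.Printf("%s\n", normalize([]byte("schaltflächen")))  // "schaltflachen" (ä folded to a)
	fmt.Printf("%s\n", normalize([]byte("schaltflaechen"))) // "schaltflachen" (e deleted after a)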
Some files were not shown because too many files have changed in this diff.