Upgrade server dependencies, manage them with govendor

parent ebee2746d6
commit 971278e7e5
@@ -24,8 +24,8 @@ func Run(dir, domain, email, port string) (*state, error) {
 		return nil, err
 	}
 
-	client, err := acme.NewClient(URL, &user, KeySize)
-	client.ExcludeChallenges([]string{"tls-sni-01"})
+	client, err := acme.NewClient(URL, &user, acme.RSA2048)
+	client.ExcludeChallenges([]acme.Challenge{acme.TLSSNI01})
 	client.SetHTTPAddress(port)
 
 	if user.Registration == nil {
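Two lego API changes land in this hunk: the account key size is now the typed constant `acme.RSA2048` rather than a bare integer, and excluded challenges are typed `acme.Challenge` values rather than strings. A minimal sketch of the setup flow under the upgraded API (`URL`, `user`, and `port` come from the surrounding code; `client.Register` is assumed from the lego API of this era):

```go
// Sketch of the upgraded client flow, assuming the ~2016 xenolf/lego API.
client, err := acme.NewClient(URL, &user, acme.RSA2048) // key type is a typed constant now
if err != nil {
	return nil, err
}
// The challenge is excluded via a typed constant, not the string "tls-sni-01".
client.ExcludeChallenges([]acme.Challenge{acme.TLSSNI01})
client.SetHTTPAddress(port) // answer http-01 challenges on this port

if user.Registration == nil {
	reg, err := client.Register() // one-time account registration
	if err != nil {
		return nil, err
	}
	user.Registration = reg
}
```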
@@ -123,7 +123,7 @@ func (s *state) setOCSP(ocsp []byte) {
 }
 
 func (s *state) obtain() error {
-	cert, errors := s.client.ObtainCertificate([]string{s.domain}, true, nil)
+	cert, errors := s.client.ObtainCertificate([]string{s.domain}, true, nil, false)
 	if err := errors[s.domain]; err != nil {
 		if _, ok := err.(acme.TOSError); ok {
 			err := s.client.AgreeToTOS()
@@ -180,7 +180,7 @@ func (s *state) renew() bool {
 	meta.PrivateKey = key
 
 Renew:
-	newMeta, err := s.client.RenewCertificate(meta, true)
+	newMeta, err := s.client.RenewCertificate(meta, true, false)
 	if err != nil {
 		if _, ok := err.(acme.TOSError); ok {
 			err := s.client.AgreeToTOS()
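Both certificate calls gain a trailing boolean in this lego version. Judging by the lego API of the period, it is the new OCSP Must-Staple switch, which this commit leaves off; that reading is an inference from upstream lego, not something the diff states. A sketch of the two call sites together:

```go
// Sketch: both lego calls now take a trailing bool, assumed here to be the
// OCSP Must-Staple flag added upstream; false preserves the old behavior.
cert, errs := s.client.ObtainCertificate([]string{s.domain}, true, nil, false)
if err := errs[s.domain]; err != nil {
	if _, ok := err.(acme.TOSError); ok {
		// The CA's terms changed since registration: agree, then retry.
		if err := s.client.AgreeToTOS(); err != nil {
			return err
		}
	}
}
// cert is consumed further down in obtain() in the real code.

newMeta, err := s.client.RenewCertificate(meta, true, false)
```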
@@ -1,6 +1,7 @@
 package letsencrypt
 
 import (
+	"crypto"
 	"crypto/rand"
 	"crypto/rsa"
 	"crypto/x509"
@@ -17,7 +18,7 @@ const defaultUser = "default"
 type User struct {
 	Email        string
 	Registration *acme.RegistrationResource
-	key          *rsa.PrivateKey
+	key          crypto.PrivateKey
 }
 
 func (u User) GetEmail() string {
@@ -28,7 +29,7 @@ func (u User) GetRegistration() *acme.RegistrationResource {
 	return u.Registration
 }
 
-func (u User) GetPrivateKey() *rsa.PrivateKey {
+func (u User) GetPrivateKey() crypto.PrivateKey {
 	return u.key
 }
 
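The interface lego expects from its users now traffics in `crypto.PrivateKey`, which lets accounts hold ECDSA keys as well as RSA ones; the cost is that consumers needing the concrete type must assert. A self-contained sketch of the updated contract (the `account` type and `describe` helper are hypothetical, written for illustration):

```go
// Sketch of the updated acme.User contract under crypto.PrivateKey.
package main

import (
	"crypto"
	"crypto/ecdsa"
	"crypto/elliptic"
	"crypto/rand"
	"crypto/rsa"
	"fmt"

	"github.com/xenolf/lego/acme"
)

type account struct {
	email string
	key   crypto.PrivateKey
	reg   *acme.RegistrationResource
}

func (a account) GetEmail() string                            { return a.email }
func (a account) GetRegistration() *acme.RegistrationResource { return a.reg }
func (a account) GetPrivateKey() crypto.PrivateKey            { return a.key }

// describe recovers the concrete key type via a type switch.
func describe(key crypto.PrivateKey) string {
	switch k := key.(type) {
	case *rsa.PrivateKey:
		return fmt.Sprintf("RSA-%d", k.N.BitLen())
	case *ecdsa.PrivateKey:
		return "ECDSA/" + k.Curve.Params().Name
	default:
		return fmt.Sprintf("unknown (%T)", k)
	}
}

func main() {
	rsaKey, _ := rsa.GenerateKey(rand.Reader, 2048)          // error handling elided in this sketch
	ecKey, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	fmt.Println(describe(account{key: rsaKey}.GetPrivateKey())) // RSA-2048
	fmt.Println(describe(account{key: ecKey}.GetPrivateKey()))  // ECDSA/P-256
}
```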
@@ -86,7 +87,7 @@ func saveUser(user User) error {
 	return ioutil.WriteFile(directory.UserRegistration(user.Email), jsonBytes, 0600)
 }
 
-func loadRSAPrivateKey(file string) (*rsa.PrivateKey, error) {
+func loadRSAPrivateKey(file string) (crypto.PrivateKey, error) {
 	keyBytes, err := ioutil.ReadFile(file)
 	if err != nil {
 		return nil, err
@@ -95,8 +96,10 @@ func loadRSAPrivateKey(file string) (*rsa.PrivateKey, error) {
 	return x509.ParsePKCS1PrivateKey(keyBlock.Bytes)
 }
 
-func saveRSAPrivateKey(key *rsa.PrivateKey, file string) error {
-	pemKey := pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)}
+func saveRSAPrivateKey(key crypto.PrivateKey, file string) error {
+	pemKey := pem.Block{
+		Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key.(*rsa.PrivateKey)),
+	}
 	keyOut, err := os.Create(file)
 	if err != nil {
 		return err
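One consequence of widening the parameter type: the bare `key.(*rsa.PrivateKey)` assertion above panics if a non-RSA key ever reaches `saveRSAPrivateKey`. A defensive variant (a hypothetical sketch, not the commit's code; the name `savePrivateKeyChecked` and the trailing `pem.Encode` are assumptions) would surface an error instead:

```go
package letsencrypt

import (
	"crypto"
	"crypto/rsa"
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"os"
)

// savePrivateKeyChecked is a defensive sketch: it rejects non-RSA keys
// with an error instead of panicking on a failed type assertion.
func savePrivateKeyChecked(key crypto.PrivateKey, file string) error {
	rsaKey, ok := key.(*rsa.PrivateKey)
	if !ok {
		return fmt.Errorf("expected *rsa.PrivateKey, got %T", key)
	}
	block := pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(rsaKey)}
	out, err := os.Create(file)
	if err != nil {
		return err
	}
	defer out.Close()
	return pem.Encode(out, &block)
}
```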
@@ -5,7 +5,6 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
-	"github.com/xenolf/lego/acme"
 )
 
 func tempdir() string {
@@ -14,18 +13,10 @@ func tempdir() string {
 }
 
 func testUser(t *testing.T, email string) {
-	reg := &acme.RegistrationResource{
-		URI: "test.com",
-		Body: acme.Registration{
-			Agreement: "agree?",
-		},
-	}
-
 	user, err := newUser(email)
 	assert.Nil(t, err)
 	key := user.GetPrivateKey()
 	assert.NotNil(t, key)
-	user.Registration = reg
 
 	err = saveUser(user)
 	assert.Nil(t, err)
@@ -34,7 +25,6 @@ func testUser(t *testing.T, email string) {
 	assert.Nil(t, err)
 	assert.Equal(t, email, user.GetEmail())
 	assert.Equal(t, key, user.GetPrivateKey())
-	assert.Equal(t, reg, user.GetRegistration())
 }
 
 func TestUser(t *testing.T) {
@@ -55,7 +55,8 @@ func handleAuth(w http.ResponseWriter, r *http.Request) *Session {
 		token, err := parseToken(cookie.Value)
 
 		if err == nil && token.Valid {
-			userID := uint64(token.Claims["UserID"].(float64))
+			claims := token.Claims.(jwt.MapClaims)
+			userID := uint64(claims["UserID"].(float64))
 
 			log.Println(r.RemoteAddr, "[Auth] GET", r.URL.Path, "| Valid token | User ID:", userID)
 
@@ -91,7 +92,8 @@ func newUser(w http.ResponseWriter, r *http.Request) *Session {
 	go session.run()
 
 	token := jwt.New(jwt.SigningMethodHS256)
-	token.Claims["UserID"] = user.ID
+	claims := token.Claims.(jwt.MapClaims)
+	claims["UserID"] = user.ID
 	tokenString, err := token.SignedString(hmacKey)
 	if err != nil {
 		return nil
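Both JWT hunks track jwt-go's v3 API, where `token.Claims` is an interface that must be asserted to `jwt.MapClaims` before reading or writing. A self-contained sketch of the round trip (the secret and claim value are illustrative):

```go
// Round trip under jwt-go v3: assert MapClaims to write, parse, assert to read.
package main

import (
	"fmt"

	jwt "github.com/dgrijalva/jwt-go"
)

var hmacKey = []byte("illustrative-secret") // placeholder signing secret

func main() {
	// Write side (mirrors newUser): assert MapClaims, then set a field.
	token := jwt.New(jwt.SigningMethodHS256)
	token.Claims.(jwt.MapClaims)["UserID"] = 42
	signed, err := token.SignedString(hmacKey)
	if err != nil {
		panic(err)
	}

	// Read side (mirrors handleAuth): parse with the same secret, assert again.
	parsed, err := jwt.Parse(signed, func(t *jwt.Token) (interface{}, error) {
		return hmacKey, nil
	})
	if err == nil && parsed.Valid {
		claims := parsed.Claims.(jwt.MapClaims)
		userID := uint64(claims["UserID"].(float64)) // JSON numbers decode as float64
		fmt.Println("user:", userID)
	}
}
```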
@@ -118,7 +118,8 @@ func (u *User) SearchMessages(server, channel, q string) ([]Message, error) {
 	contentQuery.SetField("content")
 	contentQuery.SetFuzziness(2)
 
-	query := bleve.NewBooleanQuery([]bleve.Query{serverQuery, channelQuery, contentQuery}, nil, nil)
+	query := bleve.NewBooleanQuery()
+	query.AddMust(serverQuery, channelQuery, contentQuery)
 
 	search := bleve.NewSearchRequest(query)
 	searchResults, err := u.messageIndex.Search(search)
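bleve's boolean query moved from a constructor taking must/should/must-not slices to a builder style. A self-contained sketch of the same three-clause search (the in-memory index and sample document are illustrative):

```go
// Three must-clauses under bleve's builder-style boolean query.
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
)

func main() {
	index, err := bleve.NewMemOnly(bleve.NewIndexMapping())
	if err != nil {
		panic(err)
	}
	index.Index("1", map[string]string{"server": "srv", "channel": "#go", "content": "hello world"})

	serverQuery := bleve.NewMatchQuery("srv")
	serverQuery.SetField("server")
	channelQuery := bleve.NewMatchQuery("#go")
	channelQuery.SetField("channel")
	contentQuery := bleve.NewMatchQuery("helo") // fuzziness tolerates the typo
	contentQuery.SetField("content")
	contentQuery.SetFuzziness(2)

	// Clauses are now added to an empty boolean query, rather than
	// passed as slices to the constructor.
	query := bleve.NewBooleanQuery()
	query.AddMust(serverQuery, channelQuery, contentQuery)

	results, err := index.Search(bleve.NewSearchRequest(query))
	if err != nil {
		panic(err)
	}
	fmt.Println("hits:", results.Total)
}
```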
3 vendor/github.com/BurntSushi/toml/COMPATIBLE generated vendored
@@ -1,3 +0,0 @@
14 vendor/github.com/BurntSushi/toml/COPYING generated vendored
@@ -1,14 +0,0 @@
19 vendor/github.com/BurntSushi/toml/Makefile generated vendored
@@ -1,19 +0,0 @@
220 vendor/github.com/BurntSushi/toml/README.md generated vendored
@@ -1,220 +0,0 @@
61 vendor/github.com/BurntSushi/toml/_examples/example.go generated vendored
@@ -1,61 +0,0 @@
22 vendor/github.com/BurntSushi/toml/_examples/hard.toml generated vendored
@@ -1,22 +0,0 @@
4 vendor/github.com/BurntSushi/toml/_examples/implicit.toml generated vendored
@@ -1,4 +0,0 @@
6 vendor/github.com/BurntSushi/toml/_examples/invalid-apples.toml generated vendored
@@ -1,6 +0,0 @@
5 vendor/github.com/BurntSushi/toml/_examples/readme1.toml generated vendored
@@ -1,5 +0,0 @@
1 vendor/github.com/BurntSushi/toml/_examples/readme2.toml generated vendored
@@ -1 +0,0 @@
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/COPYING generated vendored
@@ -1,14 +0,0 @@
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/README.md generated vendored
@@ -1,14 +0,0 @@
90 vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/main.go generated vendored
@@ -1,90 +0,0 @@
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/COPYING generated vendored
@@ -1,14 +0,0 @@
14 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/README.md generated vendored
@@ -1,14 +0,0 @@
131 vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/main.go generated vendored
@@ -1,131 +0,0 @@
14 vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING generated vendored
@@ -1,14 +0,0 @@
22 vendor/github.com/BurntSushi/toml/cmd/tomlv/README.md generated vendored
@@ -1,22 +0,0 @@
61 vendor/github.com/BurntSushi/toml/cmd/tomlv/main.go generated vendored
@@ -1,61 +0,0 @@
493 vendor/github.com/BurntSushi/toml/decode.go generated vendored
@@ -1,493 +0,0 @@
122
vendor/github.com/BurntSushi/toml/decode_meta.go
generated
vendored
122
vendor/github.com/BurntSushi/toml/decode_meta.go
generated
vendored
@ -1,122 +0,0 @@
package toml

import "strings"

// MetaData allows access to meta information about TOML data that may not
// be inferrable via reflection. In particular, whether a key has been defined
// and the TOML type of a key.
type MetaData struct {
    mapping map[string]interface{}
    types   map[string]tomlType
    keys    []Key
    decoded map[string]bool
    context Key // Used only during decoding.
}

// IsDefined returns true if the key given exists in the TOML data. The key
// should be specified hierarchially. e.g.,
//
//    // access the TOML key 'a.b.c'
//    IsDefined("a", "b", "c")
//
// IsDefined will return false if an empty key given. Keys are case sensitive.
func (md *MetaData) IsDefined(key ...string) bool {
    if len(key) == 0 {
        return false
    }

    var hash map[string]interface{}
    var ok bool
    var hashOrVal interface{} = md.mapping
    for _, k := range key {
        if hash, ok = hashOrVal.(map[string]interface{}); !ok {
            return false
        }
        if hashOrVal, ok = hash[k]; !ok {
            return false
        }
    }
    return true
}

// Type returns a string representation of the type of the key specified.
//
// Type will return the empty string if given an empty key or a key that
// does not exist. Keys are case sensitive.
func (md *MetaData) Type(key ...string) string {
    fullkey := strings.Join(key, ".")
    if typ, ok := md.types[fullkey]; ok {
        return typ.typeString()
    }
    return ""
}

// Key is the type of any TOML key, including key groups. Use (MetaData).Keys
// to get values of this type.
type Key []string

func (k Key) String() string {
    return strings.Join(k, ".")
}

func (k Key) maybeQuotedAll() string {
    var ss []string
    for i := range k {
        ss = append(ss, k.maybeQuoted(i))
    }
    return strings.Join(ss, ".")
}

func (k Key) maybeQuoted(i int) string {
    quote := false
    for _, c := range k[i] {
        if !isBareKeyChar(c) {
            quote = true
            break
        }
    }
    if quote {
        return "\"" + strings.Replace(k[i], "\"", "\\\"", -1) + "\""
    } else {
        return k[i]
    }
}

func (k Key) add(piece string) Key {
    newKey := make(Key, len(k)+1)
    copy(newKey, k)
    newKey[len(k)] = piece
    return newKey
}

// Keys returns a slice of every key in the TOML data, including key groups.
// Each key is itself a slice, where the first element is the top of the
// hierarchy and the last is the most specific.
//
// The list will have the same order as the keys appeared in the TOML data.
//
// All keys returned are non-empty.
func (md *MetaData) Keys() []Key {
    return md.keys
}

// Undecoded returns all keys that have not been decoded in the order in which
// they appear in the original TOML document.
//
// This includes keys that haven't been decoded because of a Primitive value.
// Once the Primitive value is decoded, the keys will be considered decoded.
//
// Also note that decoding into an empty interface will result in no decoding,
// and so no keys will be considered decoded.
//
// In this sense, the Undecoded keys correspond to keys in the TOML document
// that do not have a concrete type in your representation.
func (md *MetaData) Undecoded() []Key {
    undecoded := make([]Key, 0, len(md.keys))
    for _, key := range md.keys {
        if !md.decoded[key.String()] {
            undecoded = append(undecoded, key)
        }
    }
    return undecoded
}
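For reviewers: the file above is only being removed from the vendor tree, not modified. As a reminder of what it provided, here is a minimal sketch of the MetaData API in use, assuming the upstream github.com/BurntSushi/toml package is importable; the conf struct and the document literal are illustrative only and are not part of this commit.

package main

import (
    "fmt"
    "log"

    "github.com/BurntSushi/toml"
)

func main() {
    // The struct deliberately has no field for the `extra` key, so that
    // key should show up in md.Undecoded().
    var conf struct {
        Name string
    }
    doc := "name = \"demo\"\nextra = 1\n"

    md, err := toml.Decode(doc, &conf)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(md.IsDefined("name")) // true
    fmt.Println(md.Type("name"))      // String
    fmt.Println(md.Undecoded())       // [extra]
}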
1018
vendor/github.com/BurntSushi/toml/decode_test.go
generated
vendored
File diff suppressed because it is too large
27
vendor/github.com/BurntSushi/toml/doc.go
generated
vendored
@ -1,27 +0,0 @@
/*
Package toml provides facilities for decoding and encoding TOML configuration
files via reflection. There is also support for delaying decoding with
the Primitive type, and querying the set of keys in a TOML document with the
MetaData type.

The specification implemented: https://github.com/mojombo/toml

The sub-command github.com/BurntSushi/toml/cmd/tomlv can be used to verify
whether a file is a valid TOML document. It can also be used to print the
type of each key in a TOML document.

Testing

There are two important types of tests used for this package. The first is
contained inside '*_test.go' files and uses the standard Go unit testing
framework. These tests are primarily devoted to holistically testing the
decoder and encoder.

The second type of testing is used to verify the implementation's adherence
to the TOML specification. These tests have been factored into their own
project: https://github.com/BurntSushi/toml-test

The reason the tests are in a separate project is so that they can be used by
any implementation of TOML. Namely, it is language agnostic.
*/
package toml
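The package comment above mentions delaying decoding with the Primitive type; a hedged sketch of that pattern follows, again assuming the upstream package rather than anything in this commit. The two-phase decode (read `kind` first, then decode `config` once the shape is known) is the use case Primitive exists for; the keys and document are invented for illustration.

package main

import (
    "fmt"
    "log"

    "github.com/BurntSushi/toml"
)

func main() {
    var root struct {
        Kind   string
        Config toml.Primitive // left undecoded until Kind is inspected
    }
    doc := `
kind = "server"

[config]
port = 8080
`
    md, err := toml.Decode(doc, &root)
    if err != nil {
        log.Fatal(err)
    }
    if root.Kind == "server" {
        var server struct{ Port int }
        if err := md.PrimitiveDecode(root.Config, &server); err != nil {
            log.Fatal(err)
        }
        fmt.Println(server.Port) // 8080
    }
}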
562
vendor/github.com/BurntSushi/toml/encode.go
generated
vendored
@ -1,562 +0,0 @@
package toml

import (
    "bufio"
    "errors"
    "fmt"
    "io"
    "reflect"
    "sort"
    "strconv"
    "strings"
    "time"
)

type tomlEncodeError struct{ error }

var (
    errArrayMixedElementTypes = errors.New(
        "can't encode array with mixed element types")
    errArrayNilElement = errors.New(
        "can't encode array with nil element")
    errNonString = errors.New(
        "can't encode a map with non-string key type")
    errAnonNonStruct = errors.New(
        "can't encode an anonymous field that is not a struct")
    errArrayNoTable = errors.New(
        "TOML array element can't contain a table")
    errNoKey = errors.New(
        "top-level values must be a Go map or struct")
    errAnything = errors.New("") // used in testing
)

var quotedReplacer = strings.NewReplacer(
    "\t", "\\t",
    "\n", "\\n",
    "\r", "\\r",
    "\"", "\\\"",
    "\\", "\\\\",
)

// Encoder controls the encoding of Go values to a TOML document to some
// io.Writer.
//
// The indentation level can be controlled with the Indent field.
type Encoder struct {
    // A single indentation level. By default it is two spaces.
    Indent string

    // hasWritten is whether we have written any output to w yet.
    hasWritten bool
    w          *bufio.Writer
}

// NewEncoder returns a TOML encoder that encodes Go values to the io.Writer
// given. By default, a single indentation level is 2 spaces.
func NewEncoder(w io.Writer) *Encoder {
    return &Encoder{
        w:      bufio.NewWriter(w),
        Indent: "  ",
    }
}

// Encode writes a TOML representation of the Go value to the underlying
// io.Writer. If the value given cannot be encoded to a valid TOML document,
// then an error is returned.
//
// The mapping between Go values and TOML values should be precisely the same
// as for the Decode* functions. Similarly, the TextMarshaler interface is
// supported by encoding the resulting bytes as strings. (If you want to write
// arbitrary binary data then you will need to use something like base64 since
// TOML does not have any binary types.)
//
// When encoding TOML hashes (i.e., Go maps or structs), keys without any
// sub-hashes are encoded first.
//
// If a Go map is encoded, then its keys are sorted alphabetically for
// deterministic output. More control over this behavior may be provided if
// there is demand for it.
//
// Encoding Go values without a corresponding TOML representation---like map
// types with non-string keys---will cause an error to be returned. Similarly
// for mixed arrays/slices, arrays/slices with nil elements, embedded
// non-struct types and nested slices containing maps or structs.
// (e.g., [][]map[string]string is not allowed but []map[string]string is OK
// and so is []map[string][]string.)
func (enc *Encoder) Encode(v interface{}) error {
    rv := eindirect(reflect.ValueOf(v))
    if err := enc.safeEncode(Key([]string{}), rv); err != nil {
        return err
    }
    return enc.w.Flush()
}

func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) {
    defer func() {
        if r := recover(); r != nil {
            if terr, ok := r.(tomlEncodeError); ok {
                err = terr.error
                return
            }
            panic(r)
        }
    }()
    enc.encode(key, rv)
    return nil
}

func (enc *Encoder) encode(key Key, rv reflect.Value) {
    // Special case. Time needs to be in ISO8601 format.
    // Special case. If we can marshal the type to text, then we used that.
    // Basically, this prevents the encoder for handling these types as
    // generic structs (or whatever the underlying type of a TextMarshaler is).
    switch rv.Interface().(type) {
    case time.Time, TextMarshaler:
        enc.keyEqElement(key, rv)
        return
    }

    k := rv.Kind()
    switch k {
    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
        reflect.Int64,
        reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
        reflect.Uint64,
        reflect.Float32, reflect.Float64, reflect.String, reflect.Bool:
        enc.keyEqElement(key, rv)
    case reflect.Array, reflect.Slice:
        if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) {
            enc.eArrayOfTables(key, rv)
        } else {
            enc.keyEqElement(key, rv)
        }
    case reflect.Interface:
        if rv.IsNil() {
            return
        }
        enc.encode(key, rv.Elem())
    case reflect.Map:
        if rv.IsNil() {
            return
        }
        enc.eTable(key, rv)
    case reflect.Ptr:
        if rv.IsNil() {
            return
        }
        enc.encode(key, rv.Elem())
    case reflect.Struct:
        enc.eTable(key, rv)
    default:
        panic(e("Unsupported type for key '%s': %s", key, k))
    }
}

// eElement encodes any value that can be an array element (primitives and
// arrays).
func (enc *Encoder) eElement(rv reflect.Value) {
    switch v := rv.Interface().(type) {
    case time.Time:
        // Special case time.Time as a primitive. Has to come before
        // TextMarshaler below because time.Time implements
        // encoding.TextMarshaler, but we need to always use UTC.
        enc.wf(v.In(time.FixedZone("UTC", 0)).Format("2006-01-02T15:04:05Z"))
        return
    case TextMarshaler:
        // Special case. Use text marshaler if it's available for this value.
        if s, err := v.MarshalText(); err != nil {
            encPanic(err)
        } else {
            enc.writeQuoted(string(s))
        }
        return
    }
    switch rv.Kind() {
    case reflect.Bool:
        enc.wf(strconv.FormatBool(rv.Bool()))
    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
        reflect.Int64:
        enc.wf(strconv.FormatInt(rv.Int(), 10))
    case reflect.Uint, reflect.Uint8, reflect.Uint16,
        reflect.Uint32, reflect.Uint64:
        enc.wf(strconv.FormatUint(rv.Uint(), 10))
    case reflect.Float32:
        enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 32)))
    case reflect.Float64:
        enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 64)))
    case reflect.Array, reflect.Slice:
        enc.eArrayOrSliceElement(rv)
    case reflect.Interface:
        enc.eElement(rv.Elem())
    case reflect.String:
        enc.writeQuoted(rv.String())
    default:
        panic(e("Unexpected primitive type: %s", rv.Kind()))
    }
}

// By the TOML spec, all floats must have a decimal with at least one
// number on either side.
func floatAddDecimal(fstr string) string {
    if !strings.Contains(fstr, ".") {
        return fstr + ".0"
    }
    return fstr
}

func (enc *Encoder) writeQuoted(s string) {
    enc.wf("\"%s\"", quotedReplacer.Replace(s))
}

func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) {
    length := rv.Len()
    enc.wf("[")
    for i := 0; i < length; i++ {
        elem := rv.Index(i)
        enc.eElement(elem)
        if i != length-1 {
            enc.wf(", ")
        }
    }
    enc.wf("]")
}

func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) {
    if len(key) == 0 {
        encPanic(errNoKey)
    }
    for i := 0; i < rv.Len(); i++ {
        trv := rv.Index(i)
        if isNil(trv) {
            continue
        }
        panicIfInvalidKey(key)
        enc.newline()
        enc.wf("%s[[%s]]", enc.indentStr(key), key.maybeQuotedAll())
        enc.newline()
        enc.eMapOrStruct(key, trv)
    }
}

func (enc *Encoder) eTable(key Key, rv reflect.Value) {
    panicIfInvalidKey(key)
    if len(key) == 1 {
        // Output an extra new line between top-level tables.
        // (The newline isn't written if nothing else has been written though.)
        enc.newline()
    }
    if len(key) > 0 {
        enc.wf("%s[%s]", enc.indentStr(key), key.maybeQuotedAll())
        enc.newline()
    }
    enc.eMapOrStruct(key, rv)
}

func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value) {
    switch rv := eindirect(rv); rv.Kind() {
    case reflect.Map:
        enc.eMap(key, rv)
    case reflect.Struct:
        enc.eStruct(key, rv)
    default:
        panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String())
    }
}

func (enc *Encoder) eMap(key Key, rv reflect.Value) {
    rt := rv.Type()
    if rt.Key().Kind() != reflect.String {
        encPanic(errNonString)
    }

    // Sort keys so that we have deterministic output. And write keys directly
    // underneath this key first, before writing sub-structs or sub-maps.
    var mapKeysDirect, mapKeysSub []string
    for _, mapKey := range rv.MapKeys() {
        k := mapKey.String()
        if typeIsHash(tomlTypeOfGo(rv.MapIndex(mapKey))) {
            mapKeysSub = append(mapKeysSub, k)
        } else {
            mapKeysDirect = append(mapKeysDirect, k)
        }
    }

    var writeMapKeys = func(mapKeys []string) {
        sort.Strings(mapKeys)
        for _, mapKey := range mapKeys {
            mrv := rv.MapIndex(reflect.ValueOf(mapKey))
            if isNil(mrv) {
                // Don't write anything for nil fields.
                continue
            }
            enc.encode(key.add(mapKey), mrv)
        }
    }
    writeMapKeys(mapKeysDirect)
    writeMapKeys(mapKeysSub)
}

func (enc *Encoder) eStruct(key Key, rv reflect.Value) {
    // Write keys for fields directly under this key first, because if we write
    // a field that creates a new table, then all keys under it will be in that
    // table (not the one we're writing here).
    rt := rv.Type()
    var fieldsDirect, fieldsSub [][]int
    var addFields func(rt reflect.Type, rv reflect.Value, start []int)
    addFields = func(rt reflect.Type, rv reflect.Value, start []int) {
        for i := 0; i < rt.NumField(); i++ {
            f := rt.Field(i)
            // skip unexported fields
            if f.PkgPath != "" && !f.Anonymous {
                continue
            }
            frv := rv.Field(i)
            if f.Anonymous {
                t := f.Type
                switch t.Kind() {
                case reflect.Struct:
                    addFields(t, frv, f.Index)
                    continue
                case reflect.Ptr:
                    if t.Elem().Kind() == reflect.Struct {
                        if !frv.IsNil() {
                            addFields(t.Elem(), frv.Elem(), f.Index)
                        }
                        continue
                    }
                    // Fall through to the normal field encoding logic below
                    // for non-struct anonymous fields.
                }
            }

            if typeIsHash(tomlTypeOfGo(frv)) {
                fieldsSub = append(fieldsSub, append(start, f.Index...))
            } else {
                fieldsDirect = append(fieldsDirect, append(start, f.Index...))
            }
        }
    }
    addFields(rt, rv, nil)

    var writeFields = func(fields [][]int) {
        for _, fieldIndex := range fields {
            sft := rt.FieldByIndex(fieldIndex)
            sf := rv.FieldByIndex(fieldIndex)
            if isNil(sf) {
                // Don't write anything for nil fields.
                continue
            }

            keyName := sft.Tag.Get("toml")
            if keyName == "-" {
                continue
            }
            if keyName == "" {
                keyName = sft.Name
            }

            keyName, opts := getOptions(keyName)
            if _, ok := opts["omitempty"]; ok && isEmpty(sf) {
                continue
            } else if _, ok := opts["omitzero"]; ok && isZero(sf) {
                continue
            }

            enc.encode(key.add(keyName), sf)
        }
    }
    writeFields(fieldsDirect)
    writeFields(fieldsSub)
}

// tomlTypeName returns the TOML type name of the Go value's type. It is
// used to determine whether the types of array elements are mixed (which is
// forbidden). If the Go value is nil, then it is illegal for it to be an array
// element, and valueIsNil is returned as true.

// Returns the TOML type of a Go value. The type may be `nil`, which means
// no concrete TOML type could be found.
func tomlTypeOfGo(rv reflect.Value) tomlType {
    if isNil(rv) || !rv.IsValid() {
        return nil
    }
    switch rv.Kind() {
    case reflect.Bool:
        return tomlBool
    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
        reflect.Int64,
        reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
        reflect.Uint64:
        return tomlInteger
    case reflect.Float32, reflect.Float64:
        return tomlFloat
    case reflect.Array, reflect.Slice:
        if typeEqual(tomlHash, tomlArrayType(rv)) {
            return tomlArrayHash
        } else {
            return tomlArray
        }
    case reflect.Ptr, reflect.Interface:
        return tomlTypeOfGo(rv.Elem())
    case reflect.String:
        return tomlString
    case reflect.Map:
        return tomlHash
    case reflect.Struct:
        switch rv.Interface().(type) {
        case time.Time:
            return tomlDatetime
        case TextMarshaler:
            return tomlString
        default:
            return tomlHash
        }
    default:
        panic("unexpected reflect.Kind: " + rv.Kind().String())
    }
}

// tomlArrayType returns the element type of a TOML array. The type returned
// may be nil if it cannot be determined (e.g., a nil slice or a zero length
// slize). This function may also panic if it finds a type that cannot be
// expressed in TOML (such as nil elements, heterogeneous arrays or directly
// nested arrays of tables).
func tomlArrayType(rv reflect.Value) tomlType {
    if isNil(rv) || !rv.IsValid() || rv.Len() == 0 {
        return nil
    }
    firstType := tomlTypeOfGo(rv.Index(0))
    if firstType == nil {
        encPanic(errArrayNilElement)
    }

    rvlen := rv.Len()
    for i := 1; i < rvlen; i++ {
        elem := rv.Index(i)
        switch elemType := tomlTypeOfGo(elem); {
        case elemType == nil:
            encPanic(errArrayNilElement)
        case !typeEqual(firstType, elemType):
            encPanic(errArrayMixedElementTypes)
        }
    }
    // If we have a nested array, then we must make sure that the nested
    // array contains ONLY primitives.
    // This checks arbitrarily nested arrays.
    if typeEqual(firstType, tomlArray) || typeEqual(firstType, tomlArrayHash) {
        nest := tomlArrayType(eindirect(rv.Index(0)))
        if typeEqual(nest, tomlHash) || typeEqual(nest, tomlArrayHash) {
            encPanic(errArrayNoTable)
        }
    }
    return firstType
}

func getOptions(keyName string) (string, map[string]struct{}) {
    opts := make(map[string]struct{})
    ss := strings.Split(keyName, ",")
    name := ss[0]
    if len(ss) > 1 {
        for _, opt := range ss {
            opts[opt] = struct{}{}
        }
    }

    return name, opts
}

func isZero(rv reflect.Value) bool {
    switch rv.Kind() {
    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
        if rv.Int() == 0 {
            return true
        }
    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
        if rv.Uint() == 0 {
            return true
        }
    case reflect.Float32, reflect.Float64:
        if rv.Float() == 0.0 {
            return true
        }
    }

    return false
}

func isEmpty(rv reflect.Value) bool {
    switch rv.Kind() {
    case reflect.String:
        if len(strings.TrimSpace(rv.String())) == 0 {
            return true
        }
    case reflect.Array, reflect.Slice, reflect.Map:
        if rv.Len() == 0 {
            return true
        }
    }

    return false
}

func (enc *Encoder) newline() {
    if enc.hasWritten {
        enc.wf("\n")
    }
}

func (enc *Encoder) keyEqElement(key Key, val reflect.Value) {
    if len(key) == 0 {
        encPanic(errNoKey)
    }
    panicIfInvalidKey(key)
    enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1))
    enc.eElement(val)
    enc.newline()
}

func (enc *Encoder) wf(format string, v ...interface{}) {
    if _, err := fmt.Fprintf(enc.w, format, v...); err != nil {
        encPanic(err)
    }
    enc.hasWritten = true
}

func (enc *Encoder) indentStr(key Key) string {
    return strings.Repeat(enc.Indent, len(key)-1)
}

func encPanic(err error) {
    panic(tomlEncodeError{err})
}

func eindirect(v reflect.Value) reflect.Value {
    switch v.Kind() {
    case reflect.Ptr, reflect.Interface:
        return eindirect(v.Elem())
    default:
        return v
    }
}

func isNil(rv reflect.Value) bool {
    switch rv.Kind() {
    case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
        return rv.IsNil()
    default:
        return false
    }
}

func panicIfInvalidKey(key Key) {
    for _, k := range key {
        if len(k) == 0 {
            encPanic(e("Key '%s' is not a valid table name. Key names "+
                "cannot be empty.", key.maybeQuotedAll()))
        }
    }
}

func isValidKeyName(s string) bool {
    return len(s) != 0
}
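To make the encoder deletion above easier to review, here is a small usage sketch of the Encoder API it implements (NewEncoder, the Indent field, and the omitempty tag option handled by getOptions/isEmpty). It assumes the upstream github.com/BurntSushi/toml package; the server type is invented for illustration and is not part of the commit.

package main

import (
    "log"
    "os"

    "github.com/BurntSushi/toml"
)

type server struct {
    Host string   `toml:"host"`
    Port int      `toml:"port"`
    Tags []string `toml:"tags,omitempty"` // skipped while empty, per isEmpty
}

func main() {
    enc := toml.NewEncoder(os.Stdout)
    enc.Indent = "    " // override the two-space default set in NewEncoder
    if err := enc.Encode(server{Host: "example.com", Port: 443}); err != nil {
        log.Fatal(err)
    }
    // Output:
    // host = "example.com"
    // port = 443
}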
566
vendor/github.com/BurntSushi/toml/encode_test.go
generated
vendored
@ -1,566 +0,0 @@
package toml

import (
    "bytes"
    "fmt"
    "log"
    "net"
    "testing"
    "time"
)

func TestEncodeRoundTrip(t *testing.T) {
    type Config struct {
        Age        int
        Cats       []string
        Pi         float64
        Perfection []int
        DOB        time.Time
        Ipaddress  net.IP
    }

    var inputs = Config{
        13,
        []string{"one", "two", "three"},
        3.145,
        []int{11, 2, 3, 4},
        time.Now(),
        net.ParseIP("192.168.59.254"),
    }

    var firstBuffer bytes.Buffer
    e := NewEncoder(&firstBuffer)
    err := e.Encode(inputs)
    if err != nil {
        t.Fatal(err)
    }
    var outputs Config
    if _, err := Decode(firstBuffer.String(), &outputs); err != nil {
        log.Printf("Could not decode:\n-----\n%s\n-----\n",
            firstBuffer.String())
        t.Fatal(err)
    }

    // could test each value individually, but I'm lazy
    var secondBuffer bytes.Buffer
    e2 := NewEncoder(&secondBuffer)
    err = e2.Encode(outputs)
    if err != nil {
        t.Fatal(err)
    }
    if firstBuffer.String() != secondBuffer.String() {
        t.Error(
            firstBuffer.String(),
            "\n\n is not identical to\n\n",
            secondBuffer.String())
    }
}

// XXX(burntsushi)
// I think these tests probably should be removed. They are good, but they
// ought to be obsolete by toml-test.
func TestEncode(t *testing.T) {
    type Embedded struct {
        Int int `toml:"_int"`
    }
    type NonStruct int

    date := time.Date(2014, 5, 11, 20, 30, 40, 0, time.FixedZone("IST", 3600))
    dateStr := "2014-05-11T19:30:40Z"

    tests := map[string]struct {
        input      interface{}
        wantOutput string
        wantError  error
    }{
        "bool field": {
            input: struct {
                BoolTrue  bool
                BoolFalse bool
            }{true, false},
            wantOutput: "BoolTrue = true\nBoolFalse = false\n",
        },
        "int fields": {
            input: struct {
                Int   int
                Int8  int8
                Int16 int16
                Int32 int32
                Int64 int64
            }{1, 2, 3, 4, 5},
            wantOutput: "Int = 1\nInt8 = 2\nInt16 = 3\nInt32 = 4\nInt64 = 5\n",
        },
        "uint fields": {
            input: struct {
                Uint   uint
                Uint8  uint8
                Uint16 uint16
                Uint32 uint32
                Uint64 uint64
            }{1, 2, 3, 4, 5},
            wantOutput: "Uint = 1\nUint8 = 2\nUint16 = 3\nUint32 = 4" +
                "\nUint64 = 5\n",
        },
        "float fields": {
            input: struct {
                Float32 float32
                Float64 float64
            }{1.5, 2.5},
            wantOutput: "Float32 = 1.5\nFloat64 = 2.5\n",
        },
        "string field": {
            input:      struct{ String string }{"foo"},
            wantOutput: "String = \"foo\"\n",
        },
        "string field and unexported field": {
            input: struct {
                String     string
                unexported int
            }{"foo", 0},
            wantOutput: "String = \"foo\"\n",
        },
        "datetime field in UTC": {
            input:      struct{ Date time.Time }{date},
            wantOutput: fmt.Sprintf("Date = %s\n", dateStr),
        },
        "datetime field as primitive": {
            // Using a map here to fail if isStructOrMap() returns true for
            // time.Time.
            input: map[string]interface{}{
                "Date": date,
                "Int":  1,
            },
            wantOutput: fmt.Sprintf("Date = %s\nInt = 1\n", dateStr),
        },
        "array fields": {
            input: struct {
                IntArray0 [0]int
                IntArray3 [3]int
            }{[0]int{}, [3]int{1, 2, 3}},
            wantOutput: "IntArray0 = []\nIntArray3 = [1, 2, 3]\n",
        },
        "slice fields": {
            input: struct{ IntSliceNil, IntSlice0, IntSlice3 []int }{
                nil, []int{}, []int{1, 2, 3},
            },
            wantOutput: "IntSlice0 = []\nIntSlice3 = [1, 2, 3]\n",
        },
        "datetime slices": {
            input: struct{ DatetimeSlice []time.Time }{
                []time.Time{date, date},
            },
            wantOutput: fmt.Sprintf("DatetimeSlice = [%s, %s]\n",
                dateStr, dateStr),
        },
        "nested arrays and slices": {
            input: struct {
                SliceOfArrays         [][2]int
                ArrayOfSlices         [2][]int
                SliceOfArraysOfSlices [][2][]int
                ArrayOfSlicesOfArrays [2][][2]int
                SliceOfMixedArrays    [][2]interface{}
                ArrayOfMixedSlices    [2][]interface{}
            }{
                [][2]int{{1, 2}, {3, 4}},
                [2][]int{{1, 2}, {3, 4}},
                [][2][]int{
                    {
                        {1, 2}, {3, 4},
                    },
                    {
                        {5, 6}, {7, 8},
                    },
                },
                [2][][2]int{
                    {
                        {1, 2}, {3, 4},
                    },
                    {
                        {5, 6}, {7, 8},
                    },
                },
                [][2]interface{}{
                    {1, 2}, {"a", "b"},
                },
                [2][]interface{}{
                    {1, 2}, {"a", "b"},
                },
            },
            wantOutput: `SliceOfArrays = [[1, 2], [3, 4]]
ArrayOfSlices = [[1, 2], [3, 4]]
SliceOfArraysOfSlices = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
ArrayOfSlicesOfArrays = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
SliceOfMixedArrays = [[1, 2], ["a", "b"]]
ArrayOfMixedSlices = [[1, 2], ["a", "b"]]
`,
        },
        "empty slice": {
            input:      struct{ Empty []interface{} }{[]interface{}{}},
            wantOutput: "Empty = []\n",
        },
        "(error) slice with element type mismatch (string and integer)": {
            input:     struct{ Mixed []interface{} }{[]interface{}{1, "a"}},
            wantError: errArrayMixedElementTypes,
        },
        "(error) slice with element type mismatch (integer and float)": {
            input:     struct{ Mixed []interface{} }{[]interface{}{1, 2.5}},
            wantError: errArrayMixedElementTypes,
        },
        "slice with elems of differing Go types, same TOML types": {
            input: struct {
                MixedInts   []interface{}
                MixedFloats []interface{}
            }{
                []interface{}{
                    int(1), int8(2), int16(3), int32(4), int64(5),
                    uint(1), uint8(2), uint16(3), uint32(4), uint64(5),
                },
                []interface{}{float32(1.5), float64(2.5)},
            },
            wantOutput: "MixedInts = [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]\n" +
                "MixedFloats = [1.5, 2.5]\n",
        },
        "(error) slice w/ element type mismatch (one is nested array)": {
            input: struct{ Mixed []interface{} }{
                []interface{}{1, []interface{}{2}},
            },
            wantError: errArrayMixedElementTypes,
        },
        "(error) slice with 1 nil element": {
            input:     struct{ NilElement1 []interface{} }{[]interface{}{nil}},
            wantError: errArrayNilElement,
        },
        "(error) slice with 1 nil element (and other non-nil elements)": {
            input: struct{ NilElement []interface{} }{
                []interface{}{1, nil},
            },
            wantError: errArrayNilElement,
        },
        "simple map": {
            input:      map[string]int{"a": 1, "b": 2},
            wantOutput: "a = 1\nb = 2\n",
        },
        "map with interface{} value type": {
            input:      map[string]interface{}{"a": 1, "b": "c"},
            wantOutput: "a = 1\nb = \"c\"\n",
        },
        "map with interface{} value type, some of which are structs": {
            input: map[string]interface{}{
                "a": struct{ Int int }{2},
                "b": 1,
            },
            wantOutput: "b = 1\n\n[a]\n  Int = 2\n",
        },
        "nested map": {
            input: map[string]map[string]int{
                "a": {"b": 1},
                "c": {"d": 2},
            },
            wantOutput: "[a]\n  b = 1\n\n[c]\n  d = 2\n",
        },
        "nested struct": {
            input: struct{ Struct struct{ Int int } }{
                struct{ Int int }{1},
            },
            wantOutput: "[Struct]\n  Int = 1\n",
        },
        "nested struct and non-struct field": {
            input: struct {
                Struct struct{ Int int }
                Bool   bool
            }{struct{ Int int }{1}, true},
            wantOutput: "Bool = true\n\n[Struct]\n  Int = 1\n",
        },
        "2 nested structs": {
            input: struct{ Struct1, Struct2 struct{ Int int } }{
                struct{ Int int }{1}, struct{ Int int }{2},
            },
            wantOutput: "[Struct1]\n  Int = 1\n\n[Struct2]\n  Int = 2\n",
        },
        "deeply nested structs": {
            input: struct {
                Struct1, Struct2 struct{ Struct3 *struct{ Int int } }
            }{
                struct{ Struct3 *struct{ Int int } }{&struct{ Int int }{1}},
                struct{ Struct3 *struct{ Int int } }{nil},
            },
            wantOutput: "[Struct1]\n  [Struct1.Struct3]\n    Int = 1" +
                "\n\n[Struct2]\n",
        },
        "nested struct with nil struct elem": {
            input: struct {
                Struct struct{ Inner *struct{ Int int } }
            }{
                struct{ Inner *struct{ Int int } }{nil},
            },
            wantOutput: "[Struct]\n",
        },
        "nested struct with no fields": {
            input: struct {
                Struct struct{ Inner struct{} }
            }{
                struct{ Inner struct{} }{struct{}{}},
            },
            wantOutput: "[Struct]\n  [Struct.Inner]\n",
        },
        "struct with tags": {
            input: struct {
                Struct struct {
                    Int int `toml:"_int"`
                } `toml:"_struct"`
                Bool bool `toml:"_bool"`
            }{
                struct {
                    Int int `toml:"_int"`
                }{1}, true,
            },
            wantOutput: "_bool = true\n\n[_struct]\n  _int = 1\n",
        },
        "embedded struct": {
            input:      struct{ Embedded }{Embedded{1}},
            wantOutput: "_int = 1\n",
        },
        "embedded *struct": {
            input:      struct{ *Embedded }{&Embedded{1}},
            wantOutput: "_int = 1\n",
        },
        "nested embedded struct": {
            input: struct {
                Struct struct{ Embedded } `toml:"_struct"`
            }{struct{ Embedded }{Embedded{1}}},
            wantOutput: "[_struct]\n  _int = 1\n",
        },
        "nested embedded *struct": {
            input: struct {
                Struct struct{ *Embedded } `toml:"_struct"`
            }{struct{ *Embedded }{&Embedded{1}}},
            wantOutput: "[_struct]\n  _int = 1\n",
        },
        "embedded non-struct": {
            input:      struct{ NonStruct }{5},
            wantOutput: "NonStruct = 5\n",
        },
        "array of tables": {
            input: struct {
                Structs []*struct{ Int int } `toml:"struct"`
            }{
                []*struct{ Int int }{{1}, {3}},
            },
            wantOutput: "[[struct]]\n  Int = 1\n\n[[struct]]\n  Int = 3\n",
        },
        "array of tables order": {
            input: map[string]interface{}{
                "map": map[string]interface{}{
                    "zero": 5,
                    "arr": []map[string]int{
                        {
                            "friend": 5,
                        },
                    },
                },
            },
            wantOutput: "[map]\n  zero = 5\n\n  [[map.arr]]\n    friend = 5\n",
        },
        "(error) top-level slice": {
            input:     []struct{ Int int }{{1}, {2}, {3}},
            wantError: errNoKey,
        },
        "(error) slice of slice": {
            input: struct {
                Slices [][]struct{ Int int }
            }{
                [][]struct{ Int int }{{{1}}, {{2}}, {{3}}},
            },
            wantError: errArrayNoTable,
        },
        "(error) map no string key": {
            input:     map[int]string{1: ""},
            wantError: errNonString,
        },
        "(error) empty key name": {
            input:     map[string]int{"": 1},
            wantError: errAnything,
        },
        "(error) empty map name": {
            input: map[string]interface{}{
                "": map[string]int{"v": 1},
            },
            wantError: errAnything,
        },
    }
    for label, test := range tests {
        encodeExpected(t, label, test.input, test.wantOutput, test.wantError)
    }
}

func TestEncodeNestedTableArrays(t *testing.T) {
    type song struct {
        Name string `toml:"name"`
    }
    type album struct {
        Name  string `toml:"name"`
        Songs []song `toml:"songs"`
    }
    type springsteen struct {
        Albums []album `toml:"albums"`
    }
    value := springsteen{
        []album{
            {"Born to Run",
                []song{{"Jungleland"}, {"Meeting Across the River"}}},
            {"Born in the USA",
                []song{{"Glory Days"}, {"Dancing in the Dark"}}},
        },
    }
    expected := `[[albums]]
  name = "Born to Run"

  [[albums.songs]]
    name = "Jungleland"

  [[albums.songs]]
    name = "Meeting Across the River"

[[albums]]
  name = "Born in the USA"

  [[albums.songs]]
    name = "Glory Days"

  [[albums.songs]]
    name = "Dancing in the Dark"
`
    encodeExpected(t, "nested table arrays", value, expected, nil)
}

func TestEncodeArrayHashWithNormalHashOrder(t *testing.T) {
    type Alpha struct {
        V int
    }
    type Beta struct {
        V int
    }
    type Conf struct {
        V int
        A Alpha
        B []Beta
    }

    val := Conf{
        V: 1,
        A: Alpha{2},
        B: []Beta{{3}},
    }
    expected := "V = 1\n\n[A]\n  V = 2\n\n[[B]]\n  V = 3\n"
    encodeExpected(t, "array hash with normal hash order", val, expected, nil)
}

func TestEncodeWithOmitEmpty(t *testing.T) {
    type simple struct {
        User string `toml:"user"`
        Pass string `toml:"password,omitempty"`
    }

    value := simple{"Testing", ""}
    expected := fmt.Sprintf("user = %q\n", value.User)
    encodeExpected(t, "simple with omitempty, is empty", value, expected, nil)
    value.Pass = "some password"
    expected = fmt.Sprintf("user = %q\npassword = %q\n", value.User, value.Pass)
    encodeExpected(t, "simple with omitempty, not empty", value, expected, nil)
}

func TestEncodeWithOmitZero(t *testing.T) {
    type simple struct {
        Number   int     `toml:"number,omitzero"`
        Real     float64 `toml:"real,omitzero"`
        Unsigned uint    `toml:"unsigned,omitzero"`
    }

    value := simple{0, 0.0, uint(0)}
    expected := ""

    encodeExpected(t, "simple with omitzero, all zero", value, expected, nil)

    value.Number = 10
    value.Real = 20
    value.Unsigned = 5
    expected = `number = 10
real = 20.0
unsigned = 5
`
    encodeExpected(t, "simple with omitzero, non-zero", value, expected, nil)
}

func TestEncodeAnonymousStructPointerField(t *testing.T) {
    type Sub struct{}
    type simple struct {
        *Sub
    }

    value := simple{}
    expected := ""
    encodeExpected(t, "nil anonymous struct pointer field", value, expected, nil)

    value = simple{Sub: &Sub{}}
    expected = ""
    encodeExpected(t, "non-nil anonymous struct pointer field", value, expected, nil)
}

func TestEncodeIgnoredFields(t *testing.T) {
    type simple struct {
        Number int `toml:"-"`
    }
    value := simple{}
    expected := ""
    encodeExpected(t, "ignored field", value, expected, nil)
}

func encodeExpected(
    t *testing.T, label string, val interface{}, wantStr string, wantErr error,
) {
    var buf bytes.Buffer
    enc := NewEncoder(&buf)
    err := enc.Encode(val)
    if err != wantErr {
        if wantErr != nil {
            if wantErr == errAnything && err != nil {
                return
            }
            t.Errorf("%s: want Encode error %v, got %v", label, wantErr, err)
        } else {
            t.Errorf("%s: Encode failed: %s", label, err)
        }
    }
    if err != nil {
        return
    }
    if got := buf.String(); wantStr != got {
        t.Errorf("%s: want\n-----\n%q\n-----\nbut got\n-----\n%q\n-----\n",
            label, wantStr, got)
    }
}

func ExampleEncoder_Encode() {
    date, _ := time.Parse(time.RFC822, "14 Mar 10 18:00 UTC")
    var config = map[string]interface{}{
        "date":   date,
        "counts": []int{1, 1, 2, 3, 5, 8},
        "hash": map[string]string{
            "key1": "val1",
            "key2": "val2",
        },
    }
    buf := new(bytes.Buffer)
    if err := NewEncoder(buf).Encode(config); err != nil {
        log.Fatal(err)
    }
    fmt.Println(buf.String())

    // Output:
    // counts = [1, 1, 2, 3, 5, 8]
    // date = 2010-03-14T18:00:00Z
    //
    // [hash]
    // key1 = "val1"
    // key2 = "val2"
}
19
vendor/github.com/BurntSushi/toml/encoding_types.go
generated
vendored
@ -1,19 +0,0 @@
// +build go1.2

package toml

// In order to support Go 1.1, we define our own TextMarshaler and
// TextUnmarshaler types. For Go 1.2+, we just alias them with the
// standard library interfaces.

import (
    "encoding"
)

// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler encoding.TextMarshaler

// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler encoding.TextUnmarshaler
18
vendor/github.com/BurntSushi/toml/encoding_types_1.1.go
generated
vendored
@ -1,18 +0,0 @@
// +build !go1.2

package toml

// These interfaces were introduced in Go 1.2, so we add them manually when
// compiling for Go 1.1.

// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler interface {
    MarshalText() (text []byte, err error)
}

// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler interface {
    UnmarshalText(text []byte) error
}
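The two files above exist only to give the package a TextMarshaler type on both sides of the Go 1.2 boundary; eElement in encode.go then writes any such value as a TOML string. A speculative sketch follows, assuming the upstream package; the duration wrapper is invented for illustration and is not part of the commit.

package main

import (
    "log"
    "os"
    "time"

    "github.com/BurntSushi/toml"
)

// duration satisfies encoding.TextMarshaler, so the encoder emits it as a
// quoted TOML string instead of walking it as a struct.
type duration struct{ time.Duration }

func (d duration) MarshalText() ([]byte, error) {
    return []byte(d.String()), nil
}

func main() {
    v := struct {
        Timeout duration `toml:"timeout"`
    }{duration{30 * time.Second}}
    if err := toml.NewEncoder(os.Stdout).Encode(v); err != nil {
        log.Fatal(err)
    }
    // timeout = "30s"
}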
871
vendor/github.com/BurntSushi/toml/lex.go
generated
vendored
@ -1,871 +0,0 @@
|
|||||||
package toml
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
type itemType int
|
|
||||||
|
|
||||||
const (
|
|
||||||
itemError itemType = iota
|
|
||||||
itemNIL // used in the parser to indicate no type
|
|
||||||
itemEOF
|
|
||||||
itemText
|
|
||||||
itemString
|
|
||||||
itemRawString
|
|
||||||
itemMultilineString
|
|
||||||
itemRawMultilineString
|
|
||||||
itemBool
|
|
||||||
itemInteger
|
|
||||||
itemFloat
|
|
||||||
itemDatetime
|
|
||||||
itemArray // the start of an array
|
|
||||||
itemArrayEnd
|
|
||||||
itemTableStart
|
|
||||||
itemTableEnd
|
|
||||||
itemArrayTableStart
|
|
||||||
itemArrayTableEnd
|
|
||||||
itemKeyStart
|
|
||||||
itemCommentStart
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
eof = 0
|
|
||||||
tableStart = '['
|
|
||||||
tableEnd = ']'
|
|
||||||
arrayTableStart = '['
|
|
||||||
arrayTableEnd = ']'
|
|
||||||
tableSep = '.'
|
|
||||||
keySep = '='
|
|
||||||
arrayStart = '['
|
|
||||||
arrayEnd = ']'
|
|
||||||
arrayValTerm = ','
|
|
||||||
commentStart = '#'
|
|
||||||
stringStart = '"'
|
|
||||||
stringEnd = '"'
|
|
||||||
rawStringStart = '\''
|
|
||||||
rawStringEnd = '\''
|
|
||||||
)
|
|
||||||
|
|
||||||
type stateFn func(lx *lexer) stateFn
|
|
||||||
|
|
||||||
type lexer struct {
|
|
||||||
input string
|
|
||||||
start int
|
|
||||||
pos int
|
|
||||||
width int
|
|
||||||
line int
|
|
||||||
state stateFn
|
|
||||||
items chan item
|
|
||||||
|
|
||||||
// A stack of state functions used to maintain context.
|
|
||||||
// The idea is to reuse parts of the state machine in various places.
|
|
||||||
// For example, values can appear at the top level or within arbitrarily
|
|
||||||
// nested arrays. The last state on the stack is used after a value has
|
|
||||||
// been lexed. Similarly for comments.
|
|
||||||
stack []stateFn
|
|
||||||
}
|
|
||||||
|
|
||||||
type item struct {
|
|
||||||
typ itemType
|
|
||||||
val string
|
|
||||||
line int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) nextItem() item {
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case item := <-lx.items:
|
|
||||||
return item
|
|
||||||
default:
|
|
||||||
lx.state = lx.state(lx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func lex(input string) *lexer {
|
|
||||||
lx := &lexer{
|
|
||||||
input: input + "\n",
|
|
||||||
state: lexTop,
|
|
||||||
line: 1,
|
|
||||||
items: make(chan item, 10),
|
|
||||||
stack: make([]stateFn, 0, 10),
|
|
||||||
}
|
|
||||||
return lx
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) push(state stateFn) {
|
|
||||||
lx.stack = append(lx.stack, state)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) pop() stateFn {
|
|
||||||
if len(lx.stack) == 0 {
|
|
||||||
return lx.errorf("BUG in lexer: no states to pop.")
|
|
||||||
}
|
|
||||||
last := lx.stack[len(lx.stack)-1]
|
|
||||||
lx.stack = lx.stack[0 : len(lx.stack)-1]
|
|
||||||
return last
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) current() string {
|
|
||||||
return lx.input[lx.start:lx.pos]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) emit(typ itemType) {
|
|
||||||
lx.items <- item{typ, lx.current(), lx.line}
|
|
||||||
lx.start = lx.pos
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) emitTrim(typ itemType) {
|
|
||||||
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
|
|
||||||
lx.start = lx.pos
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lx *lexer) next() (r rune) {
|
|
||||||
if lx.pos >= len(lx.input) {
|
|
||||||
lx.width = 0
|
|
||||||
return eof
|
|
||||||
}
|
|
||||||
|
|
||||||
if lx.input[lx.pos] == '\n' {
|
|
||||||
lx.line++
|
|
||||||
}
|
|
||||||
r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
|
|
||||||
lx.pos += lx.width
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// ignore skips over the pending input before this point.
|
|
||||||
func (lx *lexer) ignore() {
|
|
||||||
lx.start = lx.pos
|
|
||||||
}
|
|
||||||
|
|
||||||
// backup steps back one rune. Can be called only once per call of next.
|
|
||||||
func (lx *lexer) backup() {
|
|
||||||
lx.pos -= lx.width
|
|
||||||
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
|
|
||||||
lx.line--
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// accept consumes the next rune if it's equal to `valid`.
|
|
||||||
func (lx *lexer) accept(valid rune) bool {
|
|
||||||
if lx.next() == valid {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
lx.backup()
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not consume the next rune in the input.
|
|
||||||
func (lx *lexer) peek() rune {
|
|
||||||
r := lx.next()
|
|
||||||
lx.backup()
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// errorf stops all lexing by emitting an error and returning `nil`.
|
|
||||||
// Note that any value that is a character is escaped if it's a special
|
|
||||||
// character (new lines, tabs, etc.).
|
|
||||||
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
|
|
||||||
lx.items <- item{
|
|
||||||
itemError,
|
|
||||||
fmt.Sprintf(format, values...),
|
|
||||||
lx.line,
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexTop consumes elements at the top level of TOML data.
|
|
||||||
func lexTop(lx *lexer) stateFn {
|
|
||||||
r := lx.next()
|
|
||||||
if isWhitespace(r) || isNL(r) {
|
|
||||||
return lexSkip(lx, lexTop)
|
|
||||||
}
|
|
||||||
|
|
||||||
switch r {
|
|
||||||
case commentStart:
|
|
||||||
lx.push(lexTop)
|
|
||||||
return lexCommentStart
|
|
||||||
case tableStart:
|
|
||||||
return lexTableStart
|
|
||||||
case eof:
|
|
||||||
if lx.pos > lx.start {
|
|
||||||
return lx.errorf("Unexpected EOF.")
|
|
||||||
}
|
|
||||||
lx.emit(itemEOF)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this point, the only valid item can be a key, so we back up
|
|
||||||
// and let the key lexer do the rest.
|
|
||||||
lx.backup()
|
|
||||||
lx.push(lexTopEnd)
|
|
||||||
return lexKeyStart
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
|
|
||||||
// or a table.) It must see only whitespace, and will turn back to lexTop
|
|
||||||
// upon a new line. If it sees EOF, it will quit the lexer successfully.
|
|
||||||
func lexTopEnd(lx *lexer) stateFn {
|
|
||||||
r := lx.next()
|
|
||||||
switch {
|
|
||||||
case r == commentStart:
|
|
||||||
// a comment will read to a new line for us.
|
|
||||||
lx.push(lexTop)
|
|
||||||
return lexCommentStart
|
|
||||||
case isWhitespace(r):
|
|
||||||
return lexTopEnd
|
|
||||||
case isNL(r):
|
|
||||||
lx.ignore()
|
|
||||||
return lexTop
|
|
||||||
case r == eof:
|
|
||||||
lx.ignore()
|
|
||||||
return lexTop
|
|
||||||
}
|
|
||||||
return lx.errorf("Expected a top-level item to end with a new line, "+
|
|
||||||
"comment or EOF, but got %q instead.", r)
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexTable lexes the beginning of a table. Namely, it makes sure that
|
|
||||||
// it starts with a character other than '.' and ']'.
|
|
||||||
// It assumes that '[' has already been consumed.
|
|
||||||
// It also handles the case that this is an item in an array of tables.
|
|
||||||
// e.g., '[[name]]'.
|
|
||||||
func lexTableStart(lx *lexer) stateFn {
|
|
||||||
if lx.peek() == arrayTableStart {
|
|
||||||
lx.next()
|
|
||||||
lx.emit(itemArrayTableStart)
|
|
||||||
lx.push(lexArrayTableEnd)
|
|
||||||
} else {
|
|
||||||
lx.emit(itemTableStart)
|
|
||||||
lx.push(lexTableEnd)
|
|
||||||
}
|
|
||||||
return lexTableNameStart
|
|
||||||
}
|
|
||||||
|
|
||||||
func lexTableEnd(lx *lexer) stateFn {
|
|
||||||
lx.emit(itemTableEnd)
|
|
||||||
return lexTopEnd
|
|
||||||
}
|
|
||||||
|
|
||||||
func lexArrayTableEnd(lx *lexer) stateFn {
|
|
||||||
if r := lx.next(); r != arrayTableEnd {
|
|
||||||
return lx.errorf("Expected end of table array name delimiter %q, "+
|
|
||||||
"but got %q instead.", arrayTableEnd, r)
|
|
||||||
}
|
|
||||||
lx.emit(itemArrayTableEnd)
|
|
||||||
return lexTopEnd
|
|
||||||
}
|
|
||||||
|
|
||||||
func lexTableNameStart(lx *lexer) stateFn {
|
|
||||||
switch r := lx.peek(); {
|
|
||||||
case r == tableEnd || r == eof:
|
|
||||||
return lx.errorf("Unexpected end of table name. (Table names cannot " +
|
|
||||||
"be empty.)")
|
|
||||||
case r == tableSep:
|
|
||||||
return lx.errorf("Unexpected table separator. (Table names cannot " +
|
|
||||||
"be empty.)")
|
|
||||||
case r == stringStart || r == rawStringStart:
|
|
||||||
lx.ignore()
|
|
||||||
lx.push(lexTableNameEnd)
|
|
||||||
return lexValue // reuse string lexing
|
|
||||||
default:
|
|
||||||
return lexBareTableName
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexTableName lexes the name of a table. It assumes that at least one
|
|
||||||
// valid character for the table has already been read.
|
|
||||||
func lexBareTableName(lx *lexer) stateFn {
|
|
||||||
switch r := lx.next(); {
|
|
||||||
case isBareKeyChar(r):
|
|
||||||
return lexBareTableName
|
|
||||||
case r == tableSep || r == tableEnd:
|
|
||||||
lx.backup()
|
|
||||||
lx.emitTrim(itemText)
|
|
||||||
return lexTableNameEnd
|
|
||||||
default:
|
|
||||||
return lx.errorf("Bare keys cannot contain %q.", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexTableNameEnd reads the end of a piece of a table name, optionally
|
|
||||||
// consuming whitespace.
|
|
||||||
func lexTableNameEnd(lx *lexer) stateFn {
|
|
||||||
switch r := lx.next(); {
|
|
||||||
case isWhitespace(r):
|
|
||||||
return lexTableNameEnd
|
|
||||||
case r == tableSep:
|
|
||||||
lx.ignore()
|
|
||||||
return lexTableNameStart
|
|
||||||
case r == tableEnd:
|
|
||||||
return lx.pop()
|
|
||||||
default:
|
|
||||||
return lx.errorf("Expected '.' or ']' to end table name, but got %q "+
|
|
||||||
"instead.", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexKeyStart consumes a key name up until the first non-whitespace character.
|
|
||||||
// lexKeyStart will ignore whitespace.
|
|
||||||
func lexKeyStart(lx *lexer) stateFn {
|
|
||||||
r := lx.peek()
|
|
||||||
switch {
|
|
||||||
case r == keySep:
|
|
||||||
return lx.errorf("Unexpected key separator %q.", keySep)
|
|
||||||
case isWhitespace(r) || isNL(r):
|
|
||||||
lx.next()
|
|
||||||
return lexSkip(lx, lexKeyStart)
|
|
||||||
case r == stringStart || r == rawStringStart:
|
|
||||||
lx.ignore()
|
|
||||||
lx.emit(itemKeyStart)
|
|
||||||
lx.push(lexKeyEnd)
|
|
||||||
return lexValue // reuse string lexing
|
|
||||||
default:
|
|
||||||
lx.ignore()
|
|
||||||
lx.emit(itemKeyStart)
|
|
||||||
return lexBareKey
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexBareKey consumes the text of a bare key. Assumes that the first character
|
|
||||||
// (which is not whitespace) has not yet been consumed.
|
|
||||||
func lexBareKey(lx *lexer) stateFn {
|
|
||||||
switch r := lx.next(); {
|
|
||||||
case isBareKeyChar(r):
|
|
||||||
return lexBareKey
|
|
||||||
case isWhitespace(r):
|
|
||||||
lx.emitTrim(itemText)
|
|
||||||
return lexKeyEnd
|
|
||||||
case r == keySep:
|
|
||||||
lx.backup()
|
|
||||||
lx.emitTrim(itemText)
|
|
||||||
return lexKeyEnd
|
|
||||||
default:
|
|
||||||
return lx.errorf("Bare keys cannot contain %q.", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
|
|
||||||
// separator).
|
|
||||||
func lexKeyEnd(lx *lexer) stateFn {
|
|
||||||
switch r := lx.next(); {
|
|
||||||
case r == keySep:
|
|
||||||
return lexSkip(lx, lexValue)
|
|
||||||
case isWhitespace(r):
|
|
||||||
return lexSkip(lx, lexKeyEnd)
|
|
||||||
default:
|
|
||||||
return lx.errorf("Expected key separator %q, but got %q instead.",
|
|
||||||
keySep, r)
|
|
||||||
}
|
|
||||||
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
    // We allow whitespace to precede a value, but NOT new lines.
    // In array syntax, the array states are responsible for ignoring new
    // lines.
    r := lx.next()
    if isWhitespace(r) {
        return lexSkip(lx, lexValue)
    }

    switch {
    case r == arrayStart:
        lx.ignore()
        lx.emit(itemArray)
        return lexArrayValue
    case r == stringStart:
        if lx.accept(stringStart) {
            if lx.accept(stringStart) {
                lx.ignore() // Ignore """
                return lexMultilineString
            }
            lx.backup()
        }
        lx.ignore() // ignore the '"'
        return lexString
    case r == rawStringStart:
        if lx.accept(rawStringStart) {
            if lx.accept(rawStringStart) {
                lx.ignore() // Ignore '''
                return lexMultilineRawString
            }
            lx.backup()
        }
        lx.ignore() // ignore the "'"
        return lexRawString
    case r == 't':
        return lexTrue
    case r == 'f':
        return lexFalse
    case r == '-':
        return lexNumberStart
    case isDigit(r):
        lx.backup() // avoid an extra state and use the same as above
        return lexNumberOrDateStart
    case r == '.': // special error case, be kind to users
        return lx.errorf("Floats must start with a digit, not '.'.")
    }
    return lx.errorf("Expected value but found %q instead.", r)
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
func lexArrayValue(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case isWhitespace(r) || isNL(r):
        return lexSkip(lx, lexArrayValue)
    case r == commentStart:
        lx.push(lexArrayValue)
        return lexCommentStart
    case r == arrayValTerm:
        return lx.errorf("Unexpected array value terminator %q.",
            arrayValTerm)
    case r == arrayEnd:
        return lexArrayEnd
    }

    lx.backup()
    lx.push(lexArrayValueEnd)
    return lexValue
}

// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case isWhitespace(r) || isNL(r):
        return lexSkip(lx, lexArrayValueEnd)
    case r == commentStart:
        lx.push(lexArrayValueEnd)
        return lexCommentStart
    case r == arrayValTerm:
        lx.ignore()
        return lexArrayValue // move on to the next value
    case r == arrayEnd:
        return lexArrayEnd
    }
    return lx.errorf("Expected an array value terminator %q or an array "+
        "terminator %q, but got %q instead.", arrayValTerm, arrayEnd, r)
}

// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
    lx.ignore()
    lx.emit(itemArrayEnd)
    return lx.pop()
}
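
Array lexing relies on the lexer's state *stack*: `lexArrayValue` pushes `lexArrayValueEnd` before handing off to `lexValue`, so whichever state finishes the value knows where to return via `pop`. A hedged sketch of just that push/pop mechanism (the field names here are assumptions for illustration, not the vendored lexer's):

```go
package main

import "fmt"

type stateFn func(*lexer) stateFn

// lexer holds only the state stack needed for this illustration; the
// vendored lexer also tracks input, positions, and an item channel.
type lexer struct {
	stack []stateFn
}

func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

// pop returns the most recently pushed state; the real lexer reports a
// bug on an empty stack instead of returning nil.
func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return nil
	}
	last := lx.stack[len(lx.stack)-1]
	lx.stack = lx.stack[:len(lx.stack)-1]
	return last
}

func main() {
	lx := &lexer{}
	var outer, inner stateFn
	outer = func(lx *lexer) stateFn {
		fmt.Println("back in the array state")
		return nil
	}
	inner = func(lx *lexer) stateFn {
		fmt.Println("finished a value, popping")
		return lx.pop()
	}
	// Mirrors lexArrayValue: remember where to resume, then lex the value.
	lx.push(outer)
	for state := inner; state != nil; {
		state = state(lx)
	}
}
```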

// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case isNL(r):
        return lx.errorf("Strings cannot contain new lines.")
    case r == '\\':
        lx.push(lexString)
        return lexStringEscape
    case r == stringEnd:
        lx.backup()
        lx.emit(itemString)
        lx.next()
        lx.ignore()
        return lx.pop()
    }
    return lexString
}

// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case r == '\\':
        return lexMultilineStringEscape
    case r == stringEnd:
        if lx.accept(stringEnd) {
            if lx.accept(stringEnd) {
                lx.backup()
                lx.backup()
                lx.backup()
                lx.emit(itemMultilineString)
                lx.next()
                lx.next()
                lx.next()
                lx.ignore()
                return lx.pop()
            }
            lx.backup()
        }
    }
    return lexMultilineString
}
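
The `backup`/`emit`/`next`/`ignore` sequence in `lexMultilineString` exists to keep the closing `"""` out of the emitted token: after the three quote runes are consumed, the lexer rewinds three runes, emits the string contents, then re-advances and discards the delimiter. A simplified cursor model of that dance (ASCII-only for brevity; the real lexer steps by rune width):

```go
package main

import "fmt"

// cursor is a stripped-down stand-in for the lexer's start/pos bookkeeping.
type cursor struct {
	input      string
	start, pos int
}

func (c *cursor) backup()      { c.pos-- }
func (c *cursor) next()        { c.pos++ }
func (c *cursor) ignore()      { c.start = c.pos }
func (c *cursor) emit() string { s := c.input[c.start:c.pos]; c.start = c.pos; return s }

func main() {
	// Pretend we just consumed the closing """ of `hello"""`.
	c := &cursor{input: `hello"""`, pos: len(`hello"""`)}
	c.backup()
	c.backup()
	c.backup()
	fmt.Printf("emitted: %q\n", c.emit()) // emitted: "hello"
	c.next()
	c.next()
	c.next()
	c.ignore() // the delimiter is skipped, never emitted
}
```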

// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case isNL(r):
        return lx.errorf("Strings cannot contain new lines.")
    case r == rawStringEnd:
        lx.backup()
        lx.emit(itemRawString)
        lx.next()
        lx.ignore()
        return lx.pop()
    }
    return lexRawString
}

// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case r == rawStringEnd:
        if lx.accept(rawStringEnd) {
            if lx.accept(rawStringEnd) {
                lx.backup()
                lx.backup()
                lx.backup()
                lx.emit(itemRawMultilineString)
                lx.next()
                lx.next()
                lx.next()
                lx.ignore()
                return lx.pop()
            }
            lx.backup()
        }
    }
    return lexMultilineRawString
}

// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
    // Handle the special case first: an escaped newline continues the
    // string on the next line.
    if isNL(lx.next()) {
        return lexMultilineString
    }
    lx.backup()
    lx.push(lexMultilineString)
    return lexStringEscape(lx)
}

func lexStringEscape(lx *lexer) stateFn {
    r := lx.next()
    switch r {
    case 'b', 't', 'n', 'f', 'r', '"', '\\':
        return lx.pop()
    case 'u':
        return lexShortUnicodeEscape
    case 'U':
        return lexLongUnicodeEscape
    }
    return lx.errorf("Invalid escape character %q. Only the following "+
        "escape characters are allowed: "+
        "\\b, \\t, \\n, \\f, \\r, \\\", \\\\, "+
        "\\uXXXX and \\UXXXXXXXX.", r)
}

func lexShortUnicodeEscape(lx *lexer) stateFn {
    var r rune
    for i := 0; i < 4; i++ {
        r = lx.next()
        if !isHexadecimal(r) {
            return lx.errorf("Expected four hexadecimal digits after '\\u', "+
                "but got '%s' instead.", lx.current())
        }
    }
    return lx.pop()
}

func lexLongUnicodeEscape(lx *lexer) stateFn {
    var r rune
    for i := 0; i < 8; i++ {
        r = lx.next()
        if !isHexadecimal(r) {
            return lx.errorf("Expected eight hexadecimal digits after '\\U', "+
                "but got '%s' instead.", lx.current())
        }
    }
    return lx.pop()
}

// lexNumberOrDateStart consumes either a (positive) integer, float or
// datetime. It assumes that NO negative sign has been consumed.
func lexNumberOrDateStart(lx *lexer) stateFn {
    r := lx.next()
    if !isDigit(r) {
        if r == '.' {
            return lx.errorf("Floats must start with a digit, not '.'.")
        }
        return lx.errorf("Expected a digit but got %q.", r)
    }
    return lexNumberOrDate
}

// lexNumberOrDate consumes either a (positive) integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case r == '-':
        if lx.pos-lx.start != 5 {
            return lx.errorf("All ISO8601 dates must be in full Zulu form.")
        }
        return lexDateAfterYear
    case isDigit(r):
        return lexNumberOrDate
    case r == '.':
        return lexFloatStart
    }

    lx.backup()
    lx.emit(itemInteger)
    return lx.pop()
}

// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
// It assumes that "YYYY-" has already been consumed.
func lexDateAfterYear(lx *lexer) stateFn {
    formats := []rune{
        // digits are '0'.
        // everything else is direct equality.
        '0', '0', '-', '0', '0',
        'T',
        '0', '0', ':', '0', '0', ':', '0', '0',
        'Z',
    }
    for _, f := range formats {
        r := lx.next()
        if f == '0' {
            if !isDigit(r) {
                return lx.errorf("Expected digit in ISO8601 datetime, "+
                    "but found %q instead.", r)
            }
        } else if f != r {
            return lx.errorf("Expected %q in ISO8601 datetime, "+
                "but found %q instead.", f, r)
        }
    }
    lx.emit(itemDatetime)
    return lx.pop()
}
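
The `formats` slice is effectively a template: `'0'` matches any ASCII digit and every other rune must match literally, so only the full Zulu form `YYYY-MM-DDTHH:MM:SSZ` is accepted, with no fractional seconds or offsets. The same check, extracted into a standalone function for clarity (a sketch, not the vendored code):

```go
package main

import "fmt"

// matchZulu reports whether s has the exact shape MM-DDTHH:MM:SSZ that
// lexDateAfterYear expects once "YYYY-" has been consumed. '0' in the
// template means "any ASCII digit"; every other rune must match exactly.
func matchZulu(s string) bool {
	template := "00-00T00:00:00Z"
	if len(s) != len(template) {
		return false
	}
	for i, f := range template {
		r := rune(s[i])
		if f == '0' {
			if r < '0' || r > '9' {
				return false
			}
		} else if f != r {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(matchZulu("01-02T15:04:05Z")) // true
	fmt.Println(matchZulu("01-02 15:04:05Z")) // false: space instead of 'T'
}
```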

// lexNumberStart consumes either an integer or a float. It assumes that
// a negative sign has already been read, but that *no* digits have been
// consumed. lexNumberStart will move to the appropriate integer or float
// states.
func lexNumberStart(lx *lexer) stateFn {
    // We MUST see a digit. Even floats have to start with a digit.
    r := lx.next()
    if !isDigit(r) {
        if r == '.' {
            return lx.errorf("Floats must start with a digit, not '.'.")
        }
        return lx.errorf("Expected a digit but got %q.", r)
    }
    return lexNumber
}

// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
    r := lx.next()
    switch {
    case isDigit(r):
        return lexNumber
    case r == '.':
        return lexFloatStart
    }

    lx.backup()
    lx.emit(itemInteger)
    return lx.pop()
}

// lexFloatStart starts the consumption of digits of a float after a '.'.
// Namely, at least one digit is required.
func lexFloatStart(lx *lexer) stateFn {
    r := lx.next()
    if !isDigit(r) {
        return lx.errorf("Floats must have a digit after the '.', but got "+
            "%q instead.", r)
    }
    return lexFloat
}

// lexFloat consumes the digits of a float after a '.'.
// Assumes that one digit has been consumed after a '.' already.
func lexFloat(lx *lexer) stateFn {
    r := lx.next()
    if isDigit(r) {
        return lexFloat
    }

    lx.backup()
    lx.emit(itemFloat)
    return lx.pop()
}

// lexConst consumes the s[1:] in s. It assumes that s[0] has already been
// consumed.
func lexConst(lx *lexer, s string) stateFn {
    for i := range s[1:] {
        if r := lx.next(); r != rune(s[i+1]) {
            return lx.errorf("Expected %q, but found %q instead.", s[:i+1],
                s[:i]+string(r))
        }
    }
    return nil
}

// lexTrue consumes the "rue" in "true". It assumes that 't' has already
// been consumed.
func lexTrue(lx *lexer) stateFn {
    if fn := lexConst(lx, "true"); fn != nil {
        return fn
    }
    lx.emit(itemBool)
    return lx.pop()
}

// lexFalse consumes the "alse" in "false". It assumes that 'f' has already
// been consumed.
func lexFalse(lx *lexer) stateFn {
    if fn := lexConst(lx, "false"); fn != nil {
        return fn
    }
    lx.emit(itemBool)
    return lx.pop()
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
    lx.ignore()
    lx.emit(itemCommentStart)
    return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first new line character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
    r := lx.peek()
    if isNL(r) || r == eof {
        lx.emit(itemText)
        return lx.pop()
    }
    lx.next()
    return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
    return func(lx *lexer) stateFn {
        lx.ignore()
        return nextState
    }
}

// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
    return r == '\t' || r == ' '
}

func isNL(r rune) bool {
    return r == '\n' || r == '\r'
}

func isDigit(r rune) bool {
    return r >= '0' && r <= '9'
}

func isHexadecimal(r rune) bool {
    return (r >= '0' && r <= '9') ||
        (r >= 'a' && r <= 'f') ||
        (r >= 'A' && r <= 'F')
}

func isBareKeyChar(r rune) bool {
    return (r >= 'A' && r <= 'Z') ||
        (r >= 'a' && r <= 'z') ||
        (r >= '0' && r <= '9') ||
        r == '_' ||
        r == '-'
}

func (itype itemType) String() string {
    switch itype {
    case itemError:
        return "Error"
    case itemNIL:
        return "NIL"
    case itemEOF:
        return "EOF"
    case itemText:
        return "Text"
    case itemString:
        return "String"
    case itemRawString:
        return "String"
    case itemMultilineString:
        return "String"
    case itemRawMultilineString:
        return "String"
    case itemBool:
        return "Bool"
    case itemInteger:
        return "Integer"
    case itemFloat:
        return "Float"
    case itemDatetime:
        return "DateTime"
    case itemTableStart:
        return "TableStart"
    case itemTableEnd:
        return "TableEnd"
    case itemKeyStart:
        return "KeyStart"
    case itemArray:
        return "Array"
    case itemArrayEnd:
        return "ArrayEnd"
    case itemCommentStart:
        return "CommentStart"
    }
    panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

func (item item) String() string {
    return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}

493  vendor/github.com/BurntSushi/toml/parse.go  (generated, vendored)
@@ -1,493 +0,0 @@
package toml

import (
    "fmt"
    "log"
    "strconv"
    "strings"
    "time"
    "unicode"
    "unicode/utf8"
)

type parser struct {
    mapping map[string]interface{}
    types   map[string]tomlType
    lx      *lexer

    // A list of keys in the order that they appear in the TOML data.
    ordered []Key

    // the full key for the current hash in scope
    context Key

    // the base key name for everything except hashes
    currentKey string

    // rough approximation of line number
    approxLine int

    // A map of 'key.group.names' to whether they were created implicitly.
    implicits map[string]bool
}

type parseError string

func (pe parseError) Error() string {
    return string(pe)
}

func parse(data string) (p *parser, err error) {
    defer func() {
        if r := recover(); r != nil {
            var ok bool
            if err, ok = r.(parseError); ok {
                return
            }
            panic(r)
        }
    }()

    p = &parser{
        mapping:   make(map[string]interface{}),
        types:     make(map[string]tomlType),
        lx:        lex(data),
        ordered:   make([]Key, 0),
        implicits: make(map[string]bool),
    }
    for {
        item := p.next()
        if item.typ == itemEOF {
            break
        }
        p.topLevel(item)
    }

    return p, nil
}
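
`parse` turns internal `parseError` panics into ordinary returned errors through the deferred `recover`, while re-raising anything else as a genuine bug. A runnable distillation of that pattern (the function and message here are illustrative, not the vendored code):

```go
package main

import "fmt"

type parseError string

func (pe parseError) Error() string { return string(pe) }

// parseish mirrors the recover discipline used by parse: a parseError
// panic becomes the returned err, any other panic is re-raised.
func parseish(fail bool) (err error) {
	defer func() {
		if r := recover(); r != nil {
			if perr, ok := r.(parseError); ok {
				err = perr
				return
			}
			panic(r) // a genuine bug, not a user error
		}
	}()
	if fail {
		panic(parseError("Near line 3: duplicate key"))
	}
	return nil
}

func main() {
	fmt.Println(parseish(false)) // <nil>
	fmt.Println(parseish(true))  // Near line 3: duplicate key
}
```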

func (p *parser) panicf(format string, v ...interface{}) {
    msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
        p.approxLine, p.current(), fmt.Sprintf(format, v...))
    panic(parseError(msg))
}

func (p *parser) next() item {
    it := p.lx.nextItem()
    if it.typ == itemError {
        p.panicf("%s", it.val)
    }
    return it
}

func (p *parser) bug(format string, v ...interface{}) {
    log.Panicf("BUG: %s\n\n", fmt.Sprintf(format, v...))
}

func (p *parser) expect(typ itemType) item {
    it := p.next()
    p.assertEqual(typ, it.typ)
    return it
}

func (p *parser) assertEqual(expected, got itemType) {
    if expected != got {
        p.bug("Expected '%s' but got '%s'.", expected, got)
    }
}

func (p *parser) topLevel(item item) {
    switch item.typ {
    case itemCommentStart:
        p.approxLine = item.line
        p.expect(itemText)
    case itemTableStart:
        kg := p.next()
        p.approxLine = kg.line

        var key Key
        for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
            key = append(key, p.keyString(kg))
        }
        p.assertEqual(itemTableEnd, kg.typ)

        p.establishContext(key, false)
        p.setType("", tomlHash)
        p.ordered = append(p.ordered, key)
    case itemArrayTableStart:
        kg := p.next()
        p.approxLine = kg.line

        var key Key
        for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
            key = append(key, p.keyString(kg))
        }
        p.assertEqual(itemArrayTableEnd, kg.typ)

        p.establishContext(key, true)
        p.setType("", tomlArrayHash)
        p.ordered = append(p.ordered, key)
    case itemKeyStart:
        kname := p.next()
        p.approxLine = kname.line
        p.currentKey = p.keyString(kname)

        val, typ := p.value(p.next())
        p.setValue(p.currentKey, val)
        p.setType(p.currentKey, typ)
        p.ordered = append(p.ordered, p.context.add(p.currentKey))
        p.currentKey = ""
    default:
        p.bug("Unexpected type at top level: %s", item.typ)
    }
}

// Gets a string for a key (or part of a key in a table name).
func (p *parser) keyString(it item) string {
    switch it.typ {
    case itemText:
        return it.val
    case itemString, itemMultilineString,
        itemRawString, itemRawMultilineString:
        s, _ := p.value(it)
        return s.(string)
    default:
        p.bug("Unexpected key type: %s", it.typ)
        panic("unreachable")
    }
}

// value translates an expected value from the lexer into a Go value wrapped
// as an empty interface.
func (p *parser) value(it item) (interface{}, tomlType) {
    switch it.typ {
    case itemString:
        return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
    case itemMultilineString:
        trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
        return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
    case itemRawString:
        return it.val, p.typeOfPrimitive(it)
    case itemRawMultilineString:
        return stripFirstNewline(it.val), p.typeOfPrimitive(it)
    case itemBool:
        switch it.val {
        case "true":
            return true, p.typeOfPrimitive(it)
        case "false":
            return false, p.typeOfPrimitive(it)
        }
        p.bug("Expected boolean value, but got '%s'.", it.val)
    case itemInteger:
        num, err := strconv.ParseInt(it.val, 10, 64)
        if err != nil {
            // See comment below for floats describing why we make a
            // distinction between a bug and a user error.
            if e, ok := err.(*strconv.NumError); ok &&
                e.Err == strconv.ErrRange {

                p.panicf("Integer '%s' is out of the range of 64-bit "+
                    "signed integers.", it.val)
            } else {
                p.bug("Expected integer value, but got '%s'.", it.val)
            }
        }
        return num, p.typeOfPrimitive(it)
    case itemFloat:
        num, err := strconv.ParseFloat(it.val, 64)
        if err != nil {
            // Distinguish float values. Normally, it'd be a bug if the lexer
            // provides an invalid float, but it's possible that the float is
            // out of range of valid values (which the lexer cannot determine).
            // So mark the former as a bug but the latter as a legitimate user
            // error.
            //
            // This is also true for integers.
            if e, ok := err.(*strconv.NumError); ok &&
                e.Err == strconv.ErrRange {

                p.panicf("Float '%s' is out of the range of 64-bit "+
                    "IEEE-754 floating-point numbers.", it.val)
            } else {
                p.bug("Expected float value, but got '%s'.", it.val)
            }
        }
        return num, p.typeOfPrimitive(it)
    case itemDatetime:
        t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
        if err != nil {
            p.panicf("Invalid RFC3339 Zulu DateTime: '%s'.", it.val)
        }
        return t, p.typeOfPrimitive(it)
    case itemArray:
        array := make([]interface{}, 0)
        types := make([]tomlType, 0)

        for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
            if it.typ == itemCommentStart {
                p.expect(itemText)
                continue
            }

            val, typ := p.value(it)
            array = append(array, val)
            types = append(types, typ)
        }
        return array, p.typeOfArray(types)
    }
    p.bug("Unexpected value type: %s", it.typ)
    panic("unreachable")
}
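
The `*strconv.NumError` inspection above is the interesting part: the lexer already guarantees the syntax of numbers, so `strconv.ErrRange` is the only failure a user can legitimately cause; any other error means the lexer let something malformed through. A standalone sketch of the same classification:

```go
package main

import (
	"fmt"
	"strconv"
)

// classify mirrors the distinction p.value draws for itemInteger: the
// lexer guarantees the syntax, so ErrRange is the only error a user can
// legitimately trigger; anything else would be a lexer bug.
func classify(val string) string {
	_, err := strconv.ParseInt(val, 10, 64)
	if err == nil {
		return "ok"
	}
	if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
		return "user error: out of int64 range"
	}
	return "bug: lexer let a malformed integer through"
}

func main() {
	fmt.Println(classify("42"))                  // ok
	fmt.Println(classify("9223372036854775808")) // user error: out of int64 range
	fmt.Println(classify("4x2"))                 // bug: lexer let a malformed integer through
}
```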

// establishContext sets the current context of the parser,
// where the context is either a hash or an array of hashes. Which one is
// set depends on the value of the `array` parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
func (p *parser) establishContext(key Key, array bool) {
    var ok bool

    // Always start at the top level and drill down for our context.
    hashContext := p.mapping
    keyContext := make(Key, 0)

    // We only need implicit hashes for key[0:-1]
    for _, k := range key[0 : len(key)-1] {
        _, ok = hashContext[k]
        keyContext = append(keyContext, k)

        // No key? Make an implicit hash and move on.
        if !ok {
            p.addImplicit(keyContext)
            hashContext[k] = make(map[string]interface{})
        }

        // If the hash context is actually an array of tables, then set
        // the hash context to the last element in that array.
        //
        // Otherwise, it better be a table, since this MUST be a key group (by
        // virtue of it not being the last element in a key).
        switch t := hashContext[k].(type) {
        case []map[string]interface{}:
            hashContext = t[len(t)-1]
        case map[string]interface{}:
            hashContext = t
        default:
            p.panicf("Key '%s' was already created as a hash.", keyContext)
        }
    }

    p.context = keyContext
    if array {
        // If this is the first element for this array, then allocate a new
        // list of tables for it.
        k := key[len(key)-1]
        if _, ok := hashContext[k]; !ok {
            hashContext[k] = make([]map[string]interface{}, 0, 5)
        }

        // Add a new table. But make sure the key hasn't already been used
        // for something else.
        if hash, ok := hashContext[k].([]map[string]interface{}); ok {
            hashContext[k] = append(hash, make(map[string]interface{}))
        } else {
            p.panicf("Key '%s' was already created and cannot be used as "+
                "an array.", keyContext)
        }
    } else {
        p.setValue(key[len(key)-1], make(map[string]interface{}))
    }
    p.context = append(p.context, key[len(key)-1])
}
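
Concretely, `establishContext` is what makes `[a.b.c]` work without `[a]` or `[a.b]` ever appearing: the intermediate tables are created implicitly. A small end-to-end illustration, assuming the `github.com/BurntSushi/toml` import path this vendor tree pins:

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	// [a.b.c] forces establishContext to create the intermediate tables
	// "a" and "a.b" implicitly before the explicit "a.b.c" is defined.
	doc := `
[a.b.c]
answer = 42
`
	var v map[string]interface{}
	if _, err := toml.Decode(doc, &v); err != nil {
		panic(err)
	}
	a := v["a"].(map[string]interface{})
	b := a["b"].(map[string]interface{})
	c := b["c"].(map[string]interface{})
	fmt.Println(c["answer"]) // 42
}
```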

// setValue sets the given key to the given value in the current context.
// It will make sure that the key hasn't already been defined, and will
// account for implicit key groups.
func (p *parser) setValue(key string, value interface{}) {
    var tmpHash interface{}
    var ok bool

    hash := p.mapping
    keyContext := make(Key, 0)
    for _, k := range p.context {
        keyContext = append(keyContext, k)
        if tmpHash, ok = hash[k]; !ok {
            p.bug("Context for key '%s' has not been established.", keyContext)
        }
        switch t := tmpHash.(type) {
        case []map[string]interface{}:
            // The context is a table of hashes. Pick the most recent table
            // defined as the current hash.
            hash = t[len(t)-1]
        case map[string]interface{}:
            hash = t
        default:
            p.bug("Expected hash to have type 'map[string]interface{}', but "+
                "it has '%T' instead.", tmpHash)
        }
    }
    keyContext = append(keyContext, key)

    if _, ok := hash[key]; ok {
        // Typically, if the given key has already been set, then we have
        // to raise an error since duplicate keys are disallowed. However,
        // it's possible that a key was previously defined implicitly. In this
        // case, it is allowed to be redefined concretely. (See the
        // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
        //
        // But we have to make sure to stop marking it as implicit. (So that
        // another redefinition provokes an error.)
        //
        // Note that since it has already been defined (as a hash), we don't
        // want to overwrite it. So our business is done.
        if p.isImplicit(keyContext) {
            p.removeImplicit(keyContext)
            return
        }

        // Otherwise, we have a concrete key trying to override a previous
        // key, which is *always* wrong.
        p.panicf("Key '%s' has already been defined.", keyContext)
    }
    hash[key] = value
}
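
The implicit-key bookkeeping in `setValue` is easiest to see from the outside: a table created implicitly may be defined explicitly exactly once, after which it behaves like any other defined key. A sketch of both outcomes, again assuming the `github.com/BurntSushi/toml` import path:

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	// "a" is created implicitly by [a.b], so defining [a] afterwards is
	// legal exactly once; setValue clears the implicit flag when that
	// happens, making a *second* [a] a duplicate-key error.
	ok := `
[a.b]
x = 1
[a]
y = 2
`
	var v map[string]interface{}
	if _, err := toml.Decode(ok, &v); err != nil {
		panic(err)
	}
	fmt.Println("implicit-then-explicit decodes fine")

	dup := ok + "\n[a]\nz = 3\n"
	if _, err := toml.Decode(dup, &v); err != nil {
		fmt.Println("redefining again fails:", err)
	}
}
```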

// setType sets the type of a particular value at a given key.
// It should be called immediately AFTER setValue.
//
// Note that if `key` is empty, then the type given will be applied to the
// current context (which is either a table or an array of tables).
func (p *parser) setType(key string, typ tomlType) {
    keyContext := make(Key, 0, len(p.context)+1)
    for _, k := range p.context {
        keyContext = append(keyContext, k)
    }
    if len(key) > 0 { // allow type setting for hashes
        keyContext = append(keyContext, key)
    }
    p.types[keyContext.String()] = typ
}

// addImplicit sets the given Key as having been created implicitly.
func (p *parser) addImplicit(key Key) {
    p.implicits[key.String()] = true
}

// removeImplicit stops tagging the given key as having been implicitly
// created.
func (p *parser) removeImplicit(key Key) {
    p.implicits[key.String()] = false
}

// isImplicit returns true if the key group pointed to by the key was created
// implicitly.
func (p *parser) isImplicit(key Key) bool {
    return p.implicits[key.String()]
}

// current returns the full key name of the current context.
func (p *parser) current() string {
    if len(p.currentKey) == 0 {
        return p.context.String()
    }
    if len(p.context) == 0 {
        return p.currentKey
    }
    return fmt.Sprintf("%s.%s", p.context, p.currentKey)
}

func stripFirstNewline(s string) string {
    if len(s) == 0 || s[0] != '\n' {
        return s
    }
    return s[1:]
}

func stripEscapedWhitespace(s string) string {
    esc := strings.Split(s, "\\\n")
    if len(esc) > 1 {
        for i := 1; i < len(esc); i++ {
            esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
        }
    }
    return strings.Join(esc, "")
}
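
`stripEscapedWhitespace` implements TOML's line-continuation rule for multiline basic strings: a trailing backslash swallows the newline and all leading whitespace that follows. The same logic as a runnable example:

```go
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// Same logic as the vendored stripEscapedWhitespace: split on "\<newline>",
// then trim the leading whitespace of every piece after the first.
func stripEscapedWhitespace(s string) string {
	esc := strings.Split(s, "\\\n")
	for i := 1; i < len(esc); i++ {
		esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
	}
	return strings.Join(esc, "")
}

func main() {
	fmt.Printf("%q\n", stripEscapedWhitespace("The quick \\\n   brown fox."))
	// "The quick brown fox."
}
```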

func (p *parser) replaceEscapes(str string) string {
    var replaced []rune
    s := []byte(str)
    r := 0
    for r < len(s) {
        if s[r] != '\\' {
            c, size := utf8.DecodeRune(s[r:])
            r += size
            replaced = append(replaced, c)
            continue
        }
        r++
        if r >= len(s) {
            p.bug("Escape sequence at end of string.")
            return ""
        }
        switch s[r] {
        default:
            p.bug("Expected valid escape code after \\, but got %q.", s[r])
            return ""
        case 'b':
            replaced = append(replaced, rune(0x0008))
            r++
        case 't':
            replaced = append(replaced, rune(0x0009))
            r++
        case 'n':
            replaced = append(replaced, rune(0x000A))
            r++
        case 'f':
            replaced = append(replaced, rune(0x000C))
            r++
        case 'r':
            replaced = append(replaced, rune(0x000D))
            r++
        case '"':
            replaced = append(replaced, rune(0x0022))
            r++
        case '\\':
            replaced = append(replaced, rune(0x005C))
            r++
        case 'u':
            // At this point, we know we have a Unicode escape of the form
            // `uXXXX` at [r, r+5). (Because the lexer guarantees this
            // for us.)
            escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
            replaced = append(replaced, escaped)
            r += 5
        case 'U':
            // At this point, we know we have a Unicode escape of the form
            // `UXXXXXXXX` at [r, r+9). (Because the lexer guarantees this
            // for us.)
            escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
            replaced = append(replaced, escaped)
            r += 9
        }
    }
    return string(replaced)
}

func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
    s := string(bs)
    hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
    if err != nil {
        p.bug("Could not parse '%s' as a hexadecimal number, but the "+
            "lexer claims it's OK: %s", s, err)
    }
    if !utf8.ValidRune(rune(hex)) {
        p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
    }
    return rune(hex)
}
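
`asciiEscapeToUnicode` is a straight hex-to-rune conversion plus a validity check; `utf8.ValidRune` is what rejects surrogate halves and out-of-range code points. A standalone version showing both outcomes:

```go
package main

import (
	"fmt"
	"strconv"
	"unicode/utf8"
)

// hexToRune performs the same conversion asciiEscapeToUnicode applies to
// the hex digits of a \uXXXX or \UXXXXXXXX escape, including the
// surrogate/range check that utf8.ValidRune provides.
func hexToRune(hexDigits string) (rune, error) {
	n, err := strconv.ParseUint(hexDigits, 16, 32)
	if err != nil {
		return 0, err
	}
	if !utf8.ValidRune(rune(n)) {
		return 0, fmt.Errorf("\\u%s is not a valid rune", hexDigits)
	}
	return rune(n), nil
}

func main() {
	r, _ := hexToRune("00e9")
	fmt.Printf("%c\n", r) // é
	if _, err := hexToRune("d800"); err != nil {
		fmt.Println(err) // surrogate halves are rejected
	}
}
```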

func isStringType(ty itemType) bool {
    return ty == itemString || ty == itemMultilineString ||
        ty == itemRawString || ty == itemRawMultilineString
}

1  vendor/github.com/BurntSushi/toml/session.vim  (generated, vendored)
@@ -1 +0,0 @@
au BufWritePost *.go silent!make tags > /dev/null 2>&1

91  vendor/github.com/BurntSushi/toml/type_check.go  (generated, vendored)
@@ -1,91 +0,0 @@
package toml

// tomlType represents any Go type that corresponds to a TOML type.
// While the first draft of the TOML spec has a simplistic type system that
// probably doesn't need this level of sophistication, we seem to be moving
// toward adding real composite types.
type tomlType interface {
    typeString() string
}

// typeEqual accepts any two types and returns true if they are equal.
func typeEqual(t1, t2 tomlType) bool {
    if t1 == nil || t2 == nil {
        return false
    }
    return t1.typeString() == t2.typeString()
}

func typeIsHash(t tomlType) bool {
    return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash)
}

type tomlBaseType string

func (btype tomlBaseType) typeString() string {
    return string(btype)
}

func (btype tomlBaseType) String() string {
    return btype.typeString()
}

var (
    tomlInteger   tomlBaseType = "Integer"
    tomlFloat     tomlBaseType = "Float"
    tomlDatetime  tomlBaseType = "Datetime"
    tomlString    tomlBaseType = "String"
    tomlBool      tomlBaseType = "Bool"
    tomlArray     tomlBaseType = "Array"
    tomlHash      tomlBaseType = "Hash"
    tomlArrayHash tomlBaseType = "ArrayHash"
)

// typeOfPrimitive returns a tomlType of any primitive value in TOML.
// Primitive values are: Integer, Float, Datetime, String and Bool.
//
// Passing a lexer item other than the following will cause a BUG message
// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime.
func (p *parser) typeOfPrimitive(lexItem item) tomlType {
    switch lexItem.typ {
    case itemInteger:
        return tomlInteger
    case itemFloat:
        return tomlFloat
    case itemDatetime:
        return tomlDatetime
    case itemString:
        return tomlString
    case itemMultilineString:
        return tomlString
    case itemRawString:
        return tomlString
    case itemRawMultilineString:
        return tomlString
    case itemBool:
        return tomlBool
    }
    p.bug("Cannot infer primitive type of lex item '%s'.", lexItem)
    panic("unreachable")
}

// typeOfArray returns a tomlType for an array given a list of types of its
// values.
//
// In the current spec, if an array is homogeneous, then its type is always
// "Array". If the array is not homogeneous, an error is generated.
func (p *parser) typeOfArray(types []tomlType) tomlType {
    // Empty arrays are cool.
    if len(types) == 0 {
        return tomlArray
    }

    theType := types[0]
    for _, t := range types[1:] {
        if !typeEqual(theType, t) {
            p.panicf("Array contains values of type '%s' and '%s', but "+
                "arrays must be homogeneous.", theType, t)
        }
    }
    return tomlArray
}
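
At this vintage of the spec, arrays must be homogeneous, and `typeOfArray` is where a mixed array becomes a parse error. An external illustration (assuming the `github.com/BurntSushi/toml` import path):

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	var v map[string]interface{}

	// Homogeneous: fine.
	if _, err := toml.Decode(`nums = [1, 2, 3]`, &v); err != nil {
		panic(err)
	}
	fmt.Println(v["nums"])

	// Mixed types: typeOfArray panics into a parse error.
	if _, err := toml.Decode(`mixed = [1, "two"]`, &v); err != nil {
		fmt.Println("rejected:", err)
	}
}
```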

241  vendor/github.com/BurntSushi/toml/type_fields.go  (generated, vendored)
@@ -1,241 +0,0 @@
package toml

// Struct field handling is adapted from code in encoding/json:
//
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the Go distribution.

import (
    "reflect"
    "sort"
    "sync"
)

// A field represents a single field found in a struct.
type field struct {
    name  string       // the name of the field (`toml` tag included)
    tag   bool         // whether field has a `toml` tag
    index []int        // represents the depth of an anonymous field
    typ   reflect.Type // the type of the field
}

// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from toml tag", then
// breaking ties with index sequence.
type byName []field

func (x byName) Len() int { return len(x) }

func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

func (x byName) Less(i, j int) bool {
    if x[i].name != x[j].name {
        return x[i].name < x[j].name
    }
    if len(x[i].index) != len(x[j].index) {
        return len(x[i].index) < len(x[j].index)
    }
    if x[i].tag != x[j].tag {
        return x[i].tag
    }
    return byIndex(x).Less(i, j)
}

// byIndex sorts field by index sequence.
type byIndex []field

func (x byIndex) Len() int { return len(x) }

func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

func (x byIndex) Less(i, j int) bool {
    for k, xik := range x[i].index {
        if k >= len(x[j].index) {
            return false
        }
        if xik != x[j].index[k] {
            return xik < x[j].index[k]
        }
    }
    return len(x[i].index) < len(x[j].index)
}

// typeFields returns a list of fields that TOML should recognize for the given
// type. The algorithm is breadth-first search over the set of structs to
// include - the top struct and then any reachable anonymous structs.
func typeFields(t reflect.Type) []field {
    // Anonymous fields to explore at the current level and the next.
    current := []field{}
    next := []field{{typ: t}}

    // Count of queued names for current level and the next.
    count := map[reflect.Type]int{}
    nextCount := map[reflect.Type]int{}

    // Types already visited at an earlier level.
    visited := map[reflect.Type]bool{}

    // Fields found.
    var fields []field

    for len(next) > 0 {
        current, next = next, current[:0]
        count, nextCount = nextCount, map[reflect.Type]int{}

        for _, f := range current {
            if visited[f.typ] {
                continue
            }
            visited[f.typ] = true

            // Scan f.typ for fields to include.
            for i := 0; i < f.typ.NumField(); i++ {
                sf := f.typ.Field(i)
                if sf.PkgPath != "" && !sf.Anonymous { // unexported
                    continue
                }
                name, _ := getOptions(sf.Tag.Get("toml"))
                if name == "-" {
                    continue
                }
                index := make([]int, len(f.index)+1)
                copy(index, f.index)
                index[len(f.index)] = i

                ft := sf.Type
                if ft.Name() == "" && ft.Kind() == reflect.Ptr {
                    // Follow pointer.
                    ft = ft.Elem()
                }

                // Record found field and index sequence.
                if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
                    tagged := name != ""
                    if name == "" {
                        name = sf.Name
                    }
                    fields = append(fields, field{name, tagged, index, ft})
                    if count[f.typ] > 1 {
                        // If there were multiple instances, add a second,
                        // so that the annihilation code will see a duplicate.
                        // It only cares about the distinction between 1 or 2,
                        // so don't bother generating any more copies.
                        fields = append(fields, fields[len(fields)-1])
                    }
                    continue
                }

                // Record new anonymous struct to explore in next round.
                nextCount[ft]++
                if nextCount[ft] == 1 {
                    f := field{name: ft.Name(), index: index, typ: ft}
                    next = append(next, f)
                }
            }
        }
    }

    sort.Sort(byName(fields))

    // Delete all fields that are hidden by the Go rules for embedded fields,
    // except that fields with TOML tags are promoted.
    //
    // The fields are sorted in primary order of name, secondary order
    // of field index length. Loop over names; for each name, delete
    // hidden fields by choosing the one dominant field that survives.
    out := fields[:0]
    for advance, i := 0, 0; i < len(fields); i += advance {
        // One iteration per name.
        // Find the sequence of fields with the name of this first field.
        fi := fields[i]
        name := fi.name
        for advance = 1; i+advance < len(fields); advance++ {
            fj := fields[i+advance]
            if fj.name != name {
                break
            }
        }
        if advance == 1 { // Only one field with this name
            out = append(out, fi)
            continue
        }
        dominant, ok := dominantField(fields[i : i+advance])
        if ok {
            out = append(out, dominant)
        }
    }

    fields = out
    sort.Sort(byIndex(fields))

    return fields
}

// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
// TOML tags. If there are multiple top-level fields, the boolean
// will be false: This condition is an error in Go and we skip all
// the fields.
func dominantField(fields []field) (field, bool) {
    // The fields are sorted in increasing index-length order. The winner
    // must therefore be one with the shortest index length. Drop all
    // longer entries, which is easy: just truncate the slice.
    length := len(fields[0].index)
    tagged := -1 // Index of first tagged field.
    for i, f := range fields {
        if len(f.index) > length {
            fields = fields[:i]
            break
        }
        if f.tag {
            if tagged >= 0 {
                // Multiple tagged fields at the same level: conflict.
                // Return no field.
                return field{}, false
            }
            tagged = i
        }
    }
    if tagged >= 0 {
        return fields[tagged], true
    }
    // All remaining fields have the same length. If there's more than one,
    // we have a conflict (two fields named "X" at the same level) and we
    // return no field.
    if len(fields) > 1 {
        return field{}, false
    }
    return fields[0], true
}

var fieldCache struct {
    sync.RWMutex
    m map[reflect.Type][]field
}

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
func cachedTypeFields(t reflect.Type) []field {
    fieldCache.RLock()
    f := fieldCache.m[t]
    fieldCache.RUnlock()
    if f != nil {
        return f
    }

    // Compute fields without lock.
    // Might duplicate effort but won't hold other computations back.
    f = typeFields(t)
    if f == nil {
        f = []field{}
    }

    fieldCache.Lock()
    if fieldCache.m == nil {
        fieldCache.m = map[reflect.Type][]field{}
    }
    fieldCache.m[t] = f
    fieldCache.Unlock()
    return f
}
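
The practical effect of `typeFields`/`dominantField` mirrors `encoding/json`: when an embedded struct and the outer struct both expose a field with the same name, the shallower field wins and the deeper one is hidden. A sketch of that visible behavior (assuming the `github.com/BurntSushi/toml` import path):

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

type Base struct {
	Name string
}

type Doc struct {
	Base        // embedded: its Name sits at index depth 2
	Name string // depth 1: dominates under the Go embedding rules
}

func main() {
	var d Doc
	if _, err := toml.Decode(`Name = "outer"`, &d); err != nil {
		panic(err)
	}
	// dominantField keeps Doc.Name (shortest index), so the embedded
	// Base.Name is hidden and stays empty.
	fmt.Printf("outer=%q embedded=%q\n", d.Name, d.Base.Name)
}
```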

16  vendor/github.com/blevesearch/bleve/CONTRIBUTING.md  (generated, vendored, new file)
@@ -0,0 +1,16 @@
# Contributing to Bleve

We look forward to your contributions, but ask that you first review these guidelines.

### Sign the CLA

As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.

### Submitting a Pull Request

All types of contributions are welcome, but please keep the following in mind:

- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
- Existing tests should continue to pass, new tests for the contribution are nice to have.
- All code should have gone through `go fmt`
- All code should pass `go vet`
54  vendor/github.com/blevesearch/bleve/README.md  (generated, vendored)
@@ -1,10 +1,14 @@
 # ![bleve](docs/bleve.png) bleve

-[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
+[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
+[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
+[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

 modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)

-Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/search/).
+Try out bleve live by [searching the bleve website](http://www.blevesearch.com/search/?q=bleve).

 ## Features
@@ -16,7 +20,7 @@ Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/sea
 * Term, Phrase, Match, Match Phrase, Prefix
 * Conjunction, Disjunction, Boolean
 * Numeric Range, Date Range
-* Simple query [syntax](https://github.com/blevesearch/bleve/wiki/Query-String-Query) for human entry
+* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
 * tf-idf Scoring
 * Search result match highlighting
 * Supports Aggregating Facets:
@@ -30,32 +34,34 @@ Discuss usage and development of bleve in the [google group](https://groups.goog

 ## Indexing

+```go
 message := struct{
     Id   string
     From string
     Body string
 }{
     Id:   "example",
     From: "marty.schoch@gmail.com",
     Body: "bleve indexing is easy",
 }

 mapping := bleve.NewIndexMapping()
 index, err := bleve.New("example.bleve", mapping)
 if err != nil {
     panic(err)
 }
 index.Index(message.Id, message)
-
+```

 ## Querying

+```go
 index, _ := bleve.Open("example.bleve")
 query := bleve.NewQueryStringQuery("bleve")
 searchRequest := bleve.NewSearchRequest(query)
 searchResult, _ := index.Search(searchRequest)
-
+```

 ## License

 Apache License Version 2.0
52  vendor/github.com/blevesearch/bleve/analysis/analyzer/standard/standard.go  (generated, vendored, new file)
@@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package standard

import (
    "github.com/blevesearch/bleve/analysis"
    "github.com/blevesearch/bleve/analysis/lang/en"
    "github.com/blevesearch/bleve/analysis/token/lowercase"
    "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
    "github.com/blevesearch/bleve/registry"
)

const Name = "standard"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
    tokenizer, err := cache.TokenizerNamed(unicode.Name)
    if err != nil {
        return nil, err
    }
    toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
    if err != nil {
        return nil, err
    }
    stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
    if err != nil {
        return nil, err
    }
    rv := analysis.Analyzer{
        Tokenizer: tokenizer,
        TokenFilters: []analysis.TokenFilter{
            toLowerFilter,
            stopEnFilter,
        },
    }
    return &rv, nil
}

func init() {
    registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
130  vendor/github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer/custom_analyzer.go  (generated, vendored)
@@ -1,130 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
//   http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package standard_analyzer

import (
    "fmt"

    "github.com/blevesearch/bleve/analysis"
    "github.com/blevesearch/bleve/registry"
)

const Name = "custom"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {

    var err error
    var charFilters []analysis.CharFilter
    charFiltersNames, ok := config["char_filters"].([]string)
    if ok {
        charFilters, err = getCharFilters(charFiltersNames, cache)
        if err != nil {
            return nil, err
        }
    } else {
        charFiltersNamesInterfaceSlice, ok := config["char_filters"].([]interface{})
        if ok {
            charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersNamesInterfaceSlice, "char filter")
            if err != nil {
                return nil, err
            }
            charFilters, err = getCharFilters(charFiltersNames, cache)
            if err != nil {
                return nil, err
            }
        }
    }

    tokenizerName, ok := config["tokenizer"].(string)
    if !ok {
        return nil, fmt.Errorf("must specify tokenizer")
    }

    tokenizer, err := cache.TokenizerNamed(tokenizerName)
    if err != nil {
        return nil, err
    }

    var tokenFilters []analysis.TokenFilter
    tokenFiltersNames, ok := config["token_filters"].([]string)
    if ok {
        tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
        if err != nil {
            return nil, err
        }
    } else {
        tokenFiltersNamesInterfaceSlice, ok := config["token_filters"].([]interface{})
        if ok {
            tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersNamesInterfaceSlice, "token filter")
            if err != nil {
                return nil, err
            }
            tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
            if err != nil {
                return nil, err
            }
        }
    }

    rv := analysis.Analyzer{
        Tokenizer: tokenizer,
    }
    if charFilters != nil {
        rv.CharFilters = charFilters
    }
    if tokenFilters != nil {
        rv.TokenFilters = tokenFilters
    }
    return &rv, nil
}

func init() {
    registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}

func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
    charFilters := make([]analysis.CharFilter, len(charFilterNames))
    for i, charFilterName := range charFilterNames {
        charFilter, err := cache.CharFilterNamed(charFilterName)
        if err != nil {
            return nil, err
        }
        charFilters[i] = charFilter
    }

    return charFilters, nil
}

func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
    tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
    for i, tokenFilterName := range tokenFilterNames {
        tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
        if err != nil {
            return nil, err
        }
        tokenFilters[i] = tokenFilter
    }

    return tokenFilters, nil
}

func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
    stringSlice := make([]string, len(interfaceSlice))
    for i, interfaceObj := range interfaceSlice {
        stringObj, ok := interfaceObj.(string)
        if ok {
            stringSlice[i] = stringObj
        } else {
            return nil, fmt.Errorf(objType + " name must be a string")
        }
    }

    return stringSlice, nil
}
|
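This file is removed by the commit, but for reference, a sketch of mine showing how its config contract was consumed; the component names are examples and their packages must be blank-imported so their init() registrations run:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
	"github.com/blevesearch/bleve/registry"

	// side-effect imports register "html", "to_lower", "stop_en", "unicode"
	_ "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter"
	_ "github.com/blevesearch/bleve/analysis/language/en"
	_ "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	_ "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := custom_analyzer.AnalyzerConstructor(map[string]interface{}{
		"char_filters":  []interface{}{"html"},
		"tokenizer":     "unicode",
		"token_filters": []interface{}{"to_lower", "stop_en"},
	}, cache)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(analyzer.Analyze([]byte("<p>Hello, World!</p>"))))
}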
@ -1,49 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build cld2 full

package detect_lang_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/cld2"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
	"github.com/blevesearch/bleve/registry"
)

const Name = "detect_lang"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	detectLangFilter, err := cache.TokenFilterNamed(cld2.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: keywordTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			detectLangFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package keyword_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
	"github.com/blevesearch/bleve/registry"
)

const Name = "keyword"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: keywordTokenizer,
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,41 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package simple_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "simple"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,47 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package standard_analyzer

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/en"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "standard"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ignore_byte_array_converter

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type IgnoreByteArrayConverter struct{}

func NewIgnoreByteArrayConverter() *IgnoreByteArrayConverter {
	return &IgnoreByteArrayConverter{}
}

func (c *IgnoreByteArrayConverter) Convert(in []byte) (interface{}, error) {
	return nil, nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewIgnoreByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("ignore", Constructor)
}
@ -1,40 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package json_byte_array_converter

import (
	"encoding/json"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type JSONByteArrayConverter struct{}

func NewJSONByteArrayConverter() *JSONByteArrayConverter {
	return &JSONByteArrayConverter{}
}

func (c *JSONByteArrayConverter) Convert(in []byte) (interface{}, error) {
	var rv map[string]interface{}
	err := json.Unmarshal(in, &rv)
	if err != nil {
		return nil, err
	}
	return rv, nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewJSONByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("json", Constructor)
}
@ -1,33 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package string_byte_array_converter

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

type StringByteArrayConverter struct{}

func NewStringByteArrayConverter() *StringByteArrayConverter {
	return &StringByteArrayConverter{}
}

func (c *StringByteArrayConverter) Convert(in []byte) (interface{}, error) {
	return string(in), nil
}

func Constructor(config map[string]interface{}, cache *registry.Cache) (analysis.ByteArrayConverter, error) {
	return NewStringByteArrayConverter(), nil
}

func init() {
	registry.RegisterByteArrayConverter("string", Constructor)
}
@ -1,31 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package html_char_filter

import (
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter"
	"github.com/blevesearch/bleve/registry"
)

const Name = "html"

var htmlCharFilterRegexp = regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)

func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	replaceBytes := []byte(" ")
	return regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, CharFilterConstructor)
}
@ -1,58 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package regexp_char_filter

import (
	"bytes"
	"fmt"
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "regexp"

type RegexpCharFilter struct {
	r           *regexp.Regexp
	replacement []byte
}

func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter {
	return &RegexpCharFilter{
		r:           r,
		replacement: replacement,
	}
}

func (s *RegexpCharFilter) Filter(input []byte) []byte {
	return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
}

func RegexpCharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	regexpStr, ok := config["regexp"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify regexp")
	}
	r, err := regexp.Compile(regexpStr)
	if err != nil {
		return nil, fmt.Errorf("unable to build regexp char filter: %v", err)
	}
	replaceBytes := []byte(" ")
	replaceStr, ok := config["replace"].(string)
	if ok {
		replaceBytes = []byte(replaceStr)
	}
	return NewRegexpCharFilter(r, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, RegexpCharFilterConstructor)
}
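A short usage sketch (my own example) for the constructor's config contract above: "regexp" is required, "replace" defaults to a single space, and each match is replaced by the replacement repeated to the match's byte length, which keeps byte offsets stable for the tokenizer that runs afterwards.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter"
)

func main() {
	// the cache argument is unused by this constructor, so nil is fine here
	filter, err := regexp_char_filter.RegexpCharFilterConstructor(map[string]interface{}{
		"regexp": `\d`,
	}, nil)
	if err != nil {
		panic(err)
	}
	// each matched digit is replaced by an equal-length run of spaces
	fmt.Printf("%q\n", filter.Filter([]byte("abc123def"))) // "abc   def"
}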
@ -1,82 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package regexp_char_filter

import (
	"reflect"
	"regexp"
	"testing"
)

func TestRegexpCharFilter(t *testing.T) {

	htmlTagPattern := `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
	htmlRegex := regexp.MustCompile(htmlTagPattern)

	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			input: []byte(`<!DOCTYPE html>
<html>
<body>

<h1>My First Heading</h1>

<p>My first paragraph.</p>

</body>
</html>`),
			output: []byte(`



My First Heading

My first paragraph.


`),
		},
	}

	for _, test := range tests {
		filter := NewRegexpCharFilter(htmlRegex, []byte{' '})
		output := filter.Filter(test.input)
		if !reflect.DeepEqual(output, test.output) {
			t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
		}
	}
}

func TestZeroWidthNonJoinerCharFilter(t *testing.T) {

	zeroWidthNonJoinerPattern := `\x{200C}`
	zeroWidthNonJoinerRegex := regexp.MustCompile(zeroWidthNonJoinerPattern)

	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			input:  []byte("water\u200Cunder\u200Cthe\u200Cbridge"),
			output: []byte("water under the bridge"),
		},
	}

	for _, test := range tests {
		filter := NewRegexpCharFilter(zeroWidthNonJoinerRegex, []byte{' '})
		output := filter.Filter(test.input)
		if !reflect.DeepEqual(output, test.output) {
			t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
		}
	}
}
@ -1,31 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package zero_width_non_joiner

import (
	"regexp"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/char_filters/regexp_char_filter"
	"github.com/blevesearch/bleve/registry"
)

const Name = "zero_width_spaces"

var zeroWidthNonJoinerRegexp = regexp.MustCompile(`\x{200C}`)

func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	replaceBytes := []byte(" ")
	return regexp_char_filter.NewRegexpCharFilter(zeroWidthNonJoinerRegexp, replaceBytes), nil
}

func init() {
	registry.RegisterCharFilter(Name, CharFilterConstructor)
}
64 vendor/github.com/blevesearch/bleve/analysis/datetime/flexible/flexible.go generated vendored Normal file
@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flexible

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "flexiblego"

type DateTimeParser struct {
	layouts []string
}

func New(layouts []string) *DateTimeParser {
	return &DateTimeParser{
		layouts: layouts,
	}
}

func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	var layoutStrs []string
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return New(layoutStrs), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
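A minimal sketch of mine for the parser above: layouts are tried in order and the first successful time.Parse wins, otherwise analysis.ErrInvalidDateTime comes back.

package main

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis/datetime/flexible"
)

func main() {
	parser := flexible.New([]string{time.RFC3339, "2006-01-02"})
	rv, err := parser.ParseDateTime("2016-03-08")
	if err != nil {
		panic(err)
	}
	fmt.Println(rv.UTC()) // matched by the second, date-only layout
}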
45 vendor/github.com/blevesearch/bleve/analysis/datetime/optional/optional.go generated vendored Normal file
@ -0,0 +1,45 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package optional

import (
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/datetime/flexible"
	"github.com/blevesearch/bleve/registry"
)

const Name = "dateTimeOptional"

const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible.New(layouts), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
@ -1,40 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package datetime_optional

import (
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go"
	"github.com/blevesearch/bleve/registry"
)

const Name = "dateTimeOptional"

const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible_go.NewFlexibleGoDateTimeParser(layouts), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
59 vendor/github.com/blevesearch/bleve/analysis/datetime_parsers/flexible_go/flexible_go.go generated vendored
@ -1,59 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package flexible_go

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "flexiblego"

type FlexibleGoDateTimeParser struct {
	layouts []string
}

func NewFlexibleGoDateTimeParser(layouts []string) *FlexibleGoDateTimeParser {
	return &FlexibleGoDateTimeParser{
		layouts: layouts,
	}
}

func (p *FlexibleGoDateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

func FlexibleGoDateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	layoutStrs := make([]string, 0)
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return NewFlexibleGoDateTimeParser(layoutStrs), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, FlexibleGoDateTimeParserConstructor)
}
@ -1,84 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package flexible_go

import (
	"reflect"
	"testing"
	"time"

	"github.com/blevesearch/bleve/analysis"
)

func TestFlexibleDateTimeParser(t *testing.T) {
	testLocation := time.FixedZone("", -8*60*60)

	tests := []struct {
		input         string
		expectedTime  time.Time
		expectedError error
	}{
		{
			input:         "2014-08-03",
			expectedTime:  time.Date(2014, 8, 3, 0, 0, 0, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03 15:59:30",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30-08:00",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 0, testLocation),
			expectedError: nil,
		},
		{
			input:         "2014-08-03T15:59:30.999999999-08:00",
			expectedTime:  time.Date(2014, 8, 3, 15, 59, 30, 999999999, testLocation),
			expectedError: nil,
		},
		{
			input:         "not a date time",
			expectedTime:  time.Time{},
			expectedError: analysis.ErrInvalidDateTime,
		},
	}

	rfc3339NoTimezone := "2006-01-02T15:04:05"
	rfc3339NoTimezoneNoT := "2006-01-02 15:04:05"
	rfc3339NoTime := "2006-01-02"

	dateOptionalTimeParser := NewFlexibleGoDateTimeParser(
		[]string{
			time.RFC3339Nano,
			time.RFC3339,
			rfc3339NoTimezone,
			rfc3339NoTimezoneNoT,
			rfc3339NoTime,
		})

	for _, test := range tests {
		actualTime, actualErr := dateOptionalTimeParser.ParseDateTime(test.input)
		if actualErr != test.expectedError {
			t.Errorf("expected error %#v, got %#v", test.expectedError, actualErr)
			continue
		}
		if !reflect.DeepEqual(actualTime, test.expectedTime) {
			t.Errorf("expected time %#v, got %#v", test.expectedTime, actualTime)
			t.Errorf("expected location %#v,\n got %#v", test.expectedTime.Location(), actualTime.Location())
		}
	}
}
135 vendor/github.com/blevesearch/bleve/analysis/freq.go generated vendored
@ -1,88 +1,111 @@
 // Copyright (c) 2014 Couchbase, Inc.
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
-// except in compliance with the License. You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software distributed under the
-// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
-// either express or implied. See the License for the specific language governing permissions
-// and limitations under the License.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 package analysis
 
+// TokenLocation represents one occurrence of a term at a particular location in
+// a field. Start, End and Position have the same meaning as in analysis.Token.
+// Field and ArrayPositions identify the field value in the source document.
+// See document.Field for details.
 type TokenLocation struct {
-	Field    string
-	Start    int
-	End      int
-	Position int
+	Field          string
+	ArrayPositions []uint64
+	Start          int
+	End            int
+	Position       int
 }
 
+// TokenFreq represents all the occurrences of a term in all fields of a
+// document.
 type TokenFreq struct {
 	Term      []byte
 	Locations []*TokenLocation
+	frequency int
 }
 
-type TokenFrequencies []*TokenFreq
+func (tf *TokenFreq) Frequency() int {
+	return tf.frequency
+}
 
-func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) TokenFrequencies {
-	// put existing tokens into a map
-	index := make(map[string]*TokenFreq)
-	for _, tf := range tfs {
-		index[string(tf.Term)] = tf
-	}
+// TokenFrequencies maps document terms to their combined frequencies from all
+// fields.
+type TokenFrequencies map[string]*TokenFreq
+
+func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
 	// walk the new token frequencies
-	for _, tf := range other {
+	for tfk, tf := range other {
 		// set the remoteField value in incoming token freqs
 		for _, l := range tf.Locations {
 			l.Field = remoteField
 		}
-		existingTf, exists := index[string(tf.Term)]
+		existingTf, exists := tfs[tfk]
 		if exists {
 			existingTf.Locations = append(existingTf.Locations, tf.Locations...)
+			existingTf.frequency = existingTf.frequency + tf.frequency
 		} else {
-			index[string(tf.Term)] = tf
+			tfs[tfk] = &TokenFreq{
+				Term:      tf.Term,
+				frequency: tf.frequency,
+				Locations: make([]*TokenLocation, len(tf.Locations)),
+			}
+			copy(tfs[tfk].Locations, tf.Locations)
 		}
 	}
-	// flatten map back to array
-	rv := make(TokenFrequencies, len(index))
-	i := 0
-	for _, tf := range index {
-		rv[i] = tf
-		i++
-	}
-	return rv
 }
 
-func TokenFrequency(tokens TokenStream) TokenFrequencies {
-	index := make(map[string]*TokenFreq)
-	for _, token := range tokens {
-		curr, ok := index[string(token.Term)]
-		if ok {
-			curr.Locations = append(curr.Locations, &TokenLocation{
-				Start:    token.Start,
-				End:      token.End,
-				Position: token.Position,
-			})
-		} else {
-			index[string(token.Term)] = &TokenFreq{
-				Term: token.Term,
-				Locations: []*TokenLocation{
-					&TokenLocation{
-						Start:    token.Start,
-						End:      token.End,
-						Position: token.Position,
-					},
-				},
-			}
-		}
-	}
-	rv := make(TokenFrequencies, len(index))
-	i := 0
-	for _, tf := range index {
-		rv[i] = tf
-		i++
-	}
-	return rv
+func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies {
+	rv := make(map[string]*TokenFreq, len(tokens))
+
+	if includeTermVectors {
+		tls := make([]TokenLocation, len(tokens))
+		tlNext := 0
+
+		for _, token := range tokens {
+			tls[tlNext] = TokenLocation{
+				ArrayPositions: arrayPositions,
+				Start:          token.Start,
+				End:            token.End,
+				Position:       token.Position,
+			}
+
+			curr, ok := rv[string(token.Term)]
+			if ok {
+				curr.Locations = append(curr.Locations, &tls[tlNext])
+				curr.frequency++
+			} else {
+				rv[string(token.Term)] = &TokenFreq{
+					Term:      token.Term,
+					Locations: []*TokenLocation{&tls[tlNext]},
+					frequency: 1,
+				}
+			}
+
+			tlNext++
+		}
+	} else {
+		for _, token := range tokens {
+			curr, exists := rv[string(token.Term)]
+			if exists {
+				curr.frequency++
+			} else {
+				rv[string(token.Term)] = &TokenFreq{
+					Term:      token.Term,
+					frequency: 1,
+				}
+			}
+		}
+	}
+
+	return rv
}
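To illustrate the new TokenFrequency signature (a sketch of mine, not from the commit): array positions may be nil, and includeTermVectors controls whether Locations are materialized at all.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	tokens := analysis.TokenStream{
		&analysis.Token{Term: []byte("water"), Position: 1, Start: 0, End: 5},
		&analysis.Token{Term: []byte("water"), Position: 2, Start: 6, End: 11},
	}
	// with term vectors: both the count and the two locations are kept
	freqs := analysis.TokenFrequency(tokens, nil, true)
	fmt.Println(freqs["water"].Frequency())    // 2
	fmt.Println(len(freqs["water"].Locations)) // 2
}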
167 vendor/github.com/blevesearch/bleve/analysis/freq_test.go generated vendored
@ -1,167 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package analysis

import (
	"reflect"
	"testing"
)

func TestTokenFrequency(t *testing.T) {
	tokens := TokenStream{
		&Token{
			Term:     []byte("water"),
			Position: 1,
			Start:    0,
			End:      5,
		},
		&Token{
			Term:     []byte("water"),
			Position: 2,
			Start:    6,
			End:      11,
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	result := TokenFrequency(tokens)
	if !reflect.DeepEqual(result, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, result)
	}
}

func TestTokenFrequenciesMergeAll(t *testing.T) {
	tf1 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	tf2 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	tf1.MergeAll("tf2", tf2)
	if !reflect.DeepEqual(tf1, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, tf1)
	}
}

func TestTokenFrequenciesMergeAllLeftEmpty(t *testing.T) {
	tf1 := TokenFrequencies{}
	tf2 := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	expectedResult := TokenFrequencies{
		&TokenFreq{
			Term: []byte("water"),
			Locations: []*TokenLocation{
				&TokenLocation{
					Field:    "tf2",
					Position: 1,
					Start:    0,
					End:      5,
				},
				&TokenLocation{
					Field:    "tf2",
					Position: 2,
					Start:    6,
					End:      11,
				},
			},
		},
	}
	result := tf1.MergeAll("tf2", tf2)
	if !reflect.DeepEqual(result, expectedResult) {
		t.Errorf("expected %#v, got %#v", expectedResult, result)
	}
}
70 vendor/github.com/blevesearch/bleve/analysis/lang/en/analyzer_en.go generated vendored Normal file
@ -0,0 +1,70 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package en implements an analyzer with reasonable defaults for processing
// English text.
//
// It strips possessive suffixes ('s), transforms tokens to lower case,
// removes stopwords from a built-in list, and applies porter stemming.
//
// The built-in stopwords list is defined in EnglishStopWords.
package en

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/token/porter"
	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

const AnalyzerName = "en"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			possEnFilter,
			toLowerFilter,
			stopEnFilter,
			stemmerEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
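A sketch of mine running the "en" analyzer end to end; the blank import triggers the registrations above, and "Walking the dogs" should come out as the stemmed, stopped terms "walk" and "dog":

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/registry"

	_ "github.com/blevesearch/bleve/analysis/lang/en"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("en")
	if err != nil {
		panic(err)
	}
	for _, token := range analyzer.Analyze([]byte("Walking the dogs")) {
		fmt.Println(string(token.Term))
	}
}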
67 vendor/github.com/blevesearch/bleve/analysis/lang/en/possessive_filter_en.go generated vendored Normal file
@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package en

import (
	"unicode/utf8"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"

const rightSingleQuotationMark = '’'
const apostrophe = '\''
const fullWidthApostrophe = '＇'

const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe

// PossessiveFilter implements a TokenFilter which
// strips the English possessive suffix ('s) from tokens.
// It handles a variety of apostrophe types, is case-insensitive
// and doesn't distinguish between possessive and contraction.
// (ie "She's So Rad" becomes "She So Rad")
type PossessiveFilter struct {
}

func NewPossessiveFilter() *PossessiveFilter {
	return &PossessiveFilter{}
}

func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term)
		if lastRune == 's' || lastRune == 'S' {
			nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize])
			if nextLastRune == rightSingleQuotationMark ||
				nextLastRune == apostrophe ||
				nextLastRune == fullWidthApostrophe {
				token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize]
			}
		}
	}
	return input
}

func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewPossessiveFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
}
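The filter can also be used directly, outside the registry; a small sketch of mine:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
)

func main() {
	filter := en.NewPossessiveFilter()
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("marty's")},
	}
	for _, token := range filter.Filter(stream) {
		fmt.Println(string(token.Term)) // "marty"
	}
}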
33 vendor/github.com/blevesearch/bleve/analysis/lang/en/stop_filter_en.go generated vendored Normal file
@ -0,0 +1,33 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package en

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/stop"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
@ -7,10 +7,11 @@ import (
 
 const StopName = "stop_en"
 
+// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
+//
 // this content was obtained from:
 // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
 // ` was changed to ' to allow for literal string
 
 var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
  | This file is distributed under the BSD License.
  | See http://snowball.tartarus.org/license.php
60 vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar.go generated vendored
@ -1,60 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
	"github.com/blevesearch/bleve/analysis/tokenizers/unicode"
)

const AnalyzerName = "ar"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKC)
	stopArFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerArFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			normalizeFilter,
			stopArFilter,
			normalizeArFilter,
			stemmerArFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
179 vendor/github.com/blevesearch/bleve/analysis/language/ar/analyzer_ar_test.go generated vendored
@ -1,179 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestArabicAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("كبير"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كبير"), Position: 1, Start: 0, End: 8},
			},
		},
		// feminine marker
		{
			input: []byte("كبيرة"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كبير"), Position: 1, Start: 0, End: 10},
			},
		},
		{
			input: []byte("مشروب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("مشروب"), Position: 1, Start: 0, End: 10},
			},
		},
		// plural -at
		{
			input: []byte("مشروبات"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("مشروب"), Position: 1, Start: 0, End: 14},
			},
		},
		// plural -in
		{
			input: []byte("أمريكيين"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("امريك"), Position: 1, Start: 0, End: 16},
			},
		},
		// singular with bare alif
		{
			input: []byte("امريكي"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("امريك"), Position: 1, Start: 0, End: 12},
			},
		},
		{
			input: []byte("كتاب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كتاب"), Position: 1, Start: 0, End: 8},
			},
		},
		// definite article
		{
			input: []byte("الكتاب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كتاب"), Position: 1, Start: 0, End: 12},
			},
		},
		{
			input: []byte("ما ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("ملكت"), Position: 2, Start: 5, End: 13},
				&analysis.Token{Term: []byte("ايمانكم"), Position: 3, Start: 14, End: 28},
			},
		},
		// stopwords
		{
			input: []byte("الذين ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("ملكت"), Position: 2, Start: 11, End: 19},
				&analysis.Token{Term: []byte("ايمانكم"), Position: 3, Start: 20, End: 34},
			},
		},
		// presentation form normalization
		{
			input: []byte("ﺍﻟﺴﻼﻢ"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("سلام"), Position: 1, Start: 0, End: 15},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
80 vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize.go generated vendored
@ -1,80 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_ar"

const (
	Alef           = '\u0627'
	AlefMadda      = '\u0622'
	AlefHamzaAbove = '\u0623'
	AlefHamzaBelow = '\u0625'
	Yeh            = '\u064A'
	DotlessYeh     = '\u0649'
	TehMarbuta     = '\u0629'
	Heh            = '\u0647'
	Tatweel        = '\u0640'
	Fathatan       = '\u064B'
	Dammatan       = '\u064C'
	Kasratan       = '\u064D'
	Fatha          = '\u064E'
	Damma          = '\u064F'
	Kasra          = '\u0650'
	Shadda         = '\u0651'
	Sukun          = '\u0652'
)

type ArabicNormalizeFilter struct {
}

func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
	return &ArabicNormalizeFilter{}
}

func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
			runes[i] = Alef
		case DotlessYeh:
			runes[i] = Yeh
		case TehMarbuta:
			runes[i] = Heh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			i--
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewArabicNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
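A quick worked example of the normalizer above (a sketch, not part of this commit; the expected value matches the AlifHamzaAbove case in the test file that follows):

	f := NewArabicNormalizeFilter()
	in := analysis.TokenStream{&analysis.Token{Term: []byte("أحمد")}}
	out := f.Filter(in)
	fmt.Printf("%s\n", out[0].Term) // AlefHamzaAbove folds to bare Alef: "احمد"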
229 vendor/github.com/blevesearch/bleve/analysis/language/ar/arabic_normalize_test.go generated vendored
@ -1,229 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestArabicNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// AlifMadda
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("آجن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("اجن")}},
		},
		// AlifHamzaAbove
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("أحمد")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("احمد")}},
		},
		// AlifHamzaBelow
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("إعاذ")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("اعاذ")}},
		},
		// AlifMaksura
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("بنى")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("بني")}},
		},
		// TehMarbuta
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("فاطمة")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("فاطمه")}},
		},
		// Tatweel
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("روبرـــــت")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("روبرت")}},
		},
		// Fatha
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("مَبنا")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("مبنا")}},
		},
		// Kasra
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("علِي")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("علي")}},
		},
		// Damma
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("بُوات")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("بوات")}},
		},
		// Fathatan
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ولداً")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ولدا")}},
		},
		// Kasratan
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ولدٍ")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ولد")}},
		},
		// Dammatan
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ولدٌ")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ولد")}},
		},
		// Sukun
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("نلْسون")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("نلسون")}},
		},
		// Shaddah
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("هتميّ")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("هتمي")}},
		},
		// empty
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
	}

	arabicNormalizeFilter := NewArabicNormalizeFilter()
	for _, test := range tests {
		actual := arabicNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
113 vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar.go generated vendored
@ -1,113 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_ar"

// These were obtained from org.apache.lucene.analysis.ar.ArabicStemmer
var prefixes = [][]rune{
	[]rune("ال"),
	[]rune("وال"),
	[]rune("بال"),
	[]rune("كال"),
	[]rune("فال"),
	[]rune("لل"),
	[]rune("و"),
}
var suffixes = [][]rune{
	[]rune("ها"),
	[]rune("ان"),
	[]rune("ات"),
	[]rune("ون"),
	[]rune("ين"),
	[]rune("يه"),
	[]rune("ية"),
	[]rune("ه"),
	[]rune("ة"),
	[]rune("ي"),
}

type ArabicStemmerFilter struct{}

func NewArabicStemmerFilter() *ArabicStemmerFilter {
	return &ArabicStemmerFilter{}
}

func (s *ArabicStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := stem(token.Term)
		token.Term = term
	}
	return input
}

func canStemPrefix(input, prefix []rune) bool {
	// Wa- prefix requires at least 3 characters.
	if len(prefix) == 1 && len(input) < 4 {
		return false
	}
	// Other prefixes require only 2.
	if len(input)-len(prefix) < 2 {
		return false
	}
	for i := range prefix {
		if prefix[i] != input[i] {
			return false
		}
	}
	return true
}

func canStemSuffix(input, suffix []rune) bool {
	// All suffixes require at least 2 characters after stemming.
	if len(input)-len(suffix) < 2 {
		return false
	}
	stemEnd := len(input) - len(suffix)
	for i := range suffix {
		if suffix[i] != input[stemEnd+i] {
			return false
		}
	}
	return true
}

func stem(input []byte) []byte {
	runes := bytes.Runes(input)
	// Strip a single prefix.
	for _, p := range prefixes {
		if canStemPrefix(runes, p) {
			runes = runes[len(p):]
			break
		}
	}
	// Strip off multiple suffixes, in their order in the suffixes array.
	for _, s := range suffixes {
		if canStemSuffix(runes, s) {
			runes = runes[:len(runes)-len(s)]
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewArabicStemmerFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
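To make the prefix-then-suffix order in stem concrete, a small sketch (not part of this commit; expected values mirror the test file that follows):

	fmt.Printf("%s\n", stem([]byte("الحسن")))   // definite article "ال" stripped: "حسن"
	fmt.Printf("%s\n", stem([]byte("وساهدون"))) // "و" prefix, then "ون" suffix: "ساهد"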
392 vendor/github.com/blevesearch/bleve/analysis/language/ar/stemmer_ar_test.go generated vendored
@ -1,392 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestArabicStemmerFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// AlPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("الحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// WalPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("والحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// BalPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("بالحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// KalPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("كالحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// FalPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("فالحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// LlPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("للاخر")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("اخر")}},
		},
		// WaPrefix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("وحسن")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("حسن")}},
		},
		// AhSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("زوجها")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("زوج")}},
		},
		// AnSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدان")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// AtSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدات")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// WnSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدون")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// YnSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدين")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// YhSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهديه")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// YpSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدية")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// HSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهده")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// PSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدة")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// YSuffix
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدي")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// ComboPrefSuf
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("وساهدون")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// ComboSuf
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("ساهدهات")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("ساهد")}},
		},
		// ShouldntStem
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("الو")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("الو")}},
		},
		// NonArabic
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("English")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("English")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("سلام")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("سلام")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("السلام")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("سلام")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("سلامة")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("سلام")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("السلامة")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("سلام")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("الوصل")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("وصل")}},
		},
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("والصل")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("صل")}},
		},
		// Empty
		{
			input:  analysis.TokenStream{&analysis.Token{Term: []byte("")}},
			output: analysis.TokenStream{&analysis.Token{Term: []byte("")}},
		},
	}

	arabicStemmerFilter := NewArabicStemmerFilter()
	for _, test := range tests {
		actual := arabicStemmerFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
28 vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_filter_ar.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
149 vendor/github.com/blevesearch/bleve/analysis/language/ar/stop_words_ar.go generated vendored
@ -1,149 +0,0 @@
package ar

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ar"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
// ` was changed to ' to allow for literal string

var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(ArabicStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
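A note on how this list is consumed: the token map registered here under StopName ("stop_ar") is exactly what StopTokenFilterConstructor in stop_filter_ar.go above looks up. A sketch (not part of this commit, using only names from the two ar files):

	cache := registry.NewCache()
	tokenMap, err := cache.TokenMapNamed(StopName) // built by TokenMapConstructor via LoadBytes
	if err != nil {
		panic(err)
	}
	_ = tokenMap // handed to stop_tokens_filter.NewStopTokensFilter by the filter constructor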
28 vendor/github.com/blevesearch/bleve/analysis/language/bg/stop_filter_bg.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package bg

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
217 vendor/github.com/blevesearch/bleve/analysis/language/bg/stop_words_bg.go generated vendored
@ -1,217 +0,0 @@
package bg

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_bg"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var BulgarianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(BulgarianStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
30 vendor/github.com/blevesearch/bleve/analysis/language/ca/articles_ca.go generated vendored
@ -1,30 +0,0 @@
package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const ArticlesName = "articles_ca"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis

var CatalanArticles = []byte(`
d
l
m
n
s
t
`)

func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CatalanArticles)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
}
32 vendor/github.com/blevesearch/bleve/analysis/language/ca/elision_ca.go generated vendored
@ -1,32 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/elision_filter"
	"github.com/blevesearch/bleve/registry"
)

const ElisionName = "elision_ca"

func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %v", err)
	}
	return elision_filter.NewElisionFilter(articlesTokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
}
56 vendor/github.com/blevesearch/bleve/analysis/language/ca/elision_ca_test.go generated vendored
@ -1,56 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestFrenchElision(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{Term: []byte("l'Institut")},
				&analysis.Token{Term: []byte("d'Estudis")},
			},
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("Institut")},
				&analysis.Token{Term: []byte("Estudis")},
			},
		},
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := elisionFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
		}
	}
}
28 vendor/github.com/blevesearch/bleve/analysis/language/ca/stop_filter_ca.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
244 vendor/github.com/blevesearch/bleve/analysis/language/ca/stop_words_ca.go generated vendored
@ -1,244 +0,0 @@
package ca

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ca"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var CatalanStopWords = []byte(`# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CatalanStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
50 vendor/github.com/blevesearch/bleve/analysis/language/cjk/analyzer_cjk.go generated vendored
@ -1,50 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"

	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize"
	"github.com/blevesearch/bleve/analysis/tokenizers/whitespace_tokenizer"
)

const AnalyzerName = "cjk"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	whitespaceTokenizer, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	bigramFilter, err := cache.TokenFilterNamed(BigramName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: whitespaceTokenizer,
		TokenFilters: []analysis.TokenFilter{
			normalizeFilter,
			toLowerFilter,
			bigramFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
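The pipeline above emits overlapping character bigrams rather than dictionary words. A sketch of the analyzer in use (not part of this commit; output mirrors the first case of the test file that follows):

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName) // "cjk"
	if err != nil {
		panic(err)
	}
	for _, tok := range analyzer.Analyze([]byte("こんにちは世界")) {
		fmt.Printf("%s ", tok.Term) // こん んに にち ちは は世 世界
	}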
620 vendor/github.com/blevesearch/bleve/analysis/language/cjk/analyzer_cjk_test.go generated vendored
@ -1,620 +0,0 @@
|
|||||||
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestCJKAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("こんにちは世界"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こん"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("んに"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("にち"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("ちは"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("は世"),
					Type:     analysis.Double,
					Position: 5,
					Start:    12,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("世界"),
					Type:     analysis.Double,
					Position: 6,
					Start:    15,
					End:      21,
				},
			},
		},
		{
			input: []byte("一二三四五六七八九十"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一二"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("二三"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("三四"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("四五"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("五六"),
					Type:     analysis.Double,
					Position: 5,
					Start:    12,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("六七"),
					Type:     analysis.Double,
					Position: 6,
					Start:    15,
					End:      21,
				},
				&analysis.Token{
					Term:     []byte("七八"),
					Type:     analysis.Double,
					Position: 7,
					Start:    18,
					End:      24,
				},
				&analysis.Token{
					Term:     []byte("八九"),
					Type:     analysis.Double,
					Position: 8,
					Start:    21,
					End:      27,
				},
				&analysis.Token{
					Term:     []byte("九十"),
					Type:     analysis.Double,
					Position: 9,
					Start:    24,
					End:      30,
				},
			},
		},
		{
			input: []byte("一 二三四 五六七八九 十"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("二三"),
					Type:     analysis.Double,
					Position: 2,
					Start:    4,
					End:      10,
				},
				&analysis.Token{
					Term:     []byte("三四"),
					Type:     analysis.Double,
					Position: 3,
					Start:    7,
					End:      13,
				},
				&analysis.Token{
					Term:     []byte("五六"),
					Type:     analysis.Double,
					Position: 5,
					Start:    14,
					End:      20,
				},
				&analysis.Token{
					Term:     []byte("六七"),
					Type:     analysis.Double,
					Position: 6,
					Start:    17,
					End:      23,
				},
				&analysis.Token{
					Term:     []byte("七八"),
					Type:     analysis.Double,
					Position: 7,
					Start:    20,
					End:      26,
				},
				&analysis.Token{
					Term:     []byte("八九"),
					Type:     analysis.Double,
					Position: 8,
					Start:    23,
					End:      29,
				},
				&analysis.Token{
					Term:     []byte("十"),
					Type:     analysis.Single,
					Position: 10,
					Start:    30,
					End:      33,
				},
			},
		},
		{
			input: []byte("abc defgh ijklmn opqrstu vwxy z"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("abc"),
					Type:     analysis.AlphaNumeric,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("defgh"),
					Type:     analysis.AlphaNumeric,
					Position: 2,
					Start:    4,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("ijklmn"),
					Type:     analysis.AlphaNumeric,
					Position: 3,
					Start:    10,
					End:      16,
				},
				&analysis.Token{
					Term:     []byte("opqrstu"),
					Type:     analysis.AlphaNumeric,
					Position: 4,
					Start:    17,
					End:      24,
				},
				&analysis.Token{
					Term:     []byte("vwxy"),
					Type:     analysis.AlphaNumeric,
					Position: 5,
					Start:    25,
					End:      29,
				},
				&analysis.Token{
					Term:     []byte("z"),
					Type:     analysis.AlphaNumeric,
					Position: 6,
					Start:    30,
					End:      31,
				},
			},
		},
		{
			input: []byte("あい"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
			},
		},
		{
			input: []byte("あい "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
			},
		},
		{
			input: []byte("test"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("test"),
					Type:     analysis.AlphaNumeric,
					Position: 1,
					Start:    0,
					End:      4,
				},
			},
		},
		{
			input: []byte("test "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("test"),
					Type:     analysis.AlphaNumeric,
					Position: 1,
					Start:    0,
					End:      4,
				},
			},
		},
		{
			input: []byte("あいtest"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("test"),
					Type:     analysis.AlphaNumeric,
					Position: 3,
					Start:    6,
					End:      10,
				},
			},
		},
		{
			input: []byte("testあい "),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("test"),
					Type:     analysis.AlphaNumeric,
					Position: 1,
					Start:    0,
					End:      4,
				},
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 2,
					Start:    4,
					End:      10,
				},
			},
		},
		{
			input: []byte("あいうえおabcかきくけこ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("いう"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("うえ"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("えお"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("abc"),
					Type:     analysis.AlphaNumeric,
					Position: 6,
					Start:    15,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("かき"),
					Type:     analysis.Double,
					Position: 7,
					Start:    18,
					End:      24,
				},
				&analysis.Token{
					Term:     []byte("きく"),
					Type:     analysis.Double,
					Position: 8,
					Start:    21,
					End:      27,
				},
				&analysis.Token{
					Term:     []byte("くけ"),
					Type:     analysis.Double,
					Position: 9,
					Start:    24,
					End:      30,
				},
				&analysis.Token{
					Term:     []byte("けこ"),
					Type:     analysis.Double,
					Position: 10,
					Start:    27,
					End:      33,
				},
			},
		},
		{
			input: []byte("あいうえおabんcかきくけ こ"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("あい"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("いう"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("うえ"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("えお"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("ab"),
					Type:     analysis.AlphaNumeric,
					Position: 6,
					Start:    15,
					End:      17,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Single,
					Position: 7,
					Start:    17,
					End:      20,
				},
				&analysis.Token{
					Term:     []byte("c"),
					Type:     analysis.AlphaNumeric,
					Position: 8,
					Start:    20,
					End:      21,
				},
				&analysis.Token{
					Term:     []byte("かき"),
					Type:     analysis.Double,
					Position: 9,
					Start:    21,
					End:      27,
				},
				&analysis.Token{
					Term:     []byte("きく"),
					Type:     analysis.Double,
					Position: 10,
					Start:    24,
					End:      30,
				},
				&analysis.Token{
					Term:     []byte("くけ"),
					Type:     analysis.Double,
					Position: 11,
					Start:    27,
					End:      33,
				},
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Single,
					Position: 13,
					Start:    34,
					End:      37,
				},
			},
		},
		{
			input: []byte("一 روبرت موير"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("روبرت"),
					Type:     analysis.AlphaNumeric,
					Position: 2,
					Start:    4,
					End:      14,
				},
				&analysis.Token{
					Term:     []byte("موير"),
					Type:     analysis.AlphaNumeric,
					Position: 3,
					Start:    15,
					End:      23,
				},
			},
		},
		{
			input: []byte("一 رُوبرت موير"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("رُوبرت"),
					Type:     analysis.AlphaNumeric,
					Position: 2,
					Start:    4,
					End:      16,
				},
				&analysis.Token{
					Term:     []byte("موير"),
					Type:     analysis.AlphaNumeric,
					Position: 3,
					Start:    17,
					End:      25,
				},
			},
		},
		{
			input: []byte("𩬅艱鍟䇹愯瀛"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("𩬅艱"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      7,
				},
				&analysis.Token{
					Term:     []byte("艱鍟"),
					Type:     analysis.Double,
					Position: 2,
					Start:    4,
					End:      10,
				},
				&analysis.Token{
					Term:     []byte("鍟䇹"),
					Type:     analysis.Double,
					Position: 3,
					Start:    7,
					End:      13,
				},
				&analysis.Token{
					Term:     []byte("䇹愯"),
					Type:     analysis.Double,
					Position: 4,
					Start:    10,
					End:      16,
				},
				&analysis.Token{
					Term:     []byte("愯瀛"),
					Type:     analysis.Double,
					Position: 5,
					Start:    13,
					End:      19,
				},
			},
		},
		{
			input: []byte("一"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
			},
		},
		{
			input: []byte("一丁丂"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("一丁"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("丁丂"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
			},
		},
	}

	cache := registry.NewCache()
	for _, test := range tests {
		analyzer, err := cache.AnalyzerNamed(AnalyzerName)
		if err != nil {
			t.Fatal(err)
		}
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
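The Start and End values throughout these cases are byte offsets, not rune indexes: the kana and most ideographs above occupy 3 bytes each in UTF-8, so a two-rune bigram spans 6 bytes, while the supplementary-plane 𩬅 occupies 4, which is why "𩬅艱" ends at byte 7. A quick standard-library check (not part of the commit):

package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	fmt.Println(utf8.RuneLen('こ')) // 3: the bigram "こん" spans bytes [0,6)
	fmt.Println(utf8.RuneLen('𩬅')) // 4: the bigram "𩬅艱" spans bytes [0,7)
}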
166 vendor/github.com/blevesearch/bleve/analysis/language/cjk/cjk_bigram.go generated vendored
@ -1,166 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"container/ring"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const BigramName = "cjk_bigram"

type CJKBigramFilter struct {
	outputUnigram bool
}

func NewCJKBigramFilter(outputUnigram bool) *CJKBigramFilter {
	return &CJKBigramFilter{
		outputUnigram: outputUnigram,
	}
}

func (s *CJKBigramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	r := ring.New(2)
	itemsInRing := 0

	rv := make(analysis.TokenStream, 0, len(input))

	for _, token := range input {
		if token.Type == analysis.Ideographic {
			if itemsInRing > 0 {
				// if items already buffered
				// check to see if this is aligned
				curr := r.Value.(*analysis.Token)
				if token.Start-curr.End != 0 {
					// not aligned flush
					flushToken := s.flush(r, &itemsInRing)
					if flushToken != nil {
						rv = append(rv, flushToken)
					}
				}
			}
			// now we can add this token to the buffer
			r = r.Next()
			r.Value = token
			if itemsInRing < 2 {
				itemsInRing++
			}
			if itemsInRing > 1 && s.outputUnigram {
				unigram := s.buildUnigram(r, &itemsInRing)
				if unigram != nil {
					rv = append(rv, unigram)
				}
			}
			bigramToken := s.outputBigram(r, &itemsInRing)
			if bigramToken != nil {
				rv = append(rv, bigramToken)
			}
		} else {
			// flush anything already buffered
			flushToken := s.flush(r, &itemsInRing)
			if flushToken != nil {
				rv = append(rv, flushToken)
			}
			// output this token as is
			rv = append(rv, token)
		}
	}

	// deal with possible trailing unigram
	if itemsInRing == 1 || s.outputUnigram {
		if itemsInRing == 2 {
			r = r.Next()
		}
		unigram := s.buildUnigram(r, &itemsInRing)
		if unigram != nil {
			rv = append(rv, unigram)
		}
	}
	return rv
}

func (s *CJKBigramFilter) flush(r *ring.Ring, itemsInRing *int) *analysis.Token {
	var rv *analysis.Token
	if *itemsInRing == 1 {
		rv = s.buildUnigram(r, itemsInRing)
	}
	r.Value = nil
	*itemsInRing = 0
	return rv
}

func (s *CJKBigramFilter) outputBigram(r *ring.Ring, itemsInRing *int) *analysis.Token {
	if *itemsInRing == 2 {
		thisShingleRing := r.Move(-1)
		shingledBytes := make([]byte, 0)

		// do first token
		prev := thisShingleRing.Value.(*analysis.Token)
		shingledBytes = append(shingledBytes, prev.Term...)

		// do second token
		thisShingleRing = thisShingleRing.Next()
		curr := thisShingleRing.Value.(*analysis.Token)
		shingledBytes = append(shingledBytes, curr.Term...)

		token := analysis.Token{
			Type:     analysis.Double,
			Term:     shingledBytes,
			Position: prev.Position,
			Start:    prev.Start,
			End:      curr.End,
		}
		return &token
	}
	return nil
}

func (s *CJKBigramFilter) buildUnigram(r *ring.Ring, itemsInRing *int) *analysis.Token {
	if *itemsInRing == 2 {
		thisShingleRing := r.Move(-1)
		// do first token
		prev := thisShingleRing.Value.(*analysis.Token)
		token := analysis.Token{
			Type:     analysis.Single,
			Term:     prev.Term,
			Position: prev.Position,
			Start:    prev.Start,
			End:      prev.End,
		}
		return &token
	} else if *itemsInRing == 1 {
		// do first token
		prev := r.Value.(*analysis.Token)
		token := analysis.Token{
			Type:     analysis.Single,
			Term:     prev.Term,
			Position: prev.Position,
			Start:    prev.Start,
			End:      prev.End,
		}
		return &token
	}
	return nil
}

func CJKBigramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	outputUnigram := false
	outVal, ok := config["output_unigram"].(bool)
	if ok {
		outputUnigram = outVal
	}
	return NewCJKBigramFilter(outputUnigram), nil
}

func init() {
	registry.RegisterTokenFilter(BigramName, CJKBigramFilterConstructor)
}
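The two-slot ring is what keeps the alignment check cheap: only the most recent ideographic token is retained, and a nonzero gap between the incoming token.Start and the buffered token's End (for example, where the tokenizer consumed a space) forces a flush so no bigram spans the gap. A minimal sketch of the filter in isolation (not part of the commit; the import path is inferred from the vendor directory):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/cjk"
)

func main() {
	// two ideographic tokens whose byte ranges touch, so they bigram together
	input := analysis.TokenStream{
		&analysis.Token{Term: []byte("世"), Type: analysis.Ideographic, Position: 1, Start: 0, End: 3},
		&analysis.Token{Term: []byte("界"), Type: analysis.Ideographic, Position: 2, Start: 3, End: 6},
	}
	filter := cjk.NewCJKBigramFilter(false)
	for _, token := range filter.Filter(input) {
		// prints the single Double token "世界" spanning [0,6)
		fmt.Printf("%s [%d,%d)\n", token.Term, token.Start, token.End)
	}
}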
420 vendor/github.com/blevesearch/bleve/analysis/language/cjk/cjk_bigram_test.go generated vendored
@ -1,420 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cjk

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestCJKBigramFilter(t *testing.T) {

	tests := []struct {
		outputUnigram bool
		input         analysis.TokenStream
		output        analysis.TokenStream
	}{
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Ideographic,
					Position: 2,
					Start:    5,
					End:      7,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Single,
					Position: 2,
					Start:    5,
					End:      7,
				},
			},
		},
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Ideographic,
					Position: 2,
					Start:    3,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("に"),
					Type:     analysis.Ideographic,
					Position: 3,
					Start:    6,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("ち"),
					Type:     analysis.Ideographic,
					Position: 4,
					Start:    9,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("は"),
					Type:     analysis.Ideographic,
					Position: 5,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("世"),
					Type:     analysis.Ideographic,
					Position: 6,
					Start:    15,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("界"),
					Type:     analysis.Ideographic,
					Position: 7,
					Start:    18,
					End:      21,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こん"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("んに"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("にち"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("ちは"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("は世"),
					Type:     analysis.Double,
					Position: 5,
					Start:    12,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("世界"),
					Type:     analysis.Double,
					Position: 6,
					Start:    15,
					End:      21,
				},
			},
		},
		{
			outputUnigram: true,
			input: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Ideographic,
					Position: 2,
					Start:    3,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("に"),
					Type:     analysis.Ideographic,
					Position: 3,
					Start:    6,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("ち"),
					Type:     analysis.Ideographic,
					Position: 4,
					Start:    9,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("は"),
					Type:     analysis.Ideographic,
					Position: 5,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("世"),
					Type:     analysis.Ideographic,
					Position: 6,
					Start:    15,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("界"),
					Type:     analysis.Ideographic,
					Position: 7,
					Start:    18,
					End:      21,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Single,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("こん"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Single,
					Position: 2,
					Start:    3,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("んに"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("に"),
					Type:     analysis.Single,
					Position: 3,
					Start:    6,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("にち"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("ち"),
					Type:     analysis.Single,
					Position: 4,
					Start:    9,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("ちは"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("は"),
					Type:     analysis.Single,
					Position: 5,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("は世"),
					Type:     analysis.Double,
					Position: 5,
					Start:    12,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("世"),
					Type:     analysis.Single,
					Position: 6,
					Start:    15,
					End:      18,
				},
				&analysis.Token{
					Term:     []byte("世界"),
					Type:     analysis.Double,
					Position: 6,
					Start:    15,
					End:      21,
				},
				&analysis.Token{
					Term:     []byte("界"),
					Type:     analysis.Single,
					Position: 7,
					Start:    18,
					End:      21,
				},
			},
		},
		{
			outputUnigram: false,
			input: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こ"),
					Type:     analysis.Ideographic,
					Position: 1,
					Start:    0,
					End:      3,
				},
				&analysis.Token{
					Term:     []byte("ん"),
					Type:     analysis.Ideographic,
					Position: 2,
					Start:    3,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("に"),
					Type:     analysis.Ideographic,
					Position: 3,
					Start:    6,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("ち"),
					Type:     analysis.Ideographic,
					Position: 4,
					Start:    9,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("は"),
					Type:     analysis.Ideographic,
					Position: 5,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("cat"),
					Type:     analysis.AlphaNumeric,
					Position: 6,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("世"),
					Type:     analysis.Ideographic,
					Position: 7,
					Start:    18,
					End:      21,
				},
				&analysis.Token{
					Term:     []byte("界"),
					Type:     analysis.Ideographic,
					Position: 8,
					Start:    21,
					End:      24,
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("こん"),
					Type:     analysis.Double,
					Position: 1,
					Start:    0,
					End:      6,
				},
				&analysis.Token{
					Term:     []byte("んに"),
					Type:     analysis.Double,
					Position: 2,
					Start:    3,
					End:      9,
				},
				&analysis.Token{
					Term:     []byte("にち"),
					Type:     analysis.Double,
					Position: 3,
					Start:    6,
					End:      12,
				},
				&analysis.Token{
					Term:     []byte("ちは"),
					Type:     analysis.Double,
					Position: 4,
					Start:    9,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("cat"),
					Type:     analysis.AlphaNumeric,
					Position: 6,
					Start:    12,
					End:      15,
				},
				&analysis.Token{
					Term:     []byte("世界"),
					Type:     analysis.Double,
					Position: 7,
					Start:    18,
					End:      24,
				},
			},
		},
	}

	for _, test := range tests {
		cjkBigramFilter := NewCJKBigramFilter(test.outputUnigram)
		actual := cjkBigramFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output, actual)
		}
	}
}
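The unigram output exercised above is also reachable through configuration: CJKBigramFilterConstructor reads an optional output_unigram boolean from its config map and defaults to false when the key is absent or mistyped. A sketch (not part of the commit; the import path is inferred from the vendor directory):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/language/cjk"
)

func main() {
	// a nil cache is fine here: the constructor only reads the config map
	filter, err := cjk.CJKBigramFilterConstructor(
		map[string]interface{}{"output_unigram": true}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", filter) // *cjk.CJKBigramFilter
}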
58 vendor/github.com/blevesearch/bleve/analysis/language/ckb/analyzer_ckb.go generated vendored
@ -1,58 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build icu full

package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "ckb"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	normCkbFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopCkbFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerCkbFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			normCkbFilter,
			toLowerFilter,
			stopCkbFilter,
			stemmerCkbFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
74 vendor/github.com/blevesearch/bleve/analysis/language/ckb/analyzer_ckb_test.go generated vendored
@ -1,74 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build icu full

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestSoraniAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stop word removal
		{
			input: []byte("ئەم پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 2,
					Start:    7,
					End:      17,
				},
			},
		},
		{
			input: []byte("پیاوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      10,
				},
			},
		},
		{
			input: []byte("پیاو"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      8,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
113 vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_normalize.go generated vendored
@ -1,113 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"bytes"
	"unicode"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_ckb"

const (
	Yeh        = '\u064A'
	DotlessYeh = '\u0649'
	FarsiYeh   = '\u06CC'

	Kaf   = '\u0643'
	Keheh = '\u06A9'

	Heh            = '\u0647'
	Ae             = '\u06D5'
	Zwnj           = '\u200C'
	HehDoachashmee = '\u06BE'
	TehMarbuta     = '\u0629'

	Reh       = '\u0631'
	Rreh      = '\u0695'
	RrehAbove = '\u0692'

	Tatweel  = '\u0640'
	Fathatan = '\u064B'
	Dammatan = '\u064C'
	Kasratan = '\u064D'
	Fatha    = '\u064E'
	Damma    = '\u064F'
	Kasra    = '\u0650'
	Shadda   = '\u0651'
	Sukun    = '\u0652'
)

type SoraniNormalizeFilter struct {
}

func NewSoraniNormalizeFilter() *SoraniNormalizeFilter {
	return &SoraniNormalizeFilter{}
}

func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case Yeh, DotlessYeh:
			runes[i] = FarsiYeh
		case Kaf:
			runes[i] = Keheh
		case Zwnj:
			if i > 0 && runes[i-1] == Heh {
				runes[i-1] = Ae
			}
			runes = analysis.DeleteRune(runes, i)
			i--
		case Heh:
			if i == len(runes)-1 {
				runes[i] = Ae
			}
		case TehMarbuta:
			runes[i] = Ae
		case HehDoachashmee:
			runes[i] = Heh
		case Reh:
			if i == 0 {
				runes[i] = Rreh
			}
		case RrehAbove:
			runes[i] = Rreh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			i--
		default:
			if unicode.In(runes[i], unicode.Cf) {
				runes = analysis.DeleteRune(runes, i)
				i--
			}
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewSoraniNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
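For reference, the normalizer above folds Arabic-script variants into the forms conventional for Sorani: Yeh and dotless Yeh become Farsi Yeh, Kaf becomes Keheh, word-final Heh and Teh Marbuta become Ae, ZWNJ after Heh marks a final Ae, and tatweel, harakat, and other format characters are deleted. A small sketch of the exported filter (not part of the commit; the import path is inferred from the vendor directory):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/ckb"
)

func main() {
	filter := ckb.NewSoraniNormalizeFilter()
	// Yeh (U+064A) folds to Farsi Yeh (U+06CC); the kasra (U+0650) is deleted
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("\u064A\u0650")},
	}
	out := filter.Filter(stream) // mutates the tokens in place
	fmt.Printf("%q\n", out[0].Term) // "ی" (U+06CC)
}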
318 vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_normalize_test.go generated vendored
@ -1,318 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)

func TestSoraniNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// test Y
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064A"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06CC"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0649"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06CC"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06CC"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06CC"),
				},
			},
		},
		// test K
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0643"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06A9"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06A9"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06A9"),
				},
			},
		},
		// test H
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0647\u200C"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06D5"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0647\u200C\u06A9"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06D5\u06A9"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06BE"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0647"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0629"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u06D5"),
				},
			},
		},
		// test final H
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0647\u0647\u0647"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0647\u0647\u06D5"),
				},
			},
		},
		// test RR
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0692"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0695"),
				},
			},
		},
		// test initial RR
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0631\u0631\u0631"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0695\u0631\u0631"),
				},
			},
		},
		// test remove
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0640"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064B"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064C"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064D"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064E"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u064F"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0650"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0651"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u0652"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u200C"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
		// empty
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	soraniNormalizeFilter := NewSoraniNormalizeFilter()
	for _, test := range tests {
		actual := soraniNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
		}
	}
}
143 vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_stemmer_filter.go generated vendored
@ -1,143 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"bytes"
	"unicode/utf8"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_ckb"

type SoraniStemmerFilter struct {
}

func NewSoraniStemmerFilter() *SoraniStemmerFilter {
	return &SoraniStemmerFilter{}
}

func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		// if not protected keyword, stem it
		if !token.KeyWord {
			stemmed := stem(token.Term)
			token.Term = stemmed
		}
	}
	return input
}

func stem(input []byte) []byte {
	inputLen := utf8.RuneCount(input)

	// postposition
	if inputLen > 5 && bytes.HasSuffix(input, []byte("دا")) {
		input = truncateRunes(input, 2)
		inputLen = utf8.RuneCount(input)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("نا")) {
		input = truncateRunes(input, 1)
		inputLen = utf8.RuneCount(input)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەوە")) {
		input = truncateRunes(input, 3)
		inputLen = utf8.RuneCount(input)
	}

	// possessive pronoun
	if inputLen > 6 &&
		(bytes.HasSuffix(input, []byte("مان")) ||
			bytes.HasSuffix(input, []byte("یان")) ||
			bytes.HasSuffix(input, []byte("تان"))) {
		input = truncateRunes(input, 3)
		inputLen = utf8.RuneCount(input)
	}

	// indefinite singular ezafe
	if inputLen > 6 && bytes.HasSuffix(input, []byte("ێکی")) {
		return truncateRunes(input, 3)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یەکی")) {
		return truncateRunes(input, 4)
	}

	if inputLen > 5 && bytes.HasSuffix(input, []byte("ێک")) {
		// indefinite singular
		return truncateRunes(input, 2)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یەک")) {
		// indefinite singular
		return truncateRunes(input, 3)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەکە")) {
		// definite singular
		return truncateRunes(input, 3)
	} else if inputLen > 5 && bytes.HasSuffix(input, []byte("کە")) {
		// definite singular
		return truncateRunes(input, 2)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("ەکان")) {
		// definite plural
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("کان")) {
		// definite plural
		return truncateRunes(input, 3)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانی")) {
		// indefinite plural ezafe
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انی")) {
		// indefinite plural ezafe
		return truncateRunes(input, 3)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یان")) {
		// indefinite plural
		return truncateRunes(input, 3)
	} else if inputLen > 5 && bytes.HasSuffix(input, []byte("ان")) {
		// indefinite plural
		return truncateRunes(input, 2)
	} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانە")) {
		// demonstrative plural
		return truncateRunes(input, 4)
	} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انە")) {
		// demonstrative plural
		return truncateRunes(input, 3)
	} else if inputLen > 5 && (bytes.HasSuffix(input, []byte("ایە")) || bytes.HasSuffix(input, []byte("ەیە"))) {
		// demonstrative singular
		return truncateRunes(input, 2)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ە")) {
		// demonstrative singular
		return truncateRunes(input, 1)
	} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ی")) {
		// absolute singular ezafe
		return truncateRunes(input, 1)
	}
	return input
}

func truncateRunes(input []byte, num int) []byte {
	runes := bytes.Runes(input)
	runes = runes[:len(runes)-num]
	out := buildTermFromRunes(runes)
	return out
}

func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*4)
	for _, r := range runes {
		runeBytes := make([]byte, utf8.RuneLen(r))
		utf8.EncodeRune(runeBytes, r)
		rv = append(rv, runeBytes...)
	}
	return rv
}

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewSoraniStemmerFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
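The stemmer above measures length in runes, not bytes: each inputLen > N guard preserves a minimum stem after the multi-byte Arabic-script suffix is removed, and longer suffixes are tested before their shorter substrings, so for example یەکی wins over ی. A small sketch (not part of the commit; the import path is inferred from the vendor directory, and the sample word comes from the tests below):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/ckb"
)

func main() {
	filter := ckb.NewSoraniStemmerFilter()
	// "پیاوێک" ("a man", 6 runes) drops the indefinite singular suffix "ێک"
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("پیاوێک")},
	}
	out := filter.Filter(stream)
	fmt.Printf("%s\n", out[0].Term) // پیاو
}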
294 vendor/github.com/blevesearch/bleve/analysis/language/ckb/sorani_stemmer_filter_test.go generated vendored
@ -1,294 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/tokenizers/single_token"
)

func TestSoraniStemmerFilter(t *testing.T) {

	// in order to match the lucene tests
	// we will test with an analyzer, not just the stemmer
	analyzer := analysis.Analyzer{
		Tokenizer: single_token.NewSingleTokenTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			NewSoraniNormalizeFilter(),
			NewSoraniStemmerFilter(),
		},
	}

	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{ // -ek
			input: []byte("پیاوێک"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{ // -yek
			input: []byte("دەرگایەک"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // -aka
			input: []byte("پیاوەكە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -ka
			input: []byte("دەرگاكە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -a
			input: []byte("کتاویە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("کتاوی"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{ // -ya
			input: []byte("دەرگایە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -An
			input: []byte("پیاوان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{ // -yAn
			input: []byte("دەرگایان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // -akAn
			input: []byte("پیاوەکان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // -kAn
			input: []byte("دەرگاکان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // -Ana
			input: []byte("پیاوانە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پیاو"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -yAna
			input: []byte("دەرگایانە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دەرگا"),
					Position: 1,
					Start:    0,
					End:      18,
				},
			},
		},
		{ // Ezafe singular
			input: []byte("هۆتیلی"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("هۆتیل"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{ // Ezafe indefinite
			input: []byte("هۆتیلێکی"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("هۆتیل"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // Ezafe plural
			input: []byte("هۆتیلانی"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("هۆتیل"),
					Position: 1,
					Start:    0,
					End:      16,
				},
			},
		},
		{ // -awa
			input: []byte("دوورەوە"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("دوور"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -dA
			input: []byte("نیوەشەودا"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("نیوەشەو"),
					Position: 1,
					Start:    0,
					End:      18,
				},
			},
		},
		{ // -A
			input: []byte("سۆرانا"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("سۆران"),
					Position: 1,
					Start:    0,
					End:      12,
				},
			},
		},
		{ // -mAn
			input: []byte("پارەمان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پارە"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -tAn
			input: []byte("پارەتان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پارە"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // -yAn
			input: []byte("پارەیان"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("پارە"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{ // empty
			input: []byte(""),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte(""),
					Position: 1,
					Start:    0,
					End:      0,
				},
			},
		},
	}

	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("for input %s(% x)", test.input, test.input)
			t.Errorf("\texpected:")
			for _, token := range test.output {
				t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
			}
			t.Errorf("\tactual:")
			for _, token := range actual {
				t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
			}
		}
	}
}
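Worth noting when reading the fixtures above: Start and End are byte offsets into the UTF-8 input, not character counts, which is why the six-letter پیاوێک ends at 12. A quick standalone check (standard library only):

package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	input := "پیاوێک"
	fmt.Println(len(input))                    // 12 bytes — the End offset in the test
	fmt.Println(utf8.RuneCountInString(input)) // 6 letters
}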
28 vendor/github.com/blevesearch/bleve/analysis/language/ckb/stop_filter_ckb.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
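For context, these Constructor/init pairs are how every bleve component becomes reachable by name. A hedged sketch of resolving the filter registered above through a registry cache, mirroring how the vendored tests do it; the blank import for side effects is illustrative and assumes the package still registers itself in init():

package main

import (
	"fmt"

	_ "github.com/blevesearch/bleve/analysis/language/ckb" // triggers the init() registrations
	"github.com/blevesearch/bleve/registry"
)

func main() {
	cache := registry.NewCache()
	stopFilter, err := cache.TokenFilterNamed("stop_ckb") // StopName from this package
	if err != nil {
		panic(err)
	}
	fmt.Printf("resolved %T\n", stopFilter)
}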
160 vendor/github.com/blevesearch/bleve/analysis/language/ckb/stop_words_ckb.go generated vendored
@ -1,160 +0,0 @@
package ckb

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_ckb"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc

# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر

# demonstratives
# that
ئەو
# this
ئەم

# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان

# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(SoraniStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
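The list format above is plain one-token-per-line text with #-prefixed comments, loaded through TokenMap.LoadBytes. A small sketch, assuming TokenMap behaves as a set keyed by token (a map in this version of bleve) and that the loader skips comments and blank lines, as the file above relies on:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	tm := analysis.NewTokenMap()
	// Same shape as SoraniStopWords: comments and blank lines are skipped.
	err := tm.LoadBytes([]byte("# two stop words\nو\nکە\n"))
	if err != nil {
		panic(err)
	}
	fmt.Println(len(tm)) // 2
}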
28 vendor/github.com/blevesearch/bleve/analysis/language/cs/stop_filter_cs.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package cs

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
196 vendor/github.com/blevesearch/bleve/analysis/language/cs/stop_words_cs.go generated vendored
@ -1,196 +0,0 @@
package cs

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_cs"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string

var CzechStopWords = []byte(`a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
až
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
už
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
má
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
já
on
ona
ono
oni
ony
my
vy
jí
ji
mě
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(CzechStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
54 vendor/github.com/blevesearch/bleve/analysis/language/da/analyzer_da.go generated vendored
@ -1,54 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "da"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopDaFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerDaFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopDaFilter,
			stemmerDaFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
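The paired constraints at the top of this file are easy to misread, so here is a sketch of how they combine; the comment spells out the tag sets under which the Danish analyzer is actually compiled and registered:

// +build libstemmer full
// +build icu full

// Multiple +build lines AND together; space-separated tags within one
// line OR together. So the analyzer above compiles under either of:
//
//	go build -tags "libstemmer icu"
//	go build -tags full
//
// Without these tags the file is excluded, "da" is never registered,
// and cache.AnalyzerNamed("da") returns an error.
package da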
69 vendor/github.com/blevesearch/bleve/analysis/language/da/analyzer_da_test.go generated vendored
@ -1,69 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package da

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestDanishAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{
			input: []byte("undersøg"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("undersøg"),
					Position: 1,
					Start:    0,
					End:      9,
				},
			},
		},
		{
			input: []byte("undersøgelse"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("undersøg"),
					Position: 1,
					Start:    0,
					End:      13,
				},
			},
		},
		// stop word
		{
			input:  []byte("på"),
			output: analysis.TokenStream{},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
28 vendor/github.com/blevesearch/bleve/analysis/language/da/stemmer_da.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stemmer_filter"
	"github.com/blevesearch/bleve/registry"
)

const StemmerName = "stemmer_da"

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return stemmer_filter.NewStemmerFilter("da")
}

func init() {
	registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
}
28 vendor/github.com/blevesearch/bleve/analysis/language/da/stop_filter_da.go generated vendored
@ -1,28 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
	"github.com/blevesearch/bleve/registry"
)

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	tokenMap, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	return stop_tokens_filter.NewStopTokensFilter(tokenMap), nil
}

func init() {
	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
}
134 vendor/github.com/blevesearch/bleve/analysis/language/da/stop_words_da.go generated vendored
@ -1,134 +0,0 @@
package da

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const StopName = "stop_da"

// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string

var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
 | This file is distributed under the BSD License.
 | See http://snowball.tartarus.org/license.php
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | - Encoding was converted to UTF-8.
 | - This notice was added.
 |
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 |
 | A Danish stop word list. Comments begin with vertical bar. Each stop
 | word is at the start of a line.
 |
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | a large text sample.

og | and
i | in
jeg | I
det | that (dem. pronoun)/it (pers. pronoun)
at | that (in front of a sentence)/to (with infinitive)
en | a/an
den | it (pers. pronoun)/that (dem. pronoun)
til | to/at/for/until/against/by/of/into, more
er | present tense of "to be"
som | who, as
på | on/upon/in/on/at/to/after/of/with/for, on
de | they
med | with/by/in, along
han | he
af | of/by/from/off/for/in/with/on, off
for | at/for/to/from/by/of/ago, in front/before, because
ikke | not
der | who/which, there/those
var | past tense of "to be"
mig | me/myself
sig | oneself/himself/herself/itself/themselves
men | but
et | a/an/one, one (number), someone/somebody/one
har | present tense of "to have"
om | round/about/for/in/a, about/around/down, if
vi | we
min | my
havde | past tense of "to have"
ham | him
hun | she
nu | now
over | over/above/across/by/beyond/past/on/about, over/past
da | then, when/as/since
fra | from/off/since, off, since
du | you
ud | out
sin | his/her/its/one's
dem | them
os | us/ourselves
op | up
man | you/one
hans | his
hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/youself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)
blev | past tense of "to stay/to remain/to get/to become"
kunne | could
ind | in
når | when
være | present tense of "to be"
dog | however/yet/after all
noget | something
ville | would
jo | you know/you see (adv), yes
deres | their/theirs
efter | after/behind/according to/for/by/from, later/afterwards
ned | down
skulle | should
denne | this
end | than
dette | this
mit | my/mine
også | also
under | under/beneath/below/during, below/underneath
have | have
dig | you
anden | other
hende | her
mine | my
alt | everything
meget | much/very, plenty of
sit | his, her, its, one's
sine | his, her, its, one's
vor | our
mod | against
disse | these
hvis | if
din | your/yours
nogle | some
hos | by/at
blive | be/become
mange | many
ad | by/through
bliver | present tense of "to be/to become"
hendes | her/hers
været | be
thi | for (conj)
jer | you
sådan | such, like this/like that
`)

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(DanishStopWords)
	return rv, err
}

func init() {
	registry.RegisterTokenMap(StopName, TokenMapConstructor)
}
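Unlike the ckb list, this one is in snowball format, where a vertical bar starts a comment. Both styles feed the same stop-token machinery; a hedged sketch of the end result, assuming the token-map loader treats both # and | as comment markers (which is what the two vendored lists rely on):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/stop_tokens_filter"
)

func main() {
	tm := analysis.NewTokenMap()
	// snowball format: "|" starts a comment, one word per line
	if err := tm.LoadBytes([]byte("og | and\ni | in\n")); err != nil {
		panic(err)
	}
	filter := stop_tokens_filter.NewStopTokensFilter(tm)
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("og"), Position: 1},
		&analysis.Token{Term: []byte("hund"), Position: 2},
	}
	for _, tok := range filter.Filter(stream) {
		fmt.Printf("%s\n", tok.Term) // only "hund" survives
	}
}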
59 vendor/github.com/blevesearch/bleve/analysis/language/de/analyzer_de.go generated vendored
@ -1,59 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package de

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
	"github.com/blevesearch/bleve/analysis/tokenizers/icu"
	"github.com/blevesearch/bleve/registry"
)

const AnalyzerName = "de"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	// the stop filter must be looked up under StopName; fetching it under
	// NormalizeName would silently substitute the normalizer for the stop list
	stopDeFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerDeFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopDeFilter,
			normalizeDeFilter,
			stemmerDeFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
}
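Putting the German pipeline together end to end (lowercase, then stop words, then umlaut folding, then snowball stemming): a usage sketch mirroring the test that follows. It assumes a build with the libstemmer and icu tags, since otherwise the "de" analyzer is never registered:

package main

import (
	"fmt"

	_ "github.com/blevesearch/bleve/analysis/language/de" // registers "de" via init()
	"github.com/blevesearch/bleve/registry"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("de") // AnalyzerName above
	if err != nil {
		panic(err) // e.g. when built without -tags "libstemmer icu"
	}
	for _, tok := range analyzer.Analyze([]byte("Schaltflächen")) {
		fmt.Printf("%s\n", tok.Term) // schaltflach
	}
}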
97 vendor/github.com/blevesearch/bleve/analysis/language/de/analyzer_de_test.go generated vendored
@ -1,97 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

// +build libstemmer full
// +build icu full

package de

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

func TestGermanAnalyzer(t *testing.T) {
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("Tisch"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("tisch"),
					Position: 1,
					Start:    0,
					End:      5,
				},
			},
		},
		{
			input: []byte("Tische"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("tisch"),
					Position: 1,
					Start:    0,
					End:      6,
				},
			},
		},
		{
			input: []byte("Tischen"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("tisch"),
					Position: 1,
					Start:    0,
					End:      7,
				},
			},
		},
		// german specials
		{
			input: []byte("Schaltflächen"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("schaltflach"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
		{
			input: []byte("Schaltflaechen"),
			output: analysis.TokenStream{
				&analysis.Token{
					Term:     []byte("schaltflach"),
					Position: 1,
					Start:    0,
					End:      14,
				},
			},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %v, got %v", test.output, actual)
		}
	}
}
94 vendor/github.com/blevesearch/bleve/analysis/language/de/german_normalize.go generated vendored
@ -1,94 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package de

import (
	"bytes"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const NormalizeName = "normalize_de"

const (
	N = 0 /* ordinary state */
	V = 1 /* stops 'u' from entering umlaut state */
	U = 2 /* umlaut state, allows e-deletion */
)

type GermanNormalizeFilter struct {
}

func NewGermanNormalizeFilter() *GermanNormalizeFilter {
	return &GermanNormalizeFilter{}
}

func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := normalize(token.Term)
		token.Term = term
	}
	return input
}

func normalize(input []byte) []byte {
	state := N
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case 'a', 'o':
			state = U
		case 'u':
			if state == N {
				state = U
			} else {
				state = V
			}
		case 'e':
			if state == U {
				runes = analysis.DeleteRune(runes, i)
				i--
			}
			state = V
		case 'i', 'q', 'y':
			state = V
		case 'ä':
			runes[i] = 'a'
			state = V
		case 'ö':
			runes[i] = 'o'
			state = V
		case 'ü':
			runes[i] = 'u'
			state = V
		case 'ß':
			runes[i] = 's'
			i++
			// newrunes := make([]rune, len(runes)+1)
			// copy(newrunes, runes)
			// runes = newrunes
			// runes[i] = 's'
			runes = analysis.InsertRune(runes, i, 's')
			state = N
		default:
			state = N
		}
	}
	return analysis.BuildTermFromRunes(runes)
}

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewGermanNormalizeFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
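The point of the state machine above is that the umlaut spelling and the ae/oe/ue digraph spelling normalize to the same term: ä/ö/ü map to the bare vowel, and an e is deleted only in the umlaut state U, i.e. right after a bare a/o/u. A sketch running the exported filter on both spellings of the word from the analyzer test; lowercase input is assumed, since this filter normally runs after the lowercase filter:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/language/de"
)

func main() {
	filter := de.NewGermanNormalizeFilter()
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("schaltflächen")},  // umlaut spelling: ä -> a
		&analysis.Token{Term: []byte("schaltflaechen")}, // digraph spelling: a enters state U, e is deleted
	}
	for _, tok := range filter.Filter(stream) {
		fmt.Printf("%s\n", tok.Term) // both print "schaltflachen"
	}
}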
Some files were not shown because too many files have changed in this diff.