Upgrade server dependencies, manage them with govendor
This commit is contained in:
parent
ebee2746d6
commit
971278e7e5
1748 changed files with 196165 additions and 194500 deletions
20
vendor/github.com/blevesearch/segment/export_test.go
generated
vendored
20
vendor/github.com/blevesearch/segment/export_test.go
generated
vendored
|
@ -1,20 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package segment
|
||||
|
||||
// Exported for testing only.
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func (s *Segmenter) MaxTokenSize(n int) {
|
||||
if n < utf8.UTFMax || n > 1e9 {
|
||||
panic("bad max token size")
|
||||
}
|
||||
if n < len(s.buf) {
|
||||
s.buf = make([]byte, n)
|
||||
}
|
||||
s.maxTokenSize = n
|
||||
}
|
330
vendor/github.com/blevesearch/segment/ragel/unicode2ragel.rb
generated
vendored
330
vendor/github.com/blevesearch/segment/ragel/unicode2ragel.rb
generated
vendored
|
@ -1,330 +0,0 @@
|
|||
#!/usr/bin/env ruby
|
||||
#
|
||||
# This script has been updated to accept more command-line arguments:
|
||||
#
|
||||
# -u, --url URL to process
|
||||
# -m, --machine Machine name
|
||||
# -p, --properties Properties to add to the machine
|
||||
# -o, --output Write output to file
|
||||
#
|
||||
# Updated by: Marty Schoch <marty.schoch@gmail.com>
|
||||
#
|
||||
# This script uses the unicode spec to generate a Ragel state machine
|
||||
# that recognizes unicode alphanumeric characters. It generates 5
|
||||
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
|
||||
# Currently supported encodings are UTF-8 [default] and UCS-4.
|
||||
#
|
||||
# Usage: unicode2ragel.rb [options]
|
||||
# -e, --encoding [ucs4 | utf8] Data encoding
|
||||
# -h, --help Show this message
|
||||
#
|
||||
# This script was originally written as part of the Ferret search
|
||||
# engine library.
|
||||
#
|
||||
# Author: Rakan El-Khalil <rakan@well.com>
|
||||
|
||||
require 'optparse'
|
||||
require 'open-uri'
|
||||
|
||||
ENCODINGS = [ :utf8, :ucs4 ]
|
||||
ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }
|
||||
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
|
||||
DEFAULT_MACHINE_NAME= "WChar"
|
||||
|
||||
###
|
||||
# Display vars & default option
|
||||
|
||||
TOTAL_WIDTH = 80
|
||||
RANGE_WIDTH = 23
|
||||
@encoding = :utf8
|
||||
@chart_url = DEFAULT_CHART_URL
|
||||
machine_name = DEFAULT_MACHINE_NAME
|
||||
properties = []
|
||||
@output = $stdout
|
||||
|
||||
###
|
||||
# Option parsing
|
||||
|
||||
###
# Command-line handling. Each switch overwrites one of the defaults set
# above; the chosen encoding is validated once parsing has finished.
cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    # The argument is declared optional, so a bare -e hands us nil. to_s turns
    # that into an empty encoding name, which fails the validation below with
    # a usage message instead of crashing on nil.downcase.
    @encoding = o.to_s.downcase.to_sym
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
  opts.on("-u", "--url URL", "URL to process") do |o|
    @chart_url = o
  end
  opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
    machine_name = o
  end
  opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
    properties = o
  end
  opts.on("-o", "--output FILE", "output file") do |o|
    @output = File.new(o, "w+")
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  puts "Invalid encoding: #{@encoding}"
  puts cli_opts
  # Report the failure to the caller; a bare exit would signal success.
  exit 1
end
|
||||
|
||||
##
|
||||
# Downloads the document at url and yields every alpha line's hex
|
||||
# range and description.
|
||||
|
||||
##
# Reads the Unicode data file at +url+ and yields one entry for every data
# line that carries +property+.
#
# url      - location of the chart; a URL or anything else open accepts.
# property - property name as written in the second column of the file. It
#            is interpolated into a regexp unescaped.
#
# Yields an Integer Range of codepoints (a single codepoint is yielded as
# n..n) and the text that follows the last '#' on the line, stripped.
def each_alpha( url, property )
  # Ruby 3.0 removed open-uri's patch of Kernel#open, so URLs must go through
  # URI.open. Older Rubies that lack a public URI.open keep using open.
  opener = URI.respond_to?(:open) ? URI : self
  opener.send( :open, url ) do |file|
    file.each_line do |line|
      next if line =~ /^#/
      next if line !~ /; #{property} #/

      range, description = line.split(/;/)
      range = range.strip
      # Keep only the trailing comment: everything after the last '#'.
      description = description.gsub(/.*#/, '').strip

      if range =~ /\.\./
        start, stop = range.split '..'
      else
        start = stop = range
      end

      yield(start.hex..stop.hex, description)
    end
  end
end
|
||||
|
||||
###
|
||||
# Formats to hex at minimum width
|
||||
|
||||
###
# Renders n as upper-case hexadecimal, left-padded with a single "0" when
# needed so that the result always has an even number of digits.
def to_hex( n )
  digits = format("%0X", n)
  digits.length.odd? ? "0" + digits : digits
end
|
||||
|
||||
###
|
||||
# UCS4 is just a straight hex conversion of the unicode codepoint.
|
||||
|
||||
###
# UCS-4 needs no byte-level encoding: the codepoints are written out in hex.
# Returns a one-element Array holding "0xAA" for a single codepoint or
# "0xAA..0xBB" for a real range.
def to_ucs4( range )
  first, last = range.begin, range.end
  text = "0x#{to_hex(first)}"
  text += "..0x#{to_hex(last)}" unless first == last
  [text]
end
|
||||
|
||||
##
|
||||
# 0x00 - 0x7f -> 0zzzzzzz[7]
|
||||
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
|
||||
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
|
||||
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
|
||||
|
||||
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
|
||||
|
||||
###
# Encodes codepoint n as UTF-8 and returns the bytes as one hex string
# (via to_hex), e.g. 0x20AC => "E282AC". Codepoints above 0x10ffff are
# outside the table above and encode as zero.
def to_utf8_enc( n )
  bytes =
    if n <= 0x7f
      [n]
    elsif n <= 0x7ff
      [0xc0 | (n >> 6), 0x80 | (n & 0x3f)]
    elsif n <= 0xffff
      [0xe0 | (n >> 12), 0x80 | ((n >> 6) & 0x3f), 0x80 | (n & 0x3f)]
    elsif n <= 0x10ffff
      [0xf0 | (n >> 18), 0x80 | ((n >> 12) & 0x3f),
       0x80 | ((n >> 6) & 0x3f), 0x80 | (n & 0x3f)]
    else
      [0]
    end

  # Pack the bytes big-endian into a single integer before formatting.
  packed = bytes.inject(0) { |acc, byte| (acc << 8) | byte }
  to_hex(packed)
end
|
||||
|
||||
###
# Decodes a UTF-8 byte sequence given as a hex string (e.g. "E282AC") back
# into the integer codepoint it encodes.
#
# The number of bytes is inferred from the magnitude of the packed value.
# Payload bits are extracted by masking only, so a lone continuation or
# out-of-range byte such as "80" or "FF" yields just its low six bits;
# count_codepoints passes such single-byte bounds. Values above 0xf7ffffff
# return 0.
def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    # The lead byte's nibble sits above two 6-bit groups, hence a shift of 12
    # (the previous shift of 10 made it overlap the middle group).
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end
|
||||
|
||||
###
|
||||
# Given a range, splits it up into ranges that can be continuously
|
||||
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
|
||||
# This is not strictly needed since the current [5.1] unicode standard
|
||||
# doesn't have ranges that straddle utf8 boundaries. This is included
|
||||
# for completeness as there is no telling if that will ever change.
|
||||
|
||||
###
# Splits +range+ at the UTF-8 length boundaries listed in UTF8_BOUNDARIES so
# that every returned sub-range is encoded with the same number of bytes.
# Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
#
# Returns an Array of Ranges. Anything beyond the last boundary is dropped,
# so a range that starts above it produces an empty Array.
def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    next if range.begin > max
    return ranges << range if range.end <= max

    # The parentheses matter: << binds tighter than .., so without them the
    # expression would be read as (ranges << range.begin)..max and raise.
    ranges << (range.begin..max)
    range = (max + 1)..range.end
  end
  ranges
end
|
||||
|
||||
###
# Expands a pair of equally long hex strings (two digits per byte) into Ragel
# alternatives that cover every byte sequence from +start+ to +stop+.
#
# Returns an Array of Strings; each is a space-terminated list of byte terms
# such as "0xE1 0x85..0x86 0x00..0xFF ". An empty +start+ yields [""].
def build_range( start, stop )
  byte_count = start.size / 2
  return [""] if byte_count < 1

  trailing = byte_count - 1
  lead_lo = start[0..1]
  lead_hi = stop[0..1]

  # Same leading byte on both ends: emit it once and recurse on the rest.
  if lead_lo == lead_hi
    tails = build_range(start[2..-1], stop[2..-1])
    return tails.map { |tail| "0x#{lead_lo} " + tail }
  end

  # Different leading bytes and nothing after them: a plain byte range.
  return ["0x#{lead_lo}..0x#{lead_hi} "] if trailing.zero?

  # Different leading bytes with trailing bytes. 0x123456..0x56789A becomes
  #   0x123456 .. 0x12FFFF
  #   0x130000 .. 0x55FFFF
  #   0x560000 .. 0x56789A
  pieces = build_range(start, lead_lo + "FF" * trailing)

  # The middle piece only exists when the leading bytes are not adjacent.
  unless lead_lo.hex + 1 == lead_hi.hex
    ceiling = lead_hi == "FF" ? "FF" : to_hex(lead_hi.hex - 1)
    pieces << "0x#{to_hex(lead_lo.hex + 1)}..0x#{ceiling} " + "0x00..0xFF " * trailing
  end

  # With an upper lead of FF the middle piece already reaches the end.
  pieces.concat(build_range(lead_hi + "00" * trailing, stop)) unless lead_hi == "FF"
  pieces
end
|
||||
|
||||
###
# Encodes a codepoint range as Ragel byte-range expressions for UTF-8 input.
# The range is split at UTF-8 length boundaries by utf8_ranges, and each
# piece is expanded by build_range from the hex encodings of its endpoints.
#
# Returns a flat Array of Strings. flat_map replaces map + flatten! because
# flatten! returns nil when there is nothing to flatten, which would hand nil
# to the caller whenever utf8_ranges produces no pieces.
def to_utf8( range )
  utf8_ranges( range ).flat_map do |r|
    build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
  end
end
|
||||
|
||||
##
|
||||
# Perform a 3-way comparison of the number of codepoints advertised by
|
||||
# the unicode spec for the given range, the originally parsed range,
|
||||
# and the resulting utf8 encoded range.
|
||||
|
||||
###
# Counts how many byte sequences one generated alternative matches: the
# product of the sizes of all "0xAA..0xBB" terms in +code+. Terms without a
# ".." contribute a factor of one.
#
# When @encoding is :utf8 the bounds are decoded with from_utf8_enc before
# being subtracted; otherwise their plain hex values are used.
def count_codepoints( code )
  total = 1
  code.split(' ').each do |term|
    bounds = term.match(/0x(.+)\.\.0x(.+)/)
    next unless bounds

    low, high = bounds[1], bounds[2]
    span =
      if @encoding == :utf8
        from_utf8_enc(high) - from_utf8_enc(low) + 1
      else
        high.hex - low.hex + 1
      end
    total *= span
  end
  total
end
|
||||
|
||||
###
# Cross-checks three codepoint counts for one chart entry: the "[N]" figure
# advertised in +desc+ (1 when no such figure is present), the size of the
# parsed +range+, and the total covered by the generated +codes+.
#
# Returns true only when all three agree.
def is_valid?( range, desc, codes )
  advertised = 1
  advertised = Regexp.last_match(1).to_i if desc =~ /\[(\d+)\]/
  measured = range.end - range.begin + 1

  encoded = 0
  codes.each { |code| encoded += count_codepoints(code) }

  encoded == advertised && encoded == measured
end
|
||||
|
||||
##
|
||||
# Generate the state machine and write it to the configured output
|
||||
|
||||
###
# Writes one Ragel definition called +name+ to @output. The definition has
# one alternative per encoded byte range for every chart entry that carries
# +property+; the first alternative of each entry is annotated with the
# entry's description, shortened to fit the line.
#
# Raises a RuntimeError when the encoded ranges of an entry fail is_valid?.
def generate_machine( name, property )
  separator = " "
  @output.puts " #{name} = "
  each_alpha( @chart_url, property ) do |range, desc|
    codes = @encoding == :ucs4 ? to_ucs4(range) : to_utf8(range)

    unless is_valid?(range, desc, codes)
      raise "Invalid encoding of range #{range}: #{codes.inspect}"
    end

    # The code column is at least RANGE_WIDTH wide; whatever it takes beyond
    # that is deducted from the room left for the description.
    column = [codes.map { |c| c.size }.max, RANGE_WIDTH].max
    room = TOTAL_WIDTH - RANGE_WIDTH - 11 - (column - RANGE_WIDTH)
    desc = desc[0..room - 4] + "..." if desc.size > room

    codes.each_with_index do |encoded, idx|
      # Only the first alternative of an entry carries the description.
      label = idx.zero? ? desc : ""
      padded = format("%-#{column}s", encoded)
      @output.puts " #{separator} #{padded} ##{label}"
      separator = "|"
    end
  end
  @output.puts " ;"
  @output.puts ""
end
|
||||
|
||||
@output.puts <<EOF
|
||||
# The following Ragel file was autogenerated with #{$0}
|
||||
# from: #{@chart_url}
|
||||
#
|
||||
# It defines #{properties}.
|
||||
#
|
||||
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
|
||||
# and that your input is in #{@encoding}.
|
||||
|
||||
%%{
|
||||
machine #{machine_name};
|
||||
|
||||
EOF
|
||||
|
||||
properties.each { |x| generate_machine( x, x ) }
|
||||
|
||||
@output.puts <<EOF
|
||||
}%%
|
||||
EOF
|
101
vendor/github.com/blevesearch/segment/ragel/uscript.rl
generated
vendored
101
vendor/github.com/blevesearch/segment/ragel/uscript.rl
generated
vendored
|
@ -1,101 +0,0 @@
|
|||
# The following Ragel file was autogenerated with ragel/unicode2ragel.rb
|
||||
# from: http://www.unicode.org/Public/8.0.0/ucd/Scripts.txt
|
||||
#
|
||||
# It defines ["Hangul", "Han", "Hiragana"].
|
||||
#
|
||||
# To use this, make sure that your alphtype is set to unsigned char,
|
||||
# and that your input is in utf8.
|
||||
|
||||
%%{
|
||||
machine SCRIPTS;
|
||||
|
||||
Hangul =
|
||||
0xE1 0x84 0x80..0xFF #Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL...
|
||||
| 0xE1 0x85..0x86 0x00..0xFF #
|
||||
| 0xE1 0x87 0x00..0xBF #
|
||||
| 0xE3 0x80 0xAE..0xAF #Mc [2] HANGUL SINGLE DOT TONE MARK..HANGU...
|
||||
| 0xE3 0x84 0xB1..0xFF #Lo [94] HANGUL LETTER KIYEOK..HANGUL L...
|
||||
| 0xE3 0x85..0x85 0x00..0xFF #
|
||||
| 0xE3 0x86 0x00..0x8E #
|
||||
| 0xE3 0x88 0x80..0x9E #So [31] PARENTHESIZED HANGUL KIYEOK..PAREN...
|
||||
| 0xE3 0x89 0xA0..0xBE #So [31] CIRCLED HANGUL KIYEOK..CIRCLED HAN...
|
||||
| 0xEA 0xA5 0xA0..0xBC #Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANG...
|
||||
| 0xEA 0xB0 0x80..0xFF #Lo [11172] HANGUL SYLLABLE GA..HA...
|
||||
| 0xEA 0xB1..0xFF 0x00..0xFF #
|
||||
| 0xEB..0xEC 0x00..0xFF 0x00..0xFF #
|
||||
| 0xED 0x00 0x00..0xFF #
|
||||
| 0xED 0x01..0x9D 0x00..0xFF #
|
||||
| 0xED 0x9E 0x00..0xA3 #
|
||||
| 0xED 0x9E 0xB0..0xFF #Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUN...
|
||||
| 0xED 0x9F 0x00..0x86 #
|
||||
| 0xED 0x9F 0x8B..0xBB #Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANG...
|
||||
| 0xEF 0xBE 0xA0..0xBE #Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH...
|
||||
| 0xEF 0xBF 0x82..0x87 #Lo [6] HALFWIDTH HANGUL LETTER A..HALFWID...
|
||||
| 0xEF 0xBF 0x8A..0x8F #Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFW...
|
||||
| 0xEF 0xBF 0x92..0x97 #Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWI...
|
||||
| 0xEF 0xBF 0x9A..0x9C #Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWI...
|
||||
;
|
||||
|
||||
Han =
|
||||
0xE2 0xBA 0x80..0x99 #So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
|
||||
| 0xE2 0xBA 0x9B..0xFF #So [89] CJK RADICAL CHOKE..CJK RADICAL C-S...
|
||||
| 0xE2 0xBB 0x00..0xB3 #
|
||||
| 0xE2 0xBC 0x80..0xFF #So [214] KANGXI RADICAL ONE..KANGXI RAD...
|
||||
| 0xE2 0xBD..0xBE 0x00..0xFF #
|
||||
| 0xE2 0xBF 0x00..0x95 #
|
||||
| 0xE3 0x80 0x85 #Lm IDEOGRAPHIC ITERATION MARK
|
||||
| 0xE3 0x80 0x87 #Nl IDEOGRAPHIC NUMBER ZERO
|
||||
| 0xE3 0x80 0xA1..0xA9 #Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUM...
|
||||
| 0xE3 0x80 0xB8..0xBA #Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUM...
|
||||
| 0xE3 0x80 0xBB #Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
| 0xE3 0x90 0x80..0xFF #Lo [6582] CJK UNIFIED IDEOGRAPH-3400..C...
|
||||
| 0xE3 0x91..0xFF 0x00..0xFF #
|
||||
| 0xE4 0x00 0x00..0xFF #
|
||||
| 0xE4 0x01..0xB5 0x00..0xFF #
|
||||
| 0xE4 0xB6 0x00..0xB5 #
|
||||
| 0xE4 0xB8 0x80..0xFF #Lo [20950] CJK UNIFIED IDEOGRAPH-...
|
||||
| 0xE4 0xB9..0xFF 0x00..0xFF #
|
||||
| 0xE5..0xE8 0x00..0xFF 0x00..0xFF #
|
||||
| 0xE9 0x00 0x00..0xFF #
|
||||
| 0xE9 0x01..0xBE 0x00..0xFF #
|
||||
| 0xE9 0xBF 0x00..0x95 #
|
||||
| 0xEF 0xA4 0x80..0xFF #Lo [366] CJK COMPATIBILITY IDEOGRAPH-F9...
|
||||
| 0xEF 0xA5..0xA8 0x00..0xFF #
|
||||
| 0xEF 0xA9 0x00..0xAD #
|
||||
| 0xEF 0xA9 0xB0..0xFF #Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA...
|
||||
| 0xEF 0xAA..0xAA 0x00..0xFF #
|
||||
| 0xEF 0xAB 0x00..0x99 #
|
||||
| 0xF0 0xA0 0x80 0x80..0xFF #Lo [42711] CJK UNIFIED IDEOG...
|
||||
| 0xF0 0xA0 0x81..0xFF 0x00..0xFF #
|
||||
| 0xF0 0xA1..0xA9 0x00..0xFF 0x00..0xFF #
|
||||
| 0xF0 0xAA 0x00 0x00..0xFF #
|
||||
| 0xF0 0xAA 0x01..0x9A 0x00..0xFF #
|
||||
| 0xF0 0xAA 0x9B 0x00..0x96 #
|
||||
| 0xF0 0xAA 0x9C 0x80..0xFF #Lo [4149] CJK UNIFIED IDEOGRAPH-2A...
|
||||
| 0xF0 0xAA 0x9D..0xFF 0x00..0xFF #
|
||||
| 0xF0 0xAB 0x00 0x00..0xFF #
|
||||
| 0xF0 0xAB 0x01..0x9B 0x00..0xFF #
|
||||
| 0xF0 0xAB 0x9C 0x00..0xB4 #
|
||||
| 0xF0 0xAB 0x9D 0x80..0xFF #Lo [222] CJK UNIFIED IDEOGRAPH-2B7...
|
||||
| 0xF0 0xAB 0x9E..0x9F 0x00..0xFF #
|
||||
| 0xF0 0xAB 0xA0 0x00..0x9D #
|
||||
| 0xF0 0xAB 0xA0 0xA0..0xFF #Lo [5762] CJK UNIFIED IDEOGRAPH-2B...
|
||||
| 0xF0 0xAB 0xA1..0xFF 0x00..0xFF #
|
||||
| 0xF0 0xAC 0x00 0x00..0xFF #
|
||||
| 0xF0 0xAC 0x01..0xB9 0x00..0xFF #
|
||||
| 0xF0 0xAC 0xBA 0x00..0xA1 #
|
||||
| 0xF0 0xAF 0xA0 0x80..0xFF #Lo [542] CJK COMPATIBILITY IDEOGRA...
|
||||
| 0xF0 0xAF 0xA1..0xA7 0x00..0xFF #
|
||||
| 0xF0 0xAF 0xA8 0x00..0x9D #
|
||||
;
|
||||
|
||||
Hiragana =
|
||||
0xE3 0x81 0x81..0xFF #Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA ...
|
||||
| 0xE3 0x82 0x00..0x96 #
|
||||
| 0xE3 0x82 0x9D..0x9E #Lm [2] HIRAGANA ITERATION MARK..HIRAGANA ...
|
||||
| 0xE3 0x82 0x9F #Lo HIRAGANA DIGRAPH YORI
|
||||
| 0xF0 0x9B 0x80 0x81 #Lo HIRAGANA LETTER ARCHAIC YE
|
||||
| 0xF0 0x9F 0x88 0x80 #So SQUARE HIRAGANA HOKA
|
||||
;
|
||||
|
||||
}%%
|
1290
vendor/github.com/blevesearch/segment/ragel/uwb.rl
generated
vendored
1290
vendor/github.com/blevesearch/segment/ragel/uwb.rl
generated
vendored
File diff suppressed because it is too large
Load diff
29
vendor/github.com/blevesearch/segment/segment_fuzz_test.go
generated
vendored
29
vendor/github.com/blevesearch/segment/segment_fuzz_test.go
generated
vendored
|
@ -1,29 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build gofuzz_generate
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const fuzzPrefix = "workdir/corpus"
|
||||
|
||||
func TestGenerateWordSegmentFuzz(t *testing.T) {
|
||||
|
||||
os.MkdirAll(fuzzPrefix, 0777)
|
||||
for i, test := range unicodeWordTests {
|
||||
ioutil.WriteFile(fuzzPrefix+"/"+strconv.Itoa(i)+".txt", test.input, 0777)
|
||||
}
|
||||
}
|
241
vendor/github.com/blevesearch/segment/segment_test.go
generated
vendored
241
vendor/github.com/blevesearch/segment/segment_test.go
generated
vendored
|
@ -1,241 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Tests borrowed from Scanner to test Segmenter
|
||||
|
||||
// slowReader is a reader that returns only a few bytes at a time, to test the incremental
|
||||
// reads in Scanner.Scan.
|
||||
type slowReader struct {
|
||||
max int
|
||||
buf io.Reader
|
||||
}
|
||||
|
||||
func (sr *slowReader) Read(p []byte) (n int, err error) {
|
||||
if len(p) > sr.max {
|
||||
p = p[0:sr.max]
|
||||
}
|
||||
return sr.buf.Read(p)
|
||||
}
|
||||
|
||||
// genLine writes to buf a predictable but non-trivial line of text of length
|
||||
// n, including the terminal newline and an occasional carriage return.
|
||||
// If addNewline is false, the \r and \n are not emitted.
|
||||
func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
|
||||
buf.Reset()
|
||||
doCR := lineNum%5 == 0
|
||||
if doCR {
|
||||
n--
|
||||
}
|
||||
for i := 0; i < n-1; i++ { // Stop early for \n.
|
||||
c := 'a' + byte(lineNum+i)
|
||||
if c == '\n' || c == '\r' { // Don't confuse us.
|
||||
c = 'N'
|
||||
}
|
||||
buf.WriteByte(c)
|
||||
}
|
||||
if addNewline {
|
||||
if doCR {
|
||||
buf.WriteByte('\r')
|
||||
}
|
||||
buf.WriteByte('\n')
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func wrapSplitFuncAsSegmentFuncForTesting(splitFunc bufio.SplitFunc) SegmentFunc {
|
||||
return func(data []byte, atEOF bool) (advance int, token []byte, typ int, err error) {
|
||||
typ = 0
|
||||
advance, token, err = splitFunc(data, atEOF)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Test that the line segmenter errors out on a long line.
|
||||
func TestSegmentTooLong(t *testing.T) {
|
||||
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
|
||||
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
|
||||
tmp := new(bytes.Buffer)
|
||||
buf := new(bytes.Buffer)
|
||||
lineNum := 0
|
||||
j := 0
|
||||
for i := 0; i < 2*smallMaxTokenSize; i++ {
|
||||
genLine(tmp, lineNum, j, true)
|
||||
j++
|
||||
buf.Write(tmp.Bytes())
|
||||
lineNum++
|
||||
}
|
||||
s := NewSegmenter(&slowReader{3, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||||
s.MaxTokenSize(smallMaxTokenSize)
|
||||
j = 0
|
||||
for lineNum := 0; s.Segment(); lineNum++ {
|
||||
genLine(tmp, lineNum, j, false)
|
||||
if j < smallMaxTokenSize {
|
||||
j++
|
||||
} else {
|
||||
j--
|
||||
}
|
||||
line := tmp.Bytes()
|
||||
if !bytes.Equal(s.Bytes(), line) {
|
||||
t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
|
||||
}
|
||||
}
|
||||
err := s.Err()
|
||||
if err != ErrTooLong {
|
||||
t.Fatalf("expected ErrTooLong; got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
var testError = errors.New("testError")
|
||||
|
||||
// Test the correct error is returned when the split function errors out.
|
||||
func TestSegmentError(t *testing.T) {
|
||||
// Create a split function that delivers a little data, then a predictable error.
|
||||
numSplits := 0
|
||||
const okCount = 7
|
||||
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF {
|
||||
panic("didn't get enough data")
|
||||
}
|
||||
if numSplits >= okCount {
|
||||
return 0, nil, testError
|
||||
}
|
||||
numSplits++
|
||||
return 1, data[0:1], nil
|
||||
}
|
||||
// Read the data.
|
||||
const text = "abcdefghijklmnopqrstuvwxyz"
|
||||
buf := strings.NewReader(text)
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||||
var i int
|
||||
for i = 0; s.Segment(); i++ {
|
||||
if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
|
||||
t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
|
||||
}
|
||||
}
|
||||
// Check correct termination location and error.
|
||||
if i != okCount {
|
||||
t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
|
||||
}
|
||||
err := s.Err()
|
||||
if err != testError {
|
||||
t.Fatalf("expected %q got %v", testError, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Test that Scan finishes if we have endless empty reads.
|
||||
type endlessZeros struct{}
|
||||
|
||||
func (endlessZeros) Read(p []byte) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func TestBadReader(t *testing.T) {
|
||||
scanner := NewSegmenter(endlessZeros{})
|
||||
for scanner.Segment() {
|
||||
t.Fatal("read should fail")
|
||||
}
|
||||
err := scanner.Err()
|
||||
if err != io.ErrNoProgress {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSegmentAdvanceNegativeError(t *testing.T) {
|
||||
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF {
|
||||
panic("didn't get enough data")
|
||||
}
|
||||
return -1, data[0:1], nil
|
||||
}
|
||||
// Read the data.
|
||||
const text = "abcdefghijklmnopqrstuvwxyz"
|
||||
buf := strings.NewReader(text)
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||||
s.Segment()
|
||||
err := s.Err()
|
||||
if err != ErrNegativeAdvance {
|
||||
t.Fatalf("expected %q got %v", testError, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSegmentAdvanceTooFarError(t *testing.T) {
|
||||
errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF {
|
||||
panic("didn't get enough data")
|
||||
}
|
||||
return len(data) + 10, data[0:1], nil
|
||||
}
|
||||
// Read the data.
|
||||
const text = "abcdefghijklmnopqrstuvwxyz"
|
||||
buf := strings.NewReader(text)
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(errorSplit))
|
||||
s.Segment()
|
||||
err := s.Err()
|
||||
if err != ErrAdvanceTooFar {
|
||||
t.Fatalf("expected %q got %v", testError, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSegmentLongTokens(t *testing.T) {
|
||||
// Read the data.
|
||||
text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
|
||||
buf := strings.NewReader(string(text))
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||||
for s.Segment() {
|
||||
line := s.Bytes()
|
||||
if !bytes.Equal(text, line) {
|
||||
t.Errorf("expected %s, got %s", text, line)
|
||||
}
|
||||
}
|
||||
err := s.Err()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error; got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSegmentLongTokensDontDouble(t *testing.T) {
|
||||
// Read the data.
|
||||
text := bytes.Repeat([]byte("abcdefghijklmnop"), 257)
|
||||
buf := strings.NewReader(string(text))
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
// change to line segmenter for testing
|
||||
s.SetSegmenter(wrapSplitFuncAsSegmentFuncForTesting(bufio.ScanLines))
|
||||
s.MaxTokenSize(6144)
|
||||
for s.Segment() {
|
||||
line := s.Bytes()
|
||||
if !bytes.Equal(text, line) {
|
||||
t.Errorf("expected %s, got %s", text, line)
|
||||
}
|
||||
}
|
||||
err := s.Err()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error; got %s", err)
|
||||
}
|
||||
}
|
445
vendor/github.com/blevesearch/segment/segment_words_test.go
generated
vendored
445
vendor/github.com/blevesearch/segment/segment_words_test.go
generated
vendored
|
@ -1,445 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAdhocSegmentsWithType(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output [][]byte
|
||||
outputStrings []string
|
||||
outputTypes []int
|
||||
}{
|
||||
{
|
||||
input: []byte("Now is the.\n End."),
|
||||
output: [][]byte{
|
||||
[]byte("Now"),
|
||||
[]byte(" "),
|
||||
[]byte(" "),
|
||||
[]byte("is"),
|
||||
[]byte(" "),
|
||||
[]byte("the"),
|
||||
[]byte("."),
|
||||
[]byte("\n"),
|
||||
[]byte(" "),
|
||||
[]byte("End"),
|
||||
[]byte("."),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"Now",
|
||||
" ",
|
||||
" ",
|
||||
"is",
|
||||
" ",
|
||||
"the",
|
||||
".",
|
||||
"\n",
|
||||
" ",
|
||||
"End",
|
||||
".",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Letter,
|
||||
None,
|
||||
None,
|
||||
Letter,
|
||||
None,
|
||||
Letter,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Letter,
|
||||
None,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("3.5"),
|
||||
output: [][]byte{
|
||||
[]byte("3.5"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"3.5",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Number,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("cat3.5"),
|
||||
output: [][]byte{
|
||||
[]byte("cat3.5"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"cat3.5",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Letter,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("c"),
|
||||
output: [][]byte{
|
||||
[]byte("c"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"c",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Letter,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("こんにちは世界"),
|
||||
output: [][]byte{
|
||||
[]byte("こ"),
|
||||
[]byte("ん"),
|
||||
[]byte("に"),
|
||||
[]byte("ち"),
|
||||
[]byte("は"),
|
||||
[]byte("世"),
|
||||
[]byte("界"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"こ",
|
||||
"ん",
|
||||
"に",
|
||||
"ち",
|
||||
"は",
|
||||
"世",
|
||||
"界",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("你好世界"),
|
||||
output: [][]byte{
|
||||
[]byte("你"),
|
||||
[]byte("好"),
|
||||
[]byte("世"),
|
||||
[]byte("界"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"你",
|
||||
"好",
|
||||
"世",
|
||||
"界",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
Ideo,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: []byte("サッカ"),
|
||||
output: [][]byte{
|
||||
[]byte("サッカ"),
|
||||
},
|
||||
outputStrings: []string{
|
||||
"サッカ",
|
||||
},
|
||||
outputTypes: []int{
|
||||
Ideo,
|
||||
},
|
||||
},
|
||||
// test for wb7b/wb7c
|
||||
{
|
||||
input: []byte(`א"א`),
|
||||
output: [][]byte{
|
||||
[]byte(`א"א`),
|
||||
},
|
||||
outputStrings: []string{
|
||||
`א"א`,
|
||||
},
|
||||
outputTypes: []int{
|
||||
Letter,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
rv := make([][]byte, 0)
|
||||
rvstrings := make([]string, 0)
|
||||
rvtypes := make([]int, 0)
|
||||
segmenter := NewWordSegmenter(bytes.NewReader(test.input))
|
||||
// Set the split function for the scanning operation.
|
||||
for segmenter.Segment() {
|
||||
rv = append(rv, segmenter.Bytes())
|
||||
rvstrings = append(rvstrings, segmenter.Text())
|
||||
rvtypes = append(rvtypes, segmenter.Type())
|
||||
}
|
||||
if err := segmenter.Err(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(rv, test.output) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: '%s'", test.output, rv, test.input)
|
||||
}
|
||||
if !reflect.DeepEqual(rvstrings, test.outputStrings) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: '%s'", test.outputStrings, rvstrings, test.input)
|
||||
}
|
||||
if !reflect.DeepEqual(rvtypes, test.outputTypes) {
|
||||
t.Fatalf("expeced:\n%#v\ngot:\n%#v\nfor: '%s'", test.outputTypes, rvtypes, test.input)
|
||||
}
|
||||
}
|
||||
|
||||
// run same tests again with direct
|
||||
for _, test := range tests {
|
||||
rv := make([][]byte, 0)
|
||||
rvstrings := make([]string, 0)
|
||||
rvtypes := make([]int, 0)
|
||||
segmenter := NewWordSegmenterDirect(test.input)
|
||||
// Set the split function for the scanning operation.
|
||||
for segmenter.Segment() {
|
||||
rv = append(rv, segmenter.Bytes())
|
||||
rvstrings = append(rvstrings, segmenter.Text())
|
||||
rvtypes = append(rvtypes, segmenter.Type())
|
||||
}
|
||||
if err := segmenter.Err(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(rv, test.output) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: '%s'", test.output, rv, test.input)
|
||||
}
|
||||
if !reflect.DeepEqual(rvstrings, test.outputStrings) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: '%s'", test.outputStrings, rvstrings, test.input)
|
||||
}
|
||||
if !reflect.DeepEqual(rvtypes, test.outputTypes) {
|
||||
t.Fatalf("expeced:\n%#v\ngot:\n%#v\nfor: '%s'", test.outputTypes, rvtypes, test.input)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestUnicodeSegments(t *testing.T) {
|
||||
|
||||
for _, test := range unicodeWordTests {
|
||||
rv := make([][]byte, 0)
|
||||
scanner := bufio.NewScanner(bytes.NewReader(test.input))
|
||||
// Set the split function for the scanning operation.
|
||||
scanner.Split(SplitWords)
|
||||
for scanner.Scan() {
|
||||
rv = append(rv, scanner.Bytes())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(rv, test.output) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: '%s' comment: %s", test.output, rv, test.input, test.comment)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnicodeSegmentsSlowReader(t *testing.T) {
|
||||
|
||||
for i, test := range unicodeWordTests {
|
||||
rv := make([][]byte, 0)
|
||||
segmenter := NewWordSegmenter(&slowReader{1, bytes.NewReader(test.input)})
|
||||
for segmenter.Segment() {
|
||||
rv = append(rv, segmenter.Bytes())
|
||||
}
|
||||
if err := segmenter.Err(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(rv, test.output) {
|
||||
t.Fatalf("expected:\n%#v\ngot:\n%#v\nfor: %d '%s' comment: %s", test.output, rv, i, test.input, test.comment)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordSegmentLongInputSlowReader(t *testing.T) {
|
||||
// Read the data.
|
||||
text := bytes.Repeat([]byte("abcdefghijklmnop"), 26)
|
||||
buf := strings.NewReader(string(text) + " cat")
|
||||
s := NewSegmenter(&slowReader{1, buf})
|
||||
s.MaxTokenSize(6144)
|
||||
for s.Segment() {
|
||||
}
|
||||
err := s.Err()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error; got '%s'", err)
|
||||
}
|
||||
finalWord := s.Text()
|
||||
if s.Text() != "cat" {
|
||||
t.Errorf("expected 'cat' got '%s'", finalWord)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSplitWords(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
vals := make([][]byte, 0)
|
||||
scanner := bufio.NewScanner(bytes.NewReader(bleveWikiArticle))
|
||||
scanner.Split(SplitWords)
|
||||
for scanner.Scan() {
|
||||
vals = append(vals, scanner.Bytes())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if len(vals) != 3465 {
|
||||
b.Fatalf("expected 3465 tokens, got %d", len(vals))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkWordSegmenter(b *testing.B) {
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
vals := make([][]byte, 0)
|
||||
types := make([]int, 0)
|
||||
segmenter := NewWordSegmenter(bytes.NewReader(bleveWikiArticle))
|
||||
for segmenter.Segment() {
|
||||
vals = append(vals, segmenter.Bytes())
|
||||
types = append(types, segmenter.Type())
|
||||
}
|
||||
if err := segmenter.Err(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if vals == nil {
|
||||
b.Fatalf("expected non-nil vals")
|
||||
}
|
||||
if types == nil {
|
||||
b.Fatalf("expected non-nil types")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkWordSegmenterDirect(b *testing.B) {
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
vals := make([][]byte, 0)
|
||||
types := make([]int, 0)
|
||||
segmenter := NewWordSegmenterDirect(bleveWikiArticle)
|
||||
for segmenter.Segment() {
|
||||
vals = append(vals, segmenter.Bytes())
|
||||
types = append(types, segmenter.Type())
|
||||
}
|
||||
if err := segmenter.Err(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if vals == nil {
|
||||
b.Fatalf("expected non-nil vals")
|
||||
}
|
||||
if types == nil {
|
||||
b.Fatalf("expected non-nil types")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkDirect(b *testing.B) {
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
vals := make([][]byte, 0, 10000)
|
||||
types := make([]int, 0, 10000)
|
||||
vals, types, _, err := SegmentWordsDirect(bleveWikiArticle, vals, types)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if vals == nil {
|
||||
b.Fatalf("expected non-nil vals")
|
||||
}
|
||||
if types == nil {
|
||||
b.Fatalf("expected non-nil types")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var bleveWikiArticle = []byte(`Boiling liquid expanding vapor explosion
|
||||
From Wikipedia, the free encyclopedia
|
||||
See also: Boiler explosion and Steam explosion
|
||||
|
||||
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
|
||||
|
||||
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
|
||||
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
|
||||
Contents [hide]
|
||||
1 Mechanism
|
||||
1.1 Water example
|
||||
1.2 BLEVEs without chemical reactions
|
||||
2 Fires
|
||||
3 Incidents
|
||||
4 Safety measures
|
||||
5 See also
|
||||
6 References
|
||||
7 External links
|
||||
Mechanism[edit]
|
||||
|
||||
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
|
||||
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:
|
||||
If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed).
|
||||
The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example.
|
||||
When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did.
|
||||
Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, propane, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures.
|
||||
If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.
|
||||
Water example[edit]
|
||||
Imagine, for example, a tank of pressurized liquid water held at 204.4 °C (400 °F). This tank would normally be pressurized to 1.7 MPa (250 psi) above atmospheric ("gauge") pressure. If the tank containing the water were to rupture, there would for a slight moment exist a volume of liquid water which would be
|
||||
at atmospheric pressure, and
|
||||
204.4 °C (400 °F).
|
||||
At atmospheric pressure the boiling point of water is 100 °C (212 °F) - liquid water at atmospheric pressure cannot exist at temperatures higher than 100 °C (212 °F). At that moment, the water would boil and turn to vapour explosively, and the 204.4 °C (400 °F) liquid water turned to gas would take up a lot more volume than it did as liquid, causing a vapour explosion. Such explosions can happen when the superheated water of a steam engine escapes through a crack in a boiler, causing a boiler explosion.
|
||||
BLEVEs without chemical reactions[edit]
|
||||
It is important to note that a BLEVE need not be a chemical explosion—nor does there need to be a fire—however if a flammable substance is subject to a BLEVE it may also be subject to intense heating, either from an external source of heat which may have caused the vessel to rupture in the first place or from an internal source of localized heating such as skin friction. This heating can cause a flammable substance to ignite, adding a secondary explosion caused by the primary BLEVE. While blast effects of any BLEVE can be devastating, a flammable substance such as propane can add significantly to the danger.
|
||||
Bleve explosion.svg
|
||||
While the term BLEVE is most often used to describe the results of a container of flammable liquid rupturing due to fire, a BLEVE can occur even with a non-flammable substance such as water,[2] liquid nitrogen,[3] liquid helium or other refrigerants or cryogens, and therefore is not usually considered a type of chemical explosion.
|
||||
Fires[edit]
|
||||
BLEVEs can be caused by an external fire near the storage vessel causing heating of the contents and pressure build-up. While tanks are often designed to withstand great pressure, constant heating can cause the metal to weaken and eventually fail. If the tank is being heated in an area where there is no liquid, it may rupture faster without the liquid to absorb the heat. Gas containers are usually equipped with relief valves that vent off excess pressure, but the tank can still fail if the pressure is not released quickly enough.[1] Relief valves are sized to release pressure fast enough to prevent the pressure from increasing beyond the strength of the vessel, but not so fast as to be the cause of an explosion. An appropriately sized relief valve will allow the liquid inside to boil slowly, maintaining a constant pressure in the vessel until all the liquid has boiled and the vessel empties.
|
||||
If the substance involved is flammable, it is likely that the resulting cloud of the substance will ignite after the BLEVE has occurred, forming a fireball and possibly a fuel-air explosion, also termed a vapor cloud explosion (VCE). If the materials are toxic, a large area will be contaminated.[4]
|
||||
Incidents[edit]
|
||||
The term "BLEVE" was coined by three researchers at Factory Mutual, in the analysis of an accident there in 1957 involving a chemical reactor vessel.[5]
|
||||
In August 1959 the Kansas City Fire Department suffered its largest ever loss of life in the line of duty, when a 25,000 gallon (95,000 litre) gas tank exploded during a fire on Southwest Boulevard killing five firefighters. This was the first time BLEVE was used to describe a burning fuel tank.[citation needed]
|
||||
Later incidents included the Cheapside Street Whisky Bond Fire in Glasgow, Scotland in 1960; Feyzin, France in 1966; Crescent City, Illinois in 1970; Kingman, Arizona in 1973; a liquid nitrogen tank rupture[6] at Air Products and Chemicals and Mobay Chemical Company at New Martinsville, West Virginia on January 31, 1978 [1];Texas City, Texas in 1978; Murdock, Illinois in 1983; San Juan Ixhuatepec, Mexico City in 1984; and Toronto, Ontario in 2008.
|
||||
Safety measures[edit]
|
||||
[icon] This section requires expansion. (July 2013)
|
||||
Some fire mitigation measures are listed under liquefied petroleum gas.
|
||||
See also[edit]
|
||||
Boiler explosion
|
||||
Expansion ratio
|
||||
Explosive boiling or phase explosion
|
||||
Rapid phase transition
|
||||
Viareggio train derailment
|
||||
2008 Toronto explosions
|
||||
Gas carriers
|
||||
Los Alfaques Disaster
|
||||
Lac-Mégantic derailment
|
||||
References[edit]
|
||||
^ Jump up to: a b Kletz, Trevor (March 1990). Critical Aspects of Safety and Loss Prevention. London: Butterworth–Heinemann. pp. 43–45. ISBN 0-408-04429-2.
|
||||
Jump up ^ "Temperature Pressure Relief Valves on Water Heaters: test, inspect, replace, repair guide". Inspect-ny.com. Retrieved 2011-07-12.
|
||||
Jump up ^ Liquid nitrogen BLEVE demo
|
||||
Jump up ^ "Chemical Process Safety" (PDF). Retrieved 2011-07-12.
|
||||
Jump up ^ David F. Peterson, BLEVE: Facts, Risk Factors, and Fallacies, Fire Engineering magazine (2002).
|
||||
Jump up ^ "STATE EX REL. VAPOR CORP. v. NARICK". Supreme Court of Appeals of West Virginia. 1984-07-12. Retrieved 2014-03-16.
|
||||
External links[edit]
|
||||
Look up boiling liquid expanding vapor explosion in Wiktionary, the free dictionary.
|
||||
Wikimedia Commons has media related to BLEVE.
|
||||
BLEVE Demo on YouTube — video of a controlled BLEVE demo
|
||||
huge explosions on YouTube — video of propane and isobutane BLEVEs from a train derailment at Murdock, Illinois (3 September 1983)
|
||||
Propane BLEVE on YouTube — video of BLEVE from the Toronto propane depot fire
|
||||
Moscow Ring Road Accident on YouTube - Dozens of LPG tank BLEVEs after a road accident in Moscow
|
||||
Kingman, AZ BLEVE — An account of the 5 July 1973 explosion in Kingman, with photographs
|
||||
Propane Tank Explosions — Description of circumstances required to cause a propane tank BLEVE.
|
||||
Analysis of BLEVE Events at DOE Sites - Details physics and mathematics of BLEVEs.
|
||||
HID - SAFETY REPORT ASSESSMENT GUIDE: Whisky Maturation Warehouses - The liquor is aged in wooden barrels that can suffer BLEVE.
|
||||
Categories: ExplosivesFirefightingFireTypes of fireGas technologiesIndustrial fires and explosions`)
|
11994
vendor/github.com/blevesearch/segment/tables_test.go
generated
vendored
11994
vendor/github.com/blevesearch/segment/tables_test.go
generated
vendored
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue