Files
virtual-kubelet/vendor/github.com/cloudfoundry-incubator/candiedyaml/reader_test.go
2017-12-05 17:53:58 -06:00

292 lines
9.8 KiB
Go

/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package candiedyaml
import (
// "fmt"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
/*
* Test cases are stolen from
* http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
*/
// test_case is one row of the table-driven reader specs in this file.
//
// title is the spec name passed to It. test holds the raw input bytes; the
// specs in this file stop scanning it at '!' and the UTF-8 sequence spec also
// splits it on '|'. result is the value the spec expects
// yaml_parser_update_buffer to return for that input.
type test_case struct {
	title, test string
	result      bool
}
var _ = Describe("Reader", func() {
LONG := 100000
Context("UTF8 Sequences", func() {
// utf8_sequences is the table of UTF-8 decoding scenarios. In each row the
// input string lists one or more byte sequences separated by '|' and
// terminated by '!'; the final field is the boolean that is compared against
// the return value of yaml_parser_update_buffer.
utf8_sequences := []test_case{
/* {"title", "test 1|test 2|...|test N!", (true or false)}, */
{"a simple test", "'test' is '\xd0\xbf\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1\x80\xd0\xba\xd0\xb0' in Russian!", true},
{"an empty line", "!", true},
{"u-0 is a control character", "\x00!", false},
{"u-80 is a control character", "\xc2\x80!", false},
{"u-800 is valid", "\xe0\xa0\x80!", true},
{"u-10000 is valid", "\xf0\x90\x80\x80!", true},
{"5 bytes sequences are not allowed", "\xf8\x88\x80\x80\x80!", false},
{"6 bytes sequences are not allowed", "\xfc\x84\x80\x80\x80\x80!", false},
{"u-7f is a control character", "\x7f!", false},
{"u-7FF is valid", "\xdf\xbf!", true},
{"u-FFFF is a control character", "\xef\xbf\xbf!", false},
{"u-1FFFFF is too large", "\xf7\xbf\xbf\xbf!", false},
{"u-3FFFFFF is 5 bytes", "\xfb\xbf\xbf\xbf\xbf!", false},
{"u-7FFFFFFF is 6 bytes", "\xfd\xbf\xbf\xbf\xbf\xbf!", false},
{"u-D7FF", "\xed\x9f\xbf!", true},
{"u-E000", "\xee\x80\x80!", true},
{"u-FFFD", "\xef\xbf\xbd!", true},
{"u-10FFFF", "\xf4\x8f\xbf\xbf!", true},
{"u-110000", "\xf4\x90\x80\x80!", false},
{"first continuation byte", "\x80!", false},
{"last continuation byte", "\xbf!", false},
{"2 continuation bytes", "\x80\xbf!", false},
{"3 continuation bytes", "\x80\xbf\x80!", false},
{"4 continuation bytes", "\x80\xbf\x80\xbf!", false},
{"5 continuation bytes", "\x80\xbf\x80\xbf\x80!", false},
{"6 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf!", false},
{"7 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf\x80!", false},
{"sequence of all 64 possible continuation bytes",
"\x80|\x81|\x82|\x83|\x84|\x85|\x86|\x87|\x88|\x89|\x8a|\x8b|\x8c|\x8d|\x8e|\x8f|" +
"\x90|\x91|\x92|\x93|\x94|\x95|\x96|\x97|\x98|\x99|\x9a|\x9b|\x9c|\x9d|\x9e|\x9f|" +
"\xa0|\xa1|\xa2|\xa3|\xa4|\xa5|\xa6|\xa7|\xa8|\xa9|\xaa|\xab|\xac|\xad|\xae|\xaf|" +
"\xb0|\xb1|\xb2|\xb3|\xb4|\xb5|\xb6|\xb7|\xb8|\xb9|\xba|\xbb|\xbc|\xbd|\xbe|\xbf!", false},
{"32 first bytes of 2-byte sequences {0xc0-0xdf}",
"\xc0 |\xc1 |\xc2 |\xc3 |\xc4 |\xc5 |\xc6 |\xc7 |\xc8 |\xc9 |\xca |\xcb |\xcc |\xcd |\xce |\xcf |" +
"\xd0 |\xd1 |\xd2 |\xd3 |\xd4 |\xd5 |\xd6 |\xd7 |\xd8 |\xd9 |\xda |\xdb |\xdc |\xdd |\xde |\xdf !", false},
{"16 first bytes of 3-byte sequences {0xe0-0xef}",
"\xe0 |\xe1 |\xe2 |\xe3 |\xe4 |\xe5 |\xe6 |\xe7 |\xe8 |\xe9 |\xea |\xeb |\xec |\xed |\xee |\xef !", false},
{"8 first bytes of 4-byte sequences {0xf0-0xf7}", "\xf0 |\xf1 |\xf2 |\xf3 |\xf4 |\xf5 |\xf6 |\xf7 !", false},
{"4 first bytes of 5-byte sequences {0xf8-0xfb}", "\xf8 |\xf9 |\xfa |\xfb !", false},
{"2 first bytes of 6-byte sequences {0xfc-0xfd}", "\xfc |\xfd !", false},
{"sequences with last byte missing {u-0}",
"\xc0|\xe0\x80|\xf0\x80\x80|\xf8\x80\x80\x80|\xfc\x80\x80\x80\x80!", false},
{"sequences with last byte missing {u-...FF}",
"\xdf|\xef\xbf|\xf7\xbf\xbf|\xfb\xbf\xbf\xbf|\xfd\xbf\xbf\xbf\xbf!", false},
{"impossible bytes", "\xfe|\xff|\xfe\xfe\xff\xff!", false},
{"overlong sequences {u-2f}",
"\xc0\xaf|\xe0\x80\xaf|\xf0\x80\x80\xaf|\xf8\x80\x80\x80\xaf|\xfc\x80\x80\x80\x80\xaf!", false},
{"maximum overlong sequences",
"\xc1\xbf|\xe0\x9f\xbf|\xf0\x8f\xbf\xbf|\xf8\x87\xbf\xbf\xbf|\xfc\x83\xbf\xbf\xbf\xbf!", false},
{"overlong representation of the NUL character",
"\xc0\x80|\xe0\x80\x80|\xf0\x80\x80\x80|\xf8\x80\x80\x80\x80|\xfc\x80\x80\x80\x80\x80!", false},
{"single UTF-16 surrogates",
"\xed\xa0\x80|\xed\xad\xbf|\xed\xae\x80|\xed\xaf\xbf|\xed\xb0\x80|\xed\xbe\x80|\xed\xbf\xbf!", false},
{"paired UTF-16 surrogates",
"\xed\xa0\x80\xed\xb0\x80|\xed\xa0\x80\xed\xbf\xbf|\xed\xad\xbf\xed\xb0\x80|" +
"\xed\xad\xbf\xed\xbf\xbf|\xed\xae\x80\xed\xb0\x80|\xed\xae\x80\xed\xbf\xbf|" +
"\xed\xaf\xbf\xed\xb0\x80|\xed\xaf\xbf\xed\xbf\xbf!", false},
{"other illegal code positions", "\xef\xbf\xbe|\xef\xbf\xbf!", false},
}
// check_sequence registers one spec named tc.title. The spec walks tc.test,
// a list of byte sequences separated by '|' and terminated by '!', hands each
// sequence to a freshly initialized parser on its own, and expects
// yaml_parser_update_buffer to return tc.result for every one of them.
check_sequence := func(tc test_case) {
	It(tc.title, func() {
		input := []byte(tc.test)
		start, end := 0, 0

		for {
			// Advance end to the delimiter that closes the current sequence.
			// Running off the end of the input is treated like the '!'
			// terminator so a malformed row cannot index out of range.
			for end < len(input) && input[end] != '|' && input[end] != '!' {
				end++
			}

			parser := yaml_parser_t{}
			yaml_parser_initialize(&parser)
			// Feed the parser only the current sequence. Passing the whole
			// input would make every iteration decode from offset 0 again,
			// so only the first sequence of a row would ever be exercised.
			yaml_parser_set_input_string(&parser, input[start:end])
			result := yaml_parser_update_buffer(&parser, end-start)
			// Release the parser before asserting so that every iteration,
			// including the last one and a failing one, cleans up.
			yaml_parser_delete(&parser)

			Expect(result).To(Equal(tc.result))

			if end >= len(input) || input[end] == '!' {
				break
			}

			// Step over the '|' separator; the next sequence starts there.
			end++
			start = end
		}
	})
}

// Register one spec per table row.
for _, test := range utf8_sequences {
	check_sequence(test)
}
})
Context("BOMs", func() {
// boms is the table of byte-order-mark scenarios. In each row the input is
// terminated by '!' (the terminator itself is not passed to the parser) and
// the final field is the boolean that is compared against the return value of
// yaml_parser_update_buffer.
boms := []test_case{
/* {"title", "test!", (true or false)}, */
{"no bom (utf-8)", "Hi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", true},
{"bom (utf-8)", "\xef\xbb\xbfHi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", true},
{"bom (utf-16-le)", "\xff\xfeH\x00i\x00 \x00i\x00s\x00 \x00\x1f\x04@\x04" + "8\x04" + "2\x04" + "5\x04" + "B\x04!", true},
{"bom (utf-16-be)", "\xfe\xff\x00H\x00i\x00 \x00i\x00s\x00 \x04\x1f\x04@\x04" + "8\x04" + "2\x04" + "5\x04" + "B!", true},
}
check_bom := func(tc test_case) {
It(tc.title, func() {
start := 0
end := start
bytes := []byte(tc.test)
for bytes[end] != '!' {
end++
}
parser := yaml_parser_t{}
yaml_parser_initialize(&parser)
yaml_parser_set_input_string(&parser, bytes[:end-start])
result := yaml_parser_update_buffer(&parser, end-start)
Expect(result).To(Equal(tc.result))
yaml_parser_delete(&parser)
})
}
for _, test := range boms {
check_bom(test)
}
})
Context("Long UTF8", func() {
It("parses properly", func() {
buffer := make([]byte, 0, 3+LONG*2)
buffer = append(buffer, '\xef', '\xbb', '\xbf')
for j := 0; j < LONG; j++ {
if j%2 == 1 {
buffer = append(buffer, '\xd0', '\x90')
} else {
buffer = append(buffer, '\xd0', '\xaf')
}
}
parser := yaml_parser_t{}
yaml_parser_initialize(&parser)
yaml_parser_set_input_string(&parser, buffer)
for k := 0; k < LONG; k++ {
if parser.unread == 0 {
updated := yaml_parser_update_buffer(&parser, 1)
Expect(updated).To(BeTrue())
// printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset);
}
Expect(parser.unread).NotTo(Equal(0))
// printf("\tnot enough characters at %d\n", k);
var ch0, ch1 byte
if k%2 == 1 {
ch0 = '\xd0'
ch1 = '\x90'
} else {
ch0 = '\xd0'
ch1 = '\xaf'
}
Expect(parser.buffer[parser.buffer_pos]).To(Equal(ch0))
Expect(parser.buffer[parser.buffer_pos+1]).To(Equal(ch1))
// printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n",
// (int)parser.buffer.pointer[0], (int)parser.buffer.pointer[1],
// (int)ch0, (int)ch1);
parser.buffer_pos += 2
parser.unread -= 1
}
updated := yaml_parser_update_buffer(&parser, 1)
Expect(updated).To(BeTrue())
// printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset);
yaml_parser_delete(&parser)
})
})
Context("Long UTF16", func() {
It("parses properly", func() {
buffer := make([]byte, 0, 2+LONG*2)
buffer = append(buffer, '\xff', '\xfe')
for j := 0; j < LONG; j++ {
if j%2 == 1 {
buffer = append(buffer, '\x10', '\x04')
} else {
buffer = append(buffer, '/', '\x04')
}
}
parser := yaml_parser_t{}
yaml_parser_initialize(&parser)
yaml_parser_set_input_string(&parser, buffer)
for k := 0; k < LONG; k++ {
if parser.unread == 0 {
updated := yaml_parser_update_buffer(&parser, 1)
Expect(updated).To(BeTrue())
// printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset);
}
Expect(parser.unread).NotTo(Equal(0))
// printf("\tnot enough characters at %d\n", k);
var ch0, ch1 byte
if k%2 == 1 {
ch0 = '\xd0'
ch1 = '\x90'
} else {
ch0 = '\xd0'
ch1 = '\xaf'
}
Expect(parser.buffer[parser.buffer_pos]).To(Equal(ch0))
Expect(parser.buffer[parser.buffer_pos+1]).To(Equal(ch1))
// printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n",
// (int)parser.buffer.pointer[0], (int)parser.buffer.pointer[1],
// (int)ch0, (int)ch1);
parser.buffer_pos += 2
parser.unread -= 1
}
updated := yaml_parser_update_buffer(&parser, 1)
Expect(updated).To(BeTrue())
// printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset);
yaml_parser_delete(&parser)
})
})
})