901 lines
20 KiB
Go
901 lines
20 KiB
Go
package jsonvalue
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"strconv"
|
|
|
|
"github.com/Andrew-M-C/go.jsonvalue/internal/unsafe"
|
|
)
|
|
|
|
// ================ OUTER UNMARSHAL ================
|
|
|
|
// unmarshalWithIter parse bytes with unknown value type.
|
|
func unmarshalWithIter(p pool, it iter, offset int) (v *V, err error) {
|
|
end := len(it)
|
|
offset, reachEnd := it.skipBlanks(offset)
|
|
if reachEnd {
|
|
return &V{}, fmt.Errorf("%w, cannot find any symbol characters found", ErrRawBytesUnrecognized)
|
|
}
|
|
|
|
chr := it[offset]
|
|
switch chr {
|
|
case '{':
|
|
v, offset, err = unmarshalObjectWithIterUnknownEnd(p, it, offset, end)
|
|
|
|
case '[':
|
|
v, offset, err = unmarshalArrayWithIterUnknownEnd(p, it, offset, end)
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
|
|
var n *V
|
|
n, offset, _, err = it.parseNumber(p, offset)
|
|
if err == nil {
|
|
v = n
|
|
}
|
|
|
|
case '"':
|
|
var sectLenWithoutQuote int
|
|
var sectEnd int
|
|
sectLenWithoutQuote, sectEnd, err = it.parseStrFromBytesForwardWithQuote(offset)
|
|
if err == nil {
|
|
v, err = NewString(unsafe.BtoS(it[offset+1:offset+1+sectLenWithoutQuote])), nil
|
|
offset = sectEnd
|
|
}
|
|
|
|
case 't':
|
|
offset, err = it.parseTrue(offset)
|
|
if err == nil {
|
|
v = NewBool(true)
|
|
}
|
|
|
|
case 'f':
|
|
offset, err = it.parseFalse(offset)
|
|
if err == nil {
|
|
v = NewBool(false)
|
|
}
|
|
|
|
case 'n':
|
|
offset, err = it.parseNull(offset)
|
|
if err == nil {
|
|
v = NewNull()
|
|
}
|
|
|
|
default:
|
|
return &V{}, fmt.Errorf("%w, invalid character \\u%04X at Position %d", ErrRawBytesUnrecognized, chr, offset)
|
|
}
|
|
|
|
if err != nil {
|
|
return &V{}, err
|
|
}
|
|
|
|
if offset, reachEnd = it.skipBlanks(offset, end); !reachEnd {
|
|
return &V{}, fmt.Errorf("%w, unnecessary trailing data remains at Position %d", ErrRawBytesUnrecognized, offset)
|
|
}
|
|
|
|
return v, nil
|
|
}
|
|
|
|
// unmarshalArrayWithIterUnknownEnd is similar with unmarshalArrayWithIter, though should start with '[',
|
|
// but it does not known where its ']' is
|
|
func unmarshalArrayWithIterUnknownEnd(p pool, it iter, offset, right int) (_ *V, end int, err error) {
|
|
offset++
|
|
arr := newArray(p)
|
|
|
|
reachEnd := false
|
|
|
|
for offset < right {
|
|
// search for ending ']'
|
|
offset, reachEnd = it.skipBlanks(offset, right)
|
|
if reachEnd {
|
|
// ']' not found
|
|
return nil, -1, fmt.Errorf("%w, cannot find ']'", ErrNotArrayValue)
|
|
}
|
|
|
|
chr := it[offset]
|
|
switch chr {
|
|
case ']':
|
|
return arr, offset + 1, nil
|
|
|
|
case ',':
|
|
offset++
|
|
|
|
case '{':
|
|
v, sectEnd, err := unmarshalObjectWithIterUnknownEnd(p, it, offset, right)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, v)
|
|
offset = sectEnd
|
|
|
|
case '[':
|
|
v, sectEnd, err := unmarshalArrayWithIterUnknownEnd(p, it, offset, right)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, v)
|
|
offset = sectEnd
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
|
|
var v *V
|
|
v, sectEnd, _, err := it.parseNumber(p, offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, v)
|
|
offset = sectEnd
|
|
|
|
case '"':
|
|
sectLenWithoutQuote, sectEnd, err := it.parseStrFromBytesForwardWithQuote(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
v := NewString(unsafe.BtoS(it[offset+1 : offset+1+sectLenWithoutQuote]))
|
|
appendToArr(arr, v)
|
|
offset = sectEnd
|
|
|
|
case 't':
|
|
sectEnd, err := it.parseTrue(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, NewBool(true))
|
|
offset = sectEnd
|
|
|
|
case 'f':
|
|
sectEnd, err := it.parseFalse(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, NewBool(false))
|
|
offset = sectEnd
|
|
|
|
case 'n':
|
|
sectEnd, err := it.parseNull(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
appendToArr(arr, NewNull())
|
|
offset = sectEnd
|
|
|
|
default:
|
|
return nil, -1, fmt.Errorf("%w, invalid character \\u%04X at Position %d", ErrRawBytesUnrecognized, chr, offset)
|
|
}
|
|
}
|
|
|
|
return nil, -1, fmt.Errorf("%w, cannot find ']'", ErrNotArrayValue)
|
|
}
|
|
|
|
func appendToArr(v *V, child *V) {
|
|
if v.children.arr == nil {
|
|
v.children.arr = make([]*V, 0, initialArrayCapacity)
|
|
}
|
|
v.children.arr = append(v.children.arr, child)
|
|
}
|
|
|
|
// unmarshalObjectWithIterUnknownEnd unmarshal object from raw bytes. it[offset] must be '{'
|
|
func unmarshalObjectWithIterUnknownEnd(p pool, it iter, offset, right int) (_ *V, end int, err error) {
|
|
offset++
|
|
obj := newObject(p)
|
|
|
|
keyStart, keyEnd := 0, 0
|
|
colonFound := false
|
|
|
|
reachEnd := false
|
|
|
|
keyNotFoundErr := func() error {
|
|
if keyEnd == 0 {
|
|
return fmt.Errorf(
|
|
"%w, missing key for another value at Position %d", ErrNotObjectValue, offset,
|
|
)
|
|
}
|
|
if !colonFound {
|
|
return fmt.Errorf(
|
|
"%w, missing colon for key at Position %d", ErrNotObjectValue, offset,
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
valNotFoundErr := func() error {
|
|
if keyEnd > 0 {
|
|
return fmt.Errorf(
|
|
"%w, missing value for key '%s' at Position %d",
|
|
ErrNotObjectValue, unsafe.BtoS(it[keyStart:keyEnd]), keyStart,
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for offset < right {
|
|
offset, reachEnd = it.skipBlanks(offset, right)
|
|
if reachEnd {
|
|
// '}' not found
|
|
return nil, -1, fmt.Errorf("%w, cannot find '}'", ErrNotObjectValue)
|
|
}
|
|
|
|
chr := it[offset]
|
|
switch chr {
|
|
case '}':
|
|
if err = valNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
return obj, offset + 1, nil
|
|
|
|
case ',':
|
|
if err = valNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
offset++
|
|
// continue
|
|
|
|
case ':':
|
|
if colonFound {
|
|
return nil, -1, fmt.Errorf("%w, duplicate colon at Position %d", ErrNotObjectValue, keyStart)
|
|
}
|
|
colonFound = true
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
offset++
|
|
// continue
|
|
|
|
case '{':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
v, sectEnd, err := unmarshalObjectWithIterUnknownEnd(p, it, offset, right)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), v)
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
case '[':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
v, sectEnd, err := unmarshalArrayWithIterUnknownEnd(p, it, offset, right)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), v)
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
var v *V
|
|
v, sectEnd, _, err := it.parseNumber(p, offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), v)
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
case '"':
|
|
if keyEnd > 0 {
|
|
// string value
|
|
if !colonFound {
|
|
return nil, -1, fmt.Errorf("%w, missing value for key '%s' at Position %d",
|
|
ErrNotObjectValue, unsafe.BtoS(it[keyStart:keyEnd]), keyStart,
|
|
)
|
|
}
|
|
sectLenWithoutQuote, sectEnd, err := it.parseStrFromBytesForwardWithQuote(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
v := NewString(unsafe.BtoS(it[offset+1 : offset+1+sectLenWithoutQuote]))
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), v)
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
} else {
|
|
// string key
|
|
sectLenWithoutQuote, sectEnd, err := it.parseStrFromBytesForwardWithQuote(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
keyStart, keyEnd = offset+1, offset+1+sectLenWithoutQuote
|
|
offset = sectEnd
|
|
}
|
|
|
|
case 't':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
sectEnd, err := it.parseTrue(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), NewBool(true))
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
case 'f':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
sectEnd, err := it.parseFalse(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), NewBool(false))
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
case 'n':
|
|
if err = keyNotFoundErr(); err != nil {
|
|
return nil, -1, err
|
|
}
|
|
sectEnd, err := it.parseNull(offset)
|
|
if err != nil {
|
|
return nil, -1, err
|
|
}
|
|
setToObjectChildren(obj, unsafe.BtoS(it[keyStart:keyEnd]), NewNull())
|
|
keyEnd, colonFound = 0, false
|
|
offset = sectEnd
|
|
|
|
default:
|
|
return nil, -1, fmt.Errorf("%w, invalid character \\u%04X at Position %d", ErrRawBytesUnrecognized, chr, offset)
|
|
}
|
|
|
|
}
|
|
|
|
return nil, -1, fmt.Errorf("%w, cannot find '}'", ErrNotObjectValue)
|
|
}
|
|
|
|
// parseNumber parse a number string. Reference:
|
|
//
|
|
// - [ECMA-404 The JSON Data Interchange Standard](https://www.json.org/json-en.html)
|
|
func parseNumber(v *V, p pool) (err error) {
|
|
it := iter(v.srcByte)
|
|
|
|
parsed, end, reachEnd, err := it.parseNumber(p, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !reachEnd {
|
|
return fmt.Errorf("invalid character: 0x%02x", v.srcByte[end])
|
|
}
|
|
|
|
*v = *parsed
|
|
return nil
|
|
}
|
|
|
|
// ==== simple object parsing ====
|
|
func newFromNumber(p pool, b []byte) (ret *V, err error) {
|
|
v := new(p, Number)
|
|
v.srcByte = b
|
|
return v, nil
|
|
}
|
|
|
|
// ================ GENERAL UNMARSHALING ================
|
|
|
|
// iter is used to iterate []byte text
|
|
// NOTE: parseStrFromBytesForwardWithQuote and the escape helpers write decoded
// bytes back into this slice, so the underlying buffer is modified while
// strings are being parsed.
type iter []byte
|
|
|
|
func (it iter) parseStrFromBytesForwardWithQuote(offset int) (sectLenWithoutQuote int, sectEnd int, err error) {
|
|
offset++ // skip "
|
|
end := len(it)
|
|
sectEnd = offset
|
|
|
|
shift := func(i *int, le int) {
|
|
if end-*i < le {
|
|
err = fmt.Errorf(
|
|
"%w, expect at least %d remaining bytes, but got %d at Position %d",
|
|
ErrIllegalString, end-*i, le, *i,
|
|
)
|
|
return
|
|
}
|
|
it.memcpy(sectEnd, *i, le)
|
|
sectEnd += le
|
|
*i += le
|
|
}
|
|
|
|
// iterate every byte
|
|
for i := offset; i < end; {
|
|
chr := it[i]
|
|
|
|
// ACSII?
|
|
switch {
|
|
case chr == '\\':
|
|
err = it.handleEscapeStart(&i, §End)
|
|
case chr == '"':
|
|
// found end quote
|
|
return sectEnd - offset, i + 1, nil
|
|
case chr <= 0x7F:
|
|
// shift(&i, 1)
|
|
it[sectEnd] = it[i]
|
|
i++
|
|
sectEnd++
|
|
case runeIdentifyingBytes2(chr):
|
|
shift(&i, 2)
|
|
case runeIdentifyingBytes3(chr):
|
|
shift(&i, 3)
|
|
case runeIdentifyingBytes4(chr):
|
|
shift(&i, 4)
|
|
default:
|
|
err = fmt.Errorf("%w: illegal UTF8 string at Position %d", ErrIllegalString, i)
|
|
}
|
|
if err != nil {
|
|
return -1, -1, err
|
|
}
|
|
}
|
|
|
|
err = errors.New("ending double quote of a string is not found")
|
|
return
|
|
}
|
|
|
|
func (it iter) handleEscapeStart(i *int, sectEnd *int) error {
|
|
if len(it)-1-*i < 1 {
|
|
return errors.New("escape symbol not followed by another character")
|
|
}
|
|
|
|
chr := it[*i+1]
|
|
switch chr {
|
|
default:
|
|
return fmt.Errorf("unrecognized character 0x%02X after escape symbol", chr)
|
|
case '"', '\'', '/', '\\':
|
|
it[*sectEnd] = chr
|
|
*sectEnd++
|
|
*i += 2
|
|
case 'b':
|
|
it[*sectEnd] = '\b'
|
|
*sectEnd++
|
|
*i += 2
|
|
case 'f':
|
|
it[*sectEnd] = '\f'
|
|
*sectEnd++
|
|
*i += 2
|
|
case 'r':
|
|
it[*sectEnd] = '\r'
|
|
*sectEnd++
|
|
*i += 2
|
|
case 'n':
|
|
it[*sectEnd] = '\n'
|
|
*sectEnd++
|
|
*i += 2
|
|
case 't':
|
|
it[*sectEnd] = '\t'
|
|
*sectEnd++
|
|
*i += 2
|
|
case 'u':
|
|
return it.handleEscapeUnicodeStartWithEnd(i, len(it)-1, sectEnd)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (it iter) handleEscapeUnicodeStartWithEnd(i *int, end int, sectEnd *int) (err error) {
|
|
if end-*i <= 5 {
|
|
return errors.New("escape symbol not followed by another character")
|
|
}
|
|
|
|
b3 := chrToHex(it[*i+2], &err)
|
|
b2 := chrToHex(it[*i+3], &err)
|
|
b1 := chrToHex(it[*i+4], &err)
|
|
b0 := chrToHex(it[*i+5], &err)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
r := (rune(b3) << 12) + (rune(b2) << 8) + (rune(b1) << 4) + rune(b0)
|
|
|
|
// this rune is smaller than 0x10000
|
|
if r <= 0xD7FF || r >= 0xE000 {
|
|
le := it.assignASCIICodedRune(*sectEnd, r)
|
|
*i += 6
|
|
*sectEnd += le
|
|
return nil
|
|
}
|
|
|
|
// reference: [JSON 序列化中的转义和 Unicode 编码](https://cloud.tencent.com/developer/article/1625557/)
|
|
// should get another unicode-escaped character
|
|
if end-*i <= 11 {
|
|
return fmt.Errorf("insufficient UTF-16 data at offset %d", *i)
|
|
}
|
|
if it[*i+6] != '\\' || it[*i+7] != 'u' {
|
|
return fmt.Errorf("expect unicode escape character at position %d but not", *i+6)
|
|
}
|
|
|
|
ex3 := chrToHex(it[*i+8], &err)
|
|
ex2 := chrToHex(it[*i+9], &err)
|
|
ex1 := chrToHex(it[*i+10], &err)
|
|
ex0 := chrToHex(it[*i+11], &err)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
ex := (rune(ex3) << 12) + (rune(ex2) << 8) + (rune(ex1) << 4) + rune(ex0)
|
|
if ex < 0xDC00 {
|
|
return fmt.Errorf(
|
|
"%w, expect second UTF-16 encoding but got 0x04%X at position %d",
|
|
ErrIllegalString, r, *i+8,
|
|
)
|
|
}
|
|
ex -= 0xDC00
|
|
if ex > 0x03FF {
|
|
return fmt.Errorf(
|
|
"%w, expect second UTF-16 encoding but got 0x04%X at position %d",
|
|
ErrIllegalString, r, *i+8,
|
|
)
|
|
}
|
|
|
|
r = ((r - 0xD800) << 10) + ex + 0x10000
|
|
|
|
le := it.assignASCIICodedRune(*sectEnd, r)
|
|
*i += 12
|
|
*sectEnd += le
|
|
return nil
|
|
}
|
|
|
|
// chrToHex converts one hexadecimal digit character (0-9, A-F, a-f) into its
// numeric value. For any other character it stores an error in *errOut and
// returns 0; *errOut is left untouched on success.
func chrToHex(chr byte, errOut *error) byte {
	switch {
	case '0' <= chr && chr <= '9':
		return chr - '0'
	case 'A' <= chr && chr <= 'F':
		return chr - 'A' + 10
	case 'a' <= chr && chr <= 'f':
		return chr - 'a' + 10
	default:
		*errOut = fmt.Errorf("invalid unicode value character: %c", rune(chr))
		return 0
	}
}
|
|
|
|
func (it iter) memcpy(dst, src, length int) {
|
|
if dst == src {
|
|
return
|
|
}
|
|
copy(it[dst:dst+length], it[src:src+length])
|
|
// ptr := unsafe.Pointer(&it[0])
|
|
// C.memcpy(
|
|
// unsafe.Pointer(uintptr(ptr)+uintptr(dst)),
|
|
// unsafe.Pointer(uintptr(ptr)+uintptr(src)),
|
|
// C.size_t(length),
|
|
// )
|
|
}
|
|
|
|
func (it iter) assignASCIICodedRune(dst int, r rune) (offset int) {
|
|
switch {
|
|
// 0zzzzzzz ==>
|
|
// 0zzzzzzz
|
|
case r <= 0x7F:
|
|
it[dst+0] = byte(r)
|
|
return 1
|
|
// 00000yyy yyzzzzzz ==>
|
|
// 110yyyyy 10zzzzzz
|
|
case r <= 0x7FF:
|
|
it[dst+1] = byte((r&0x03F)>>0) + 0x80
|
|
it[dst+0] = byte((r&0x7C0)>>6) + 0xC0
|
|
return 2
|
|
// xxxxyyyy yyzzzzzz ==>
|
|
// 1110xxxx 10yyyyyy 10zzzzzz
|
|
case r <= 0xFFFF:
|
|
it[dst+2] = byte((r&0x003F)>>0) + 0x80
|
|
it[dst+1] = byte((r&0x0FC0)>>6) + 0x80
|
|
it[dst+0] = byte((r&0xF000)>>12) + 0xE0
|
|
return 3
|
|
// 000wwwxx xxxxyyyy yyzzzzzz ==>
|
|
// 11110www 10xxxxxx 10yyyyyy 10zzzzzz
|
|
default:
|
|
it[dst+3] = byte((r&0x00003F)>>0) + 0x80
|
|
it[dst+2] = byte((r&0x000FC0)>>6) + 0x80
|
|
it[dst+1] = byte((r&0x03F000)>>12) + 0x80
|
|
it[dst+0] = byte((r&0x1C0000)>>18) + 0xF0
|
|
return 4
|
|
}
|
|
}
|
|
|
|
// runeIdentifyingBytes2 reports whether chr has the bit pattern 110xxxxx.
func runeIdentifyingBytes2(chr byte) bool {
	return chr>>5 == 0b110
}
|
|
|
|
// runeIdentifyingBytes3 reports whether chr has the bit pattern 1110xxxx.
func runeIdentifyingBytes3(chr byte) bool {
	return chr>>4 == 0b1110
}
|
|
|
|
// runeIdentifyingBytes4 reports whether chr has the bit pattern 11110xxx.
func runeIdentifyingBytes4(chr byte) bool {
	return chr>>3 == 0b11110
}
|
|
|
|
func (it iter) parseTrue(offset int) (end int, err error) {
|
|
if len(it)-offset < 4 {
|
|
return -1, fmt.Errorf("%w, insufficient character from Position %d", ErrNotValidBoolValue, offset)
|
|
}
|
|
|
|
if it[offset] == 't' &&
|
|
it[offset+1] == 'r' &&
|
|
it[offset+2] == 'u' &&
|
|
it[offset+3] == 'e' {
|
|
return offset + 4, nil
|
|
}
|
|
|
|
return -1, fmt.Errorf("%w, not 'true' at Position %d", ErrNotValidBoolValue, offset)
|
|
}
|
|
|
|
func (it iter) parseFalse(offset int) (end int, err error) {
|
|
if len(it)-offset < 5 {
|
|
return -1, fmt.Errorf("%w, insufficient character from Position %d", ErrNotValidBoolValue, offset)
|
|
}
|
|
|
|
if it[offset] == 'f' &&
|
|
it[offset+1] == 'a' &&
|
|
it[offset+2] == 'l' &&
|
|
it[offset+3] == 's' &&
|
|
it[offset+4] == 'e' {
|
|
return offset + 5, nil
|
|
}
|
|
|
|
return -1, fmt.Errorf("%w, not 'false' at Position %d", ErrNotValidBoolValue, offset)
|
|
}
|
|
|
|
func (it iter) parseNull(offset int) (end int, err error) {
|
|
if len(it)-offset < 4 {
|
|
return -1, fmt.Errorf("%w, insufficient character from Position %d", ErrNotValidNullValue, offset)
|
|
}
|
|
|
|
if it[offset] == 'n' &&
|
|
it[offset+1] == 'u' &&
|
|
it[offset+2] == 'l' &&
|
|
it[offset+3] == 'l' {
|
|
return offset + 4, nil
|
|
}
|
|
|
|
return -1, fmt.Errorf("%w, not 'null' at Position %d", ErrNotValidBoolValue, offset)
|
|
}
|
|
|
|
// skipBlanks skip blank characters until end or reaching a non-blank character
|
|
func (it iter) skipBlanks(offset int, endPos ...int) (newOffset int, reachEnd bool) {
|
|
end := 0
|
|
if len(endPos) > 0 {
|
|
end = endPos[0]
|
|
} else {
|
|
end = len(it)
|
|
}
|
|
|
|
for offset < end {
|
|
chr := it[offset]
|
|
switch chr {
|
|
case ' ', '\r', '\n', '\t', '\b':
|
|
offset++ // continue
|
|
default:
|
|
return offset, false
|
|
}
|
|
}
|
|
|
|
return end, true
|
|
}
|
|
|
|
// ================ FLOAT UNMARSHALING ================
|
|
|
|
// For state machine chart, please refer to ./img/parse_float_state_chart.drawio
|
|
|
|
func (it iter) parseNumber(
|
|
p pool, offset int,
|
|
) (v *V, end int, reachEnd bool, err error) {
|
|
|
|
idx := offset
|
|
negative := false
|
|
floated := false
|
|
exponentGot := false
|
|
dotGot := false
|
|
intAfterDotGot := false
|
|
integer := uint64(0)
|
|
edgeFound := false
|
|
|
|
// len(it)-idx means remain bytes
|
|
|
|
for ; len(it)-idx > 0 && !edgeFound; idx++ {
|
|
b := it[idx]
|
|
|
|
switch b {
|
|
default:
|
|
edgeFound = true
|
|
|
|
case '0':
|
|
if idx == offset {
|
|
// OK
|
|
} else if exponentGot {
|
|
// OK
|
|
} else if dotGot {
|
|
intAfterDotGot = true
|
|
} else if negative {
|
|
if integer == 0 && idx != offset+1 {
|
|
err = it.numErrorf(idx, "unexpected zero")
|
|
return
|
|
}
|
|
} else if integer == 0 {
|
|
err = it.numErrorf(idx, "unexpected zero")
|
|
return
|
|
}
|
|
integer *= 10
|
|
|
|
case '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
if !floated {
|
|
integer = integer*10 + uint64(b) - '0'
|
|
} else if !exponentGot {
|
|
intAfterDotGot = true
|
|
}
|
|
|
|
case 'e', 'E':
|
|
if exponentGot {
|
|
err = it.numErrorf(idx, "unexpected exponent symbol")
|
|
return
|
|
}
|
|
exponentGot = true
|
|
floated = true
|
|
|
|
case '+':
|
|
if !exponentGot {
|
|
err = it.numErrorf(idx, "unexpected +")
|
|
return
|
|
}
|
|
// Codes below not needed because this error is caught in outer logic
|
|
// if !floated {
|
|
// err = it.numErrorf(idx, "unexpected positive symbol")
|
|
// return
|
|
// }
|
|
|
|
case '-':
|
|
if !floated {
|
|
if idx != offset {
|
|
err = it.numErrorf(idx, "unexpected negative symbol")
|
|
return
|
|
}
|
|
negative = true
|
|
}
|
|
|
|
case '.':
|
|
if idx == offset || floated || exponentGot || dotGot {
|
|
err = it.numErrorf(idx, "unexpected dot symbol")
|
|
return
|
|
}
|
|
dotGot = true
|
|
floated = true
|
|
}
|
|
}
|
|
|
|
if edgeFound {
|
|
idx--
|
|
}
|
|
|
|
if floated {
|
|
if dotGot && !intAfterDotGot {
|
|
err = it.numErrorf(offset, "integer after dot missing")
|
|
return
|
|
}
|
|
v, err = it.parseFloatResult(p, offset, idx)
|
|
} else {
|
|
if integer > 0 && it[offset] == '0' {
|
|
err = it.numErrorf(offset, "non-zero integer should not start with zero")
|
|
return
|
|
}
|
|
|
|
firstB := it[offset]
|
|
if idx-offset == 1 {
|
|
if firstB >= '0' && firstB <= '9' {
|
|
// OK
|
|
} else {
|
|
err = it.numErrorf(offset, "invalid number format")
|
|
return
|
|
}
|
|
}
|
|
|
|
if negative {
|
|
v, err = it.parseNegativeIntResult(p, offset, idx, integer)
|
|
} else {
|
|
v, err = it.parsePositiveIntResult(p, offset, idx, integer)
|
|
}
|
|
}
|
|
|
|
return v, idx, len(it)-idx == 0, err
|
|
}
|
|
|
|
func (it iter) numErrorf(offset int, f string, a ...any) error {
|
|
a = append([]any{offset}, a...)
|
|
return fmt.Errorf("parsing number at index %d: "+f, a...)
|
|
|
|
// debug ONLY below
|
|
|
|
// getCaller := func(skip int) string {
|
|
// pc, _, _, ok := runtime.Caller(skip + 1)
|
|
// if !ok {
|
|
// return "<caller N/A>"
|
|
// }
|
|
// ca := runtime.CallersFrames([]uintptr{pc})
|
|
// fr, _ := ca.Next()
|
|
|
|
// fu := filepath.Ext(fr.Function)
|
|
// fu = strings.TrimLeft(fu, ".")
|
|
// li := fr.Line
|
|
|
|
// return fmt.Sprintf("%s(), Line %d", fu, li)
|
|
// }
|
|
// ca := getCaller(1)
|
|
|
|
// a = append([]any{ca, string(it), offset}, a...)
|
|
// return fmt.Errorf("%s - parsing number \"%s\" at index %d: "+f, a...)
|
|
}
|
|
|
|
// Bounds used when range-checking parsed integers.
const (
	// uintMaxStr is the decimal text of the largest uint64 value.
	uintMaxStr = "18446744073709551615"
	// uintMaxDigits is 10^19, the smallest number with as many decimal
	// digits as uintMaxStr.
	uintMaxDigits = 10000000000000000000

	// intMinStr is the decimal text of the smallest int64 value.
	intMinStr = "-9223372036854775808"
	// intMinAbs is the absolute value of the smallest int64 value.
	intMinAbs = 9223372036854775808
	// intMin is the smallest int64 value.
	intMin = -intMinAbs
)
|
|
|
|
func (it iter) parseFloatResult(p pool, start, end int) (*V, error) {
|
|
f, err := strconv.ParseFloat(unsafe.BtoS(it[start:end]), 64)
|
|
if err != nil {
|
|
return nil, it.numErrorf(start, "%w", err)
|
|
}
|
|
|
|
v := new(p, Number)
|
|
v.srcByte = it[start:end]
|
|
|
|
v.num.negative = f < 0
|
|
v.num.floated = true
|
|
v.num.i64 = int64(f)
|
|
v.num.u64 = uint64(f)
|
|
v.num.f64 = f
|
|
|
|
return v, nil
|
|
}
|
|
|
|
func (it iter) parsePositiveIntResult(p pool, start, end int, integer uint64) (*V, error) {
|
|
le := end - start
|
|
|
|
if le > len(uintMaxStr) {
|
|
return nil, it.numErrorf(start, "value too large")
|
|
} else if le == len(uintMaxStr) {
|
|
if integer < uintMaxDigits {
|
|
return nil, it.numErrorf(start, "value too large")
|
|
}
|
|
}
|
|
|
|
v := new(p, Number)
|
|
v.srcByte = it[start:end]
|
|
|
|
v.num.negative = false
|
|
v.num.floated = false
|
|
v.num.i64 = int64(integer)
|
|
v.num.u64 = uint64(integer)
|
|
v.num.f64 = float64(integer)
|
|
|
|
return v, nil
|
|
}
|
|
|
|
func (it iter) parseNegativeIntResult(p pool, start, end int, integer uint64) (*V, error) {
|
|
le := end - start
|
|
|
|
if le > len(intMinStr) {
|
|
return nil, it.numErrorf(start, "absolute value too large")
|
|
} else if le == len(intMinStr) {
|
|
if integer > intMinAbs {
|
|
return nil, it.numErrorf(start, "absolute value too large")
|
|
}
|
|
}
|
|
|
|
v := new(p, Number)
|
|
v.srcByte = it[start:end]
|
|
|
|
v.num.negative = true
|
|
v.num.floated = false
|
|
|
|
if integer == intMinAbs {
|
|
v.num.i64 = intMin
|
|
} else {
|
|
v.num.i64 = -int64(integer)
|
|
}
|
|
|
|
v.num.u64 = uint64(v.num.i64)
|
|
v.num.f64 = -float64(integer)
|
|
|
|
return v, nil
|
|
}
|