automerge/automerge-wasm/test/helpers/encoding.js
2022-01-09 08:05:00 -08:00

1209 lines
40 KiB
JavaScript

/**
* UTF-8 decoding and encoding using API that is supported in Node >= 12 and modern browsers:
* https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder/encode
* https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/decode
* If you're running in an environment where it's not available, please use a polyfill, such as:
* https://github.com/anonyco/FastestSmallestTextEncoderDecoder
*/
const utf8encoder = new TextEncoder()
const utf8decoder = new TextDecoder('utf-8')
function stringToUtf8(string) {
return utf8encoder.encode(string)
}
function utf8ToString(buffer) {
return utf8decoder.decode(buffer)
}
/**
* Converts a string consisting of hexadecimal digits into an Uint8Array.
*/
function hexStringToBytes(value) {
if (typeof value !== 'string') {
throw new TypeError('value is not a string')
}
if (!/^([0-9a-f][0-9a-f])*$/.test(value)) {
throw new RangeError('value is not hexadecimal')
}
if (value === '') {
return new Uint8Array(0)
} else {
return new Uint8Array(value.match(/../g).map(b => parseInt(b, 16)))
}
}
const NIBBLE_TO_HEX = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
const BYTE_TO_HEX = new Array(256)
for (let i = 0; i < 256; i++) {
BYTE_TO_HEX[i] = `${NIBBLE_TO_HEX[(i >>> 4) & 0xf]}${NIBBLE_TO_HEX[i & 0xf]}`;
}
/**
* Converts a Uint8Array into the equivalent hexadecimal string.
*/
function bytesToHexString(bytes) {
let hex = '', len = bytes.byteLength
for (let i = 0; i < len; i++) {
hex += BYTE_TO_HEX[bytes[i]]
}
return hex
}
/**
* Wrapper around an Uint8Array that allows values to be appended to the buffer,
* and that automatically grows the buffer when space runs out.
*/
class Encoder {
constructor() {
this.buf = new Uint8Array(16)
this.offset = 0
}
/**
* Returns the byte array containing the encoded data.
*/
get buffer() {
this.finish()
return this.buf.subarray(0, this.offset)
}
/**
* Reallocates the encoder's buffer to be bigger.
*/
grow(minSize = 0) {
let newSize = this.buf.byteLength * 4
while (newSize < minSize) newSize *= 2
const newBuf = new Uint8Array(newSize)
newBuf.set(this.buf, 0)
this.buf = newBuf
return this
}
/**
* Appends one byte (0 to 255) to the buffer.
*/
appendByte(value) {
if (this.offset >= this.buf.byteLength) this.grow()
this.buf[this.offset] = value
this.offset += 1
}
/**
* Encodes a 32-bit nonnegative integer in a variable number of bytes using
* the LEB128 encoding scheme (https://en.wikipedia.org/wiki/LEB128) and
* appends it to the buffer. Returns the number of bytes written.
*/
appendUint32(value) {
if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
if (value < 0 || value > 0xffffffff) throw new RangeError('number out of range')
const numBytes = Math.max(1, Math.ceil((32 - Math.clz32(value)) / 7))
if (this.offset + numBytes > this.buf.byteLength) this.grow()
for (let i = 0; i < numBytes; i++) {
this.buf[this.offset + i] = (value & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
value >>>= 7 // zero-filling right shift
}
this.offset += numBytes
return numBytes
}
/**
* Encodes a 32-bit signed integer in a variable number of bytes using the
* LEB128 encoding scheme (https://en.wikipedia.org/wiki/LEB128) and appends
* it to the buffer. Returns the number of bytes written.
*/
appendInt32(value) {
if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
if (value < -0x80000000 || value > 0x7fffffff) throw new RangeError('number out of range')
const numBytes = Math.ceil((33 - Math.clz32(value >= 0 ? value : -value - 1)) / 7)
if (this.offset + numBytes > this.buf.byteLength) this.grow()
for (let i = 0; i < numBytes; i++) {
this.buf[this.offset + i] = (value & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
value >>= 7 // sign-propagating right shift
}
this.offset += numBytes
return numBytes
}
/**
* Encodes a nonnegative integer in a variable number of bytes using the LEB128
* encoding scheme, up to the maximum size of integers supported by JavaScript
* (53 bits).
*/
appendUint53(value) {
if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
if (value < 0 || value > Number.MAX_SAFE_INTEGER) {
throw new RangeError('number out of range')
}
const high32 = Math.floor(value / 0x100000000)
const low32 = (value & 0xffffffff) >>> 0 // right shift to interpret as unsigned
return this.appendUint64(high32, low32)
}
/**
* Encodes a signed integer in a variable number of bytes using the LEB128
* encoding scheme, up to the maximum size of integers supported by JavaScript
* (53 bits).
*/
appendInt53(value) {
if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
if (value < Number.MIN_SAFE_INTEGER || value > Number.MAX_SAFE_INTEGER) {
throw new RangeError('number out of range')
}
const high32 = Math.floor(value / 0x100000000)
const low32 = (value & 0xffffffff) >>> 0 // right shift to interpret as unsigned
return this.appendInt64(high32, low32)
}
/**
* Encodes a 64-bit nonnegative integer in a variable number of bytes using
* the LEB128 encoding scheme, and appends it to the buffer. The number is
* given as two 32-bit halves since JavaScript cannot accurately represent
* integers with more than 53 bits in a single variable.
*/
appendUint64(high32, low32) {
if (!Number.isInteger(high32) || !Number.isInteger(low32)) {
throw new RangeError('value is not an integer')
}
if (high32 < 0 || high32 > 0xffffffff || low32 < 0 || low32 > 0xffffffff) {
throw new RangeError('number out of range')
}
if (high32 === 0) return this.appendUint32(low32)
const numBytes = Math.ceil((64 - Math.clz32(high32)) / 7)
if (this.offset + numBytes > this.buf.byteLength) this.grow()
for (let i = 0; i < 4; i++) {
this.buf[this.offset + i] = (low32 & 0x7f) | 0x80
low32 >>>= 7 // zero-filling right shift
}
this.buf[this.offset + 4] = (low32 & 0x0f) | ((high32 & 0x07) << 4) | (numBytes === 5 ? 0x00 : 0x80)
high32 >>>= 3
for (let i = 5; i < numBytes; i++) {
this.buf[this.offset + i] = (high32 & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
high32 >>>= 7
}
this.offset += numBytes
return numBytes
}
/**
* Encodes a 64-bit signed integer in a variable number of bytes using the
* LEB128 encoding scheme, and appends it to the buffer. The number is given
* as two 32-bit halves since JavaScript cannot accurately represent integers
* with more than 53 bits in a single variable. The sign of the 64-bit
* number is determined by the sign of the `high32` half; the sign of the
* `low32` half is ignored.
*/
appendInt64(high32, low32) {
if (!Number.isInteger(high32) || !Number.isInteger(low32)) {
throw new RangeError('value is not an integer')
}
if (high32 < -0x80000000 || high32 > 0x7fffffff || low32 < -0x80000000 || low32 > 0xffffffff) {
throw new RangeError('number out of range')
}
low32 >>>= 0 // interpret as unsigned
if (high32 === 0 && low32 <= 0x7fffffff) return this.appendInt32(low32)
if (high32 === -1 && low32 >= 0x80000000) return this.appendInt32(low32 - 0x100000000)
const numBytes = Math.ceil((65 - Math.clz32(high32 >= 0 ? high32 : -high32 - 1)) / 7)
if (this.offset + numBytes > this.buf.byteLength) this.grow()
for (let i = 0; i < 4; i++) {
this.buf[this.offset + i] = (low32 & 0x7f) | 0x80
low32 >>>= 7 // zero-filling right shift
}
this.buf[this.offset + 4] = (low32 & 0x0f) | ((high32 & 0x07) << 4) | (numBytes === 5 ? 0x00 : 0x80)
high32 >>= 3 // sign-propagating right shift
for (let i = 5; i < numBytes; i++) {
this.buf[this.offset + i] = (high32 & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
high32 >>= 7
}
this.offset += numBytes
return numBytes
}
/**
* Appends the contents of byte buffer `data` to the buffer. Returns the
* number of bytes appended.
*/
appendRawBytes(data) {
if (this.offset + data.byteLength > this.buf.byteLength) {
this.grow(this.offset + data.byteLength)
}
this.buf.set(data, this.offset)
this.offset += data.byteLength
return data.byteLength
}
/**
* Appends a UTF-8 string to the buffer, without any metadata. Returns the
* number of bytes appended.
*/
appendRawString(value) {
if (typeof value !== 'string') throw new TypeError('value is not a string')
return this.appendRawBytes(stringToUtf8(value))
}
/**
* Appends the contents of byte buffer `data` to the buffer, prefixed with the
* number of bytes in the buffer (as a LEB128-encoded unsigned integer).
*/
appendPrefixedBytes(data) {
this.appendUint53(data.byteLength)
this.appendRawBytes(data)
return this
}
/**
* Appends a UTF-8 string to the buffer, prefixed with its length in bytes
* (where the length is encoded as an unsigned LEB128 integer).
*/
appendPrefixedString(value) {
if (typeof value !== 'string') throw new TypeError('value is not a string')
this.appendPrefixedBytes(stringToUtf8(value))
return this
}
/**
* Takes a value, which must be a string consisting only of hexadecimal
* digits, maps it to a byte array, and appends it to the buffer, prefixed
* with its length in bytes.
*/
appendHexString(value) {
this.appendPrefixedBytes(hexStringToBytes(value))
return this
}
/**
* Flushes any unwritten data to the buffer. Call this before reading from
* the buffer constructed by this Encoder.
*/
finish() {
}
}
/**
* Counterpart to Encoder. Wraps a Uint8Array buffer with a cursor indicating
* the current decoding position, and allows values to be incrementally read by
* decoding the bytes at the current position.
*/
class Decoder {
constructor(buffer) {
if (!(buffer instanceof Uint8Array)) {
throw new TypeError(`Not a byte array: ${buffer}`)
}
this.buf = buffer
this.offset = 0
}
/**
* Returns false if there is still data to be read at the current decoding
* position, and true if we are at the end of the buffer.
*/
get done() {
return this.offset === this.buf.byteLength
}
/**
* Resets the cursor position, so that the next read goes back to the
* beginning of the buffer.
*/
reset() {
this.offset = 0
}
/**
* Moves the current decoding position forward by the specified number of
* bytes, without decoding anything.
*/
skip(bytes) {
if (this.offset + bytes > this.buf.byteLength) {
throw new RangeError('cannot skip beyond end of buffer')
}
this.offset += bytes
}
/**
* Reads one byte (0 to 255) from the buffer.
*/
readByte() {
this.offset += 1
return this.buf[this.offset - 1]
}
/**
* Reads a LEB128-encoded unsigned integer from the current position in the buffer.
* Throws an exception if the value doesn't fit in a 32-bit unsigned int.
*/
readUint32() {
let result = 0, shift = 0
while (this.offset < this.buf.byteLength) {
const nextByte = this.buf[this.offset]
if (shift === 28 && (nextByte & 0xf0) !== 0) { // more than 5 bytes, or value > 0xffffffff
throw new RangeError('number out of range')
}
result = (result | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) return result
}
throw new RangeError('buffer ended with incomplete number')
}
/**
* Reads a LEB128-encoded signed integer from the current position in the buffer.
* Throws an exception if the value doesn't fit in a 32-bit signed int.
*/
readInt32() {
let result = 0, shift = 0
while (this.offset < this.buf.byteLength) {
const nextByte = this.buf[this.offset]
if ((shift === 28 && (nextByte & 0x80) !== 0) || // more than 5 bytes
(shift === 28 && (nextByte & 0x40) === 0 && (nextByte & 0x38) !== 0) || // positive int > 0x7fffffff
(shift === 28 && (nextByte & 0x40) !== 0 && (nextByte & 0x38) !== 0x38)) { // negative int < -0x80000000
throw new RangeError('number out of range')
}
result |= (nextByte & 0x7f) << shift
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) {
if ((nextByte & 0x40) === 0 || shift > 28) {
return result // positive, or negative value that doesn't need sign-extending
} else {
return result | (-1 << shift) // sign-extend negative integer
}
}
}
throw new RangeError('buffer ended with incomplete number')
}
/**
* Reads a LEB128-encoded unsigned integer from the current position in the
* buffer. Allows any integer that can be safely represented by JavaScript
* (up to 2^53 - 1), and throws an exception outside of that range.
*/
readUint53() {
const { low32, high32 } = this.readUint64()
if (high32 < 0 || high32 > 0x1fffff) {
throw new RangeError('number out of range')
}
return high32 * 0x100000000 + low32
}
/**
* Reads a LEB128-encoded signed integer from the current position in the
* buffer. Allows any integer that can be safely represented by JavaScript
* (between -(2^53 - 1) and 2^53 - 1), throws an exception outside of that range.
*/
readInt53() {
const { low32, high32 } = this.readInt64()
if (high32 < -0x200000 || (high32 === -0x200000 && low32 === 0) || high32 > 0x1fffff) {
throw new RangeError('number out of range')
}
return high32 * 0x100000000 + low32
}
/**
* Reads a LEB128-encoded unsigned integer from the current position in the
* buffer. Throws an exception if the value doesn't fit in a 64-bit unsigned
* int. Returns the number in two 32-bit halves, as an object of the form
* `{high32, low32}`.
*/
readUint64() {
let low32 = 0, high32 = 0, shift = 0
while (this.offset < this.buf.byteLength && shift <= 28) {
const nextByte = this.buf[this.offset]
low32 = (low32 | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
if (shift === 28) {
high32 = (nextByte & 0x70) >>> 4
}
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) return { high32, low32 }
}
shift = 3
while (this.offset < this.buf.byteLength) {
const nextByte = this.buf[this.offset]
if (shift === 31 && (nextByte & 0xfe) !== 0) { // more than 10 bytes, or value > 2^64 - 1
throw new RangeError('number out of range')
}
high32 = (high32 | (nextByte & 0x7f) << shift) >>> 0
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) return { high32, low32 }
}
throw new RangeError('buffer ended with incomplete number')
}
/**
* Reads a LEB128-encoded signed integer from the current position in the
* buffer. Throws an exception if the value doesn't fit in a 64-bit signed
* int. Returns the number in two 32-bit halves, as an object of the form
* `{high32, low32}`. The `low32` half is always non-negative, and the
* sign of the `high32` half indicates the sign of the 64-bit number.
*/
readInt64() {
let low32 = 0, high32 = 0, shift = 0
while (this.offset < this.buf.byteLength && shift <= 28) {
const nextByte = this.buf[this.offset]
low32 = (low32 | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
if (shift === 28) {
high32 = (nextByte & 0x70) >>> 4
}
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) {
if ((nextByte & 0x40) !== 0) { // sign-extend negative integer
if (shift < 32) low32 = (low32 | (-1 << shift)) >>> 0
high32 |= -1 << Math.max(shift - 32, 0)
}
return { high32, low32 }
}
}
shift = 3
while (this.offset < this.buf.byteLength) {
const nextByte = this.buf[this.offset]
// On the 10th byte there are only two valid values: all 7 value bits zero
// (if the value is positive) or all 7 bits one (if the value is negative)
if (shift === 31 && nextByte !== 0 && nextByte !== 0x7f) {
throw new RangeError('number out of range')
}
high32 |= (nextByte & 0x7f) << shift
shift += 7
this.offset++
if ((nextByte & 0x80) === 0) {
if ((nextByte & 0x40) !== 0 && shift < 32) { // sign-extend negative integer
high32 |= -1 << shift
}
return { high32, low32 }
}
}
throw new RangeError('buffer ended with incomplete number')
}
/**
* Extracts a subarray `length` bytes in size, starting from the current
* position in the buffer, and moves the position forward.
*/
readRawBytes(length) {
const start = this.offset
if (start + length > this.buf.byteLength) {
throw new RangeError('subarray exceeds buffer size')
}
this.offset += length
return this.buf.subarray(start, this.offset)
}
/**
* Extracts `length` bytes from the buffer, starting from the current position,
* and returns the UTF-8 string decoding of those bytes.
*/
readRawString(length) {
return utf8ToString(this.readRawBytes(length))
}
/**
* Extracts a subarray from the current position in the buffer, prefixed with
* its length in bytes (encoded as an unsigned LEB128 integer).
*/
readPrefixedBytes() {
return this.readRawBytes(this.readUint53())
}
/**
* Reads a UTF-8 string from the current position in the buffer, prefixed with its
* length in bytes (where the length is encoded as an unsigned LEB128 integer).
*/
readPrefixedString() {
return utf8ToString(this.readPrefixedBytes())
}
/**
* Reads a byte array from the current position in the buffer, prefixed with its
* length in bytes. Returns that byte array converted to a hexadecimal string.
*/
readHexString() {
return bytesToHexString(this.readPrefixedBytes())
}
}
/**
* An encoder that uses run-length encoding to compress sequences of repeated
* values. The constructor argument specifies the type of values, which may be
* either 'int', 'uint', or 'utf8'. Besides valid values of the selected
* datatype, values may also be null.
*
* The encoded buffer starts with a LEB128-encoded signed integer, the
* repetition count. The interpretation of the following values depends on this
* repetition count:
* - If this number is a positive value n, the next value in the buffer
* (encoded as the specified datatype) is repeated n times in the sequence.
* - If the repetition count is a negative value -n, then the next n values
* (encoded as the specified datatype) in the buffer are treated as a
* literal, i.e. they appear in the sequence without any further
* interpretation or repetition.
* - If the repetition count is zero, then the next value in the buffer is a
* LEB128-encoded unsigned integer indicating the number of null values
* that appear at the current position in the sequence.
*
* After one of these three has completed, the process repeats, starting again
* with a repetition count, until we reach the end of the buffer.
*/
class RLEEncoder extends Encoder {
constructor(type) {
super()
this.type = type
this.state = 'empty'
this.lastValue = undefined
this.count = 0
this.literal = []
}
/**
* Appends a new value to the sequence. If `repetitions` is given, the value is repeated
* `repetitions` times.
*/
appendValue(value, repetitions = 1) {
this._appendValue(value, repetitions)
}
/**
* Like `appendValue()`, but this method is not overridden by `DeltaEncoder`.
*/
_appendValue(value, repetitions = 1) {
if (repetitions <= 0) return
if (this.state === 'empty') {
this.state = (value === null ? 'nulls' : (repetitions === 1 ? 'loneValue' : 'repetition'))
this.lastValue = value
this.count = repetitions
} else if (this.state === 'loneValue') {
if (value === null) {
this.flush()
this.state = 'nulls'
this.count = repetitions
} else if (value === this.lastValue) {
this.state = 'repetition'
this.count = 1 + repetitions
} else if (repetitions > 1) {
this.flush()
this.state = 'repetition'
this.count = repetitions
this.lastValue = value
} else {
this.state = 'literal'
this.literal = [this.lastValue]
this.lastValue = value
}
} else if (this.state === 'repetition') {
if (value === null) {
this.flush()
this.state = 'nulls'
this.count = repetitions
} else if (value === this.lastValue) {
this.count += repetitions
} else if (repetitions > 1) {
this.flush()
this.state = 'repetition'
this.count = repetitions
this.lastValue = value
} else {
this.flush()
this.state = 'loneValue'
this.lastValue = value
}
} else if (this.state === 'literal') {
if (value === null) {
this.literal.push(this.lastValue)
this.flush()
this.state = 'nulls'
this.count = repetitions
} else if (value === this.lastValue) {
this.flush()
this.state = 'repetition'
this.count = 1 + repetitions
} else if (repetitions > 1) {
this.literal.push(this.lastValue)
this.flush()
this.state = 'repetition'
this.count = repetitions
this.lastValue = value
} else {
this.literal.push(this.lastValue)
this.lastValue = value
}
} else if (this.state === 'nulls') {
if (value === null) {
this.count += repetitions
} else if (repetitions > 1) {
this.flush()
this.state = 'repetition'
this.count = repetitions
this.lastValue = value
} else {
this.flush()
this.state = 'loneValue'
this.lastValue = value
}
}
}
/**
* Copies values from the RLEDecoder `decoder` into this encoder. The `options` object may
* contain the following keys:
* - `count`: The number of values to copy. If not specified, copies all remaining values.
* - `sumValues`: If true, the function computes the sum of all numeric values as they are
* copied (null values are counted as zero), and returns that number.
* - `sumShift`: If set, values are shifted right by `sumShift` bits before adding to the sum.
*
* Returns an object of the form `{nonNullValues, sum}` where `nonNullValues` is the number of
* non-null values copied, and `sum` is the sum (only if the `sumValues` option is set).
*/
copyFrom(decoder, options = {}) {
const { count, sumValues, sumShift } = options
if (!(decoder instanceof RLEDecoder) || (decoder.type !== this.type)) {
throw new TypeError('incompatible type of decoder')
}
let remaining = (typeof count === 'number' ? count : Number.MAX_SAFE_INTEGER)
let nonNullValues = 0, sum = 0
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}
// Copy a value so that we have a well-defined starting state. NB: when super.copyFrom() is
// called by the DeltaEncoder subclass, the following calls to readValue() and appendValue()
// refer to the overridden methods, while later readRecord(), readRawValue() and _appendValue()
// calls refer to the non-overridden RLEDecoder/RLEEncoder methods.
let firstValue = decoder.readValue()
if (firstValue === null) {
const numNulls = Math.min(decoder.count + 1, remaining)
remaining -= numNulls
decoder.count -= numNulls - 1
this.appendValue(null, numNulls)
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}
firstValue = decoder.readValue()
if (firstValue === null) throw new RangeError('null run must be followed by non-null value')
}
this.appendValue(firstValue)
remaining--
nonNullValues++
if (sumValues) sum += (sumShift ? (firstValue >>> sumShift) : firstValue)
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}
// Copy data at the record level without expanding repetitions
let firstRun = (decoder.count > 0)
while (remaining > 0 && !decoder.done) {
if (!firstRun) decoder.readRecord()
const numValues = Math.min(decoder.count, remaining)
decoder.count -= numValues
if (decoder.state === 'literal') {
nonNullValues += numValues
for (let i = 0; i < numValues; i++) {
if (decoder.done) throw new RangeError('incomplete literal')
const value = decoder.readRawValue()
if (value === decoder.lastValue) throw new RangeError('Repetition of values is not allowed in literal')
decoder.lastValue = value
this._appendValue(value)
if (sumValues) sum += (sumShift ? (value >>> sumShift) : value)
}
} else if (decoder.state === 'repetition') {
nonNullValues += numValues
if (sumValues) sum += numValues * (sumShift ? (decoder.lastValue >>> sumShift) : decoder.lastValue)
const value = decoder.lastValue
this._appendValue(value)
if (numValues > 1) {
this._appendValue(value)
if (this.state !== 'repetition') throw new RangeError(`Unexpected state ${this.state}`)
this.count += numValues - 2
}
} else if (decoder.state === 'nulls') {
this._appendValue(null)
if (this.state !== 'nulls') throw new RangeError(`Unexpected state ${this.state}`)
this.count += numValues - 1
}
firstRun = false
remaining -= numValues
}
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
return sumValues ? {nonNullValues, sum} : {nonNullValues}
}
/**
* Private method, do not call from outside the class.
*/
flush() {
if (this.state === 'loneValue') {
this.appendInt32(-1)
this.appendRawValue(this.lastValue)
} else if (this.state === 'repetition') {
this.appendInt53(this.count)
this.appendRawValue(this.lastValue)
} else if (this.state === 'literal') {
this.appendInt53(-this.literal.length)
for (let v of this.literal) this.appendRawValue(v)
} else if (this.state === 'nulls') {
this.appendInt32(0)
this.appendUint53(this.count)
}
this.state = 'empty'
}
/**
* Private method, do not call from outside the class.
*/
appendRawValue(value) {
if (this.type === 'int') {
this.appendInt53(value)
} else if (this.type === 'uint') {
this.appendUint53(value)
} else if (this.type === 'utf8') {
this.appendPrefixedString(value)
} else {
throw new RangeError(`Unknown RLEEncoder datatype: ${this.type}`)
}
}
/**
* Flushes any unwritten data to the buffer. Call this before reading from
* the buffer constructed by this Encoder.
*/
finish() {
if (this.state === 'literal') this.literal.push(this.lastValue)
// Don't write anything if the only values we have seen are nulls
if (this.state !== 'nulls' || this.offset > 0) this.flush()
}
}
/**
* Counterpart to RLEEncoder: reads values from an RLE-compressed sequence,
* returning nulls and repeated values as required.
*/
class RLEDecoder extends Decoder {
constructor(type, buffer) {
super(buffer)
this.type = type
this.lastValue = undefined
this.count = 0
this.state = undefined
}
/**
* Returns false if there is still data to be read at the current decoding
* position, and true if we are at the end of the buffer.
*/
get done() {
return (this.count === 0) && (this.offset === this.buf.byteLength)
}
/**
* Resets the cursor position, so that the next read goes back to the
* beginning of the buffer.
*/
reset() {
this.offset = 0
this.lastValue = undefined
this.count = 0
this.state = undefined
}
/**
* Returns the next value (or null) in the sequence.
*/
readValue() {
if (this.done) return null
if (this.count === 0) this.readRecord()
this.count -= 1
if (this.state === 'literal') {
const value = this.readRawValue()
if (value === this.lastValue) throw new RangeError('Repetition of values is not allowed in literal')
this.lastValue = value
return value
} else {
return this.lastValue
}
}
/**
* Discards the next `numSkip` values in the sequence.
*/
skipValues(numSkip) {
while (numSkip > 0 && !this.done) {
if (this.count === 0) {
this.count = this.readInt53()
if (this.count > 0) {
this.lastValue = (this.count <= numSkip) ? this.skipRawValues(1) : this.readRawValue()
this.state = 'repetition'
} else if (this.count < 0) {
this.count = -this.count
this.state = 'literal'
} else { // this.count == 0
this.count = this.readUint53()
this.lastValue = null
this.state = 'nulls'
}
}
const consume = Math.min(numSkip, this.count)
if (this.state === 'literal') this.skipRawValues(consume)
numSkip -= consume
this.count -= consume
}
}
/**
* Private method, do not call from outside the class.
* Reads a repetition count from the buffer and sets up the state appropriately.
*/
readRecord() {
this.count = this.readInt53()
if (this.count > 1) {
const value = this.readRawValue()
if ((this.state === 'repetition' || this.state === 'literal') && this.lastValue === value) {
throw new RangeError('Successive repetitions with the same value are not allowed')
}
this.state = 'repetition'
this.lastValue = value
} else if (this.count === 1) {
throw new RangeError('Repetition count of 1 is not allowed, use a literal instead')
} else if (this.count < 0) {
this.count = -this.count
if (this.state === 'literal') throw new RangeError('Successive literals are not allowed')
this.state = 'literal'
} else { // this.count == 0
if (this.state === 'nulls') throw new RangeError('Successive null runs are not allowed')
this.count = this.readUint53()
if (this.count === 0) throw new RangeError('Zero-length null runs are not allowed')
this.lastValue = null
this.state = 'nulls'
}
}
/**
* Private method, do not call from outside the class.
* Reads one value of the datatype configured on construction.
*/
readRawValue() {
if (this.type === 'int') {
return this.readInt53()
} else if (this.type === 'uint') {
return this.readUint53()
} else if (this.type === 'utf8') {
return this.readPrefixedString()
} else {
throw new RangeError(`Unknown RLEDecoder datatype: ${this.type}`)
}
}
/**
* Private method, do not call from outside the class.
* Skips over `num` values of the datatype configured on construction.
*/
skipRawValues(num) {
if (this.type === 'utf8') {
for (let i = 0; i < num; i++) this.skip(this.readUint53())
} else {
while (num > 0 && this.offset < this.buf.byteLength) {
if ((this.buf[this.offset] & 0x80) === 0) num--
this.offset++
}
if (num > 0) throw new RangeError('cannot skip beyond end of buffer')
}
}
}
/**
* A variant of RLEEncoder: rather than storing the actual values passed to
* appendValue(), this version stores only the first value, and for all
* subsequent values it stores the difference to the previous value. This
* encoding is good when values tend to come in sequentially incrementing runs,
* because the delta between successive values is 1, and repeated values of 1
* are easily compressed with run-length encoding.
*
* Null values are also allowed, as with RLEEncoder.
*/
class DeltaEncoder extends RLEEncoder {
constructor() {
super('int')
this.absoluteValue = 0
}
/**
* Appends a new integer value to the sequence. If `repetitions` is given, the value is repeated
* `repetitions` times.
*/
appendValue(value, repetitions = 1) {
if (repetitions <= 0) return
if (typeof value === 'number') {
super.appendValue(value - this.absoluteValue, 1)
this.absoluteValue = value
if (repetitions > 1) super.appendValue(0, repetitions - 1)
} else {
super.appendValue(value, repetitions)
}
}
/**
* Copies values from the DeltaDecoder `decoder` into this encoder. The `options` object may
* contain the key `count`, indicating the number of values to copy. If not specified, copies
* all remaining values in the decoder.
*/
copyFrom(decoder, options = {}) {
if (options.sumValues) {
throw new RangeError('unsupported options for DeltaEncoder.copyFrom()')
}
if (!(decoder instanceof DeltaDecoder)) {
throw new TypeError('incompatible type of decoder')
}
let remaining = options.count
if (remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${remaining} values`)
if (remaining === 0 || decoder.done) return
// Copy any null values, and the first non-null value, so that appendValue() computes the
// difference between the encoder's last value and the decoder's first (absolute) value.
let value = decoder.readValue(), nulls = 0
this.appendValue(value)
if (value === null) {
nulls = decoder.count + 1
if (remaining !== undefined && remaining < nulls) nulls = remaining
decoder.count -= nulls - 1
this.count += nulls - 1
if (remaining > nulls && decoder.done) throw new RangeError(`cannot copy ${remaining} values`)
if (remaining === nulls || decoder.done) return
// The next value read is certain to be non-null because we're not at the end of the decoder,
// and a run of nulls must be followed by a run of non-nulls.
if (decoder.count === 0) this.appendValue(decoder.readValue())
}
// Once we have the first value, the subsequent relative values can be copied verbatim without
// any further processing. Note that the first value copied by super.copyFrom() is an absolute
// value, while subsequent values are relative. Thus, the sum of all of the (non-null) copied
// values must equal the absolute value of the final element copied.
if (remaining !== undefined) remaining -= nulls + 1
const { nonNullValues, sum } = super.copyFrom(decoder, {count: remaining, sumValues: true})
if (nonNullValues > 0) {
this.absoluteValue = sum
decoder.absoluteValue = sum
}
}
}
/**
* Counterpart to DeltaEncoder: reads values from a delta-compressed sequence of
* numbers (may include null values).
*/
class DeltaDecoder extends RLEDecoder {
constructor(buffer) {
super('int', buffer)
this.absoluteValue = 0
}
/**
* Resets the cursor position, so that the next read goes back to the
* beginning of the buffer.
*/
reset() {
this.offset = 0
this.lastValue = undefined
this.count = 0
this.state = undefined
this.absoluteValue = 0
}
/**
* Returns the next integer (or null) value in the sequence.
*/
readValue() {
const value = super.readValue()
if (value === null) return null
this.absoluteValue += value
return this.absoluteValue
}
/**
* Discards the next `numSkip` values in the sequence.
*/
skipValues(numSkip) {
while (numSkip > 0 && !this.done) {
if (this.count === 0) this.readRecord()
const consume = Math.min(numSkip, this.count)
if (this.state === 'literal') {
for (let i = 0; i < consume; i++) {
this.lastValue = this.readRawValue()
this.absoluteValue += this.lastValue
}
} else if (this.state === 'repetition') {
this.absoluteValue += consume * this.lastValue
}
numSkip -= consume
this.count -= consume
}
}
}
/**
* Encodes a sequence of boolean values by mapping it to a sequence of integers:
* the number of false values, followed by the number of true values, followed
* by the number of false values, and so on. Each number is encoded as a LEB128
* unsigned integer. This encoding is a bit like RLEEncoder, except that we
* only encode the repetition count but not the actual value, since the values
* just alternate between false and true (starting with false).
*/
class BooleanEncoder extends Encoder {
constructor() {
super()
this.lastValue = false
this.count = 0
}
/**
* Appends a new value to the sequence. If `repetitions` is given, the value is repeated
* `repetitions` times.
*/
appendValue(value, repetitions = 1) {
if (value !== false && value !== true) {
throw new RangeError(`Unsupported value for BooleanEncoder: ${value}`)
}
if (repetitions <= 0) return
if (this.lastValue === value) {
this.count += repetitions
} else {
this.appendUint53(this.count)
this.lastValue = value
this.count = repetitions
}
}
/**
* Copies values from the BooleanDecoder `decoder` into this encoder. The `options` object may
* contain the key `count`, indicating the number of values to copy. If not specified, copies
* all remaining values in the decoder.
*/
copyFrom(decoder, options = {}) {
if (!(decoder instanceof BooleanDecoder)) {
throw new TypeError('incompatible type of decoder')
}
const { count } = options
let remaining = (typeof count === 'number' ? count : Number.MAX_SAFE_INTEGER)
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
if (remaining === 0 || decoder.done) return
// Copy one value to bring decoder and encoder state into sync, then finish that value's repetitions
this.appendValue(decoder.readValue())
remaining--
const firstCopy = Math.min(decoder.count, remaining)
this.count += firstCopy
decoder.count -= firstCopy
remaining -= firstCopy
while (remaining > 0 && !decoder.done) {
decoder.count = decoder.readUint53()
if (decoder.count === 0) throw new RangeError('Zero-length runs are not allowed')
decoder.lastValue = !decoder.lastValue
this.appendUint53(this.count)
const numCopied = Math.min(decoder.count, remaining)
this.count = numCopied
this.lastValue = decoder.lastValue
decoder.count -= numCopied
remaining -= numCopied
}
if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
}
/**
* Flushes any unwritten data to the buffer. Call this before reading from
* the buffer constructed by this Encoder.
*/
finish() {
if (this.count > 0) {
this.appendUint53(this.count)
this.count = 0
}
}
}
/**
* Counterpart to BooleanEncoder: reads boolean values from a runlength-encoded
* sequence.
*/
class BooleanDecoder extends Decoder {
constructor(buffer) {
super(buffer)
this.lastValue = true // is negated the first time we read a count
this.firstRun = true
this.count = 0
}
/**
* Returns false if there is still data to be read at the current decoding
* position, and true if we are at the end of the buffer.
*/
get done() {
return (this.count === 0) && (this.offset === this.buf.byteLength)
}
/**
* Resets the cursor position, so that the next read goes back to the
* beginning of the buffer.
*/
reset() {
this.offset = 0
this.lastValue = true
this.firstRun = true
this.count = 0
}
/**
* Returns the next value in the sequence.
*/
readValue() {
if (this.done) return false
while (this.count === 0) {
this.count = this.readUint53()
this.lastValue = !this.lastValue
if (this.count === 0 && !this.firstRun) {
throw new RangeError('Zero-length runs are not allowed')
}
this.firstRun = false
}
this.count -= 1
return this.lastValue
}
/**
* Discards the next `numSkip` values in the sequence.
*/
skipValues(numSkip) {
while (numSkip > 0 && !this.done) {
if (this.count === 0) {
this.count = this.readUint53()
this.lastValue = !this.lastValue
if (this.count === 0) throw new RangeError('Zero-length runs are not allowed')
}
if (this.count < numSkip) {
numSkip -= this.count
this.count = 0
} else {
this.count -= numSkip
numSkip = 0
}
}
}
}
module.exports = {
stringToUtf8, utf8ToString, hexStringToBytes, bytesToHexString,
Encoder, Decoder, RLEEncoder, RLEDecoder, DeltaEncoder, DeltaDecoder, BooleanEncoder, BooleanDecoder
}