automerge/automerge-wasm/test/helpers/encoding.js

/**
 * UTF-8 decoding and encoding using API that is supported in Node >= 12 and modern browsers:
 * https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder/encode
 * https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/decode
 * If you're running in an environment where it's not available, please use a polyfill, such as:
 * https://github.com/anonyco/FastestSmallestTextEncoderDecoder
 */
const utf8encoder = new TextEncoder()
const utf8decoder = new TextDecoder('utf-8')

function stringToUtf8(string) {
  return utf8encoder.encode(string)
}

function utf8ToString(buffer) {
  return utf8decoder.decode(buffer)
}

/**
 * Converts a string consisting of hexadecimal digits into an Uint8Array.
 */
function hexStringToBytes(value) {
  if (typeof value !== 'string') {
    throw new TypeError('value is not a string')
  }
  if (!/^([0-9a-f][0-9a-f])*$/.test(value)) {
    throw new RangeError('value is not hexadecimal')
  }
  if (value === '') {
    return new Uint8Array(0)
  } else {
    return new Uint8Array(value.match(/../g).map(b => parseInt(b, 16)))
  }
}

const NIBBLE_TO_HEX = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
const BYTE_TO_HEX = new Array(256)
for (let i = 0; i < 256; i++) {
  BYTE_TO_HEX[i] = `${NIBBLE_TO_HEX[(i >>> 4) & 0xf]}${NIBBLE_TO_HEX[i & 0xf]}`;
}

/**
 * Converts a Uint8Array into the equivalent hexadecimal string.
 */
function bytesToHexString(bytes) {
  let hex = '', len = bytes.byteLength
  for (let i = 0; i < len; i++) {
    hex += BYTE_TO_HEX[bytes[i]]
  }
  return hex
}

/**
 * Wrapper around an Uint8Array that allows values to be appended to the buffer,
 * and that automatically grows the buffer when space runs out.
 */
class Encoder {
  constructor() {
    this.buf = new Uint8Array(16)
    this.offset = 0
  }

  /**
   * Returns the byte array containing the encoded data.
   */
  get buffer() {
    this.finish()
    return this.buf.subarray(0, this.offset)
  }

  /**
   * Reallocates the encoder's buffer to be bigger.
   */
  grow(minSize = 0) {
    let newSize = this.buf.byteLength * 4
    while (newSize < minSize) newSize *= 2
    const newBuf = new Uint8Array(newSize)
    newBuf.set(this.buf, 0)
    this.buf = newBuf
    return this
  }

  /**
   * Appends one byte (0 to 255) to the buffer.
   */
  appendByte(value) {
    if (this.offset >= this.buf.byteLength) this.grow()
    this.buf[this.offset] = value
    this.offset += 1
  }

  /**
   * Encodes a 32-bit nonnegative integer in a variable number of bytes using
   * the LEB128 encoding scheme (https://en.wikipedia.org/wiki/LEB128) and
   * appends it to the buffer. Returns the number of bytes written.
   */
  appendUint32(value) {
    if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
    if (value < 0 || value > 0xffffffff) throw new RangeError('number out of range')

    const numBytes = Math.max(1, Math.ceil((32 - Math.clz32(value)) / 7))
    if (this.offset + numBytes > this.buf.byteLength) this.grow()

    for (let i = 0; i < numBytes; i++) {
      this.buf[this.offset + i] = (value & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
      value >>>= 7 // zero-filling right shift
    }
    this.offset += numBytes
    return numBytes
  }

  /**
   * Encodes a 32-bit signed integer in a variable number of bytes using the
   * LEB128 encoding scheme (https://en.wikipedia.org/wiki/LEB128) and appends
   * it to the buffer. Returns the number of bytes written.
   */
  appendInt32(value) {
    if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
    if (value < -0x80000000 || value > 0x7fffffff) throw new RangeError('number out of range')

    const numBytes = Math.ceil((33 - Math.clz32(value >= 0 ? value : -value - 1)) / 7)
    if (this.offset + numBytes > this.buf.byteLength) this.grow()

    for (let i = 0; i < numBytes; i++) {
      this.buf[this.offset + i] = (value & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
      value >>= 7 // sign-propagating right shift
    }
    this.offset += numBytes
    return numBytes
  }

  /**
   * Encodes a nonnegative integer in a variable number of bytes using the LEB128
   * encoding scheme, up to the maximum size of integers supported by JavaScript
   * (53 bits).
   */
  appendUint53(value) {
    if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
    if (value < 0 || value > Number.MAX_SAFE_INTEGER) {
      throw new RangeError('number out of range')
    }
    const high32 = Math.floor(value / 0x100000000)
    const low32 = (value & 0xffffffff) >>> 0 // right shift to interpret as unsigned
    return this.appendUint64(high32, low32)
  }

  /**
   * Encodes a signed integer in a variable number of bytes using the LEB128
   * encoding scheme, up to the maximum size of integers supported by JavaScript
   * (53 bits).
   */
  appendInt53(value) {
    if (!Number.isInteger(value)) throw new RangeError('value is not an integer')
    if (value < Number.MIN_SAFE_INTEGER || value > Number.MAX_SAFE_INTEGER) {
      throw new RangeError('number out of range')
    }
    const high32 = Math.floor(value / 0x100000000)
    const low32 = (value & 0xffffffff) >>> 0 // right shift to interpret as unsigned
    return this.appendInt64(high32, low32)
  }

  /**
   * Encodes a 64-bit nonnegative integer in a variable number of bytes using
   * the LEB128 encoding scheme, and appends it to the buffer. The number is
   * given as two 32-bit halves since JavaScript cannot accurately represent
   * integers with more than 53 bits in a single variable.
   */
  appendUint64(high32, low32) {
    if (!Number.isInteger(high32) || !Number.isInteger(low32)) {
      throw new RangeError('value is not an integer')
    }
    if (high32 < 0 || high32 > 0xffffffff || low32 < 0 || low32 > 0xffffffff) {
      throw new RangeError('number out of range')
    }
    if (high32 === 0) return this.appendUint32(low32)

    const numBytes = Math.ceil((64 - Math.clz32(high32)) / 7)
    if (this.offset + numBytes > this.buf.byteLength) this.grow()
    for (let i = 0; i < 4; i++) {
      this.buf[this.offset + i] = (low32 & 0x7f) | 0x80
      low32 >>>= 7 // zero-filling right shift
    }
    this.buf[this.offset + 4] = (low32 & 0x0f) | ((high32 & 0x07) << 4) | (numBytes === 5 ? 0x00 : 0x80)
    high32 >>>= 3
    for (let i = 5; i < numBytes; i++) {
      this.buf[this.offset + i] = (high32 & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
      high32 >>>= 7
    }
    this.offset += numBytes
    return numBytes
  }

  /**
   * Encodes a 64-bit signed integer in a variable number of bytes using the
   * LEB128 encoding scheme, and appends it to the buffer. The number is given
   * as two 32-bit halves since JavaScript cannot accurately represent integers
   * with more than 53 bits in a single variable. The sign of the 64-bit
   * number is determined by the sign of the `high32` half; the sign of the
   * `low32` half is ignored.
   */
  appendInt64(high32, low32) {
    if (!Number.isInteger(high32) || !Number.isInteger(low32)) {
      throw new RangeError('value is not an integer')
    }
    if (high32 < -0x80000000 || high32 > 0x7fffffff || low32 < -0x80000000 || low32 > 0xffffffff) {
      throw new RangeError('number out of range')
    }
    low32 >>>= 0 // interpret as unsigned
    if (high32 === 0 && low32 <= 0x7fffffff) return this.appendInt32(low32)
    if (high32 === -1 && low32 >= 0x80000000) return this.appendInt32(low32 - 0x100000000)

    const numBytes = Math.ceil((65 - Math.clz32(high32 >= 0 ? high32 : -high32 - 1)) / 7)
    if (this.offset + numBytes > this.buf.byteLength) this.grow()
    for (let i = 0; i < 4; i++) {
      this.buf[this.offset + i] = (low32 & 0x7f) | 0x80
      low32 >>>= 7 // zero-filling right shift
    }
    this.buf[this.offset + 4] = (low32 & 0x0f) | ((high32 & 0x07) << 4) | (numBytes === 5 ? 0x00 : 0x80)
    high32 >>= 3 // sign-propagating right shift
    for (let i = 5; i < numBytes; i++) {
      this.buf[this.offset + i] = (high32 & 0x7f) | (i === numBytes - 1 ? 0x00 : 0x80)
      high32 >>= 7
    }
    this.offset += numBytes
    return numBytes
  }

  /**
   * Appends the contents of byte buffer `data` to the buffer. Returns the
   * number of bytes appended.
   */
  appendRawBytes(data) {
    if (this.offset + data.byteLength > this.buf.byteLength) {
      this.grow(this.offset + data.byteLength)
    }
    this.buf.set(data, this.offset)
    this.offset += data.byteLength
    return data.byteLength
  }

  /**
   * Appends a UTF-8 string to the buffer, without any metadata. Returns the
   * number of bytes appended.
   */
  appendRawString(value) {
    if (typeof value !== 'string') throw new TypeError('value is not a string')
    return this.appendRawBytes(stringToUtf8(value))
  }

  /**
   * Appends the contents of byte buffer `data` to the buffer, prefixed with the
   * number of bytes in the buffer (as a LEB128-encoded unsigned integer).
   */
  appendPrefixedBytes(data) {
    this.appendUint53(data.byteLength)
    this.appendRawBytes(data)
    return this
  }

  /**
   * Appends a UTF-8 string to the buffer, prefixed with its length in bytes
   * (where the length is encoded as an unsigned LEB128 integer).
   */
  appendPrefixedString(value) {
    if (typeof value !== 'string') throw new TypeError('value is not a string')
    this.appendPrefixedBytes(stringToUtf8(value))
    return this
  }

  /**
   * Takes a value, which must be a string consisting only of hexadecimal
   * digits, maps it to a byte array, and appends it to the buffer, prefixed
   * with its length in bytes.
   */
  appendHexString(value) {
    this.appendPrefixedBytes(hexStringToBytes(value))
    return this
  }

  /**
   * Flushes any unwritten data to the buffer. Call this before reading from
   * the buffer constructed by this Encoder.
   */
  finish() {
  }
}

/**
 * Counterpart to Encoder. Wraps a Uint8Array buffer with a cursor indicating
 * the current decoding position, and allows values to be incrementally read by
 * decoding the bytes at the current position.
 */
class Decoder {
  constructor(buffer) {
    if (!(buffer instanceof Uint8Array)) {
      throw new TypeError(`Not a byte array: ${buffer}`)
    }
    this.buf = buffer
    this.offset = 0
  }

  /**
   * Returns false if there is still data to be read at the current decoding
   * position, and true if we are at the end of the buffer.
   */
  get done() {
    return this.offset === this.buf.byteLength
  }

  /**
   * Resets the cursor position, so that the next read goes back to the
   * beginning of the buffer.
   */
  reset() {
    this.offset = 0
  }

  /**
   * Moves the current decoding position forward by the specified number of
   * bytes, without decoding anything.
   */
  skip(bytes) {
    if (this.offset + bytes > this.buf.byteLength) {
      throw new RangeError('cannot skip beyond end of buffer')
    }
    this.offset += bytes
  }

  /**
   * Reads one byte (0 to 255) from the buffer.
   */
  readByte() {
    this.offset += 1
    return this.buf[this.offset - 1]
  }

  /**
   * Reads a LEB128-encoded unsigned integer from the current position in the buffer.
   * Throws an exception if the value doesn't fit in a 32-bit unsigned int.
   */
  readUint32() {
    let result = 0, shift = 0
    while (this.offset < this.buf.byteLength) {
      const nextByte = this.buf[this.offset]
      if (shift === 28 && (nextByte & 0xf0) !== 0) { // more than 5 bytes, or value > 0xffffffff
        throw new RangeError('number out of range')
      }
      result = (result | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
      shift += 7
      this.offset++
      if ((nextByte & 0x80) === 0) return result
    }
    throw new RangeError('buffer ended with incomplete number')
  }

  /**
   * Reads a LEB128-encoded signed integer from the current position in the buffer.
   * Throws an exception if the value doesn't fit in a 32-bit signed int.
   */
  readInt32() {
    let result = 0, shift = 0
    while (this.offset < this.buf.byteLength) {
      const nextByte = this.buf[this.offset]
      if ((shift === 28 && (nextByte & 0x80) !== 0) || // more than 5 bytes
          (shift === 28 && (nextByte & 0x40) === 0 && (nextByte & 0x38) !== 0) || // positive int > 0x7fffffff
          (shift === 28 && (nextByte & 0x40) !== 0 && (nextByte & 0x38) !== 0x38)) { // negative int < -0x80000000
        throw new RangeError('number out of range')
      }
      result |= (nextByte & 0x7f) << shift
      shift += 7
      this.offset++

      if ((nextByte & 0x80) === 0) {
        if ((nextByte & 0x40) === 0 || shift > 28) {
          return result // positive, or negative value that doesn't need sign-extending
        } else {
          return result | (-1 << shift) // sign-extend negative integer
        }
      }
    }
    throw new RangeError('buffer ended with incomplete number')
  }

  /**
   * Reads a LEB128-encoded unsigned integer from the current position in the
   * buffer. Allows any integer that can be safely represented by JavaScript
   * (up to 2^53 - 1), and throws an exception outside of that range.
   */
  readUint53() {
    const { low32, high32 } = this.readUint64()
    if (high32 < 0 || high32 > 0x1fffff) {
      throw new RangeError('number out of range')
    }
    return high32 * 0x100000000 + low32
  }

  /**
   * Reads a LEB128-encoded signed integer from the current position in the
   * buffer. Allows any integer that can be safely represented by JavaScript
   * (between -(2^53 - 1) and 2^53 - 1), throws an exception outside of that range.
   */
  readInt53() {
    const { low32, high32 } = this.readInt64()
    if (high32 < -0x200000 || (high32 === -0x200000 && low32 === 0) || high32 > 0x1fffff) {
      throw new RangeError('number out of range')
    }
    return high32 * 0x100000000 + low32
  }

  /**
   * Reads a LEB128-encoded unsigned integer from the current position in the
   * buffer. Throws an exception if the value doesn't fit in a 64-bit unsigned
   * int. Returns the number in two 32-bit halves, as an object of the form
   * `{high32, low32}`.
   */
  readUint64() {
    let low32 = 0, high32 = 0, shift = 0
    while (this.offset < this.buf.byteLength && shift <= 28) {
      const nextByte = this.buf[this.offset]
      low32 = (low32 | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
      if (shift === 28) {
        high32 = (nextByte & 0x70) >>> 4
      }
      shift += 7
      this.offset++
      if ((nextByte & 0x80) === 0) return { high32, low32 }
    }

    shift = 3
    while (this.offset < this.buf.byteLength) {
      const nextByte = this.buf[this.offset]
      if (shift === 31 && (nextByte & 0xfe) !== 0) { // more than 10 bytes, or value > 2^64 - 1
        throw new RangeError('number out of range')
      }
      high32 = (high32 | (nextByte & 0x7f) << shift) >>> 0
      shift += 7
      this.offset++
      if ((nextByte & 0x80) === 0) return { high32, low32 }
    }
    throw new RangeError('buffer ended with incomplete number')
  }

  /**
   * Reads a LEB128-encoded signed integer from the current position in the
   * buffer. Throws an exception if the value doesn't fit in a 64-bit signed
   * int. Returns the number in two 32-bit halves, as an object of the form
   * `{high32, low32}`. The `low32` half is always non-negative, and the
   * sign of the `high32` half indicates the sign of the 64-bit number.
   */
  readInt64() {
    let low32 = 0, high32 = 0, shift = 0
    while (this.offset < this.buf.byteLength && shift <= 28) {
      const nextByte = this.buf[this.offset]
      low32 = (low32 | (nextByte & 0x7f) << shift) >>> 0 // right shift to interpret value as unsigned
      if (shift === 28) {
        high32 = (nextByte & 0x70) >>> 4
      }
      shift += 7
      this.offset++
      if ((nextByte & 0x80) === 0) {
        if ((nextByte & 0x40) !== 0) { // sign-extend negative integer
          if (shift < 32) low32 = (low32 | (-1 << shift)) >>> 0
          high32 |= -1 << Math.max(shift - 32, 0)
        }
        return { high32, low32 }
      }
    }

    shift = 3
    while (this.offset < this.buf.byteLength) {
      const nextByte = this.buf[this.offset]
      // On the 10th byte there are only two valid values: all 7 value bits zero
      // (if the value is positive) or all 7 bits one (if the value is negative)
      if (shift === 31 && nextByte !== 0 && nextByte !== 0x7f) {
        throw new RangeError('number out of range')
      }
      high32 |= (nextByte & 0x7f) << shift
      shift += 7
      this.offset++
      if ((nextByte & 0x80) === 0) {
        if ((nextByte & 0x40) !== 0 && shift < 32) { // sign-extend negative integer
          high32 |= -1 << shift
        }
        return { high32, low32 }
      }
    }
    throw new RangeError('buffer ended with incomplete number')
  }

  /**
   * Extracts a subarray `length` bytes in size, starting from the current
   * position in the buffer, and moves the position forward.
   */
  readRawBytes(length) {
    const start = this.offset
    if (start + length > this.buf.byteLength) {
      throw new RangeError('subarray exceeds buffer size')
    }
    this.offset += length
    return this.buf.subarray(start, this.offset)
  }

  /**
   * Extracts `length` bytes from the buffer, starting from the current position,
   * and returns the UTF-8 string decoding of those bytes.
   */
  readRawString(length) {
    return utf8ToString(this.readRawBytes(length))
  }

  /**
   * Extracts a subarray from the current position in the buffer, prefixed with
   * its length in bytes (encoded as an unsigned LEB128 integer).
   */
  readPrefixedBytes() {
    return this.readRawBytes(this.readUint53())
  }

  /**
   * Reads a UTF-8 string from the current position in the buffer, prefixed with its
   * length in bytes (where the length is encoded as an unsigned LEB128 integer).
   */
  readPrefixedString() {
    return utf8ToString(this.readPrefixedBytes())
  }

  /**
   * Reads a byte array from the current position in the buffer, prefixed with its
   * length in bytes. Returns that byte array converted to a hexadecimal string.
   */
  readHexString() {
    return bytesToHexString(this.readPrefixedBytes())
  }
}

/**
 * An encoder that uses run-length encoding to compress sequences of repeated
 * values. The constructor argument specifies the type of values, which may be
 * either 'int', 'uint', or 'utf8'. Besides valid values of the selected
 * datatype, values may also be null.
 *
 * The encoded buffer starts with a LEB128-encoded signed integer, the
 * repetition count. The interpretation of the following values depends on this
 * repetition count:
 *   - If this number is a positive value n, the next value in the buffer
 *     (encoded as the specified datatype) is repeated n times in the sequence.
 *   - If the repetition count is a negative value -n, then the next n values
 *     (encoded as the specified datatype) in the buffer are treated as a
 *     literal, i.e. they appear in the sequence without any further
 *     interpretation or repetition.
 *   - If the repetition count is zero, then the next value in the buffer is a
 *     LEB128-encoded unsigned integer indicating the number of null values
 *     that appear at the current position in the sequence.
 *
 * After one of these three has completed, the process repeats, starting again
 * with a repetition count, until we reach the end of the buffer.
 */
class RLEEncoder extends Encoder {
  constructor(type) {
    super()
    this.type = type
    this.state = 'empty'
    this.lastValue = undefined
    this.count = 0
    this.literal = []
  }

  /**
   * Appends a new value to the sequence. If `repetitions` is given, the value is repeated
   * `repetitions` times.
   */
  appendValue(value, repetitions = 1) {
    this._appendValue(value, repetitions)
  }

  /**
   * Like `appendValue()`, but this method is not overridden by `DeltaEncoder`.
   */
  _appendValue(value, repetitions = 1) {
    if (repetitions <= 0) return
    if (this.state === 'empty') {
      this.state = (value === null ? 'nulls' : (repetitions === 1 ? 'loneValue' : 'repetition'))
      this.lastValue = value
      this.count = repetitions
    } else if (this.state === 'loneValue') {
      if (value === null) {
        this.flush()
        this.state = 'nulls'
        this.count = repetitions
      } else if (value === this.lastValue) {
        this.state = 'repetition'
        this.count = 1 + repetitions
      } else if (repetitions > 1) {
        this.flush()
        this.state = 'repetition'
        this.count = repetitions
        this.lastValue = value
      } else {
        this.state = 'literal'
        this.literal = [this.lastValue]
        this.lastValue = value
      }
    } else if (this.state === 'repetition') {
      if (value === null) {
        this.flush()
        this.state = 'nulls'
        this.count = repetitions
      } else if (value === this.lastValue) {
        this.count += repetitions
      } else if (repetitions > 1) {
        this.flush()
        this.state = 'repetition'
        this.count = repetitions
        this.lastValue = value
      } else {
        this.flush()
        this.state = 'loneValue'
        this.lastValue = value
      }
    } else if (this.state === 'literal') {
      if (value === null) {
        this.literal.push(this.lastValue)
        this.flush()
        this.state = 'nulls'
        this.count = repetitions
      } else if (value === this.lastValue) {
        this.flush()
        this.state = 'repetition'
        this.count = 1 + repetitions
      } else if (repetitions > 1) {
        this.literal.push(this.lastValue)
        this.flush()
        this.state = 'repetition'
        this.count = repetitions
        this.lastValue = value
      } else {
        this.literal.push(this.lastValue)
        this.lastValue = value
      }
    } else if (this.state === 'nulls') {
      if (value === null) {
        this.count += repetitions
      } else if (repetitions > 1) {
        this.flush()
        this.state = 'repetition'
        this.count = repetitions
        this.lastValue = value
      } else {
        this.flush()
        this.state = 'loneValue'
        this.lastValue = value
      }
    }
  }

  /**
   * Copies values from the RLEDecoder `decoder` into this encoder. The `options` object may
   * contain the following keys:
   *  - `count`: The number of values to copy. If not specified, copies all remaining values.
   *  - `sumValues`: If true, the function computes the sum of all numeric values as they are
   *    copied (null values are counted as zero), and returns that number.
   *  - `sumShift`: If set, values are shifted right by `sumShift` bits before adding to the sum.
   *
   * Returns an object of the form `{nonNullValues, sum}` where `nonNullValues` is the number of
   * non-null values copied, and `sum` is the sum (only if the `sumValues` option is set).
   */
  copyFrom(decoder, options = {}) {
    const { count, sumValues, sumShift } = options
    if (!(decoder instanceof RLEDecoder) || (decoder.type !== this.type)) {
      throw new TypeError('incompatible type of decoder')
    }
    let remaining = (typeof count === 'number' ? count : Number.MAX_SAFE_INTEGER)
    let nonNullValues = 0, sum = 0
    if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
    if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}

    // Copy a value so that we have a well-defined starting state. NB: when super.copyFrom() is
    // called by the DeltaEncoder subclass, the following calls to readValue() and appendValue()
    // refer to the overridden methods, while later readRecord(), readRawValue() and _appendValue()
    // calls refer to the non-overridden RLEDecoder/RLEEncoder methods.
    let firstValue = decoder.readValue()
    if (firstValue === null) {
      const numNulls = Math.min(decoder.count + 1, remaining)
      remaining -= numNulls
      decoder.count -= numNulls - 1
      this.appendValue(null, numNulls)
      if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
      if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}
      firstValue = decoder.readValue()
      if (firstValue === null) throw new RangeError('null run must be followed by non-null value')
    }
    this.appendValue(firstValue)
    remaining--
    nonNullValues++
    if (sumValues) sum += (sumShift ? (firstValue >>> sumShift) : firstValue)
    if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
    if (remaining === 0 || decoder.done) return sumValues ? {nonNullValues, sum} : {nonNullValues}

    // Copy data at the record level without expanding repetitions
    let firstRun = (decoder.count > 0)
    while (remaining > 0 && !decoder.done) {
      if (!firstRun) decoder.readRecord()
      const numValues = Math.min(decoder.count, remaining)
      decoder.count -= numValues

      if (decoder.state === 'literal') {
        nonNullValues += numValues
        for (let i = 0; i < numValues; i++) {
          if (decoder.done) throw new RangeError('incomplete literal')
          const value = decoder.readRawValue()
          if (value === decoder.lastValue) throw new RangeError('Repetition of values is not allowed in literal')
          decoder.lastValue = value
          this._appendValue(value)
          if (sumValues) sum += (sumShift ? (value >>> sumShift) : value)
        }
      } else if (decoder.state === 'repetition') {
        nonNullValues += numValues
        if (sumValues) sum += numValues * (sumShift ? (decoder.lastValue >>> sumShift) : decoder.lastValue)
        const value = decoder.lastValue
        this._appendValue(value)
        if (numValues > 1) {
          this._appendValue(value)
          if (this.state !== 'repetition') throw new RangeError(`Unexpected state ${this.state}`)
          this.count += numValues - 2
        }
      } else if (decoder.state === 'nulls') {
        this._appendValue(null)
        if (this.state !== 'nulls') throw new RangeError(`Unexpected state ${this.state}`)
        this.count += numValues - 1
      }

      firstRun = false
      remaining -= numValues
    }
    if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
    return sumValues ? {nonNullValues, sum} : {nonNullValues}
  }

  /**
   * Private method, do not call from outside the class.
   */
  flush() {
    if (this.state === 'loneValue') {
      this.appendInt32(-1)
      this.appendRawValue(this.lastValue)
    } else if (this.state === 'repetition') {
      this.appendInt53(this.count)
      this.appendRawValue(this.lastValue)
    } else if (this.state === 'literal') {
      this.appendInt53(-this.literal.length)
      for (let v of this.literal) this.appendRawValue(v)
    } else if (this.state === 'nulls') {
      this.appendInt32(0)
      this.appendUint53(this.count)
    }
    this.state = 'empty'
  }

  /**
   * Private method, do not call from outside the class.
   */
  appendRawValue(value) {
    if (this.type === 'int') {
      this.appendInt53(value)
    } else if (this.type === 'uint') {
      this.appendUint53(value)
    } else if (this.type === 'utf8') {
      this.appendPrefixedString(value)
    } else {
      throw new RangeError(`Unknown RLEEncoder datatype: ${this.type}`)
    }
  }

  /**
   * Flushes any unwritten data to the buffer. Call this before reading from
   * the buffer constructed by this Encoder.
   */
  finish() {
    if (this.state === 'literal') this.literal.push(this.lastValue)
    // Don't write anything if the only values we have seen are nulls
    if (this.state !== 'nulls' || this.offset > 0) this.flush()
  }
}

/**
 * Counterpart to RLEEncoder: reads values from an RLE-compressed sequence,
 * returning nulls and repeated values as required.
 */
class RLEDecoder extends Decoder {
  constructor(type, buffer) {
    super(buffer)
    this.type = type
    this.lastValue = undefined
    this.count = 0
    this.state = undefined
  }

  /**
   * Returns false if there is still data to be read at the current decoding
   * position, and true if we are at the end of the buffer.
   */
  get done() {
    return (this.count === 0) && (this.offset === this.buf.byteLength)
  }

  /**
   * Resets the cursor position, so that the next read goes back to the
   * beginning of the buffer.
   */
  reset() {
    this.offset = 0
    this.lastValue = undefined
    this.count = 0
    this.state = undefined
  }

  /**
   * Returns the next value (or null) in the sequence.
   */
  readValue() {
    if (this.done) return null
    if (this.count === 0) this.readRecord()
    this.count -= 1
    if (this.state === 'literal') {
      const value = this.readRawValue()
      if (value === this.lastValue) throw new RangeError('Repetition of values is not allowed in literal')
      this.lastValue = value
      return value
    } else {
      return this.lastValue
    }
  }

  /**
   * Discards the next `numSkip` values in the sequence.
   */
  skipValues(numSkip) {
    while (numSkip > 0 && !this.done) {
      if (this.count === 0) {
        this.count = this.readInt53()
        if (this.count > 0) {
          this.lastValue = (this.count <= numSkip) ? this.skipRawValues(1) : this.readRawValue()
          this.state = 'repetition'
        } else if (this.count < 0) {
          this.count = -this.count
          this.state = 'literal'
        } else { // this.count == 0
          this.count = this.readUint53()
          this.lastValue = null
          this.state = 'nulls'
        }
      }

      const consume = Math.min(numSkip, this.count)
      if (this.state === 'literal') this.skipRawValues(consume)
      numSkip -= consume
      this.count -= consume
    }
  }

  /**
   * Private method, do not call from outside the class.
   * Reads a repetition count from the buffer and sets up the state appropriately.
   */
  readRecord() {
    this.count = this.readInt53()
    if (this.count > 1) {
      const value = this.readRawValue()
      if ((this.state === 'repetition' || this.state === 'literal') && this.lastValue === value) {
        throw new RangeError('Successive repetitions with the same value are not allowed')
      }
      this.state = 'repetition'
      this.lastValue = value
    } else if (this.count === 1) {
      throw new RangeError('Repetition count of 1 is not allowed, use a literal instead')
    } else if (this.count < 0) {
      this.count = -this.count
      if (this.state === 'literal') throw new RangeError('Successive literals are not allowed')
      this.state = 'literal'
    } else { // this.count == 0
      if (this.state === 'nulls') throw new RangeError('Successive null runs are not allowed')
      this.count = this.readUint53()
      if (this.count === 0) throw new RangeError('Zero-length null runs are not allowed')
      this.lastValue = null
      this.state = 'nulls'
    }
  }

  /**
   * Private method, do not call from outside the class.
   * Reads one value of the datatype configured on construction.
   */
  readRawValue() {
    if (this.type === 'int') {
      return this.readInt53()
    } else if (this.type === 'uint') {
      return this.readUint53()
    } else if (this.type === 'utf8') {
      return this.readPrefixedString()
    } else {
      throw new RangeError(`Unknown RLEDecoder datatype: ${this.type}`)
    }
  }

  /**
   * Private method, do not call from outside the class.
   * Skips over `num` values of the datatype configured on construction.
   */
  skipRawValues(num) {
    if (this.type === 'utf8') {
      for (let i = 0; i < num; i++) this.skip(this.readUint53())
    } else {
      while (num > 0 && this.offset < this.buf.byteLength) {
        if ((this.buf[this.offset] & 0x80) === 0) num--
        this.offset++
      }
      if (num > 0) throw new RangeError('cannot skip beyond end of buffer')
    }
  }
}

/**
 * A variant of RLEEncoder: rather than storing the actual values passed to
 * appendValue(), this version stores only the first value, and for all
 * subsequent values it stores the difference to the previous value. This
 * encoding is good when values tend to come in sequentially incrementing runs,
 * because the delta between successive values is 1, and repeated values of 1
 * are easily compressed with run-length encoding.
 *
 * Null values are also allowed, as with RLEEncoder.
 */
class DeltaEncoder extends RLEEncoder {
  constructor() {
    super('int')
    this.absoluteValue = 0
  }

  /**
   * Appends a new integer value to the sequence. If `repetitions` is given, the value is repeated
   * `repetitions` times.
   */
  appendValue(value, repetitions = 1) {
    if (repetitions <= 0) return
    if (typeof value === 'number') {
      super.appendValue(value - this.absoluteValue, 1)
      this.absoluteValue = value
      if (repetitions > 1) super.appendValue(0, repetitions - 1)
    } else {
      super.appendValue(value, repetitions)
    }
  }

  /**
   * Copies values from the DeltaDecoder `decoder` into this encoder. The `options` object may
   * contain the key `count`, indicating the number of values to copy. If not specified, copies
   * all remaining values in the decoder.
   */
  copyFrom(decoder, options = {}) {
    if (options.sumValues) {
      throw new RangeError('unsupported options for DeltaEncoder.copyFrom()')
    }
    if (!(decoder instanceof DeltaDecoder)) {
      throw new TypeError('incompatible type of decoder')
    }

    let remaining = options.count
    if (remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${remaining} values`)
    if (remaining === 0 || decoder.done) return

    // Copy any null values, and the first non-null value, so that appendValue() computes the
    // difference between the encoder's last value and the decoder's first (absolute) value.
    let value = decoder.readValue(), nulls = 0
    this.appendValue(value)
    if (value === null) {
      nulls = decoder.count + 1
      if (remaining !== undefined && remaining < nulls) nulls = remaining
      decoder.count -= nulls - 1
      this.count += nulls - 1
      if (remaining > nulls && decoder.done) throw new RangeError(`cannot copy ${remaining} values`)
      if (remaining === nulls || decoder.done) return

      // The next value read is certain to be non-null because we're not at the end of the decoder,
      // and a run of nulls must be followed by a run of non-nulls.
      if (decoder.count === 0) this.appendValue(decoder.readValue())
    }

    // Once we have the first value, the subsequent relative values can be copied verbatim without
    // any further processing. Note that the first value copied by super.copyFrom() is an absolute
    // value, while subsequent values are relative. Thus, the sum of all of the (non-null) copied
    // values must equal the absolute value of the final element copied.
    if (remaining !== undefined) remaining -= nulls + 1
    const { nonNullValues, sum } = super.copyFrom(decoder, {count: remaining, sumValues: true})
    if (nonNullValues > 0) {
      this.absoluteValue = sum
      decoder.absoluteValue = sum
    }
  }
}

/**
 * Counterpart to DeltaEncoder: reads values from a delta-compressed sequence of
 * numbers (may include null values).
 */
class DeltaDecoder extends RLEDecoder {
  constructor(buffer) {
    super('int', buffer)
    this.absoluteValue = 0
  }

  /**
   * Resets the cursor position, so that the next read goes back to the
   * beginning of the buffer.
   */
  reset() {
    this.offset = 0
    this.lastValue = undefined
    this.count = 0
    this.state = undefined
    this.absoluteValue = 0
  }

  /**
   * Returns the next integer (or null) value in the sequence.
   */
  readValue() {
    const value = super.readValue()
    if (value === null) return null
    this.absoluteValue += value
    return this.absoluteValue
  }

  /**
   * Discards the next `numSkip` values in the sequence.
   */
  skipValues(numSkip) {
    while (numSkip > 0 && !this.done) {
      if (this.count === 0) this.readRecord()
      const consume = Math.min(numSkip, this.count)
      if (this.state === 'literal') {
        for (let i = 0; i < consume; i++) {
          this.lastValue = this.readRawValue()
          this.absoluteValue += this.lastValue
        }
      } else if (this.state === 'repetition') {
        this.absoluteValue += consume * this.lastValue
      }
      numSkip -= consume
      this.count -= consume
    }
  }
}

/**
 * Encodes a sequence of boolean values by mapping it to a sequence of integers:
 * the number of false values, followed by the number of true values, followed
 * by the number of false values, and so on. Each number is encoded as a LEB128
 * unsigned integer. This encoding is a bit like RLEEncoder, except that we
 * only encode the repetition count but not the actual value, since the values
 * just alternate between false and true (starting with false).
 */
class BooleanEncoder extends Encoder {
  constructor() {
    super()
    this.lastValue = false
    this.count = 0
  }

  /**
   * Appends a new value to the sequence. If `repetitions` is given, the value is repeated
   * `repetitions` times.
   */
  appendValue(value, repetitions = 1) {
    if (value !== false && value !== true) {
      throw new RangeError(`Unsupported value for BooleanEncoder: ${value}`)
    }
    if (repetitions <= 0) return
    if (this.lastValue === value) {
      this.count += repetitions
    } else {
      this.appendUint53(this.count)
      this.lastValue = value
      this.count = repetitions
    }
  }

  /**
   * Copies values from the BooleanDecoder `decoder` into this encoder. The `options` object may
   * contain the key `count`, indicating the number of values to copy. If not specified, copies
   * all remaining values in the decoder.
   */
  copyFrom(decoder, options = {}) {
    if (!(decoder instanceof BooleanDecoder)) {
      throw new TypeError('incompatible type of decoder')
    }

    const { count } = options
    let remaining = (typeof count === 'number' ? count : Number.MAX_SAFE_INTEGER)
    if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
    if (remaining === 0 || decoder.done) return

    // Copy one value to bring decoder and encoder state into sync, then finish that value's repetitions
    this.appendValue(decoder.readValue())
    remaining--
    const firstCopy = Math.min(decoder.count, remaining)
    this.count += firstCopy
    decoder.count -= firstCopy
    remaining -= firstCopy

    while (remaining > 0 && !decoder.done) {
      decoder.count = decoder.readUint53()
      if (decoder.count === 0) throw new RangeError('Zero-length runs are not allowed')
      decoder.lastValue = !decoder.lastValue
      this.appendUint53(this.count)

      const numCopied = Math.min(decoder.count, remaining)
      this.count = numCopied
      this.lastValue = decoder.lastValue
      decoder.count -= numCopied
      remaining -= numCopied
    }

    if (count && remaining > 0 && decoder.done) throw new RangeError(`cannot copy ${count} values`)
  }

  /**
   * Flushes any unwritten data to the buffer. Call this before reading from
   * the buffer constructed by this Encoder.
   */
  finish() {
    if (this.count > 0) {
      this.appendUint53(this.count)
      this.count = 0
    }
  }
}

/**
 * Counterpart to BooleanEncoder: reads boolean values from a runlength-encoded
 * sequence.
 */
class BooleanDecoder extends Decoder {
  constructor(buffer) {
    super(buffer)
    this.lastValue = true // is negated the first time we read a count
    this.firstRun = true
    this.count = 0
  }

  /**
   * Returns false if there is still data to be read at the current decoding
   * position, and true if we are at the end of the buffer.
   */
  get done() {
    return (this.count === 0) && (this.offset === this.buf.byteLength)
  }

  /**
   * Resets the cursor position, so that the next read goes back to the
   * beginning of the buffer.
   */
  reset() {
    this.offset = 0
    this.lastValue = true
    this.firstRun = true
    this.count = 0
  }

  /**
   * Returns the next value in the sequence.
   */
  readValue() {
    if (this.done) return false
    while (this.count === 0) {
      this.count = this.readUint53()
      this.lastValue = !this.lastValue
      if (this.count === 0 && !this.firstRun) {
        throw new RangeError('Zero-length runs are not allowed')
      }
      this.firstRun = false
    }
    this.count -= 1
    return this.lastValue
  }

  /**
   * Discards the next `numSkip` values in the sequence.
   */
  skipValues(numSkip) {
    while (numSkip > 0 && !this.done) {
      if (this.count === 0) {
        this.count = this.readUint53()
        this.lastValue = !this.lastValue
        if (this.count === 0) throw new RangeError('Zero-length runs are not allowed')
      }
      if (this.count < numSkip) {
        numSkip -= this.count
        this.count = 0
      } else {
        this.count -= numSkip
        numSkip = 0
      }
    }
  }
}

module.exports = {
  stringToUtf8, utf8ToString, hexStringToBytes, bytesToHexString,
  Encoder, Decoder, RLEEncoder, RLEDecoder, DeltaEncoder, DeltaDecoder, BooleanEncoder, BooleanDecoder
}