automerge/rust/automerge/src/sequence_tree.rs
Orion Henry ca25ed0ca0
automerge-wasm: Use a SequenceTree in the OpObserver
Generating patches to text objects (a la the edit-trace benchmark) was
very slow due to appending to the back of a Vec. Use the SequenceTree
(effectively a B-tree) instead so as to speed up sequence patch
generation.
2022-11-22 12:13:42 +00:00

680 lines
21 KiB
Rust

use std::{
cmp::{min, Ordering},
fmt::Debug,
mem,
};
pub(crate) const B: usize = 16;
pub type SequenceTree<T> = SequenceTreeInternal<T>;
#[derive(Clone, Debug)]
pub struct SequenceTreeInternal<T> {
root_node: Option<SequenceTreeNode<T>>,
}
#[derive(Clone, Debug, PartialEq)]
struct SequenceTreeNode<T> {
elements: Vec<T>,
children: Vec<SequenceTreeNode<T>>,
length: usize,
}
impl<T> SequenceTreeInternal<T>
where
T: Clone + Debug,
{
/// Construct a new, empty, sequence.
pub fn new() -> Self {
Self { root_node: None }
}
/// Get the length of the sequence.
pub fn len(&self) -> usize {
self.root_node.as_ref().map_or(0, |n| n.len())
}
/// Check if the sequence is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Create an iterator through the sequence.
pub fn iter(&self) -> Iter<'_, T> {
Iter {
inner: self,
index: 0,
}
}
/// Insert the `element` into the sequence at `index`.
///
/// # Panics
///
/// Panics if `index > len`.
pub fn insert(&mut self, index: usize, element: T) {
let old_len = self.len();
if let Some(root) = self.root_node.as_mut() {
#[cfg(debug_assertions)]
root.check();
if root.is_full() {
let original_len = root.len();
let new_root = SequenceTreeNode::new();
// move new_root to root position
let old_root = mem::replace(root, new_root);
root.length += old_root.len();
root.children.push(old_root);
root.split_child(0);
assert_eq!(original_len, root.len());
// after splitting the root has one element and two children, find which child the
// index is in
let first_child_len = root.children[0].len();
let (child, insertion_index) = if first_child_len < index {
(&mut root.children[1], index - (first_child_len + 1))
} else {
(&mut root.children[0], index)
};
root.length += 1;
child.insert_into_non_full_node(insertion_index, element)
} else {
root.insert_into_non_full_node(index, element)
}
} else {
self.root_node = Some(SequenceTreeNode {
elements: vec![element],
children: Vec::new(),
length: 1,
})
}
assert_eq!(self.len(), old_len + 1, "{:#?}", self);
}
/// Push the `element` onto the back of the sequence.
pub fn push(&mut self, element: T) {
let l = self.len();
self.insert(l, element)
}
/// Get the `element` at `index` in the sequence.
pub fn get(&self, index: usize) -> Option<&T> {
self.root_node.as_ref().and_then(|n| n.get(index))
}
/// Get the `element` at `index` in the sequence.
pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
self.root_node.as_mut().and_then(|n| n.get_mut(index))
}
/// Removes the element at `index` from the sequence.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
pub fn remove(&mut self, index: usize) -> T {
if let Some(root) = self.root_node.as_mut() {
#[cfg(debug_assertions)]
let len = root.check();
let old = root.remove(index);
if root.elements.is_empty() {
if root.is_leaf() {
self.root_node = None;
} else {
self.root_node = Some(root.children.remove(0));
}
}
#[cfg(debug_assertions)]
debug_assert_eq!(len, self.root_node.as_ref().map_or(0, |r| r.check()) + 1);
old
} else {
panic!("remove from empty tree")
}
}
/// Update the `element` at `index` in the sequence, returning the old value.
///
/// # Panics
///
/// Panics if `index > len`
pub fn set(&mut self, index: usize, element: T) -> T {
self.root_node.as_mut().unwrap().set(index, element)
}
}
impl<T> SequenceTreeNode<T>
where
T: Clone + Debug,
{
fn new() -> Self {
Self {
elements: Vec::new(),
children: Vec::new(),
length: 0,
}
}
pub(crate) fn len(&self) -> usize {
self.length
}
fn is_leaf(&self) -> bool {
self.children.is_empty()
}
fn is_full(&self) -> bool {
self.elements.len() >= 2 * B - 1
}
/// Returns the child index and the given index adjusted for the cumulative index before that
/// child.
fn find_child_index(&self, index: usize) -> (usize, usize) {
let mut cumulative_len = 0;
for (child_index, child) in self.children.iter().enumerate() {
if cumulative_len + child.len() >= index {
return (child_index, index - cumulative_len);
} else {
cumulative_len += child.len() + 1;
}
}
panic!("index not found in node")
}
fn insert_into_non_full_node(&mut self, index: usize, element: T) {
assert!(!self.is_full());
if self.is_leaf() {
self.length += 1;
self.elements.insert(index, element);
} else {
let (child_index, sub_index) = self.find_child_index(index);
let child = &mut self.children[child_index];
if child.is_full() {
self.split_child(child_index);
// child structure has changed so we need to find the index again
let (child_index, sub_index) = self.find_child_index(index);
let child = &mut self.children[child_index];
child.insert_into_non_full_node(sub_index, element);
} else {
child.insert_into_non_full_node(sub_index, element);
}
self.length += 1;
}
}
// A utility function to split the child `full_child_index` of this node
// Note that `full_child_index` must be full when this function is called.
fn split_child(&mut self, full_child_index: usize) {
let original_len_self = self.len();
// Create a new node which is going to store (B-1) keys
// of the full child.
let mut successor_sibling = SequenceTreeNode::new();
let full_child = &mut self.children[full_child_index];
let original_len = full_child.len();
assert!(full_child.is_full());
successor_sibling.elements = full_child.elements.split_off(B);
if !full_child.is_leaf() {
successor_sibling.children = full_child.children.split_off(B);
}
let middle = full_child.elements.pop().unwrap();
full_child.length =
full_child.elements.len() + full_child.children.iter().map(|c| c.len()).sum::<usize>();
successor_sibling.length = successor_sibling.elements.len()
+ successor_sibling
.children
.iter()
.map(|c| c.len())
.sum::<usize>();
let z_len = successor_sibling.len();
let full_child_len = full_child.len();
self.children
.insert(full_child_index + 1, successor_sibling);
self.elements.insert(full_child_index, middle);
assert_eq!(full_child_len + z_len + 1, original_len, "{:#?}", self);
assert_eq!(original_len_self, self.len());
}
fn remove_from_leaf(&mut self, index: usize) -> T {
self.length -= 1;
self.elements.remove(index)
}
fn remove_element_from_non_leaf(&mut self, index: usize, element_index: usize) -> T {
self.length -= 1;
if self.children[element_index].elements.len() >= B {
let total_index = self.cumulative_index(element_index);
// recursively delete index - 1 in predecessor_node
let predecessor = self.children[element_index].remove(index - 1 - total_index);
// replace element with that one
mem::replace(&mut self.elements[element_index], predecessor)
} else if self.children[element_index + 1].elements.len() >= B {
// recursively delete index + 1 in successor_node
let total_index = self.cumulative_index(element_index + 1);
let successor = self.children[element_index + 1].remove(index + 1 - total_index);
// replace element with that one
mem::replace(&mut self.elements[element_index], successor)
} else {
let middle_element = self.elements.remove(element_index);
let successor_child = self.children.remove(element_index + 1);
self.children[element_index].merge(middle_element, successor_child);
let total_index = self.cumulative_index(element_index);
self.children[element_index].remove(index - total_index)
}
}
fn cumulative_index(&self, child_index: usize) -> usize {
self.children[0..child_index]
.iter()
.map(|c| c.len() + 1)
.sum()
}
fn remove_from_internal_child(&mut self, index: usize, mut child_index: usize) -> T {
if self.children[child_index].elements.len() < B
&& if child_index > 0 {
self.children[child_index - 1].elements.len() < B
} else {
true
}
&& if child_index + 1 < self.children.len() {
self.children[child_index + 1].elements.len() < B
} else {
true
}
{
// if the child and its immediate siblings have B-1 elements merge the child
// with one sibling, moving an element from this node into the new merged node
// to be the median
if child_index > 0 {
let middle = self.elements.remove(child_index - 1);
// use the predessor sibling
let successor = self.children.remove(child_index);
child_index -= 1;
self.children[child_index].merge(middle, successor);
} else {
let middle = self.elements.remove(child_index);
// use the sucessor sibling
let successor = self.children.remove(child_index + 1);
self.children[child_index].merge(middle, successor);
}
} else if self.children[child_index].elements.len() < B {
if child_index > 0
&& self
.children
.get(child_index - 1)
.map_or(false, |c| c.elements.len() >= B)
{
let last_element = self.children[child_index - 1].elements.pop().unwrap();
assert!(!self.children[child_index - 1].elements.is_empty());
self.children[child_index - 1].length -= 1;
let parent_element =
mem::replace(&mut self.elements[child_index - 1], last_element);
self.children[child_index]
.elements
.insert(0, parent_element);
self.children[child_index].length += 1;
if let Some(last_child) = self.children[child_index - 1].children.pop() {
self.children[child_index - 1].length -= last_child.len();
self.children[child_index].length += last_child.len();
self.children[child_index].children.insert(0, last_child);
}
} else if self
.children
.get(child_index + 1)
.map_or(false, |c| c.elements.len() >= B)
{
let first_element = self.children[child_index + 1].elements.remove(0);
self.children[child_index + 1].length -= 1;
assert!(!self.children[child_index + 1].elements.is_empty());
let parent_element = mem::replace(&mut self.elements[child_index], first_element);
self.children[child_index].length += 1;
self.children[child_index].elements.push(parent_element);
if !self.children[child_index + 1].is_leaf() {
let first_child = self.children[child_index + 1].children.remove(0);
self.children[child_index + 1].length -= first_child.len();
self.children[child_index].length += first_child.len();
self.children[child_index].children.push(first_child);
}
}
}
self.length -= 1;
let total_index = self.cumulative_index(child_index);
self.children[child_index].remove(index - total_index)
}
fn check(&self) -> usize {
let l = self.elements.len() + self.children.iter().map(|c| c.check()).sum::<usize>();
assert_eq!(self.len(), l, "{:#?}", self);
l
}
pub(crate) fn remove(&mut self, index: usize) -> T {
let original_len = self.len();
if self.is_leaf() {
let v = self.remove_from_leaf(index);
assert_eq!(original_len, self.len() + 1);
debug_assert_eq!(self.check(), self.len());
v
} else {
let mut total_index = 0;
for (child_index, child) in self.children.iter().enumerate() {
match (total_index + child.len()).cmp(&index) {
Ordering::Less => {
// should be later on in the loop
total_index += child.len() + 1;
continue;
}
Ordering::Equal => {
let v = self.remove_element_from_non_leaf(
index,
min(child_index, self.elements.len() - 1),
);
assert_eq!(original_len, self.len() + 1);
debug_assert_eq!(self.check(), self.len());
return v;
}
Ordering::Greater => {
let v = self.remove_from_internal_child(index, child_index);
assert_eq!(original_len, self.len() + 1);
debug_assert_eq!(self.check(), self.len());
return v;
}
}
}
panic!(
"index not found to remove {} {} {} {}",
index,
total_index,
self.len(),
self.check()
);
}
}
fn merge(&mut self, middle: T, successor_sibling: SequenceTreeNode<T>) {
self.elements.push(middle);
self.elements.extend(successor_sibling.elements);
self.children.extend(successor_sibling.children);
self.length += successor_sibling.length + 1;
assert!(self.is_full());
}
pub(crate) fn set(&mut self, index: usize, element: T) -> T {
if self.is_leaf() {
let old_element = self.elements.get_mut(index).unwrap();
mem::replace(old_element, element)
} else {
let mut cumulative_len = 0;
for (child_index, child) in self.children.iter_mut().enumerate() {
match (cumulative_len + child.len()).cmp(&index) {
Ordering::Less => {
cumulative_len += child.len() + 1;
}
Ordering::Equal => {
let old_element = self.elements.get_mut(child_index).unwrap();
return mem::replace(old_element, element);
}
Ordering::Greater => {
return child.set(index - cumulative_len, element);
}
}
}
panic!("Invalid index to set: {} but len was {}", index, self.len())
}
}
pub(crate) fn get(&self, index: usize) -> Option<&T> {
if self.is_leaf() {
return self.elements.get(index);
} else {
let mut cumulative_len = 0;
for (child_index, child) in self.children.iter().enumerate() {
match (cumulative_len + child.len()).cmp(&index) {
Ordering::Less => {
cumulative_len += child.len() + 1;
}
Ordering::Equal => return self.elements.get(child_index),
Ordering::Greater => {
return child.get(index - cumulative_len);
}
}
}
}
None
}
pub(crate) fn get_mut(&mut self, index: usize) -> Option<&mut T> {
if self.is_leaf() {
return self.elements.get_mut(index);
} else {
let mut cumulative_len = 0;
for (child_index, child) in self.children.iter_mut().enumerate() {
match (cumulative_len + child.len()).cmp(&index) {
Ordering::Less => {
cumulative_len += child.len() + 1;
}
Ordering::Equal => return self.elements.get_mut(child_index),
Ordering::Greater => {
return child.get_mut(index - cumulative_len);
}
}
}
}
None
}
}
impl<T> Default for SequenceTreeInternal<T>
where
T: Clone + Debug,
{
fn default() -> Self {
Self::new()
}
}
impl<T> PartialEq for SequenceTreeInternal<T>
where
T: Clone + Debug + PartialEq,
{
fn eq(&self, other: &Self) -> bool {
self.len() == other.len() && self.iter().zip(other.iter()).all(|(a, b)| a == b)
}
}
impl<'a, T> IntoIterator for &'a SequenceTreeInternal<T>
where
T: Clone + Debug,
{
type Item = &'a T;
type IntoIter = Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
Iter {
inner: self,
index: 0,
}
}
}
#[derive(Debug)]
pub struct Iter<'a, T> {
inner: &'a SequenceTreeInternal<T>,
index: usize,
}
impl<'a, T> Iterator for Iter<'a, T>
where
T: Clone + Debug,
{
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
self.index += 1;
self.inner.get(self.index - 1)
}
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.index += n + 1;
self.inner.get(self.index - 1)
}
}
#[cfg(test)]
mod tests {
use proptest::prelude::*;
use super::*;
#[test]
fn push_back() {
let mut t = SequenceTree::new();
t.push(1);
t.push(2);
t.push(3);
t.push(4);
t.push(5);
t.push(6);
t.push(8);
t.push(100);
}
#[test]
fn insert() {
let mut t = SequenceTree::new();
t.insert(0, 1);
t.insert(1, 1);
t.insert(0, 1);
t.insert(0, 1);
t.insert(0, 1);
t.insert(3, 1);
t.insert(4, 1);
}
#[test]
fn insert_book() {
let mut t = SequenceTree::new();
for i in 0..100 {
t.insert(i % 2, ());
}
}
#[test]
fn insert_book_vec() {
let mut t = SequenceTree::new();
let mut v = Vec::new();
for i in 0..100 {
t.insert(i % 3, ());
v.insert(i % 3, ());
assert_eq!(v, t.iter().copied().collect::<Vec<_>>())
}
}
fn arb_indices() -> impl Strategy<Value = Vec<usize>> {
proptest::collection::vec(any::<usize>(), 0..1000).prop_map(|v| {
let mut len = 0;
v.into_iter()
.map(|i| {
len += 1;
i % len
})
.collect::<Vec<_>>()
})
}
proptest! {
#[test]
fn proptest_insert(indices in arb_indices()) {
let mut t = SequenceTreeInternal::<usize>::new();
let mut v = Vec::new();
for i in indices{
if i <= v.len() {
t.insert(i % 3, i);
v.insert(i % 3, i);
} else {
return Err(proptest::test_runner::TestCaseError::reject("index out of bounds"))
}
assert_eq!(v, t.iter().copied().collect::<Vec<_>>())
}
}
}
proptest! {
// This is a really slow test due to all the copying of the Vecs (i.e. not due to the
// sequencetree) so we only do a few runs
#![proptest_config(ProptestConfig::with_cases(20))]
#[test]
fn proptest_remove(inserts in arb_indices(), removes in arb_indices()) {
let mut t = SequenceTreeInternal::<usize>::new();
let mut v = Vec::new();
for i in inserts {
if i <= v.len() {
t.insert(i , i);
v.insert(i , i);
} else {
return Err(proptest::test_runner::TestCaseError::reject("index out of bounds"))
}
assert_eq!(v, t.iter().copied().collect::<Vec<_>>())
}
for i in removes {
if i < v.len() {
let tr = t.remove(i);
let vr = v.remove(i);
assert_eq!(tr, vr);
} else {
return Err(proptest::test_runner::TestCaseError::reject("index out of bounds"))
}
assert_eq!(v, t.iter().copied().collect::<Vec<_>>())
}
}
}
}