The `SeekOp` query can produce incorrect results when the optree it is searching only has visible ops on the internal nodes. Add some tests to demonstrate the issue as well as a fix.
265 lines
9.8 KiB
Rust
265 lines
9.8 KiB
Rust
use crate::op_tree::{OpSetMetadata, OpTreeNode};
|
|
use crate::query::{binary_search_by, QueryResult, TreeQuery};
|
|
use crate::types::{Key, ListEncoding, Op, HEAD};
|
|
use std::cmp::Ordering;
|
|
use std::fmt::Debug;
|
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
pub(crate) struct SeekOp<'a> {
|
|
/// the op we are looking for
|
|
op: &'a Op,
|
|
/// The position to insert at
|
|
pub(crate) pos: usize,
|
|
/// The indices of ops that this op overwrites
|
|
pub(crate) succ: Vec<usize>,
|
|
/// whether a position has been found
|
|
found: bool,
|
|
/// The found start position of the key if there is one yet (for map objects).
|
|
start: Option<usize>,
|
|
}
|
|
|
|
impl<'a> SeekOp<'a> {
|
|
pub(crate) fn new(op: &'a Op) -> Self {
|
|
SeekOp {
|
|
op,
|
|
succ: vec![],
|
|
pos: 0,
|
|
found: false,
|
|
start: None,
|
|
}
|
|
}
|
|
|
|
fn lesser_insert(&self, op: &Op, m: &OpSetMetadata) -> bool {
|
|
op.insert && m.lamport_cmp(op.id, self.op.id) == Ordering::Less
|
|
}
|
|
|
|
fn greater_opid(&self, op: &Op, m: &OpSetMetadata) -> bool {
|
|
m.lamport_cmp(op.id, self.op.id) == Ordering::Greater
|
|
}
|
|
|
|
fn is_target_insert(&self, op: &Op) -> bool {
|
|
op.insert && op.elemid() == self.op.key.elemid()
|
|
}
|
|
}
|
|
|
|
impl<'a> TreeQuery<'a> for SeekOp<'a> {
|
|
fn query_node_with_metadata(
|
|
&mut self,
|
|
child: &OpTreeNode,
|
|
m: &OpSetMetadata,
|
|
ops: &[Op],
|
|
) -> QueryResult {
|
|
if self.found {
|
|
return QueryResult::Descend;
|
|
}
|
|
match self.op.key {
|
|
Key::Seq(HEAD) => {
|
|
while self.pos < child.len() {
|
|
let op = &ops[child.get(self.pos).unwrap()];
|
|
if op.insert && m.lamport_cmp(op.id, self.op.id) == Ordering::Less {
|
|
break;
|
|
}
|
|
self.pos += 1;
|
|
}
|
|
QueryResult::Finish
|
|
}
|
|
Key::Seq(e) => {
|
|
if child.index.ops.contains(&e.0) {
|
|
QueryResult::Descend
|
|
} else {
|
|
self.pos += child.len();
|
|
QueryResult::Next
|
|
}
|
|
}
|
|
Key::Map(_) => {
|
|
if let Some(start) = self.start {
|
|
if self.pos + child.len() >= start {
|
|
// skip empty nodes
|
|
if child.index.visible_len(ListEncoding::List) == 0 {
|
|
let child_contains_key =
|
|
child.elements.iter().any(|e| ops[*e].key == self.op.key);
|
|
if !child_contains_key {
|
|
// If we are in a node which has no visible ops, but none of the
|
|
// elements of the node match the key of the op, then we must have
|
|
// finished processing and so we can just return.
|
|
// See https://github.com/automerge/automerge-rs/pull/480
|
|
QueryResult::Finish
|
|
} else {
|
|
// Otherwise, we need to proceed to the next node
|
|
self.pos += child.len();
|
|
QueryResult::Next
|
|
}
|
|
} else {
|
|
QueryResult::Descend
|
|
}
|
|
} else {
|
|
self.pos += child.len();
|
|
QueryResult::Next
|
|
}
|
|
} else {
|
|
// in the root node find the first op position for the key
|
|
let start = binary_search_by(child, ops, |op| m.key_cmp(&op.key, &self.op.key));
|
|
self.start = Some(start);
|
|
self.pos = start;
|
|
QueryResult::Skip(start)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn query_element_with_metadata(&mut self, e: &Op, m: &OpSetMetadata) -> QueryResult {
|
|
match self.op.key {
|
|
Key::Map(_) => {
|
|
// don't bother looking at things past our key
|
|
if e.key != self.op.key {
|
|
return QueryResult::Finish;
|
|
}
|
|
|
|
if self.op.overwrites(e) {
|
|
self.succ.push(self.pos);
|
|
}
|
|
|
|
if m.lamport_cmp(e.id, self.op.id) == Ordering::Greater {
|
|
return QueryResult::Finish;
|
|
}
|
|
|
|
self.pos += 1;
|
|
QueryResult::Next
|
|
}
|
|
Key::Seq(_) => {
|
|
if !self.found {
|
|
if self.is_target_insert(e) {
|
|
self.found = true;
|
|
if self.op.overwrites(e) {
|
|
self.succ.push(self.pos);
|
|
}
|
|
}
|
|
self.pos += 1;
|
|
QueryResult::Next
|
|
} else {
|
|
// we have already found the target
|
|
if self.op.overwrites(e) {
|
|
self.succ.push(self.pos);
|
|
}
|
|
if self.op.insert {
|
|
if self.lesser_insert(e, m) {
|
|
QueryResult::Finish
|
|
} else {
|
|
self.pos += 1;
|
|
QueryResult::Next
|
|
}
|
|
} else if e.insert || self.greater_opid(e, m) {
|
|
QueryResult::Finish
|
|
} else {
|
|
self.pos += 1;
|
|
QueryResult::Next
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use crate::{
|
|
op_set::OpSet,
|
|
op_tree::B,
|
|
query::SeekOp,
|
|
types::{Key, ObjId, Op, OpId},
|
|
ActorId, ScalarValue,
|
|
};
|
|
|
|
#[test]
|
|
fn seek_on_page_boundary() {
|
|
// Create an optree in which the only visible ops are on the boundaries of the nodes,
|
|
// i.e. the visible elements are in the internal nodes. Like so
|
|
//
|
|
// .----------------------.
|
|
// | id | key | succ |
|
|
// | B | "a" | |
|
|
// | 2B | "b" | |
|
|
// '----------------------'
|
|
// / | \
|
|
// ;------------------------. | `------------------------------------.
|
|
// | id | op | succ | | | id | op | succ |
|
|
// | 0 |set "a" | 1 | | | 2B + 1 |set "c" | 2B + 2 |
|
|
// | 1 |set "a" | 2 | | | 2B + 2 |set "c" | 2B + 3 |
|
|
// | 2 |set "a" | 3 | | ...
|
|
// ... | | 3B |set "c" | |
|
|
// | B - 1 |set "a" | B | | '------------------------------------'
|
|
// '--------'--------'------' |
|
|
// |
|
|
// .-----------------------------.
|
|
// | id | key | succ |
|
|
// | B + 1 | "b" | B + 2 |
|
|
// | B + 2 | "b" | B + 3 |
|
|
// ....
|
|
// | B + (B - 1 | "b" | 2B |
|
|
// '-----------------------------'
|
|
//
|
|
// The important point here is that the leaf nodes contain no visible ops for keys "a" and
|
|
// "b".
|
|
let mut set = OpSet::new();
|
|
let actor = set.m.actors.cache(ActorId::random());
|
|
let a = set.m.props.cache("a".to_string());
|
|
let b = set.m.props.cache("b".to_string());
|
|
let c = set.m.props.cache("c".to_string());
|
|
|
|
let mut counter = 0;
|
|
// For each key insert `B` operations with the `pred` and `succ` setup such that the final
|
|
// operation for each key is the only visible op.
|
|
for key in [a, b, c] {
|
|
for iteration in 0..B {
|
|
// Generate a value to insert
|
|
let keystr = set.m.props.get(key);
|
|
let val = keystr.repeat(iteration + 1);
|
|
|
|
// Only the last op is visible
|
|
let pred = if iteration == 0 {
|
|
Default::default()
|
|
} else {
|
|
set.m
|
|
.sorted_opids(vec![OpId::new(counter - 1, actor)].into_iter())
|
|
};
|
|
|
|
// only the last op is visible
|
|
let succ = if iteration == B - 1 {
|
|
Default::default()
|
|
} else {
|
|
set.m
|
|
.sorted_opids(vec![OpId::new(counter, actor)].into_iter())
|
|
};
|
|
|
|
let op = Op {
|
|
id: OpId::new(counter, actor),
|
|
action: crate::OpType::Put(ScalarValue::Str(val.into())),
|
|
key: Key::Map(key),
|
|
succ,
|
|
pred,
|
|
insert: false,
|
|
};
|
|
set.insert(counter as usize, &ObjId::root(), op);
|
|
counter += 1;
|
|
}
|
|
}
|
|
|
|
// Now try and create an op which inserts at the next index of 'a'
|
|
let new_op = Op {
|
|
id: OpId::new(counter, actor),
|
|
action: crate::OpType::Put(ScalarValue::Str("test".into())),
|
|
key: Key::Map(a),
|
|
succ: Default::default(),
|
|
pred: set
|
|
.m
|
|
.sorted_opids(std::iter::once(OpId::new(B as u64 - 1, actor))),
|
|
insert: false,
|
|
};
|
|
|
|
let q = SeekOp::new(&new_op);
|
|
let q = set.search(&ObjId::root(), q);
|
|
|
|
// we've inserted `B - 1` elements for "a", so the index should be `B`
|
|
assert_eq!(q.pos, B);
|
|
}
|
|
}
|