Correctly sort actor IDs when encoding changes

This is a port of a fix previously merged into `main`.

The javascript implementation of automerge sorts actor IDs
lexicographically when encoding changes. We were sorting actor IDs in
the order the appear in the change we're encoding. This meant that the
index that we assigned to operations in the encoded change was different
to that which the javascript implementation assigns, resulting in
mismatched head errors as the hashes we created did not match the
javascript implementation.

This change fixes the issue by sorting actor IDs lexicographically. We
make a pass over the operations in the change before encoding to collect
the actor IDs and sort them. This means we no longer need to pass a
mutable `Vec<ActorId>` to the various encode functions, which cleans
things up a little.
This commit is contained in:
Alex Good 2022-01-02 20:37:13 +00:00 committed by Orion Henry
parent ef89520d7c
commit 1f0a1e4071
3 changed files with 91 additions and 34 deletions

View file

@ -113,6 +113,42 @@ pub(crate) fn encode_document(
Ok(bytes)
}
/// When encoding a change we take all the actor IDs referenced by a change and place them in an
/// array. The array has the actor who authored the change as the first element and all remaining
/// actors (i.e. those referenced in object IDs in the target of an operation or in the `pred` of
/// an operation) lexicographically ordered following the change author.
fn actor_ids_in_change(change: &amp::Change) -> Vec<amp::ActorId> {
let mut other_ids: Vec<&amp::ActorId> = change
.operations
.iter()
.flat_map(opids_in_operation)
.filter(|a| *a != &change.actor_id)
.unique()
.collect();
other_ids.sort();
// Now prepend the change actor
std::iter::once(&change.actor_id)
.chain(other_ids.into_iter())
.cloned()
.collect()
}
fn opids_in_operation(op: &amp::Op) -> impl Iterator<Item = &amp::ActorId> {
let obj_actor_id = match &op.obj {
amp::ObjectId::Root => None,
amp::ObjectId::Id(opid) => Some(opid.actor()),
};
let pred_ids = op.pred.iter().map(amp::OpId::actor);
let key_actor = match &op.key {
amp::Key::Seq(amp::ElementId::Id(i)) => Some(i.actor()),
_ => None,
};
obj_actor_id
.into_iter()
.chain(key_actor.into_iter())
.chain(pred_ids)
}
impl From<amp::Change> for Change {
fn from(value: amp::Change) -> Self {
encode(&value)
@ -200,8 +236,7 @@ fn encode_chunk(change: &amp::Change, deps: &[amp::ChangeHash]) -> ChunkIntermed
bytes.write_all(&hash.0).unwrap();
}
// encode first actor
let mut actors = vec![change.actor_id.clone()];
let actors = actor_ids_in_change(change);
change.actor_id.to_bytes().encode(&mut bytes).unwrap();
// encode seq, start_op, time, message
@ -213,7 +248,7 @@ fn encode_chunk(change: &amp::Change, deps: &[amp::ChangeHash]) -> ChunkIntermed
let message = message..bytes.len();
// encode ops into a side buffer - collect all other actors
let (ops_buf, mut ops) = ColumnEncoder::encode_ops(&change.operations, &mut actors);
let (ops_buf, mut ops) = ColumnEncoder::encode_ops(&change.operations, &actors);
// encode all other actors
actors[1..].encode(&mut bytes).unwrap();
@ -916,3 +951,41 @@ fn pred_into(
pred.map(|(ctr, actor)| amp::OpId(ctr, actors[actor].clone()))
.collect()
}
#[cfg(test)]
mod tests {
use crate::legacy as amp;
#[test]
fn mismatched_head_repro_one() {
let op_json = serde_json::json!({
"ops": [
{
"action": "del",
"obj": "1@1485eebc689d47efbf8b892e81653eb3",
"elemId": "3164@0dcdf83d9594477199f80ccd25e87053",
"pred": [
"3164@0dcdf83d9594477199f80ccd25e87053"
],
"insert": false
},
],
"actor": "e63cf5ed1f0a4fb28b2c5bc6793b9272",
"hash": "e7fd5c02c8fdd2cdc3071ce898a5839bf36229678af3b940f347da541d147ae2",
"seq": 1,
"startOp": 3179,
"time": 1634146652,
"message": null,
"deps": [
"2603cded00f91e525507fc9e030e77f9253b239d90264ee343753efa99e3fec1"
]
});
let change: amp::Change = serde_json::from_value(op_json).unwrap();
let expected_hash: super::amp::ChangeHash =
"4dff4665d658a28bb6dcace8764eb35fa8e48e0a255e70b6b8cbf8e8456e5c50"
.parse()
.unwrap();
let encoded: super::Change = change.into();
assert_eq!(encoded.hash, expected_hash);
}
}

View file

@ -41,22 +41,13 @@ impl Encodable for [ActorId] {
}
}
fn map_actor(actor: &ActorId, actors: &mut Vec<ActorId>) -> usize {
if let Some(pos) = actors.iter().position(|a| a == actor) {
pos
} else {
actors.push(actor.clone());
actors.len() - 1
}
fn actor_index(actor: &ActorId, actors: &[ActorId]) -> usize {
actors.iter().position(|a| a == actor).unwrap()
}
impl Encodable for ActorId {
fn encode_with_actors<R: Write>(
&self,
buf: &mut R,
actors: &mut Vec<ActorId>,
) -> io::Result<usize> {
map_actor(self, actors).encode(buf)
fn encode_with_actors<R: Write>(&self, buf: &mut R, actors: &[ActorId]) -> io::Result<usize> {
actor_index(self, actors).encode(buf)
}
fn encode<R: Write>(&self, _buf: &mut R) -> io::Result<usize> {
@ -601,7 +592,7 @@ impl ValEncoder {
}
}
fn append_value2(&mut self, val: &ScalarValue, actors: &mut Vec<ActorId>) {
fn append_value2(&mut self, val: &ScalarValue, actors: &[ActorId]) {
// It may seem weird to have two consecutive matches on the same value. The reason is so
// that we don't have to repeat the `append_null` calls on ref_actor and ref_counter in
// every arm of the next match
@ -725,7 +716,7 @@ impl KeyEncoderOld {
}
}
fn append(&mut self, key: amp::Key, actors: &mut Vec<ActorId>) {
fn append(&mut self, key: amp::Key, actors: &[ActorId]) {
match key {
amp::Key::Map(s) => {
self.actor.append_null();
@ -738,7 +729,7 @@ impl KeyEncoderOld {
self.str.append_null();
}
amp::Key::Seq(amp::ElementId::Id(amp::OpId(ctr, actor))) => {
self.actor.append_value(map_actor(&actor, actors));
self.actor.append_value(actor_index(&actor, actors));
self.ctr.append_value(ctr);
self.str.append_null();
}
@ -811,11 +802,11 @@ impl PredEncoder {
}
}
fn append(&mut self, pred: &SortedVec<amp::OpId>, actors: &mut Vec<ActorId>) {
fn append(&mut self, pred: &SortedVec<amp::OpId>, actors: &[ActorId]) {
self.num.append_value(pred.len());
for p in pred.iter() {
self.ctr.append_value(p.0);
self.actor.append_value(map_actor(&p.1, actors));
self.actor.append_value(actor_index(&p.1, actors));
}
}
@ -879,14 +870,14 @@ impl ObjEncoderOld {
}
}
fn append(&mut self, obj: &amp::ObjectId, actors: &mut Vec<ActorId>) {
fn append(&mut self, obj: &amp::ObjectId, actors: &[ActorId]) {
match obj {
amp::ObjectId::Root => {
self.actor.append_null();
self.ctr.append_null();
}
amp::ObjectId::Id(amp::OpId(ctr, actor)) => {
self.actor.append_value(map_actor(actor, actors));
self.actor.append_value(actor_index(actor, actors));
self.ctr.append_value(*ctr);
}
}
@ -1131,10 +1122,7 @@ pub(crate) struct ColumnEncoder {
}
impl ColumnEncoder {
pub fn encode_ops<'a, I>(
ops: I,
actors: &'a mut Vec<ActorId>,
) -> (Vec<u8>, HashMap<u32, Range<usize>>)
pub fn encode_ops<'a, I>(ops: I, actors: &[ActorId]) -> (Vec<u8>, HashMap<u32, Range<usize>>)
where
I: IntoIterator<Item = &'a amp::Op>,
{
@ -1154,7 +1142,7 @@ impl ColumnEncoder {
}
}
fn encode<'a, 'b, I>(&'a mut self, ops: I, actors: &'b mut Vec<ActorId>)
fn encode<'a, 'b, I>(&'a mut self, ops: I, actors: &[ActorId])
where
I: IntoIterator<Item = &'b amp::Op>,
{
@ -1163,7 +1151,7 @@ impl ColumnEncoder {
}
}
fn append(&mut self, op: &amp::Op, actors: &mut Vec<ActorId>) {
fn append(&mut self, op: &amp::Op, actors: &[ActorId]) {
self.obj.append(&op.obj, actors);
self.key.append(op.key.clone(), actors);
self.insert.append(op.insert);

View file

@ -246,11 +246,7 @@ pub(crate) trait Encodable {
Ok(buf)
}
fn encode_with_actors<R: Write>(
&self,
buf: &mut R,
_actors: &mut Vec<ActorId>,
) -> io::Result<usize> {
fn encode_with_actors<R: Write>(&self, buf: &mut R, _actors: &[ActorId]) -> io::Result<usize> {
self.encode(buf)
}