Correctly sort actor IDs when encoding changes

This is a port of a fix previously merged into `main`. The javascript implementation of automerge sorts actor IDs lexicographically when encoding changes. We were sorting actor IDs in the order the appear in the change we're encoding. This meant that the index that we assigned to operations in the encoded change was different to that which the javascript implementation assigns, resulting in mismatched head errors as the hashes we created did not match the javascript implementation. This change fixes the issue by sorting actor IDs lexicographically. We make a pass over the operations in the change before encoding to collect the actor IDs and sort them. This means we no longer need to pass a mutable `Vec<ActorId>` to the various encode functions, which cleans things up a little.
2022-01-02 20:37:13 +00:00 · 2022-01-02 20:37:13 +00:00 · 1f0a1e4071
commit 1f0a1e4071
parent ef89520d7c
3 changed files with 91 additions and 34 deletions
--- a/automerge/src/change.rs
+++ b/automerge/src/change.rs
@ -113,6 +113,42 @@ pub(crate) fn encode_document(
    Ok(bytes)
 }

+/// When encoding a change we take all the actor IDs referenced by a change and place them in an
+/// array. The array has the actor who authored the change as the first element and all remaining
+/// actors (i.e. those referenced in object IDs in the target of an operation or in the `pred` of
+/// an operation) lexicographically ordered following the change author.
+fn actor_ids_in_change(change: &amp::Change) -> Vec<amp::ActorId> {
+    let mut other_ids: Vec<&amp::ActorId> = change
+        .operations
+        .iter()
+        .flat_map(opids_in_operation)
+        .filter(|a| *a != &change.actor_id)
+        .unique()
+        .collect();
+    other_ids.sort();
+    // Now prepend the change actor
+    std::iter::once(&change.actor_id)
+        .chain(other_ids.into_iter())
+        .cloned()
+        .collect()
+}
+
+fn opids_in_operation(op: &amp::Op) -> impl Iterator<Item = &amp::ActorId> {
+    let obj_actor_id = match &op.obj {
+        amp::ObjectId::Root => None,
+        amp::ObjectId::Id(opid) => Some(opid.actor()),
+    };
+    let pred_ids = op.pred.iter().map(amp::OpId::actor);
+    let key_actor = match &op.key {
+        amp::Key::Seq(amp::ElementId::Id(i)) => Some(i.actor()),
+        _ => None,
+    };
+    obj_actor_id
+        .into_iter()
+        .chain(key_actor.into_iter())
+        .chain(pred_ids)
+}
+
 impl From<amp::Change> for Change {
    fn from(value: amp::Change) -> Self {
        encode(&value)
@ -200,8 +236,7 @@ fn encode_chunk(change: &amp::Change, deps: &[amp::ChangeHash]) -> ChunkIntermed
        bytes.write_all(&hash.0).unwrap();
    }

-    // encode first actor
-    let mut actors = vec![change.actor_id.clone()];
+    let actors = actor_ids_in_change(change);
    change.actor_id.to_bytes().encode(&mut bytes).unwrap();

    // encode seq, start_op, time, message
@ -213,7 +248,7 @@ fn encode_chunk(change: &amp::Change, deps: &[amp::ChangeHash]) -> ChunkIntermed
    let message = message..bytes.len();

    // encode ops into a side buffer - collect all other actors
-    let (ops_buf, mut ops) = ColumnEncoder::encode_ops(&change.operations, &mut actors);
+    let (ops_buf, mut ops) = ColumnEncoder::encode_ops(&change.operations, &actors);

    // encode all other actors
    actors[1..].encode(&mut bytes).unwrap();
@ -916,3 +951,41 @@ fn pred_into(
    pred.map(|(ctr, actor)| amp::OpId(ctr, actors[actor].clone()))
        .collect()
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::legacy as amp;
+    #[test]
+    fn mismatched_head_repro_one() {
+        let op_json = serde_json::json!({
+            "ops": [
+                {
+                    "action": "del",
+                    "obj": "1@1485eebc689d47efbf8b892e81653eb3",
+                    "elemId": "3164@0dcdf83d9594477199f80ccd25e87053",
+                    "pred": [
+                        "3164@0dcdf83d9594477199f80ccd25e87053"
+                    ],
+                    "insert": false
+                },
+            ],
+            "actor": "e63cf5ed1f0a4fb28b2c5bc6793b9272",
+            "hash": "e7fd5c02c8fdd2cdc3071ce898a5839bf36229678af3b940f347da541d147ae2",
+            "seq": 1,
+            "startOp": 3179,
+            "time": 1634146652,
+            "message": null,
+            "deps": [
+                "2603cded00f91e525507fc9e030e77f9253b239d90264ee343753efa99e3fec1"
+            ]
+        });
+
+        let change: amp::Change = serde_json::from_value(op_json).unwrap();
+        let expected_hash: super::amp::ChangeHash =
+            "4dff4665d658a28bb6dcace8764eb35fa8e48e0a255e70b6b8cbf8e8456e5c50"
+                .parse()
+                .unwrap();
+        let encoded: super::Change = change.into();
+        assert_eq!(encoded.hash, expected_hash);
+    }
+}
--- a/automerge/src/columnar.rs
+++ b/automerge/src/columnar.rs
@ -41,22 +41,13 @@ impl Encodable for [ActorId] {
    }
 }

-fn map_actor(actor: &ActorId, actors: &mut Vec<ActorId>) -> usize {
-    if let Some(pos) = actors.iter().position(|a| a == actor) {
-        pos
-    } else {
-        actors.push(actor.clone());
-        actors.len() - 1
-    }
+fn actor_index(actor: &ActorId, actors: &[ActorId]) -> usize {
+    actors.iter().position(|a| a == actor).unwrap()
 }

 impl Encodable for ActorId {
-    fn encode_with_actors<R: Write>(
-        &self,
-        buf: &mut R,
-        actors: &mut Vec<ActorId>,
-    ) -> io::Result<usize> {
-        map_actor(self, actors).encode(buf)
+    fn encode_with_actors<R: Write>(&self, buf: &mut R, actors: &[ActorId]) -> io::Result<usize> {
+        actor_index(self, actors).encode(buf)
    }

    fn encode<R: Write>(&self, _buf: &mut R) -> io::Result<usize> {
@ -601,7 +592,7 @@ impl ValEncoder {
        }
    }

-    fn append_value2(&mut self, val: &ScalarValue, actors: &mut Vec<ActorId>) {
+    fn append_value2(&mut self, val: &ScalarValue, actors: &[ActorId]) {
        // It may seem weird to have two consecutive matches on the same value. The reason is so
        // that we don't have to repeat the `append_null` calls on ref_actor and ref_counter in
        // every arm of the next match
@ -725,7 +716,7 @@ impl KeyEncoderOld {
        }
    }

-    fn append(&mut self, key: amp::Key, actors: &mut Vec<ActorId>) {
+    fn append(&mut self, key: amp::Key, actors: &[ActorId]) {
        match key {
            amp::Key::Map(s) => {
                self.actor.append_null();
@ -738,7 +729,7 @@ impl KeyEncoderOld {
                self.str.append_null();
            }
            amp::Key::Seq(amp::ElementId::Id(amp::OpId(ctr, actor))) => {
-                self.actor.append_value(map_actor(&actor, actors));
+                self.actor.append_value(actor_index(&actor, actors));
                self.ctr.append_value(ctr);
                self.str.append_null();
            }
@ -811,11 +802,11 @@ impl PredEncoder {
        }
    }

-    fn append(&mut self, pred: &SortedVec<amp::OpId>, actors: &mut Vec<ActorId>) {
+    fn append(&mut self, pred: &SortedVec<amp::OpId>, actors: &[ActorId]) {
        self.num.append_value(pred.len());
        for p in pred.iter() {
            self.ctr.append_value(p.0);
-            self.actor.append_value(map_actor(&p.1, actors));
+            self.actor.append_value(actor_index(&p.1, actors));
        }
    }

@ -879,14 +870,14 @@ impl ObjEncoderOld {
        }
    }

-    fn append(&mut self, obj: &amp::ObjectId, actors: &mut Vec<ActorId>) {
+    fn append(&mut self, obj: &amp::ObjectId, actors: &[ActorId]) {
        match obj {
            amp::ObjectId::Root => {
                self.actor.append_null();
                self.ctr.append_null();
            }
            amp::ObjectId::Id(amp::OpId(ctr, actor)) => {
-                self.actor.append_value(map_actor(actor, actors));
+                self.actor.append_value(actor_index(actor, actors));
                self.ctr.append_value(*ctr);
            }
        }
@ -1131,10 +1122,7 @@ pub(crate) struct ColumnEncoder {
 }

 impl ColumnEncoder {
-    pub fn encode_ops<'a, I>(
-        ops: I,
-        actors: &'a mut Vec<ActorId>,
-    ) -> (Vec<u8>, HashMap<u32, Range<usize>>)
+    pub fn encode_ops<'a, I>(ops: I, actors: &[ActorId]) -> (Vec<u8>, HashMap<u32, Range<usize>>)
    where
        I: IntoIterator<Item = &'a amp::Op>,
    {
@ -1154,7 +1142,7 @@ impl ColumnEncoder {
        }
    }

-    fn encode<'a, 'b, I>(&'a mut self, ops: I, actors: &'b mut Vec<ActorId>)
+    fn encode<'a, 'b, I>(&'a mut self, ops: I, actors: &[ActorId])
    where
        I: IntoIterator<Item = &'b amp::Op>,
    {
@ -1163,7 +1151,7 @@ impl ColumnEncoder {
        }
    }

-    fn append(&mut self, op: &amp::Op, actors: &mut Vec<ActorId>) {
+    fn append(&mut self, op: &amp::Op, actors: &[ActorId]) {
        self.obj.append(&op.obj, actors);
        self.key.append(op.key.clone(), actors);
        self.insert.append(op.insert);
--- a/automerge/src/encoding.rs
+++ b/automerge/src/encoding.rs
@ -246,11 +246,7 @@ pub(crate) trait Encodable {
        Ok(buf)
    }

-    fn encode_with_actors<R: Write>(
-        &self,
-        buf: &mut R,
-        _actors: &mut Vec<ActorId>,
-    ) -> io::Result<usize> {
+    fn encode_with_actors<R: Write>(&self, buf: &mut R, _actors: &[ActorId]) -> io::Result<usize> {
        self.encode(buf)
    }