Missing the A in ACID

The A in ACID stands for “Atomic”, and I’m going to present some experiments about what can go wrong, and how often, if the A is not there. And it’s not there when you’re working with so-called NoSQL databases, in this case MongoDB. A post on MongoDB might seem peculiar, given my evident penchant for high-level abstractions, when the post-I-should-be-writing would have probably been how recursive SQL traverses a non-deterministic automaton, but things have been straying from normal lately. I’ve been downsized from my daytime job and focused more on Flying Donut. Until things settle on a new normal (if unemployment can be called that) I dug up old Flying Donut material to fill the void. Not to mention that it’s one more opportunity to show how well F# fares in such simulation tasks.

The context is the apparently simple action of moving an item from one place of a list to another. Conventional MongoDB wisdom says that, in the absence of atomic transactions, you design so that you have to update whole documents or sub-documents at once. Since I was near the Flying Donut team from the beginning, I learned about an early debate on how to rearrange items of a list, and I decided to run some experiments and find out how often can things go bad when parallel modifications are done without atomicity.

Both experiments are on a linked list representation. The encoding is terse: nodes are numbered 0 to N and the “next node” function is represented by an array. There are locals numbered from 0 to 1, also stored in an array. You can read the actual code in the end of the post.

The first experiment concerns fine-grained actions that would be used in a typical server-side implementation of moving a node. It uses the following six atomic actions: T0 = X.next, X.next = T0.next, T1 = Y.next, T0.next = T1, Y.next = T0 (T0, T1 are the temps). I simulated all possible interleavings of those six actions between two clients, the first moving a node from 1 to 3 and the second moving a node from 2 to 4. The result is havoc: of the 252 interleavings, 60 ended up with a well-form list, and 192 ended up with loops or unlinked items (finegrainedRun).

The second experiment concerns actions that would be used in a client-side implementation which took advantage of knowledge of the whole state, to avoid extra reads. It makes use of the following actions: X.next = NX.next, NX.next = Y.next, Y.next = NX, implementing the macro move MOVE(x,nx,nnx,y,ny), meaning: Move nx, which is currently between x and nnx, to between y and ny. I thought that providing more information would make things better, but I was wrong. The client-side version is even less resilient than the server-side one, probably because the server-side case could adapt to the current state of the list and recover in some cases. 20 interleavings, all malformed (coarsegrainedRun)…

Conventional MongoDB wisdom seems to win, this time. There’s no substitute for the A in ACID. You have to save the whole list at once, using the only kind of A in MongoDB: a single update operation.

Just kidding. You know I’m no fun of conventional wisdom. I can’t possibly end the post with that remark. Although I do recommend that you follow the conventional wisdom in similar MongoDB problems, I’m going to be the devil’s advocate and show you how it is possible for a number of concurrent processes to move items around in a list, without interference, using enough of the I to make up for the lack of the A. But, that has to wait until the next post!

The code

let rec interleave lst1 lst2 =
  match lst1, lst2 with
    | [], ys -> [ys]
    | xs, [] -> [xs]
    | x :: xs, y :: ys ->
        (List.map (fun zs -> x :: zs) (interleave xs (y::ys))) @
        (List.map (fun zs -> y :: zs) (interleave (x::xs) ys))

//let show () =
//    let res = interleave [1;2;3] [11;12;13]
//    res |> List.iter (fun x -> printf "["; x |> List.iter (printf "%O "); printf "]\n")
//    printfn "Done"

//printfn "Interleavings"
//do show ()

type locals = int array

type thelist = int array

type state = State of locals array * thelist

let checkUniq listarr =
    (listarr |> Set.ofArray |> Set.count) = listarr.Length

let showState2 listarr =
    printf "["
    listarr |> Array.iter (printf "%O ")
    printf "] : %O\n" (checkUniq listarr) 

let showState (State (locarr, listarr)) =
    printf "Process0 T0=%O T1=%O, Process1 T0=%O T1=%O, " locarr.[0].[0] locarr.[0].[1] locarr.[1].[0] locarr.[1].[1]
    showState2 listarr 

type movefun = int -> int -> int -> state -> state

// T0 = X.next
let getNextX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let mylocals = locarr.[i]
    mylocals.[0] <- listarr.[x]
    State (locarr, listarr)
    
// X.next = T0.next
let linkX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let mylocals = locarr.[i]
    listarr.[x] <- listarr.[mylocals.[0]]
    State (locarr, listarr)

// T1 = Y.next
let getNextY (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let mylocals = locarr.[i]
    mylocals.[1] <- listarr.[y]
    State (locarr, listarr)

// T0.next = T1
let linkNextX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let mylocals = locarr.[i]
    listarr.[mylocals.[0]] <- mylocals.[1]
    State (locarr, listarr)

// Y.next = T0
let linkY (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let mylocals = locarr.[i]
    listarr.[y] <- mylocals.[0]
    State (locarr, listarr)

let finegrainedUnit () =
    let lst = [|1;2;3;4;5;6|]
    let loc1 = [|0;0|]
    let loc2 = [|0;0|]
    let state = State ([|loc1;loc2|],lst)
    let actions = [ getNextX 1 3 0; getNextX 2 4 1; linkX 1 3 0; linkX 2 4 1; getNextY 1 3 0; getNextY 2 4 1; linkNextX 1 3 0; linkNextX 2 4 1; linkY 1 3 0; linkY 2 4 1 ]
    let finalstate = actions |> List.fold (fun s fn -> fn s) state
    showState state
    ()

printfn "Linked list example"
do finegrainedUnit ()

let applyactions actions = 
    let lst = [|1;2;3;4;5;6|]
    let loc1 = [|0;0|]
    let loc2 = [|0;0|]
    let state = State ([|loc1;loc2|],lst)
    let res = actions |> List.fold (fun s fn -> fn s) state
    state

let finegrainedRun () =
    let actionsseq = [ getNextX ; linkX ; getNextY ; linkNextX ; linkY ]
    let actionsf x y i = actionsseq |> List.map (fun f -> f x y i)
    let actions1 = actionsf 1 3 0
    let actions2 = actionsf 2 4 1
    
    let interleavings = interleave actions1 actions2
    let res = interleavings |> List.map applyactions
    res |> List.iter showState
    ()

printfn "Linked list"
do finegrainedRun ()

type movefun2 = int -> int -> int -> int -> int -> thelist -> thelist

// X.next = NX.next
let linkX2 (x:int) (nx:int) (nnx:int) (y:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[x] <- nnx
    listarr

// NX.next = Y.next
let linkNextX2 (x:int) (nx:int) (nnx:int) (y:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[nx] <- ny
    listarr

// Y.next = NX
let linkY2 (x:int) (nx:int) (y:int) (nnx:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[y] <- nx
    listarr

let coarsegrainedUnit () =
    let lst = [|1;2;3;4;5;6|]
    let loc1 = [|0;0|]
    let loc2 = [|0;0|]
    let actions = [ linkX2 1 2 3 3 4; linkX2 2 3 4 4 5; linkNextX2 1 2 3 3 4; linkNextX2 2 3 4 4 5; linkY2 1 2 3 3 4; linkY2 2 3 4 4 5 ]
    let finalstate = actions |> List.fold (fun s fn -> fn s) lst
    showState2 lst
    ()

printfn "Linked list 2nd set of actions example"
do coarsegrainedUnit ()

let applyactions2 actions = 
    let lst = [|1;2;3;4;5;6|]
    let loc1 = [|0;0|]
    let loc2 = [|0;0|]
    let res = actions |> List.fold (fun s fn -> fn s) lst
    lst

let coarsegrainedRun () =
    let actionsseq = [ linkX2 ; linkNextX2 ; linkY2 ]
    let actionsf x nx nnx y ny = actionsseq |> List.map (fun f -> f x nx nnx y ny)
    let actions1 = actionsf 1 2 3 3 4
    let actions2 = actionsf 2 3 4 4 5
    
    let interleavings = interleave actions1 actions2
    let res = interleavings |> List.map applyactions2
    res |> List.iter showState2
    ()

printfn "Linked list 2nd set of actions"
do coarsegrainedRun ()

printfn "Done"
Advertisements