Infusing MongoDB with some ACID

In the previous post, I demonstrated the obvious, viz. that in the absence of transactions, concurrent clients can step on each others’ feet. And now, we find ourselves at a crossroads: either we admit that there’s a new science of data management whereby all problems have been solved once we denormalized everything into large tree structures, or we accept that there’s still need for transactional support and we need some way to implement it. Lest I make it a habit to state the obvious, I won’t spend time on this dilemma. Let me just say that denormalizing, if anything, widens the need for transactions, since denormalized data must be constantly synchronized. Notwithstanding thousands of Google hits stating with almost religious fanaticism how everyone who seeks “Old SQL” properties in MongoDB just doesn’t get it.

Let’s revisit the item reordering exercise from the last post, and reformulate it in a bit more elementary form: a particular simple way to go about rearranging items in a list, which is swapping locations of two items. With this formulation, the problem now is that we have multiple agents, each needing occasionally to get exclusive access to two shared resources and do some work, preferably without interfering too much with anyone else. Did I jog your memory? I bet I did. This problem is the Dining Philosophers one, of the venerable Edsger Dijkstra.

I will show you that MongoDB is, indeed, up to the task of handling this problem. I even hope that this will appease the MongoDB fans into flaming me less than usual for this kind of heretic talk. Let me set up the universe of discourse by way of showing you the code to create the data, set, get, test and reset them (yes, data is plural, by the way).

open MongoDB.Bson
open MongoDB.Driver

// Shared state for the simulation: a CLR lock object guarding console
// output and the counter below, plus a counter accumulating the number
// of lock-acquisition attempts made by all workers (see function r).
let mutex = new System.Object()
let global_counter = ref 0
// "safe=true" requests acknowledged writes so that Update returns a
// write result with DocumentsAffected (relied on by simple_lock below).
let connectionString = "mongodb://localhost/?safe=true";
let server = MongoServer.Create(connectionString);
let mongo = server.GetDatabase("local");
// The working collection: documents carrying integer "name", "ord" and
// "locked" fields (populated by create_items / reset_item below).
let items = mongo.GetCollection<BsonDocument>("items")

// Inserts five fresh documents named 1..5 into the "items" collection
// and prints the resulting document count. Note: only "name" is set
// here; "ord" and "locked" are added later by reset_item.
let create_items () =
    // Build {name: n} and insert it; the driver's write result is discarded.
    let create_item (n:int) =
        let x = (new BsonDocument()).Add("name",BsonValue.Create(n))
        items.Insert(x) |> ignore
    for i in 1 .. 5 do create_item i
    let all = items.Count
    printfn "%A" all

// True iff every element of the list is distinct: converting to a set
// collapses duplicates, so equal sizes mean no duplicates existed.
let checkUniq listarr =
    let distinct = Set.ofList listarr
    Set.count distinct = List.length listarr

// Reads every document and verifies that the "ord" values are pairwise
// distinct, i.e. the concurrent swaps have not duplicated or lost a
// slot. Returns true when the list is consistent.
let check_items () =
    let cur = items.FindAll()
    let enumer = cur.GetEnumerator()
    // Drain the cursor into an F# list (order is irrelevant for the check).
    let rec traverse l =
        if enumer.MoveNext () then traverse (enumer.Current :: l)
        else l
    let all = traverse []
    let allord = all |> List.map (fun d -> d.GetValue("ord").AsInt32)
    checkUniq allord

// Returns the current "ord" (list position) of the item named n.
// NOTE(review): assumes the item exists — if FindOne matches nothing,
// doc is null and GetValue throws; confirm callers guarantee existence.
let get_item (n:int) =
    let query = new QueryDocument((new BsonDocument()).Add("name",BsonValue.Create(n)))
    let doc = items.FindOne(query)
    doc.GetValue("ord").AsInt32

// Sets the "ord" field of item n to o, i.e. issues the update
// {name: n} -> {$set: {ord: o}}. The write result is discarded.
let set_item (n:int) (o:int) =
    let query = new QueryDocument((new BsonDocument()).Add("name",BsonValue.Create(n)))
    let upd = new UpdateDocument((new BsonDocument()).Add("$set",BsonValue.Create((new BsonDocument()).Add("ord", BsonValue.Create(o)))))
    items.Update(query, upd) |> ignore

// Restores item n to its canonical state: ord = n and locked = 0,
// i.e. {name: n} -> {$set: {ord: n, locked: 0}}.
let reset_item (n:int) =
    let query = new QueryDocument((new BsonDocument()).Add("name",BsonValue.Create(n)))
    let upd = new UpdateDocument((new BsonDocument()).Add("$set",BsonValue.Create((new BsonDocument()).Add("ord", BsonValue.Create(n)).Add("locked", BsonValue.Create(0)))))
    items.Update(query, upd) |> ignore

let reset_items () = for i in 1 .. 5 do reset_item i

Simple enough. Five items, identified by integer names from one to five, holding integer places from one to five.

To make a long story short, I’ll skip the part where a naive solution to the problem leads to potential deadlock and adopt the practice of ordering resources to eliminate that. But let me show you how I implement locking, including the busy-wait loop to fill in for the missing blocking wait.

// Busy-wait acquisition loop standing in for a blocking wait: calls
// lockfun n repeatedly, sleeping 10 ms between attempts, and gives up
// with an exception after 200 attempts (interpreted as livelock).
// On success, the attempt count is added to global_counter and logged,
// both under the process-wide mutex.
let r (lockfun:int->bool) (n:int) =
    let rec raux i =
        if i = 200 then failwith "Possible livelock" 
        let b = lockfun n
        if b then i
        else Async.RunSynchronously(Async.Sleep 10) ; raux (i+1)
    let i = raux 1
    lock mutex (fun () -> global_counter := !global_counter + i ; printfn "l %A -> %A" n i)

// Tries to take item n's document-level lock with a single atomic
// compare-and-set: match {name: n, locked: 0}, set {locked: 1}.
// Returns true iff exactly one document was modified, i.e. the lock
// was free and this caller took it.
let simple_lock (n:int) =
    let query = new QueryDocument((new BsonDocument()).Add("name",BsonValue.Create(n)).Add("locked",BsonValue.Create(0)))
    let upd = new UpdateDocument((new BsonDocument()).Add("$set",BsonValue.Create((new BsonDocument()).Add("locked", BsonValue.Create(1)))))
    let wc = items.Update(query, upd)
    wc.DocumentsAffected = (int64)1

// Releases item n's lock unconditionally by setting locked back to 0.
// Returns true iff exactly one document was modified.
let simple_unlock (n:int) =
    let query = new QueryDocument((new BsonDocument()).Add("name",BsonValue.Create(n)))
    let upd = new UpdateDocument((new BsonDocument()).Add("$set",BsonValue.Create((new BsonDocument()).Add("locked", BsonValue.Create(0)))))
    let wc = items.Update(query, upd)
    wc.DocumentsAffected = (int64)1

Each swap operation is implemented by the following code.

// Swaps the "ord" values of items ix and iy under per-item locks.
// Locks are always taken in ascending item order (the recursive call
// normalizes to ix < iy) so concurrent swappers cannot deadlock.
// NOTE(review): assumes ix <> iy — with ix = iy the second lock attempt
// on the already-held item can never succeed, and r would eventually
// raise "Possible livelock".
let rec swap_simple_lock (ix:int) (iy:int) =
    if ix > iy then
        swap_simple_lock iy ix
    else
        r simple_lock ix
        let oix = get_item ix
        r simple_lock iy
        let oiy = get_item iy
        set_item ix oiy
        set_item iy oix
        simple_unlock iy |> ignore
        simple_unlock ix |> ignore

The code to run a full test is the following.

// An asynchronous worker performing cnt random swaps. Both indices are
// drawn from 1..4; when the two draws collide, iy is shifted to the
// next item (modulo 4) so that ix <> iy always holds.
// NOTE(review): item 5 is never chosen here — presumably intentional
// (the rigged test below targets it); confirm.
let worker (swapfun:int->int->unit) cnt =
    let random = new System.Random()
    async {
        for i = 1 to cnt 
            do
                let ix = random.Next(4) + 1
                let iytemp = random.Next(4) + 1
                let iy = if ix <> iytemp then iytemp else (iytemp % 4) + 1
                swapfun ix iy
    }

// Launches wcnt parallel workers, each doing cnt swaps, and blocks
// until every one of them has finished.
let run_test swapfun cnt wcnt =
    [1 .. wcnt]
    |> List.map (fun _ -> worker swapfun cnt)
    |> Async.Parallel
    |> Async.RunSynchronously

// Test harness: wcnt workers x cnt swaps each. Every swap acquires two
// locks, so 2 * cnt * wcnt is the minimum number of lock attempts;
// global_counter counts actual attempts, so the excess over
// expected_cnt measures contention.
// Fixed: output label typo "Exprected_counter" -> "Expected_counter".
let cnt = 10
let wcnt = 10
let expected_cnt = 2 * cnt * wcnt
run_test swap_simple_lock cnt wcnt |> ignore
let b = check_items ()
printfn "check = %A" b
printfn "Expected_counter = %A" expected_cnt
printfn "Global_counter = %A" !global_counter

Running the test almost proves that the problem is solved and, for practical purposes, one might also say that it is. However, the only thing we did is push the problem into a corner — albeit one similar to the corner it is pushed into by the “Full SQL” solution. Starvation due to livelock is still possible, particularly since the ordering of resources makes some workers less privileged than others. To demonstrate that, I’ll “rig” the test a little.

// A worker biased to create contention: with probability 1/5 (flip = 0)
// it targets item 1 instead of its assigned pair (ix, iy).
// NOTE(review): when flip = 0 the shadowed ix becomes 1, and the next
// line binds iy to that same shadowed ix, so the call is swapfun 1 1 —
// a self-swap that swap_simple_lock cannot complete (the second lock on
// the same item never succeeds, so r raises "Possible livelock").
// Confirm whether this is the intended rigging or iy was meant to keep
// another value.
let rigged_worker ix iy (swapfun:int->int->unit) cnt =
    let random = new System.Random()
    async {
        for i = 1 to cnt 
            do
                let flip = random.Next(5)
                let ix = if flip = 0 then 1 else ix
                let iy = if flip = 0 then ix else iy
                swapfun ix iy;
        lock mutex (fun () -> printfn "swapped %A, %A for %A times" ix iy cnt)
    }

// Runs wcnt rigged workers, all nominally swapping items 4 and 5 but
// occasionally (see rigged_worker) contending on item 1.
let run_rigged_test swapfun cnt wcnt =
    [1 .. wcnt]
    |> List.map (fun _ -> rigged_worker 4 5 swapfun cnt)
    |> Async.Parallel
    |> Async.RunSynchronously

run_rigged_test swap_simple_lock cnt wcnt |> ignore

This set of probabilities makes a livelock appear in almost every run, on my machine. So a livelock is still possible, but is it more possible than running similar concurrent transactions in SQL? I must say I do not have an authoritative answer to that but, after monitoring systems running many millions of similar concurrent transactions per day, I can attest to only having witnessed locking of a transaction by another when the latter took more time than expected. Modern RDBMS’s include mechanisms to minimize starvation (here’s an article I found to that effect).

So, is this the end of it? I hope not, and I have some ideas on how to implement a solution that goes a bit further in the way of avoiding starvation. Rest assured that, should they come to fruition, you’ll be the first to know.

Advertisements

Missing the A in ACID

The A in ACID stands for “Atomic”, and I’m going to present some experiments about what can go wrong, and how often, if the A is not there. And it’s not there when you’re working with so-called NoSQL databases, in this case MongoDB. A post on MongoDB might seem peculiar, given my evident penchant for high-level abstractions, when the post-I-should-be-writing would have probably been how recursive SQL traverses a non-deterministic automaton, but things have been straying from normal lately. I’ve been downsized from my daytime job and focused more on Flying Donut. Until things settle on a new normal (if unemployment can be called that) I dug up old Flying Donut material to fill the void. Not to mention that it’s one more opportunity to show how well F# fares in such simulation tasks.

The context is the apparently simple action of moving an item from one place of a list to another. Conventional MongoDB wisdom says that, in the absence of atomic transactions, you design so that you have to update whole documents or sub-documents at once. Since I was near the Flying Donut team from the beginning, I learned about an early debate on how to rearrange items of a list, and I decided to run some experiments and find out how often can things go bad when parallel modifications are done without atomicity.

Both experiments are on a linked list representation. The encoding is terse: nodes are numbered 0 to N and the “next node” function is represented by an array. There are locals numbered from 0 to 1, also stored in an array. You can read the actual code in the end of the post.

The first experiment concerns fine-grained actions that would be used in a typical server-side implementation of moving a node. It uses the following six atomic actions: T0 = X.next, X.next = T0.next, T1 = Y.next, T0.next = T1, Y.next = T0 (T0, T1 are the temps). I simulated all possible interleavings of those six actions between two clients, the first moving a node from 1 to 3 and the second moving a node from 2 to 4. The result is havoc: of the 252 interleavings, 60 ended up with a well-formed list, and 192 ended up with loops or unlinked items (finegrainedRun).

The second experiment concerns actions that would be used in a client-side implementation which took advantage of knowledge of the whole state, to avoid extra reads. It makes use of the following actions: X.next = NX.next, NX.next = Y.next, Y.next = NX, implementing the macro move MOVE(x,nx,nnx,y,ny), meaning: Move nx, which is currently between x and nnx, to between y and ny. I thought that providing more information would make things better, but I was wrong. The client-side version is even less resilient than the server-side one, probably because the server-side case could adapt to the current state of the list and recover in some cases. 20 interleavings, all malformed (coarsegrainedRun)…

Conventional MongoDB wisdom seems to win, this time. There’s no substitute for the A in ACID. You have to save the whole list at once, using the only kind of A in MongoDB: a single update operation.

Just kidding. You know I’m no fan of conventional wisdom. I can’t possibly end the post with that remark. Although I do recommend that you follow the conventional wisdom in similar MongoDB problems, I’m going to be the devil’s advocate and show you how it is possible for a number of concurrent processes to move items around in a list, without interference, using enough of the I to make up for the lack of the A. But, that has to wait until the next post!

The code

// All interleavings of lst1 and lst2 that preserve the internal order
// of each list. When either side is empty there is exactly one
// interleaving: the other list as-is.
let rec interleave lst1 lst2 =
  match lst1, lst2 with
    | [], ys -> [ys]
    | xs, [] -> [xs]
    | x :: xs, y :: ys ->
        let startingWithX = interleave xs (y :: ys) |> List.map (fun rest -> x :: rest)
        let startingWithY = interleave (x :: xs) ys |> List.map (fun rest -> y :: rest)
        startingWithX @ startingWithY

//let show () =
//    let res = interleave [1;2;3] [11;12;13]
//    res |> List.iter (fun x -> printf "["; x |> List.iter (printf "%O "); printf "]\n")
//    printfn "Done"

//printfn "Interleavings"
//do show ()

// Per-process temporaries (T0, T1), stored as a two-element array.
type locals = int array

// The linked list itself: index = node, value = that node's successor.
type thelist = int array

// Full simulation state: one locals array per process plus the shared list.
type state = State of locals array * thelist

// True iff all entries of the array are distinct: a set collapses
// duplicates, so equal sizes mean no duplicates existed.
let checkUniq listarr =
    let asSet = Set.ofArray listarr
    Array.length listarr = Set.count asSet

// Prints the successor array as "[a b c ... ] : <uniqueness flag>".
let showState2 listarr =
    printf "["
    for v in listarr do printf "%O " v
    printf "] : %O\n" (checkUniq listarr)

// Prints both processes' temporaries (T0, T1 for process 0 and 1)
// followed by the list itself via showState2.
let showState (State (locarr, listarr)) =
    printf "Process0 T0=%O T1=%O, Process1 T0=%O T1=%O, " locarr.[0].[0] locarr.[0].[1] locarr.[1].[0] locarr.[1].[1]
    showState2 listarr 

// Signature shared by all fine-grained actions: x, y, process index, state.
type movefun = int -> int -> int -> state -> state

// T0 = X.next: process i remembers the successor of node x.
let getNextX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    locarr.[i].[0] <- listarr.[x]
    State (locarr, listarr)
    
// X.next = T0.next: unlink the node after x by pointing x past it.
let linkX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let t0 = locarr.[i].[0]
    listarr.[x] <- listarr.[t0]
    State (locarr, listarr)

// T1 = Y.next: process i remembers the successor of node y.
let getNextY (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    locarr.[i].[1] <- listarr.[y]
    State (locarr, listarr)

// T0.next = T1: the moved node now points at y's old successor.
let linkNextX (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    let temps = locarr.[i]
    listarr.[temps.[0]] <- temps.[1]
    State (locarr, listarr)

// Y.next = T0: y now points at the moved node.
let linkY (x:int) (y:int) (i:int) (State (locarr, listarr)) : state =
    listarr.[y] <- locarr.[i].[0]
    State (locarr, listarr)

// Sanity run: one fixed sequential interleaving of both processes'
// actions on a fresh list; prints the final state.
// Fixed: the fold result was bound to `finalstate` but never used —
// showState printed `state` instead, which only worked because the
// actions mutate the arrays in place. The result is now shown directly.
let finegrainedUnit () =
    let lst = [|1;2;3;4;5;6|]
    let loc1 = [|0;0|]
    let loc2 = [|0;0|]
    let state = State ([|loc1;loc2|],lst)
    let actions = [ getNextX 1 3 0; getNextX 2 4 1; linkX 1 3 0; linkX 2 4 1; getNextY 1 3 0; getNextY 2 4 1; linkNextX 1 3 0; linkNextX 2 4 1; linkY 1 3 0; linkY 2 4 1 ]
    let finalstate = actions |> List.fold (fun s fn -> fn s) state
    showState finalstate
    ()

printfn "Linked list example"
do finegrainedUnit ()

// Runs one interleaving (a list of partially-applied actions) against a
// fresh initial state and returns the resulting state.
// Fixed: the fold result `res` was discarded and the initial `state`
// returned instead — equivalent only because the actions mutate the
// arrays in place. The fold result is now returned directly and the
// unused intermediate bindings were inlined.
let applyactions actions = 
    let lst = [|1;2;3;4;5;6|]
    let state = State ([|[|0;0|];[|0;0|]|],lst)
    actions |> List.fold (fun s fn -> fn s) state

// Exhaustively applies every interleaving of the two processes' action
// sequences (process 0 moves the node after 1 to after 3, process 1
// moves the node after 2 to after 4) and prints each resulting state.
let finegrainedRun () =
    let actionsf x y i = [ getNextX x y i; linkX x y i; getNextY x y i; linkNextX x y i; linkY x y i ]
    interleave (actionsf 1 3 0) (actionsf 2 4 1)
    |> List.map applyactions
    |> List.iter showState
    ()

printfn "Linked list"
do finegrainedRun ()

// Signature of the coarse-grained actions: x, nx, nnx, y, ny, list —
// implementing MOVE(x,nx,nnx,y,ny): move nx (between x and nnx) to
// between y and ny.
type movefun2 = int -> int -> int -> int -> int -> thelist -> thelist

// X.next = NX.next: x skips over nx, whose known successor is nnx.
let linkX2 (x:int) (nx:int) (nnx:int) (y:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[x] <- nnx
    listarr

// NX.next = Y.next
// NX.next = Y.next: the moved node now points at y's known successor ny.
let linkNextX2 (x:int) (nx:int) (nnx:int) (y:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[nx] <- ny
    listarr

// Y.next = NX
// Y.next = NX: y now points at the moved node nx.
// Fixed: the parameter list previously read (x nx y nnx ny) — y and nnx
// swapped relative to movefun2, to linkX2/linkNextX2, and to the callers
// (which pass x nx nnx y ny positionally) — so the assignment actually
// wrote to listarr.[nnx]. Both call sites in this file happen to use
// nnx = y, which masked the bug.
let linkY2 (x:int) (nx:int) (nnx:int) (y:int) (ny:int) (listarr:thelist) : thelist =
    listarr.[y] <- nx
    listarr

// Sanity run of the coarse-grained actions: one fixed sequential
// interleaving of MOVE(1,2,3,3,4) and MOVE(2,3,4,4,5); prints the
// final list.
// Fixed: removed unused loc1/loc2 (leftovers from the fine-grained
// version — these actions take no temporaries) and show the fold
// result instead of leaving `finalstate` unused.
let coarsegrainedUnit () =
    let lst = [|1;2;3;4;5;6|]
    let actions = [ linkX2 1 2 3 3 4; linkX2 2 3 4 4 5; linkNextX2 1 2 3 3 4; linkNextX2 2 3 4 4 5; linkY2 1 2 3 3 4; linkY2 2 3 4 4 5 ]
    let finalstate = actions |> List.fold (fun s fn -> fn s) lst
    showState2 finalstate
    ()

printfn "Linked list 2nd set of actions example"
do coarsegrainedUnit ()

// Runs one interleaving of coarse-grained actions on a fresh list and
// returns the resulting list.
// Fixed: removed unused loc1/loc2 and return the fold result directly
// instead of discarding `res` and returning `lst` (equivalent only
// because the actions mutate the array in place).
let applyactions2 actions = 
    let lst = [|1;2;3;4;5;6|]
    actions |> List.fold (fun s fn -> fn s) lst

// Exhaustively applies every interleaving of the two coarse-grained
// moves and prints each resulting list with its uniqueness flag.
let coarsegrainedRun () =
    let actionsf x nx nnx y ny = [ linkX2 x nx nnx y ny; linkNextX2 x nx nnx y ny; linkY2 x nx nnx y ny ]
    interleave (actionsf 1 2 3 3 4) (actionsf 2 3 4 4 5)
    |> List.map applyactions2
    |> List.iter showState2
    ()

printfn "Linked list 2nd set of actions"
do coarsegrainedRun ()

printfn "Done"

Constraints on workflow paths, part 4

Although I have not had much personal time lately, to finish the Pandamator code, I managed to move the workflow constraint checking forward, using LtlChecker as I promised in the relevant post. I’m not very happy with all the details — yet. Although I do call it ultimately from within C#, the work is done through F# code, so relying on parameter-less delegates, which in turn rely on mutable state, did not do me any good. I’m thinking of switching everything to use externally passed state, since using refs in F# is a sore sight indeed.

The reason I ended up using F# (except for the obvious reason that F# is more fun than C#) is that the code to generate workflow paths and interleave validation is very heavily recursive and intertwined. Meaning  that the 130 lines of F# would have been God knows what, had they been written in C#.

In what follows, I have elided details about the Scenario and the ScenarioStep because they don’t matter for this post. The central point is function pathsFromNode. Don’t worry about the partial match on the list, I make sure the list starts non-empty.

// Lazily generates all cycle-free paths from the given (step, result)
// head, threading the constraint-checking function f through every
// transition. Each path element pairs a ScenarioStep with the
// ConstraintResult after visiting it; paths are built head-first.
// Generation of a branch stops early as soon as f reports Failed, and a
// terminal step gets one extra application of f to simulate extending a
// finite path to an infinite one (Buchi automata assume infinite words).
// The (step,res) :: path pattern is partial by design: callers always
// pass a non-empty initial path.
// NOTE(review): the visited check inspects only the tail `path`, so a
// transition straight back to `step` itself is not filtered — confirm
// transitionsFromNode excludes self-loops.
let rec pathsFromNode (f:ScenarioStep -> ConstraintResult -> ConstraintResult) ((step,res) :: path) (scenario:Scenario) =
    seq {
        let trans = (transitionsFromNode scenario step) |> Seq.toList
        if trans.Length = 0 then
            let res1 = f step res //run f one more time to simulate extension to infinite path
            yield (step,res1) :: path
        else
        for s in trans do
            if not (List.exists (fun (x,_) -> x = s) path) then
                let res1 = f s res
                if res1.status <> Failed then
                    yield! (pathsFromNode f ((s,res1) :: (step,res) :: path) scenario)
                else yield (s,res1) :: (step,res) :: path
    }

Note the line with the comment. It is necessary for the reason I explained in the last post (Büchi automata assuming infinite words). This function returns a sequence of paths, the generation of each path being able to end prematurely if the constraint validation fails along the way.

Since I am using LTL on single paths, I needed some way to extend this to all possible paths.  I do this the easy way, using ∀ and ∃.

// Checks "G ! p" (action 51 never occurs) along every path out of step.
// Returns an annotated boolean: (false, Some counterexample) on failure.
// Fixed: the tail of this function was garbled in the original text and
// has been reconstructed after the parallel structure of
// checkEventually52 below; the dictionary also lacked its
// System.Func<bool> type argument, as used everywhere else in this file.
let checkForallNever51 (scenario:Scenario) (step:ScenarioStep) =
    let step_ref = {new _step_ref with step=step}
    let dic = new Dictionary<string,System.Func<bool>> ()
    let p = Action(51)
    dic.Add("p",(fun () -> evalPredicate step_ref.step p))
    let con = create_constraint "G ! p" dic
    let i0 = init con
    if i0.status = Failed then
        (false, Some [step, i0])
    else
        // Advance the automaton once per visited step; step_ref lets the
        // proposition closure see the step currently being visited.
        let checker (step:ScenarioStep) (res:ConstraintResult) =
            step_ref.step <- step
            next res
        let paths = pathsFromNode checker [(step,i0)] scenario
        paths |> forall_ce (fun ((s,res) :: _) -> res.status <> Failed && res.status <> CannotTerminate)

// For every step containing action 52, verify that action 51 can never
// occur on any path from it.
// Fixed: the original called checkForallNeverSoi, a name defined
// nowhere in this file; checkForallNever51 is clearly the intended
// function (apparent leftover of a search-and-replace rename).
let checkNever51After52 (scenario:Scenario) =
    let steps52 = scenario.steps |> List.filter (fun s -> s.actions |> List.exists (fun a -> a.id = 52))
    steps52 |> forall_ce2 (checkForallNever51 scenario)

Never mind forall_ce, it is like forall but returns a counterexample if it fails (annotated boolean, if you like). There is also an exists_ce with the same return type, but it always returns None since failing does not produce any counterexample. And the forall_ce2 variant accepts a predicate that returns an annotated boolean, as well. I know, the names suck but I couldn’t think of anything better.

As you can see, I am selecting the start nodes and run a ∀ on each of them. In this example, I am not starting from entry nodes of the workflow.

In the next example, I am starting from entry nodes, but (in a perverse twist) I also use the constraint validation with a ∃ for filtering the start nodes.

// Checks "F p & G ! q" — action 52 eventually occurs and action 51
// never does — along paths from step, combining per-path verdicts with
// the given quantifier (forall_ce or exists_ce). Returns an annotated
// boolean: (false, Some counterexample) when the initial state already
// fails; otherwise whatever the quantifier produces.
let checkEventually52 quantifier (scenario:Scenario) (step:ScenarioStep) =
    let step_ref = {new _step_ref with step=step}
    let dic = new Dictionary<string,System.Func<bool>> ()
    dic.Add("p",(fun () -> evalPredicate step_ref.step (Action(52))))
    dic.Add("q",(fun () -> evalPredicate step_ref.step (Action(51))))
    let con = create_constraint "F p & G ! q" dic
    let i0 = init con
    if i0.status = Failed then
        (false, Some [step, i0])
    else
        // Advance the automaton one transition; the proposition closures
        // above read step_ref.step, so update it before calling next.
        let checker (step:ScenarioStep) (res:ConstraintResult) =
            step_ref.step <- step
            next res
        let paths = pathsFromNode checker [(step,i0)] scenario
        // A path satisfies the constraint when its final result neither
        // failed nor is still awaiting fulfillment.
        paths |> quantifier (fun ((s,res) :: _) -> res.status <> Failed && res.status <> CannotTerminate)

// ForAll / Exists instantiations of checkEventually52.
// Fixed: both originally called checkEventuallyCoi, a name defined
// nowhere in this file; checkEventually52 is the function being
// wrapped (apparent leftover of a search-and-replace rename).
let checkForallEventually52 (scenario:Scenario) (step:ScenarioStep) =
    checkEventually52 forall_ce scenario step
    
let checkExistsEventually52 (scenario:Scenario) (step:ScenarioStep) =
    checkEventually52 exists_ce scenario step

// Start nodes are steps that either accept input and (per the exists
// check) have at least one path satisfying the constraint, or carry
// the web action for 52; every such start node must then satisfy the
// constraint on all paths.
let checkEventually52After52Event (scenario:Scenario) =
    let coieventsteps = scenario.steps 
                        |> List.filter (fun s -> 
                                        ((stepHasInput s) && (fst (checkExistsEventually52 scenario s)))
                                        || (stepHasWebactionFor52 s))
    coieventsteps |> forall_ce2 (checkForallEventually52 scenario)

The quantifier is a parameter of checkEventually52. Granted, it is not the cleanest code imaginable. It is also not comparable to what CTL* (or plain CTL — I remind you that CTL allows quantifiers before any occurrence of a modal operator in an expression, not just at top level) would be able to express. However, it has proven that it is possible to express constraints a lot more declaratively than my ad-hoc Prolog program did. What remains now is to use it to express useful constraints.

LtlChecker, part 2

When I described LtlChecker in the relevant post, I wrote I had made a mild effort to “make it more C# friendly” and I left it at that. However, it dawned on me that LtlChecker, combined with an exhaustive path generation on a workflow (the kind that F# sequences do effortlessly) can actually replace the ad-hoc approach I followed with my Prolog program (see the post on workflow constraints that started the whole affair). Stepping through the Büchi automaton for every transition of the workflow will prune the search space very effectively. It is not the same as having a CTL* checker, but serves the same purpose, since we are mostly covered if we get an implicit ∀ in front of all formulas (for which plain CTL is sufficient, to be frank).

So, in the end, it seems I’ll be able to join both threads (the one on workflow constraints and the one on temporal database constraints) using similar tools for both. Before doing that, though, which demands some more of my time than I currently have, I’ll present some caveats to you, and some actual C# code using LtlChecker (it was C#-friendly, after all).

First of all, you should know that Büchi automata are actually defined for infinite paths… which I thought did not matter while running it, but it actually does. I’ll revisit this point in the third test case below. For now, just keep in mind that a finite path supposedly repeats the final transition for ever and that’s how it’s made infinite to satisfy this precondition.

Second, I hinted at that in a casual comment in the LtlChecker code (ln.51), but did not spell it out in all detail: an accepting run of a Büchi automaton must pass through all acceptance conditions an infinite number of times. When you combine this with the above fact, the only way this can happen is if the final transition (which will be mentally repeated infinite times) is actually marked with all of them. So, for finite paths (our case), any transition which is not accepting all acceptance conditions is as if it accepts none at all.

Let us look at some unit tests, now.

using System;
using System.Collections.Generic;

namespace CheckLtlUsageExample
{
    /// <summary>
    /// Exercises LtlChecker with three small scenarios: an invariant that
    /// holds throughout, an eventuality that never happens, and an
    /// eventuality that happens and needs one extra step to register.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Test1();
            Test2();
            Test3();
        }

        // "G p" with p always true: every step reports CanTerminate.
        private static void Test1()
        {
            var cell = new[] { 0 };
            var props = new Dictionary<string, Func<bool>> { { "p", () => cell[0] == 0 } };
            var constraint = LtlChecker.create_constraint("G p", props);

            var r0 = LtlChecker.init(constraint);
            var r1 = LtlChecker.next(r0);
            var r2 = LtlChecker.next(r1);

            System.Diagnostics.Debug.Assert(r0.status == LtlChecker.ConstraintStatus.CanTerminate);
            System.Diagnostics.Debug.Assert(r1.status == LtlChecker.ConstraintStatus.CanTerminate);
            System.Diagnostics.Debug.Assert(r2.status == LtlChecker.ConstraintStatus.CanTerminate);
        }

        // "F p" with p never true: the constraint is never fulfilled.
        private static void Test2()
        {
            var cell = new[] { 0 };
            var props = new Dictionary<string, Func<bool>> { { "p", () => cell[0] == 1 } };
            var constraint = LtlChecker.create_constraint("F p", props);

            var r0 = LtlChecker.init(constraint);
            var r1 = LtlChecker.next(r0);

            System.Diagnostics.Debug.Assert(r0.status == LtlChecker.ConstraintStatus.CannotTerminate);
            System.Diagnostics.Debug.Assert(r1.status == LtlChecker.ConstraintStatus.CannotTerminate);
        }

        // "F p" where p becomes true after init: one more next() call is
        // needed before the automaton reports CanTerminate, because a
        // finite path is treated as repeating its last transition forever.
        private static void Test3()
        {
            var cell = new[] { 0 };
            var props = new Dictionary<string, Func<bool>> { { "p", () => cell[0] == 1 } };
            var constraint = LtlChecker.create_constraint("F p", props);

            var r0 = LtlChecker.init(constraint);
            cell[0] = 1;
            var r1 = LtlChecker.next(r0);
            var r2 = LtlChecker.next(r1); //Extra step to simulate infinite path

            System.Diagnostics.Debug.Assert(r0.status == LtlChecker.ConstraintStatus.CannotTerminate);
            System.Diagnostics.Debug.Assert(r1.status == LtlChecker.ConstraintStatus.CannotTerminate);
            System.Diagnostics.Debug.Assert(r2.status == LtlChecker.ConstraintStatus.CanTerminate);
        }
    }
}

Test cases one and two are straightforward.

Test case three exhibits a particular idiosyncrasy, which can be explained if one looks at the Büchi automaton itself.

Fp-ba-graph

It is not sufficient for SPOT to encounter “p”. This encounter must be followed by at least one other transition. This might be an artifact of the algorithms used to produce the automaton, but, since automata are supposed to apply to infinite paths, it is a right of the algorithm to do that. Which means that we must adapt to it… hence the extra “step” in test case three. There must always be an extra step in the end of a path, just in case.

I conclude with these words of caution, and I hope, by the next post, that either the path checking or the SQL implementation of running the automaton, will be completed so that I earn my bragging rights I’ve been working hard towards to.

LtlChecker: A spin-off of Pandamator

In the course of playing around with code to run the Büchi automata for the LTL constraint I’m adding to Pandamator, I realized that all that could be packaged into a nifty little feature for checking object evolution (or evolution of whole systems, for that matter). Enter LtlChecker, a library to help you apply the power of LTL to your imperative programs. Its only dependency is SPOT.fs from the Pandamator project, whose job is to execute SPOT and parse the automaton it produces. And SPOT itself, of course. I have taken the easy way out regarding how SPOT is found: it must be in the path. Please note that cygwin1.dll must be on the path as well, for SPOT to be invoked.

SPOT.fs can be found in the SVN repository and, in any case, I have described it in the relevant post. I will focus on the new code, LtlChecker.fs. In an attempt to make it more C#-friendly, I have used Dictionary instead of Map.

Here’s a simple usage scenario, with which I will demonstrate how it works and I’ll present the implementation later.

// Minimal mutable test subject: a record with one mutable int field
// whose evolution the LTL constraint observes.
type _test_struct = {
    mutable x: int
}

// Traces two diverging evolutions of obj under G(p0 -> G p1), i.e.
// "once x becomes > 0 it never drops back":
//   i0 (x=0) -> i1 (x=1) -> i2 (x back to 0: should fail), and,
//   branching again from i1, i2b (x stays 1: still satisfiable).
// Note that i2b is deliberately computed from i1, not i2.
let _unit_test () =
    let obj = { new _test_struct with x=0 }
    let dic = new Dictionary<string,System.Func<bool>> ()
    dic.Add("p0",(fun () -> obj.x > 0))
    dic.Add("p1",(fun () -> obj.x > 0))
    let con = create_constraint "G(p0 -> G p1)" dic
    let i0 = init con
    obj.x <- 1
    let i1 = next i0
    obj.x <- 0
    let i2 = next i1
    obj.x <- 1
    let i2b = next i1
    (i0,i1,i2,i2b)

I have actually traced two diverging paths, for the constraint G(p0 -> G p1), where p0 and p1 both mean “property x of the test object is > 0”. The constraint can be translated as “once x becomes > 0, it never becomes zero again”.

Calling create_constraint creates an LtlConstraint, which is used to initialize the constraint checking. Calling init on it, evaluates propositions at the initial state and gives you back a ConstraintResult, which is what you’ll be examining throughout the constraint checking. The relevant property you should be looking at is status, an enumeration that tells you if the constraint has failed (Failed), if the system is at a state where it can be terminated with the constraint being fulfilled (CanTerminate), or if the system cannot be terminated now because the constraint awaits fulfillment (CannotTerminate).

Let’s look at it in practice. Here’s what i0 looks like.

{currentstates = [{state = 0;
                      accepting = true;}];
    status = CanTerminate;
    ltlconstraint =
     {automaton =
       {acccondnum = 1;
        states =
         [|{initial = true;
            transitions =
             [{endstate = 1;
               acccond = [0];
               guard = Bin (And,Prop "p1",Prop "p0");};
              {endstate = 0;
               acccond = [0];
               guard = Not (Prop "p0");}];};
           {initial = false;
            transitions = [{endstate = 1;
                            acccond = [0];
                            guard = Prop "p1";}];}|];};
      propositions =
       dict
         [("p0",
           System.Func`1[System.Boolean] {Method = Boolean Invoke();
                                          Target = LtlChecker+_unit_test@87;});
          ("p1",
           System.Func`1[System.Boolean]
             {Method = Boolean Invoke();
              Target = LtlChecker+_unit_test@88-1;})];};}

Status is CanTerminate, because x is still 0. In i1, status is still CanTerminate, and the current state is now state 1.

In i2, where x has gone to being 0 again, the status is Failed! That’s what one would expect.

In i2b, which is an alternative path from i1 but with x continuing to be bigger than 0, the status remains CanTerminate.

That’s it. A small facility to apply temporal logic to the evolution of mutable systems! In fact, you can apply it to any timeseries.

Nota Bene: I cannot guarantee that my interpretation of how running a Büchi automaton is done, is correct. It might happen that what works for the LTL expression above, might not work for some other expression. If I find problems (and manage to overcome them), I will update the code on SVN.

Here’s the code itself (it can also be found in the SVN repository, in project LtlChecker).

module LtlChecker

open System.Collections.Generic
open SPOT

type LtlConstraint = {
    automaton: BuchiAutomaton;
    propositions: Dictionary<string,System.Func<bool>>;
}

type ConstraintStatus = Failed | CanTerminate | CannotTerminate

type ConstraintState = {
    state: int;
    accepting: bool;
}

/// Snapshot of a constraint run after a step: the surviving parallel
/// states, the overall verdict, and the constraint itself so that
/// `next` can be chained without re-passing it.
type ConstraintResult = {
    currentstates: ConstraintState list;
    status: ConstraintStatus;
    ltlconstraint: LtlConstraint;
}

/// Evaluates the propositional guard [exp]. Named propositions are looked
/// up in [propositions] and their System.Func<bool> thunks invoked, so the
/// result reflects the mutable system's state at the moment of the call.
let rec execute_logexp (propositions:Dictionary<string,System.Func<bool>>) (exp:LogicExpr) =
    match exp with
    | Const v -> v
    | Not e -> not (execute_logexp propositions e)
    | Bin (o, x, y) ->
        // Evaluate each operand exactly once and derive every connective
        // from the two boolean results. The previous code rebuilt an
        // equivalent expression for Impl/Equiv/Xor and walked it again,
        // invoking every proposition thunk a second time (assumes the
        // thunks are pure reads of system state, as used here).
        let lx = execute_logexp propositions x
        let ly = execute_logexp propositions y
        match o with
        | And -> lx && ly
        | Or -> lx || ly
        | Impl -> not lx || ly  // x -> y  ==  (not x) or y
        | Equiv -> lx = ly      // (x and y) or (not x and not y)
        | Xor -> lx <> ly       // (x or y) and not (x and y)
    | Prop p ->
        let rc = propositions.[p]
        rc.Invoke()

/// Builds an LtlConstraint: quotes the LTL formula, hands it to SPOT's
/// ltl2ba to obtain a Büchi automaton, and pairs it with the proposition
/// thunks that will drive the guards.
let create_constraint (ltl:string) (propositions:Dictionary<string,System.Func<bool>>) =
    let quoted = "'" + ltl + "'"
    { automaton = ltl2ba quoted
      propositions = propositions }

/// Fires every enabled transition out of automaton state [state]: keeps
/// the transitions whose guard evaluates true right now, and maps each to
/// the run-state it leads to.
let do_transitions (state:int) (con:LtlConstraint) =
    [ for t in con.automaton.states.[state].transitions do
        if execute_logexp con.propositions t.guard then
            // Finite paths cannot work with more than one accepting condition
            yield { state = t.endstate; accepting = t.acccond.Length > 0 } ]

/// Packages the successor states [nextstates] into a ConstraintResult,
/// deriving the verdict: no survivors means Failed; any accepting
/// survivor means CanTerminate; otherwise CannotTerminate.
let states_to_result (nextstates:ConstraintState list) (con:LtlConstraint) =
    let status =
        match nextstates with
        | [] -> Failed
        | states when states |> List.exists (fun s -> s.accepting) -> CanTerminate
        | _ -> CannotTerminate
    { currentstates = nextstates
      status = status
      ltlconstraint = con }

/// Starts a run: locates the (unique) initial automaton state and takes
/// the first step from it, yielding the initial ConstraintResult.
let init (con:LtlConstraint) =
    let start =
        con.automaton.states |> Array.findIndex (fun st -> st.initial)
    states_to_result (do_transitions start con) con

/// Advances the run one step: fires every enabled transition from every
/// current parallel state and folds the successors into a fresh result.
let next (res:ConstraintResult) =
    // List.collect concatenates in a single pass; the original
    // `List.map … |> List.fold (@) []` re-copied the accumulator list
    // on every append (quadratic in the number of successor states).
    let nextstates =
        res.currentstates
        |> List.collect (fun s -> do_transitions s.state res.ltlconstraint)
    states_to_result nextstates res.ltlconstraint

New Table Constraint Planned for Pandamator, part 2

As I promised in the previous relevant post, I am moving forward with the LTL constraint for Pandamator (as any of you who have been following the iteration on Flying Donut already know). Apart from some integration in the maintenance scripts, all that remains is actually running the Büchi automaton. Which, of course, is not an easy task to do in SQL… That’s why I’m going to stick to the positive side only, and give you an account of what I have done so far.

The actual syntax of the constraint is very close to what I wrote earlier (I used WITH instead of WHERE because it’s already used as a keyword for defining things).

alter table Production.Product 
add validtime constraint weightneverdec 
check 'G(p0 -> G p1)' 
with p0 = weight > 0, p1 = weight > 0

This is a simple constraint for testing, which means that, whenever Product.weight becomes bigger than zero, it never becomes zero again.

SPOT happily produces the following automaton for that expression.

weightneverdec

2 1t
0 1
1 0 -1 & p1 p0
0 0 -1 ! p0
-1
1 0
1 0 -1 p1
-1

The automaton is parsed internally to the following value.

val it : BuchiAutomaton =
  {acccondnum = 1;
   states =
    [|{initial = true;
       transitions =
        [{endstate = 1;
          acccond = [0];
          guard = Bin (And,Prop "p1",Prop "p0");}; {endstate = 0;
                                                    acccond = [0];
                                                    guard = Not (Prop "p0");}];};
      {initial = false;
       transitions = [{endstate = 1;
                       acccond = [0];
                       guard = Prop "p1";}];}|];}

My (simple) parser follows almost the full LBTT definition, except that I assume states are named as consecutive numbers starting at zero, because that is what SPOT produces.

The abstract syntax tree for the automaton is defined by the following types.

// Abstract syntax for the transition guards and the automaton parsed
// from SPOT's LBTT output.

// Binary propositional connectives allowed in guards.
type LogicOp = And | Or | Impl | Xor | Equiv

// Propositional formula: named proposition, boolean constant, negation,
// or a binary connective applied to two sub-formulas.
type LogicExpr =
    | Prop of string
    | Const of bool
    | Not of LogicExpr
    | Bin of LogicOp * LogicExpr * LogicExpr

// One outgoing edge of an automaton state.
type BuchiAutomatonTransition = {
    endstate: int;      // index of the target state in BuchiAutomaton.states
    acccond: int list;  // accepting conditions satisfied by this transition
    guard: LogicExpr;   // propositional guard enabling the transition
}

// One automaton state; states are identified by their array index.
type BuchiAutomatonState = {
    initial: bool
    transitions: BuchiAutomatonTransition list;
}

// A Büchi automaton: the number of accepting conditions plus the states.
type BuchiAutomaton = {
    acccondnum: int;
    states: BuchiAutomatonState[];
}

A simple translation layer transforms the propositional logic of the guards, together with the definition of the propositions in the constraint, to SQL.

// Translates a propositional guard into a flat list of SQL tokens,
// resolving each named proposition to its SQL expression from [rcl].
// Derived connectives (Impl/Equiv/Xor) are rewritten into And/Or/Not
// before translation, so only those three appear in the emitted SQL.
let rec logexp2stringlist (rcl:PropositionDefinition list) (exp:LogicExpr) =
    match exp with
    | Const true -> ["(1=1)"]
    | Const false -> ["(1=0)"]
    | Not e -> ["NOT";"("] @ (logexp2stringlist rcl e) @ [")"]
    | Bin (o, x, y) -> 
        let lx = logexp2stringlist rcl x
        let ly = logexp2stringlist rcl y
        match o with
        | And -> ["(";"("] @ lx @ [")";"and";"("] @ ly @ [")";")"]
        | Or -> ["(";"("] @ lx @ [")";"or";"("] @ ly @ [")";")"]
        | Impl -> logexp2stringlist rcl (Bin(Or,Not(x),y))
        | Equiv -> logexp2stringlist rcl (Bin(Or,Bin(And,x,y),Bin(And,Not(x),Not(y)))) // (x and y) or (not x and not y)
        | Xor -> logexp2stringlist rcl (Bin(And,Bin(Or,x,y),Not(Bin(And,x,y)))) // (x or y) and not(x and y)
    | Prop p -> 
        let rc = rcl |> List.find (fun rc -> rc.name = p)
        "(" :: rc.expression :: [")"]

During ALTER VALIDTIME execution, a view is created to abstract away the exact expressions used for the propositions (as boolean numbers 0/1, since SQL does not recognize logical expressions outside WHERE).

CREATE VIEW [Production].[Product_weightneverdec_GuardView] WITH SCHEMABINDING as
SELECT
 ProductID,
 ValidToDate,
 row_number() over (partition by ProductID order by ValidToDate) AS VersionOrdinal,
 case when ( ( ( weight > 0 ) ) and ( ( weight > 0 ) ) ) then 1 else 0 end AS g0_0,
 case when NOT ( ( weight > 0 ) ) then 1 else 0 end AS g0_1,
 case when ( weight > 0 ) then 1 else 0 end AS g1_0
FROM [Production].[Product]

That’s it. I wish I had done more, but I believe I made good use of my scant free time to do as much as possible. Expressed like that, the view can be used to perform (non-deterministic, i.e. parallel…) runs of the automaton on the actual object histories. Stay tuned, I hope my next post on the subject will be a triumphant conclusion to creating the constraint.

Fewer and narrower covering indexes for report tables

I recently looked at how to index a report table of six dimensions (plus the time one) with the fewest and narrowest indexes possible. I was interested in drill-down queries, so the condition (apart from the time condition) will be formed by a conjunction of dimension key equalities.  A quick search did not come up with anything. Any prefix of an index can be used to cover a query, so I was sure that indexes with one to six columns would be needed. The closest I came using straightforward combinatorics was 58 permutations of six columns to cover any possible query. Not bad, compared to the total number of 720 distinct permutations. And, to be truthful, I did not derive the number by math but by eliminating unneeded permutations using a program. However, 58 indexes is still a lot, and I was still bothered by the presence of extra columns in most of those permutations. We only need one index having all six columns, one index for any given subset of five columns, and so on.

It has been said that any heavy calculation that you do because you don’t grok enough math to avoid it, saves you some face if you do it in F# (ok, it is I who said it). Follows a program to calculate indexes of length 1 to N to cover any possible query with N columns. Columns are represented by numbers 1 to N+1, just to make the whole thing seem more abstract than it is. And how many are those for N=6? Not 42, unfortunately but close: [1; 2; 3; 4; 5; 6], [1; 2; 3; 4; 6], [1; 2; 3; 5; 6], [1; 2; 3; 6], [1; 2; 4; 5; 6], [1; 2; 4; 6], [1; 2; 5; 6], [1; 2; 6], [1; 3; 4; 5; 6], [1; 3; 4; 6], [1; 3; 5; 6], [1; 3; 6], [1; 4; 5; 6], [1; 4; 6], [1; 5; 6], [1; 6], [2; 3; 4; 5; 6], [2; 3; 4; 6], [2; 3; 5; 6], [2; 3; 6], [2; 4; 5; 6], [2; 4; 6], [2; 5; 6], [2; 6], [3; 4; 5; 6], [3; 4; 6], [3; 5; 6], [3; 6], [4; 5; 6], [4; 6], [5; 6], [6]

// Helpers from: http://fssnip.net/4u

/// All lists obtained by inserting [x] at each position of the argument
/// list, front-most insertion first.
let rec insertions x = function
    | [] -> [[x]]
    | (y :: ys) as whole ->
        let deeper = insertions x ys |> List.map (fun rest -> y :: rest)
        (x :: whole) :: deeper
/// Lazily enumerates every permutation of the given list.
let rec permutations = function
    | [] -> seq [ [] ]
    | x :: xs -> permutations xs |> Seq.collect (insertions x)

// The program itself

/// Per-permutation coverage: element i holds the column set formed by the
/// first i+1 columns of the permutation (empty when the permutation is
/// shorter than i+1 columns).
type covering = {
    // The original bare `Set list` lacked the mandatory type argument.
    covering: Set<int> list;
}

/// Computes the N prefix sets covered by permutation [x]: the set of its
/// first 1, 2, …, N columns; positions past x's length get the empty set.
let covers (N:int) (x:int list) =
    {
        // Set.empty is the idiomatic empty set (was Set.ofList []).
        covering = List.init N (fun i -> if x.Length >= i + 1 then Set.ofSeq (Seq.take (i + 1) x) else Set.empty);
    }

/// Accumulated search state: [p] is the set of index column lists kept so
/// far (entries subsumed by a longer list get pruned), and [covering]
/// records, per prefix length, every column set already covered.
type allcovers = {
    // Original bare `Set` / `Set<Set>` were missing their type arguments.
    p: Set<int list>;
    covering: Set<Set<int>> list;
}

/// The initial, empty search state for N columns.
let emptyallcovers (N:int) =
    { 
        p = Set.empty;
        // `_` for the unused index; Set.empty instead of Set.ofList [].
        covering = List.init N (fun _ -> Set.empty);
    }

/// True when [b] is a prefix of [a] — i.e. an index on columns [a] also
/// covers any query answered by an index on columns [b].
let subsumes (a:int list) (b:int list) =
    // Seq.forall2 truncates to the shorter sequence (b, by the length
    // guard) and short-circuits at the first mismatch; the original
    // fold over Seq.map2 always consumed every pair.
    a.Length >= b.Length && Seq.forall2 (=) a b

/// Adds index [x] to the kept set [s], first removing every existing
/// entry that [x] subsumes (entries that are prefixes of [x]).
let add_subsuming (x:int list) (s:Set<int list>) =
    // Bare `Set` is not a valid annotation; spell out the element type.
    let subsumed = Set.filter (fun y -> subsumes x y) s
    Set.add x (s - subsumed)

/// Records permutation [x] in the accumulated state: folds it into the
/// kept set (pruning entries it subsumes) and marks each of its prefix
/// sets — precomputed in [cx] — as covered.
let combine_coverings (c:allcovers) (x:int list) (cx:covering) =
    let covered =
        c.covering
        |> List.mapi (fun i known -> known |> Set.add (cx.covering.Item i))
    {
        p = add_subsuming x c.p;
        covering = covered;
    }

/// Considers permutation [x] for inclusion in state [c]. Starting from
/// x's full length and dropping one trailing column at a time, it finds
/// the longest prefix whose column set is not yet covered; that prefix
/// is then recorded (together with all of x's prefix sets). If even the
/// single-column prefix is covered, x adds nothing and [c] is returned.
let addifnotcovered (N:int) (c:allcovers) (x:int list) =
    let cx = covers N x
    let rec add1 (c:allcovers) (cx:covering) (x:int list) =
        // Li = zero-based index of the prefix length currently tested.
        let Li = x.Length - 1
        if not (Set.contains (cx.covering.Item Li) (c.covering.Item Li)) then
            // This prefix's column set is new: keep this prefix as an index.
            combine_coverings c x cx
        elif Li = 0 then
            // Even one column is already covered: the permutation is redundant.
            c
        else
            // Retry with the prefix one column shorter.
            add1 c cx  (List.ofSeq (Seq.take Li x))
    add1 c cx x

/// Entry point: enumerates every permutation of columns 1..6, keeps only
/// those contributing an uncovered prefix set, and prints the resulting
/// minimal collection of covering indexes.
let Main() =
    let N = 6
    let columns = [1 .. N]
    let kept =
        columns
        |> permutations
        |> Seq.fold (addifnotcovered N) (emptyallcovers N)
    kept.p |> Set.iter (printf "%A\n")

    printf "End"

Main()