art with code

2008-09-26

Building an OCaml array library from basic operations

In writing Prelude.ml utility functions for array-like data structures, I noticed that they can be built from a very small set of base operations. The minimum you need is empty, fold and unfold, but those work best with lists and trees. For efficient arrays you want alloc, length, get and set. We can write either way in terms of the other, interestingly enough.

Empty, foldl and unfoldl in terms of alloc, length, get and set:

let empty = alloc 0

(* recurse with get, a counter and an accumulator *)
let foldl f i a =
let rec aux f acc idx len a =
if idx = len then acc
else aux f (f acc (get a idx)) (succ idx) len a in
aux f i 0 (length a) a

let push a v =
let b = alloc (length a + 1) in
for i = 0 to (length a - 1) do set b i (get a i) done;
set b (length a) v

(* recurse with push and an accumulator *)
let unfoldl f i =
let rec aux f i res =
match f i with
| None -> res
| Some (v, acc) -> aux f acc (push res v) in
aux f i empty


Alloc, length, get and set in terms of empty, foldl and unfoldl:

let zero = 0

(* unfold with a counter and set to zero *)
let alloc l =
unfoldl (fun i ->
if i = l then None
else Some (zero, succ i)
) 0

(* fold with a counter *)
let length a = foldl (fun l _ -> l+1) 0 a

(* fold with a counter and pick the value when counter equals wanted index *)
let get a i =
let l,v =
foldl (fun (j,res) v ->
if i = j then (j, Some v) else (succ j, None)
)
(0, None) a in
v

(* unfold with a counter and set the value when counter equals wanted index *)
let set a i v =
let l = length a in
unfoldl (fun j ->
if j = l then None
else Some (
if i = j then v else get a j,
succ j
)
) 0


As you may notice, unfold looks inefficient for arrays and set and get look inefficient for lists. Not to mention that the above implementation of get returns an option, while the fold and push expect a flat value. The type of the array elements is also set at library writing time with zero. As the above code is only for the purpose of demonstrating the interchangeability of the signatures, may the kind reader forgive these omissions (presenting better examples would be very welcome too.)

Let's take a look at finding values. Finds and gets can be abstracted into a fold with an early exit. A find is a fold that exits when f v equals true. A get is a fold that exits when counter equals wanted index.


(* findWithIndex using get *)
let findWithIndex f a =
let rec aux f i len a =
if i = len then None
else
let v = get a i in
if f v i then Some (v,i)
else aux f (succ i) len a in
aux f 0 (length a) a

let maybe d f v = match v with Some x -> f x | None -> d
let optMap f v = match v with Some x -> Some (f x) | None -> None

let find f a = optMap fst (findWithIndex (fun v _ -> f v) a)
let findIndex f a = optMap snd (findWithIndex (fun v _ -> f v) a)

(* findWithIndex using fold and an exception break *)

exception Found of 'a
let findWithIndex f a =
try ignore (foldl (fun i v -> if f v i then raise (Found (v, i)) else succ i) 0 a);
None
with Found x -> Some x

let get a i = optMap fst (findWithIndex (fun _ j -> i = j) a)


Many element query functions are variants of find, here we have any, all, elem and indexOf.

(* any is a find that returns true on match, false on not found *)
let any f a = find f a <> None

(* all is a variant of any: if nothing matches the negation of f, return true *)
let all f a = not (any (fun v -> not (f v)) a)

(* elem is a special case of any *)
let elem e a = any ((=) e) a

(* indexOf is a special case of findIndex *)
let indexOf e a = findIndex ((=) e) a


To do filtering, we fold with push (or collect to a list and do of_list at the end, depending on the cost of the operations.)

let filter f a = foldl (fun b v -> if f v then push b v else b) empty a

let filter f a = of_list (List.rev (foldl (fun b v -> f v then v::b else b) [] a))


Implementing map and iter in terms of fold is trivial:

let map f a = foldl (fun b i -> push b (f i)) empty a
let iter f a = foldl (fun () i -> f i) () a


To implement map in imperative terms, we should write an init function:

let init f l =
let rec aux f idx len a =
if idx = len then a
else (set a idx (f idx); aux f (succ idx) len a) in
aux f 0 l (alloc l)

let map f a = init (fun i -> f (get a i)) (length a)

let mapWithIndex f a = init (fun i -> f (get a i) i) (length a)
(* If you trust your compiler, let map f a = mapWithIndex (fun v _ -> f v) a *)

let foldlWithIndex f i a =
let rec aux f acc idx len a =
if idx = len then acc
else aux f (f acc (get a idx) idx) (succ idx) len a in
aux f i 0 (length a) a

let iterWithIndex f a = foldlWithIndex (fun () v i -> f v i) () a
(* (fun () v i -> f v i) = (fun () -> f) = (const f), where const x y = x *)


Reverse, copy and append are easy as well (I'm going to do only array-like versions from here on):

let reverse a =
let l = length a in
init (fun i -> get a (l-1-i)) l

let copy a = init (get a) (length a)

let append a b =
let la, lb = length a, length b in
let c = alloc (la + lb)
iterWithIndex (fun v i -> set c i v) a;
iterWithIndex (fun v i -> set c (i+la) v) b;
c


It is useful to have two indexing modes: left-indexing and right-indexing. The usual way is to have 0 be the first index from left and -1 be the first index from right. Easy enough:

let normalizeIndex a i = if i >= 0 then i else (length a + 1)


Now we can write sub and slice:

let sub i len a =
let i = normalizeIndex a i in
init (fun j -> get a (j+i)) len

let slice i j a =
let i = normalizeIndex a i in
let j = normalizeIndex a j + (if j < 0 then 1 else 0) in
sub i (j-i) a


A striding sub is also useful at times:

let subStride stride i len a =
let i = normalizeIndex a i in
init (fun j -> get a ((j*stride)+i)) len

(* We could even do let sub = subStride 1 *)


How about writing some subsequence iterators, they do help performance if the compiler isn't full of candy and magic. The usual way would be a for-loop with offset and stride. For-loops are error-prone, so iterators also move the source of bugs into a centralized location. Easier to fix one function than twenty.

let subFoldl i len f acc a =
let rec aux f acc i len a =
if i >= len then acc
else aux f (f acc (get a i)) (succ i) len a in
aux f acc (normalizeIndex a i) len a

(* let foldl f acc a = subFoldl 0 (length a) f acc a *)

let subIter i len f a = subFoldl i len (fun () v -> f v) () a
let subMap i len f a =
let i = normalizeIndex a i in
init (fun j -> f (get a (j+i))) len

(* slices can be done in the same manner *)
let sliceFoldl i j f acc a =
let i = normalizeIndex a i in
let j = normalizeIndex a j + (if j < 0 then 1 else 0) in
subFoldl i (j-i) f acc a

let sliceIter i j f a = sliceFoldl i j (fun () v -> f v) () a
let sliceMap i j f a =
let i = normalizeIndex a i in
let j = normalizeIndex a j + (if j < 0 then 1 else 0) in
init (fun k -> f (get a (k+i))) (j-i)

(* iterating subsequence iterators left as an exercise for the reader *)


To make the above functions faster and safer, we should do bounds-checking before iteration and use unsafe_get and unsafe_set, here's an example of an error-checking sub:

let sub i len a =
let i = normalizeIndex i in
if i + len > length a then raise Not_found;
init (fun j -> unsafe_get a (i+j)) len


Now for some list-like utils, more folds and list conversions:

let first a = if a = empty then raise Not_found else get a 0
let head = first
let last a = if a = empty then raise Not_found else get a (length a - 1)

let tail a = sub 1 (length a - 1) a
let popped a = sub 0 (length a - 1) a

let push a v = append a (init (fun _ -> v) 1)
let pop a = (last a, popped a)

let unshift a v = append (init (fun _ -> v) 1) a
let shift a = (first a, tail a)


let foldr f acc a =
let rec aux f acc i a =
if i < 0 then acc
else aux f (f (get a i) acc) (pred i) a in
aux f acc (length a - 1) a

let subFoldr i len f acc a =
let rec aux f acc len j a =
if len < 0 then acc
else aux f (f (get a (len+j)) acc) (pred len) j a in
let i = normalizeIndex a i in
if i + len > length a then raise Not_found;
aux f acc (len-1) i a

let foldl1 f a = subFoldl 1 (length a - 1) f (first a) a
let foldr1 f a = subFoldr 0 (length a - 1) f (last a) a


let to_list a = foldr (fun l v -> v::l) [] a
let of_list l =
let rec aux i a l =
match l with
| [] -> a
| h::t -> set a i h; aux (succ i) a t in
aux 0 (alloc (List.length l)) l


And from lists, a segue into lists of arrays:

let blit src src_idx dst dst_idx len =
if src_idx >= dst_idx then
for i=0 to len-1 do set dst (dst_idx + i) (get src (src_idx + i)) done
else
for i=len-1 downto 0 do set dst (dst_idx + i) (get src (src_idx + i)) done

let concat l =
let l = List.filter ((<>) empty) l in
let len = List.fold_left (fun s a -> a + length a) 0 l in
let res = alloc len in
ignore (List.fold_left (fun i a -> blit a 0 res i (length a); i + length a) 0 l);
res

let groupsOf n a =
if n < 1 then invalid_arg "groupsOf: n < 1";
let len = length a in
List.unfoldl (fun i ->
if i >= len then None
else Some (sub i (min (len-i) n) a, i+n)
) 0

let splitInto n a =
if length a <= 1 then groupsOf 1 a
else
let plen = int_of_float (ceil (float (length a) /. float n)) in
groupsOf plen a


Continuing with the splitting, some list utils from Haskell:

(* take is a special case of sub *)
let take n a = sub 0 n a

(* drop is a special case of slice *)
let drop n a = slice n (-1) a

let splitAt n a = (take n a, drop n a)

let flip f x y = f y x
let (<<) f g x = f (g x)

(* break breaks at the index of the first true to f *)
let break f a = maybe (a, empty) (flip splitAt a) (findIndex f a)

(* span splits at the index of the first false to f *)
let span f a = break (not << f) a

(* takeWhile is a take with findIndex *)
let takeWhile f a = maybe (copy a) (flip take a) (findIndex (not << f) a)

(* dropWhile is a drop with findIndex *)
let dropWhile f a = maybe (copy a) (flip drop a) (findIndex (not << f) a)


Ranges are nice for creating test data:

let range s e =
if s <= e
then init ((+) s) (e-s+1)
else init ((-) s) (s-e+1)

let (--) = range

(* (10--15) = [|10; 11; 12; 13; 14; 15|] *)


And that's all for now. The above code likely has bugs in it, I wrote it from scratch for this blog post, so there be dragons.

No comments:

Blog Archive