Module signatures

Simple polymorphic sorted list

Let us implement a polymorphic sorted list module that satisfies the following module signature:

module type POLY_SL =
  sig
    type 'a t = 'a list
    (** polymorphic sorted list *)

    val empty : 'a t
    (** [empty] is an empty sorted list *)

    val add : 'a -> 'a t -> 'a t
    (** [add v ls] inserts [v] in [ls], while keeping
      the output list sorted (in ascending order) *)

    val merge : 'a t -> 'a t -> 'a t
    (** [merge ls1 ls2] merges two sorted lists *)

    val print : ('a -> string) -> 'a t -> unit
    (** [print to_s ls] prints all elements of the list [ls],
      [to_s] is a function converting list elements into string *)
  end
module PolySortedList = struct
  type 'a t = 'a list

  let empty = []

  let rec add v ls = ...

  let rec merge ls1 ls2 = ...

  let print to_string ls =
    let print_one x = Printf.printf "%s " (to_string x) in
    List.iter print_one ls;
    print_newline ()
end

Implement functions add and merge

add 15 [10; 20; 30] --> [10; 15; 20; 30]

🐤 click to open

let rec add v ls =
  match ls with
  | hd :: tl ->
      if v <= hd then
        v :: ls
      else
        hd :: add v tl
  | [] -> [v]

merge [7; 11; 12] [10; 20; 30] --> [7; 10; 11; 12; 20; 30]

🐤 click to open

let rec merge ls1 ls2 =
  match ls1, ls2 with
  | h1::t1, h2::t2 ->
      if h1 <= h2 then
        h1 :: merge t1 ls2
      else
        h2 :: merge ls1 t2
  | [], _ -> ls2
  | _ -> ls1

Full code:

🐤 click to open

module PolySortedList = struct
  type 'a t = 'a list

  let empty = []

  let rec add v ls =
    match ls with
    | hd :: tl ->
        if v <= hd then
          v :: ls
        else
          hd :: add v tl
    | [] -> [v]

  let rec merge ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if h1 <= h2 then
          h1 :: merge t1 ls2
        else
          h2 :: merge ls1 t2
    | [], _ -> ls2
    | _ -> ls1

  let print to_string ls =
    let print_one x = Printf.printf "%s " (to_string x) in
    List.iter print_one ls;
    print_newline ()
end

module PSL = PolySortedList

let () =
  let ls = PSL.(empty |> add 10 |> add 20) in

  (* print using module function print *)
  PSL.print (string_of_int) ls

How to hide the internal representation the sorted list? Make the type 'a PolySortedList.t abstract

We have to edit the module signature POLY_SL so that it does not reveal the type of 'a t, and then declare PolySortedList with that signature:

module type POLY_SL =
  sig
    type 'a t   (* does not tell that it is stored as a list *)
    val empty : 'a t
    val add : 'a -> 'a t -> 'a t
    val merge : 'a t -> 'a t -> 'a t
    val print : ('a -> string) -> 'a t -> unit
  end

module PolySortedList : POLY_SL = struct
  ...
end

We cannot access sorted list objects as list anymore (get a compile-time error if we try), since its implementation is hidden:

# let ls = PolySortedList.(empty |> add 10 |> add 20) ;;
val ls : int PolySortedList.t = <abstr>

# List.iter print_int ls ;;
Error: This expression has type int PolySortedList.t
       but an expression was expected of type int list

However, we still can use the module interface to operate on the sorted list type:

# PolySortedList.print string_of_int ls ;;
10 20
- : unit = ()

Full code:

🐤 click to open

module type POLY_SL =
  sig
    type 'a t
    val empty : 'a t
    val add : 'a -> 'a t -> 'a t
    val merge : 'a t -> 'a t -> 'a t
    val print : ('a -> string) -> 'a t -> unit
  end

module PolySortedList : POLY_SL = struct
  type 'a t = 'a list

  let empty = []

  let rec add v ls =
    match ls with
    | hd :: tl ->
        if v <= hd then
          v :: ls
        else
          hd :: add v tl
    | [] -> [v]

  let rec merge ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if h1 <= h2 then
          h1 :: merge t1 ls2
        else
          h2 :: merge ls1 t2
    | [], _ -> ls2
    | _ -> ls1

  let print to_string ls =
    let print_one x = Printf.printf "%s " (to_string x) in
    List.iter print_one ls;
    print_newline ()
end

module PSL = PolySortedList

let () =
  let ls = PSL.(empty |> add 10 |> add 20) in

  (* cannot compile this:
  (* iterate as a list and print *)
  List.iter (print_int) ls;
  print_newline ();
  *)

  (* print using module function print *)
  PolySortedList.print (string_of_int) ls

Functors

Customizing the sorting order in SortedList

It can be useful to be able to customize the order in which the list elements should be sorted.

In order to do that, we can change the functions add and merge to sort according to a specified function precedes (if precedes x y, then x should go before y in the list). This function is passed as an additional parameter into both functions:

  let rec add precedes v ls =
    match ls with
    | hd :: tl ->
        if precedes v hd then
          v :: ls
        else
          hd :: add precedes v tl
    | [] -> [v]

  let rec merge precedes ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if precedes h1 h2 then
          h1 :: merge precedes t1 ls2
        else
          h2 :: merge precedes ls1 t2
    | [], _ -> ls2
    | _ -> ls1

The only difference is that instead of v <= hd and h1 <= h2, we have precedes v hd and precedes h1 h2 now. To achieve the old behavior we can pass (<=) for the precedes function and it will work the same as before:

add (<=) 15 [10; 20; 30] --> [10; 15; 20; 30]

merge (<=) [7; 11; 12] [10; 20; 30] --> [7; 10; 11; 12; 20; 30]

Full code:

🐤 click to open

module PolySortedList = struct
  type 'a t = 'a list

  let empty = []

  let rec add precedes v ls =
    match ls with
    | hd :: tl ->
        if precedes v hd then
          v :: ls
        else
          hd :: add precedes v tl
    | [] -> [v]

  let rec merge precedes ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if precedes h1 h2 then
          h1 :: merge precedes t1 ls2
        else
          h2 :: merge precedes ls1 t2
    | [], _ -> ls2
    | _ -> ls1

  let print to_string ls =
    let print_one x = Printf.printf "%s " (to_string x) in
    List.iter print_one ls;
    print_newline ()
end

module PSL = PolySortedList

let () =

  PSL.print string_of_int
    (PSL.add (<=) 15 [10; 20; 30]);

  PSL.print string_of_int
    (PSL.merge (<=) [7; 11; 12] [10; 20; 30])

Output:

10 15 20 30
7 10 11 12 20 30

What can possibly go wrong?

Having customized order is helpful, but it can lead to errors if you pass a wrong function by mistake, or if you try to merge two lists that we created with different precedes functions:

module PSL = PolySortedList

let _ =
  PSL.( empty
    |> add (<=) 1
    |> add (<=) 10
    |> add (<=) 5     (* [1; 5; 10] *)
    |> add (>=) 2     (* [2; 1; 5; 10] *)
  )

One possible solution

One possible solution is to represent the list as a record:

  type 'a t = {ls : 'a list; precedes : 'a -> 'a -> bool}

and replace the value empty with a function that takes a precedes function and creates an empty list record, which remembers how the elements must be ordered:

  let make_empty prc = {ls = []; precedes = prc}

Then, in the implementation of the functions add and merge, you can simply refer to the field precedes. The user does not have to supply it every time.

One downside of this solution is that the implementation is now more complex, since the data type is a record. Another, more advanced problem, is that it can be hard to ensure that you don’t try to merge two lists that use different precedes function.

Full code:

🐤 click to open

module PolySortedList = struct
  type 'a t = { ls : 'a list; precedes : 'a -> 'a -> bool }

  let make_empty prc = { ls = []; precedes = prc }

  let add v a =
    let rec aux ls =
      match ls with
      | hd :: tl ->
          if a.precedes v hd then
            v :: ls
          else
            hd :: aux tl
      | [] -> [v]
    in
    {a with ls = aux a.ls}

  let merge a b =
    let rec aux ls1 ls2 =
      match ls1, ls2 with
      | h1::t1, h2::t2 ->
          if a.precedes h1 h2 then
            h1 :: aux t1 ls2
          else
            h2 :: aux ls1 t2
      | [], _ -> ls2
      | _ -> ls1
    in
    {a with ls = aux a.ls b.ls}

  let print to_string a =
    let print_one x = Printf.printf "%s " (to_string x) in
    List.iter print_one a.ls;
    print_newline ()
end

Usage:

module PSL = PolySortedList

let () =
  let ls = PSL.(make_empty (<=) |> add 10 |> add 20) in

  PolySortedList.print (string_of_int) ls

Output:

10 20

An even better solution: Can PolySortedList generate separate modules for different precedes functions?

What if we could parameterize the module itself with the function precedes? With made-up syntax (not real OCaml), it could look like:

module PSL1 = PolySortedList(<=)   (* Not actual OCaml *)
module PSL2 = PolySortedList(>=)

If it were possible, then

let _ =
  let ls1 = PSL1.(
    empty
    |> add 1
    |> add 10
    |> add 5  (* [1; 5; 10] - ascending order *)
    )
  in

  PSL1.print ls1;

  let ls2 = PSL2.(
    empty
    |> add 7
    |> add 3
    |> add 8  (* [8; 7; 3] - descending order *)
    )
  in

  PSL2.print ls2

Such module specialization is possible using module functors.

Functors ("module functions")

We define a functor SortedList, parameterizable by a module of the module type ELEMENT.

We need to define the module type first, to tell the compiler how the functor SortedList needs to be parameterized. You can think of functors as module functions. A functor is, essentially, a module, which misses certain functionality that must be provided from the outside.

module type ELEMENT =
  sig
    type t
    val precedes : t -> t -> bool
    val to_string : t -> string
  end

The functor itself:

module SortedList (E : ELEMENT) = struct
  (* Use E.t as the type of list elements: *)
  type t = E.t list

  let empty = []

  (* Use E.precedes for comparison in add and merge functions: *)
  let rec add v ls =
    match ls with
    | hd :: tl ->
        if E.precedes v hd then
          v :: ls
        else
          hd :: add v tl
    | [] -> [v]

  let rec merge ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if E.precedes h1 h2 then
          h1 :: merge t1 ls2
        else
          h2 :: merge ls1 t2
    | [], _ -> ls2
    | _ -> ls1

  (* Use E.to_string to convert element to string *)
  let print ls =
    let print_one x = Printf.printf "%s " (E.to_string x) in
    List.iter print_one ls;
    print_newline ()
end

Let us make a sorted list of persons. We first define a module PersonName that implements the module type ELEMENT, then use the functor SortedList to make a module for sorted list of persons PersonSL:

module PersonName = struct
  type t = {first : string; last : string}

  let precedes n1 n2 =
    n1.last < n2.last ||
    n1.last = n2.last && n1.first <= n2.first
    (* sort by last name, then first name *)

  let to_string name =
    name.first ^ " " ^ name.last
end

module PersonSL = SortedList(PersonName)

let _ =
  PersonSL.empty
  |> PersonSL.add PersonName.({first = "Alice"; last = "Johnson"})
  |> PersonSL.add PersonName.({first = "Bob"; last = "Thompson"})
  |> PersonSL.add PersonName.({first = "Carl"; last = "Johnson"})
  (* --> [ Alice Johnson, Carl Johnson, Bob Thompson ] *)

Abstracting modules produced by functors

The implementation above still keeps the implementation of the produced modules open.

Let us make two modules of sorted lists who use different precedes operation, but the types of elements are the same:

module SL1 = SortedList (struct
    type t = int
    let to_string = string_of_int
    let precedes = (<=)
  end)

module SL2 = SortedList (struct
    type t = int
    let to_string = string_of_int
    let precedes = (>=)
  end)

The type SL1.t is the same as SL2.t, they are both int list. So, the module system does not prohibit using these types interchangeably, since they are both just lists of integers. Because of that, the following code produces an incorrectly sorted list:

let _ =
  let ls1 = SL1.(
    empty
    |> add 1
    |> add 10
    |> add 5  (* [1; 5; 10] - ascending order *)
    )
  in

  let ls2 = SL2.(
    empty
    |> add 7
    |> add 3
    |> add 8  (* [8; 7; 3] - descending order *)
    )
  in

  SL1.print (SL1.merge ls1 ls2)  (* [1; 5; 8; 7; 3; 10] - out of order *)

We can make types SL1.t and SL2.t abstract if we tell the functor SortedList output modules with abstract types (of module type SL):

module type SL =
  sig
    type elem
    type t
    val empty : t
    val add : elem -> t -> t
    val merge : t -> t -> t
    val print : t -> unit
  end

module SortedList (E : ELEMENT) : (SL with type elem = E.t) = struct
  (* Use E.t as the type of list elements: *)
  type elem = E.t

  type t = elem list

  ...
end

Then SL1.t and SL2.t will have different type, so if we try merging SL1.t and SL2.t, the compiler will complain:

  SL1.print (SL1.merge ls1 ls2)  (* compile error! *)
Error: This expression has type SL2.t but an expression was expected of type
         SL1.t

Full code:

🐤 click to open

module type ELEMENT =
  sig
    type t
    val precedes : t -> t -> bool
    val to_string : t -> string
  end

module type SL =
  sig
    type elem
    type t
    val empty : t
    val add : elem -> t -> t
    val merge : t -> t -> t
    val print : t -> unit
  end

module SortedList (E : ELEMENT) : (SL with type elem = E.t) = struct
  (* Use E.t as the type of list elements: *)
  type elem = E.t

  type t = elem list

  let empty = []

  (* Use E.precedes for comparison in add and merge functions: *)
  let rec add v ls =
    match ls with
    | hd :: tl ->
        if E.precedes v hd then
          v :: ls
        else
          hd :: add v tl
    | [] -> [v]

  let rec merge ls1 ls2 =
    match ls1, ls2 with
    | h1::t1, h2::t2 ->
        if E.precedes h1 h2 then
          h1 :: merge t1 ls2
        else
          h2 :: merge ls1 t2
    | [], _ -> ls2
    | _ -> ls1

  (* Use E.to_string to convert element to string *)
  let print ls =
    let print_one x = Printf.printf "%s " (E.to_string x) in
    List.iter print_one ls;
    print_newline ()
end

module SL1 = SortedList (struct
    type t = int
    let to_string = string_of_int
    let precedes = (<=)
  end)

module SL2 = SortedList (struct
    type t = int
    let to_string = string_of_int
    let precedes = (>=)
  end)

let _ =
  let ls1 = SL1.(
    empty
    |> add 1
    |> add 10
    |> add 5  (* [1; 5; 10] - ascending order *)
    )
  in

  let ls2 = SL2.(
    empty
    |> add 7
    |> add 3
    |> add 8  (* [8; 7; 3] - descending order *)
    )
  in

  SL1.print ls1;

  SL2.print ls2;

Output:

1 5 10
8 7 3

However, if we try merging SL1.t and SL2.t, the compiler will complain:

  SL1.print (SL1.merge ls1 ls2)  (* compile error! *)
Error: This expression has type SL2.t but an expression was expected of type
         SL1.t

Map and Set

Modules Map and Set use a functor interface for defining ordered map and set data types. Both modules provide functors, Map.Make and Set.Make, respectively, that must be parameterized by modules of type Map.OrderedType or Set.OrderedType.

Documentation on modules produced by the functors Map.Make and Set.Make

Example:

module IntPair = struct
  type t = int*int
  let compare = Pervasives.compare
end

module M = Map.Make(IntPair)

let () =

  let m =
    M.empty
    |> M.add (1,2) "Bob"
    |> M.add (3,4) "Alice"
    |> M.add (0,5) "Marco"
    |> M.add (3,7) "Maggie"
  in

  M.iter (fun (x,y) name ->
    Printf.printf "(%i, %i) -> %s\n" x y name
  ) m

Output:

(0, 5) -> Marco
(1, 2) -> Bob
(3, 4) -> Alice
(3, 7) -> Maggie

OCaml map data type is stored as a height-balanced binary tree ordered by keys:

Oz2OOVB

The module IntPair specifies how the keys must be compared.

The definition of the module IntPair satisfies Map.OrderedType signature:

module type OrderedType =
  sig
    type t
    val compare : t -> t -> int
  end

The function compare takes two values (compare x y) and should return an integer

  • 0 if x is "equal" to y

  • positive integer if x is "greater than" y

  • negative integer if x is "smaller than" y

Since this behavior matches exactly the polymorphic function compare in the module Pervasives, we simply used it without defining our own comparison function. However, if you want to have specific order for the elements in the map, you can write your own function here.

Set works exactly the same as Map, but stores only keys, without values.

Example:

module IntPair = struct
  type t = int*int
  let compare = Pervasives.compare
end

module S = Set.Make(IntPair)

let () =
  let s =
    S.empty
    |> S.add (1,2)
    |> S.add (3,4)
    |> S.add (0,5)
    |> S.add (3,7)
  in

  S.iter (fun (x,y) ->
    Printf.printf "(%i, %i)\n" x y
  ) s

Output:

(0, 5)
(1, 2)
(3, 4)
(3, 7)

Map and Set provide faster lookup operation than List, O(log n) instead of O(n). However insertion also takes O(log n) time. Generally, Map and Set are preferred if you need to store a lot of data that is not processed linearly (but instead different elements are frequently looked up without specific order).

You can use Map as a kind of functional array. Existing functional array implementation cannot achieve O(1) update time, as their imperative counterparts.