Binary trees: warm up exercises

type 'a tree =
    Node of ('a * 'a tree * 'a tree)
  | Leaf

Number of internal nodes.

count_internal t returns the number of internal nodes (i.e. non-leaves) in the tree t.

count_internal Leaf  -->  0
count_internal (Node (10, Leaf, Node (20, Leaf, Leaf)))  -->  2
count_internal (Node (10, Leaf, Node (20, Node (15, Leaf, Leaf), Leaf)))  -->  3

 
.         10            10
         /  \          /  \
        .   20        .   20
           /  \          /  \
          .    .        15   .
                       /  \
                      .    .
 

🐤 click to open

let rec count_internal = function
  | Leaf -> 0
  | Node (q, l, r) -> 1 + count_internal l + count_internal r

Height of a tree is the length of the longest simple path from the root to a leaf.

Simple paths don’t allow visit the same node more than once. In a tree it implies that you cannot traverse up and down the tree. Simple paths from a root will always go down, towards the leaves.

height t returns the height of the tree t.

height Leaf  -->  0
height (Node ("cat", Node("bird", Leaf, Leaf), Node ("horse", Node ("dog", Leaf, Leaf), Leaf)))  -->  3

 
  .             cat
              /     \
           bird     horse
           /  \     /   \
          .    .   dog   .
                  /   \
                 .     .
 

🐤 click to open

let rec height = function
  | Leaf -> 0
  | Node (q, l, r) -> 1 + max (height l) (height r)

S-expressions

S-expressions (stands for symbolic expressions) is a format for representing tree- and list-like data. First, they were designed to represent LISP programs source code, and later adopted as a general format for storing structured data.

We define S-expression as either:

  • atom, contiguous string of one or more characters,

  • list of S-expressions that are separated by whitespace and enclosed in parentheses.

Examples:

abc
()
(abc def ghi)
(abc (def ghi) ())
(+ (* 10 20) 30 40)

This is a simplified variant of S-expression, where delimiters are only whitespace,' ', '\t', '\n', '\r', and parentheses '(' and ')'. More advanced variants would also allow, other things, for example, quote-delimited strings "red apple" considered as a single atom. We are not doing that.

In LISP and Scheme entire programs are represented as S-expressions:

(defun square (x)
   (* x x))
(defun factorial (x)
   (if (zerop x)
       1
       (* x (factorial (- x 1)))))

Declaring S-expression type

In OCaml, we can define S-expression as

type expr = A of string | L of expr list

Number of atoms.

count_atoms e returns the number of atoms in S-expression e.

We are going to consider empty lists L [] as atoms.

count_atoms (A "bob")  -->  1
count_atoms (L [A "bob"; L [A "alice"; A "dave"]])  -->  3
bob
(bob (alice dave))

🐤 click to open

let rec count_atoms = function
  | A _ -> 1
  | L [] -> 1  (* count empty lists as atoms *)
  | L ls -> List.fold_left (fun acc e -> acc + count_atoms e) 0 ls

Height of S-expression. Since S-expressions are a form of a tree, we can compute its height.

Count atoms and empty lists as S-expressions of height 0.

height (A "cat")  -->  0
height (L [A "1"; L [A "2"; A "3"; A "4"]])  -->  2

 
  cat           .
               / \
              1   .
                 /|\
                2 3 4
 

🐤 click to open

let rec height = function
  | A _ -> 0
  | L [] -> 0
  | L ls -> 1 + List.fold_left (fun acc e -> max acc (height e)) 0 ls

String of S-expression.

Converting an S-expression into string.

string_of_expr (L [A "1"; L [A "2"; A "3"; A "4"]])  -->  "(1 (2 3 4))"

🐤 click to open

let rec string_of_expr = function
  | A s -> s
  | L ls -> "(" ^ (ls |> List.map string_of_expr |> String.concat " ") ^ ")"

Parsing S-expressions using Opal combinator parser

Parsers are rarely written by hand, instead it is more practical to use parser generators such as Menhir. We will use a parser combinator library Opal.

Here is the code to parse text input into S-expressions (the code might look obscure, since it is using monadic combinators, but allows very concise definition of our parser):

main.ml
open Opal

type expr = A of string | L of expr list

(* parser *)
let atom_char = none_of ['('; ')'; ' '; '\t'; '\n'; '\r']
let atom = (spaces >> many1 atom_char) => implode
let parens = between (token "(") (token ")")

let rec expr input = (atom_expr <|> list_expr) input
and atom_expr input = (atom => (fun s -> (A s))) input
and list_expr input = (parens (many expr) => (fun ls -> (L ls))) input

(* expr to string conversion *)
let rec string_of_expr = function
  | A s -> s
  | L ls -> "(" ^ (ls |> List.map string_of_expr |> String.concat " ") ^ ")"

(* main, reading one S-expression from stdin *)
let () =
  let input = LazyStream.of_channel stdin in
  match parse expr input with
  | None -> Printf.printf "ERROR!\n"
  | Some e -> Printf.printf "Got an S-expression: %s\n" (string_of_expr e)

Opal is a single-file library, so you can just copy opal.ml from its Github repository and drop it in the same folder with this program file, then compile both files, opal.ml and main.ml:

$ ocamlfind ocamlopt -o prog opal.ml main.ml

Test the program:

$ echo "(a b c)" | ./prog
Got an S-expression: (a b c)

$ echo "(a b c (" | ./prog
ERROR!

Interpreting S-expressions as arithmetic formulas

Arithmetic expression evaluation.

Write a function eval that interprets S-expressions as arithmetic formulas with + and * operators and evaluates that formula.

eval (L [A "+"; A "1"; A "2"; L [A "*"; A "3"; A "4"; A "5"]])  -->  63

Explanation: (+ 1 2 (* 3 4 5)) stands for (1 + 2 + (3 * 4 * 5)) = 63.

🐤 click to open

let rec eval = function
  | A s -> int_of_string s
  | L [e] -> eval e
  | L ((A "+") :: tl) ->
      List.fold_left (+) 0 (List.map eval tl)
  | L ((A "*") :: tl) ->
      List.fold_left ( * ) 1 (List.map eval tl)
  | _ -> failwith "unknown operator"

Using this eval function, we can now evaluate the user input we’ve got from the parser:

(* main, reading one S-expression from stdin *)
let () =
  let input = LazyStream.of_channel stdin in
  match parse expr input with
  | None -> Printf.printf "ERROR!\n"
  | Some e ->
      Printf.printf "%s -> %i\n" (string_of_expr e) (eval e)

The program works:

$ echo "(+ 1 2 (* 3 4 5))" | ./prog
(+ 1 2 (* 3 4 5)) -> 63

Drawing S-expression trees using Cairo library

Intro to Cairo

Basic Cairo boilerplate code:

main.ml
let draw filename =

  let surface = Cairo.PDF.create filename ~w:100.0 ~h:100.0 in
  let cr = Cairo.create surface in

  Cairo.set_line_width cr 1.0;

  Cairo.set_source_rgba cr 0.0 0.0 0.0 1.0;
  Cairo.move_to cr 20.0 30.0;
  Cairo.show_text cr "text";
  Cairo.stroke cr;

  Cairo.set_source_rgba cr 0.3 0.3 0.7 1.0;
  Cairo.move_to cr 40.0 50.0;
  Cairo.line_to cr 70.0 80.0;
  Cairo.stroke cr;

  Cairo.Surface.finish surface

let () =
  draw "output.pdf"

Compile with ocamlfind and run:

$ ocamlfind ocamlopt -package cairo2 -linkpkg -o prog main.ml
$ ./prog

It produces output.pdf:

suLSTFe

Rendering S-expression trees

Full program listing:

main.ml
open Opal

type expr = A of string | L of expr list

(* parser *)
let atom_char = none_of ['('; ')'; ' '; '\t'; '\n'; '\r']
let atom = (spaces >> many1 atom_char) => implode
let parens = between (token "(") (token ")")

let rec expr input = (atom_expr <|> list_expr) input
and atom_expr input = (atom => (fun s -> (A s))) input
and list_expr input = (parens (many expr) => (fun ls -> (L ls))) input

(* number of atoms in s-expression *)
let rec count_atoms = function
  | A _ -> 1
  | L [] -> 1  (* count empty lists as atoms *)
  | L ls -> List.fold_left (fun acc e -> acc + count_atoms e) 0 ls

(* height if the tree *)
let rec height = function
  | A _ -> 0
  | L [] -> 0
  | L ls -> 1 + List.fold_left (fun acc e -> max acc (height e)) 0 ls

(* draw a diagram *)
let draw_expr filename e =

  let d = 40.0 in

  (* helper function computing the horizontal length the expression needs *)
  let span e = d *. float (count_atoms e) in

  (* width and height of the PDF file *)
  let w = span e +. 2.0 *. d in
  let h = float (height e + 2) *. d in

  let surface = Cairo.PDF.create filename ~w:w ~h:h in
  let cr = Cairo.create surface in
  Cairo.set_line_width cr 1.0;

  (* helper function drawing a subtree *)
  let rec draw_at (x, y) e =
    match e with
    | A s ->
        Cairo.set_source_rgba cr 0.0 0.0 0.0 1.0;
        let ext = Cairo.text_extents cr s in
        let xx = x -. ext.width *. 0.5 in
        let yy = y +. 0.3 *. d in
        Cairo.move_to cr xx yy;
        Cairo.show_text cr s;
        Cairo.stroke cr;
    | L ls ->
        let child_y = y +. d in
        ignore (List.fold_left (fun left_edge_x child_e ->
          let child_x = left_edge_x +. 0.5 *. span child_e in
          Cairo.set_source_rgba cr 0.3 0.3 0.7 1.0;
          Cairo.move_to cr x y;
          Cairo.line_to cr child_x child_y;
          Cairo.stroke cr;
          draw_at (child_x, child_y) child_e;
          left_edge_x +. span child_e
        ) (x -. (0.5 *. span e)) ls)
  in

  draw_at (w *. 0.5, d) e;

  Cairo.Surface.finish surface

(* main *)
let () =

  let input = LazyStream.of_channel stdin in
  match parse expr input with
  | None -> Printf.printf "ERROR!\n"
  | Some e ->
      draw_expr "output.pdf" e

To compile:

$ ocamlfind ocamlopt -package cairo2 -linkpkg -o prog opal.ml main.ml

Testing on short LISP programs:

square.lisp
(defun square (x)
   (* x x))
$ ./prog < square.lisp
BYcQiFM
factorial.lisp
(defun factorial (x)
   (if (zerop x)
       1
       (* x (factorial (- x 1)))))
$ ./prog < factorial.lisp
eaogDYu