Lists

Empty list

[]

List construction operator ::

1 :: 2 :: 3 :: []

This operator is right-associative, so fully-parenthesized, the expression means 1 :: (2 :: (3 :: []))

fQkP4x3

Short-hand syntax

[1; 2; 3]

A few examples demonstrating the short-hand notation and :: operator:

# 1 :: 2 :: 3 :: 4 :: [] ;;
- : int list = [1; 2; 3; 4]

# [1; 2; 3; 4] ;;
- : int list = [1; 2; 3; 4]
# let ls1 = [10; 20; 30] ;;
val ls1 : int list = [10; 20; 30]

# 15 :: ls1 ;;
- : int list = [15; 10; 20; 30]

# 100 :: 99 :: ls1 ;;
- : int list = [100; 99; 10; 20; 30]

Example

Let’s write a program that can classify lists into empty and non-empty, and return the corresponding string:

classify [] --> "empty"
classify [1;2;3] --> "non-empty"

One might use if statement:

let classify ls =
  if ls = [] then
    "empty"
  else
    "non-empty"

But how to access the elements of the list? (See the next section on pattern matching.)

Pattern matching

The inner structure of a composite type (such as list) can be inspected with a pattern matching construct:

let classify ls =
  match ls with
  | [] -> "empty"
  | hd :: tl -> "non-empty"

Pattern matching is like the C and C++ switch statement on steroids, able to inspect the structure of the examined object. In this case, if the hd :: tl pattern matches, then the variables hd and tl get bound to the head and tail values of the inspected list ls. However, in this example, these variables remain unused and only "non-empty" string is returned.

Let’s consider a more complex example.

Example: Length of the list

let rec length ls =
  match ls with
  | [] -> 0
  | hd :: tl -> 1 + length tl

If the list is empty, then its length is 0, otherwise it can be broken down into hd ("head") and tl ("tail") parts, then the total length is the length of the tail plus 1.

Its evaluation goes as follows:

    length [10; 20; 30]
--> 1 + length [20; 30]
--> 1 + (1 + length [30])
--> 1 + (1 + (1 + length []))
--> 1 + (1 + (1 + 0))
--> 1 + (1 + 1)
--> 1 + 2
--> 3

Wildcard pattern _

Wildcard pattern _ matches anything, but does not bind it to any variable:

let rec length ls =
  match ls with
  | [] -> 0
  | _ :: tl -> 1 + length tl

Because the value of the head of the list is not needed for computing length, we can simply put the wildcard _ there, the head value is matched, but no variable is created to store it.

Values can be matched directly

let rec all_ones ls =
  match ls with
  | 1 :: tl -> all_ones tl  (* if the head is 1, check the tail *)
  | _ :: _ -> false
  | [] -> true
all_ones [1; 1; 1; 1; 1] --> true

And you can match any expression except functions, it does not have to be a list:

let rec factorial n =
  match n with
  | 0 -> 1
  | _ -> n * factorial (n-1)
factorial 5 --> 120

The order of cases in pattern-matching

The cases are tried in order from top to bottom, and only the first matched case gets evaluated.

let test ls =
  match ls with
  | h :: t -> "long"
  | _ -> "anything"
  | [] -> "short"
test [] --> "anything"

Exercises

Sum of elements

sum [1; 2; 3] --> 6

🐤 click to open

let rec sum ls =
  match ls with
  | [] -> 0
  | hd::tl -> hd + sum tl

Contains zero

contains_zero [12; 34; 0; 15] --> true

🐤 click to open

let rec contains_zero ls =
  match ls with
  | [] -> false
  | hd::tl ->
      if hd = 0 then
        true
      else
        contains_zero tl
variant without if-then-else
let rec contains_zero ls =
  match ls with
  | [] -> false
  | hd::tl -> (hd = 0) || contains_zero tl
another variant
let rec contains_zero ls =
  match ls with
  | [] -> false
  | 0::tl -> true
  | hd::tl -> contains_zero tl

Contains specified number

contains 81 [77; 81; 15; 82] --> true

🐤 click to open

let rec contains x ls =
  match ls with
  | [] -> false
  | hd::tl -> (hd = x) || contains x tl

Check that list is sorted

is_sorted [5; 10; 15; 21] --> true

🐤 click to open

let rec is_sorted ls =
  match ls with
  | a :: b :: tl -> a <= b && is_sorted (b::tl)
  | a :: [] -> true
  | [] -> true
variant
let rec is_sorted ls =
  match ls with
  | a :: b :: tl -> a <= b && is_sorted (b::tl)
  | [a] -> true   (* [a] is the same as a::[] *)
  | [] -> true
variant with wildcard pattern _
let rec is_sorted ls =
  match ls with
  | a :: b :: tl -> a <= b && is_sorted (b::tl)
  | _ -> true    (* Wildcard pattern _ matches anything *)

Check if list contains two consecutive identical elements

contains_pair [10; 7; 15; 15; 28]--> true

🐤 click to open

let rec contains_pair ls =
  match ls with
  | a :: b :: tl -> (a = b) || contains_pair (b::tl)
  | _ -> false

Increment all elements by 1

increment_all [1; 2; 3; 4; 5] --> [2; 3; 4; 5; 6]

🐤 click to open

let rec increment_all ls =
  match ls with
  | hd :: tl -> (hd + 1) :: increment_all tl
  | [] -> []

Integer parity list (odd or even)

parity [1; 2; 3; 4; 5] --> [true; false; true; false; true]

🐤 click to open

let rec parity ls =
  match ls with
  | hd :: tl -> (hd mod 2 <> 0) :: parity tl
  | [] -> []

Keep only even

keep_even [1; 2; 3; 4; 5] --> [2; 4]

🐤 click to open

let rec keep_even ls =
  match ls with
  | hd :: tl ->
      if hd mod 2 = 0 then
        hd :: keep_even tl
      else
        keep_even tl
  | [] -> []

Insert after or between

insert_after 100 [1; 2; 3] --> [1; 100; 2; 100; 3; 100]

insert_between 100 [1; 2; 3] --> [1; 100; 2; 100; 3]

🐤 click to open

let rec insert_after v ls =
  match ls with
  | hd::tl -> hd :: v :: insert_after v tl
  | [] -> []

let rec insert_between v ls =
  match ls with
  | [] -> []
  | a :: [] -> [a]
  | a :: tl -> a :: v :: insert_between v tl
variant
let rec insert_between v ls =
  match ls with
  | a :: b :: tl -> a :: v :: insert_between v (b::tl)
  | _ -> ls

Interleave two lists

interleave [1; 2; 3] [100; 200; 300] --> [1; 100; 2; 200; 3; 300]

If one of the lists is shorter:

interleave [1; 2] [100; 200; 300; 400] --> [1; 100; 2; 200; 300; 400]
interleave [1; 2; 3; 4] [100; 200] --> [1; 100; 2; 200; 3; 4]

🐤 click to open

let rec interleave ls1 ls2 =
  match ls1 with
  | hd :: tl -> hd :: interleave ls2 tl
  | [] -> ls2

Append and Flatten

append [10; 20] [55; 66] -> [10; 20; 55; 66]

flatten [[1;2;3]; [8;9]; []; [4; 5]] --> [1; 2; 3; 8; 9; 4; 5]

🐤 click to open

let rec append ls1 ls2 =
  match ls1 with
  | hd::tl -> hd :: (append tl ls2)
  | [] -> ls2

let rec flatten ls =
  match ls with
  | a :: b :: tl -> flatten ((append a b) :: tl)
  | a :: [] -> a
  | [] -> []
a more efficient variant of flatten (compare their time complexities)
let rec flatten2 ls =
  match ls with
  | [] :: tl -> flatten2 tl
  | (x::xs) :: tl -> x :: flatten2 (xs :: tl)
  | [] -> []

Local name binding (let …​ in …​)

If your computation requires multiple steps, or it is reusing some values, for clarity and efficiency, you can use local name binding let v = e1 in e2:

Instead of (a - b) * (a - b), one can write

let diff = a - b in diff * diff

or more commonly:

let diff = a - b in
diff * diff

The scope of the name diff is limited only to the following expression diff * diff. A let-in declaration is not possible on its own without a corresponding expression where it is used, and where its scope is.

Evaluation goes as follows (assuming a = 5 and b = 3):

    let diff = 5 - 3 in diff * diff
--> let diff = 2 in diff * diff
--> 2 * 2
--> 4

Consider another example:

let x = 4 + 1 in
let sq = x * x in
10 * (sq - x)

The scope of x is the whole big expression (let sq = x * x in 10 * (sq - x)), which itself makes another local name binding for sq:

let x = 4 + 1 in
  ( let sq = x * x in
      10 * (sq - x) )

Evaluation goes as follows:

    let x = 4 + 1 in
      ( let sq = x * x in
          10 * (sq - x) )

--> let x = 5 in
      ( let sq = x * x in
          10 * (sq - x) )

-->     let sq = 5 * 5 in
          10 * (sq - 5)

-->     let sq = 25 in
          10 * (sq - 5)

-->       10 * (25 - 5)

-->       10 * 20

-->       200

More formally, the evaluation semantics for a local name binding is as follows:

let v = e1 in e2
  • first, expression e1 gets evaluated to its value x

  • then all occurrences of name v in expression e2 get substituted with x, resulting in a new expression e3

  • then the new expression e3 gets evaluated to its value