What are [@@deriving compare, sexp_of, hash] for?

Real World OCaml has a dedicated chapter Maps and Hash Tables. It's an excellent tutorial to start with data containers in Jane Street's core (or Base), an alternative to the OCaml standard library stdlib. For both core and stdlib, elements need to provide required functions to be put into containers. The tutorial gives examples of elements with hand-written functions and derived functions.

  open Core

  module Book = struct
    module T = struct
      type t = { title : string; isbn : string }

      let compare t1 t2 =
        let cmp_title = String.compare t1.title t2.title in
        if cmp_title <> 0 then cmp_title else String.compare t1.isbn t2.isbn

      let sexp_of_t t : Sexp.t = List [ Atom t.title; Atom t.isbn ]
    end

    include T
    include Comparator.Make (T)
  end
  open Core

  module Book = struct
    module T = struct
      type t = { title : string; isbn : string } [@@deriving compare, sexp_of]
    end

    include T
    include Comparator.Make (T)
  end

Let's focus in what's inside of T. In this post, we will figure out one question:

What are [@@deriving compare, sexp_of, hash] for, as the post title askes?

We will answer this question from the user-code side, and left the explanation from the library-code side for the future.

Deriving Functions

These ppx deriver are janestreet/ppx_compare for equal and compare, janestreet/ppx_sexp_conv for sexp_of and of_sexp, and janestreet/ppx_hash for hash and hash_fold.

A quick way to inspect the deriving result is to change [@@deriving <ppx>] to [@@deriving_inline <ppx>] [@@@end] so there we can read the generated code between tags.

The idea of all these function deriver are type-based structural traversal. Don't be disturbed by the variable names in the generated code.

type foo1 = P1 of int * string [@@deriving_inline equal]

let _ = fun (_ : foo1) -> ()

let equal_foo1 =
  (fun a__008_ b__009_ ->
     if Stdlib.( == ) a__008_ b__009_ then true
     else
       match (a__008_, b__009_) with
       | P1 (_a__010_, _a__012_), P1 (_b__011_, _b__013_) ->
           Stdlib.( && )
             (equal_int _a__010_ _b__011_)
             (equal_string _a__012_ _b__013_)
    : foo1 -> foo1 -> bool)

let _ = equal_foo1

[@@@end]

equal is straightforward. Two values are equal if they are physically equal i.e. at the same memory address. Otherwises, they need to be equal piecewise-ly. Core doesn't shadow OCaml vanilla standard library Stdlib.

type foo2 = P2 of int * string [@@deriving_inline compare]

let _ = fun (_ : foo2) -> ()

let compare_foo2 =
  (fun a__014_ b__015_ ->
     if Stdlib.( == ) a__014_ b__015_ then 0
     else
       match (a__014_, b__015_) with
       | P2 (_a__016_, _a__018_), P2 (_b__017_, _b__019_) -> (
           match compare_int _a__016_ _b__017_ with
           | 0 -> compare_string _a__018_ _b__019_
           | n -> n)
    : foo2 -> foo2 -> int)

let _ = compare_foo2

[@@@end]

compare is similar to equal. Stdlib.compare : 'a -> 'a -> int the polymorphic compare is not used in Core, however, the convertion should be observed: compare x y returns 0 if x is equal to y, a negative integer if x is less than y, and a positive integer if x is greater than y.

type foo3 = P3 of int * string [@@deriving_inline sexp]

let _ = fun (_ : foo3) -> ()

let foo3_of_sexp =
  (let error_source__022_ = "src-ocaml/elements.ml.foo3" in
   function
   | Sexplib0.Sexp.List
       (Sexplib0.Sexp.Atom (("p3" | "P3") as _tag__025_) :: sexp_args__026_) as
     _sexp__024_ -> (
       match sexp_args__026_ with
       | [ arg0__027_; arg1__028_ ] ->
           let res0__029_ = int_of_sexp arg0__027_
           and res1__030_ = string_of_sexp arg1__028_ in
           P3 (res0__029_, res1__030_)
       | _ ->
           Sexplib0.Sexp_conv_error.stag_incorrect_n_args error_source__022_
             _tag__025_ _sexp__024_)
   | Sexplib0.Sexp.Atom ("p3" | "P3") as sexp__023_ ->
       Sexplib0.Sexp_conv_error.stag_takes_args error_source__022_ sexp__023_
   | Sexplib0.Sexp.List (Sexplib0.Sexp.List _ :: _) as sexp__021_ ->
       Sexplib0.Sexp_conv_error.nested_list_invalid_sum error_source__022_
         sexp__021_
   | Sexplib0.Sexp.List [] as sexp__021_ ->
       Sexplib0.Sexp_conv_error.empty_list_invalid_sum error_source__022_
         sexp__021_
   | sexp__021_ ->
       Sexplib0.Sexp_conv_error.unexpected_stag error_source__022_ sexp__021_
    : Sexplib0.Sexp.t -> foo3)

let _ = foo3_of_sexp

let sexp_of_foo3 =
  (fun (P3 (arg0__031_, arg1__032_)) ->
     let res0__033_ = sexp_of_int arg0__031_
     and res1__034_ = sexp_of_string arg1__032_ in
     Sexplib0.Sexp.List [ Sexplib0.Sexp.Atom "P3"; res0__033_; res1__034_ ]
    : foo3 -> Sexplib0.Sexp.t)

let _ = sexp_of_foo3

[@@@end]

sexp_of and of_sexp are for serialization and deserialization. More details are at RWO Chapter Data Serialization with S-Expressions.

type foo4 = P4 of int * string [@@deriving_inline hash]

let _ = fun (_ : foo4) -> ()

let (hash_fold_foo4 :
      Ppx_hash_lib.Std.Hash.state -> foo4 -> Ppx_hash_lib.Std.Hash.state) =
  (fun hsv arg ->
     match arg with
     | P4 (_a0, _a1) ->
         let hsv = hsv in
         let hsv =
           let hsv = hsv in
           hash_fold_int hsv _a0
         in
         hash_fold_string hsv _a1
    : Ppx_hash_lib.Std.Hash.state -> foo4 -> Ppx_hash_lib.Std.Hash.state)

let _ = hash_fold_foo4

let (hash_foo4 : foo4 -> Ppx_hash_lib.Std.Hash.hash_value) =
  let func arg =
    Ppx_hash_lib.Std.Hash.get_hash_value
      (let hsv = Ppx_hash_lib.Std.Hash.create () in
       hash_fold_foo4 hsv arg)
  in
  fun x -> func x

let _ = hash_foo4

[@@@end]

Here are the most complex functions. hash_fold is a state-passing function to perform the hashing payload. hash wraps hash_fold by providing an initial hash state via Hash.create () and converting the hash result to int via Hash.get_hash_value. It implies if we need a cutsom hash function, implementing hash_fold to perform hashing and providing the same wrapping hash here.

Further explanation can be found at doc for Base.Hasher.S and the ppx_hash/design_doc. In short, hash_fold should take care of not only the values in the structure not also the structure itself, to avoid easily hash collision.

It becomes tricky when a Core-container is used as an element and this element is intended to be used in a container requiring hash, e.g. putting this Int_set_as_element in a Hash_set.

module Int_set_as_element = struct
  module T = struct
    type t = Set.M(Int).t [@@deriving_inline compare, sexp_of, hash]

    let _ = fun (_ : t) -> ()

    let compare =
      (fun a__035_ b__036_ -> Set.compare_m__t (module Int) a__035_ b__036_
        : t -> t -> int)

    let _ = compare

    let sexp_of_t =
      (fun x__037_ -> Set.sexp_of_m__t (module Int) x__037_
        : t -> Sexplib0.Sexp.t)

    let _ = sexp_of_t

    let (hash_fold_t :
          Ppx_hash_lib.Std.Hash.state -> t -> Ppx_hash_lib.Std.Hash.state) =
     fun hsv arg -> Set.hash_fold_m__t (module Int) hsv arg

    and (hash : t -> Ppx_hash_lib.Std.Hash.hash_value) =
      let func = Set.hash_m__t (module Int) in
      fun x -> func x

    let _ = hash_fold_t
    and _ = hash

    [@@@end]
  end

  include T
  include Comparator.Make (T)
end

let _ = Set.empty (module Int_set_as_element)
let _ = Hash_set.create (module Int_set_as_element)

Both Set.hash_m__t and Set.hash_fold_m__t requires a first-class module implementing Hasher.S (which requires hash_fold_t). In this example, the module is Int. Int is a build-in module containing hash and hash_fold. If this is Your_data rather than Int, the generated code above for Set.M(Your_data).t will have Set.hash_m__t (module Your_data) and Set.hash_fold_m__t (module Your_data). Therefore, Your_data have to provide hash and hash_fold functions.

This can explain why Base.Hash_set.Key contains hash and no hash_fold, but ppx_hash still derives both hash and hash_fold.

p.s. I don't claim this design is good.