What are [@@deriving compare, sexp_of, hash] for?
Real World OCaml has a dedicated chapter Maps and Hash Tables. It's an excellent tutorial to start with data containers in Jane Street's core
(or Base
), an alternative to the OCaml standard library stdlib
. For both core
and stdlib
, elements need to provide required functions to be put into containers. The tutorial gives examples of elements with hand-written functions and derived functions.
open Core
module Book = struct
module T = struct
type t = { title : string; isbn : string }
let compare t1 t2 =
let cmp_title = String.compare t1.title t2.title in
if cmp_title <> 0 then cmp_title else String.compare t1.isbn t2.isbn
let sexp_of_t t : Sexp.t = List [ Atom t.title; Atom t.isbn ]
end
include T
include Comparator.Make (T)
end
open Core
module Book = struct
module T = struct
type t = { title : string; isbn : string } [@@deriving compare, sexp_of]
end
include T
include Comparator.Make (T)
end
Let's focus in what's inside of T
. In this post, we will figure out one question:
What are [@@deriving compare, sexp_of, hash]
for, as the post title askes?
We will answer this question from the user-code side, and left the explanation from the library-code side for the future.
Deriving Functions
These ppx deriver are janestreet/ppx_compare for equal
and compare
, janestreet/ppx_sexp_conv for sexp_of
and of_sexp
, and janestreet/ppx_hash for hash
and hash_fold
.
A quick way to inspect the deriving result is to change [@@deriving <ppx>]
to [@@deriving_inline <ppx>] [@@@end]
so there we can read the generated code between tags.
The idea of all these function deriver are type-based structural traversal. Don't be disturbed by the variable names in the generated code.
type foo1 = P1 of int * string [@@deriving_inline equal]
let _ = fun (_ : foo1) -> ()
let equal_foo1 =
(fun a__008_ b__009_ ->
if Stdlib.( == ) a__008_ b__009_ then true
else
match (a__008_, b__009_) with
| P1 (_a__010_, _a__012_), P1 (_b__011_, _b__013_) ->
Stdlib.( && )
(equal_int _a__010_ _b__011_)
(equal_string _a__012_ _b__013_)
: foo1 -> foo1 -> bool)
let _ = equal_foo1
[@@@end]
equal
is straightforward. Two values are equal if they are physically equal i.e. at the same memory address. Otherwises, they need to be equal piecewise-ly. Core
doesn't shadow OCaml vanilla standard library Stdlib
.
type foo2 = P2 of int * string [@@deriving_inline compare]
let _ = fun (_ : foo2) -> ()
let compare_foo2 =
(fun a__014_ b__015_ ->
if Stdlib.( == ) a__014_ b__015_ then 0
else
match (a__014_, b__015_) with
| P2 (_a__016_, _a__018_), P2 (_b__017_, _b__019_) -> (
match compare_int _a__016_ _b__017_ with
| 0 -> compare_string _a__018_ _b__019_
| n -> n)
: foo2 -> foo2 -> int)
let _ = compare_foo2
[@@@end]
compare
is similar to equal
. Stdlib.compare : 'a -> 'a -> int
the polymorphic compare is not used in Core
, however, the convertion should be observed: compare x y
returns 0 if x is equal to y, a negative integer if x is less than y, and a positive integer if x is greater than y.
type foo3 = P3 of int * string [@@deriving_inline sexp]
let _ = fun (_ : foo3) -> ()
let foo3_of_sexp =
(let error_source__022_ = "src-ocaml/elements.ml.foo3" in
function
| Sexplib0.Sexp.List
(Sexplib0.Sexp.Atom (("p3" | "P3") as _tag__025_) :: sexp_args__026_) as
_sexp__024_ -> (
match sexp_args__026_ with
| [ arg0__027_; arg1__028_ ] ->
let res0__029_ = int_of_sexp arg0__027_
and res1__030_ = string_of_sexp arg1__028_ in
P3 (res0__029_, res1__030_)
| _ ->
Sexplib0.Sexp_conv_error.stag_incorrect_n_args error_source__022_
_tag__025_ _sexp__024_)
| Sexplib0.Sexp.Atom ("p3" | "P3") as sexp__023_ ->
Sexplib0.Sexp_conv_error.stag_takes_args error_source__022_ sexp__023_
| Sexplib0.Sexp.List (Sexplib0.Sexp.List _ :: _) as sexp__021_ ->
Sexplib0.Sexp_conv_error.nested_list_invalid_sum error_source__022_
sexp__021_
| Sexplib0.Sexp.List [] as sexp__021_ ->
Sexplib0.Sexp_conv_error.empty_list_invalid_sum error_source__022_
sexp__021_
| sexp__021_ ->
Sexplib0.Sexp_conv_error.unexpected_stag error_source__022_ sexp__021_
: Sexplib0.Sexp.t -> foo3)
let _ = foo3_of_sexp
let sexp_of_foo3 =
(fun (P3 (arg0__031_, arg1__032_)) ->
let res0__033_ = sexp_of_int arg0__031_
and res1__034_ = sexp_of_string arg1__032_ in
Sexplib0.Sexp.List [ Sexplib0.Sexp.Atom "P3"; res0__033_; res1__034_ ]
: foo3 -> Sexplib0.Sexp.t)
let _ = sexp_of_foo3
[@@@end]
sexp_of
and of_sexp
are for serialization and deserialization. More details are at RWO Chapter Data Serialization with S-Expressions.
type foo4 = P4 of int * string [@@deriving_inline hash]
let _ = fun (_ : foo4) -> ()
let (hash_fold_foo4 :
Ppx_hash_lib.Std.Hash.state -> foo4 -> Ppx_hash_lib.Std.Hash.state) =
(fun hsv arg ->
match arg with
| P4 (_a0, _a1) ->
let hsv = hsv in
let hsv =
let hsv = hsv in
hash_fold_int hsv _a0
in
hash_fold_string hsv _a1
: Ppx_hash_lib.Std.Hash.state -> foo4 -> Ppx_hash_lib.Std.Hash.state)
let _ = hash_fold_foo4
let (hash_foo4 : foo4 -> Ppx_hash_lib.Std.Hash.hash_value) =
let func arg =
Ppx_hash_lib.Std.Hash.get_hash_value
(let hsv = Ppx_hash_lib.Std.Hash.create () in
hash_fold_foo4 hsv arg)
in
fun x -> func x
let _ = hash_foo4
[@@@end]
Here are the most complex functions. hash_fold
is a state-passing function to perform the hashing payload. hash
wraps hash_fold
by providing an initial hash state via Hash.create ()
and converting the hash result to int
via Hash.get_hash_value
.
It implies if we need a cutsom hash function, implementing hash_fold
to perform hashing and providing the same wrapping hash
here.
Further explanation can be found at doc for Base.Hasher.S and the ppx_hash/design_doc. In short, hash_fold
should take care of not only the values in the structure not also the structure itself, to avoid easily hash collision.
It becomes tricky when a Core
-container is used as an element and this element is intended to be used in a container requiring hash, e.g. putting this Int_set_as_element
in a Hash_set
.
module Int_set_as_element = struct
module T = struct
type t = Set.M(Int).t [@@deriving_inline compare, sexp_of, hash]
let _ = fun (_ : t) -> ()
let compare =
(fun a__035_ b__036_ -> Set.compare_m__t (module Int) a__035_ b__036_
: t -> t -> int)
let _ = compare
let sexp_of_t =
(fun x__037_ -> Set.sexp_of_m__t (module Int) x__037_
: t -> Sexplib0.Sexp.t)
let _ = sexp_of_t
let (hash_fold_t :
Ppx_hash_lib.Std.Hash.state -> t -> Ppx_hash_lib.Std.Hash.state) =
fun hsv arg -> Set.hash_fold_m__t (module Int) hsv arg
and (hash : t -> Ppx_hash_lib.Std.Hash.hash_value) =
let func = Set.hash_m__t (module Int) in
fun x -> func x
let _ = hash_fold_t
and _ = hash
[@@@end]
end
include T
include Comparator.Make (T)
end
let _ = Set.empty (module Int_set_as_element)
let _ = Hash_set.create (module Int_set_as_element)
Both Set.hash_m__t
and Set.hash_fold_m__t
requires a first-class module implementing Hasher.S
(which requires hash_fold_t
). In this example, the module is Int
. Int
is a build-in module containing hash
and hash_fold
. If this is Your_data
rather than Int
, the generated code above for Set.M(Your_data).t
will have Set.hash_m__t (module Your_data)
and Set.hash_fold_m__t (module Your_data)
. Therefore, Your_data
have to provide hash
and hash_fold
functions.
This can explain why Base.Hash_set.Key
contains hash
and no hash_fold
, but ppx_hash
still derives both hash
and hash_fold
.
p.s. I don't claim this design is good.