Decoding Candid

Writing one-off routines to pluck certain bytes out of incoming Candid messages gets tougher as the messages grow more complex. It’s time to roll up our sleeves and write functions to help decode Candid messages.

Our design is primitive:

  1. Call candy() with a pointer to a function that returns the byte of a message at a given offset.

  2. Call seek() to move the read head to a given argument. It seems instead of multiple arguments, canisters prefer to have a single record argument, which is more flexible and extensible, so usually we call seek(0).

  3. At this point, the_type is the offset of the type at the read head, and the_value is the offset of the value at the read head.

If the read head points to a record type, then call descend() with a field hash to move the head to a given field. This function returns 0 on success, 1 if the field was not found, and a negative value if an error occurred.

The user needs to know the Candid binary format; our library mainly takes care of storing the type definitions so it can correctly skip over bytes for seek() and descend().

We hardly perform any validation. For example, we simply assume the message starts with "DIDL" and begins at offset 4. We also mangle any (S)LEB128-encoded numbers taking more than 32 bits.

#include "candy.h"
#define REP(n) for(u32 _n = n; _n; _n--)
static u8 (*read_at)(u32);
static u32 type_start, arg_start, cursor;
static int args[1024];
static u32 types[1024], type_count;
u32 arg_count;
int the_type;
u32 the_value;

u8 read(u32 *i) { return read_at((*i)++); }
u32 unleb128(u32 *i) {
  for (u32 n = 0, sh = 0;; sh += 7) {
    u8 c = read(i);
    n += (c & 127) << sh;
    if (c < 128) return n;
  }
}

int unsleb128(u32 *i) {
  int n = 0, sh = 0;
  for(;;) {
    u8 c = read(i);
    n += (c & 127) << sh;
    sh += 7;
    if (c < 128) return c & 64 ? n - (1 << sh) : n;
  }
}

static void parse_fields(u32 *);
static void parse_type(u32 *i) {
  switch(unsleb128(i)) {
    case -18: parse_type(i); break;
    case -19: parse_type(i); break;
    case -20: parse_fields(i); break;
    case -21: parse_fields(i); break;
    default: break;
  }
}
static void parse_fields(u32 *i) {
  REP(unleb128(i)) unleb128(i), parse_type(i);
}

static void parse_type_table(u32 *i) {
  type_count = unleb128(i);
  u32 *p = types;
  REP(type_count) {
   *p++ = *i;
   parse_type(i);
 }
}

static int skip() {
  u32 tmp;
  if (the_type >= 0) {
    tmp = types[the_type];
    the_type = unsleb128(&tmp);
  }
  if (the_type >= 0) return 1;
  switch(the_type) {
    case -2: the_value++; break;
    case -3: unleb128(&the_value); break;
    case -4: unsleb128(&the_value); break;
    case -5: the_value++; break;
    case -6: the_value+=2; break;
    case -7: the_value+=4; break;
    case -8: the_value+=8; break;
    case -9: the_value++; break;
    case -10: the_value+=2; break;
    case -11: the_value+=4; break;
    case -12: the_value+=8; break;
    case -13: the_value+=4; break;
    case -14: the_value+=8; break;
    case -15: the_value+=unleb128(&the_value); break;
    case -24: the_value++; the_value+=unleb128(&the_value); break;
    case -18:  // opt
      the_type = unsleb128(&tmp);
      if (read(&the_value) && skip()) return 1;
      break;
    case -19:  // vec
      {
      int t = unsleb128(&tmp);
      u32 n = unleb128(&the_value);
      REP(n) {
        the_type = t;
        if (skip()) return 1;
      }
      break;
      }
    case -20:  // rec
      {
      u32 n = unleb128(&tmp);
      REP(n) {
        unleb128(&tmp);
        the_type = unsleb128(&tmp);
        if (skip()) return 1;
      }
      break;
      }
    case -21:  // variant
      {
      u32 i = 0, n = unleb128(&tmp);
      u32 k = unleb128(&the_value);
      for (; i < k; i++) unleb128(&tmp), unsleb128(&tmp);
      unleb128(&tmp);
      the_type = unsleb128(&tmp);
      if (skip()) return 1;
      for (; i < n; i++) unleb128(&tmp), unsleb128(&tmp);
      break;
      }
    default: break;
  }
  return 0;
}

int descend(u32 h) {
  if (the_type >= 0) {
    cursor = types[the_type];
    the_type = unsleb128(&cursor);
    if (the_type >= 0) return -1;
  }
  if (the_type != -20) return -2;
  u32 n = unleb128(&cursor);
  for(u32 i = 0; i < n; i++) {
    u32 field_hash = unleb128(&cursor);
    the_type = unsleb128(&cursor);
    if (field_hash == h) return 0;
    if (skip()) return -1;
  }
  return 1;
}

void candy(u8 (*read_fun)(u32)) {
  read_at = read_fun;
  cursor = 4;
  parse_type_table(&cursor);
  arg_count = unleb128(&cursor);
  type_start = cursor;
  for (u32 k = 0; k < arg_count; k++) args[k] = unsleb128(&cursor);
  arg_start = cursor;
}

void set_read_at(u8 (*read_fun)(u32)) {
  read_at = read_fun;
}

int seek(u32 k) {
  cursor = type_start;
  the_type = unsleb128(&cursor);
  the_value = arg_start;
  REP(k) {
    if (skip()) return 1;
    the_type = unsleb128(&cursor);
  }
  return 0;
}

u64 read_u64(u32* p) {
  u64 n = 0;
  for (u32 i = 0; i < 8; i++) {
    u64 x = read(p);
    n += x << (i*8);
  }
  return n;
}

Ben Lynn 💡