#include "candy.h"
#define REP(n) for(u32 _n = n; _n; _n--)
static u8 (*read_at)(u32);
static u32 type_start, arg_start, cursor;
static int args[1024];
static u32 types[1024], type_count;
u32 arg_count;
int the_type;
u32 the_value;
u8 read(u32 *i) { return read_at((*i)++); }
u32 unleb128(u32 *i) {
for (u32 n = 0, sh = 0;; sh += 7) {
u8 c = read(i);
n += (c & 127) << sh;
if (c < 128) return n;
}
}
int unsleb128(u32 *i) {
int n = 0, sh = 0;
for(;;) {
u8 c = read(i);
n += (c & 127) << sh;
sh += 7;
if (c < 128) return c & 64 ? n - (1 << sh) : n;
}
}
static void parse_fields(u32 *);
static void parse_type(u32 *i) {
switch(unsleb128(i)) {
case -18: parse_type(i); break;
case -19: parse_type(i); break;
case -20: parse_fields(i); break;
case -21: parse_fields(i); break;
default: break;
}
}
static void parse_fields(u32 *i) {
REP(unleb128(i)) unleb128(i), parse_type(i);
}
static void parse_type_table(u32 *i) {
type_count = unleb128(i);
u32 *p = types;
REP(type_count) {
*p++ = *i;
parse_type(i);
}
}
static int skip() {
u32 tmp;
if (the_type >= 0) {
tmp = types[the_type];
the_type = unsleb128(&tmp);
}
if (the_type >= 0) return 1;
switch(the_type) {
case -2: the_value++; break;
case -3: unleb128(&the_value); break;
case -4: unsleb128(&the_value); break;
case -5: the_value++; break;
case -6: the_value+=2; break;
case -7: the_value+=4; break;
case -8: the_value+=8; break;
case -9: the_value++; break;
case -10: the_value+=2; break;
case -11: the_value+=4; break;
case -12: the_value+=8; break;
case -13: the_value+=4; break;
case -14: the_value+=8; break;
case -15: the_value+=unleb128(&the_value); break;
case -24: the_value++; the_value+=unleb128(&the_value); break;
case -18: // opt
the_type = unsleb128(&tmp);
if (read(&the_value) && skip()) return 1;
break;
case -19: // vec
{
int t = unsleb128(&tmp);
u32 n = unleb128(&the_value);
REP(n) {
the_type = t;
if (skip()) return 1;
}
break;
}
case -20: // rec
{
u32 n = unleb128(&tmp);
REP(n) {
unleb128(&tmp);
the_type = unsleb128(&tmp);
if (skip()) return 1;
}
break;
}
case -21: // variant
{
u32 i = 0, n = unleb128(&tmp);
u32 k = unleb128(&the_value);
for (; i < k; i++) unleb128(&tmp), unsleb128(&tmp);
unleb128(&tmp);
the_type = unsleb128(&tmp);
if (skip()) return 1;
for (; i < n; i++) unleb128(&tmp), unsleb128(&tmp);
break;
}
default: break;
}
return 0;
}
int descend(u32 h) {
if (the_type >= 0) {
cursor = types[the_type];
the_type = unsleb128(&cursor);
if (the_type >= 0) return -1;
}
if (the_type != -20) return -2;
u32 n = unleb128(&cursor);
for(u32 i = 0; i < n; i++) {
u32 field_hash = unleb128(&cursor);
the_type = unsleb128(&cursor);
if (field_hash == h) return 0;
if (skip()) return -1;
}
return 1;
}
void candy(u8 (*read_fun)(u32)) {
read_at = read_fun;
cursor = 4;
parse_type_table(&cursor);
arg_count = unleb128(&cursor);
type_start = cursor;
for (u32 k = 0; k < arg_count; k++) args[k] = unsleb128(&cursor);
arg_start = cursor;
}
void set_read_at(u8 (*read_fun)(u32)) {
read_at = read_fun;
}
int seek(u32 k) {
cursor = type_start;
the_type = unsleb128(&cursor);
the_value = arg_start;
REP(k) {
if (skip()) return 1;
the_type = unsleb128(&cursor);
}
return 0;
}
u64 read_u64(u32* p) {
u64 n = 0;
for (u32 i = 0; i < 8; i++) {
u64 x = read(p);
n += x << (i*8);
}
return n;
}
Decoding Candid
Writing one-off routines to pluck certain bytes out of incoming Candid messages gets tougher as the messages grow more complex. It’s time to roll up our sleeves and write functions to help decode Candid messages.
Our design is primitive:
-
Call
candy()
with a pointer to a function that returns the byte of a message at a given offset. -
Call
seek()
to move the read head to a given argument. It seems instead of multiple arguments, canisters prefer to have a single record argument, which is more flexible and extensible, so usually we callseek(0)
. -
At this point,
the_type
is the offset of the type at the read head, andthe_value
is the offset of the value at the read head.
If the read head points to a record type, then call descend()
with a field hash to move the head to a given field. This function returns 0 on success, 1 if the field was not found, and a negative value if an error occurred.
The user needs to know the Candid binary format; our library mainly takes care of storing the type definitions so it can correctly skip over bytes for seek()
and descend()
.
We hardly perform any validation. For example, we simply assume the message starts with "DIDL" and begins at offset 4. We also mangle any (S)LEB128-encoded numbers taking more than 32 bits.