nooc

nooc programming language compiler
git clone git://git.nihaljere.xyz/nooc
Log | Files | Refs | LICENSE

commit 32f55ca8fa60097ffaba66273a4fd64c9299b676
parent e9015e47939be0e2606d416653abdab37fa7bc66
Author: Nihal Jere <nihal@nihaljere.xyz>
Date:   Sun, 12 Dec 2021 23:03:25 -0600

Add types, stack frame construction and consequently procedures

Types are stored in a hashmap, indexed by their blake3 hash, with
an offset as an array as their value. This basically allows us to
ignore the possibility of hash collisions, because blake3 has aribtrary
width.

Type names are stored in a hashmap, indexed by their name, and have
an offset in an array as their value.

This makes it easy to find redundant types, even if they have the
same name. If a new type isn't in the first hashmap, then it is
added and added to the types array. Then an entry in the second
hashmap is created to point to the offset.

This was all done to allow parameter type specification for procedures,
although we don't typecheck them yet.

%rbp and %rsp are used for dealing with stack frames, the same way as
in C. Some work needs to be done to make sure that these are not
overwritten.

This enables actual procedures, so some tests for procedures for
the exit and write syscalls were added.

The map implementation is stolen from cproc (Michael Forney), and
blake3 comes from blake3-tiny (Michael Forney).

Diffstat:
MMakefile | 4++--
Ablake3.c | 175+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ablake3.h | 20++++++++++++++++++++
Ablockstack.c | 35+++++++++++++++++++++++++++++++++++
Mmain.c | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Amap.c | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amap.h | 17+++++++++++++++++
Mnooc.h | 40++++++++++++++++++++++++++++++++++++----
Mparse.c | 126++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mparse.h | 1+
Asiphash.c | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/exit.pass.nooc | 8++++++++
Atest/exitwrite.pass.nooc | 14++++++++++++++
Atype.c | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atype.h | 3+++
Mutil.c | 20++++++++++++++++++++
Mutil.h | 2++
Mx64.c | 43+++++++++++++++++++++++++++++++++++++++++++
Mx64.h | 4++++
19 files changed, 1055 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,8 +1,8 @@ .c.o: $(CC) -c $< -o $@ -nooc: main.o array.o util.o x64.o elf.o lex.o parse.o - $(CC) main.o array.o x64.o util.o elf.o lex.o parse.o -o nooc +nooc: main.o array.o util.o x64.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o + $(CC) main.o array.o x64.o util.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o -o nooc clean: rm -f *.o nooc diff --git a/blake3.c b/blake3.c @@ -0,0 +1,175 @@ +/* blake3-tiny by Michael Forney */ + +#include <stdint.h> +#include <string.h> +#include "blake3.h" + +#define CHUNK_START (1u << 0) +#define CHUNK_END (1u << 1) +#define PARENT (1u << 2) +#define ROOT (1u << 3) + +static const uint32_t iv[] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +}; + +static void +compress(uint32_t *out, const uint32_t m[static 16], const uint32_t h[static 8], uint64_t t, uint32_t b, uint32_t d) +{ + static const unsigned char s[][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8}, + {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1}, + {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6}, + {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4}, + {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7}, + {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13}, + }; + uint32_t v[16] = { + h[0], h[1], h[2], h[3], + h[4], h[5], h[6], h[7], + iv[0], iv[1], iv[2], iv[3], + t, t >> 32, b, d, + }; + unsigned i; + +#define G(i, j, a, b, c, d) \ + a = a + b + m[s[i][j * 2]]; \ + d = d ^ a; \ + d = d >> 16 | d << 16; \ + c = c + d; \ + b = b ^ c; \ + b = b >> 12 | b << 20; \ + a = a + b + m[s[i][j * 2 + 1]]; \ + d = d ^ a; \ + d = d >> 8 | d << 24; \ + c = c + d; \ + b = b ^ c; \ + b = b >> 7 | b << 25; + +#define ROUND(i) \ + G(i, 0, v[0], v[4], v[8], v[12]) \ + G(i, 1, v[1], v[5], v[9], v[13]) \ + G(i, 2, v[2], v[6], v[10], v[14]) \ + G(i, 3, v[3], v[7], v[11], v[15]) \ + G(i, 4, v[0], v[5], v[10], v[15]) \ + G(i, 5, v[1], v[6], v[11], v[12]) \ + G(i, 6, v[2], v[7], v[8], v[13]) \ + G(i, 7, v[3], v[4], v[9], v[14]) + + ROUND(0) ROUND(1) ROUND(2) ROUND(3) + ROUND(4) ROUND(5) ROUND(6) + +#undef G +#undef ROUND + + if (d & ROOT) { + for (i = 8; i < 16; ++i) + out[i] = v[i] ^ h[i - 8]; + } + for (i = 0; i < 8; ++i) + out[i] = v[i] ^ v[i + 8]; +} + +static void +load(uint32_t d[static 16], const unsigned char s[static 64]) { + uint32_t *end; + + for (end = d + 16; d < end; ++d, s += 4) { + *d = (uint32_t)s[0] | (uint32_t)s[1] << 8 + | (uint32_t)s[2] << 16 | (uint32_t)s[3] << 24; + } +} + +static void +block(struct blake3 *ctx, const unsigned char *buf) +{ + uint32_t m[16], flags, *cv = ctx->cv; + uint64_t t; + + flags = 0; + switch (ctx->block) { + case 0: flags |= CHUNK_START; break; + case 15: flags |= CHUNK_END; break; + } + load(m, buf); + compress(cv, m, cv, ctx->chunk, 64, flags); + if (++ctx->block == 16) { + ctx->block = 0; + for (t = ++ctx->chunk; (t & 1) == 0; t >>= 1) { + cv -= 8; + compress(cv, cv, iv, 0, 64, PARENT); + } + cv += 8; + memcpy(cv, iv, sizeof(iv)); + } + ctx->cv = cv; +} + +void +blake3_init(struct blake3 *ctx) +{ + ctx->bytes = 0; + ctx->block = 0; + ctx->chunk = 0; + ctx->cv = ctx->cv_buf; + memcpy(ctx->cv, iv, sizeof(iv)); +} + +void +blake3_update(struct blake3 *ctx, const void *buf, size_t len) +{ + const unsigned char *pos = buf; + size_t n; + + if (ctx->bytes) { + n = 64 - ctx->bytes; + if (len < n) + n = len; + memcpy(ctx->input + ctx->bytes, pos, n); + pos += n, len -= n; + ctx->bytes += n; + if (!len) + return; + block(ctx, ctx->input); + } + for (; len > 64; pos += 64, len -= 64) + block(ctx, pos); + ctx->bytes = len; + memcpy(ctx->input, pos, len); +} + +void +blake3_out(struct blake3 *ctx, unsigned char *restrict out, size_t len) +{ + uint32_t flags, b, x, *in, *cv, m[16], root[16]; + size_t i; + + cv = ctx->cv; + memset(ctx->input + ctx->bytes, 0, 64 - ctx->bytes); + load(m, ctx->input); + flags = CHUNK_END; + if (ctx->block == 0) + flags |= CHUNK_START; + if (cv == ctx->cv_buf) { + b = ctx->bytes; + in = m; + } else { + compress(cv, m, cv, ctx->chunk, ctx->bytes, flags); + flags = PARENT; + while ((cv -= 8) != ctx->cv_buf) + compress(cv, cv, iv, 0, 64, flags); + b = 64; + in = cv; + cv = (uint32_t *)iv; + } + flags |= ROOT; + for (i = 0; i < len; ++i, ++out, x >>= 8) { + if ((i & 63) == 0) + compress(root, in, cv, i >> 6, b, flags); + if ((i & 3) == 0) + x = root[i >> 2 & 15]; + *out = x & 0xff; + } +} diff --git a/blake3.h b/blake3.h @@ -0,0 +1,20 @@ +/* blake3-tiny by Michael Forney */ + +#ifndef BLAKE3_H +#define BLAKE3_H 1 + +#include <stdint.h> + +struct blake3 { + unsigned char input[64]; /* current input bytes */ + unsigned bytes; /* bytes in current input block */ + unsigned block; /* block index in chunk */ + uint64_t chunk; /* chunk index */ + uint32_t *cv, cv_buf[54 * 8]; /* chain value stack */ +}; + +void blake3_init(struct blake3 *); +void blake3_update(struct blake3 *, const void *, size_t); +void blake3_out(struct blake3 *, unsigned char *restrict, size_t); + +#endif diff --git a/blockstack.c b/blockstack.c @@ -0,0 +1,35 @@ +#define BLOCKSTACKSIZE 32 +static struct block *blockstack[BLOCKSTACKSIZE]; +static size_t blocki; +static struct proc *curproc; + +static void +blockpush(struct block *block) +{ + if (blocki >= BLOCKSTACKSIZE - 1) + die("blockpush: too many blocks!"); + + blockstack[blocki] = block; + blocki++; +} + +static struct block * +blockpop() +{ + if (blocki == 0) + die("blockpop: cannot pop empty stack!"); + + blocki--; + return blockstack[blocki]; +} + +static struct block * +blockpeek() +{ + if (blocki == 0) + die("blockpop: cannot peek empty stack!"); + + blocki--; + return blockstack[blocki - 1]; +} + diff --git a/main.c b/main.c @@ -18,6 +18,9 @@ #include "elf.h" #include "lex.h" #include "parse.h" +#include "type.h" +#include "map.h" +#include "blockstack.c" const char const *tokenstr[] = { [TOK_NONE] = "TOK_NONE", @@ -40,9 +43,11 @@ const char const *tokenstr[] = { [TOK_RETURN] = "TOK_RETURN", }; +struct map *typesmap; struct decls decls; struct assgns assgns; struct exprs exprs; +extern struct types types; char *infile; @@ -153,10 +158,12 @@ typecheck(struct block items) struct item *item = &items.data[i]; struct expr *expr; struct decl *decl; + struct type *type; switch (items.data[i].kind) { case ITEM_DECL: decl = &decls.data[item->idx]; - switch (decl->type) { + type = &types.data[decl->type]; + switch (type->class) { case TYPE_I64: expr = &exprs.data[decl->val]; // FIXME: we should be able to deal with ident or fcalls @@ -169,6 +176,8 @@ typecheck(struct block items) if (expr->class != C_STR) error(decl->start->line, decl->start->col, "expected string expression for string declaration"); break; + + // !!!!! FIXME: CHECK PARAMETER TYPES !!!!!! case TYPE_PROC: expr = &exprs.data[decl->val]; if (expr->class != C_PROC) @@ -183,7 +192,9 @@ typecheck(struct block items) decl = finddecl(&items, assgn->s); if (decl == NULL) error(assgn->start->line, assgn->start->col, "unknown name"); - switch (decl->type) { + + type = &types.data[decl->type]; + switch (type->class) { case TYPE_I64: expr = &exprs.data[assgn->val]; // FIXME: we should be able to deal with ident or fcalls @@ -208,12 +219,31 @@ typecheck(struct block items) } } + +// type must be in params - probably should come up with better interface +ssize_t +paramoffset(struct nametypes *params, struct nametype *nametype) +{ + ssize_t offset = 0; + struct type *type; + for (size_t i = params->len - 1; i >= 0; i--) { + type = &types.data[params->data[i].type]; + offset += type->size; + if (&params->data[i] == nametype) + break; + } + + // compensate for %rbp push onto stack + return offset + 8; +} + size_t genexpr(char *buf, size_t idx, enum reg reg) { size_t len = 0; char *ptr = buf; struct expr *expr = &exprs.data[idx]; + if (expr->kind == EXPR_LIT) { switch (expr->class) { case C_INT: @@ -251,10 +281,20 @@ genexpr(char *buf, size_t idx, enum reg reg) freereg(rreg); } else if (expr->kind == EXPR_IDENT) { struct decl *decl = finddecl(curitems, expr->d.s); - if (decl == NULL) { - error(expr->start->line, expr->start->col, "unknown name!"); + if (decl != NULL) { + len += mov_r64_m64(ptr ? ptr + len : ptr, reg, decl->addr); + return len; } - len += mov_r64_m64(ptr ? ptr + len : ptr, reg, decl->addr); + + struct nametype *param = findparam(&curproc->params, expr->d.s); + if (param != NULL) { + // calculate offset + int8_t offset = paramoffset(&curproc->params, param); + len += mov_disp8_m64_r64(buf ? buf + len : NULL, reg, RBP, offset); + return len; + } + + error(expr->start->line, expr->start->col, "genexpr: unknown name '%.*s'", expr->d.s.len, expr->d.s.data); } else { error(expr->start->line, expr->start->col, "genexpr: could not generate code for expression"); } @@ -262,6 +302,29 @@ genexpr(char *buf, size_t idx, enum reg reg) } size_t +gencall(char *buf, size_t addr, struct expr *expr) +{ + size_t len = 0; + struct fparams *params = &expr->d.call.params; + if (params->len > 7) + error(expr->start->line, expr->start->col, "syscall can take at most 7 parameters"); + + enum reg reg = getreg(); + + for (int i = 0; i < params->len; i++) { + len += genexpr(buf ? buf + len : NULL, params->data[i], reg); + len += push_r64(buf ? buf + len : NULL, reg); + } + + len += mov_r_imm(buf ? buf + len : NULL, reg, addr); + len += call(buf ? buf + len : NULL, reg); + + freereg(reg); + + return len; +} + +size_t gensyscall(char *buf, struct expr *expr) { size_t len = 0; @@ -287,6 +350,8 @@ gensyscall(char *buf, struct expr *expr) return len; } +size_t genproc(char *buf, struct proc *proc); + // FIXME: It is not ideal to calculate length by doing all the calculations to generate instruction, before we actually write the instructions. size_t genblock(char *buf, struct block *block) @@ -306,10 +371,7 @@ genblock(char *buf, struct block *block) error(expr->start->line, expr->start->col, "unknown function!"); } - enum reg reg = getreg(); - total += mov_r_imm(buf ? buf + total : NULL, reg, decl->addr); - total += call(buf ? buf + total : NULL, reg); - freereg(reg); + total += gencall(buf ? buf + total : NULL, decl->addr, expr); } } else if (expr->kind == EXPR_COND) { @@ -361,7 +423,10 @@ genblock(char *buf, struct block *block) break; case C_PROC: decls.data[item->idx].addr = total + TEXT_OFFSET; - total += genblock(buf ? buf + total : NULL, &(expr->d.proc.block)); + // FIXME: won't work for nested functions + curproc = &expr->d.proc; + total += genproc(buf ? buf + total : NULL, &(expr->d.proc)); + curproc = NULL; break; default: error(expr->start->line, expr->start->col, "cannot generate code for unknown expression class"); @@ -394,6 +459,7 @@ genblock(char *buf, struct block *block) error(expr->start->line, expr->start->col, "cannot generate code for unknown expression class"); } } else if (item->kind == ITEM_RETURN) { + total += pop_r64(buf ? buf + total : NULL, RBP); total += ret(buf ? buf + total : NULL); } else { error(item->start->line, item->start->col, "cannot generate code for type"); @@ -403,6 +469,18 @@ genblock(char *buf, struct block *block) return total; } +size_t +genproc(char *buf, struct proc *proc) +{ + size_t total = 0; + + total += push_r64(buf ? buf + total : NULL, RBP); + total += mov_r64_r64(buf ? buf + total : NULL, RBP, RSP); + total += genblock(buf ? buf + total : NULL, &proc->block); + + return total; +} + struct stat statbuf; int @@ -434,6 +512,9 @@ main(int argc, char *argv[]) } struct token *head = lex((struct slice){statbuf.st_size, statbuf.st_size, addr}); + + typesmap = mkmap(16); + inittypes(); struct block items = parse(head); typecheck(items); diff --git a/map.c b/map.c @@ -0,0 +1,154 @@ +/* +cproc/map.c + +Copyright © 2019 Michael Forney + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "nooc.h" +#include "util.h" +#include "map.h" + +struct map { + size_t len, cap; + struct mapkey *keys; + union mapval *vals; +}; + +static uint64_t +hash(const void *ptr, size_t len) +{ + extern int siphash(const uint8_t *, const size_t, const uint8_t *, uint8_t *, const size_t); + static const uint8_t k[16] = {0}; // XXX: we don't have a way to get entropy in standard C + uint64_t r; + + siphash(ptr, len, k, (uint8_t *)&r, sizeof(r)); + + return r; +} + +void +mapkey(struct mapkey *k, const void *s, size_t n) +{ + k->str = s; + k->len = n; + k->hash = hash(s, n); +} + +struct map * +mkmap(size_t cap) +{ + struct map *h; + size_t i; + + assert(!(cap & (cap - 1))); + h = xmalloc(sizeof(*h)); + h->len = 0; + h->cap = cap; + h->keys = xcalloc(cap, sizeof(h->keys[0])); + h->vals = xcalloc(cap, sizeof(h->vals[0])); + for (i = 0; i < cap; ++i) + h->keys[i].str = NULL; + + return h; +} + +void +delmap(struct map *h, void del(union mapval)) +{ + size_t i; + + if (!h) + return; + if (del) { + for (i = 0; i < h->cap; ++i) { + if (h->keys[i].str) + del(h->vals[i]); + } + } + free(h->keys); + free(h->vals); + free(h); +} + +static bool +keyequal(struct mapkey *k1, struct mapkey *k2) +{ + if (k1->hash != k2->hash || k1->len != k2->len) + return false; + return memcmp(k1->str, k2->str, k1->len) == 0; +} + +static size_t +keyindex(struct map *h, struct mapkey *k) +{ + size_t i; + + i = k->hash & (h->cap - 1); + while (h->keys[i].str && !keyequal(&h->keys[i], k)) + i = (i + 1) & (h->cap - 1); + return i; +} + +union mapval * +mapput(struct map *h, struct mapkey *k) +{ + struct mapkey *oldkeys; + union mapval *oldvals; + size_t i, j, oldcap; + + if (h->cap / 2 < h->len) { + oldkeys = h->keys; + oldvals = h->vals; + oldcap = h->cap; + h->cap *= 2; + h->keys = xcalloc(h->cap, sizeof(h->keys[0])); + h->vals = xcalloc(h->cap, sizeof(h->vals[0])); + for (i = 0; i < h->cap; ++i) + h->keys[i].str = NULL; + for (i = 0; i < oldcap; ++i) { + if (oldkeys[i].str) { + j = keyindex(h, &oldkeys[i]); + h->keys[j] = oldkeys[i]; + h->vals[j] = oldvals[i]; + } + } + free(oldkeys); + free(oldvals); + } + i = keyindex(h, k); + if (!h->keys[i].str) { + h->keys[i] = *k; + h->vals[i].p = NULL; + ++h->len; + } + + return &h->vals[i]; +} + +union mapval +mapget(struct map *h, struct mapkey *k) +{ + size_t i; + + i = keyindex(h, k); + return h->keys[i].str ? h->vals[i] : (union mapval){0}; +} diff --git a/map.h b/map.h @@ -0,0 +1,17 @@ +struct mapkey { + uint64_t hash; + const void *str; + size_t len; +}; + +union mapval { + uint64_t n; + void *p; +}; + +void mapkey(struct mapkey *, const void *, size_t); +struct map *mkmap(size_t); +void delmap(struct map *, void(union mapval)); + +union mapval *mapput(struct map *, struct mapkey *); +union mapval mapget(struct map *, struct mapkey *); diff --git a/nooc.h b/nooc.h @@ -51,13 +51,44 @@ struct fcall { struct fparams params; }; -// FIXME: make a struct for more complex types -enum type { - TYPE_I64, +struct typelist { + size_t cap; + size_t len; + size_t *data; // struct types +}; + +enum typeclass { + TYPE_I64 = 1, TYPE_STR, TYPE_PROC, }; +// FIXME: types should probably be stored in a hash table +struct type { + enum typeclass class; + size_t size; + union { + struct typelist typelist; + } d; +}; + +struct nametype { + struct slice name; + size_t type; // struct types +}; + +struct types { + size_t cap; + size_t len; + struct type *data; +}; + +struct nametypes { + size_t cap; + size_t len; + struct nametype *data; +}; + struct assgn { struct slice s; size_t val; // struct exprs @@ -72,7 +103,7 @@ struct assgns { struct decl { struct slice s; - enum type type; + size_t type; size_t val; // struct exprs size_t addr; struct token *start; @@ -118,6 +149,7 @@ struct loop { }; struct proc { + struct nametypes params; struct block block; }; diff --git a/parse.c b/parse.c @@ -8,15 +8,21 @@ #include "parse.h" #include "util.h" #include "array.h" +#include "type.h" +#include "map.h" extern const char const *tokenstr[]; extern struct decls decls; extern struct assgns assgns; extern struct exprs exprs; +extern struct types types; +extern struct map *typesmap; struct token *tok; +static void parsenametypes(struct nametypes *nametypes); + struct decl * finddecl(struct block *items, struct slice s) { @@ -30,6 +36,20 @@ finddecl(struct block *items, struct slice s) return NULL; } +struct nametype * +findparam(struct nametypes *params, struct slice s) +{ + struct nametype *param; + for (int i = 0; i < params->len; i++) { + param = &(params->data[i]); + if (slice_cmp(&s, &param->name) == 0) { + return param; + } + } + + return NULL; +} + static void expect(enum tokentype type) { @@ -41,13 +61,12 @@ expect(enum tokentype type) } static void -parsestring() +parsestring(struct expr *expr) { - struct expr expr; - expr.start = tok; - expr.kind = EXPR_LIT; - expr.class = C_STR; - expr.d.v.v.s = (struct slice){ 0 }; + expr->start = tok; + expr->kind = EXPR_LIT; + expr->class = C_STR; + expr->d.v.v.s = (struct slice){ 0 }; struct slice str = tok->slice; for (size_t i = 0; i < str.len; i++) { switch (str.data[i]) { @@ -57,11 +76,11 @@ parsestring() switch (str.data[i]) { case 'n': c = '\n'; - array_add((&expr.d.v.v.s), c); + array_add((&expr->d.v.v.s), c); break; case '\\': c = '\\'; - array_add((&expr.d.v.v.s), c); + array_add((&expr->d.v.v.s), c); break; default: error(tok->line, tok->col, "invalid string escape!"); @@ -71,10 +90,10 @@ parsestring() } break; default: - array_add((&expr.d.v.v.s), str.data[i]); + array_add((&expr->d.v.v.s), str.data[i]); } } - tok = tok->next; + tok = tok->next; } static struct block parseblock(); @@ -115,6 +134,8 @@ parseexpr() expr.kind = EXPR_PROC; expr.class = C_PROC; tok = tok->next; + if (tok->type == TOK_LPAREN) + parsenametypes(&expr.d.proc.params); expr.d.proc.block = parseblock(); // a function call } else if (tok->next && tok->next->type == TOK_LPAREN) { @@ -168,7 +189,7 @@ binary_common: expr.class = exprs.data[expr.left].class; break; case TOK_STRING: - parsestring(); + parsestring(&expr); break; default: error(tok->line, tok->col, "invalid token for expression"); @@ -179,6 +200,76 @@ binary_common: return exprs.len - 1; } +static size_t +parsetype() +{ + uint8_t hash[16]; + struct type type = { 0 }; + struct mapkey key; + size_t offset; + union mapval val; + + if (strncmp(tok->slice.data, "proc", 3) == 0) { + size_t subtype; + type.class = TYPE_PROC; + tok = tok->next; + if (tok->type == TOK_LPAREN) { + tok = tok->next; + while (tok->type != TOK_RPAREN) { + expect(TOK_NAME); + subtype = parsetype(); + + array_add((&type.d.typelist), subtype); + + if (tok->type == TOK_RPAREN) + break; + + expect(TOK_COMMA); + tok = tok->next; + } + + tok = tok->next; + } + } else { + mapkey(&key, tok->slice.data, tok->slice.len); + val = mapget(typesmap, &key); + if (!val.n) + error(tok->line, tok->col, "unknown type"); + + tok = tok->next; + return val.n; + } + + return type_put(&type); +} + +static void +parsenametypes(struct nametypes *nametypes) +{ + expect(TOK_LPAREN); + tok = tok->next; + struct nametype nametype; + while (tok->type != TOK_RPAREN) { + nametype = (struct nametype){ 0 }; + + expect(TOK_NAME); + nametype.name = tok->slice; + tok = tok->next; + + nametype.type = parsetype(); + + array_add(nametypes, nametype); + + if (tok->type == TOK_RPAREN) + break; + + expect(TOK_COMMA); + tok = tok->next; + } + + tok = tok->next; +} + static struct block parseblock() { @@ -204,18 +295,7 @@ parseblock() decl.s = tok->slice; tok = tok->next; - expect(TOK_NAME); - if (strncmp(tok->slice.data, "i64", 3) == 0) { - decl.type = TYPE_I64; - } else if (strncmp(tok->slice.data, "str", 3) == 0) { - decl.type = TYPE_STR; - } else if (strncmp(tok->slice.data, "proc", 3) == 0) { - decl.type = TYPE_PROC; - } else { - error(tok->line, tok->col, "unknown type"); - } - - tok = tok->next; + decl.type = parsetype(); expect(TOK_EQUAL); tok = tok->next; diff --git a/parse.h b/parse.h @@ -1,2 +1,3 @@ struct block parse(struct token *tok); struct decl *finddecl(struct block *items, struct slice s); +struct nametype *findparam(struct nametypes *params, struct slice s); diff --git a/siphash.c b/siphash.c @@ -0,0 +1,174 @@ +/* + SipHash reference C implementation + + Copyright (c) 2012-2021 Jean-Philippe Aumasson + <jeanphilippe.aumasson@gmail.com> + Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to> + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with + this software. If not, see + <http://creativecommons.org/publicdomain/zero/1.0/>. + */ + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> + +/* default: SipHash-2-4 */ +#ifndef cROUNDS +#define cROUNDS 2 +#endif +#ifndef dROUNDS +#define dROUNDS 4 +#endif + +#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v))); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +#ifdef DEBUG +#define TRACE \ + do { \ + printf("(%3zu) v0 %016" PRIx64 "\n", inlen, v0); \ + printf("(%3zu) v1 %016" PRIx64 "\n", inlen, v1); \ + printf("(%3zu) v2 %016" PRIx64 "\n", inlen, v2); \ + printf("(%3zu) v3 %016" PRIx64 "\n", inlen, v3); \ + } while (0) +#else +#define TRACE +#endif + +int siphash(const void *in, const size_t inlen, const void *k, uint8_t *out, + const size_t outlen) { + + const unsigned char *ni = (const unsigned char *)in; + const unsigned char *kk = (const unsigned char *)k; + + assert((outlen == 8) || (outlen == 16)); + uint64_t v0 = UINT64_C(0x736f6d6570736575); + uint64_t v1 = UINT64_C(0x646f72616e646f6d); + uint64_t v2 = UINT64_C(0x6c7967656e657261); + uint64_t v3 = UINT64_C(0x7465646279746573); + uint64_t k0 = U8TO64_LE(kk); + uint64_t k1 = U8TO64_LE(kk + 8); + uint64_t m; + int i; + const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + if (outlen == 16) + v1 ^= 0xee; + + for (; ni != end; ni += 8) { + m = U8TO64_LE(ni); + v3 ^= m; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: + b |= ((uint64_t)ni[6]) << 48; + /* FALLTHRU */ + case 6: + b |= ((uint64_t)ni[5]) << 40; + /* FALLTHRU */ + case 5: + b |= ((uint64_t)ni[4]) << 32; + /* FALLTHRU */ + case 4: + b |= ((uint64_t)ni[3]) << 24; + /* FALLTHRU */ + case 3: + b |= ((uint64_t)ni[2]) << 16; + /* FALLTHRU */ + case 2: + b |= ((uint64_t)ni[1]) << 8; + /* FALLTHRU */ + case 1: + b |= ((uint64_t)ni[0]); + break; + case 0: + break; + } + + v3 ^= b; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= b; + + if (outlen == 16) + v2 ^= 0xee; + else + v2 ^= 0xff; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + + if (outlen == 8) + return 0; + + v1 ^= 0xdd; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out + 8, b); + + return 0; +} diff --git a/test/exit.pass.nooc b/test/exit.pass.nooc @@ -0,0 +1,8 @@ +let exit proc(i64) = proc(code i64) { + syscall(60, code) + return +} + +let main proc = proc { + exit(0) +} diff --git a/test/exitwrite.pass.nooc b/test/exitwrite.pass.nooc @@ -0,0 +1,14 @@ +let write proc(i64, str, i64) = proc(fd i64, string str, len i64) { + syscall(1, fd, string, len) + return +} + + +let exit proc(i64) = proc(code i64) { + syscall(60, code) +} + +let main proc = proc { + write(1, "hello", 5) + exit(0) +} diff --git a/type.c b/type.c @@ -0,0 +1,153 @@ +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "nooc.h" +#include "util.h" +#include "type.h" +#include "map.h" +#include "blake3.h" +#include "array.h" + +// hashtable based on cproc's map.c + +struct types types; +extern struct map *typesmap; + +static struct typetable { + size_t cap, count; + struct typekey *keys; + size_t *vals; // offsets in struct types types? +} table; + +struct typekey { + bool present; // FIXME: move into bitfield in typetable? + uint8_t hash[16]; +}; + +// should be run after the map is initialized +void +inittypes() +{ + table.cap = 2; + table.keys = xcalloc(2, sizeof(*table.keys)); + table.vals = xcalloc(2, sizeof(*table.vals)); + struct type type = { 0 }; + struct mapkey key = { 0 }; + uint8_t out[16]; + + // first one should be 0 + type_put(&type); + + type.class = TYPE_I64; + type.size = 8; + size_t idx = type_put(&type); + mapkey(&key, "i64", 3); + + mapput(typesmap, &key)->n = idx; + + type.class = TYPE_STR; + idx = type_put(&type); + mapkey(&key, "str", 3); + mapput(typesmap, &key)->n = idx; +} + +static void +hashtype(struct type *type, char *out) +{ + struct blake3 b3; + + blake3_init(&b3); + + blake3_update(&b3, &type->class, sizeof(enum typeclass)); + switch (type->class) { + case TYPE_PROC: + blake3_update(&b3, type->d.typelist.data, type->d.typelist.len * sizeof(*type->d.typelist.data)); + } + + blake3_out(&b3, out, 16); +} + +static size_t +getindex(uint8_t hash[16]) +{ + uint64_t i = hash[0] & (table.cap - 1); + while (table.keys[i].present && memcmp(table.keys[i].hash, hash, 16)) { + i = (i + 1) & (table.cap - 1); + } + + return i; +} + +size_t +type_query(struct type *type) +{ + uint8_t hash[16]; + hashtype(type, hash); + return type_get(hash); +} + +size_t +type_get(uint8_t hash[16]) +{ + size_t i = getindex(hash); + return table.keys[i].present ? table.vals[i] : 0; +} + +size_t +type_put(struct type *type) +{ + struct typekey *oldkeys; + size_t *oldvals, oldcap, i, j; + uint8_t out[16]; + if (table.cap / 2 < table.count) { + oldkeys = table.keys; + oldvals = table.vals; + oldcap = table.cap; + table.cap *= 2; + table.keys = xcalloc(table.cap, sizeof(table.keys[0])); + table.vals = xcalloc(table.cap, sizeof(table.vals[0])); + for (i = 0; i < oldcap; i++) { + if (oldkeys[i].present) { + j = getindex(oldkeys[i].hash); + table.keys[j] = oldkeys[i]; + table.vals[j] = oldvals[i]; + } + } + + free(oldvals); + free(oldkeys); + } + + hashtype(type, out); + i = getindex(out); + if (!table.keys[i].present) { + table.count++; + table.keys[i].present = true; + memcpy(table.keys[i].hash, out, 16); + array_add((&types), (*type)); + table.vals[i] = types.len - 1; + } + + return types.len - 1; +} + +/* +int +main() +{ + struct type i64 = { .class = TYPE_I64 }; + struct type str = { .class = TYPE_STR }; + struct type proc = { .class = TYPE_PROC }; + uint8_t i64hash[16], strhash[16], prochash[16]; + + hashtype(&i64, i64hash); + hashtype(&str, strhash); + hashtype(&proc, prochash); + printf("%x%x\n", *(uint64_t*)i64hash); + printf("%x%x\n", *(uint64_t*)strhash); + printf("%x%x\n", *(uint64_t*)prochash); +} +*/ diff --git a/type.h b/type.h @@ -0,0 +1,3 @@ +size_t type_get(uint8_t hash[16]); +size_t type_put(struct type *type); +void inittypes(); diff --git a/util.c b/util.c @@ -48,3 +48,23 @@ die(char *error) fprintf(stderr, "%s\n", error); exit(1); } + +void * +xmalloc(size_t size) +{ + char *p = malloc(size); + if (!p) + die("malloc failed!"); + + return p; +} + +void * +xcalloc(size_t nelem, size_t elsize) +{ + char *p = calloc(nelem, elsize); + if (!p) + die("calloc failed!"); + + return p; +} diff --git a/util.h b/util.h @@ -2,3 +2,5 @@ int slice_cmp(struct slice *s1, struct slice *s2); int slice_cmplit(struct slice *s1, char *s2); void error(size_t line, size_t col, const char *error, ...); void die(char *error); +void *xmalloc(size_t size); +void *xcalloc(size_t, size_t); diff --git a/x64.c b/x64.c @@ -112,6 +112,35 @@ mov_m64_r64(char *buf, uint64_t addr, enum reg reg) } size_t +mov_r64_r64(char *buf, enum reg dest, enum reg src) +{ + uint8_t mov[] = {0x48, 0x89}; + uint8_t op1 = (MOD_DIRECT << 6) | (src << 3) | dest; + if (buf) { + memcpy(buf, mov, 2); + buf += 2; + *(buf++) = op1; + } + + return 3; +} + +size_t +mov_disp8_m64_r64(char *buf, enum reg dest, enum reg src, int8_t disp) +{ + uint8_t mov[] = {0x48, 0x8b}; + assert(src != 4); + if (buf) { + memcpy(buf, mov, 2); + buf += 2; + *(buf++) = (MOD_DISP8 << 6) | (dest << 3) | src; + *(buf++) = disp; + } + + return 4; +} + +size_t add_r64_r64(char *buf, enum reg reg1, enum reg reg2) { uint8_t mov[] = {0x48, 0x03}; @@ -218,3 +247,17 @@ ret(char *buf) return 1; } + +size_t push_r64(char *buf, enum reg reg) +{ + if (buf) + *(buf++) = 0x50 + reg; + return 1; +} + +size_t pop_r64(char *buf, enum reg reg) +{ + if (buf) + *(buf++) = 0x58 + reg; + return 1; +} diff --git a/x64.h b/x64.h @@ -35,6 +35,8 @@ size_t add_r_imm(char *buf, enum reg reg, uint64_t imm); size_t mov_r_imm(char *buf, enum reg reg, uint64_t imm); size_t mov_r64_m64(char *buf, enum reg reg, uint64_t addr); size_t mov_m64_r64(char *buf, uint64_t addr, enum reg reg); +size_t mov_r64_r64(char *buf, enum reg dest, enum reg src); +size_t mov_disp8_m64_r64(char *buf, enum reg dest, enum reg src, int8_t disp); size_t add_r64_r64(char *buf, enum reg reg1, enum reg reg2); size_t sub_r64_r64(char *buf, enum reg reg1, enum reg reg2); size_t cmp_r64_r64(char *buf, enum reg reg1, enum reg reg2); @@ -43,3 +45,5 @@ size_t jg(char *buf, int64_t offset); size_t jmp(char *buf, int64_t offset); size_t call(char *buf, enum reg reg); size_t ret(char *buf); +size_t push_r64(char *buf, enum reg reg); +size_t pop_r64(char *buf, enum reg reg);