nooc

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.nihaljere.xyz/nooc
Log | Files | Refs | LICENSE

commit 4b9ba4cbd1a6e420012c76319c7facfce89ed612
parent a19048e596632b05d785590daad0a04d4e4b0bc2
Author: Nihal Jere <nihal@nihaljere.xyz>
Date:   Fri,  3 Dec 2021 15:57:50 -0600

initial if/else

Diffstat:
M main.c | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M nooc.h | 22 +++++++++++++++++++---
M prog.nc | 14 ++++++--------
M x64.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M x64.h | 4 ++++
5 files changed, 273 insertions(+), 103 deletions(-)

diff --git a/main.c b/main.c @@ -1,6 +1,8 @@ +#include <assert.h> #include <ctype.h> #include <elf.h> #include <fcntl.h> +#include <stdbool.h> #include <string.h> #include <stdint.h> #include <stdio.h> @@ -27,10 +29,22 @@ lex(struct slice start) struct token *cur = head; while (start.len) { - if (isblank(*start.ptr)) { + if (start.len >= 2 && memcmp(start.ptr, "if", 2) == 0) { + cur->type = TOK_IF; + start.ptr += 2; + start.len -= 2; + } else if (start.len >= 4 && memcmp(start.ptr, "else", 4) == 0) { + cur->type = TOK_ELSE; + start.ptr += 4; + start.len -= 4; + } else if (isblank(*start.ptr)) { start.ptr += 1; start.len -= 1; continue; + } else if (*start.ptr == '>') { + cur->type = TOK_GREATER; + start.ptr += 1; + start.len -= 1; } else if (*start.ptr == ',') { cur->type = TOK_COMMA; start.ptr += 1; @@ -43,6 +57,14 @@ lex(struct slice start) cur->type = TOK_RPAREN; start.ptr += 1; start.len -= 1; + } else if (*start.ptr == '{') { + cur->type = TOK_LCURLY; + start.ptr += 1; + start.len -= 1; + } else if (*start.ptr == '}') { + cur->type = TOK_RCURLY; + start.ptr += 1; + start.len -= 1; } else if (isdigit(*start.ptr)) { cur->slice.ptr = start.ptr; cur->slice.len = 1; @@ -135,10 +157,10 @@ data_pushint(uint64_t i) return DATA_OFFSET + data_seg.len - 8; } -struct items *curitems; +struct block *curitems; struct decl * -finddecl(struct items *items, struct slice s) +finddecl(struct block *items, struct slice s) { for (int i = 0; i < decls.len; i++) { struct decl *decl = &(decls.data[i]); @@ -160,6 +182,10 @@ char *exprkind_str(enum exprkind kind) return "EXPR_BINARY"; case EXPR_IDENT: return "EXPR_IDENT"; + case EXPR_FCALL: + return "EXPR_FCALL"; + case EXPR_COND: + return "EXPR_COND"; default: error("invalid exprkind"); } @@ -172,7 +198,7 @@ dumpval(struct expr *e) { switch (e->class) { case C_INT: - fprintf(stderr, "%ld", e->d.v.v.val); + fprintf(stderr, "%ld", e->d.v.v.i); break; case C_STR: { fprintf(stderr, "\"%.*s\"", e->d.v.v.s.len, e->d.v.v.s.ptr); @@ -191,6 
+217,9 @@ dumpbinop(enum binop op) case OP_MINUS: fprintf(stderr, "OP_MINUS"); break; + case OP_GREATER: + fprintf(stderr, "OP_GREATER"); + break; default: error("invalid binop"); } @@ -208,24 +237,42 @@ dumpexpr(int indent, struct expr *expr) break; case EXPR_LIT: dumpval(expr); - fprintf(stderr, "\n"); + fputc('\n', stderr); break; case EXPR_BINARY: dumpbinop(expr->d.op); - fprintf(stderr, "\n"); + fputc('\n', stderr); dumpexpr(indent + 8, &exprs.data[expr->left]); dumpexpr(indent + 8, &exprs.data[expr->right]); break; + case EXPR_COND: + dumpexpr(indent + 8, &exprs.data[expr->d.cond.cond]); + break; + case EXPR_FCALL: + fprintf(stderr, "%.*s\n", expr->d.call.name.len, expr->d.call.name.ptr); + break; default: error("dumpexpr: bad expression"); } } +struct block parse(struct token **tok); + size_t parseexpr(struct token **tok) { struct expr expr = { 0 }; switch ((*tok)->type) { + case TOK_IF: + *tok = (*tok)->next; + expr.kind = EXPR_COND; + expr.d.cond.cond = parseexpr(tok); + expr.d.cond.bif = parse(tok); + if ((*tok)->type == TOK_ELSE) { + *tok = (*tok)->next; + expr.d.cond.belse = parse(tok); + } + break; case TOK_LPAREN: *tok = (*tok)->next; size_t ret = parseexpr(tok); @@ -237,6 +284,7 @@ parseexpr(struct token **tok) // a function call if ((*tok)->next && (*tok)->next->type == TOK_LPAREN) { size_t pidx; + expr.d.call.name = (*tok)->slice; *tok = (*tok)->next->next; expr.kind = EXPR_FCALL; @@ -260,9 +308,13 @@ parseexpr(struct token **tok) expr.kind = EXPR_LIT; expr.class = C_INT; // FIXME: error check - expr.d.v.v.val = strtol((*tok)->slice.ptr, NULL, 10); + expr.d.v.v.i = strtol((*tok)->slice.ptr, NULL, 10); *tok = (*tok)->next; break; + case TOK_GREATER: + expr.kind = EXPR_BINARY; + expr.d.op = OP_GREATER; + goto binary_common; case TOK_PLUS: expr.kind = EXPR_BINARY; expr.d.op = OP_PLUS; @@ -293,34 +345,42 @@ binary_common: return exprs.len - 1; } -struct items -parse(struct token *tok) +struct block +parse(struct token **tok) { - struct items items = { 
0 }; + struct block items = { 0 }; struct item item; struct token *name; size_t expr; + bool curlies = false; + + if ((*tok)->type == TOK_LCURLY) { + curlies = true; + *tok = (*tok)->next; + } - while (tok->type != TOK_NONE) { + while ((*tok)->type != TOK_NONE && (*tok)->type != TOK_RCURLY) { item = (struct item){ 0 }; - expect(tok, TOK_NAME); - name = tok; - if (tok->next && tok->next->type == TOK_NAME && tok->next->next && tok->next->next->type == TOK_EQUAL) { + if ((*tok)->type != TOK_IF) { + expect((*tok), TOK_NAME); + name = (*tok); + } + if ((*tok)->next && (*tok)->next->type == TOK_NAME && (*tok)->next->next && (*tok)->next->next->type == TOK_EQUAL) { struct decl decl; item.kind = ITEM_DECL; - tok = tok->next; + (*tok) = (*tok)->next; - if (strncmp(tok->slice.ptr, "i64", 3) == 0) { + if (strncmp((*tok)->slice.ptr, "i64", 3) == 0) { decl.type = TYPE_I64; - } else if (strncmp(tok->slice.ptr, "str", 3) == 0) { + } else if (strncmp((*tok)->slice.ptr, "str", 3) == 0) { decl.type = TYPE_STR; } else { error("unknown type"); } - tok = tok->next->next; + (*tok) = (*tok)->next->next; - decl.val = parseexpr(&tok); + decl.val = parseexpr(tok); decl.s = name->slice; array_add((&decls), decl); @@ -328,16 +388,19 @@ parse(struct token *tok) array_add((&items), item); } else { item.kind = ITEM_EXPR; - item.idx = parseexpr(&tok); + item.idx = parseexpr(&(*tok)); array_add((&items), item); } } + if ((*tok)->type == TOK_RCURLY) + *tok = (*tok)->next; + return items; } void -typecheck(struct items items) +typecheck(struct block items) { for (size_t i = 0; i < items.len; i++) { struct expr *expr; @@ -376,7 +439,7 @@ genexpr(char *buf, size_t idx, enum reg reg) if (expr->kind == EXPR_LIT) { switch (expr->class) { case C_INT: - len = mov_r_imm(ptr ? ptr + len : ptr, reg, expr->d.v.v.val); + len = mov_r_imm(ptr ? 
ptr + len : ptr, reg, expr->d.v.v.i); break; case C_STR: { int addr = data_push(expr->d.v.v.s.ptr, expr->d.v.v.s.len); @@ -387,28 +450,27 @@ genexpr(char *buf, size_t idx, enum reg reg) error("genexpr: unknown value type!"); } } else if (expr->kind == EXPR_BINARY) { + len += genexpr(ptr ? ptr + len : ptr, expr->left, reg); + enum reg rreg = getreg(); + len += genexpr(ptr ? ptr + len : ptr, expr->right, rreg); + switch (expr->d.op) { case OP_PLUS: { - len += genexpr(ptr ? ptr + len : ptr, expr->left, reg); - enum reg rreg = getreg(); - len += genexpr(ptr ? ptr + len : ptr, expr->right, rreg); - len += add_r64_r64(ptr ? ptr + len : ptr, reg, rreg); - freereg(rreg); break; } case OP_MINUS: { - len += genexpr(ptr ? ptr + len : ptr, expr->left, reg); - enum reg rreg = getreg(); - len += genexpr(ptr ? ptr + len : ptr, expr->right, rreg); - len += sub_r64_r64(ptr ? ptr + len : ptr, reg, rreg); - freereg(rreg); + break; + } + case OP_GREATER: { + len += cmp_r64_r64(ptr ? ptr + len : ptr, reg, rreg); break; } default: error("genexpr: unknown binary op!"); } + freereg(rreg); } else if (expr->kind == EXPR_IDENT) { struct decl *decl = finddecl(curitems, expr->d.s); if (decl == NULL) { @@ -428,26 +490,100 @@ gensyscall(char *buf, struct fparams *params) if (params->len > 7) error("syscall can take at most 7 parameters"); - char *ptr = buf; - // encoding for argument registers in ABI order for (int i = 0; i < params->len; i++) { used_reg |= (1 << abi_arg[i]); - len += genexpr(ptr ? ptr + len : ptr, params->data[i], abi_arg[i]); + len += genexpr(buf ? buf + len : NULL, params->data[i], abi_arg[i]); } + // FIXME: what if an abi arg register has already been allocated before this executes? (ex. 
nested function call) clearreg(); if (buf) { char syscall[] = {0x0f, 0x05}; - memcpy(ptr + len, syscall, 2); + memcpy(buf + len, syscall, 2); } len += 2; - // for now, we assume each mov is 7 bytes encoded, and 2 bytes for syscall return len; } +// FIXME: It is not ideal to calculate length by doing all the calculations to generate instruction, before we actually write the instructions. +size_t +genblock(char *buf, struct block *block) +{ + size_t total = 0; + for (int i = 0; i < block->len; i++) { + struct item *item = &block->data[i]; + if (item->kind == ITEM_EXPR) { + struct expr expr = exprs.data[item->idx]; + // FIXME: 7 should not be hardcoded here + if (expr.kind == EXPR_FCALL) { + if (expr.d.call.name.len == 7 && memcmp(exprs.data[item->idx].d.call.name.ptr, "syscall", 7) == 0) { + total += gensyscall(buf ? buf + total : NULL, &(exprs.data[item->idx].d.call.params)); + } else { + error("unknown function!"); + } + } else if (expr.kind == EXPR_COND) { + struct expr *binary = &exprs.data[expr.d.cond.cond]; + // FIXME this should go away + assert(binary->kind == EXPR_BINARY); + enum reg reg = getreg(); + total += genexpr(buf ? buf + total : NULL, expr.d.cond.cond, reg); + size_t iflen = genblock(NULL, &expr.d.cond.bif); + size_t elselen = genblock(NULL, &expr.d.cond.belse); + switch (binary->d.op) { + case OP_GREATER: + total += jng(buf ? buf + total : NULL, iflen); + break; + default: + error("unknown binop for conditional"); + } + total += genblock(buf ? buf + total : NULL, &expr.d.cond.bif); + total += jmp(buf ? buf + total: NULL, elselen); + total += genblock(buf ? buf + total : NULL, &expr.d.cond.belse); + } else { + error("unhandled toplevel expression type!"); + } + } else if (item->kind == ITEM_DECL) { + struct expr *expr = &exprs.data[decls.data[item->idx].val]; + switch (expr->class) { + case C_INT: + // this is sort of an optimization, since we write at compile-time instead of evaluating and storing. should this happen here in the long term? 
+ if (expr->kind == EXPR_LIT) { + decls.data[item->idx].addr = data_pushint(expr->d.v.v.i); + } else { + decls.data[item->idx].addr = data_pushint(0); + enum reg reg = getreg(); + size_t exprlen = genexpr(NULL, decls.data[item->idx].val, reg); + size_t movlen = mov_m64_r64(NULL, decls.data[item->idx].addr, reg); + char *code = malloc(exprlen + movlen); + if (!code) + error("genexpr malloc failed"); + + genexpr(code, decls.data[item->idx].val, reg); + mov_m64_r64(code + exprlen, decls.data[item->idx].addr, reg); + if (buf) + memcpy(buf, code, exprlen + movlen); + total += exprlen + movlen; + freereg(reg); + } + break; + // FIXME: we assume that any string is a literal, may break if we add binary operands on strings in the future. + case C_STR: + decls.data[item->idx].addr = data_pushint(data_push(expr->d.v.v.s.ptr, expr->d.v.v.s.len)); + break; + default: + error("cannot generate code for unknown expression class"); + } + } else { + error("cannot generate code for type"); + } + } + + return total; +} + struct stat statbuf; int @@ -474,9 +610,18 @@ main(int argc, char *argv[]) close(in); struct token *head = lex((struct slice){addr, statbuf.st_size}); - struct items items = parse(head); + struct token *curtoken = head; + struct block items = parse(&curtoken); typecheck(items); - curitems = &items; + + size_t len = genblock(NULL, &items); + char *text = malloc(len); + if (!text) { + fprintf(stderr, "text allocation failed!"); + return 1; + } + + size_t len2 = genblock(text, &items); FILE *out = fopen(argv[2], "w"); if (!out) { @@ -485,61 +630,9 @@ main(int argc, char *argv[]) return 1; } - struct data text = { 0 }; - - for (int i = 0; i < items.len; i++) { - struct item *item = &items.data[i]; - if (item->kind == ITEM_EXPR) { - // 7 should not be hardcoded here - int len = exprs.data[item->idx].d.call.name.len > 7 ? 
7 : exprs.data[item->idx].d.call.name.len; - if (memcmp(exprs.data[item->idx].d.call.name.ptr, "syscall", len) == 0) { - size_t len = gensyscall(NULL, &(exprs.data[item->idx].d.call.params)); - char *fcode = malloc(len); - if (!fcode) - error("gensyscall malloc failed"); - - gensyscall(fcode, &(exprs.data[item->idx].d.call.params)); - array_push((&text), fcode, len); - - free(fcode); - } - } else if (item->kind == ITEM_DECL) { - struct expr *expr = &exprs.data[decls.data[item->idx].val]; - switch (expr->class) { - case C_INT: - // this is sort of an optimization, since we write at compile-time instead of evaluating and storing. should this happen here in the long term? - if (expr->kind == EXPR_LIT) { - decls.data[item->idx].addr = data_pushint(expr->d.v.v.val); - } else { - decls.data[item->idx].addr = data_pushint(0); - enum reg reg = getreg(); - size_t exprlen = genexpr(NULL, decls.data[item->idx].val, reg); - size_t movlen = mov_m64_r64(NULL, decls.data[item->idx].addr, reg); - char *code = malloc(exprlen + movlen); - if (!code) - error("genexpr malloc failed"); - - genexpr(code, decls.data[item->idx].val, reg); - mov_m64_r64(code + exprlen, decls.data[item->idx].addr, reg); - array_push((&text), code, exprlen + movlen); - freereg(reg); - } - break; - // FIXME: we assume that any string is a literal, may break if we add binary operands on strings in the future. 
- case C_STR: - decls.data[item->idx].addr = data_pushint(data_push(expr->d.v.v.s.ptr, expr->d.v.v.s.len)); - break; - default: - error("cannot generate code for unknown expression class"); - } - } else { - error("cannot generate code for type"); - } - } munmap(addr, statbuf.st_size); - - elf(text.data, text.len, data_seg.data, data_seg.len, out); + elf(text, len, data_seg.data, data_seg.len, out); fclose(out); } diff --git a/nooc.h b/nooc.h @@ -6,15 +6,21 @@ enum tokentype { TOK_LPAREN, TOK_RPAREN, + TOK_LCURLY, + TOK_RCURLY, TOK_PLUS, TOK_MINUS, + TOK_GREATER, TOK_COMMA, TOK_EQUAL, TOK_NUM, TOK_STRING, + + TOK_IF, + TOK_ELSE }; struct slice { @@ -72,20 +78,28 @@ struct item { size_t idx; }; -struct items { +struct block { size_t cap; size_t len; struct item *data; }; +struct cond { + size_t cond; // struct exprs + struct block bif; + struct block belse; +}; + + enum binop { OP_PLUS, OP_MINUS, + OP_GREATER, }; struct value { union { - uint64_t val; + uint64_t i; struct slice s; } v; }; @@ -94,7 +108,8 @@ enum exprkind { EXPR_LIT, EXPR_IDENT, EXPR_BINARY, - EXPR_FCALL + EXPR_FCALL, + EXPR_COND }; enum class { @@ -110,6 +125,7 @@ struct expr { enum binop op; struct slice s; struct fcall call; + struct cond cond; } d; size_t left; size_t right; diff --git a/prog.nc b/prog.nc @@ -1,10 +1,8 @@ -write i64 = 1 -stdout i64 = 0 exit i64 = 60 -len1 i64 = 11 -hello str = "hello " -world str = "world" -len2 i64 = + 3 3 -syscall(write, stdout, hello, len2) -syscall(write, stdout, world, 5) +write i64 = 1 +if > 1 0 { + syscall(write, 0, "hello", 5) +} else { + syscall(write, 0, "world", 5) +} syscall(exit, 0) diff --git a/x64.c b/x64.c @@ -137,3 +137,62 @@ sub_r64_r64(char *buf, enum reg reg1, enum reg reg2) return 3; } + +size_t +cmp_r64_r64(char *buf, enum reg reg1, enum reg reg2) +{ + uint8_t mov[] = {0x48, 0x3B}; + uint8_t op = (MOD_DIRECT << 6) | (reg1 << 3) | reg2; + if (buf) { + memcpy(buf, mov, 2); + buf += 2; + *(buf++) = op; + } + + return 3; +} + +size_t +jng(char 
*buf, int64_t offset) +{ + if (-256 <= offset && offset <= 255) { + int8_t i = offset; + if (buf) { + *(buf++) = 0x7E; + *(buf++) = i; + } + return 2; + } else { + error("unimplemented jng offet!"); + } +} + +size_t +jg(char *buf, int64_t offset) +{ + if (-256 <= offset && offset <= 255) { + int8_t i = offset; + if (buf) { + *(buf++) = 0x7F; + *(buf++) = i; + } + return 2; + } else { + error("unimplemented jg offet!"); + } +} + +size_t +jmp(char *buf, int64_t offset) +{ + if (-256 <= offset && offset <= 255) { + int8_t i = offset; + if (buf) { + *(buf++) = 0xEB; + *(buf++) = i; + } + return 2; + } else { + error("unimplemented jmp offet!"); + } +} diff --git a/x64.h b/x64.h @@ -37,3 +37,7 @@ size_t mov_r64_m64(char *buf, enum reg reg, uint64_t addr); size_t mov_m64_r64(char *buf, uint64_t addr, enum reg reg); size_t add_r64_r64(char *buf, enum reg reg1, enum reg reg2); size_t sub_r64_r64(char *buf, enum reg reg1, enum reg reg2); +size_t cmp_r64_r64(char *buf, enum reg reg1, enum reg reg2); +size_t jng(char *buf, int64_t offset); +size_t jg(char *buf, int64_t offset); +size_t jmp(char *buf, int64_t offset);