commit 279c421184bfd8ce59f29f903aa269fd2ccf1901
parent 2e6a8ddefd570157e0a148b4ab5d6f4f1ec312ea
Author: Nihal Jere <nihal@nihaljere.xyz>
Date: Tue, 14 Dec 2021 12:47:18 -0600
stack allocated variables
To do this, a few instructions had to be added, and some functions
were added to abstract over retrieving and storing values to a
declaration. A subtract immediate instruction was added to allow
adjusting the stack pointer for stack allocation.
Diffstat:
10 files changed, 186 insertions(+), 58 deletions(-)
diff --git a/elf.c b/elf.c
@@ -1,4 +1,5 @@
#include <elf.h>
+#include <stdbool.h>
#include <stdio.h>
#include "nooc.h"
diff --git a/lex.c b/lex.c
@@ -1,4 +1,5 @@
#include <ctype.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
diff --git a/main.c b/main.c
@@ -22,6 +22,8 @@
#include "map.h"
#include "blockstack.h"
+extern struct block *blockstack[BLOCKSTACKSIZE];
+extern size_t blocki;
struct proc *curproc;
const char *const tokenstr[] = {
@@ -46,7 +48,6 @@ const char *const tokenstr[] = {
};
struct map *typesmap;
-struct decls decls;
struct assgns assgns;
struct exprs exprs;
extern struct types types;
@@ -63,10 +64,66 @@ data_push(char *ptr, size_t len)
}
uint64_t
-data_pushint(uint64_t i)
+data_pushzero(size_t len)
{
- array_push((&data_seg), (&i), 8);
- return DATA_OFFSET + data_seg.len - 8;
+ array_zero((&data_seg), len);
+ return DATA_OFFSET + data_seg.len - len;
+}
+
+void
+decl_alloc(struct block *block, struct decl *decl)
+{
+ struct type *type = &types.data[decl->type];
+ switch (decl->kind) {
+ case DECL_DATA:
+ decl->loc.addr = data_pushzero(type->size);
+ break;
+ case DECL_STACK:
+ decl->loc.off = block->datasize;
+ block->datasize += type->size;
+ break;
+ default:
+ die("decl_alloc: unknown decl kind");
+ }
+}
+
+/* FIXME: handle things beside 64-bit integers */
+size_t
+decl_fromreg(char *buf, struct decl *decl, enum reg reg)
+{
+ size_t total = 0;
+ switch (decl->kind) {
+ case DECL_DATA:
+ total += mov_m64_r64(buf ? buf + total : NULL, decl->loc.addr, reg);
+ break;
+ case DECL_STACK:
+ total += mov_disp8_m64_r64(buf, reg, decl->loc.off, RBP);
+ break;
+ default:
+ fprintf(stderr, "%d\n", decl->kind);
+ die("decl_fromreg: unknown decl kind");
+ }
+
+ return total;
+}
+
+/* FIXME: handle things beside 64-bit integers */
+size_t
+decl_toreg(char *buf, enum reg reg, struct decl *decl)
+{
+ size_t total = 0;
+ switch (decl->kind) {
+ case DECL_DATA:
+ total += mov_r64_m64(buf ? buf + total : NULL, reg, decl->loc.addr);
+ break;
+ case DECL_STACK:
+ total += mov_disp8_r64_m64(buf, reg, RBP, decl->loc.off);
+ break;
+ default:
+ die("unknown decl type!");
+ }
+
+ return total;
}
struct block *curitems;
@@ -168,7 +225,7 @@ typecheck(struct block items)
struct type *type;
switch (items.data[i].kind) {
case ITEM_DECL:
- decl = &decls.data[item->idx];
+ decl = &items.decls.data[item->idx];
type = &types.data[decl->type];
switch (type->class) {
case TYPE_I64:
@@ -203,7 +260,7 @@ typecheck(struct block items)
break;
case ITEM_ASSGN:
struct assgn *assgn = &assgns.data[item->idx];
- decl = finddecl(&items, assgn->s);
+ decl = finddecl(assgn->s);
if (decl == NULL)
error(assgn->start->line, assgn->start->col, "unknown name");
@@ -294,9 +351,9 @@ genexpr(char *buf, size_t idx, enum reg reg)
}
freereg(rreg);
} else if (expr->kind == EXPR_IDENT) {
- struct decl *decl = finddecl(curitems, expr->d.s);
+ struct decl *decl = finddecl(expr->d.s);
if (decl != NULL) {
- len += mov_r64_m64(ptr ? ptr + len : ptr, reg, decl->addr);
+ len += decl_toreg(ptr ? ptr + len : NULL, reg, decl);
return len;
}
@@ -304,7 +361,7 @@ genexpr(char *buf, size_t idx, enum reg reg)
if (param != NULL) {
// calculate offset
int8_t offset = paramoffset(&curproc->params, param);
- len += mov_disp8_m64_r64(buf ? buf + len : NULL, reg, RBP, offset);
+ len += mov_disp8_m64_r64(buf ? buf + len : NULL, reg, offset, RBP);
return len;
}
@@ -368,24 +425,24 @@ size_t genproc(char *buf, struct proc *proc);
// FIXME: It is not ideal to calculate length by doing all the calculations to generate instruction, before we actually write the instructions.
size_t
-genblock(char *buf, struct block *block)
+genblock(char *buf, struct block *block, bool toplevel)
{
+ blockpush(block);
size_t total = 0;
for (int i = 0; i < block->len; i++) {
struct item *item = &block->data[i];
if (item->kind == ITEM_EXPR) {
struct expr *expr = &exprs.data[item->idx];
- // FIXME: 7 should not be hardcoded here
if (expr->kind == EXPR_FCALL) {
if (slice_cmplit(&expr->d.call.name, "syscall") == 0) {
total += gensyscall(buf ? buf + total : NULL, expr);
} else {
- struct decl *decl = finddecl(block, expr->d.call.name);
+ struct decl *decl = finddecl(expr->d.call.name);
if (decl == NULL) {
error(expr->start->line, expr->start->col, "unknown function!");
}
- total += gencall(buf ? buf + total : NULL, decl->addr, expr);
+ total += gencall(buf ? buf + total : NULL, decl->loc.addr, expr);
}
} else if (expr->kind == EXPR_COND) {
@@ -394,8 +451,8 @@ genblock(char *buf, struct block *block)
assert(binary->kind == EXPR_BINARY);
enum reg reg = getreg();
total += genexpr(buf ? buf + total : NULL, expr->d.cond.cond, reg);
- size_t iflen = genblock(NULL, &expr->d.cond.bif) + jmp(NULL, 0);
- size_t elselen = genblock(NULL, &expr->d.cond.belse);
+ size_t iflen = genblock(NULL, &expr->d.cond.bif, false) + jmp(NULL, 0);
+ size_t elselen = genblock(NULL, &expr->d.cond.belse, false);
switch (binary->d.op) {
case OP_GREATER:
total += jng(buf ? buf + total : NULL, iflen);
@@ -403,40 +460,41 @@ genblock(char *buf, struct block *block)
default:
error(expr->start->line, expr->start->col, "unknown binop for conditional");
}
- total += genblock(buf ? buf + total : NULL, &expr->d.cond.bif);
+ total += genblock(buf ? buf + total : NULL, &expr->d.cond.bif, false);
total += jmp(buf ? buf + total: NULL, elselen);
- total += genblock(buf ? buf + total : NULL, &expr->d.cond.belse);
+ total += genblock(buf ? buf + total : NULL, &expr->d.cond.belse, false);
} else if (expr->kind == EXPR_LOOP) {
- size_t back = genblock(NULL, &expr->d.loop.block) + jmp(NULL, 0);
- total += genblock(buf ? buf + total : NULL, &expr->d.loop.block);
+ size_t back = genblock(NULL, &expr->d.loop.block, false) + jmp(NULL, 0);
+ total += genblock(buf ? buf + total : NULL, &expr->d.loop.block, false);
total += jmp(buf ? buf + total: NULL, -back);
} else {
error(expr->start->line, expr->start->col, "unhandled toplevel expression type!");
}
} else if (item->kind == ITEM_DECL) {
- struct expr *expr = &exprs.data[decls.data[item->idx].val];
+ struct decl *decl = &block->decls.data[item->idx];
+ struct expr *expr = &exprs.data[decl->val];
+ uint64_t addr;
+ enum reg reg = getreg();
+
+ decl->kind = toplevel ? DECL_DATA : DECL_STACK;
+ decl_alloc(block, decl);
+
switch (expr->class) {
case C_INT:
- // this is sort of an optimization, since we write at compile-time instead of evaluating and storing. should this happen here in the long term?
- if (expr->kind == EXPR_LIT) {
- if (buf)
- decls.data[item->idx].addr = data_pushint(expr->d.v.v.i);
- } else {
- if (buf)
- decls.data[item->idx].addr = data_pushint(0);
- enum reg reg = getreg();
- total += genexpr(buf ? buf + total : NULL, decls.data[item->idx].val, reg);
- total += mov_m64_r64(buf ? buf + total : NULL, decls.data[item->idx].addr, reg);
- freereg(reg);
- }
+ // FIXME: we can store literals at compile time
+ total += genexpr(buf ? buf + total : NULL, block->decls.data[item->idx].val, reg);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
break;
// FIXME: we assume that any string is a literal, may break if we add binary operands on strings in the future.
case C_STR:
- if (buf)
- decls.data[item->idx].addr = data_pushint(data_push(expr->d.v.v.s.data, expr->d.v.v.s.len));
+ if (buf) {
+ addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len);
+ }
+ total += mov_r_imm(buf ? buf + total : NULL, reg, addr);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
break;
case C_PROC:
- decls.data[item->idx].addr = total + TEXT_OFFSET;
+ block->decls.data[item->idx].loc.addr = total + TEXT_OFFSET;
// FIXME: won't work for nested functions
curproc = &expr->d.proc;
total += genproc(buf ? buf + total : NULL, &(expr->d.proc));
@@ -445,14 +503,17 @@ genblock(char *buf, struct block *block)
default:
error(expr->start->line, expr->start->col, "cannot generate code for unknown expression class");
}
+
+ decl->declared = true;
+ freereg(reg);
} else if (item->kind == ITEM_ASSGN) {
struct expr *expr = &exprs.data[assgns.data[item->idx].val];
struct assgn *assgn = &assgns.data[item->idx];
- struct decl *decl = finddecl(block, assgn->s);
+ struct decl *decl = finddecl(assgn->s);
if (decl == NULL)
error(assgn->start->line, assgn->start->col, "unknown name");
- if (buf && decl->addr == 0)
+ if (!decl->declared)
error(assgn->start->line, assgn->start->col, "assignment before declaration");
switch (expr->class) {
@@ -460,25 +521,27 @@ genblock(char *buf, struct block *block)
// this is sort of an optimization, since we write at compile-time instead of evaluating and storing. should this happen here in the long term?
enum reg reg = getreg();
total += genexpr(buf ? buf + total : NULL, assgn->val, reg);
- total += mov_m64_r64(buf ? buf + total : NULL, decl->addr, reg);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
freereg(reg);
break;
// FIXME: we assume that any string is a literal, may break if we add binary operands on strings in the future.
case C_STR:
size_t addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len);
total += mov_r_imm(buf ? buf + total : NULL, reg, addr);
- total += mov_m64_r64(buf ? buf + total : NULL, decl->addr, reg);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
break;
default:
error(expr->start->line, expr->start->col, "cannot generate code for unknown expression class");
}
} else if (item->kind == ITEM_RETURN) {
+ total += mov_r64_r64(buf ? buf + total : NULL, RSP, RBP);
total += pop_r64(buf ? buf + total : NULL, RBP);
total += ret(buf ? buf + total : NULL);
} else {
error(item->start->line, item->start->col, "cannot generate code for type");
}
}
+ blockpop();
return total;
}
@@ -490,7 +553,8 @@ genproc(char *buf, struct proc *proc)
total += push_r64(buf ? buf + total : NULL, RBP);
total += mov_r64_r64(buf ? buf + total : NULL, RBP, RSP);
- total += genblock(buf ? buf + total : NULL, &proc->block);
+ total += sub_r64_imm(buf ? buf + total : NULL, RSP, proc->block.datasize + 8);
+ total += genblock(buf ? buf + total : NULL, &proc->block, false);
return total;
}
@@ -532,14 +596,14 @@ main(int argc, char *argv[])
struct block items = parse(head);
typecheck(items);
- size_t len = genblock(NULL, &items);
+ size_t len = genblock(NULL, &items, true);
char *text = malloc(len);
if (!text) {
fprintf(stderr, "text allocation failed!");
return 1;
}
- size_t len2 = genblock(text, &items);
+ size_t len2 = genblock(text, &items, true);
assert(len == len2);
FILE *out = fopen(argv[2], "w");
@@ -549,14 +613,16 @@ main(int argc, char *argv[])
return 1;
}
- struct decl *main = finddecl(&items, (struct slice){4, 4, "main"});
+ blockpush(&items);
+ struct decl *main = finddecl((struct slice){4, 4, "main"});
if (main == NULL) {
fprintf(stderr, "main function not found\n");
return 1;
}
+ blockpop();
munmap(addr, statbuf.st_size);
- elf(main->addr, text, len, data_seg.data, data_seg.len, out);
+ elf(main->loc.addr, text, len, data_seg.data, data_seg.len, out);
fclose(out);
}
diff --git a/nooc.h b/nooc.h
@@ -65,7 +65,6 @@ enum typeclass {
TYPE_PROC,
};
-// FIXME: types should probably be stored in a hash table
struct type {
enum typeclass class;
size_t size;
@@ -107,7 +106,15 @@ struct decl {
struct slice s;
size_t type;
size_t val; // struct exprs
- size_t addr;
+ bool declared;
+ enum {
+ DECL_STACK,
+ DECL_DATA
+ } kind;
+ union {
+ size_t off;
+ size_t addr;
+ } loc;
struct token *start;
};
@@ -135,6 +142,12 @@ struct item {
};
struct block {
+ struct {
+ size_t cap;
+ size_t len;
+ struct decl *data;
+ } decls;
+ size_t datasize;
size_t cap;
size_t len;
struct item *data;
diff --git a/parse.c b/parse.c
@@ -10,10 +10,14 @@
#include "array.h"
#include "type.h"
#include "map.h"
+#include "blockstack.h"
+
+extern struct block *blockstack[BLOCKSTACKSIZE];
+extern size_t blocki;
+extern struct proc *curproc;
extern const char *const tokenstr[];
-extern struct decls decls;
extern struct assgns assgns;
extern struct exprs exprs;
extern struct types types;
@@ -24,12 +28,14 @@ struct token *tok;
static void parsenametypes(struct nametypes *nametypes);
struct decl *
-finddecl(struct block *items, struct slice s)
+finddecl(struct slice s)
{
- for (int i = 0; i < decls.len; i++) {
- struct decl *decl = &(decls.data[i]);
- if (slice_cmp(&s, &decl->s) == 0) {
- return decl;
+ for (int j = blocki - 1; j >= 0; j--) {
+ for (int i = 0; i < blockstack[j]->decls.len; i++) {
+ struct decl *decl = &(blockstack[j]->decls.data[i]);
+ if (slice_cmp(&s, &decl->s) == 0) {
+ return decl;
+ }
}
}
@@ -275,6 +281,7 @@ parseblock()
struct item item;
bool curlies = false;
+ blockpush(&items);
if (tok->type == TOK_LCURLY) {
curlies = true;
tok = tok->next;
@@ -298,14 +305,14 @@ parseblock()
tok = tok->next;
// FIXME: scoping
- if (finddecl(&items, decl.s)) {
+ if (finddecl(decl.s)) {
error(tok->line, tok->col, "repeat declaration!");
}
decl.val = parseexpr();
- array_add((&decls), decl);
+ array_add((&items.decls), decl);
- item.idx = decls.len - 1;
+ item.idx = items.decls.len - 1;
array_add((&items), item);
} else if (tok->type == TOK_RETURN) {
item.kind = ITEM_RETURN;
@@ -335,6 +342,7 @@ parseblock()
tok = tok->next;
}
+ blockpop();
return items;
}
diff --git a/parse.h b/parse.h
@@ -1,3 +1,3 @@
struct block parse(struct token *tok);
-struct decl *finddecl(struct block *items, struct slice s);
struct nametype *findparam(struct nametypes *params, struct slice s);
+struct decl *finddecl(struct slice s);
diff --git a/test/exitwrite.pass.nooc b/test/exitwrite.pass.nooc
@@ -1,4 +1,5 @@
let write proc(i64, str, i64) = proc(fd i64, string str, len i64) {
+ let r1 i64 = 0
syscall(1, fd, string, len)
return
}
@@ -10,5 +11,6 @@ let exit proc(i64) = proc(code i64) {
let main proc = proc {
write(1, "hello", 5)
+ write(1, " world\n", 7)
exit(0)
}
diff --git a/util.c b/util.c
@@ -1,4 +1,5 @@
#include <stdarg.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
diff --git a/x64.c b/x64.c
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@@ -127,7 +128,22 @@ mov_r64_r64(char *buf, enum reg dest, enum reg src)
}
size_t
-mov_disp8_m64_r64(char *buf, enum reg dest, enum reg src, int8_t disp)
+mov_disp8_m64_r64(char *buf, enum reg dest, int8_t disp, enum reg src)
+{
+ uint8_t mov[] = {0x48, 0x8b};
+ assert(src != 4);
+ if (buf) {
+ memcpy(buf, mov, 2);
+ buf += 2;
+ *(buf++) = (MOD_DISP8 << 6) | (dest << 3) | src;
+ *(buf++) = disp;
+ }
+
+ return 4;
+}
+
+size_t
+mov_disp8_r64_m64(char *buf, enum reg dest, enum reg src, int8_t disp)
{
uint8_t mov[] = {0x48, 0x8b};
assert(src != 4);
@@ -170,6 +186,24 @@ sub_r64_r64(char *buf, enum reg reg1, enum reg reg2)
}
size_t
+sub_r64_imm(char *buf, enum reg dest, int32_t imm)
+{
+ uint8_t mov[] = {0x48, 0x81};
+ uint8_t op1 = (MOD_DIRECT << 6) | (5 << 3) | dest;
+ if (buf) {
+ memcpy(buf, mov, 2);
+ buf += 2;
+ *(buf++) = op1;
+ *(buf++) = imm & 0xFF;
+ *(buf++) = (imm >> 8) & 0xFF;
+ *(buf++) = (imm >> 16) & 0xFF;
+ *(buf++) = (imm >> 24) & 0xFF;
+ }
+
+ return 7;
+}
+
+size_t
cmp_r64_r64(char *buf, enum reg reg1, enum reg reg2)
{
uint8_t mov[] = {0x48, 0x3B};
diff --git a/x64.h b/x64.h
@@ -36,9 +36,11 @@ size_t mov_r_imm(char *buf, enum reg reg, uint64_t imm);
size_t mov_r64_m64(char *buf, enum reg reg, uint64_t addr);
size_t mov_m64_r64(char *buf, uint64_t addr, enum reg reg);
size_t mov_r64_r64(char *buf, enum reg dest, enum reg src);
-size_t mov_disp8_m64_r64(char *buf, enum reg dest, enum reg src, int8_t disp);
+size_t mov_disp8_m64_r64(char *buf, enum reg dest, int8_t disp, enum reg src);
+size_t mov_disp8_r64_m64(char *buf, enum reg dest, enum reg src, int8_t disp);
size_t add_r64_r64(char *buf, enum reg reg1, enum reg reg2);
size_t sub_r64_r64(char *buf, enum reg reg1, enum reg reg2);
+size_t sub_r64_imm(char *buf, enum reg reg1, int32_t imm);
size_t cmp_r64_r64(char *buf, enum reg reg1, enum reg reg2);
size_t jng(char *buf, int64_t offset);
size_t jg(char *buf, int64_t offset);