commit 2547675fd2db30876c61d5b71aa25d95c20a78f9
parent 9797b0e5dbf37f729865a3b22b10e0981fb5dcf3
Author: Nihal Jere <nihal@nihaljere.xyz>
Date: Sat, 18 Dec 2021 14:12:08 -0600
return 1 value from a procedure
Instead of genexpr taking a register as output, it now takes a
struct out, which let's us abstract over storing to a register and
memory. The address to store the output to (a decl's address) is
given in the struct out when we do an fcall, otherwise a register
is given, because that's what we do for all other expression types.
This makes code generation much less efficient: unnecessary registers
are allocated all over the place, and essentially 2 movs each for
what should be 1 mov.
Instructions were added which store and load from memory whose address
is specified in a register.
Diffstat:
6 files changed, 145 insertions(+), 83 deletions(-)
diff --git a/main.c b/main.c
@@ -126,6 +126,46 @@ decl_toreg(char *buf, enum reg reg, struct decl *decl)
return total;
}
+size_t
+out_write(char *buf, struct out *out, enum reg reg)
+{
+ size_t total = 0;
+ switch (out->kind) {
+ case OUT_ADDR:
+ total += mov_mr64_r64(buf ? buf + total : buf, out->reg, reg);
+ break;
+ case OUT_REG:
+ total += mov_r64_r64(buf ? buf + total : buf, out->reg, reg);
+ break;
+ case OUT_IGNORE:
+ break;
+ default:
+ die("unhandled OUT_*");
+ }
+
+ return total;
+}
+
+size_t
+out_read(char *buf, enum reg reg, struct out *out)
+{
+ size_t total = 0;
+ switch (out->kind) {
+ case OUT_ADDR:
+ total += mov_r64_mr64(buf ? buf + total : buf, reg, out->reg);
+ break;
+ case OUT_REG:
+ total += mov_r64_r64(buf ? buf + total : buf, reg, out->reg);
+ break;
+ case OUT_IGNORE:
+ break;
+ default:
+ die("unhandled OUT_*");
+ }
+
+ return total;
+}
+
struct block *curitems;
char *exprkind_str(enum exprkind kind)
@@ -291,25 +331,7 @@ typecheck(struct block items)
}
}
-
-// type must be in params - probably should come up with better interface
-ssize_t
-paramoffset(struct nametypes *params, struct nametype *nametype)
-{
- ssize_t offset = 0;
- struct type *type;
- for (size_t i = params->len - 1; i >= 0; i--) {
- type = &types.data[params->data[i].type];
- offset += type->size;
- if (¶ms->data[i] == nametype)
- break;
- }
-
- // compensate for %rbp push onto stack
- return offset + 8;
-}
-
-size_t genexpr(char *buf, size_t idx, enum reg reg);
+size_t genexpr(char *buf, size_t idx, struct out *out);
size_t genproc(char *buf, struct proc *proc);
size_t genblock(char *buf, struct block *block, bool toplevel);
@@ -321,27 +343,28 @@ gencall(char *buf, size_t addr, struct expr *expr)
if (params->len > 7)
error(expr->start->line, expr->start->col, "syscall can take at most 7 parameters");
- enum reg reg = getreg();
+ struct out out = {OUT_REG, getreg()};
for (int i = 0; i < params->len; i++) {
- len += genexpr(buf ? buf + len : NULL, params->data[i], reg);
- len += push_r64(buf ? buf + len : NULL, reg);
+ len += genexpr(buf ? buf + len : NULL, params->data[i], &out);
+ len += push_r64(buf ? buf + len : NULL, out.reg);
}
- len += mov_r64_imm(buf ? buf + len : NULL, reg, addr);
- len += call(buf ? buf + len : NULL, reg);
+ len += mov_r64_imm(buf ? buf + len : NULL, out.reg, addr);
+ len += call(buf ? buf + len : NULL, out.reg);
- freereg(reg);
+ freereg(out.reg);
return len;
}
size_t
-gensyscall(char *buf, struct expr *expr, enum reg reg)
+gensyscall(char *buf, struct expr *expr, struct out *out)
{
unsigned short pushed = 0;
size_t len = 0;
struct fparams *params = &expr->d.call.params;
+ struct out tempout = { .kind = OUT_REG };
if (params->len > 7)
error(expr->start->line, expr->start->col, "syscall can take at most 7 parameters");
@@ -353,7 +376,8 @@ gensyscall(char *buf, struct expr *expr, enum reg reg)
} else {
used_reg |= (1 << abi_arg[i]);
}
- len += genexpr(buf ? buf + len : NULL, params->data[i], abi_arg[i]);
+ tempout.reg = abi_arg[i];
+ len += genexpr(buf ? buf + len : NULL, params->data[i], &tempout);
}
if (buf) {
@@ -362,11 +386,10 @@ gensyscall(char *buf, struct expr *expr, enum reg reg)
}
len += 2;
- if (reg != RAX)
- len += mov_r64_r64(buf ? buf + len : NULL, reg, RAX);
+ len += out_write(buf ? buf + len : NULL, out, RAX);
for (int i = params->len - 1; i >= 0; i--) {
- if (pushed & (1 << abi_arg[i]) && abi_arg[i] != reg) {
+ if (pushed & (1 << abi_arg[i]) && (out->kind != OUT_REG || abi_arg[i] != out->reg)) {
len += pop_r64(buf ? buf + len : NULL, abi_arg[i]);
} else {
freereg(abi_arg[i]);
@@ -377,65 +400,68 @@ gensyscall(char *buf, struct expr *expr, enum reg reg)
}
size_t
-genexpr(char *buf, size_t idx, enum reg reg)
+genexpr(char *buf, size_t idx, struct out *out)
{
size_t total = 0;
struct expr *expr = &exprs.data[idx];
if (expr->kind == EXPR_LIT) {
+ enum reg reg = getreg();
switch (expr->class) {
case C_INT:
- total = mov_r64_imm(buf ? buf + total : buf, reg, expr->d.v.v.i);
+ total += mov_r64_imm(buf ? buf + total : buf, reg, expr->d.v.v.i);
break;
case C_STR: {
int addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len);
- total = mov_r64_imm(buf ? buf + total : buf, reg, addr);
+ total += mov_r64_imm(buf ? buf + total : buf, reg, addr);
break;
}
default:
error(expr->start->line, expr->start->col, "genexpr: unknown value type!");
}
+
+ total += out_write(buf ? buf + total : NULL, out, reg);
+ freereg(reg);
+
} else if (expr->kind == EXPR_BINARY) {
- total += genexpr(buf ? buf + total : buf, expr->left, reg);
- enum reg rreg = getreg();
- total += genexpr(buf ? buf + total : buf, expr->right, rreg);
+ total += genexpr(buf ? buf + total : buf, expr->left, out);
+ struct out out2 = { OUT_REG, getreg() };
+ total += genexpr(buf ? buf + total : buf, expr->right, &out2);
+
+ enum reg reg = getreg();
+
+ total += out_read(buf ? buf + total : buf, reg, out);
switch (expr->d.op) {
case OP_PLUS: {
- total += add_r64_r64(buf ? buf + total : buf, reg, rreg);
+ total += add_r64_r64(buf ? buf + total : buf, reg, out2.reg);
break;
}
case OP_MINUS: {
- total += sub_r64_r64(buf ? buf + total : buf, reg, rreg);
+ total += sub_r64_r64(buf ? buf + total : buf, reg, out2.reg);
break;
}
case OP_GREATER: {
- total += cmp_r64_r64(buf ? buf + total : buf, reg, rreg);
+ total += cmp_r64_r64(buf ? buf + total : buf, reg, out2.reg);
break;
}
default:
error(expr->start->line, expr->start->col, "genexpr: unknown binary op!");
}
- freereg(rreg);
+ freereg(out2.reg);
+
+ freereg(reg);
} else if (expr->kind == EXPR_IDENT) {
struct decl *decl = finddecl(expr->d.s);
- if (decl != NULL) {
- total += decl_toreg(buf ? buf + total : NULL, reg, decl);
- return total;
- }
-
- struct nametype *param = findparam(&curproc->in, expr->d.s);
- if (param != NULL) {
- // calculate offset
- int8_t offset = paramoffset(&curproc->in, param);
- total += mov_disp8_m64_r64(buf ? buf + total : NULL, reg, offset, RBP);
- return total;
+ if (decl == NULL) {
+ error(expr->start->line, expr->start->col, "genexpr: unknown name '%.*s'", expr->d.s.len, expr->d.s.data);
}
+ total += decl_toreg(buf ? buf + total : NULL, out->reg, decl);
+ return total;
- error(expr->start->line, expr->start->col, "genexpr: unknown name '%.*s'", expr->d.s.len, expr->d.s.data);
} else if (expr->kind == EXPR_FCALL) {
if (slice_cmplit(&expr->d.call.name, "syscall") == 0) {
- total += gensyscall(buf ? buf + total : NULL, expr, reg);
+ total += gensyscall(buf ? buf + total : NULL, expr, out);
} else {
struct decl *decl = finddecl(expr->d.call.name);
if (decl == NULL) {
@@ -448,9 +474,9 @@ genexpr(char *buf, size_t idx, enum reg reg)
struct expr *binary = &exprs.data[expr->d.cond.cond];
// FIXME this should go away
assert(binary->kind == EXPR_BINARY);
- enum reg reg = getreg();
- total += genexpr(buf ? buf + total : NULL, expr->d.cond.cond, reg);
- freereg(reg);
+ struct out tempout = {OUT_REG, getreg()};
+ total += genexpr(buf ? buf + total : NULL, expr->d.cond.cond, &tempout);
+ freereg(tempout.reg);
size_t iflen = genblock(NULL, &expr->d.cond.bif, false) + jmp(NULL, 0);
size_t elselen = genblock(NULL, &expr->d.cond.belse, false);
switch (binary->d.op) {
@@ -482,13 +508,11 @@ genblock(char *buf, struct block *block, bool toplevel)
for (int i = 0; i < block->len; i++) {
struct item *item = &block->data[i];
if (item->kind == ITEM_EXPR) {
- enum reg reg = getreg();
- total += genexpr(buf ? buf + total : NULL, item->idx, reg);
- freereg(reg);
+ struct out tempout = {OUT_IGNORE};
+ total += genexpr(buf ? buf + total : NULL, item->idx, &tempout);
} else if (item->kind == ITEM_DECL) {
struct decl *decl = &block->decls.data[item->idx];
struct expr *expr = &exprs.data[decl->val];
- enum reg reg;
decl->kind = toplevel ? DECL_DATA : DECL_STACK;
decl_alloc(block, decl);
@@ -500,10 +524,10 @@ genblock(char *buf, struct block *block, bool toplevel)
total += genproc(buf ? buf + total : NULL, &(expr->d.proc));
curproc = NULL;
} else {
- reg = getreg();
- total += genexpr(buf ? buf + total : NULL, block->decls.data[item->idx].val, reg);
- total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
- freereg(reg);
+ struct out tempout = {OUT_REG, getreg()};
+ total += genexpr(buf ? buf + total : NULL, block->decls.data[item->idx].val, &tempout);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, tempout.reg);
+ freereg(tempout.reg);
}
decl->declared = true;
@@ -511,30 +535,30 @@ genblock(char *buf, struct block *block, bool toplevel)
struct expr *expr = &exprs.data[assgns.data[item->idx].val];
struct assgn *assgn = &assgns.data[item->idx];
struct decl *decl = finddecl(assgn->s);
- enum reg reg;
+ struct out tempout = {OUT_REG};
if (decl == NULL)
error(assgn->start->line, assgn->start->col, "unknown name");
if (!decl->declared)
error(assgn->start->line, assgn->start->col, "assignment before declaration");
+ tempout.reg = getreg();
switch (expr->class) {
case C_INT:
// this is sort of an optimization, since we write at compile-time instead of evaluating and storing. should this happen here in the long term?
- reg = getreg();
- total += genexpr(buf ? buf + total : NULL, assgn->val, reg);
- total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
- freereg(reg);
+ total += genexpr(buf ? buf + total : NULL, assgn->val, &tempout);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, tempout.reg);
break;
// FIXME: we assume that any string is a literal, may break if we add binary operands on strings in the future.
case C_STR:
size_t addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len);
- total += mov_r64_imm(buf ? buf + total : NULL, reg, addr);
- total += decl_fromreg(buf ? buf + total : NULL, decl, reg);
+ total += mov_r64_imm(buf ? buf + total : NULL, tempout.reg, addr);
+ total += decl_fromreg(buf ? buf + total : NULL, decl, tempout.reg);
break;
default:
error(expr->start->line, expr->start->col, "cannot generate code for unknown expression class");
}
+ freereg(tempout.reg);
} else if (item->kind == ITEM_RETURN) {
total += mov_r64_r64(buf ? buf + total : NULL, RSP, RBP);
total += pop_r64(buf ? buf + total : NULL, RBP);
diff --git a/nooc.h b/nooc.h
@@ -115,7 +115,7 @@ struct decl {
DECL_DATA
} kind;
union {
- size_t off;
+ int64_t off;
size_t addr;
} loc;
struct token *start;
@@ -223,3 +223,12 @@ struct exprs {
size_t len;
struct expr *data;
};
+
+struct out {
+ enum {
+ OUT_REG,
+ OUT_ADDR,
+ OUT_IGNORE,
+ } kind;
+ int reg;
+};
diff --git a/parse.c b/parse.c
@@ -148,12 +148,12 @@ parseexpr(struct block *block)
if (tok->type == TOK_LPAREN)
parsenametypes(&expr.d.proc.out);
expr.d.proc.block = parseblock();
- for (size_t i = 0; i < expr.d.proc.in.len; i++) {
+ for (int i = expr.d.proc.in.len - 1; i >= 0; i--) {
decl.s = expr.d.proc.in.data[i].name;
decl.type = expr.d.proc.in.data[i].type;
type = &types.data[decl.type];
offset += type->size;
- decl.loc.off = offset;
+ decl.loc.off = -offset - 8;
array_add((&block->decls), decl);
}
@@ -163,7 +163,7 @@ parseexpr(struct block *block)
decl.declared = true;
type = &types.data[decl.type];
offset += type->size;
- decl.loc.off = offset;
+ decl.loc.off = -offset;
array_add((&block->decls), decl);
}
// a function call
diff --git a/test/exitwrite.pass.nooc b/test/exitwrite.pass.nooc
@@ -10,7 +10,10 @@ let exit proc(i64) = proc(code i64) {
let main proc() = proc() {
let ret i64 = 0
- ret = write(1, "hello", 5)
- ret = write(1, " world\n", 7)
- exit(0)
+ ret = write(1, "hello\n", 6)
+ if > ret 0 {
+ exit(0)
+ } else {
+ exit(1)
+ }
}
diff --git a/x64.c b/x64.c
@@ -71,9 +71,9 @@ size_t
mov_r64_imm(char *buf, enum reg dest, uint64_t imm)
{
if (buf) {
- *(buf++) = REX_W;
+ *(buf++) = REX_W | (dest >= 8 ? REX_B : 0);
*(buf++) = 0xc7;
- *(buf++) = (MOD_DIRECT << 6) | dest;
+ *(buf++) = (MOD_DIRECT << 6) | (dest & 0x7);
*(buf++) = imm & 0xFF;
*(buf++) = (imm >> 8) & 0xFF;
*(buf++) = (imm >> 16) & 0xFF;
@@ -88,9 +88,9 @@ mov_r64_m64(char *buf, enum reg dest, uint64_t addr)
{
uint8_t sib = 0x25;
if (buf) {
- *(buf++) = REX_W;
+ *(buf++) = REX_W | (dest >= 8 ? REX_R : 0);
*(buf++) = 0x8b;
- *(buf++) = (MOD_INDIRECT << 6) | (dest << 3) | 4;
+ *(buf++) = (MOD_INDIRECT << 6) | ((dest & 7) << 3) | 4;
*(buf++) = sib;
*(buf++) = addr & 0xFF;
*(buf++) = (addr >> 8) & 0xFF;
@@ -120,10 +120,34 @@ mov_m64_r64(char *buf, uint64_t addr, enum reg src)
}
size_t
-mov_r64_r64(char *buf, enum reg dest, enum reg src)
+mov_mr64_r64(char *buf, enum reg dest, enum reg src)
{
if (buf) {
*(buf++) = REX_W;
+ *(buf++) = 0x8B;
+ *(buf++) = (MOD_INDIRECT << 6) | (src << 3) | dest;
+ }
+
+ return 3;
+}
+
+size_t
+mov_r64_mr64(char *buf, enum reg dest, enum reg src)
+{
+ if (buf) {
+ *(buf++) = REX_W;
+ *(buf++) = 0x89;
+ *(buf++) = (MOD_INDIRECT << 6) | (dest << 3) | src;
+ }
+
+ return 3;
+}
+
+size_t
+mov_r64_r64(char *buf, enum reg dest, enum reg src)
+{
+ if (buf) {
+ *(buf++) = REX_W | (src >= 8 ? REX_R : 0) | (dest >= 8 ? REX_B : 0);
*(buf++) = 0x89;
*(buf++) = (MOD_DIRECT << 6) | (src << 3) | dest;
}
diff --git a/x64.h b/x64.h
@@ -28,6 +28,8 @@ size_t add_r64_imm(char *buf, enum reg reg, uint64_t imm);
size_t mov_r64_imm(char *buf, enum reg reg, uint64_t imm);
size_t mov_r64_m64(char *buf, enum reg reg, uint64_t addr);
size_t mov_m64_r64(char *buf, uint64_t addr, enum reg reg);
+size_t mov_mr64_r64(char *buf, enum reg dest, enum reg src);
+size_t mov_r64_mr64(char *buf, enum reg dest, enum reg src);
size_t mov_r64_r64(char *buf, enum reg dest, enum reg src);
size_t mov_disp8_m64_r64(char *buf, enum reg dest, int8_t disp, enum reg src);
size_t mov_disp8_r64_m64(char *buf, enum reg dest, enum reg src, int8_t disp);