nooc

nooc programming language compiler
git clone git://git.nihaljere.xyz/nooc
Log | Files | Refs | LICENSE

commit 316b4a26609d1410328c609db350f2173a024ce1
parent f2ef3b6f873ae9aae6ebb54b4f7f14f1c9c6da74
Author: Nihal Jere <nihal@nihaljere.xyz>
Date:   Wed,  5 Jan 2022 10:03:21 -0600

replace x64 code generation with intermediate representation

An IR allows us to do instruction set agnostic code generation and
optimization. This incidentally removes all code generation from
main.c and puts it in ir.c. There is enough of an intermediate
language to pass all tests. Since all x64 code generation is removed,
generating executables has temporarily been removed.

Diffstat:
MMakefile | 4++--
Marray.c | 1+
Mblockstack.c | 1+
Air.c | 256+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Air.h | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Mlex.c | 1+
Mmain.c | 511+++++++------------------------------------------------------------------------
Mmap.c | 1+
Mnooc.h | 22+++++-----------------
Mparse.c | 6++----
Atest/add.pass.nooc | 6++++++
Atest/decl.pass.nooc | 4++++
Mtest/exit.pass.nooc | 1-
Mtest/exitwrite.pass.nooc | 1+
Mtest/proc.pass.nooc | 4+++-
Mtest/syscall_ret.pass.nooc | 4+++-
Mtest/yes.pass.nooc | 4+++-
Mtype.c | 2++
Mutil.c | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mutil.h | 1+
Mx64.c | 1+
21 files changed, 455 insertions(+), 498 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,8 +1,8 @@ .c.o: $(CC) -Wall -c $< -o $@ -nooc: main.o array.o util.o x64.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o blockstack.o - $(CC) main.o array.o x64.o util.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o blockstack.o -o nooc +nooc: main.o array.o util.o x64.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o blockstack.o ir.o + $(CC) main.o array.o x64.o util.o elf.o lex.o parse.o map.o siphash.o type.o blake3.o blockstack.o ir.o -o nooc clean: rm -f *.o nooc diff --git a/array.c b/array.c @@ -4,6 +4,7 @@ #include <string.h> #include "nooc.h" +#include "ir.h" #include "util.h" #include "array.h" diff --git a/blockstack.c b/blockstack.c @@ -3,6 +3,7 @@ #include <stdlib.h> #include "nooc.h" +#include "ir.h" #include "util.h" struct block *blockstack[BLOCKSTACKSIZE]; diff --git a/ir.c b/ir.c @@ -0,0 +1,256 @@ +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include "array.h" +#include "nooc.h" +#include "parse.h" +#include "ir.h" +#include "util.h" +#include "blockstack.h" + +extern struct types types; +extern struct exprs exprs; +extern struct assgns assgns; + +#define PUTINS(op, val) ins = (struct instr){(op), (val)} ; array_add(out, ins) ; + +static uint64_t tmpi; +static uint64_t labeli; + +static void +genblock(struct iproc *out, struct block *block); + +static uint64_t +procindex(struct toplevel *top, struct slice *s) +{ + for (size_t i = 0; i < top->code.len; i++) { + struct iproc *iproc = &top->code.data[i]; + if (slice_cmp(s, &iproc->s) == 0) + return i; + } + + die("unknown function, should be unreachable"); + return 0; +} + +static uint64_t +genexpr(struct iproc *out, size_t expri) +{ + struct instr ins; + struct expr *expr = &exprs.data[expri]; + uint64_t what = 0; + switch (expr->kind) { + case EXPR_LIT: + switch (expr->class) { + case C_INT: + what = tmpi++; + PUTINS(IR_ASSIGN, what); + PUTINS(IR_SIZE, 8); // FIXME: should not be 
hardcoded + PUTINS(IR_IMM, expr->d.v.v.i64); + break; + default: + die("genexpr: EXPR_LIT: unhandled class"); + } + break; + case EXPR_IDENT: { + struct decl *decl = finddecl(expr->d.s); + struct type *type = &types.data[decl->type]; + uint64_t where; + if (decl->toplevel) { + where = tmpi++; + PUTINS(IR_ASSIGN, where); + PUTINS(IR_SIZE, 8); // FIXME: should not be hardcoded + PUTINS(IR_IMM, decl->w.addr); + } else { + where = decl->w.index; + } + + what = tmpi++; + PUTINS(IR_ASSIGN, what); + switch (type->size) { + case 1: + case 2: + case 4: + case 8: + PUTINS(IR_SIZE, type->size); + break; + default: + die("genexpr: unknown size"); + } + PUTINS(IR_LOAD, where); + break; + } + case EXPR_BINARY: { + uint64_t left = genexpr(out, expr->d.bop.left); + uint64_t right = genexpr(out, expr->d.bop.right); + what = tmpi++; + switch (expr->d.bop.kind) { + case BOP_PLUS: + PUTINS(IR_ASSIGN, what); + PUTINS(IR_ADD, left); // FIXME: operand size? + PUTINS(IR_EXTRA, right); + break; + case BOP_EQUAL: + PUTINS(IR_ASSIGN, what); + PUTINS(IR_CEQ, left); + PUTINS(IR_EXTRA, right); + break; + default: + die("genexpr: EXPR_BINARY: unhandled binop kind"); + } + break; + } + case EXPR_UNARY: { + switch (expr->d.uop.kind) { + case UOP_REF: { + struct expr *operand = &exprs.data[expr->d.uop.expr]; + assert(operand->kind == EXPR_IDENT); + struct decl *decl = finddecl(operand->d.s); + // a global + if (decl->toplevel) { + what = tmpi++; + PUTINS(IR_ASSIGN, what); + PUTINS(IR_IMM, decl->w.addr); + } else { + what = decl->w.index; + } + break; + } + default: + die("genexpr: EXPR_UNARY: unhandled unop kind"); + } + break; + } + case EXPR_FCALL: { + // what doesn't matter + what = 1; + uint64_t proc = procindex(out->top, &expr->d.call.name); + size_t params[20]; + assert(expr->d.call.params.len <= 20); + for (size_t i = 0; i < expr->d.call.params.len; i++) { + params[i] = genexpr(out, expr->d.call.params.data[i]); + } + PUTINS(IR_CALL, proc); + for (size_t i = 0; i < expr->d.call.params.len; 
i++) { + PUTINS(IR_CALLARG, params[i]); + } + break; + } + case EXPR_COND: { + what = 1; // this doesn't matter until we add ternary-like usage + size_t condtmp = genexpr(out, expr->d.cond.cond); + size_t elselabel = labeli++; + size_t endlabel = labeli++; + PUTINS(IR_CONDJUMP, elselabel); + PUTINS(IR_EXTRA, condtmp); + genblock(out, &expr->d.cond.bif); + PUTINS(IR_JUMP, endlabel); + PUTINS(IR_LABEL, elselabel); + genblock(out, &expr->d.cond.belse); + PUTINS(IR_LABEL, endlabel); + break; + } + default: + die("genexpr: expr kind"); + } + + assert(what); + return what; +} + +static void +genblock(struct iproc *out, struct block *block) +{ + struct decl *decl; + struct type *type; + struct assgn *assgn; + struct instr ins; + + for (size_t i = 0; i < block->len; i++) { + struct item *item = &block->data[i]; + uint64_t what; + switch (item->kind) { + case ITEM_DECL: + decl = &block->decls.data[item->idx]; + type = &types.data[decl->type]; + decl->w.index = tmpi++; + PUTINS(IR_ASSIGN, (decl->w.index)); + switch (type->size) { + case 1: + case 2: + case 4: + case 8: + PUTINS(IR_SIZE, type->size); + break; + default: + die("ir_genproc: unknown size"); + } + PUTINS(IR_ALLOC, 1); + what = genexpr(out, decl->val); + switch (type->size) { + case 1: + case 2: + case 4: + case 8: + PUTINS(IR_SIZE, type->size); + break; + default: + die("ir_genproc: unknown size"); + } + PUTINS(IR_STORE, what); + PUTINS(IR_EXTRA, decl->w.index); + break; + case ITEM_ASSGN: + assgn = &assgns.data[item->idx]; + decl = finddecl(assgn->s); + type = &types.data[decl->type]; + what = genexpr(out, assgn->val); + switch (type->size) { + case 1: + case 2: + case 4: + case 8: + PUTINS(IR_SIZE, type->size); + break; + default: + die("ir_genproc: unknown size"); + } + PUTINS(IR_STORE, what); + PUTINS(IR_EXTRA, decl->w.index); + break; + case ITEM_EXPR: + genexpr(out, item->idx); + break; + case ITEM_RETURN: + PUTINS(IR_RETURN, 0); + break; + default: + die("ir_genproc: unreachable"); + } + } +} + +void 
+genproc(struct iproc *out, struct proc *proc) +{ + tmpi = 1; + labeli = 1; + struct instr ins; + struct type *type; + + blockpush(&proc->block); + + for (size_t i = 0; i < proc->in.len; i++) { + type = &types.data[proc->in.data[i].type]; + PUTINS(IR_IN, tmpi++); + PUTINS(IR_SIZE, type->size); + } + + genblock(out, &proc->block); + + dumpir(out); + blockpop(); +} diff --git a/ir.h b/ir.h @@ -0,0 +1,50 @@ +struct instr { + enum { + IR_NONE, + + IR_IMM, + IR_STORE, + IR_ALLOC, + IR_LOAD, + + IR_CALL, + IR_RETURN, + IR_LABEL, + IR_CONDJUMP, + IR_JUMP, + + // binary ops + IR_ADD, + + // comparison + IR_CEQ, + + // glue + IR_ASSIGN, + IR_CALLARG, + IR_IN, + IR_SIZE, + IR_EXTRA, + } op; + uint64_t id; +}; + +struct iproc { + size_t len; + size_t cap; + struct instr *data; + struct toplevel *top; // FIXME: basically just used to pass a parameter... + struct slice s; +}; + +struct iprocs { + size_t len, cap; + struct iproc *data; +}; + +struct toplevel { + struct data data; + struct iprocs code; +}; + +void genproc(struct iproc *out, struct proc *proc); diff --git a/lex.c b/lex.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include "nooc.h" +#include "ir.h" #include "util.h" #include "lex.h" diff --git a/main.c b/main.c @@ -12,6 +12,7 @@ #include "array.h" #include "x64.h" #include "nooc.h" +#include "ir.h" #include "util.h" #include "elf.h" #include "lex.h" @@ -32,6 +33,7 @@ extern struct types types; char *infile; +// TODO: remove struct data data_seg; uint64_t @@ -49,413 +51,21 @@ data_pushzero(size_t len) } void -decl_set(struct decl *decl, void *ptr) +data_set(uint64_t addr, void *ptr, size_t len) { - struct type *type = &types.data[decl->type]; - assert(decl->place.kind == PLACE_ABS); - memcpy(&data_seg.data[decl->place.l.addr - DATA_OFFSET], ptr, type->size); + memcpy(&data_seg.data[addr - DATA_OFFSET], ptr, len); } void decl_alloc(struct block *block, struct decl *decl) { struct type *type = &types.data[decl->type]; - decl->place.size = type->size; - switch 
(decl->place.kind) { - case PLACE_ABS: - if (type->class == TYPE_ARRAY) { - struct type *subtype = &types.data[type->d.arr.subtype]; - decl->place.l.addr = data_pushzero(subtype->size * type->d.arr.len); - } else { - decl->place.l.addr = data_pushzero(type->size); - } - break; - case PLACE_FRAME: - decl->place.l.off = block->datasize; - block->datasize += type->size; - break; - default: - die("decl_alloc: unknown decl kind"); - } -} - -size_t -place_move(char *buf, struct place *dest, struct place *src) -{ - assert(dest->size != 0); - assert(src->size != 0); - size_t total = 0; - switch (src->kind) { - case PLACE_REG: - switch (dest->kind) { - case PLACE_REG: - total += mov_r64_r64(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - case PLACE_REGADDR: - switch (dest->size) { - case 8: - total += mov_mr64_r64(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - case 4: - total += mov_mr32_r32(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - case 2: - total += mov_mr16_r16(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - case 1: - total += mov_mr8_r8(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - default: - die("place_move: REG -> REGADDR: unhandled size"); - } - break; - case PLACE_FRAME: - switch (dest->size) { - case 8: - total += mov_disp8_m64_r64(buf ? buf + total : NULL, RBP, -dest->l.off, src->l.reg); - break; - case 4: - total += mov_disp8_m32_r32(buf ? buf + total : NULL, RBP, -dest->l.off, src->l.reg); - break; - case 2: - total += mov_disp8_m16_r16(buf ? buf + total : NULL, RBP, -dest->l.off, src->l.reg); - break; - case 1: - total += mov_disp8_m8_r8(buf ? 
buf + total : NULL, RBP, -dest->l.off, src->l.reg); - break; - default: - die("place_move: REG -> REGADDR: unhandled size"); - } - break; - default: - die("place_move: unhandled dest case for PLACE_REG"); - } - break; - case PLACE_REGADDR: - switch (src->size) { - case 8: - switch (dest->kind) { - case PLACE_REG: - total += mov_r64_mr64(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - default: - die("place_move: unhandled dest case for PLACE_REGADDR"); - } - break; - case 4: - switch (dest->kind) { - case PLACE_REG: - total += mov_r32_mr32(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - default: - die("place_move: unhandled dest case for PLACE_REGADDR"); - } - break; - case 2: - switch (dest->kind) { - case PLACE_REG: - total += mov_r16_mr16(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - default: - die("place_move: unhandled dest case for PLACE_REGADDR"); - } - break; - case 1: - switch (dest->kind) { - case PLACE_REG: - total += mov_r8_mr8(buf ? buf + total : NULL, dest->l.reg, src->l.reg); - break; - default: - die("place_move: unhandled dest case for PLACE_REGADDR"); - } - break; - default: - die("place_move: REGADDR: src unhandled size"); - } - break; - case PLACE_FRAME: - switch (src->size) { - case 8: - switch (dest->kind) { - case PLACE_REG: - total += mov_disp8_r64_m64(buf ? buf + total : NULL, dest->l.reg, RBP, -src->l.off); - break; - default: - die("place_move: unhandled dest case for PLACE_FRAME"); - } - break; - case 4: - switch (dest->kind) { - case PLACE_REG: - total += mov_disp8_r32_m32(buf ? buf + total : NULL, dest->l.reg, RBP, -src->l.off); - break; - default: - die("place_move: unhandled dest case for PLACE_FRAME"); - } - break; - case 2: - switch (dest->kind) { - case PLACE_REG: - total += mov_disp8_r16_m16(buf ? 
buf + total : NULL, dest->l.reg, RBP, -src->l.off); - break; - default: - die("place_move: unhandled dest case for PLACE_FRAME"); - } - break; - case 1: - switch (dest->kind) { - case PLACE_REG: - total += mov_disp8_r8_m8(buf ? buf + total : NULL, dest->l.reg, RBP, -src->l.off); - break; - default: - die("place_move: unhandled dest case for PLACE_FRAME"); - } - break; - default: - die("place_move: FRAME: src unhandled size"); - } - break; - case PLACE_ABS: - switch(src->size) { - case 8: - switch (dest->kind) { - case PLACE_REG: - total += mov_r64_m64(buf ? buf + total : NULL, dest->l.reg, src->l.addr); - break; - default: - die("place_move: unhandled dest case for PLACE_ABS"); - } - break; - case 4: - switch (dest->kind) { - case PLACE_REG: - total += mov_r32_m32(buf ? buf + total : NULL, dest->l.reg, src->l.addr); - break; - default: - die("place_move: unhandled dest case for PLACE_ABS"); - } - break; - case 2: - switch (dest->kind) { - case PLACE_REG: - total += mov_r16_m16(buf ? buf + total : NULL, dest->l.reg, src->l.addr); - break; - default: - die("place_move: unhandled dest case for PLACE_ABS"); - } - break; - case 1: - switch (dest->kind) { - case PLACE_REG: - total += mov_r8_m8(buf ? 
buf + total : NULL, dest->l.reg, src->l.addr); - break; - default: - die("place_move: unhandled dest case for PLACE_ABS"); - } - break; - default: - die("place_move: ABS: src unhandled size"); - } - break; - default: - die("place_move: unhandled src case"); - } - - return total; -} - -size_t genexpr(char *buf, size_t idx, struct place *place); -size_t genproc(char *buf, struct proc *proc); -size_t genblock(char *buf, struct block *block, bool toplevel); - -size_t -gencall(char *buf, size_t addr, struct expr *expr) -{ - size_t len = 0; - struct fparams *params = &expr->d.call.params; - if (params->len > 7) - error(expr->start->line, expr->start->col, "syscall can take at most 7 parameters"); - - struct place place = {PLACE_REG, .size = 8, .l.reg = getreg()}; - - for (int i = 0; i < params->len; i++) { - len += genexpr(buf ? buf + len : NULL, params->data[i], &place); - len += push_r64(buf ? buf + len : NULL, place.l.reg); - } - - len += mov_r64_imm(buf ? buf + len : NULL, place.l.reg, addr); - len += call(buf ? buf + len : NULL, place.l.reg); - - freereg(place.l.reg); - - return len; -} - -size_t -gensyscall(char *buf, struct expr *expr, struct place *place) -{ - unsigned short pushed = 0; - size_t len = 0; - struct fparams *params = &expr->d.call.params; - struct place reg = { .kind = PLACE_REG, .size = 8 }; - if (params->len > 7) - error(expr->start->line, expr->start->col, "syscall can take at most 7 parameters"); - - // encoding for argument registers in ABI order - for (int i = 0; i < params->len; i++) { - if (used_reg & (1 << abi_arg[i])) { - len += push_r64(buf ? buf + len : NULL, abi_arg[i]); - pushed |= (1 << abi_arg[i]); - } else { - used_reg |= (1 << abi_arg[i]); - } - reg.l.reg = abi_arg[i]; - len += genexpr(buf ? buf + len : NULL, params->data[i], &reg); - } - - if (buf) { - char syscall[] = {0x0f, 0x05}; - memcpy(buf + len, syscall, 2); - } - len += 2; - - reg.l.reg = RAX; - len += place_move(buf ? 
buf + len : NULL, place, &reg); - - for (int i = params->len - 1; i >= 0; i--) { - // FIXME: we shouldn't have to touch the place structure here - if (pushed & (1 << abi_arg[i]) && (place->kind != PLACE_REG || abi_arg[i] != place->l.reg)) { - len += pop_r64(buf ? buf + len : NULL, abi_arg[i]); - } else { - freereg(abi_arg[i]); - } - } - - return len; -} - -size_t -genexpr(char *buf, size_t idx, struct place *out) -{ - size_t total = 0; - struct expr *expr = &exprs.data[idx]; - - if (expr->kind == EXPR_LIT) { - struct place src = {PLACE_REG, .size = 8, .l.reg = getreg()}; - switch (expr->class) { - case C_INT: - total += mov_r64_imm(buf ? buf + total : buf, src.l.reg, expr->d.v.v.i64); - break; - case C_STR: { - int addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len); - total += mov_r64_imm(buf ? buf + total : buf, src.l.reg, addr); - break; - } - default: - error(expr->start->line, expr->start->col, "genexpr: unknown value type!"); - } - - total += place_move(buf ? buf + total : NULL, out, &src); - freereg(src.l.reg); - } else if (expr->kind == EXPR_BINARY) { - struct place place1 = { PLACE_REG, .size = 8, .l.reg = getreg() }; - total += genexpr(buf ? buf + total : buf, expr->d.bop.left, &place1); - struct place place2 = { PLACE_REG, .size = 8, .l.reg = getreg() }; - total += genexpr(buf ? buf + total : buf, expr->d.bop.right, &place2); - - // FIXME: abstract these to act on places, so that we can generate more efficient code - switch (expr->d.bop.kind) { - case BOP_PLUS: { - total += add_r64_r64(buf ? buf + total : buf, place1.l.reg, place2.l.reg); - break; - } - case BOP_MINUS: { - total += sub_r64_r64(buf ? buf + total : buf, place1.l.reg, place2.l.reg); - break; - } - case BOP_EQUAL: - case BOP_GREATER: { - total += cmp_r64_r64(buf ? buf + total : buf, place1.l.reg, place2.l.reg); - break; - } - default: - error(expr->start->line, expr->start->col, "genexpr: unknown binary op!"); - } - - freereg(place2.l.reg); - - total += place_move(buf ? 
buf + total : buf, out, &place1); - freereg(place1.l.reg); - } else if (expr->kind == EXPR_UNARY) { - assert(expr->d.uop.kind == UOP_REF); - struct expr *ident = &exprs.data[expr->d.uop.expr]; - assert(ident->kind == EXPR_IDENT); - struct decl *decl = finddecl(ident->d.s); - struct place src = { .kind = PLACE_REG, .l.reg = getreg(), .size = 8 }; - - switch (decl->place.kind) { - case PLACE_ABS: - total += mov_r64_imm(buf ? buf + total : NULL, src.l.reg, decl->place.l.addr); - total += place_move(buf ? buf + total : NULL, out, &src); - break; - case PLACE_FRAME: - total += lea_disp8(buf ? buf + total : NULL, src.l.reg, RBP, -decl->place.l.off); - total += place_move(buf ? buf + total : NULL, out, &src); - break; - default: - die("genexpr: unhandled place kind for EXPR_UNARY"); - } - - freereg(src.l.reg); - } else if (expr->kind == EXPR_IDENT) { - struct decl *decl = finddecl(expr->d.s); - if (decl == NULL) { - error(expr->start->line, expr->start->col, "genexpr: unknown name '%.*s'", expr->d.s.len, expr->d.s.data); - } - total += place_move(buf ? buf + total : NULL, out, &decl->place); - return total; - - } else if (expr->kind == EXPR_FCALL) { - if (slice_cmplit(&expr->d.call.name, "syscall") == 0) { - total += gensyscall(buf ? buf + total : NULL, expr, out); - } else { - struct decl *decl = finddecl(expr->d.call.name); - if (decl == NULL) { - error(expr->start->line, expr->start->col, "unknown function!"); - } - - total += gencall(buf ? buf + total : NULL, decl->place.l.addr, expr); - } - } else if (expr->kind == EXPR_COND) { - struct expr *binary = &exprs.data[expr->d.cond.cond]; - // FIXME this should go away - assert(binary->kind == EXPR_BINARY); - struct place tempplace = {PLACE_REG, .size = 8, .l.reg = getreg()}; - total += genexpr(buf ? 
buf + total : NULL, expr->d.cond.cond, &tempplace); - size_t iflen = genblock(NULL, &expr->d.cond.bif, false) + jmp(NULL, 0); - size_t elselen = genblock(NULL, &expr->d.cond.belse, false); - switch (binary->d.bop.kind) { - case BOP_GREATER: - total += jng(buf ? buf + total : NULL, iflen); - break; - case BOP_EQUAL: - total += jne(buf ? buf + total : NULL, iflen); - break; - default: - error(expr->start->line, expr->start->col, "unknown binop for conditional"); - } - freereg(tempplace.l.reg); - total += genblock(buf ? buf + total : NULL, &expr->d.cond.bif, false); - total += jmp(buf ? buf + total: NULL, elselen); - total += genblock(buf ? buf + total : NULL, &expr->d.cond.belse, false); - } else if (expr->kind == EXPR_LOOP) { - size_t back = genblock(NULL, &expr->d.loop.block, false) + jmp(NULL, 0); - total += genblock(buf ? buf + total : NULL, &expr->d.loop.block, false); - total += jmp(buf ? buf + total: NULL, -back); + if (type->class == TYPE_ARRAY) { + struct type *subtype = &types.data[type->d.arr.subtype]; + decl->w.addr = data_pushzero(subtype->size * type->d.arr.len); } else { - error(expr->start->line, expr->start->col, "genexpr: could not generate code for expression"); + decl->w.addr = data_pushzero(type->size); } - return total; } void @@ -464,12 +74,14 @@ evalexpr(struct decl *decl) struct expr *expr = &exprs.data[decl->val]; if (expr->kind == EXPR_LIT) { switch (expr->class) { - case C_INT: - decl_set(decl, &expr->d.v.v); + case C_INT: { + struct type *type = &types.data[decl->type]; + data_set(decl->w.addr, &expr->d.v.v, type->size); break; + } case C_STR: { uint64_t addr = data_push(expr->d.v.v.s.data, expr->d.v.v.s.len); - decl->place.l.addr = addr; + decl->w.addr = addr; break; } default: @@ -480,85 +92,53 @@ evalexpr(struct decl *decl) } } -// FIXME: It is not ideal to calculate length by doing all the calculations to generate instruction, before we actually write the instructions. 
size_t -genblock(char *buf, struct block *block, bool toplevel) +gentoplevel(struct toplevel *toplevel, struct block *block) { blockpush(block); typecheck(block); size_t total = 0; + struct iproc iproc = { 0 }; + + iproc.s = (struct slice){7, 7, "syscall"}; + array_add((&toplevel->code), iproc); for (int i = 0; i < block->len; i++) { struct item *item = &block->data[i]; - if (item->kind == ITEM_EXPR) { - struct place tempout = {PLACE_REG, .size = 8, .l.reg = getreg()}; - total += genexpr(buf ? buf + total : NULL, item->idx, &tempout); - freereg(tempout.l.reg); - } else if (item->kind == ITEM_DECL) { + + switch (item->kind) { + case ITEM_EXPR: + die("toplevel expressions are unimplemented"); + case ITEM_ASSGN: + die("toplevel assignments are unimplemented"); + case ITEM_DECL: { struct decl *decl = &block->decls.data[item->idx]; struct expr *expr = &exprs.data[decl->val]; - decl->place.kind = toplevel ? PLACE_ABS : PLACE_FRAME; decl_alloc(block, decl); - if (toplevel) { - if (expr->class == C_PROC) { - block->decls.data[item->idx].place.l.addr = total + TEXT_OFFSET; - total += genproc(buf ? buf + total : NULL, &(expr->d.proc)); - } else { - evalexpr(decl); - } + if (expr->class == C_PROC) { + assert(expr->kind = EXPR_PROC); + iproc = (struct iproc){ 0 }; + iproc.top = toplevel; + iproc.s = decl->s; + genproc(&iproc, &(expr->d.proc)); + array_add((&toplevel->code), iproc); } else { - struct place tempout = {PLACE_REG, .size = 8, .l.reg = getreg()}; - total += genexpr(buf ? buf + total : NULL, block->decls.data[item->idx].val, &tempout); - total += place_move(buf ? 
buf + total : NULL, &decl->place, &tempout); - freereg(tempout.l.reg); + evalexpr(decl); } - decl->declared = true; - } else if (item->kind == ITEM_ASSGN) { - struct expr *expr = &exprs.data[assgns.data[item->idx].val]; - struct assgn *assgn = &assgns.data[item->idx]; - struct decl *decl = finddecl(assgn->s); - if (decl == NULL) - error(assgn->start->line, assgn->start->col, "unknown name"); - - if (!decl->declared) - error(assgn->start->line, assgn->start->col, "assignment before declaration"); - - if (expr->class == C_PROC) { - error(assgn->start->line, assgn->start->col, "reassignment of procedure not allowed (yet)"); - } - - struct place tempout = {PLACE_REG, .size = 8, .l.reg = getreg()}; - total += genexpr(buf ? buf + total : NULL, assgn->val, &tempout); - total += place_move(buf ? buf + total : NULL, &decl->place, &tempout); - freereg(tempout.l.reg); - } else if (item->kind == ITEM_RETURN) { - total += mov_r64_r64(buf ? buf + total : NULL, RSP, RBP); - total += pop_r64(buf ? buf + total : NULL, RBP); - total += ret(buf ? buf + total : NULL); - } else { - error(item->start->line, item->start->col, "cannot generate code for type"); + break; } + default: + die("unreachable"); + } + } blockpop(); return total; } -size_t -genproc(char *buf, struct proc *proc) -{ - size_t total = 0; - - total += push_r64(buf ? buf + total : NULL, RBP); - total += mov_r64_r64(buf ? buf + total : NULL, RBP, RSP); - total += sub_r64_imm(buf ? buf + total : NULL, RSP, proc->block.datasize + 8); - total += genblock(buf ? 
buf + total : NULL, &proc->block, false); - - return total; -} - struct stat statbuf; int @@ -595,17 +175,8 @@ main(int argc, char *argv[]) inittypes(); struct block items = parse(head); - clearreg(); - size_t len = genblock(NULL, &items, true); - char *text = malloc(len); - if (!text) { - fprintf(stderr, "text allocation failed!"); - return 1; - } - - clearreg(); - size_t len2 = genblock(text, &items, true); - assert(len == len2); + struct toplevel toplevel = { 0 }; + gentoplevel(&toplevel, &items); FILE *out = fopen(argv[2], "w"); if (!out) { @@ -623,7 +194,5 @@ main(int argc, char *argv[]) blockpop(); munmap(addr, statbuf.st_size); - elf(main->place.l.addr, text, len, data_seg.data, data_seg.len, out); - fclose(out); } diff --git a/map.c b/map.c @@ -23,6 +23,7 @@ THIS SOFTWARE. #include <string.h> #include "nooc.h" +#include "ir.h" #include "util.h" #include "map.h" diff --git a/nooc.h b/nooc.h @@ -116,28 +116,16 @@ struct assgns { struct assgn *data; }; -struct place { - enum { - PLACE_ABS = 1, - PLACE_FRAME, - PLACE_STACK, - PLACE_REG, - PLACE_REGADDR, - } kind; - union { - size_t addr; - int64_t off; - int reg; - } l; - size_t size; -}; - struct decl { struct slice s; size_t type; size_t val; // struct exprs bool declared; - struct place place; + bool toplevel; + union { + uint64_t index; + uint64_t addr; + } w; struct token *start; }; diff --git a/parse.c b/parse.c @@ -5,6 +5,7 @@ #include "nooc.h" #include "parse.h" +#include "ir.h" #include "util.h" #include "array.h" #include "type.h" @@ -173,11 +174,8 @@ parseexpr(struct block *block) for (int i = expr.d.proc.in.len - 1; i >= 0; i--) { decl.s = expr.d.proc.in.data[i].name; decl.type = expr.d.proc.in.data[i].type; - decl.place.kind = PLACE_FRAME; type = &types.data[decl.type]; - decl.place.size = type->size; offset += type->size; - decl.place.l.off = -offset - 8; array_add((&expr.d.proc.block.decls), decl); } @@ -187,7 +185,6 @@ parseexpr(struct block *block) decl.declared = true; type = 
&types.data[decl.type]; offset += type->size; - decl.place.l.off = -offset; array_add((&expr.d.proc.block.decls), decl); } parseblock(&expr.d.proc.block); @@ -396,6 +393,7 @@ parseblock(struct block *block) item.start = tok; if (tok->type == TOK_LET) { struct decl decl = { 0 }; + decl.toplevel = !(blocki - 1); decl.start = tok; item.kind = ITEM_DECL; tok = tok->next; diff --git a/test/add.pass.nooc b/test/add.pass.nooc @@ -0,0 +1,5 @@ +let main proc() = proc() { + let a i64 = 10 + a = + a 10 + syscall(60, 0) +}+ \ No newline at end of file diff --git a/test/decl.pass.nooc b/test/decl.pass.nooc @@ -0,0 +1,3 @@ +let main proc() = proc() { + let foo i64 = 0 +}+ \ No newline at end of file diff --git a/test/exit.pass.nooc b/test/exit.pass.nooc @@ -1,6 +1,5 @@ let exit proc(i64) = proc(code i64) { syscall(60, code) - return } let main proc() = proc() { diff --git a/test/exitwrite.pass.nooc b/test/exitwrite.pass.nooc @@ -1,6 +1,7 @@ let s [6]i8 = "hello\n" let write proc(i64, $i8, i64) (i64) = proc(fd i64, data $i8, len i64) (out i64) { + let a i64 = 0 out = syscall(1, fd, data, len) return } diff --git a/test/proc.pass.nooc b/test/proc.pass.nooc @@ -1,5 +1,7 @@ +let s [6]i8 = "hello\n" + let hello proc() = proc() { - syscall(1, 1, "hello\n", 6) + syscall(1, 1, $s, 6) return } diff --git a/test/syscall_ret.pass.nooc b/test/syscall_ret.pass.nooc @@ -1,5 +1,7 @@ +let s [12]i8 = "syscall_ret\n" + let main proc() = proc() { - let ret i64 = syscall(1, 1, "syscall_ret", 11) + let ret i64 = syscall(1, 1, $s, 11) if = ret 11 { syscall(60, 0) } else { diff --git a/test/yes.pass.nooc b/test/yes.pass.nooc @@ -1,7 +1,9 @@ +let y [2]i8 = "y\n" + let main proc() = proc() { let write i64 = 1 let stdout i64 = 1 let count i64 = 0 - syscall(write, stdout, "y\n", 2) + syscall(write, stdout, $y, 2) syscall(60, 0) } diff --git a/type.c b/type.c @@ -3,9 +3,11 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <stdio.h> #include "nooc.h" #include "parse.h" +#include 
"ir.h" #include "util.h" #include "type.h" #include "map.h" diff --git a/util.c b/util.c @@ -6,6 +6,7 @@ #include <string.h> #include "nooc.h" +#include "ir.h" #include "array.h" #include "util.h" @@ -125,6 +126,77 @@ dumpexpr(int indent, struct expr *expr) } } +void +dumpir(struct iproc *instrs) +{ + bool callarg = false; + for (int i = 0; i < instrs->len; i++) { + struct instr *instr = &instrs->data[i]; + if (callarg && instr->op != IR_CALLARG) { + putc('\n', stderr); + callarg = false; + } + + switch (instr->op) { + case IR_IN: + fprintf(stderr, "in %%%lu\n", instr->id); + break; + case IR_SIZE: + fprintf(stderr, "size %lu\n", instr->id); + break; + case IR_IMM: + fprintf(stderr, "imm %lu\n", instr->id); + break; + case IR_ASSIGN: + fprintf(stderr, "%%%lu = ", instr->id); + break; + case IR_ALLOC: + fprintf(stderr, "alloc %lu\n", instr->id); + break; + case IR_STORE: + fprintf(stderr, "store %%%lu", instr->id); + break; + case IR_LOAD: + fprintf(stderr, "load %%%lu\n", instr->id); + break; + case IR_ADD: + fprintf(stderr, "add %%%lu", instr->id); + break; + case IR_CEQ: + fprintf(stderr, "ceq %%%lu", instr->id); + break; + case IR_EXTRA: + fprintf(stderr, ", %%%lu\n", instr->id); + break; + case IR_CALLARG: + fprintf(stderr, ", %%%lu", instr->id); + break; + case IR_CALL: + callarg = true; + fprintf(stderr, "call $%lu", instr->id); + break; + case IR_RETURN: + fputs("return\n", stderr); + break; + case IR_CONDJUMP: + fprintf(stderr, "condjump :%lu", instr->id); + break; + case IR_JUMP: + fprintf(stderr, "jump :%lu\n", instr->id); + break; + case IR_LABEL: + fprintf(stderr, "label :%lu\n", instr->id); + break; + default: + fprintf(stderr, "%d\n", instr->op); + die("dumpir: unknown instruction"); + } + } + + if (callarg) putc('\n', stderr); + + putc('\n', stderr); +} int slice_cmp(struct slice *s1, struct slice *s2) diff --git a/util.h b/util.h @@ -2,6 +2,7 @@ char *exprkind_str(enum exprkind kind); void dumpval(struct expr *e); void dumpbinop(struct binop *op); 
void dumpexpr(int indent, struct expr *expr); +void dumpir(struct iproc *instrs); int slice_cmp(struct slice *s1, struct slice *s2); int slice_cmplit(struct slice *s1, char *s2); void error(size_t line, size_t col, const char *error, ...); diff --git a/x64.c b/x64.c @@ -5,6 +5,7 @@ #include "nooc.h" #include "x64.h" +#include "ir.h" #include "util.h" enum rex {