nooc

nooc programming language compiler
git clone git://git.nihaljere.xyz/nooc
Log | Files | Refs | LICENSE

parse.c (10917B)


      1 #include <errno.h>
      2 #include <stdbool.h>
      3 #include <stdint.h>
      4 #include <stdlib.h>
      5 
      6 #include "nooc.h"
      7 #include "stack.h"
      8 #include "ir.h"
      9 #include "util.h"
     10 #include "array.h"
     11 #include "type.h"
     12 #include "map.h"
     13 
     14 static const struct token *tok;
     15 static struct stack blocks;
     16 static int loopcount;
     17 
     18 static void parsenametypes(struct nametypes *const nametypes);
     19 static size_t parsetype();
     20 
     21 #define EXPECTADV(t) { expect(t); tok = tok->next; }
     22 
     23 static void
     24 expect(const enum tokentype type)
     25 {
     26 	if (!tok)
     27 		error(tok->line, tok->col, "unexpected null token!");
     28 	if (tok->type != type) {
     29 		error(tok->line, tok->col, "expected %s but got %s", tokenstr[type], tokenstr[tok->type]);
     30 	}
     31 }
     32 
     33 static void
     34 parsestring(struct expr *const expr)
     35 {
     36 	expr->kind = EXPR_LIT;
     37 	expr->class = C_STR;
     38 	expr->d.v.v.s = (struct slice){ 0 };
     39 	const struct slice str = tok->slice;
     40 	for (size_t i = 0; i < str.len; i++) {
     41 		switch (str.data[i]) {
     42 		case '\\':
     43 			if (++i < str.len) {
     44 				char c;
     45 				switch (str.data[i]) {
     46 				case 'n':
     47 					c = '\n';
     48 					array_add((&expr->d.v.v.s), c);
     49 					break;
     50 				case '\\':
     51 					c = '\\';
     52 					array_add((&expr->d.v.v.s), c);
     53 					break;
     54 				default:
     55 					error(tok->line, tok->col, "invalid string escape!");
     56 				}
     57 			} else {
     58 				error(tok->line, tok->col, "string escape without parameter");
     59 			}
     60 			break;
     61 		default:
     62 			array_add((&expr->d.v.v.s), str.data[i]);
     63 		}
     64 	}
     65 	tok = tok->next;
     66 }
     67 
     68 static void
     69 parsenum(struct expr *const expr)
     70 {
     71 	expr->kind = EXPR_LIT;
     72 	expr->class = C_INT;
     73 
     74 	errno = 0;
     75 	if (sizeof(long) == 8)
     76 		expr->d.v.v.i64 = strtol(tok->slice.data, NULL, 10);
     77 	else if (sizeof(long long) == 8)
     78 		expr->d.v.v.i64 = strtoll(tok->slice.data, NULL, 10);
     79 	else
     80 		die("parsenum: unhandled long size");
     81 
     82 	if (errno)
     83 		error(tok->line, tok->col, "failed to parse number");
     84 
     85 	tok = tok->next;
     86 }
     87 
     88 static enum class
     89 typetoclass(const struct type *const type)
     90 {
     91 	switch (type->class) {
     92 	case TYPE_INT:
     93 		return C_INT;
     94 	case TYPE_ARRAY:
     95 		return C_STR;
     96 	case TYPE_REF:
     97 		return C_REF;
     98 	default:
     99 		die("unknown type class");
    100 	}
    101 
    102 	return 0; // warning
    103 }
    104 
    105 static void parseblock(struct block *const block);
    106 
    107 static size_t
    108 parseexpr(struct block *const block)
    109 {
    110 	struct expr expr = { 0 };
    111 	const struct type *type;
    112 	const struct decl *decl;
    113 
    114 	if (tok->type == TOK_LPAREN) {
    115 		tok = tok->next;
    116 		size_t ret = parseexpr(block);
    117 		EXPECTADV(TOK_RPAREN);
    118 		return ret;
    119 	}
    120 
    121 	expr.start = tok;
    122 	switch (tok->type) {
    123 	case TOK_LOOP:
    124 		expr.kind = EXPR_LOOP;
    125 		tok = tok->next;
    126 		loopcount += 1;
    127 		parseblock(&expr.d.loop.block);
    128 		loopcount -= 1;
    129 		break;
    130 	case TOK_IF:
    131 		expr.kind = EXPR_COND;
    132 		tok = tok->next;
    133 		expr.d.cond.cond = parseexpr(block);
    134 		if (exprs.data[expr.d.cond.cond].class != C_BOOL)
    135 			error(expr.start->line, expr.start->col, "expected boolean expression for if condition");
    136 		parseblock(&expr.d.cond.bif);
    137 		if (tok->type == TOK_ELSE) {
    138 			tok = tok->next;
    139 			parseblock(&expr.d.cond.belse);
    140 		}
    141 		break;
    142 	case TOK_NOT:
    143 		tok = tok->next;
    144 		expr.kind = EXPR_UNARY;
    145 		expr.d.uop.kind = UOP_NOT;
    146 		expr.d.uop.expr = parseexpr(block);
    147 		if (exprs.data[expr.d.uop.expr].class != C_BOOL)
    148 			error(tok->line, tok->col, "expected boolean expression as not operand");
    149 		expr.class = C_BOOL;
    150 		break;
    151 	case TOK_EQUAL:
    152 		expr.kind = EXPR_BINARY;
    153 		expr.d.bop.kind = BOP_EQUAL;
    154 		goto bool_common;
    155 	case TOK_GREATER:
    156 		expr.kind = EXPR_BINARY;
    157 		expr.d.bop.kind = BOP_GREATER;
    158 bool_common:
    159 		tok = tok->next;
    160 		expr.d.bop.left = parseexpr(block);
    161 		expr.d.bop.right = parseexpr(block);
    162 		if (exprs.data[expr.d.bop.left].class != exprs.data[expr.d.bop.right].class)
    163 			error(tok->line, tok->col, "expected boolean expression operands to be of same class");
    164 		expr.class = C_BOOL;
    165 		break;
    166 	case TOK_PLUS:
    167 		expr.kind = EXPR_BINARY;
    168 		expr.d.bop.kind = BOP_PLUS;
    169 		goto binary_common;
    170 	case TOK_MINUS:
    171 		expr.kind = EXPR_BINARY;
    172 		expr.d.bop.kind = BOP_MINUS;
    173 binary_common:
    174 		tok = tok->next;
    175 		expr.d.bop.left = parseexpr(block);
    176 		expr.d.bop.right = parseexpr(block);
    177 		if (exprs.data[expr.d.bop.left].class != exprs.data[expr.d.bop.right].class)
    178 			error(tok->line, tok->col, "expected binary expression operands to be of same class");
    179 		expr.class = exprs.data[expr.d.bop.left].class;
    180 		break;
    181 	case TOK_DOLLAR:
    182 		expr.kind = EXPR_UNARY;
    183 		expr.class = C_REF;
    184 		expr.d.uop.kind = UOP_REF;
    185 		tok = tok->next;
    186 		expr.d.uop.expr = parseexpr(block);
    187 		break;
    188 	case TOK_LSQUARE:
    189 		expr.kind = EXPR_ACCESS;
    190 		tok = tok->next;
    191 		expect(TOK_NUM);
    192 		struct expr index = { 0 };
    193 		parsenum(&index);
    194 		if (index.d.v.v.i64 < 0)
    195 			error(tok->line, tok->col, "expected non-negative integer for array index");
    196 		expr.d.access.index = index.d.v.v.i64;
    197 
    198 		expect(TOK_RSQUARE);
    199 		tok = tok->next;
    200 		expr.d.access.array = parseexpr(block);
    201 		expr.class = C_INT; //FIXME: determine from parent type
    202 		break;
    203 	case TOK_NAME:
    204 		// a procedure definition
    205 		if (slice_cmplit(&tok->slice, "proc") == 0) {
    206 			struct decl param = { 0 };
    207 			int8_t offset = 0;
    208 			expr.kind = EXPR_PROC;
    209 			expr.class = C_PROC;
    210 			tok = tok->next;
    211 			parsenametypes(&expr.d.proc.in);
    212 			if (tok->type == TOK_LPAREN)
    213 				parsenametypes(&expr.d.proc.out);
    214 
    215 			for (int i = expr.d.proc.in.len - 1; i >= 0; i--) {
    216 				param.s = expr.d.proc.in.data[i].name;
    217 				param.type = expr.d.proc.in.data[i].type;
    218 				param.in = true;
    219 				type = &types.data[param.type];
    220 				offset += type->size;
    221 				array_add((&expr.d.proc.block.decls), param);
    222 			}
    223 
    224 			for (size_t i = 0; i < expr.d.proc.out.len; i++) {
    225 				param.s = expr.d.proc.out.data[i].name;
    226 				param.type = typeref(expr.d.proc.out.data[i].type);
    227 				param.in = param.out = true;
    228 				type = &types.data[param.type];
    229 				offset += type->size;
    230 				array_add((&expr.d.proc.block.decls), param);
    231 			}
    232 			parseblock(&expr.d.proc.block);
    233 		// a function call
    234 		} else if (tok->next && tok->next->type == TOK_LPAREN) {
    235 			expr.d.call.name = tok->slice;
    236 			decl = finddecl(&blocks, expr.d.call.name);
    237 			if (slice_cmplit(&expr.d.call.name, "syscall") == 0) {
    238 				expr.class = C_INT;
    239 			} else {
    240 				if (decl == NULL)
    241 					error(expr.start->line, expr.start->col, "undeclared procedure '%.*s'", expr.d.s.len, expr.d.s.data);
    242 
    243 				type = &types.data[decl->type];
    244 				if (type->d.params.out.len == 1) {
    245 					struct type *rettype = &types.data[*type->d.params.out.data];
    246 					expr.class = typetoclass(rettype);
    247 				} else if (type->d.params.out.len > 1)
    248 					error(tok->line, tok->col, "only one return supported");
    249 			}
    250 
    251 			tok = tok->next->next;
    252 			expr.kind = EXPR_FCALL;
    253 
    254 			while (tok->type != TOK_RPAREN) {
    255 				size_t pidx = parseexpr(block);
    256 				array_add((&expr.d.call.params), pidx);
    257 				if (tok->type == TOK_RPAREN)
    258 					break;
    259 				EXPECTADV(TOK_COMMA);
    260 			}
    261 			EXPECTADV(TOK_RPAREN);
    262 		// an ident
    263 		} else {
    264 			expr.kind = EXPR_IDENT;
    265 			expr.d.s = tok->slice;
    266 
    267 			decl = finddecl(&blocks, expr.d.s);
    268 			if (decl == NULL)
    269 				error(expr.start->line, expr.start->col, "undeclared identifier '%.*s'", expr.d.s.len, expr.d.s.data);
    270 			expr.class = typetoclass(&types.data[decl->type]);
    271 			tok = tok->next;
    272 		}
    273 		break;
    274 	case TOK_NUM:
    275 		parsenum(&expr);
    276 		break;
    277 	case TOK_STRING:
    278 		parsestring(&expr);
    279 		break;
    280 	default:
    281 		error(tok->line, tok->col, "invalid token for expression");
    282 	}
    283 
    284 	array_add((&exprs), expr);
    285 
    286 	return exprs.len - 1;
    287 }
    288 
    289 static void
    290 parsetypelist(struct typelist *const list)
    291 {
    292 	EXPECTADV(TOK_LPAREN);
    293 	size_t type;
    294 
    295 	while (tok->type != TOK_RPAREN) {
    296 		type = parsetype();
    297 		array_add(list, type);
    298 
    299 		if (tok->type == TOK_RPAREN)
    300 			break;
    301 
    302 		EXPECTADV(TOK_COMMA);
    303 	}
    304 
    305 	tok = tok->next;
    306 }
    307 
    308 static size_t
    309 parsetype()
    310 {
    311 	struct type type = { 0 };
    312 	struct mapkey key;
    313 	union mapval val;
    314 
    315 	if (tok->type == TOK_NAME && slice_cmplit(&tok->slice, "proc") == 0) {
    316 		type.class = TYPE_PROC;
    317 		tok = tok->next;
    318 
    319 		parsetypelist(&type.d.params.in);
    320 		if (tok->type == TOK_LPAREN)
    321 			parsetypelist(&type.d.params.out);
    322 	} else if (tok->type == TOK_DOLLAR) {
    323 		type.class = TYPE_REF;
    324 		type.size = 8;
    325 		tok = tok->next;
    326 
    327 		type.d.subtype = parsetype();
    328 	} else if (tok->type == TOK_LSQUARE) {
    329 		struct expr len = { 0 };
    330 		type.class = TYPE_ARRAY;
    331 		type.size = 0;
    332 		tok = tok->next;
    333 
    334 		expect(TOK_NUM);
    335 		parsenum(&len);
    336 
    337 		if (len.d.v.v.i64 <= 0)
    338 			error(tok->line, tok->col, "expected positive integer for array size");
    339 		type.d.arr.len = len.d.v.v.i64;
    340 
    341 		EXPECTADV(TOK_RSQUARE);
    342 
    343 		type.d.arr.subtype = parsetype();
    344 	} else {
    345 		mapkey(&key, tok->slice.data, tok->slice.len);
    346 		val = mapget(typesmap, &key);
    347 		if (!val.n)
    348 			error(tok->line, tok->col, "unknown type");
    349 
    350 		tok = tok->next;
    351 		return val.n;
    352 	}
    353 
    354 	return type_put(&type);
    355 }
    356 
    357 static void
    358 parsenametypes(struct nametypes *const nametypes)
    359 {
    360 	EXPECTADV(TOK_LPAREN);
    361 	struct nametype nametype;
    362 	while (tok->type != TOK_RPAREN) {
    363 		nametype = (struct nametype){ 0 };
    364 
    365 		expect(TOK_NAME);
    366 		nametype.name = tok->slice;
    367 		tok = tok->next;
    368 
    369 		nametype.type = parsetype();
    370 
    371 		array_add(nametypes, nametype);
    372 
    373 		if (tok->type == TOK_RPAREN)
    374 			break;
    375 
    376 		EXPECTADV(TOK_COMMA);
    377 	}
    378 
    379 	tok = tok->next;
    380 }
    381 
    382 static void
    383 parseblock(struct block *const block)
    384 {
    385 	struct statement statement;
    386 	bool toplevel = stackpeek(&blocks) == NULL;
    387 
    388 	stackpush(&blocks, block);
    389 	if (!toplevel)
    390 		EXPECTADV(TOK_LCURLY);
    391 
    392 	while (!(tok->type == TOK_NONE || (!toplevel && tok->type == TOK_RCURLY))) {
    393 		statement = (struct statement){ 0 };
    394 		statement.start = tok;
    395 		if (tok->type == TOK_LET) {
    396 			struct decl decl = { 0 };
    397 			decl.toplevel = toplevel;
    398 			decl.start = tok;
    399 			statement.kind = STMT_DECL;
    400 			tok = tok->next;
    401 
    402 			expect(TOK_NAME);
    403 			decl.s = tok->slice;
    404 			tok = tok->next;
    405 
    406 			decl.type = parsetype();
    407 			EXPECTADV(TOK_EQUAL);
    408 
    409 			if (finddecl(&blocks, decl.s))
    410 				error(tok->line, tok->col, "repeat declaration!");
    411 
    412 			decl.val = parseexpr(block);
    413 			array_add((&block->decls), decl);
    414 
    415 			statement.idx = block->decls.len - 1;
    416 			array_add(block, statement);
    417 		} else if (tok->type == TOK_RETURN) {
    418 			statement.kind = STMT_RETURN;
    419 			tok = tok->next;
    420 			array_add((block), statement);
    421 		} else if (tok->type == TOK_BREAK) {
    422 			if (!loopcount)
    423 				error(tok->line, tok->col, "break statement outside of loop");
    424 			statement.kind = STMT_BREAK;
    425 			tok = tok->next;
    426 			array_add((block), statement);
    427 		} else if (tok->type == TOK_NAME && tok->next && tok->next->type == TOK_EQUAL) {
    428 			struct assgn assgn = { 0 };
    429 			assgn.start = tok;
    430 			statement.kind = STMT_ASSGN;
    431 			assgn.s = tok->slice;
    432 
    433 			tok = tok->next->next;
    434 			assgn.val = parseexpr(block);
    435 			array_add((&assgns), assgn);
    436 
    437 			statement.idx = assgns.len - 1;
    438 			array_add(block, statement);
    439 		} else {
    440 			statement.kind = STMT_EXPR;
    441 			statement.idx = parseexpr(block);
    442 			array_add(block, statement);
    443 		}
    444 	}
    445 
    446 	if (!toplevel)
    447 		EXPECTADV(TOK_RCURLY);
    448 
    449 	stackpop(&blocks);
    450 }
    451 
    452 struct block
    453 parse(const struct token *const start)
    454 {
    455 	tok = start;
    456 	struct block block = { 0 };
    457 	parseblock(&block);
    458 	if (blocks.data)
    459 		free(blocks.data);
    460 
    461 	blocks = (struct stack){ 0 };
    462 	return block;
    463 }