parse.c (10917B)
1 #include <errno.h> 2 #include <stdbool.h> 3 #include <stdint.h> 4 #include <stdlib.h> 5 6 #include "nooc.h" 7 #include "stack.h" 8 #include "ir.h" 9 #include "util.h" 10 #include "array.h" 11 #include "type.h" 12 #include "map.h" 13 14 static const struct token *tok; 15 static struct stack blocks; 16 static int loopcount; 17 18 static void parsenametypes(struct nametypes *const nametypes); 19 static size_t parsetype(); 20 21 #define EXPECTADV(t) { expect(t); tok = tok->next; } 22 23 static void 24 expect(const enum tokentype type) 25 { 26 if (!tok) 27 error(tok->line, tok->col, "unexpected null token!"); 28 if (tok->type != type) { 29 error(tok->line, tok->col, "expected %s but got %s", tokenstr[type], tokenstr[tok->type]); 30 } 31 } 32 33 static void 34 parsestring(struct expr *const expr) 35 { 36 expr->kind = EXPR_LIT; 37 expr->class = C_STR; 38 expr->d.v.v.s = (struct slice){ 0 }; 39 const struct slice str = tok->slice; 40 for (size_t i = 0; i < str.len; i++) { 41 switch (str.data[i]) { 42 case '\\': 43 if (++i < str.len) { 44 char c; 45 switch (str.data[i]) { 46 case 'n': 47 c = '\n'; 48 array_add((&expr->d.v.v.s), c); 49 break; 50 case '\\': 51 c = '\\'; 52 array_add((&expr->d.v.v.s), c); 53 break; 54 default: 55 error(tok->line, tok->col, "invalid string escape!"); 56 } 57 } else { 58 error(tok->line, tok->col, "string escape without parameter"); 59 } 60 break; 61 default: 62 array_add((&expr->d.v.v.s), str.data[i]); 63 } 64 } 65 tok = tok->next; 66 } 67 68 static void 69 parsenum(struct expr *const expr) 70 { 71 expr->kind = EXPR_LIT; 72 expr->class = C_INT; 73 74 errno = 0; 75 if (sizeof(long) == 8) 76 expr->d.v.v.i64 = strtol(tok->slice.data, NULL, 10); 77 else if (sizeof(long long) == 8) 78 expr->d.v.v.i64 = strtoll(tok->slice.data, NULL, 10); 79 else 80 die("parsenum: unhandled long size"); 81 82 if (errno) 83 error(tok->line, tok->col, "failed to parse number"); 84 85 tok = tok->next; 86 } 87 88 static enum class 89 typetoclass(const struct type *const type) 90 { 91 switch (type->class) { 92 case TYPE_INT: 93 return C_INT; 94 case TYPE_ARRAY: 95 return C_STR; 96 case TYPE_REF: 97 return C_REF; 98 default: 99 die("unknown type class"); 100 } 101 102 return 0; // warning 103 } 104 105 static void parseblock(struct block *const block); 106 107 static size_t 108 parseexpr(struct block *const block) 109 { 110 struct expr expr = { 0 }; 111 const struct type *type; 112 const struct decl *decl; 113 114 if (tok->type == TOK_LPAREN) { 115 tok = tok->next; 116 size_t ret = parseexpr(block); 117 EXPECTADV(TOK_RPAREN); 118 return ret; 119 } 120 121 expr.start = tok; 122 switch (tok->type) { 123 case TOK_LOOP: 124 expr.kind = EXPR_LOOP; 125 tok = tok->next; 126 loopcount += 1; 127 parseblock(&expr.d.loop.block); 128 loopcount -= 1; 129 break; 130 case TOK_IF: 131 expr.kind = EXPR_COND; 132 tok = tok->next; 133 expr.d.cond.cond = parseexpr(block); 134 if (exprs.data[expr.d.cond.cond].class != C_BOOL) 135 error(expr.start->line, expr.start->col, "expected boolean expression for if condition"); 136 parseblock(&expr.d.cond.bif); 137 if (tok->type == TOK_ELSE) { 138 tok = tok->next; 139 parseblock(&expr.d.cond.belse); 140 } 141 break; 142 case TOK_NOT: 143 tok = tok->next; 144 expr.kind = EXPR_UNARY; 145 expr.d.uop.kind = UOP_NOT; 146 expr.d.uop.expr = parseexpr(block); 147 if (exprs.data[expr.d.uop.expr].class != C_BOOL) 148 error(tok->line, tok->col, "expected boolean expression as not operand"); 149 expr.class = C_BOOL; 150 break; 151 case TOK_EQUAL: 152 expr.kind = EXPR_BINARY; 153 expr.d.bop.kind = BOP_EQUAL; 154 goto bool_common; 155 case TOK_GREATER: 156 expr.kind = EXPR_BINARY; 157 expr.d.bop.kind = BOP_GREATER; 158 bool_common: 159 tok = tok->next; 160 expr.d.bop.left = parseexpr(block); 161 expr.d.bop.right = parseexpr(block); 162 if (exprs.data[expr.d.bop.left].class != exprs.data[expr.d.bop.right].class) 163 error(tok->line, tok->col, "expected boolean expression operands to be of same class"); 164 expr.class = C_BOOL; 165 break; 166 case TOK_PLUS: 167 expr.kind = EXPR_BINARY; 168 expr.d.bop.kind = BOP_PLUS; 169 goto binary_common; 170 case TOK_MINUS: 171 expr.kind = EXPR_BINARY; 172 expr.d.bop.kind = BOP_MINUS; 173 binary_common: 174 tok = tok->next; 175 expr.d.bop.left = parseexpr(block); 176 expr.d.bop.right = parseexpr(block); 177 if (exprs.data[expr.d.bop.left].class != exprs.data[expr.d.bop.right].class) 178 error(tok->line, tok->col, "expected binary expression operands to be of same class"); 179 expr.class = exprs.data[expr.d.bop.left].class; 180 break; 181 case TOK_DOLLAR: 182 expr.kind = EXPR_UNARY; 183 expr.class = C_REF; 184 expr.d.uop.kind = UOP_REF; 185 tok = tok->next; 186 expr.d.uop.expr = parseexpr(block); 187 break; 188 case TOK_LSQUARE: 189 expr.kind = EXPR_ACCESS; 190 tok = tok->next; 191 expect(TOK_NUM); 192 struct expr index = { 0 }; 193 parsenum(&index); 194 if (index.d.v.v.i64 < 0) 195 error(tok->line, tok->col, "expected non-negative integer for array index"); 196 expr.d.access.index = index.d.v.v.i64; 197 198 expect(TOK_RSQUARE); 199 tok = tok->next; 200 expr.d.access.array = parseexpr(block); 201 expr.class = C_INT; //FIXME: determine from parent type 202 break; 203 case TOK_NAME: 204 // a procedure definition 205 if (slice_cmplit(&tok->slice, "proc") == 0) { 206 struct decl param = { 0 }; 207 int8_t offset = 0; 208 expr.kind = EXPR_PROC; 209 expr.class = C_PROC; 210 tok = tok->next; 211 parsenametypes(&expr.d.proc.in); 212 if (tok->type == TOK_LPAREN) 213 parsenametypes(&expr.d.proc.out); 214 215 for (int i = expr.d.proc.in.len - 1; i >= 0; i--) { 216 param.s = expr.d.proc.in.data[i].name; 217 param.type = expr.d.proc.in.data[i].type; 218 param.in = true; 219 type = &types.data[param.type]; 220 offset += type->size; 221 array_add((&expr.d.proc.block.decls), param); 222 } 223 224 for (size_t i = 0; i < expr.d.proc.out.len; i++) { 225 param.s = expr.d.proc.out.data[i].name; 226 param.type = typeref(expr.d.proc.out.data[i].type); 227 param.in = param.out = true; 228 type = &types.data[param.type]; 229 offset += type->size; 230 array_add((&expr.d.proc.block.decls), param); 231 } 232 parseblock(&expr.d.proc.block); 233 // a function call 234 } else if (tok->next && tok->next->type == TOK_LPAREN) { 235 expr.d.call.name = tok->slice; 236 decl = finddecl(&blocks, expr.d.call.name); 237 if (slice_cmplit(&expr.d.call.name, "syscall") == 0) { 238 expr.class = C_INT; 239 } else { 240 if (decl == NULL) 241 error(expr.start->line, expr.start->col, "undeclared procedure '%.*s'", expr.d.s.len, expr.d.s.data); 242 243 type = &types.data[decl->type]; 244 if (type->d.params.out.len == 1) { 245 struct type *rettype = &types.data[*type->d.params.out.data]; 246 expr.class = typetoclass(rettype); 247 } else if (type->d.params.out.len > 1) 248 error(tok->line, tok->col, "only one return supported"); 249 } 250 251 tok = tok->next->next; 252 expr.kind = EXPR_FCALL; 253 254 while (tok->type != TOK_RPAREN) { 255 size_t pidx = parseexpr(block); 256 array_add((&expr.d.call.params), pidx); 257 if (tok->type == TOK_RPAREN) 258 break; 259 EXPECTADV(TOK_COMMA); 260 } 261 EXPECTADV(TOK_RPAREN); 262 // an ident 263 } else { 264 expr.kind = EXPR_IDENT; 265 expr.d.s = tok->slice; 266 267 decl = finddecl(&blocks, expr.d.s); 268 if (decl == NULL) 269 error(expr.start->line, expr.start->col, "undeclared identifier '%.*s'", expr.d.s.len, expr.d.s.data); 270 expr.class = typetoclass(&types.data[decl->type]); 271 tok = tok->next; 272 } 273 break; 274 case TOK_NUM: 275 parsenum(&expr); 276 break; 277 case TOK_STRING: 278 parsestring(&expr); 279 break; 280 default: 281 error(tok->line, tok->col, "invalid token for expression"); 282 } 283 284 array_add((&exprs), expr); 285 286 return exprs.len - 1; 287 } 288 289 static void 290 parsetypelist(struct typelist *const list) 291 { 292 EXPECTADV(TOK_LPAREN); 293 size_t type; 294 295 while (tok->type != TOK_RPAREN) { 296 type = parsetype(); 297 array_add(list, type); 298 299 if (tok->type == TOK_RPAREN) 300 break; 301 302 EXPECTADV(TOK_COMMA); 303 } 304 305 tok = tok->next; 306 } 307 308 static size_t 309 parsetype() 310 { 311 struct type type = { 0 }; 312 struct mapkey key; 313 union mapval val; 314 315 if (tok->type == TOK_NAME && slice_cmplit(&tok->slice, "proc") == 0) { 316 type.class = TYPE_PROC; 317 tok = tok->next; 318 319 parsetypelist(&type.d.params.in); 320 if (tok->type == TOK_LPAREN) 321 parsetypelist(&type.d.params.out); 322 } else if (tok->type == TOK_DOLLAR) { 323 type.class = TYPE_REF; 324 type.size = 8; 325 tok = tok->next; 326 327 type.d.subtype = parsetype(); 328 } else if (tok->type == TOK_LSQUARE) { 329 struct expr len = { 0 }; 330 type.class = TYPE_ARRAY; 331 type.size = 0; 332 tok = tok->next; 333 334 expect(TOK_NUM); 335 parsenum(&len); 336 337 if (len.d.v.v.i64 <= 0) 338 error(tok->line, tok->col, "expected positive integer for array size"); 339 type.d.arr.len = len.d.v.v.i64; 340 341 EXPECTADV(TOK_RSQUARE); 342 343 type.d.arr.subtype = parsetype(); 344 } else { 345 mapkey(&key, tok->slice.data, tok->slice.len); 346 val = mapget(typesmap, &key); 347 if (!val.n) 348 error(tok->line, tok->col, "unknown type"); 349 350 tok = tok->next; 351 return val.n; 352 } 353 354 return type_put(&type); 355 } 356 357 static void 358 parsenametypes(struct nametypes *const nametypes) 359 { 360 EXPECTADV(TOK_LPAREN); 361 struct nametype nametype; 362 while (tok->type != TOK_RPAREN) { 363 nametype = (struct nametype){ 0 }; 364 365 expect(TOK_NAME); 366 nametype.name = tok->slice; 367 tok = tok->next; 368 369 nametype.type = parsetype(); 370 371 array_add(nametypes, nametype); 372 373 if (tok->type == TOK_RPAREN) 374 break; 375 376 EXPECTADV(TOK_COMMA); 377 } 378 379 tok = tok->next; 380 } 381 382 static void 383 parseblock(struct block *const block) 384 { 385 struct statement statement; 386 bool toplevel = stackpeek(&blocks) == NULL; 387 388 stackpush(&blocks, block); 389 if (!toplevel) 390 EXPECTADV(TOK_LCURLY); 391 392 while (!(tok->type == TOK_NONE || (!toplevel && tok->type == TOK_RCURLY))) { 393 statement = (struct statement){ 0 }; 394 statement.start = tok; 395 if (tok->type == TOK_LET) { 396 struct decl decl = { 0 }; 397 decl.toplevel = toplevel; 398 decl.start = tok; 399 statement.kind = STMT_DECL; 400 tok = tok->next; 401 402 expect(TOK_NAME); 403 decl.s = tok->slice; 404 tok = tok->next; 405 406 decl.type = parsetype(); 407 EXPECTADV(TOK_EQUAL); 408 409 if (finddecl(&blocks, decl.s)) 410 error(tok->line, tok->col, "repeat declaration!"); 411 412 decl.val = parseexpr(block); 413 array_add((&block->decls), decl); 414 415 statement.idx = block->decls.len - 1; 416 array_add(block, statement); 417 } else if (tok->type == TOK_RETURN) { 418 statement.kind = STMT_RETURN; 419 tok = tok->next; 420 array_add((block), statement); 421 } else if (tok->type == TOK_BREAK) { 422 if (!loopcount) 423 error(tok->line, tok->col, "break statement outside of loop"); 424 statement.kind = STMT_BREAK; 425 tok = tok->next; 426 array_add((block), statement); 427 } else if (tok->type == TOK_NAME && tok->next && tok->next->type == TOK_EQUAL) { 428 struct assgn assgn = { 0 }; 429 assgn.start = tok; 430 statement.kind = STMT_ASSGN; 431 assgn.s = tok->slice; 432 433 tok = tok->next->next; 434 assgn.val = parseexpr(block); 435 array_add((&assgns), assgn); 436 437 statement.idx = assgns.len - 1; 438 array_add(block, statement); 439 } else { 440 statement.kind = STMT_EXPR; 441 statement.idx = parseexpr(block); 442 array_add(block, statement); 443 } 444 } 445 446 if (!toplevel) 447 EXPECTADV(TOK_RCURLY); 448 449 stackpop(&blocks); 450 } 451 452 struct block 453 parse(const struct token *const start) 454 { 455 tok = start; 456 struct block block = { 0 }; 457 parseblock(&block); 458 if (blocks.data) 459 free(blocks.data); 460 461 blocks = (struct stack){ 0 }; 462 return block; 463 }