cproc

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.nihaljere.xyz/cproc
Log | Files | Refs | Submodules | README | LICENSE

pp.c (13871B)


      1 #include <assert.h>
      2 #include <stdarg.h>
      3 #include <stdbool.h>
      4 #include <stdint.h>
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <string.h>
      8 #include "util.h"
      9 #include "cc.h"
     10 
/* one formal parameter of a function-like macro */
struct macroparam {
	/* parameter name; define() uses "__VA_ARGS__" for the '...' parameter */
	char *name;
	/* bitmask describing how the parameter is used in the replacement list */
	enum {
		PARAMTOK = 1<<0,  /* the parameter is used normally */
		PARAMSTR = 1<<1,  /* the parameter is used with the '#' operator */
		PARAMVAR = 1<<2,  /* the parameter is __VA_ARGS__ */
	} flags;
};
     19 
/* one actual argument of a function-like macro invocation */
struct macroarg {
	/* argument tokens and their count; expand() only collects these for PARAMTOK parameters */
	struct token *token;
	size_t ntoken;
	/* stringized argument; expand() only fills this in for PARAMSTR parameters */
	struct token str;
};
     26 
/* a macro definition, plus the per-invocation state used while it expands */
struct macro {
	/* object-like (no parameter list) or function-like */
	enum {
		MACROOBJ,
		MACROFUNC,
	} kind;
	/* macro name, as given after #define */
	char *name;
	/* whether or not this macro is ineligible for expansion */
	bool hide;
	/* parameters of function-like macro */
	struct macroparam *param;
	size_t nparam;
	/* argument tokens of macro invocation */
	struct macroarg *arg;
	/* replacement list */
	struct token *token;
	size_t ntoken;
};
     44 
/* one entry of the context stack: a run of tokens waiting to be delivered */
struct frame {
	/* next token to hand out, and how many are left */
	struct token *token;
	size_t ntoken;
	/* macro whose replacement list this frame holds, or NULL */
	struct macro *macro;
};
     50 
/* preprocessor mode flags; with PPNEWLINE set, next() stops skipping newline tokens */
enum ppflags ppflags;

/* context stack of struct frame; the last element is the innermost frame */
static struct array ctx;
/* table of defined macros, keyed by macro name */
static struct map *macros;
/* number of macros currently undergoing expansion */
static size_t macrodepth;
     57 
     58 void
     59 ppinit(void)
     60 {
     61 	macros = mkmap(64);
     62 	next();
     63 }
     64 
     65 /* check if two macro definitions are equal, as in C11 6.10.3p2 */
     66 static bool
     67 macroequal(struct macro *m1, struct macro *m2)
     68 {
     69 	struct macroparam *p1, *p2;
     70 	struct token *t1, *t2;
     71 
     72 	if (m1->kind != m2->kind)
     73 		return false;
     74 	if (m1->kind == MACROFUNC) {
     75 		if (m1->nparam != m2->nparam)
     76 			return false;
     77 		for (p1 = m1->param, p2 = m2->param; p1 < m1->param + m1->nparam; ++p1, ++p2) {
     78 			if (strcmp(p1->name, p2->name) != 0 || p1->flags != p2->flags)
     79 				return false;
     80 		}
     81 	}
     82 	if (m1->ntoken != m2->ntoken)
     83 		return false;
     84 	for (t1 = m1->token, t2 = m2->token; t1 < m1->token + m1->ntoken; ++t1, ++t2) {
     85 		if (t1->kind != t2->kind)
     86 			return false;
     87 		if (t1->lit && strcmp(t1->lit, t2->lit) != 0)
     88 			return false;
     89 	}
     90 	return true;
     91 }
     92 
     93 /* find the index of a macro parameter with the given name */
     94 static size_t
     95 macroparam(struct macro *m, struct token *t)
     96 {
     97 	size_t i;
     98 
     99 	if (t->kind == TIDENT) {
    100 		for (i = 0; i < m->nparam; ++i) {
    101 			if (strcmp(m->param[i].name, t->lit) == 0)
    102 				return i;
    103 		}
    104 	}
    105 	return -1;
    106 }
    107 
    108 /* lookup a macro by name */
    109 static struct macro *
    110 macroget(char *name)
    111 {
    112 	struct mapkey k;
    113 
    114 	mapkey(&k, name, strlen(name));
    115 	return mapget(macros, &k);
    116 }
    117 
    118 static void
    119 macrodone(struct macro *m)
    120 {
    121 	m->hide = false;
    122 	if (m->kind == MACROFUNC && m->nparam > 0) {
    123 		free(m->arg[0].token);
    124 		free(m->arg);
    125 	}
    126 	--macrodepth;
    127 }
    128 
    129 static bool
    130 macrovarargs(struct macro *m)
    131 {
    132 	return m->kind == MACROFUNC && m->nparam > 0 && m->param[m->nparam - 1].flags & PARAMVAR;
    133 }
    134 
    135 static struct token *
    136 framenext(struct frame *f)
    137 {
    138 	return f->ntoken--, f->token++;
    139 }
    140 
    141 /* push a new context frame */
    142 static struct frame *
    143 ctxpush(struct token *t, size_t n, struct macro *m, bool space)
    144 {
    145 	struct frame *f;
    146 
    147 	f = arrayadd(&ctx, sizeof(*f));
    148 	f->token = t;
    149 	f->ntoken = n;
    150 	f->macro = m;
    151 	if (n > 0)
    152 		t[0].space = space;
    153 	return f;
    154 }
    155 
/*
 * get the next token from the context
 *
 * Exhausted frames are popped off the top of the stack first; a popped frame
 * that belongs to a macro ends that macro's expansion.  Returns NULL once
 * the stack is empty.
 *
 * While the top frame is the replacement list of a function-like macro,
 * parameter references are substituted on the fly: '#' followed by a
 * parameter yields the stringized argument, and a bare parameter name
 * yields the tokens of the corresponding argument.
 */
static struct token *
ctxnext(void)
{
	struct frame *f;
	struct token *t;
	struct macro *m;
	bool space;
	size_t i;

again:
	/* discard frames with no tokens left, starting from the top of the stack */
	for (f = arraylast(&ctx, sizeof(*f)); ctx.len; --f, ctx.len -= sizeof(*f)) {
		if (f->ntoken)
			break;
		if (f->macro)
			macrodone(f->macro);
	}
	if (ctx.len == 0)
		return NULL;
	m = f->macro;
	if (m && m->kind == MACROFUNC) {
		/* try to expand macro parameter */
		/* the substituted tokens inherit the spacing of the token they replace */
		space = f->token->space;
		switch (f->token->kind) {
		case THASH:
			/* skip the '#', then take the parameter name that follows it */
			framenext(f);
			t = framenext(f);
			assert(t);
			i = macroparam(m, t);
			/* define() rejects '#' not followed by a parameter name */
			assert(i != -1);
			f = ctxpush(&m->arg[i].str, 1, NULL, space);
			break;
		case TIDENT:
			i = macroparam(m, f->token);
			if (i == -1)
				break;
			framenext(f);
			/* an empty argument contributes nothing; look for the next token */
			if (m->arg[i].ntoken == 0)
				goto again;
			f = ctxpush(m->arg[i].token, m->arg[i].ntoken, NULL, space);
			break;
		}
		/* XXX: token concatenation */
	}
	return framenext(f);
}
    202 
    203 static void
    204 define(void)
    205 {
    206 	struct token *t;
    207 	enum tokenkind prev;
    208 	struct macro *m;
    209 	struct macroparam *p;
    210 	struct array params = {0}, repl = {0};
    211 	struct mapkey k;
    212 	void **entry;
    213 	size_t i;
    214 
    215 	m = xmalloc(sizeof(*m));
    216 	m->name = tokencheck(&tok, TIDENT, "after #define");
    217 	m->hide = false;
    218 	t = arrayadd(&repl, sizeof(*t));
    219 	scan(t);
    220 	if (t->kind == TLPAREN && !t->space) {
    221 		m->kind = MACROFUNC;
    222 		/* read macro parameter names */
    223 		p = NULL;
    224 		while (scan(&tok), tok.kind != TRPAREN) {
    225 			if (p) {
    226 				if (p->flags & PARAMVAR)
    227 					tokencheck(&tok, TRPAREN, "after '...'");
    228 				tokencheck(&tok, TCOMMA, "or ')' after macro parameter");
    229 				scan(&tok);
    230 			}
    231 			p = arrayadd(&params, sizeof(*p));
    232 			p->flags = 0;
    233 			if (tok.kind == TELLIPSIS) {
    234 				p->name = "__VA_ARGS__";
    235 				p->flags |= PARAMVAR;
    236 			} else {
    237 				p->name = tokencheck(&tok, TIDENT, "of macro parameter name or '...'");
    238 			}
    239 		}
    240 		scan(t);  /* first token in replacement list */
    241 	} else {
    242 		m->kind = MACROOBJ;
    243 	}
    244 	m->param = params.val;
    245 	m->nparam = params.len / sizeof(m->param[0]);
    246 
    247 	/* read macro body */
    248 	i = macroparam(m, t);
    249 	while (t->kind != TNEWLINE && t->kind != TEOF) {
    250 		if (t->kind == THASHHASH)
    251 			error(&t->loc, "'##' operator is not yet implemented");
    252 		prev = t->kind;
    253 		t = arrayadd(&repl, sizeof(*t));
    254 		scan(t);
    255 		if (t->kind == TIDENT && strcmp(t->lit, "__VA_ARGS__") == 0 && !macrovarargs(m))
    256 			error(&t->loc, "__VA_ARGS__ can only be used in variadic function-like macros");
    257 		if (m->kind != MACROFUNC)
    258 			continue;
    259 		if (i != -1)
    260 			m->param[i].flags |= PARAMTOK;
    261 		i = macroparam(m, t);
    262 		if (prev == THASH) {
    263 			tokencheck(t, TIDENT, "after '#' operator");
    264 			if (i == -1)
    265 				error(&t->loc, "'%s' is not a macro parameter name", t->lit);
    266 			m->param[i].flags |= PARAMSTR;
    267 			i = -1;
    268 		}
    269 	}
    270 	m->token = repl.val;
    271 	m->ntoken = repl.len / sizeof(*t) - 1;
    272 	tok = *t;
    273 
    274 	mapkey(&k, m->name, strlen(m->name));
    275 	entry = mapput(macros, &k);
    276 	if (*entry && !macroequal(m, *entry))
    277 		error(&tok.loc, "redefinition of macro '%s'", m->name);
    278 	*entry = m;
    279 }
    280 
    281 static void
    282 undef(void)
    283 {
    284 	char *name;
    285 	struct mapkey k;
    286 	void **entry;
    287 	struct macro *m;
    288 
    289 	name = tokencheck(&tok, TIDENT, "after #undef");
    290 	mapkey(&k, name, strlen(name));
    291 	entry = mapput(macros, &k);
    292 	m = *entry;
    293 	if (m) {
    294 		free(name);
    295 		free(m->param);
    296 		free(m->token);
    297 		*entry = NULL;
    298 	}
    299 	scan(&tok);
    300 }
    301 
    302 static void
    303 directive(void)
    304 {
    305 	enum ppflags oldflags;
    306 	char *name;
    307 
    308 	scan(&tok);
    309 	if (tok.kind == TNEWLINE)
    310 		return;  /* empty directive */
    311 	oldflags = ppflags;
    312 	ppflags |= PPNEWLINE;
    313 	name = tokencheck(&tok, TIDENT, "or newline after '#'");
    314 	if (strcmp(name, "if") == 0) {
    315 		error(&tok.loc, "#if directive is not implemented");
    316 	} else if (strcmp(name, "ifdef") == 0) {
    317 		error(&tok.loc, "#ifdef directive is not implemented");
    318 	} else if (strcmp(name, "ifndef") == 0) {
    319 		error(&tok.loc, "#ifndef directive is not implemented");
    320 	} else if (strcmp(name, "elif") == 0) {
    321 		error(&tok.loc, "#elif directive is not implemented");
    322 	} else if (strcmp(name, "endif") == 0) {
    323 		error(&tok.loc, "#endif directive is not implemented");
    324 	} else if (strcmp(name, "include") == 0) {
    325 		error(&tok.loc, "#include directive is not implemented");
    326 	} else if (strcmp(name, "define") == 0) {
    327 		scan(&tok);
    328 		define();
    329 	} else if (strcmp(name, "undef") == 0) {
    330 		scan(&tok);
    331 		undef();
    332 	} else if (strcmp(name, "line") == 0) {
    333 		error(&tok.loc, "#line directive is not implemented");
    334 	} else if (strcmp(name, "error") == 0) {
    335 		error(&tok.loc, "#error directive is not implemented");
    336 	} else if (strcmp(name, "pragma") == 0) {
    337 		error(&tok.loc, "#pragma directive is not implemented");
    338 	} else {
    339 		error(&tok.loc, "invalid preprocessor directive #%s", name);
    340 	}
    341 	free(name);
    342 	tokencheck(&tok, TNEWLINE, "after preprocessing directive");
    343 	ppflags = oldflags;
    344 }
    345 
    346 /* get the next token without expanding it */
    347 static void
    348 nextinto(struct token *t)
    349 {
    350 	static bool newline = true;
    351 
    352 	for (;;) {
    353 		scan(t);
    354 		if (newline && t->kind == THASH) {
    355 			directive();
    356 		} else {
    357 			newline = tok.kind == TNEWLINE;
    358 			break;
    359 		}
    360 	}
    361 }
    362 
    363 static struct token *
    364 rawnext(void)
    365 {
    366 	struct token *t;
    367 
    368 	t = ctxnext();
    369 	if (!t) {
    370 		t = &tok;
    371 		nextinto(t);
    372 	}
    373 	return t;
    374 }
    375 
/*
 * Check whether the next token is '(' (skipping newlines when reading from
 * the input).  If so, the '(' is consumed and true is returned.  Otherwise
 * the tokens that were looked at are made available again through the
 * context stack and false is returned.
 */
static bool
peekparen(void)
{
	/*
	 * Holds tokens read ahead from the input; it must outlive this call
	 * because a context frame may be left pointing into it.
	 * NOTE(review): the buffer is reused on every call, so a caller still
	 * holding a pointer into it (e.g. a token handed out from a frame pushed
	 * by a previous call) would see that token overwritten -- confirm that
	 * callers do not rely on such pointers after calling peekparen().
	 */
	static struct array pending;
	struct token *t;
	struct frame *f;

	t = ctxnext();
	if (t) {
		if (t->kind == TLPAREN)
			return true;
		/* not a '(': step the top frame back by one token to un-read it */
		f = arraylast(&ctx, sizeof(*f));
		--f->token;
		++f->ntoken;
		return false;
	}
	/* context is empty: read ahead from the input, past any newlines */
	pending.len = 0;
	do t = arrayadd(&pending, sizeof(*t)), nextinto(t);
	while (t->kind == TNEWLINE);
	if (t->kind == TLPAREN)
		return true;
	/* not a '(': queue everything that was read (newlines included) for re-delivery */
	t = pending.val;
	ctxpush(t, pending.len / sizeof(*t), NULL, t[0].space);
	return false;
}
    401 
    402 static void
    403 stringize(struct array *buf, struct token *t)
    404 {
    405 	const char *lit;
    406 
    407 	if ((t->space || t->kind == TNEWLINE) && buf->len > 1 && ((char *)buf->val)[buf->len - 1] != ' ')
    408 		arrayaddbuf(buf, " ", 1);
    409 	lit = t->lit ? t->lit : tokstr[t->kind];
    410 	if (t->kind == TSTRINGLIT || t->kind == TCHARCONST) {
    411 		for (; *lit; ++lit) {
    412 			if (*lit == '\\' || *lit == '"')
    413 				arrayaddbuf(buf, "\\", 1);
    414 			arrayaddbuf(buf, lit, 1);
    415 		}
    416 	} else if (lit) {
    417 		arrayaddbuf(buf, lit, strlen(lit));
    418 	}
    419 }
    420 
/*
 * Try to macro-expand token t.
 *
 * Returns false when t is not an identifier, does not name a macro, names a
 * macro that is currently hidden, or names a function-like macro that is not
 * followed by '('.  Otherwise the arguments (for a function-like macro) are
 * read, the macro's replacement list is pushed onto the context stack and
 * true is returned; the caller then fetches the expansion's tokens through
 * the context.
 */
static bool
expand(struct token *t)
{
	struct macro *m;
	struct macroparam *p;
	struct macroarg *arg;
	/* note: this local `tok` shadows the global current token */
	struct array str, tok;
	size_t i, depth, paren;
	bool space;

	if (t->kind != TIDENT)
		return false;
	m = macroget(t->lit);
	if (!m || m->hide || t->hide) {
		/* mark the token so it is not considered for expansion again */
		t->hide = true;
		return false;
	}
	space = t->space;
	if (m->kind == MACROFUNC) {
		if (!peekparen())
			return false;
		/* read macro arguments */
		paren = 0;
		/* expansion depth at which the invocation's own tokens are seen */
		depth = macrodepth;
		/* one buffer collects the tokens of all arguments back to back */
		tok = (struct array){0};
		arg = xreallocarray(NULL, m->nparam, sizeof(*arg));
		t = rawnext();
		for (i = 0; i < m->nparam; ++i) {
			p = &m->param[i];
			if (p->flags & PARAMSTR) {
				/* start the stringized form with its opening quote */
				str = (struct array){0};
				arrayaddbuf(&str, "\"", 1);
			}
			arg[i].ntoken = 0;
			for (;;) {
				if (t->kind == TEOF)
					error(&t->loc, "EOF when reading macro parameters");
				/*
				 * Tokens delivered while a macro started during argument
				 * reading is still expanding (macrodepth > depth) do not
				 * delimit arguments and are not stringized.
				 */
				if (macrodepth <= depth) {
					/* adjust current macro depth, in case it got shallower */
					depth = macrodepth;
					/* ')' or ',' outside nested parentheses ends the argument; ',' does not end __VA_ARGS__ */
					if (paren == 0 && (t->kind == TRPAREN || t->kind == TCOMMA && !(p->flags & PARAMVAR)))
						break;
					switch (t->kind) {
					case TLPAREN: ++paren; break;
					case TRPAREN: --paren; break;
					}
					if (p->flags & PARAMSTR)
						stringize(&str, t);
				}
				/* argument tokens are expanded as they are read; only unexpandable ones are stored */
				if (p->flags & PARAMTOK && !expand(t)) {
					arrayaddbuf(&tok, t, sizeof(*t));
					++arg[i].ntoken;
				}
				t = rawnext();
			}
			if (p->flags & PARAMSTR) {
				/* two bytes: the closing quote and the terminating NUL of the literal */
				arrayaddbuf(&str, "\"", 2);
				arg[i].str = (struct token){
					.kind = TSTRINGLIT,
					.lit = str.val,
				};
			}
			if (t->kind == TRPAREN)
				break;
			/* skip the ',' separating this argument from the next */
			t = rawnext();
		}
		if (i + 1 < m->nparam)
			error(&t->loc, "not enough arguments for macro '%s'", m->name);
		if (t->kind != TRPAREN)
			error(&t->loc, "too many arguments for macro '%s'", m->name);
		/* the buffer is final now, so hand each argument its slice of it */
		for (i = 0, t = tok.val; i < m->nparam; ++i) {
			arg[i].token = t;
			t += arg[i].ntoken;
		}
		m->arg = arg;
	}
	ctxpush(m->token, m->ntoken, m, space);
	/* a macro is not expanded again within its own expansion */
	m->hide = true;
	++macrodepth;
	return true;
}
    502 
    503 static void
    504 keyword(struct token *tok)
    505 {
    506 	static const struct {
    507 		const char *name;
    508 		int value;
    509 	} keywords[] = {
    510 		{"_Alignas",       T_ALIGNAS},
    511 		{"_Alignof",       T_ALIGNOF},
    512 		{"_Atomic",        T_ATOMIC},
    513 		{"_Bool",          T_BOOL},
    514 		{"_Complex",       T_COMPLEX},
    515 		{"_Generic",       T_GENERIC},
    516 		{"_Imaginary",     T_IMAGINARY},
    517 		{"_Noreturn",      T_NORETURN},
    518 		{"_Static_assert", T_STATIC_ASSERT},
    519 		{"_Thread_local",  T_THREAD_LOCAL},
    520 		{"__alignof__",    T_ALIGNOF},
    521 		{"__asm",          T__ASM__},
    522 		{"__asm__",        T__ASM__},
    523 		{"__attribute__",  T__ATTRIBUTE__},
    524 		{"__inline",       TINLINE},
    525 		{"__inline__",     TINLINE},
    526 		{"__signed",       TSIGNED},
    527 		{"__signed__",     TSIGNED},
    528 		{"__thread",       T_THREAD_LOCAL},
    529 		{"__typeof",       T__TYPEOF__},
    530 		{"__typeof__",     T__TYPEOF__},
    531 		{"__volatile__",   TVOLATILE},
    532 		{"auto",           TAUTO},
    533 		{"break",          TBREAK},
    534 		{"case",           TCASE},
    535 		{"char",           TCHAR},
    536 		{"const",          TCONST},
    537 		{"continue",       TCONTINUE},
    538 		{"default",        TDEFAULT},
    539 		{"do",             TDO},
    540 		{"double",         TDOUBLE},
    541 		{"else",           TELSE},
    542 		{"enum",           TENUM},
    543 		{"extern",         TEXTERN},
    544 		{"float",          TFLOAT},
    545 		{"for",            TFOR},
    546 		{"goto",           TGOTO},
    547 		{"if",             TIF},
    548 		{"inline",         TINLINE},
    549 		{"int",            TINT},
    550 		{"long",           TLONG},
    551 		{"register",       TREGISTER},
    552 		{"restrict",       TRESTRICT},
    553 		{"return",         TRETURN},
    554 		{"short",          TSHORT},
    555 		{"signed",         TSIGNED},
    556 		{"sizeof",         TSIZEOF},
    557 		{"static",         TSTATIC},
    558 		{"struct",         TSTRUCT},
    559 		{"switch",         TSWITCH},
    560 		{"typedef",        TTYPEDEF},
    561 		{"union",          TUNION},
    562 		{"unsigned",       TUNSIGNED},
    563 		{"void",           TVOID},
    564 		{"volatile",       TVOLATILE},
    565 		{"while",          TWHILE},
    566 	};
    567 	size_t low = 0, high = LEN(keywords), mid;
    568 	int cmp;
    569 
    570 	while (low < high) {
    571 		mid = (low + high) / 2;
    572 		cmp = strcmp(tok->lit, keywords[mid].name);
    573 		if (cmp == 0) {
    574 			free(tok->lit);
    575 			tok->kind = keywords[mid].value;
    576 			tok->lit = NULL;
    577 			break;
    578 		}
    579 		if (cmp < 0)
    580 			high = mid;
    581 		else
    582 			low = mid + 1;
    583 	}
    584 }
    585 
    586 void
    587 next(void)
    588 {
    589 	struct token *t;
    590 
    591 	do t = rawnext();
    592 	while (expand(t) || t->kind == TNEWLINE && !(ppflags & PPNEWLINE));
    593 	tok = *t;
    594 	if (tok.kind == TIDENT)
    595 		keyword(&tok);
    596 }
    597 
    598 bool
    599 peek(int kind)
    600 {
    601 	static struct token pending;
    602 	struct token old;
    603 
    604 	old = tok;
    605 	next();
    606 	if (tok.kind == kind) {
    607 		next();
    608 		return true;
    609 	}
    610 	pending = tok;
    611 	tok = old;
    612 	ctxpush(&pending, 1, NULL, pending.space);
    613 	return false;
    614 }
    615 
    616 char *
    617 expect(enum tokenkind kind, const char *msg)
    618 {
    619 	char *lit;
    620 
    621 	lit = tokencheck(&tok, kind, msg);
    622 	next();
    623 
    624 	return lit;
    625 }
    626 
    627 bool
    628 consume(int kind)
    629 {
    630 	if (tok.kind != kind)
    631 		return false;
    632 	next();
    633 	return true;
    634 }