I'm new to flex and bison. I want to write a compiler that reads a C program and translates it into commands for my processor, which are similar to assembly. I downloaded a pre-written compiler that uses flex and bison. I need to change scanner.l and parser.y so that the compiler can process asm commands embedded in my C code, like this:
asm [asm command1
asm command2
asm command3
...]
Which definitions and rules should I add to these two files?
scanner.l:
%{
/* Scanner prologue: everything between %{ and %} is copied verbatim into the
 * generated scanner source. */
#include "scanner.h"
/* NOTE(review): presumably the parser-generated header that supplies the T_*
 * token codes, yylval and yylloc used by the rules -- confirm against the build. */
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
/* Size in bytes of the buffer that collects the text of a string constant. */
#define MAX_STR_CONST 1000
/* Buffer filled by the <str> rules while a string constant is being scanned. */
char string_buf[MAX_STR_CONST];
/* Next write position inside string_buf. */
char *string_buf_ptr;
/* Line and column of the next character to be scanned; both start at 1 and
 * are advanced by updatePosition(). */
int line_num = 1;
int line_pos = 1;
void updatePosition();
/* Run updatePosition() for every matched rule so that yylloc is refreshed
 * before the rule's action executes. */
#define YY_USER_ACTION updatePosition();
%}
/* Decimal integer literal: a lone 0, or a non-zero digit followed by digits. */
NUMBER (0)|([1-9][0-9]*)
/* Hexadecimal integer literal introduced by 0x or 0X. */
HEXNUM ((0x)|(0X))([a-fA-F0-9]+)
/* Identifier: a letter or underscore followed by letters, digits or underscores. */
IDENT [a-zA-Z_][a-zA-Z0-9_]*
/* Exclusive start conditions: while one is active only the rules written for
 * it can match. "comment" is entered on the block-comment opener and "str" on
 * an opening double quote. */
%x comment
%x str
/* noyywrap: treat end of input as final instead of calling yywrap(). */
%option noyywrap
/* yylineno: have flex maintain the current line number in yylineno. */
%option yylineno
/* nounput: do not generate the unused unput() routine. */
%option nounput
%%
%{
    /*
     * Code placed here, ahead of the first rule, is copied to the top of the
     * generated scanning routine.
     *
     * Rule groups below, in order:
     *   - string constants (exclusive start condition str)
     *   - block comments (exclusive start condition comment)
     *   - keywords, single-character tokens, multi-character operators
     *   - whitespace, integer literals, identifiers, catch-all error rule
     *
     * Keyword rules must stay ahead of the {IDENT} rule: when two patterns
     * match the same text, the rule listed first wins.
     */

    /* True once the string constant being scanned has filled string_buf, so
     * that the overflow is reported a single time per constant. */
    static bool str_overflow = false;

    /* Append one character to string_buf, always leaving the last byte free
     * for the terminating NUL written by the closing-quote rule. Characters
     * that do not fit are dropped after one diagnostic. */
#define STR_APPEND(ch) \
    do { \
        if (string_buf_ptr < string_buf + MAX_STR_CONST - 1) { \
            *string_buf_ptr++ = (ch); \
        } else if (!str_overflow) { \
            str_overflow = true; \
            yyerror("String constant too long."); \
        } \
    } while (0)
%}
\" {
    /* Opening quote: start collecting a string constant into string_buf. */
    string_buf_ptr = string_buf;
    str_overflow = false;
    BEGIN(str);
}
<str>{
\" {
    /* Closing quote: terminate the buffer and hand the string constant
     * (token type and value) to the parser. */
    BEGIN(INITIAL);
    *string_buf_ptr = '\0';
    yylval.strConst = new std::string(string_buf);
    return T_STR_CONST;
}
\n {
    /* A raw newline inside the quotes: the constant was never closed. */
    yyerror("Unterminated string constant.");
}
<<EOF>> { return T_UNTERM_STRING; }
\\[0-7]{1,3} {
    /* Octal escape sequence: a backslash followed by one to three octal digits. */
    unsigned int result = 0;
    (void) sscanf(yytext + 1, "%o", &result);
    if (result > 0xff) {
        /* The value does not fit in a single byte. */
        yyerror("Octal escape sequence out of range.");
    } else {
        STR_APPEND(static_cast<char>(result));
    }
}
\\[0-9]+ {
    /* Any other numeric escape (for example \48 or \777777) is malformed. */
    yyerror("Bad string escape sequence.");
}
\\n { STR_APPEND('\n'); }
\\t { STR_APPEND('\t'); }
\\r { STR_APPEND('\r'); }
\\b { STR_APPEND('\b'); }
\\f { STR_APPEND('\f'); }
\\(.|\n) {
    /* Any other escaped character, including an escaped newline, stands
     * for itself. */
    STR_APPEND(yytext[1]);
}
[^\\\n\"]+ {
    /* Run of ordinary characters: copy it verbatim. */
    for (const char *src = yytext; *src; ++src) {
        STR_APPEND(*src);
    }
}
}
"/*" BEGIN(comment);
<comment>{
[^*\n]* { /* eat anything that's not a '*' */ }
"*"+[^*/\n]* { /* eat up '*'s not followed by '/'s */ }
\n { /* skip line breaks inside the comment instead of letting the default rule echo them */ }
<<EOF>> { return T_UNTERM_COMMENT; }
"*"+"/" BEGIN(INITIAL);
}
"do" { return T_DO; }
"while" { return T_WHILE; }
"for" { return T_FOR; }
"if" { return T_IF; }
"else" { return T_ELSE; }
"int" { return T_INT_TYPE; }
"string" { return T_STRING_TYPE; }
"void" { return T_VOID_TYPE; }
"struct" { return T_STRUCT; }
"return" { return T_RETURN; }
"switch" { return T_SWITCH; }
"case" { return T_CASE; }
"default" { return T_DEFAULT; }
"break" { return T_BREAK; }
"continue" { return T_CONTINUE; }
"sizeof" { return T_SIZEOF; }
"{" { return '{'; }
"}" { return '}'; }
"(" { return '('; }
")" { return ')'; }
"[" { return '['; }
"]" { return ']'; }
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"%" { return '%'; }
"=" { return '='; }
">" { return '>'; }
"<" { return '<'; }
"!" { return '!'; }
"|" { return '|'; }
"&" { return '&'; }
"^" { return '^'; }
"~" { return '~'; }
"." { return '.'; }
":" { return ':'; }
";" { return ';'; }
"," { return ','; }
"<<" { return T_LEFT_SHIFT; }
">>" { return T_RIGHT_SHIFT; }
"&&" { return T_BOOL_AND; }
"||" { return T_BOOL_OR; }
"+=" { return T_PLUS_EQUALS; }
"-=" { return T_MINUS_EQUALS; }
"*=" { return T_STAR_EQUALS; }
"/=" { return T_DIV_EQUALS; }
"%=" { return T_MOD_EQUALS; }
"==" { return T_EQUAL; }
"<=" { return T_LESS_OR_EQUAL; }
">=" { return T_GREATER_OR_EQUAL; }
"!=" { return T_NOT_EQUAL; }
"|=" { return T_BIT_OR_EQUALS; }
"&=" { return T_BIT_AND_EQUALS; }
"^=" { return T_BIT_XOR_EQUALS; }
"~=" { return T_BIT_NOT_EQUALS; }
"->" { return T_ARROW; }
"<<=" { return T_LEFT_SHIFT_EQUALS; }
">>=" { return T_RIGHT_SHIFT_EQUALS; }
"++" { return T_PLUS_PLUS; }
"--" { return T_MINUS_MINUS; }
" "|"\t"|"\r"|"\n"|"const" { /* whitespace and the const qualifier produce no token */ }
{HEXNUM} { yylval.intConst = std::strtoul(yytext, NULL, 0); return T_INT_CONST; }
{NUMBER} {
    /* Parsed as unsigned, like the hexadecimal case, so that values above
     * INT_MAX are not mangled on the way into the unsigned intConst field. */
    yylval.intConst = std::strtoul(yytext, NULL, 10);
    return T_INT_CONST;
}
{IDENT} { yylval.ident = new std::string(yytext); return T_IDENT; }
. {
    /* The trailing 'a' is a placeholder that is overwritten with the
     * offending character. */
    char err[] = "Unknown Character: a";
    err[strlen(err) - 1] = *yytext;
    yyerror(err);
}
%%
/**
 * Records the source span of the text that was just matched.
 *
 * Invoked through YY_USER_ACTION for every matched rule. The start of the span
 * is the position saved in line_num / line_pos; the function then walks
 * yytext, advancing those counters, and stores the resulting position as the
 * end of the span. Both ends are written to the global yylloc.
 */
void updatePosition() {
    yylloc.first_line = line_num;
    yylloc.first_column = line_pos;
    for (const char* text = yytext; *text != '\0'; ++text) {
        if (*text == '\n') {
            /* A line break moves to column 1 of the following line. */
            ++line_num;
            line_pos = 1;
        } else {
            ++line_pos;
        }
    }
    yylloc.last_line = line_num;
    yylloc.last_column = line_pos;
}
parser.y:
/* Declarations needed by the generated parser header as well as by the parser
 * itself: the node types named in %union and the global result pointer. */
%code requires {
#include "Declaration.h"
#include "Expression.h"
#include "Statement.h"
#include "Type.h"
#include "Parser.h"
#include "Util.h"
/* Receives the Program built by the root rule. */
extern Program* program_out;
}
/* Track source locations so that actions can use @$. */
%locations
/* Enable lookahead correction (LAC) for syntax-error handling. */
%define parse.lac full
/* Request detailed syntax-error messages. */
%error-verbose
/* Prologue copied into the generated parser implementation. */
%{
#include "Parser.h"
#include "scanner.h"
#include <string>
#include <iostream>
#include "Type.h"
%}
/* Disabled directive, kept for reference.
 * NOTE(review): presumably an alternative to the program_out global for
 * handing the result back to the caller -- confirm before re-enabling. */
//%parse-param {Program*& out}
/* Semantic value type: one member per kind of value carried by a token or a
 * nonterminal. The <member> tags on the %token / %type lines select which
 * member a symbol uses. */
%union {
/* Value of the root rule, which sets it to NULL. */
char* cstr;
/* Heap-allocated name carried by T_IDENT; allocated by the scanner. */
std::string* ident;
/* Heap-allocated text carried by T_STR_CONST; allocated by the scanner. */
std::string* strConst;
/* Numeric value carried by T_INT_CONST. */
unsigned int intConst;
Type* type;
/* Top-level declarations collected by root_declare_list. */
std::vector<Declaration*>* declareList;
Declaration* declare;
ConstantExpression* constant;
std::vector<FunctionParameter*>* paramList;
FunctionParameter* param;
std::vector<StructMember*>* structMemberList;
StructMember* structMember;
StatementBlock* statementBlock;
Statement* statement;
std::vector<Statement*>* statementList;
Expression* expression;
std::vector<Expression*>* expressionList;
}
/* Semantic value member used by each nonterminal. */
%type <type> type
%type <cstr> root
%type <declareList> root_declare_list
%type <declare> root_declare
%type <constant> constant
%type <paramList> param_list non_empty_param_list
%type <param> param
%type <structMemberList> struct_list
/* NOTE(review): the trailing ';' on the next line is unusual for a %type
 * declaration -- confirm the parser generator in use accepts it. */
%type <structMember> struct_member;
%type <statementBlock> statement_block
%type <statementList> statement_list
%type <statement> statement
%type <expression> expression
%type <expressionList> argument_list non_empty_argument_list
/* Tokens that carry a value filled in by the scanner. */
%token <ident> T_IDENT
%token <strConst> T_STR_CONST
%token <intConst> T_INT_CONST
/* Keywords. */
%token T_IF T_ELSE T_FOR T_WHILE T_DO T_SIZEOF
%token T_INT_TYPE T_STRING_TYPE T_VOID_TYPE T_STRUCT
%token T_RETURN T_SWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE
/* Multi-character operators. */
%token T_BOOL_OR T_BOOL_AND
%token T_LEFT_SHIFT T_RIGHT_SHIFT T_PLUS_EQUALS T_MINUS_EQUALS
%token T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS T_EQUAL
%token T_LESS_OR_EQUAL T_GREATER_OR_EQUAL T_NOT_EQUAL
%token T_BIT_OR_EQUALS T_BIT_AND_EQUALS T_BIT_XOR_EQUALS
%token T_BIT_NOT_EQUALS T_ARROW T_LEFT_SHIFT_EQUALS
%token T_RIGHT_SHIFT_EQUALS T_PLUS_PLUS T_MINUS_MINUS
/* Returned by the scanner when the input ends inside a string constant or a
 * block comment. */
%token T_UNTERM_STRING T_UNTERM_COMMENT
/* tokens for precedence */
/* The scanner never returns these; they only name levels in the precedence
 * table. NOTE(review): presumably referenced through %prec in the expression
 * rules -- confirm. */
%token PREC_ADDRESS PREC_DEREFERENCE PREC_UNARY_MINUS PREC_UNARY_PLUS
%token PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%token PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%token PREC_APPLICATION
/*
 * Operator precedence and associativity. Each declaration introduces one
 * level; later declarations bind more tightly than earlier ones.
 */
/* lowest precedence */
%left ','
/* Every assignment operator shares a single right-associative level, as in C,
 * so that a = b += c groups as a = (b += c). */
%right '=' T_PLUS_EQUALS T_MINUS_EQUALS
       T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS
       T_LEFT_SHIFT_EQUALS T_RIGHT_SHIFT_EQUALS
       T_BIT_AND_EQUALS T_BIT_XOR_EQUALS T_BIT_OR_EQUALS
%left T_BOOL_OR
%left T_BOOL_AND
%left '|'
%left '^'
%left '&'
%left T_EQUAL T_NOT_EQUAL
/* The four relational operators share one left-associative level, as in C,
 * so that a > b < c groups as (a > b) < c. */
%left '<' '>' T_LESS_OR_EQUAL T_GREATER_OR_EQUAL
%left T_LEFT_SHIFT T_RIGHT_SHIFT
%left '+' '-'
%left '*' '/' '%'
/* Prefix operators. */
%right PREC_ADDRESS
%right PREC_DEREFERENCE
%right '!' '~'
%right PREC_UNARY_PLUS PREC_UNARY_MINUS
%right PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%right T_PLUS_PLUS T_MINUS_MINUS
/* Member access, indexing, application and suffix operators. */
%left T_ARROW
%left '.'
%left '['
%left PREC_APPLICATION
%left PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
/* T_ELSE ranks above T_IF. NOTE(review): presumably this resolves the
 * dangling-else conflict in favour of attaching an else to the nearest if --
 * confirm against the statement rules. */
%nonassoc T_IF
%nonassoc T_ELSE
/* highest precedence */
%%
/* Start symbol: a whole translation unit. The finished Program is published
 * through program_out; the rule's own value is unused and set to NULL. */
root:
    root_declare_list[decls]
        {
            /* Program receives the list by dereference; the temporary
             * vector is released afterwards. */
            program_out = new Program(@$, *$[decls]);
            delete $[decls];
            $$ = NULL;
        }
    ;
/* Zero or more top-level declarations, accumulated left to right into a
 * heap-allocated vector. */
root_declare_list:
    root_declare_list[list] root_declare[decl]
        {
            $[list]->push_back($[decl]);
            $$ = $[list];
        }
    | /* empty */
        {
            $$ = new std::vector<Declaration*>();
        }
    ;
/* One top-level declaration. Identifier strings and list containers that
 * reach an action are handed to the node constructor by dereference and then
 * deleted. */
root_declare:
    /* function prototype */
    type[ret] T_IDENT[name] '(' param_list[params] ')' ';'
        {
            $$ = new FunctionPrototype(@$, $[ret], *$[name], *$[params]);
            delete $[name];
            delete $[params];
        }
    /* function definition */
    | type[ret] T_IDENT[name] '(' param_list[params] ')' statement_block[body]
        {
            $$ = new FunctionDeclaration(@$, $[ret], *$[name], *$[params], $[body]);
            delete $[name];
            delete $[params];
        }
    /* global variable without initializer */
    | type[vtype] T_IDENT[name] ';'
        {
            $$ = new GlobalVarDeclaration(@$, $[vtype], *$[name]);
            delete $[name];
        }
    /* global array with a constant element count */
    | type[vtype] T_IDENT[name] '[' T_INT_CONST[count] ']' ';'
        {
            $$ = new GlobalArrayDeclaration(@$, $[vtype], *$[name], $[count]);
            delete $[name];
        }
    /* global variable initialized from a constant */
    | type[vtype] T_IDENT[name] '=' constant[init] ';'
        {
            $$ = new GlobalVarDeclarationInit(@$, $[vtype], *$[name], $[init]);
            delete $[name];
        }
    /* struct definition */
    | T_STRUCT T_IDENT[name] '{' struct_list[members] '}' ';'
        {
            $$ = new StructDeclaration(@$, *$[name], *$[members]);
            delete $[name];
            delete $[members];
        }
    /* struct forward declaration */
    | T_STRUCT T_IDENT[name] ';'
        {
            $$ = new StructPredeclaration(@$, *$[name]);
            delete $[name];
        }
    ;
/* A literal usable as a global initializer: an integer or a string. */
constant:
    T_INT_CONST[value]
        {
            $$ = new IntConstantExpression(@$, $[value]);
        }
    | T_STR_CONST[text]
        {
            /* The scanner allocated the string; release it once the node
             * has been built from its contents. */
            $$ = new StringConstantExpression(@$, *$[text]);
            delete $[text];
        }
    ;
param_list:
non_empty_param_list { $$ = $1; }
|
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…