c++ - flex/bison based compiler: parsing asm[assembly commands] inside C program

Question

Welcome To Ask or Share your Answers For Others

c++ - flex/bison based compiler: parsing asm[assembly commands] inside C program

posted Oct 6, 2021 in Technique[技术] by 深蓝 (71.8m points)

c++ - flex/bison based compiler: parsing asm[assembly commands] inside C program

I'm new to flex and bison. I want to write a compiler that reads a C program and translates it into commands for my processor, which are similar to assembly. I downloaded a pre-written compiler that uses flex and bison. I need to change scanner.l and parser.y so that they can process asm commands embedded in my C code, written like asm [asm command1 asm command2 asm command3 ...]. Which definitions and rules should I add to these two files?

scanner.l:

%{
#include "scanner.h"
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
/* Size in bytes of string_buf, the buffer a string constant is assembled in. */
#define MAX_STR_CONST 1000
/* Characters of the string literal currently being scanned. */
char string_buf[MAX_STR_CONST];
/* Write cursor into string_buf; the string rules reset and advance it. */
char *string_buf_ptr;
/* Current line and column, maintained by updatePosition(); both start at 1. */
int line_num = 1;
int line_pos = 1;

/* flex expands YY_USER_ACTION before the action of every matched rule, so
 * updatePosition() runs once for each piece of matched text. */
void updatePosition();
#define YY_USER_ACTION updatePosition();

%}

/* Decimal integer: a lone 0, or a non-zero digit followed by more digits. */
NUMBER  (0)|([1-9][0-9]*)
/* Hexadecimal integer introduced by 0x or 0X. */
HEXNUM  ((0x)|(0X))([a-fA-F0-9]+)
/* Identifier: a letter or underscore, then letters, digits or underscores. */
IDENT   [a-zA-Z_][a-zA-Z0-9_]*

/* Exclusive start conditions: while one is active only the rules tagged
 * with it can match. Used for block comments and string literals. */
%x comment
%x str


/* noyywrap: no yywrap() call at end of input; yylineno: flex keeps a line
 * counter; nounput: do not generate the unused unput() function. */
%option noyywrap
%option yylineno
%option nounput

%%

\"      { /* Opening quote: reset the buffer and enter the string state.
           * NOTE(review): none of the <str> rules check string_buf_ptr
           * against MAX_STR_CONST, so a literal with MAX_STR_CONST or
           * more characters overruns string_buf. */
            string_buf_ptr = string_buf;
            BEGIN(str);
        }
<str>{
\"      { /* saw closing quote - all done */
            BEGIN(INITIAL);
            *string_buf_ptr = '\0';
            /* return string constant token type and
            * value to parser
            */
            yylval.strConst = new std::string(string_buf);
            return T_STR_CONST;
        }

\n      {
            /* error - unterminated string constant */
            /* generate error message */
            yyerror("Unterminated string constant.");
        }

<<EOF>> { return T_UNTERM_STRING; }

\\[0-7]{1,3} {
        /* octal escape sequence */
        unsigned int result = 0;

        (void) sscanf( yytext + 1, "%o", &result );

        if ( result > 0xff ) {
                /* error, constant is out-of-bounds: report it and store
                 * nothing instead of a truncated character */
                yyerror("Bad string escape sequence.");
        } else {
                *string_buf_ptr++ = static_cast<char>(result);
        }
        }

\\[0-9]+ {
        /* generate error - bad escape sequence; something
        * like '\48' or '\0777777'
        */
        yyerror("Bad string escape sequence.");
        }

\\n         *string_buf_ptr++ = '\n';
\\t         *string_buf_ptr++ = '\t';
\\r         *string_buf_ptr++ = '\r';
\\b         *string_buf_ptr++ = '\b';
\\f         *string_buf_ptr++ = '\f';

\\(.|\n)    { /* any other escaped character stands for itself */
            *string_buf_ptr++ = yytext[1];
            }

[^\\\n\"]+  {
            /* run of ordinary characters: copy it verbatim */
            char *yptr = yytext;

            while ( *yptr )
                    *string_buf_ptr++ = *yptr++;
            }
}

"/*"            { /* start of a block comment */ BEGIN(comment); }
<comment>{
[^*\n]*         /* eat anything that's not a '*' */
"*"+[^*/\n]*    /* eat up '*'s not followed by '/'s */
\n              /* eat newlines; updatePosition() keeps the line count */
<<EOF>>         { return T_UNTERM_COMMENT; }
"*"+"/"         BEGIN(INITIAL);
}




    /* Reserved words. They are listed before {IDENT}; on an equal-length
     * match flex prefers the earlier rule, so these win over identifiers. */
"do"            { return T_DO; }
"while"         { return T_WHILE; }
"for"           { return T_FOR; }
"if"            { return T_IF; }
"else"          { return T_ELSE; }
"int"           { return T_INT_TYPE; }
"string"        { return T_STRING_TYPE; }
"void"          { return T_VOID_TYPE; }
"struct"        { return T_STRUCT; }
"return"        { return T_RETURN; }
"switch"        { return T_SWITCH; }
"case"          { return T_CASE; }
"default"       { return T_DEFAULT; }
"break"         { return T_BREAK; }
"continue"      { return T_CONTINUE; }
"sizeof"        { return T_SIZEOF; }

    /* Single-character punctuation is returned as its own character code. */
"{"             { return '{'; }
"}"             { return '}'; }
"("             { return '('; }
")"             { return ')'; }
"["             { return '['; }
"]"             { return ']'; }
"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { return '%'; }
"="             { return '='; }
">"             { return '>'; }
"<"             { return '<'; }
"!"             { return '!'; }
"|"             { return '|'; }
"&"             { return '&'; }
"^"             { return '^'; }
"~"             { return '~'; }
"."             { return '.'; }
":"             { return ':'; }
";"             { return ';'; }
","             { return ','; }

    /* Multi-character operators. flex takes the longest match, so these
     * win over the single-character rules above despite coming later. */
"<<"            { return T_LEFT_SHIFT; }
">>"            { return T_RIGHT_SHIFT; }
"&&"            { return T_BOOL_AND; }
"||"            { return T_BOOL_OR; }
"+="            { return T_PLUS_EQUALS; }
"-="            { return T_MINUS_EQUALS; }
"*="            { return T_STAR_EQUALS; }
"/="            { return T_DIV_EQUALS; }
"%="            { return T_MOD_EQUALS; }
"=="            { return T_EQUAL; }
"<="            { return T_LESS_OR_EQUAL; }
">="            { return T_GREATER_OR_EQUAL; }
"!="            { return T_NOT_EQUAL; }
"|="            { return T_BIT_OR_EQUALS; }
"&="            { return T_BIT_AND_EQUALS; }
"^="            { return T_BIT_XOR_EQUALS; }
"~="            { return T_BIT_NOT_EQUALS; }
"->"            { return T_ARROW; }
"<<="           { return T_LEFT_SHIFT_EQUALS; }
">>="           { return T_RIGHT_SHIFT_EQUALS; }
"++"            { return T_PLUS_PLUS; }
"--"            { return T_MINUS_MINUS; }

" "|"\t"|"\n"|"\r"|"const"  { /* whitespace and the const qualifier produce no token */ }
{HEXNUM}        { yylval.intConst = static_cast<unsigned int>(std::strtoul(yytext, NULL, 0)); return T_INT_CONST; }
{NUMBER}        { yylval.intConst = static_cast<unsigned int>(std::strtoul(yytext, NULL, 10)); return T_INT_CONST; }
{IDENT}         { yylval.ident = new std::string(yytext); return T_IDENT; }
.               {{ /* the trailing 'a' is a placeholder overwritten with the offending character */
                   char err[] = "Unknown Character: a"; err[strlen(err)-1] = *yytext; yyerror(err); }}

%%

/**
 * This function is called on every token, and updates the yylloc global variable, which stores the
 * location/position of the current token.
 */
void updatePosition() {
    yylloc.first_line = line_num;
    yylloc.first_column = line_pos;
    char* text = yytext;
    while(*text != '') {
        if(*text == '
') {
            line_num++;
            line_pos = 1;
        } else {
            line_pos++;
        }
        text++;
    }
    yylloc.last_line = line_num;
    yylloc.last_column = line_pos;
}

parser.y:

%code requires {

#include "Declaration.h"
#include "Expression.h"
#include "Statement.h"
#include "Type.h"
#include "Parser.h"
#include "Util.h"

extern Program* program_out;

}

%locations
%define parse.lac full
%error-verbose

%{
#include "Parser.h"
#include "scanner.h"
#include <string>
#include <iostream>
#include "Type.h"

%}

//%parse-param {Program*& out}

/* Semantic value type shared by scanner and parser. The <tag> named in each
 * %type / %token declaration selects which member a symbol uses. */
%union {
    /* Tag of the root nonterminal. */
    char* cstr;
    /* Set by the scanner for T_IDENT and T_STR_CONST (heap std::string). */
    std::string* ident;
    std::string* strConst;
    /* Set by the scanner for T_INT_CONST. */
    unsigned int intConst;
    /* Values built by the grammar actions for the matching nonterminals. */
    Type* type;
    std::vector<Declaration*>* declareList;
    Declaration* declare;
    ConstantExpression* constant;
    std::vector<FunctionParameter*>* paramList;
    FunctionParameter* param;
    std::vector<StructMember*>* structMemberList;
    StructMember* structMember;
    StatementBlock* statementBlock;
    Statement* statement;
    std::vector<Statement*>* statementList;
    Expression* expression;
    std::vector<Expression*>* expressionList;
}

%type <type> type
%type <cstr> root
%type <declareList> root_declare_list
%type <declare> root_declare
%type <constant> constant
%type <paramList> param_list non_empty_param_list
%type <param> param
%type <structMemberList> struct_list
%type <structMember> struct_member;
%type <statementBlock> statement_block
%type <statementList> statement_list
%type <statement> statement
%type <expression> expression
%type <expressionList> argument_list non_empty_argument_list

%token <ident> T_IDENT
%token <strConst> T_STR_CONST
%token <intConst> T_INT_CONST
%token T_IF T_ELSE T_FOR T_WHILE T_DO T_SIZEOF 
%token T_INT_TYPE T_STRING_TYPE T_VOID_TYPE T_STRUCT
%token T_RETURN T_SWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE
%token T_BOOL_OR T_BOOL_AND
%token T_LEFT_SHIFT T_RIGHT_SHIFT T_PLUS_EQUALS T_MINUS_EQUALS 
%token T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS T_EQUAL 
%token T_LESS_OR_EQUAL T_GREATER_OR_EQUAL T_NOT_EQUAL
%token T_BIT_OR_EQUALS T_BIT_AND_EQUALS T_BIT_XOR_EQUALS
%token T_BIT_NOT_EQUALS T_ARROW T_LEFT_SHIFT_EQUALS
%token T_RIGHT_SHIFT_EQUALS T_PLUS_PLUS T_MINUS_MINUS
%token T_UNTERM_STRING T_UNTERM_COMMENT

/* tokens for precedence */
%token PREC_ADDRESS PREC_DEREFERENCE PREC_UNARY_MINUS PREC_UNARY_PLUS
%token PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%token PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%token PREC_APPLICATION

/* lowest precedence */
%left ','
%right T_BIT_AND_EQUALS T_BIT_XOR_EQUALS T_BIT_OR_EQUALS
%right T_LEFT_SHIFT_EQUALS T_RIGHT_SHIFT_EQUALS
%right T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS
%right T_PLUS_EQUALS T_MINUS_EQUALS
%right '='
%left T_BOOL_OR
%left T_BOOL_AND
%left '|'
%left '^'
%left '&'
%left T_EQUAL T_NOT_EQUAL
%left '>' T_GREATER_OR_EQUAL
%left '<' T_LESS_OR_EQUAL
%left T_LEFT_SHIFT T_RIGHT_SHIFT
%left '+' '-'
%left '*' '/' '%'
%right PREC_ADDRESS
%right PREC_DEREFERENCE
%right '!' '~'
%right PREC_UNARY_PLUS PREC_UNARY_MINUS
%right PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%right T_PLUS_PLUS T_MINUS_MINUS
%left T_ARROW
%left '.'
%left '['
%left PREC_APPLICATION
%left PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%nonassoc T_IF
%nonassoc T_ELSE
/* highest precedence */



%%

/* Start symbol: wraps the collected declarations in a Program stored in
 * program_out, then frees the temporary vector. $$ itself is set to NULL. */
root:
        root_declare_list                                       { $$ = NULL; program_out = new Program(@$, *$1); delete $1; }
    ;

/* Zero or more top-level declarations (left-recursive). The empty
 * alternative allocates the vector; each further declaration is appended
 * to that same vector. */
root_declare_list:
        root_declare_list root_declare                          { $$ = $1; $1->push_back($2); }
    |                                                           { $$ = new std::vector<Declaration*>(); }
    ;

/* One top-level declaration: function prototype, function definition,
 * global variable (plain, array, or with a constant initializer), struct
 * definition, or struct pre-declaration. Identifier strings and list
 * vectors are passed to the node constructor by dereference and deleted
 * right after; type, constant and statement_block values are handed over
 * as pointers and are not deleted here. */
root_declare:
        type T_IDENT '(' param_list ')' ';'                     { $$ = new FunctionPrototype(@$, $1, *$2, *$4); delete $2; delete $4; }
    |   type T_IDENT '(' param_list ')' statement_block         { $$ = new FunctionDeclaration(@$, $1, *$2, *$4, $6); delete $2; delete $4; }
    |   type T_IDENT ';'                                        { $$ = new GlobalVarDeclaration(@$, $1, *$2); delete $2; }
    |   type T_IDENT '[' T_INT_CONST ']' ';'                    { $$ = new GlobalArrayDeclaration(@$, $1, *$2, $4); delete $2; }
    |   type T_IDENT '=' constant ';'                           { $$ = new GlobalVarDeclarationInit(@$, $1, *$2, $4); delete $2; }
    |   T_STRUCT T_IDENT '{' struct_list '}' ';'                { $$ = new StructDeclaration(@$, *$2, *$4); delete $2; delete $4; }
    |   T_STRUCT T_IDENT ';'                                    { $$ = new StructPredeclaration(@$, *$2); delete $2; }
    ;

/* Literal constant: an integer or a string. The scanner-allocated
 * std::string of a string constant is deleted once the node is built. */
constant:
        T_INT_CONST                                             { $$ = new IntConstantExpression(@$, $1); }
    |   T_STR_CONST                                             { $$ = new StringConstantExpression(@$, *$1); delete $1; }
    ;

param_list:
        non_empty_param_list                                    { $$ = $1; }
    |

与恶龙缠斗过久,自身亦成为恶龙；凝视深渊过久,深渊将回以凝视…

Categories

c++ - flex/bison based compiler: parsing asm[assembly commands] inside C program

c++ - flex/bison based compiler: parsing asm[assembly commands] inside C program

Please log in or register to add a comment.

Please log in or register to reply to this article.

1 Reply

Please log in or register to add a comment.

Just Browsing Browsing

Most popular tags