lex/yaccなソレ使ってMinIPS*2アセンブラぽいの

lex/yacc使ってアセンブラぽいの書けそう。とりあえずアセンブリプログラムを入力として、MinIPSマシン語を2進数で吐かせてみた。

% flex -I asm.l
% bison -d -y asm.y
% gcc lex.yy.c y.tab.c -lfl -o minasm
% ./minasm
add $1, $2, $3; add instruction
sub $2, $2, $3; hoge hoge hoge
j 10h
jr $5
00000000010000110000100000100000
00000000010000110001000000100010
00001000000000000000000000010000
00000000101000000000000000001000

lex/yaccなインターフェース、慣れるとわりと楽かも。

/**
* asm.l
**/
%{
#include "y.tab.h"
/*
 * Convert a string of hexadecimal digits to an int.
 *
 * Digits are consumed from the start of hval; conversion stops at the
 * terminating NUL or at the first character that is not 0-9, A-F or a-f
 * (for example the trailing 'h' suffix of a literal such as "10h").
 * Returns the value accumulated so far, which is 0 when the string does
 * not start with a hex digit.  No overflow checking is performed.
 */
int hex2int(char *hval)
{
  int result = 0;
  const char *p = hval;

  while(*p != '\0'){
    char c = *p++;
    int digit;

    if(c >= '0' && c <= '9'){
      digit = c - '0';
    }
    else if(c >= 'A' && c <= 'F'){
      digit = c - 'A' + 10;
    }
    else if(c >= 'a' && c <= 'f'){
      digit = c - 'a' + 10;
    }
    else{
      /* first non-hex character ends the number */
      break;
    }
    result = result * 16 + digit;
  }
  return result;
}
%}

/* Named character classes. */
ALPHA [a-zA-Z]
NUM [0-9]
/* HEX: one hexadecimal digit, either case. */
HEX [0-9a-fA-F]
ALNUM [0-9a-zA-Z_]
/* ID: a letter or underscore followed by letters, digits or underscores. */
ID [a-zA-Z_][0-9a-zA-Z_]*
/* RET: one line terminator (CR, LF or CRLF). */
RET [\r\n]|\r\n
/* SP: one blank or tab. */
SP [ \t]
SPRET [\r\n \t]

/* COMMENT: start condition for the text after a semicolon. */
%state COMMENT
%%

<INITIAL>{ID} {
  /* Mnemonic or other identifier.  yytext is reused by the scanner on
     the next match, so the parser has to copy the text before asking
     for another token. */
  yylval.sval = yytext;
  return(IDENT);
}
<INITIAL>[0-9]+ {
  /* Decimal literal. */
  yylval.ival = atoi(yytext);
  return(INTEGER);
}
<INITIAL>{HEX}+h {
  /* Hexadecimal literal with an 'h' suffix, e.g. 10h.  A literal that
     starts with a letter is matched by the identifier rule above
     instead (same length, earlier rule), so hex literals must begin
     with a decimal digit. */
  yylval.ival = hex2int(yytext);
  return(INTEGER);
}
<INITIAL>\: return(COL);
<INITIAL>\$ return(DOL);
<INITIAL>\, return(CAMMA);
<INITIAL>\; {BEGIN(COMMENT); /* rest of the line is a comment */ }
<INITIAL>{RET} return(EOL);
<INITIAL>{SP} ;
<INITIAL>\( return(LPAREN);
<INITIAL>\) return(RPAREN); /* was LPAREN, which made the imm(reg) operand form unparsable */
<INITIAL>. {
  /* Anything else: report it and end the current line. */
  yyerror("Illegal character");
  return(EOL);
}
<COMMENT>{RET} {BEGIN(INITIAL);return(EOL); /* the comment ends with the line; the parser still needs the EOL */ }
<COMMENT>[^\r\n] ;
%%

こっちはだいたいスムーズに書けたけど、

<COMMENT>{RET} {BEGIN(INITIAL);return(EOL);}

のところでEOL返すの忘れたりした。

/**
* asm.y
**/
%{
#include <stdio.h>
#include <string.h>
#include "optable.h"
/* Size of the scratch buffer that holds the most recent identifier. */
#define MAXCHAR 1000
char strbuf[MAXCHAR];

/* Instruction layout selected by each grammar rule. */
typedef enum Type {RForm, IForm, JForm} Type;
/* One assembled instruction; which members are meaningful depends on type. */
typedef struct Field{
  Type type;
  int op, rs, rt, rd, shamt, fun, imm, target;
} Field;
/* Maximum number of instructions that can be assembled in one run. */
#define PROGSIZE 60000
Field program[PROGSIZE];
/* Number of entries of program[] filled so far. */
int counter = 0;
/* The generated parser calls yylex() and yyerror(); declare them so the
   calls are not implicit declarations. */
int yylex(void);
int yyerror(const char *s);
int assoc_search(AssocTable *, char *);
int printField(Field *);
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
   int ival;    /* numeric literals and the integer nonterminal */
   float fval;  /* not used by any token or type declared below */
   char *sval;  /* identifier text */
}

/* Tokens that carry a value. */
%token <sval> IDENT
%token <ival> INTEGER
/* Punctuation tokens; LPAREN and RPAREN bracket the register in the
   base-offset operand form of the iform rule. */
%token COL DOL CAMMA LPAREN RPAREN
/* End of a source line. */
%token EOL

%type <ival> integer
%type <sval> ident

%%
/* A program is a possibly empty sequence of lines. */
input :
      | input line
      ;

/* A line is either empty or one instruction, and always ends with EOL. */
line : EOL
     | rform EOL
     | iform EOL
     | jform EOL
     ;
rform : ident DOL integer CAMMA DOL integer CAMMA DOL integer{
        /* Three-register form: mnemonic, then destination register,
           then the two source registers.  The function code is looked
           up in funtable. */
        Field *field;
        if(counter >= PROGSIZE){
          /* program[] is full; refuse to write past its end */
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        field->type = RForm;
        field->op = 0;
        field->rs = $6;
        field->rt = $9;
        field->rd = $3;
        field->shamt = 0;
        field->fun = assoc_search(&funtable, $1);
      }
      | ident DOL integer {
        /* Single-register form (the sample input uses it for jr): the
           register goes into rs, the other register fields are zero. */
        Field *field;
        if(counter >= PROGSIZE){
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        field->type = RForm;
        field->op = 0;
        field->rs = $3;
        field->rt = 0;
        field->rd = 0;
        field->shamt = 0;
        field->fun = assoc_search(&funtable, $1);
      }
      ;
iform : ident DOL integer CAMMA DOL integer CAMMA integer{
        /* Two registers followed by a number.  When the mnemonic is
           found in funtable the number is a shift amount and the
           instruction is emitted in R format; otherwise it is an
           immediate and the opcode comes from optable. */
        Field *field;
        int fun;
        if(counter >= PROGSIZE){
          /* program[] is full; refuse to write past its end */
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        if((fun = assoc_search(&funtable, $1))!=-1){
          field->type = RForm;
          field->op = 0;
          field->rs = 0;
          field->rt = $6;
          field->rd = $3;
          field->shamt = $8;
          field->fun = fun;
        }
        else{
          field->type = IForm;
          field->op = assoc_search(&optable, $1);
          field->rs = $6;
          field->rt = $3;
          field->imm = $8;
        }
      }
      | ident DOL integer CAMMA integer LPAREN DOL integer RPAREN{
        /* Base-offset form: register, then offset(base register). */
        Field *field;
        if(counter >= PROGSIZE){
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        field->type = IForm;
        field->op = assoc_search(&optable, $1);
        field->rs = $8;
        field->rt = $3;
        field->imm = $5;
      }
      | ident DOL integer CAMMA integer{
        /* One register and an immediate; rs is left at zero. */
        Field *field;
        if(counter >= PROGSIZE){
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        field->type = IForm;
        field->op = assoc_search(&optable, $1);
        field->rs = 0;
        field->rt = $3;
        field->imm = $5;
      }
      ;
jform : ident integer {
        /* Jump form: mnemonic followed by the target value; the opcode
           is looked up in optable. */
        Field *field;
        if(counter >= PROGSIZE){
          /* program[] is full; refuse to write past its end */
          yyerror("program too large");
          YYABORT;
        }
        field = &program[counter++];
        field->type = JForm;
        field->op = assoc_search(&optable, $1);
        field->target = $2;
      }
      ;
ident : IDENT {
        /* Copy the token text into strbuf because the scanner buffer is
           overwritten by the next token.  strncpy does not terminate
           the destination when the source fills it, so copy at most
           MAXCHAR - 1 characters and terminate explicitly. */
        strncpy(strbuf, $1, MAXCHAR - 1);
        strbuf[MAXCHAR - 1] = '\0';
        $$ = strbuf;
      }
      ;
integer : INTEGER { $$ = $1;}
        ;

%%

/*
 * Look up key in an association table by linear search.
 *
 * assoc->len must be the number of entries in assoc->data.
 * Returns the val of the first entry whose key compares equal to key
 * with strcmp(), or -1 when no entry matches.
 */
int assoc_search(AssocTable *assoc, char *key)
{
  int i;
  /* Bound the scan by the entry count; the previous condition tested
     only assoc->len and therefore never stopped on a miss. */
  for(i=0;i<assoc->len;++i){
    if(strcmp(key, assoc->data[i].key)==0){
      return assoc->data[i].val;
    }
  }
  return -1;
}
/*
 * Write the low len bits of val to stdout as '0'/'1' characters, most
 * significant bit first.  Nothing is written when len is zero or
 * negative.  len must not exceed the bit width of unsigned int.
 */
void printBin(unsigned int val, int len)
{
  for(;len>0;--len){
    /* Unsigned mask: shifting a signed 1 into the sign bit is undefined. */
    unsigned int mask = 1u << (len - 1);
    putchar((val & mask) ? '1' : '0');
  }
}
/*
 * Write one instruction to stdout as a string of binary digits via
 * printBin().  The layout depends on field->type:
 *   RForm (and any unrecognised type): op(6) rs(5) rt(5) rd(5) shamt(5) fun(6)
 *   IForm:                             op(6) rs(5) rt(5) imm(16)
 *   JForm:                             op(6) target(26)
 * No newline is written.  Always returns 0.
 */
int printField(Field *field)
{
  switch(field->type){
  default:
    /* fall through: unknown types are printed as R format */
  case RForm:
    printBin(field->op, 6);
    printBin(field->rs, 5);
    printBin(field->rt, 5);
    printBin(field->rd, 5);
    printBin(field->shamt, 5);
    printBin(field->fun, 6);
    break;
  case IForm:
    printBin(field->op, 6);
    printBin(field->rs, 5);
    printBin(field->rt, 5);
    printBin(field->imm, 16);
    break;
  case JForm:
    printBin(field->op, 6);
    printBin(field->target, 26);
    break;
  }
  /* The function is declared int; give callers a defined value. */
  return 0;
}
/*
 * Error callback used by the parser and the scanner: writes the message
 * s followed by a newline to stderr.  Always returns 0.
 */
int yyerror(const char *s)
{
  fprintf(stderr, "%s\n", s);
  return 0;
}
/*
 * Parse the assembly program with yyparse(), then print every
 * instruction stored in program[] as one line of binary digits.
 *
 * Instructions collected before a parse error are still printed, but
 * the exit status is 1 when yyparse() reports failure and 0 otherwise.
 */
int main(void)
{
  int pc;
  int status = yyparse();
  for(pc=0;pc<counter;++pc){
    printField(&program[pc]);
    putchar('\n');
  }
  return status == 0 ? 0 : 1;
}

こっちもyaccファイルの書き方わかればわりと簡単。最初ハッシュテーブル書こうとした。いったい俺は何回ハッシュテーブルを実装すればいいんだろう、と思ったところでそういやこんぐらいなら別に単なる配列でも一瞬だと気付いてやめた。

/**
 * optable.h
 **/
#ifndef OPTABLE_H
#define OPTABLE_H

/* One mnemonic-to-code mapping. */
typedef struct KeyVal{
  char *key;
  int val;
} KeyVal;
/* An array of KeyVal entries together with its element count. */
typedef struct AssocTable{
  int len;       /* number of entries in data */
  KeyVal *data;
} AssocTable;
/* Function codes of the R-format instructions. */
KeyVal fundata[] = {
  {"add", 32},
  {"addu", 33},
  {"sub", 34},
  {"subu", 35},
  {"and", 36},
  {"or", 37},
  {"sllv", 4},
  {"srlv", 6},
  {"srav", 7},
  {"slt", 42},
  {"sltu", 43},
  {"sll", 0},
  {"srl", 2},
  {"sra", 3},
  {"jr", 8},
};
/* len is the element count; sizeof(fundata) alone would be the size in bytes. */
AssocTable funtable = {(int)(sizeof(fundata) / sizeof(fundata[0])), fundata};
/* Opcodes of the I-format and J-format instructions. */
KeyVal opdata[] = {
  {"addi", 8},
  {"addiu", 9},
  {"andi", 12},
  {"ori", 13},
  {"beq", 4},
  {"bne", 5},
  {"slti", 10},
  {"sltiu", 11},
  {"lb", 32},
  {"lw", 35},
  {"sb", 40},
  {"sw", 43},
  {"lui", 15},
  {"j", 2},
  {"jal", 3},
  /* mfe, mte and rte share opcode 16 */
  {"mfe", 16},
  {"mte", 16},
  {"rte", 16},
};
AssocTable optable = {(int)(sizeof(opdata) / sizeof(opdata[0])), opdata};

#endif /* OPTABLE_H */

なんかまあMinIPSの命令のopだとかfunctだとか。だいたいMIPSに酷似しています。

test