みにかできた
正式名称「MinIPSアセンブラもどき以前の何か」。こういう名前つけるの好きな自分再確認。ついでにpre alpha 1とかいうバージョン情報付けといたら、いいかげんにしろって感じですね。
まあ実際その程度のことしかせんけど←こういう言い訳良くないね。
一応ディスアセンブルもできるようになったので一段落。次の実験までに作っときたかったからできて良かった。「OCamlとかもしくはC++で書きてえかも」との思いをぐっと押さえて、早くできそうな言語でやったのが良かったかな。Schemeあたりは悪くなかったかもしれんけど、実験に使うWindowsPCで動かすの面倒だし。
http://konbu.s13.xrea.com/lib/minika.exe
使い方は
minika.exe (asm|dsm) [input [output] ]
asmだとアセンブラ、dsmだとディスアセンブラっぽい動作。入力ファイルと出力ファイルは省略できます。その時はそれぞれ標準入力と標準出力に。
なんかアセンブルしてみる。
C:\hoge>cat hoge.s
lui $10, 65535
ori $1, $0, 1
ori $2, $0, 2
ori $3, $0, 3
sll $4, $1, 1
srl $4, $4, 1
beq $1, $4, 32
nop
srl $4, $2, 1
sll $4, $4, 1
beq $2, $4, 31
C:\hoge>minika asm hoge.s hoge.hex
C:\hoge>cat hoge.hex
3C0AFFFF
34010001
34020002
34030003
00012040
00042042
10810020
00000000
00022042
00042040
1082001F
なんか妙な32ビットデータの16進表記が吐かれてますね。
ちょうどいいのでディスアセンブルしてみます。
C:\hoge>minika dsm hoge.hex
lui $10, 65535
ori $1, $0, 1
ori $2, $0, 2
ori $3, $0, 3
sll $4, $1, 1
srl $4, $4, 1
beq $1, $4, 32
nop
srl $4, $2, 1
sll $4, $4, 1
beq $2, $4, 31
もとに戻せました。やったー。
なんで吐くのが文字列かと言うと、16進文字列のデータを大学のMinIPSプロセッサ実験で使うからです。TAの人が「こーやってこーやって命令表からこーしてあーして、16進文字列を計算して、プログラム書けるよー」と教えてくれたのですが、俺はそんなの手計算したくないです。
以下コード。やっぱりけっこうひどいコードだと思う。あきらかに設計とか何も考えずに書いてます。ちなみに完全にDMD2.007より前ではコンパイルできないコードになりました。クロージャはよいですね。クロージャさえあればご飯三杯はいける。
import std.stdio;
import std.stream;
import std.cstream;
import std.string;
import std.ctype;
import std.regexp;
import std.contracts;
import std.c.stdio;
import std.format;

// Returns the first `radix` characters of "0-9A-Z", i.e. the digit
// alphabet for the given radix (radix must be <= 36).
string radixalphabets(uint radix)
{
    static invariant char[] fullalphabets = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    return fullalphabets[0..radix];
}

// True when `c` is one of the characters in `alphabets`.
bool isalphabet(char c, string alphabets)
{
    foreach(char alph; alphabets){
        if(c==alph) return true;
    }
    return false;
}

// Returns `str` with every whitespace character removed.
string remsp(string str)
{
    char[] rem;
    foreach(char c; str){
        if(!isspace(c)) rem ~= c;
    }
    return assumeUnique(rem);
}

// Converts a string of binary digits (whitespace is ignored) into upper-case
// hex digits, four bits per hex digit. Trailing bits that do not fill a whole
// group of four are dropped. Returns "" if a non-binary character is found.
string bin2hex(string line)
{
    char[] hex;
    string binalph = radixalphabets(2);
    string hexalph = radixalphabets(16);
    string bin_s = remsp(line);
    for(int i=0;i+3<bin_s.length;i+=4){
        string byte_s = bin_s[i..i+4];
        byte h = 0;
        foreach(char c; byte_s){
            if(!isalphabet(c, binalph)){
                return "";
            }
            h = h*2 + (c-'0');
        }
        hex ~= hexalph[h];
    }
    return assumeUnique(hex);
}

// Converts a string of hex digits (either case) into binary digits, four
// bits per hex digit. Returns "" if a non-hex character is found.
string hex2bin(string line)
{
    char[] bin;
    string hex = line.toupper();
    string hexalph = radixalphabets(16);
    foreach(char c; hex){
        if(!isalphabet(c, hexalph)) return "";
        byte h = c>='A'? c-'A'+10 : c-'0';
        // Emit the bits of the nibble from weight 8 down to weight 1.
        for(int i=8;i>0;i/=2){
            byte b = h / i;
            h -= b * i;
            bin ~= b==0 ? '0' : '1';
        }
    }
    return assumeUnique(bin);
}

// Parses `num` with atoi and formats it like to_s(int, int, int).
string to_s(string num, int radix, int len)
{
    return to_s(atoi(num), radix, len);
}

// Formats `num` in the given radix as exactly `len` characters: shorter
// results are left-padded with '0', longer results keep only their last
// `len` digits.
string to_s(int num, int radix, int len)
{
    char[] tostr = new char[len];
    string base = toString(cast(long)num, cast(uint)radix);
    if(base.length==len) return base;
    for(int i=0;i<len;++i){
        tostr[i] = i+base.length<len ? '0' : base[i+base.length-len];
    }
    return assumeUnique(tostr);
}

// Where the bits of an instruction field come from.
enum SrcType{
    CONST, // fixed bit string stored in the field itself
    MATCH  // taken from a regexp capture group of the source line
}

// Payload of a Field; which member is valid is selected by Field.stype.
union FieldSrc{
    string constant; // binary digits, used when stype == CONST
    int match;       // capture group index, used when stype == MATCH
}

// One bit field of an encoded instruction: its source and its width in bits.
class Field{
    SrcType stype;
    FieldSrc src;
    int len;
    this(){}

    // Field of `len` bits filled from capture group `m` of the matched line.
    public static Field matchField(int m, int len)
    {
        Field field = new Field;
        field.stype = SrcType.MATCH;
        field.src.match = m;
        field.len = len;
        return field;
    }

    // Field of `len` bits holding the constant value `c`.
    public static Field constField(int c, int len)
    {
        Field field = new Field;
        field.stype = SrcType.CONST;
        field.src.constant = to_s(c, 2, len);
        field.len = len;
        return field;
    }
}

// Mnemonic -> encoder. Each encoder takes one source line and returns the
// instruction as hex digits, or "" when the line does not match.
string delegate(string)[string] asmtable;

// Fills asmtable. Must be called once before asmble().
void initasm()
{
    // Capture group 1 is always the mnemonic; the operands follow.
    // "op $a, $b, $c"
    RegExp RPat = new RegExp("^\\s*(\\S+)\\s+\\$(\\d+)\\s*,\\s*\\$(\\d+)\\s*,\\s*\\$(\\d+)");
    // "op $a, $b, imm"
    RegExp IPat = new RegExp("^\\s*(\\S+)\\s+\\$(\\d+)\\s*,\\s*\\$(\\d+)\\s*,\\s*(\\d+)");
    // "op $a, imm" optionally followed by "($b)"; $b's number is group 5
    RegExp MPat = new RegExp("^\\s*(\\S+)\\s+\\$(\\d+)\\s*,\\s*(\\d+)(\\(\\$(\\d+)\\))?");
    // "op $x, $y" where x and y may be arbitrary text (e.g. "$epc")
    RegExp EPat = new RegExp("^\\s*(\\S+)\\s+\\$(.+)\\s*,\\s*\\$(.+)");
    // "op target"
    RegExp JPat = new RegExp("^\\s*(\\S+)\\s+(\\d+)\\s*");
    // "op $a"
    RegExp JRPat = new RegExp("^\\s*(\\S+)\\s+\\$(\\d+)");
    // Matches any non-empty line; used for instructions without operands.
    RegExp DPat = new RegExp(".");

    // Renders one field as binary digits. A MATCH field with index 1 (the
    // mnemonic's capture group) stands for the numeric instruction id.
    string mkfield(int id, string[] mat, Field field)
    {
        switch(field.stype){
        case SrcType.CONST:
            return field.src.constant;
        case SrcType.MATCH:
            if(field.src.match==1){
                return to_s(id, 2, field.len);
            }
            else{
                return to_s(mat[field.src.match], 2, field.len);
            }
        default:
            // Explicit default so an unexpected value falls through to the
            // empty result instead of raising a switch error.
            break;
        }
        return "".idup;
    }

    // Builds an encoder: match `pat` against the line, concatenate the
    // fields of `def` in order, and convert the bit string to hex.
    string delegate(string) mkinst(int id, RegExp pat, Field[] def)
    {
        return (string line){
            if(string[] mat = pat.match(line)){
                char[] instruction;
                foreach(Field field; def){
                    instruction ~= mkfield(id, mat, field);
                }
                return bin2hex(assumeUnique(instruction));
            }
            return "".idup;
        };
    }

    Field c0_5 = Field.constField(0, 5);
    Field c0_6 = Field.constField(0, 6);
    Field mat1_6 = Field.matchField(1, 6);
    Field mat2_5 = Field.matchField(2, 5);
    Field mat3_5 = Field.matchField(3, 5);
    Field mat4_5 = Field.matchField(4, 5);

    // Three-register form: 0(6) | operand2(5) | operand3(5) | operand1(5) | 0(5) | id(6)
    Field[] fundef = [c0_6, mat3_5, mat4_5, mat2_5, c0_5, mat1_6];
    asmtable["add"] = mkinst(32, RPat, fundef);
    asmtable["addu"] = mkinst(33, RPat, fundef);
    asmtable["sub"] = mkinst(34, RPat, fundef);
    asmtable["subu"] = mkinst(35, RPat, fundef);
    asmtable["and"] = mkinst(36, RPat, fundef);
    asmtable["or"] = mkinst(37, RPat, fundef);
    asmtable["sllv"] = mkinst(4, RPat, fundef);
    asmtable["srlv"] = mkinst(6, RPat, fundef);
    asmtable["srav"] = mkinst(7, RPat, fundef);
    asmtable["slt"] = mkinst(42, RPat, fundef);
    asmtable["sltu"] = mkinst(43, RPat, fundef);

    // Shift by constant: 0(6) | 0(5) | operand2(5) | operand1(5) | amount(5) | id(6)
    Field[] shadef = [c0_6, c0_5, mat3_5, mat2_5, mat4_5, mat1_6];
    asmtable["sll"] = mkinst(0, IPat, shadef);
    asmtable["srl"] = mkinst(2, IPat, shadef);
    asmtable["sra"] = mkinst(3, IPat, shadef);

    // Immediate form: id(6) | operand2(5) | operand1(5) | imm(16)
    Field mat4_16 = Field.matchField(4, 16);
    Field[] immdef = [mat1_6, mat3_5, mat2_5, mat4_16];
    asmtable["addi"] = mkinst(8, IPat, immdef);
    asmtable["addiu"] = mkinst(9, IPat, immdef);
    asmtable["andi"] = mkinst(12, IPat, immdef);
    asmtable["ori"] = mkinst(13, IPat, immdef);
    asmtable["beq"] = mkinst(4, IPat, immdef);
    asmtable["bne"] = mkinst(5, IPat, immdef);
    asmtable["slti"] = mkinst(10, IPat, immdef);
    asmtable["sltiu"] = mkinst(11, IPat, immdef);

    // Memory form: id(6) | base register(5) | operand1(5) | offset(16)
    Field mat3_16 = Field.matchField(3, 16);
    Field mat5_5 = Field.matchField(5, 5);
    Field[] memdef = [mat1_6, mat5_5, mat2_5, mat3_16];
    asmtable["lb"] = mkinst(32, MPat, memdef);
    asmtable["lw"] = mkinst(35, MPat, memdef);
    asmtable["sb"] = mkinst(40, MPat, memdef);
    asmtable["sw"] = mkinst(43, MPat, memdef);

    // lui: id(6) | 0(5) | operand1(5) | imm(16)
    Field[] luidef = [mat1_6, c0_5, mat2_5, mat3_16];
    asmtable["lui"] = mkinst(15, MPat, luidef);

    // Jump: id(6) | target(26)
    Field mat2_26 = Field.matchField(2, 26);
    Field[] jdef = [mat1_6, mat2_26];
    asmtable["j"] = mkinst(2, JPat, jdef);
    asmtable["jal"] = mkinst(3, JPat, jdef);

    // jr: 0(6) | register(5) | 0(15) | id(6)
    Field[] jrdef = [c0_6, mat2_5, c0_5, c0_5, c0_5, mat1_6];
    asmtable["jr"] = mkinst(8, JRPat, jrdef);

    // mfe: id(6) | 0(5) | 14(5) | operand1(5) | 0(11)
    Field c14_5 = Field.constField(14, 5);
    Field c0_11 = Field.constField(0, 11);
    Field[] mfedef = [mat1_6, c0_5, c14_5, mat2_5, c0_11];
    asmtable["mfe"] = mkinst(16, EPat, mfedef);

    // mte: id(6) | 4(5) | operand2(5) | 12(5) | 0(11)
    Field c4_5 = Field.constField(4, 5);
    Field c12_5 = Field.constField(12, 5);
    Field[] mtedef = [mat1_6, c4_5, mat3_5, c12_5, c0_11];
    asmtable["mte"] = mkinst(16, EPat, mtedef);

    // rfe: id(6) | 1(1) | 0(19) | 32(6)
    Field c1_1 = Field.constField(1, 1);
    Field c0_19 = Field.constField(0, 19);
    Field c32_6 = Field.constField(32, 6);
    Field[] rfedef = [mat1_6, c1_1, c0_19, c32_6];
    asmtable["rfe"] = mkinst(16, DPat, rfedef);

    // nop: 32 zero bits
    Field[] nopdef = [Field.constField(0, 32)];
    asmtable["nop"] = mkinst(0, DPat, nopdef);
}

// Assembles one source line into hex digits. Returns "" when the line has
// no known mnemonic or its operands do not match the expected form.
// initasm() must have been called first.
string asmble(string line)
{
    RegExp opPat = new RegExp("^\\s*(\\S+)\\s*");
    if(opPat.match(line)){
        string op = opPat.match(1);
        if(op in asmtable){
            return asmtable[op](line);
        }
    }
    return "";
}

// Interprets a string of '0'/'1' characters as an unsigned binary number.
uint bin2int(string bstr)
{
    uint val = 0;
    foreach(char b; bstr){
        val = val*2 + b - '0';
    }
    return val;
}

// Formats a register number as "$<n>".
string reg_tos(int reg)
{
    return "$"~toString(cast(long)reg, cast(uint)10);
}

// Formats an immediate value in decimal.
string imm_tos(int imm)
{
    return toString(cast(long)imm, cast(uint)10);
}

// Top 6 bits of the instruction -> decoder. Each decoder takes the
// instruction as 32 binary digits and returns its assembly text, or "" when
// it cannot be decoded.
string delegate(string)[int] disasmtable;

// Fills disasmtable. Must be called once before disasmble().
void initdisasm()
{
    // Low-6-bit function code -> mnemonic, for words whose top 6 bits are 0.
    string[int] funtable; // three-register instructions
    string[int] shtable;  // shift-by-constant instructions

    // Bit-field accessors over the 32-character binary string.
    string code_op(string instruction){return instruction[0..6];}
    string code_rs(string instruction){return instruction[6..11];}
    string code_rt(string instruction){return instruction[11..16];}
    string code_rd(string instruction){return instruction[16..21];}
    string code_sh(string instruction){return instruction[21..26];}
    string code_fun(string instruction){return instruction[26..32];}
    string code_imm(string instruction){return instruction[16..32];}
    string code_target(string instruction){return instruction[6..32];}

    // "$rd, $rs, $rt"
    string str_Roperands(int rd, int rs, int rt)
    {
        return reg_tos(rd)~", "~reg_tos(rs)~", "~reg_tos(rt);
    }
    // "$rt, $rs, imm"
    string str_Ioperands(int rt, int rs, int imm)
    {
        return reg_tos(rt)~", "~reg_tos(rs)~", "~imm_tos(imm);
    }
    // "$rt, imm($rs)"
    string str_Moperands(int rt, int rs, int imm)
    {
        return reg_tos(rt)~", "~imm_tos(imm)~"("~reg_tos(rs)~")";
    }

    disasmtable[0] = (string instruction){
        int rs = bin2int(code_rs(instruction));
        int rt = bin2int(code_rt(instruction));
        int rd = bin2int(code_rd(instruction));
        int sh = bin2int(code_sh(instruction));
        int fun = bin2int(code_fun(instruction));
        if(fun in shtable){
            // Only the all-zero word is "nop". A shift whose amount happens
            // to be 0 (e.g. "srl $4, $1, 0") must still be printed as a
            // shift so that it reassembles to the same word.
            if(fun==0 && rs==0 && rt==0 && rd==0 && sh==0) return "nop".idup;
            return shtable[fun]~" "~str_Ioperands(rd, rt, sh);
        }
        else if(fun in funtable){
            return funtable[fun]~" "~str_Roperands(rd, rs, rt);
        }
        else if(fun == 8){
            return "jr "~reg_tos(rs);
        }
        return "".idup;
    };

    // Decoder for "op $rt, $rs, imm" instructions.
    string delegate(string) mkasm_imm(string str_op){
        return (string instruction){
            int rs = bin2int(code_rs(instruction));
            int rt = bin2int(code_rt(instruction));
            int imm = bin2int(code_imm(instruction));
            return str_op~" "~str_Ioperands(rt, rs, imm);
        };
    }
    disasmtable[8] = mkasm_imm("addi");
    disasmtable[9] = mkasm_imm("addiu");
    disasmtable[12] = mkasm_imm("andi");
    disasmtable[13] = mkasm_imm("ori");
    disasmtable[4] = mkasm_imm("beq");
    disasmtable[5] = mkasm_imm("bne");
    disasmtable[10] = mkasm_imm("slti");
    disasmtable[11] = mkasm_imm("sltiu");

    // Decoder for "op $rt, imm($rs)" instructions.
    string delegate(string) mkasm_mem(string str_op){
        return (string instruction){
            int rs = bin2int(code_rs(instruction));
            int rt = bin2int(code_rt(instruction));
            int imm = bin2int(code_imm(instruction));
            return str_op~" "~str_Moperands(rt, rs, imm);
        };
    }
    disasmtable[32] = mkasm_mem("lb");
    disasmtable[35] = mkasm_mem("lw");
    disasmtable[40] = mkasm_mem("sb");
    disasmtable[43] = mkasm_mem("sw");

    disasmtable[15] = (string instruction){
        int rt = bin2int(code_rt(instruction));
        int imm = bin2int(code_imm(instruction));
        return "lui "~reg_tos(rt)~", "~imm_tos(imm);
    };

    // Decoder for "op target" instructions.
    string delegate(string) mkasm_j(string str_op){
        return (string instruction){
            int target = bin2int(code_target(instruction));
            return str_op~" "~imm_tos(target);
        };
    }
    disasmtable[2] = mkasm_j("j");
    disasmtable[3] = mkasm_j("jal");

    disasmtable[16] = (string instruction){
        // Bit 6 set is how initasm() encodes rfe.
        if(instruction[6] == '1'){
            return "rfe".idup;
        }
        int rs = bin2int(code_rs(instruction));
        int rt = bin2int(code_rt(instruction));
        int rd = bin2int(code_rd(instruction));
        switch(rs){
        case 0:
            return "mfe "~reg_tos(rd)~", $epc";
        case 4:
            return "mte $status, "~reg_tos(rt);
        default:
            // Unknown sub-operation: fall through to the empty result.
            // Without a default, an unmatched switch raises a runtime
            // switch error and would abort the whole disassembly.
            break;
        }
        return "".idup;
    };

    // The tables are filled after the decoders are created; the decoders
    // look them up only when they are invoked.
    funtable[32] = "add";
    funtable[33] = "addu";
    funtable[34] = "sub";
    funtable[35] = "subu";
    funtable[36] = "and";
    funtable[37] = "or";
    funtable[4] = "sllv";
    funtable[6] = "srlv";
    funtable[7] = "srav";
    funtable[42] = "slt";
    funtable[43] = "sltu";
    shtable[0] = "sll";
    shtable[2] = "srl";
    shtable[3] = "sra";
}

// Disassembles the first run of 8 hex digits found in `line` (either case).
// Returns "" when there is none or the opcode is unknown.
// initdisasm() must have been called first.
string disasmble(string line)
{
    RegExp hexPat = new RegExp("[0-9A-F]{8}");
    if(hexPat.match(toupper(line))){
        string instruction = hex2bin(hexPat.match(0));
        int id = bin2int(instruction[0..6]);
        if(id in disasmtable){
            return disasmtable[id](instruction);
        }
    }
    return "".idup;
}

// Usage: <program> (asm|dsm) [input [output]]
// Converts the input line by line, writing one output line per input line.
// Input and output default to standard input and standard output.
// Returns 0 on success (or after printing the usage), 1 for an unknown mode.
int main(string[] args)
{
    Stream input, output;
    if(args.length==1){
        dout.writeLine(args[0]~" (asm|dsm) [input [output]]");
        return 0;
    }

    string function(string)[string] convs;
    convs["asm"] = &asmble;
    convs["dsm"] = &disasmble;

    // Validate the mode before touching any file: opening the output with
    // FileMode.OutNew first would create/truncate it even for a bad mode.
    if(!(args[1] in convs)){
        dout.writeLine("unknown mode");
        return 1;
    }
    string function(string) conv = convs[args[1]];

    initasm();
    initdisasm();

    if(args.length==2){
        input = din;
        output = dout;
    }
    else if(args.length==3){
        input = new BufferedFile(args[2]);
        output = dout;
    }
    else{
        input = new BufferedFile(args[2]);
        output = new BufferedFile(args[3], FileMode.OutNew);
    }

    foreach(char[] line; input){
        // NOTE(review): assumeUnique is applied to the stream's line buffer;
        // this relies on conv() finishing with the line before the next read.
        output.writeLine(conv(assumeUnique(line)));
    }
    output.flush();
    output.close();
    return 0;
}
テストとか全然してないですね。そんなもんですね。一応目的を達成してしまった。
今後阿部研究室(たぶん今MinIPSのほげほげを管理してるとこ)いったりすることあれば、たぶんC言語で書きなおしたりするかもなー、とか思う。