1 /**
 * Disassembler for converting Ethereum Virtual Machine (EVM) bytecode to EVM
3  * assembly language.
4  *
5  * Copyright © 2016, Eric Scrivner
6  *
7  * License: Subject to the terms of the MIT license, as written in the included
8  * LICENSE.txt file.
9  * Authors: Eric Scrivner
10  */
11 module phlogiston.disassembler.disassembler;
12 
13 import std.algorithm;
14 import std.bigint;
15 import std.conv;
16 import std.range;
17 import std..string;
18 
19 import phlogiston.evm.opcodes;
20 
/// Converts a hexadecimal string of EVM bytecode into EVM assembly code.
class Disassembler {
    /**
     * Converts the given EVM bytecode into EVM assembly language code, one
     * array element per instruction.
     *
     * PUSH1 through PUSH32 are emitted together with their immediate operand,
     * formatted as a hexadecimal number. If the bytecode ends before the
     * operand is complete, the missing low-order bytes are treated as zero.
     * Bytes that have no entry in the opcode name map are skipped and produce
     * no output.
     *
     * Params:
     *     vmBytes = The string containing EVM bytecode as hexadecimal digits.
     *               Surrounding whitespace and an optional "0x"/"0X" prefix
     *               are ignored.
     *
     * Returns: An array of EVM assembly language instructions, in the order
     *          they appear in the bytecode.
     */
    public string[] disassemble(string vmBytes) {
        ubyte[] bytecode = hexStringToByteArray(vmBytes.strip());

        string[] results;
        auto nameForOpcode = generateBytecodeToOpcodeNameMap();

        for (size_t pc = 0; pc < bytecode.length; pc++) {
            auto opcode = bytecode[pc];

            switch (opcode) {
            // The PUSH opcodes are contiguous, which getPushOpcodeBytes also
            // relies on, so a case range covers all 32 of them.
            case Opcode.PUSH1: .. case Opcode.PUSH32:
                immutable size_t numBytes = getPushOpcodeBytes(opcode);
                BigInt number = BigInt(0);

                // Fold the operand bytes, most significant first, into a
                // single integer value. Bytes past the end of the bytecode
                // only contribute the shift, i.e. they count as zero.
                foreach (offset; 1 .. numBytes + 1) {
                    number <<= 8;
                    if (pc + offset < bytecode.length) {
                        number |= bytecode[pc + offset];
                    }
                }

                results ~= format("%s 0x%02x", nameForOpcode[opcode], number);
                // Ensure that we skip past the bytes for the argument; the
                // loop increment then moves on to the next opcode.
                pc += numBytes;
                break;
            default:
                // Bytes without a known mnemonic are dropped from the output.
                if (opcode in nameForOpcode) {
                    results ~= nameForOpcode[opcode];
                }
                break;
            }
        }

        return results;
    }

    /**
     * Params:
     *     opcode = One of the opcodes PUSH1 through PUSH32.
     *
     * Returns: The number of bytes in the argument for a push opcode.
     */
    private size_t getPushOpcodeBytes(const uint opcode) {
        // PUSH1 carries one operand byte and each following PUSH opcode one
        // more, so the distance from PUSH1 plus one gives a number in the
        // range [1, 32].
        return (opcode - Opcode.PUSH1 + 1);
    }

    /**
     * Converts a long hexadecimal string into an array of bytes.
     *
     * The string is consumed two digits at a time. A lone trailing digit is
     * decoded as a byte of its own. A chunk that does not start with a
     * hexadecimal digit makes std.conv.parse throw a ConvException.
     *
     * Params:
     *      hexString = A string containing hexadecimal bytes, optionally
     *                  prefixed with "0x" or "0X".
     *
     * Returns: An array of bytes in the same order they appear in the string.
     */
    private ubyte[] hexStringToByteArray(string hexString) {
        // Without this, the "0x" chunk would parse as the digit 0 followed by
        // an ignored 'x', putting a spurious 0x00 byte in front of the real
        // bytecode.
        if (hexString.startsWith("0x", "0X")) {
            hexString = hexString[2 .. $];
        }

        ubyte[] results;
        results.reserve((hexString.length + 1) / 2);

        foreach (nextByte; chunks(hexString, 2)) {
            results ~= parse!ubyte(nextByte, 16);
        }

        return results;
    }
}