/**
 * Parser for the Ethereum Virtual Machine (EVM) assembler.
 *
 * Copyright © 2016, Eric Scrivner
 *
 * License: Subject to the terms of the MIT license, as written in the included
 * LICENSE.txt file.
 * Authors: Eric Scrivner
 */
module phlogiston.assembler.parser;

import std.algorithm;
import std.bigint;
import std.conv;
import std.exception;
import std.range;
import std.stdio;
import std.string;

import phlogiston.assembler.scanner;
import phlogiston.assembler.token;
import phlogiston.evm.opcodes;

/// Error raised whenever an issue in parsing is encountered
class ParseError : Exception {
    @safe pure nothrow this(string msg,
                            string file = __FILE__,
                            size_t line = __LINE__,
                            Throwable next = null)
    {
        super(msg, file, line, next);
    }
}

/// Converts a token stream from a scanner into EVM bytecode.
class Parser {
    /**
     * Parses the given token stream into a sequence of bytes representing an
     * EVM bytecode program.
     *
     * Whitespace tokens are skipped. A stack opcode contributes its single
     * bytecode. A push opcode contributes its bytecode followed by its
     * argument, big-endian and zero-padded to the width of the push. Tokens
     * of any other type are skipped without emitting bytecode.
     *
     * Throws: ParseError if a push opcode is not followed by whitespace and a
     *         number, or if the number does not fit in the push width.
     *
     * Params:
     *     scanner = The scanner used for tokenizing input.
     *
     * Returns: Bytes corresponding to EVM bytecode.
     */
    public ubyte[] parse(ref Scanner scanner) {
        Token currentToken = scanner.nextToken();
        auto opcodeNameToBytecode = generateOpcodeNameToBytecodeMap();

        ubyte[] bytecodeProgram;

        while (!cast(EndOfStream)currentToken) {
            if (cast(Whitespace)currentToken) {
                // Whitespace carries no bytecode; just advance below.
            } else if (auto stackOpcode = cast(StackOpcode)currentToken) {
                bytecodeProgram ~= opcodeNameToBytecode[stackOpcode.m_opcode];
            } else if (auto pushOpcode = cast(PushOpcode)currentToken) {
                // Add opcode to bytecode stream
                ubyte pushBytecode = opcodeNameToBytecode[pushOpcode.m_opcode];
                bytecodeProgram ~= pushBytecode;

                expectToken!Whitespace(scanner);
                Number argumentToken = expectToken!Number(scanner);

                // The argument width is the opcode's offset from PUSH1,
                // plus one.
                size_t expectedNumBytes = (pushBytecode - Opcode.PUSH1 + 1);

                // Add argument to bytecode stream
                auto pushArgumentBytes = encodeNumberAsBytes(
                    argumentToken, expectedNumBytes);

                if (pushArgumentBytes.length > expectedNumBytes) {
                    throw new ParseError(
                        format("Number 0x%x is too big for opcode PUSH%d" ~
                               " (Line %d, Column %d)",
                               argumentToken.m_value,
                               expectedNumBytes,
                               scanner.lineNumber,
                               scanner.columnNumber));
                }

                bytecodeProgram ~= pushArgumentBytes;
            }

            currentToken = scanner.nextToken();
        }

        return bytecodeProgram;
    }

    /**
     * Advances the scanner and checks that the token it lands on has the
     * expected type.
     *
     * The check throws instead of using `assert`, because it validates
     * assembler input rather than an internal invariant and so must also
     * hold in builds where assertions are compiled out.
     *
     * Throws: ParseError if the next token is not of type T.
     *
     * Params:
     *     scanner = The scanner for retrieving the next token.
     *
     * Returns: The scanner's current token, cast to T.
     */
    private T expectToken(T)(ref Scanner scanner) {
        scanner.nextToken();

        T token = cast(T)scanner.currentToken;
        if (token is null) {
            throw new ParseError(
                format("Expected %s token (Line %d, Column %d)",
                       T.stringof,
                       scanner.lineNumber,
                       scanner.columnNumber));
        }

        return token;
    }

    /**
     * This routine converts the given number into a big-endian encoded range
     * of bytes.
     *
     * The result is left-padded with zero bytes up to expectedNumBytes. If
     * the number needs more bytes than that, all of its bytes are returned
     * and the result is longer than expectedNumBytes; detecting that case is
     * left to the caller.
     *
     * Params:
     *     number = The number to be encoded.
     *     expectedNumBytes = The number of bytes that should compose number.
     *
     * Returns: byte range containing big-endian encoding of number.
     */
    private ubyte[] encodeNumberAsBytes(Number number,
                                        size_t expectedNumBytes) {
        // Convert to hex and remove the underscores toHex() inserts between
        // digit groups.
        string hexDigits = number.m_value.toHex().replace("_", "");

        // Left-pad with zeros so that (a) the digit count is even, keeping
        // every two-character slice aligned to a byte boundary, and (b) the
        // value fills the full width of the push argument.
        size_t evenLength = hexDigits.length + (hexDigits.length & 1);
        size_t paddedLength = max(expectedNumBytes * 2, evenLength);
        hexDigits = rightJustify(hexDigits, paddedLength, '0');

        ubyte[] result;
        result.reserve(hexDigits.length / 2);

        for (size_t i = 0; i < hexDigits.length; i += 2) {
            result ~= hexDigits[i .. i + 2].to!ubyte(16);
        }

        return result;
    }
}

///
unittest {
    // Simple single opcode
    auto scanner = new Scanner(cast(ubyte[])"ADD");
    auto parser = new Parser();
    auto bytecode = parser.parse(scanner);
    assert(bytecode == [0x01]);

    // Multiple opcodes
    scanner = new Scanner(cast(ubyte[])"PUSH1 0xfa\nPUSH1 0xab");
    bytecode = parser.parse(scanner);
    assert(bytecode == [Opcode.PUSH1, 0xfa, Opcode.PUSH1, 0xab]);

    // Mixed stack and push opcodes
    scanner = new Scanner(cast(ubyte[])"PUSH1 0xfa\nDUP1\nMUL\nPUSH1 0x60\n");
    bytecode = parser.parse(scanner);
    assert(bytecode == [Opcode.PUSH1, 0xfa,
                        Opcode.DUP1,
                        Opcode.MUL,
                        Opcode.PUSH1, 0x60]);

    // Properly ignores all sorts of whitespace
    scanner = new Scanner(cast(ubyte[])" \r\nPUSH1\t0xfa\r\nDUP1\n\r\t ");
    bytecode = parser.parse(scanner);
    assert(bytecode == [Opcode.PUSH1, 0xfa, Opcode.DUP1]);
}

unittest {
    // Number is too big for opcode
    auto scanner = new Scanner(cast(ubyte[])"PUSH2 0xabcdef");
    auto parser = new Parser();
    assertThrown!ParseError(parser.parse(scanner));
}

unittest {
    // Argument narrower than the push width is zero-padded on the left
    auto scanner = new Scanner(cast(ubyte[])"PUSH2 0x01");
    auto parser = new Parser();
    auto bytecode = parser.parse(scanner);
    assert(bytecode == [Opcode.PUSH2, 0x00, 0x01]);
}

unittest {
    // Odd number of hex digits stays aligned to byte boundaries
    auto scanner = new Scanner(cast(ubyte[])"PUSH2 0x123");
    auto parser = new Parser();
    auto bytecode = parser.parse(scanner);
    assert(bytecode == [Opcode.PUSH2, 0x01, 0x23]);
}

unittest {
    // Push opcode without a numeric argument is a parse error
    auto scanner = new Scanner(cast(ubyte[])"PUSH1 ADD");
    auto parser = new Parser();
    assertThrown!ParseError(parser.parse(scanner));
}