1 /**
2  * Parser for the Ethereum Virtual Machine (EVM) assembler.
3  *
4  * Copyright © 2016, Eric Scrivner
5  *
6  * License: Subject to the terms of the MIT license, as written in the included
7  * LICENSE.txt file.
8  * Authors: Eric Scrivner
9  */
10 module phlogiston.assembler.parser;
11 
12 import std.algorithm;
13 import std.bigint;
14 import std.conv;
15 import std.exception;
16 import std.range;
17 import std.stdio;
import std.string;
19 
20 import phlogiston.assembler.scanner;
21 import phlogiston.assembler.token;
22 import phlogiston.evm.opcodes;
23 
/**
 * Exception type thrown when the parser encounters a problem in its input.
 */
class ParseError : Exception {
    /**
     * Builds a parse error by forwarding every argument to `Exception`.
     *
     * Params:
     *     msg = Description of the problem.
     *     file = Source file reported for the error; defaults to the
     *            caller's `__FILE__`.
     *     line = Source line reported for the error; defaults to the
     *            caller's `__LINE__`.
     *     next = Optional chained throwable.
     */
    this(string msg,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null) @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}
34 
/// Converts a token stream from a scanner into EVM bytecode.
class Parser {
    /**
     * Parses the given token stream into a sequence of bytes representing an
     * EVM bytecode program.
     *
     * Stack opcodes are emitted as a single byte. Push opcodes are emitted as
     * the opcode byte followed by their numeric argument, big-endian and
     * left-padded with zero bytes to the width implied by the opcode (PUSHn
     * takes exactly n argument bytes). Whitespace and any other token kinds
     * are skipped.
     *
     * Throws: ParseError if a push opcode is not followed by whitespace and a
     *         number, or if the number does not fit in the opcode's width.
     *
     * Params:
     *     scanner = The scanner used for tokenizing input.
     *
     * Returns: Bytes corresponding to EVM bytecode.
     */
    public ubyte[] parse(ref Scanner scanner) {
        auto opcodeNameToBytecode = generateOpcodeNameToBytecodeMap();

        ubyte[] bytecodeProgram;

        for (Token currentToken = scanner.nextToken();
             !cast(EndOfStream)currentToken;
             currentToken = scanner.nextToken()) {
            if (auto stackOpcode = cast(StackOpcode)currentToken) {
                bytecodeProgram ~= opcodeNameToBytecode[stackOpcode.m_opcode];
            } else if (auto pushOpcode = cast(PushOpcode)currentToken) {
                // Add opcode to bytecode stream
                ubyte pushBytecode = opcodeNameToBytecode[pushOpcode.m_opcode];
                bytecodeProgram ~= pushBytecode;

                expectToken!Whitespace(scanner);
                Number argumentToken = expectToken!Number(scanner);

                // PUSH1 takes one argument byte, PUSH2 two, and so on.
                size_t expectedNumBytes = (pushBytecode - Opcode.PUSH1 + 1);

                auto pushArgumentBytes = encodeNumberAsBytes(
                    argumentToken, expectedNumBytes);

                // The encoder only pads, never truncates, so a longer result
                // means the literal needs more bytes than the opcode allows.
                if (pushArgumentBytes.length > expectedNumBytes) {
                    throw new ParseError(
                        format("Number 0x%x is too big for opcode PUSH%d" ~
                               " (Line %d, Column %d)",
                               argumentToken.m_value,
                               expectedNumBytes,
                               scanner.lineNumber,
                               scanner.columnNumber));
                }

                // Add argument to bytecode stream
                bytecodeProgram ~= pushArgumentBytes;
            }
        }

        return bytecodeProgram;
    }

    /**
     * Advances the scanner and checks that the token it lands on has the
     * expected type.
     *
     * The check is a real runtime check rather than an `assert`, so malformed
     * input is reported as a ParseError in every build mode.
     *
     * Throws: ParseError if the next token is not of type T.
     *
     * Params:
     *     scanner = The scanner for retrieving the next token.
     *
     * Returns: The scanner's new current token, cast to T.
     */
    private T expectToken(T)(ref Scanner scanner) {
        scanner.nextToken();

        auto token = cast(T)scanner.currentToken;
        if (token is null) {
            throw new ParseError(
                format("Expected %s (Line %d, Column %d)",
                       T.stringof,
                       scanner.lineNumber,
                       scanner.columnNumber));
        }

        return token;
    }

    /**
     * This routine converts the given number into a big-endian encoded range
     * of bytes.
     *
     * The result is left-padded with zero bytes up to expectedNumBytes. A
     * number that needs more bytes than that is returned at its natural
     * length so the caller can detect the overflow.
     *
     * Params:
     *     number = The number to be encoded.
     *     expectedNumBytes = The number of bytes that should compose number.
     *
     * Returns: byte range containing big-endian encoding of number.
     */
    private ubyte[] encodeNumberAsBytes(Number number,
                                        size_t expectedNumBytes) {
        // Convert to hex and remove the underscore digit separators.
        string hexDigits = number.m_value.toHex().replace("_", "");

        // Pad on the left so the digits split into whole bytes (an odd digit
        // count would otherwise misalign every byte) and so short values
        // still fill the expected width.
        immutable evenLength = hexDigits.length + (hexDigits.length & 1);
        immutable paddedLength = max(expectedNumBytes * 2, evenLength);
        hexDigits = rightJustify(hexDigits, paddedLength, '0');

        ubyte[] result;
        result.reserve(hexDigits.length / 2);

        for (size_t i = 0; i < hexDigits.length; i += 2) {
            // parse consumes from an lvalue, so hand it a mutable slice.
            string hexPair = hexDigits[i .. i + 2];
            result ~= std.conv.parse!ubyte(hexPair, 16);
        }

        return result;
    }
}
137 
///
unittest {
    auto parser = new Parser();

    // Runs the shared parser over a fresh scanner for the given source text.
    ubyte[] assemble(string source) {
        auto scanner = new Scanner(cast(ubyte[])source);
        return parser.parse(scanner);
    }

    // A lone stack opcode becomes a single byte
    assert(assemble("ADD") == [0x01]);

    // Consecutive push opcodes each carry their argument
    assert(assemble("PUSH1 0xfa\nPUSH1 0xab")
           == [Opcode.PUSH1, 0xfa, Opcode.PUSH1, 0xab]);

    // Stack and push opcodes can be interleaved
    assert(assemble("PUSH1 0xfa\nDUP1\nMUL\nPUSH1 0x60\n")
           == [Opcode.PUSH1, 0xfa,
               Opcode.DUP1,
               Opcode.MUL,
               Opcode.PUSH1, 0x60]);

    // Leading, trailing and mixed whitespace is skipped
    assert(assemble("  \r\nPUSH1\t0xfa\r\nDUP1\n\r\t    ")
           == [Opcode.PUSH1, 0xfa, Opcode.DUP1]);
}
164 
unittest {
    // A three-byte literal does not fit in PUSH2's two argument bytes
    auto parser = new Parser();
    auto scanner = new Scanner(cast(ubyte[])"PUSH2 0xabcdef");

    assertThrown!ParseError(parser.parse(scanner));
}