-
Notifications
You must be signed in to change notification settings - Fork 0
/
bfc.c
287 lines (273 loc) · 8.06 KB
/
bfc.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#include <stdio.h>
// the number of registers to provide
const int REG_COUNT = 8192;
// this is provides unique values for the names of LLVM SSA variables
static int rv = 1;
// this contains a stack of loop numbers...
static int loopStack[4096];
// and an index to that stack
static int loopStackIndex = 0;
// emit file the header
void emit_header ( FILE* fp )
{
// first, define @op_out which corresponds to output
fprintf(fp, "define internal void @op_out(i64 %%val) nounwind\n");
fprintf(fp, "{\n");
fprintf(fp, "entry:\n");
fprintf(fp, "\t%%conv = trunc i64 %%val to i32\n");
fprintf(fp, "\t%%call = tail call i32 @putchar ( i32 %%conv ) nounwind\n");
fprintf(fp, "\tret void\n");
fprintf(fp, "}\n\n");
fprintf(fp, "declare i32 @putchar(i32) nounwind\n\n");
// next, define @op_in which corresponds to input
fprintf(fp, "define internal i64 @op_in() nounwind\n");
fprintf(fp, "{\n");
fprintf(fp, "entry:\n");
fprintf(fp, "\t%%call = tail call i32 @getchar() nounwind\n");
fprintf(fp, "\t%%conv = sext i32 %%call to i64\n");
fprintf(fp, "\tret i64 %%conv\n");
fprintf(fp, "}\n\n");
fprintf(fp, "declare i32 @getchar() nounwind\n\n");
// now, define the actual program body
fprintf(fp, "define void @program() nounwind\n");
fprintf(fp, "{\n");
fprintf(fp, "entry:\n");
// allocate the index stack slot...
fprintf(fp, "\t%%index = alloca i64, align 8\n");
// and stack space for the registers
fprintf(fp, "\t%%registers = alloca [%d x i64], align 8\n", REG_COUNT);
// set the initial index to 0
fprintf(fp, "\tstore i64 0, i64* %%index\n");
// get a pointer to the first element of the registers
fprintf(fp, "\t%%regroot = getelementptr [%d x i64]* %%registers, i64 0, i64 0\n", REG_COUNT);
// clear all the registers
fprintf(fp, "\t%%ptrconv = bitcast i64* %%regroot to i8*\n");
fprintf(fp, "\tcall void @llvm.memset.i64(i8* %%ptrconv, i8 0, i64 %d, i32 8)\n", REG_COUNT * 8);
}
// emit the footer
void emit_footer ( FILE* fp )
{
// return a value (void!)
fprintf(fp, "\tret void\n");
fprintf(fp, "}\n\n");
// declaration for the memset intrinsic
fprintf(fp, "declare void @llvm.memset.i64(i8*, i8, i64, i32)\n");
// actual C entry point, whcih just calls program
fprintf(fp, "define i32 @main() nounwind\n");
fprintf(fp, "{\n");
fprintf(fp, "entry:\n");
fprintf(fp, "\tcall void @program() nounwind\n");
fprintf(fp, "ret i32 0\n");
fprintf(fp, "}\n\n");
}
// an add is any arithmetic, so a sequence of +s and -s
void emit_add ( FILE* fp, long amount )
{
int opID = rv++;
if (amount == 0) // if we're not actually modifying, short-circuit
return;
// load the index...
fprintf(fp, "\t%%idx%d = load i64* %%index\n", opID);
// load the actual register...
fprintf(fp, "\t%%ptr%d = getelementptr i64* %%regroot, i64 %%idx%d\n", opID, opID);
fprintf(fp, "\t%%tmp%d = load i64* %%ptr%d\n", opID, opID);
// perform the arithmetic...
fprintf(fp, "\t%%add%d = add i64 %%tmp%d, %ld\n", opID, opID, amount);
// and store.
fprintf(fp, "\tstore i64 %%add%d, i64* %%ptr%d\n", opID, opID);
}
// a move moves the register index, so a sequence of >s and <s
void emit_move ( FILE* fp, long amount )
{
int opID = rv++;
if (amount == 0)
return; // short-circuit
// load the index
fprintf(fp, "\t%%idx%d = load i64* %%index\n", opID);
// adjust it
fprintf(fp, "\t%%add%d = add i64 %%idx%d, %ld\n", opID, opID, amount);
// store the new index
fprintf(fp, "\tstore i64 %%add%d, i64* %%index\n", opID);
}
// emits an output operation
void emit_out ( FILE* fp )
{
int opID = rv++;
// load the index
fprintf(fp, "\t%%idx%d = load i64* %%index\n", opID);
// load the register
fprintf(fp, "\t%%ptr%d = getelementptr i64* %%regroot, i64 %%idx%d\n", opID, opID);
fprintf(fp, "\t%%tmp%d = load i64* %%ptr%d\n", opID, opID);
// pass it to @op_out
fprintf(fp, "\tcall void @op_out(i64 %%tmp%d) nounwind\n", opID);
}
// emits an input instruction
void emit_in ( FILE* fp )
{
int opID = rv++;
// get the input
fprintf(fp, "\t%%tmp%d = call i64 @op_in() nounwind\n", opID);
// load the index
fprintf(fp, "\t%%idx%d = load i64* %%index\n", opID);
// store the new value
fprintf(fp, "\t%%ptr%d = getelementptr i64* %%regroot, i64 %%idx%d\n", opID, opID);
fprintf(fp, "\tstore i64 %%tmp%d, i64* %%ptr%d\n", opID, opID);
}
// this emits a loop header and the beginning of the body, corresponding to a [
void emit_loop_open ( FILE* fp )
{
// push this entry onto the stack
int opID = rv++;
int stackIndex = loopStackIndex++;
loopStack[stackIndex] = opID;
// begin the loop header
fprintf(fp, "\tbr label %%loopHeader%d\n", opID);
fprintf(fp, "loopHeader%d:\n", opID);
// the loop header checks the exit condition, so load the index...
fprintf(fp, "\t%%idx%d = load i64* %%index\n", opID);
// load the register...
fprintf(fp, "\t%%ptr%d = getelementptr i64* %%regroot, i64 %%idx%d\n", opID, opID);
fprintf(fp, "\t%%tmp%d = load i64* %%ptr%d\n", opID, opID);
// compare to zero...
fprintf(fp, "\t%%cmp%d = icmp eq i64 %%tmp%d, 0\n", opID, opID);
// if zero, jump to the corresponding ] which is the end of the loop
// otherwise, run through the loop body
fprintf(fp, "\tbr i1 %%cmp%d, label %%loopExit%d, label %%loopBody%d\n", opID, opID, opID);
// beginning of loop body
fprintf(fp, "loopBody%d:\n", opID);
}
// this emits the loop exit, which corresponds to ]
void emit_loop_close ( FILE* fp )
{
// get the top loop from the stack
int stackIndex = --loopStackIndex;
int loopID = loopStack[stackIndex];
// jump to the header, which will jump to the loopExit if the exit condition is met
fprintf(fp, "\tbr label %%loopHeader%d\n", loopID);
fprintf(fp, "loopExit%d:\n", loopID);
// continue
}
// this type contains a reader state
typedef enum
{
BFP_STATE_ARITHMETIC,
BFP_STATE_POINTER,
BFP_STATE_NONE,
} BFP_State;
// this generic function does operation emission, all fairly obvious
static void emit ( FILE* out, BFP_State* state, char lastChar, long amount )
{
switch (*state)
{
case BFP_STATE_ARITHMETIC:
emit_add(out, amount);
*state = BFP_STATE_NONE;
break;
case BFP_STATE_POINTER:
emit_move(out, amount);
*state = BFP_STATE_NONE;
break;
case BFP_STATE_NONE:
switch (lastChar)
{
case '.':
emit_out(out);
break;
case ',':
emit_in(out);
break;
case '[':
emit_loop_open(out);
break;
case ']':
emit_loop_close(out);
break;
}
break;
}
}
// the real meat of the compiler - read in as brainfuck, compile to out as LLVM IR
void bfp ( FILE* in, FILE* out )
{
long amount = 0;
BFP_State state = BFP_STATE_NONE;
// emit the header
emit_header(out);
// loop through everything
while (!feof(in))
{
// basically, a finite state machine
char ch;
fread(&ch, 1, 1, in);
if (state == BFP_STATE_ARITHMETIC) // we're doing arithmetic at the moment! go us
{
// if it's a + or -, adjust the working value
if (ch == '+')
amount++;
else if (ch == '-')
amount--;
else
{
// it's something else! emit the instruction, push it back to be read next loop
emit(out, &state, ch, amount);
ungetc(ch, in);
amount = 0;
}
}
else if (state == BFP_STATE_POINTER) // same as above but for pointers
{
if (ch == '>')
amount++;
else if (ch == '<')
amount--;
else
{
emit(out, &state, ch, amount);
ungetc(ch, in);
amount = 0;
}
}
else // just emit an operation or enter the appropriate state
{
if (ch == '+')
{
state = BFP_STATE_ARITHMETIC;
amount = 1;
}
else if (ch == '-')
{
state = BFP_STATE_ARITHMETIC;
amount = -1;
}
else if (ch == '>')
{
state = BFP_STATE_POINTER;
amount = 1;
}
else if (ch == '<')
{
state = BFP_STATE_POINTER;
amount = -1;
}
else
{
emit(out, &state, ch, 0);
}
}
}
// emit any trailing +s or -s (maybe this should be removed, as it won't contribute to the output?)
emit(out, &state, 0, amount);
emit_footer(out);
}
int main ()
{
// open up llvm-as | opt -std-compile-opts | lli as a pipe
// so that we pipe everything to the LLVM toolchain
FILE* asOut;
asOut = popen("llvm-as | opt -std-compile-opts | lli", "w");
// run on stdin
bfp(stdin, asOut);
// tidy up
pclose(asOut);
return 0;
}