Jiajun Yao

Stay hungry, Stay foolish.

JIT in Action

JIT is a way of executing computer code that involves compilation during execution of a program at runtime. This post shows how to execute Brainfuck programs using JIT in various ways.

  1. Interpreter
  2. Transpilation
  3. LLVM
  4. DynASM

Interpreter

Before showing how to use JIT to run a Brainfuck program, lets see how it can be run by an interpreter.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <fstream>
#include <streambuf>

class BrainfuckInterpreter {
  public:
    BrainfuckInterpreter(const std::string& program)
      : program(program) {}

    void run();

  private:
    const std::string program;
};

void BrainfuckInterpreter::run() {
  char data[30000] = {0};
  char* ptr = data;
  std::vector<size_t> stack;
  size_t skips = 0;

  for (size_t i = 0; i < program.size(); ++i) {
    switch(program[i]) {
      case '>':
        if (!skips) {
          ++ptr;
        }
        break;
      case '<':
        if (!skips) {
          --ptr;
        }
        break;
      case '+':
        if (!skips) {
          ++*ptr;
        }
        break;
      case '-':
        if (!skips) {
          --*ptr;
        }
        break;
      case '.':
        if (!skips) {
          putchar(static_cast<int>(*ptr));
        }
        break;
      case ',':
        if (!skips) {
          *ptr = static_cast<char>(getchar());
        }
        break;
      case '[':
        stack.emplace_back(i);
        if (!*ptr) {
          ++skips;
        }
        break;
      case ']':
        if (*ptr) {
          i = *(stack.end() - 1);
        } else {
          stack.pop_back();
        }
        if (skips) {
          --skips;
        }
        break;
    }
  }
}

int main(int argc, char** argv) {
  std::ifstream ifs(argv[1]);
  std::string program((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
  BrainfuckInterpreter interpreter(program);
  interpreter.run();
  return 0;
}
1
2
3
g++ -std=c++11 -o brainfuck brainfuck.cc

./brainfuck program.bf

For a complicated Brainfuck program like mandelbrot.bf, running in an interpreter will be slow.

Transpilation

One way to generate and run native code for a Brainfuck program is transpilaiton. The first step is generating the equivalent C/C++ code for the Brainfuck program and then using a conventional compiler to generate a shared object. The second step is using dlopen() to load the shared object and run it.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <dlfcn.h>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <streambuf>

class BrainfuckTranspiler {
  public:
    BrainfuckTranspiler(const std::string& program)
      : program(program) {}

    void run();

  private:
    const std::string program;
};

void BrainfuckTranspiler::run() {
  std::string ccname = std::tmpnam(nullptr);
  ccname += ".cc";
  std::ofstream ofs;
  ofs.open(ccname);

  ofs << R"(
#include <cstdio>
#include <vector>

extern "C" void _run() {
  char data[30000] = {0};
  char* ptr = data;
  )";

  for (size_t i = 0; i < program.size(); ++i) {
    switch(program[i]) {
      case '>':
        ofs << "++ptr;" << std::endl;
        break;
      case '<':
        ofs << "--ptr;" << std::endl;
        break;
      case '+':
        ofs << "++*ptr;" << std::endl;
        break;
      case '-':
        ofs << "--*ptr;" << std::endl;
        break;
      case '.':
        ofs << "putchar(static_cast<int>(*ptr));" << std::endl;
        break;
      case ',':
        ofs << "*ptr = static_cast<char>(getchar());" << std::endl;
        break;
      case '[':
        ofs << "while (*ptr) {" << std::endl;
        break;
      case ']':
        ofs << "}" << std::endl;
        break;
    }
  }
  ofs << "}";
  ofs.close();

  std::string soname = std::tmpnam(nullptr);
  soname += ".so";
  std::string compile = "g++ " + ccname + " -o " + soname + " -shared -fPIC";
  system(compile.c_str());

  void* handle = dlopen(soname.c_str(), RTLD_NOW);
  if (!handle) {
    std::cerr << dlerror() << std::endl;
  }
  dlerror();
  void (*_run)();
  _run = (void (*)()) dlsym(handle, "_run");
  char* error = dlerror();
  if (error) {
    std::cerr << error << std::endl;
  }
  (*_run)();
  dlclose(handle);

  remove(ccname.c_str());
  remove(soname.c_str());
}

int main(int argc, char** argv) {
  std::ifstream ifs(argv[1]);
  std::string program((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
  BrainfuckTranspiler transpiler(program);
  transpiler.run();
  return 0;
}
1
2
3
g++ -std=c++11 -ldl -o brainfuck brainfuck.cc

./brainfuck program.bf

LLVM

Besides using an external compiler process to generate the native code, we can also use LLVM mcjit by first generating the equivalent LLVM IR for the Brainfuck program. We can generate the IR manually or using clang frontend. Here I’m using clang to generate the IR but LLVM has an example of manually generating it. The LLVM version I’m using is 7.0.1.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <cstdlib>
#include <string>
#include <vector>
#include <sstream>
#include <fstream>
#include <iostream>
#include <streambuf>
#include <clang/Driver/Compilation.h>
#include <clang/Driver/Driver.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Frontend/FrontendOptions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <llvm/IR/Module.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/GenericValue.h>

class BrainfuckLLVM {
  public:
    BrainfuckLLVM(const std::string& program, const std::string& clangexe)
      : program(program), clangexe(clangexe) {}

    void run();

  private:
    const std::string program;
    const std::string clangexe;
};

void BrainfuckLLVM::run() {
  std::string ccname = std::tmpnam(nullptr);
  ccname += ".cc";
  std::ofstream ofs;
  ofs.open(ccname);

  ofs << R"(
#include <cstdio>
#include <vector>

extern "C" void _run() {
  char data[30000] = {0};
  char* ptr = data;
  )";

  for (size_t i = 0; i < program.size(); ++i) {
    switch(program[i]) {
      case '>':
        ofs << "++ptr;" << std::endl;
        break;
      case '<':
        ofs << "--ptr;" << std::endl;
        break;
      case '+':
        ofs << "++*ptr;" << std::endl;
        break;
      case '-':
        ofs << "--*ptr;" << std::endl;
        break;
      case '.':
        ofs << "putchar(static_cast<int>(*ptr));" << std::endl;
        break;
      case ',':
        ofs << "*ptr = static_cast<char>(getchar());" << std::endl;
        break;
      case '[':
        ofs << "while (*ptr) {" << std::endl;
        break;
      case ']':
        ofs << "}" << std::endl;
        break;
    }
  }
  ofs << "}";
  ofs.close();

  // Generate LLVM IR using clang frontend
  clang::IntrusiveRefCntPtr<clang::DiagnosticOptions> dopts = new clang::DiagnosticOptions();
  clang::TextDiagnosticPrinter* tdp = new clang::TextDiagnosticPrinter(llvm::errs(), &*dopts);
  clang::IntrusiveRefCntPtr<clang::DiagnosticIDs> dids(new clang::DiagnosticIDs());
  clang::DiagnosticsEngine dengine(dids, &*dopts, tdp);

  llvm::Triple triple(llvm::sys::getProcessTriple());
  clang::driver::Driver driver(clangexe, triple.str(), dengine);
  driver.setTitle("clang");
  driver.setCheckInputsExist(false);

  llvm::SmallVector<const char*, 16> args;
  args.push_back("clang");
  args.push_back(ccname.c_str());
  args.push_back("-fsyntax-only");
  std::unique_ptr<clang::driver::Compilation> compilation(driver.BuildCompilation(args));
  const clang::driver::JobList& jobs = compilation->getJobs();
  const clang::driver::Command& command = static_cast<clang::driver::Command>(*jobs.begin());
  const clang::driver::ArgStringList& ccargs = command.getArguments();
  std::unique_ptr<clang::CompilerInvocation> cinvocation(new clang::CompilerInvocation());
  clang::CompilerInvocation::CreateFromArgs(*cinvocation, const_cast<const char**>(ccargs.data()),
      const_cast<const char**>(ccargs.data()) + ccargs.size(), dengine);
  clang::CompilerInstance cinstance;
  cinstance.setInvocation(std::move(cinvocation));
  cinstance.createDiagnostics();

  llvm::LLVMContext context;
  clang::EmitLLVMOnlyAction action(&context);
  cinstance.ExecuteAction(action);
  std::unique_ptr<llvm::Module> module = action.takeModule();

  // Use mcjit to generate the native code from IR and run it
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();
  llvm::Function* _run = module->getFunction("_run");
  llvm::ExecutionEngine* ee = llvm::EngineBuilder(std::move(module)).create();
  llvm::GenericValue gv = ee->runFunction(_run, std::vector<llvm::GenericValue>());
  fflush(stdout);

  remove(ccname.c_str());
}

int main(int argc, char** argv) {
  std::ifstream ifs(argv[1]);
  std::string program((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
  BrainfuckLLVM llvm(program, argv[2]);
  llvm.run();
  return 0;
}
1
2
3
g++ -std=c++11 `/path/to/llvm-config --cxxflags --ldflags` -lclangASTMatchers -lclangFrontendTool -lclangFrontend -lclangDriver -lclangSerialization -lclangCodeGen -lclangParse -lclangSema -lclangToolingInclusions -lclangToolingCore  -lclangFormat -lclangIndex -lclangCrossTU -lclangStaticAnalyzerFrontend -lclangStaticAnalyzerCheckers -lclangStaticAnalyzerCore -lclangAnalysis -lclangARCMigrate -lclangRewriteFrontend -lclangRewrite -lclangEdit -lclangAST -lclangLex -lclangBasic `/path/to/llvm-config --libs` -o brainfuck brainfuck.cc

./brainfuck program.bf /path/to/clang

DynASM

Another way of generating the native code is using DynASM from LuaJIT and there is an example online.

Comments