OverTheWireAdvent Self-Replicating Toy

15 minute read

  • Tags: rev
  • Points: 350
  • Solves: 33

Can you design your own self-replicating toy?

chal.c

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MAX_STACK_SIZE 65536

typedef struct {
  unsigned char data[MAX_STACK_SIZE];
  int size;
} stack;

stack *new_stack() {
  stack *st = malloc(sizeof(stack));
  st->size = 0;
  return st;
}

void push_stack(stack *st, char value) {
  if (st->size >= MAX_STACK_SIZE) {
    puts("Stack overflow!");
    exit(1);
  }
  st->data[st->size] = value;
  st->size++;
}

char pop_stack(stack *st) {
  if (st->size == 0) {
    puts("Stack underflow!");
    exit(1);
  }
  st->size--;
  return st->data[st->size];
}

typedef struct {
  stack *data;
  stack *code;
  stack *output;
  stack *functions[0x20];
} vm_state;

vm_state new_vm_state() {
  vm_state state;
  state.data = new_stack();
  state.code = new_stack();
  state.output = new_stack();
  for (int i = 0; i < 0x20; i++) {
    state.functions[i] = new_stack();
  }
  return state;
}

void execute_one_inst(vm_state *vm) {
  unsigned char inst = pop_stack(vm->code);
  if (inst < 0x80) {
    push_stack(vm->data, inst);
    return;
  }
  if (inst == 0x80) {
    push_stack(vm->data, pop_stack(vm->data) ^ 0x80);
    return;
  }
  if (inst == 0x81) {
    unsigned char val = pop_stack(vm->data);
    if (val == 0) {
      push_stack(vm->data, 0xff);
    } else {
      push_stack(vm->data, 0);
    }
    return;
  }
  if (inst == 0x82) {
    unsigned char a = pop_stack(vm->data);
    unsigned char b = pop_stack(vm->data);
    push_stack(vm->data, a & b);
    return;
  }
  if (inst == 0x83) {
    unsigned char a = pop_stack(vm->data);
    unsigned char b = pop_stack(vm->data);
    push_stack(vm->data, a | b);
    return;
  }
  if (inst == 0x84) {
    unsigned char a = pop_stack(vm->data);
    unsigned char b = pop_stack(vm->data);
    push_stack(vm->data, a ^ b);
    return;
  }
  if (inst == 0x90) {
    unsigned char a = pop_stack(vm->data);
    unsigned char b = pop_stack(vm->data);
    push_stack(vm->data, a);
    push_stack(vm->data, b);
    return;
  }
  if (inst == 0x91) {
    unsigned char val = pop_stack(vm->data);
    push_stack(vm->data, val);
    push_stack(vm->data, val);
    return;
  }
  if (inst == 0xa0) {
    unsigned char index = pop_stack(vm->data);
    if (index >= 0x20) {
      puts("Invalid index");
      exit(1);
    }
    vm->functions[index]->size = 0;
    unsigned char c;
    while ((c = pop_stack(vm->data)) != 0xa1) {
      push_stack(vm->functions[index], c);
    }
    return;
  }
  if (inst == 0xb0) {
    push_stack(vm->output, pop_stack(vm->data));
    return;
  }
  if (inst >= 0xc0 && inst < 0xe0) {
    unsigned char index = inst - 0xc0;
    stack *func = vm->functions[index];
    for (int i = func->size - 1; i >= 0; i--) {
      push_stack(vm->code, func->data[i]);
    }
    return;
  }
  if (inst >= 0xe0) {
    unsigned char index = inst - 0xe0;
    if (pop_stack(vm->data)) {
      stack *func = vm->functions[index];
      for (int i = func->size - 1; i >= 0; i--) {
        push_stack(vm->code, func->data[i]);
      }
    }
    return;
  }
}

int print_flag() {
  puts("Congratulations!\n");
  system("/bin/cat flag.txt");
}

int main() {
  setvbuf(stdout, NULL, _IONBF, 0);
  puts("We just discovered a strange element called Assemblium.");
  puts("They are like mini robots. There are different isotopes, each having");
  puts("different behavior... though we haven't figured that out exactly yet.");
  puts("");
  puts("But I've got a great idea - why don't we build a toy that replicates");
  puts("itself? That would eliminate all our human, ahem, elvian costs.");
  puts("");
  puts("Let's do this!");
  puts("");
  printf("Length of your Assemblium sequence: ");
  int length = 0;
  scanf("%d", &length);
  if (length <= 0 || length >= 65535) {
    puts("Invalid length!");
    exit(1);
  }
  puts("Enter your Assemblium sequence:");
  unsigned char *user_code = malloc(length);
  for (int i = 0; i < length; i++) {
    read(0, user_code + i, 1);
  }
  vm_state vm = new_vm_state();
  for (int i = length - 1; i >= 0; i--) {
    push_stack(vm.code, user_code[i]);
  }
  while (vm.code->size > 0) {
    execute_one_inst(&vm);
  }
  if (length != vm.output->size) {
    goto fail;
  }
  for (int i = 0; i < length; i++) {
    if (vm.output->data[i] != user_code[i]) {
      goto fail;
    }
  }
  print_flag();
  exit(0);
fail:
  puts("Nope. The Assemblium sequence you provided produced the following:");
  write(1, vm.output->data, vm.output->size);
}

Solution

Summary

  • Understanding the byte code meaning
  • Considering the strategy to generate quine
  • Create quine code
Understanding the byte code meaning

First, we looked at the code to see how we can get the flag. The program takes byte code from input, and executes byte code in its own VM, and then compare the VM output and VM bytecode, and if they are identical, then prints the flag. This means that we must create a bytecode, whose output is the same as the bytecode itself.

This type of programming challenge is called quine (See Quine - Wikipedia).

There are a lot of programming languages and many people are developing their quines (for fun) in many languages; however, the VM and bytecode are designed on their own; we must understand it first, and write our own quine.

The VM has 3 stacks and 32 function slots as follows:

  • code stack: byte code is executed by popping one byte from this stack.
  • data stack: data is pushed onto / popped from this stack.
  • output stack: output instruction will output data into this stack.
  • function slot: 32 function slots to define functions. each function slot has code stack.

Byte code meanings:

  • 0x00 .. 0x7f: IMM (push the byte itself onto data stack)
  • 0x80: XOR80 (xor the top of the data stack with 0x80)
  • 0x81: Z2FF (if the top of the data stack is 0x00, replace it with 0xff)
  • 0x82: AND (pop a, b from the data stack and push a&b back onto the data stack
  • 0x83: OR (pop a, b from the data stack and push a b back onto the data stack
  • 0x84: XOR (pop a, b from the data stack and push a^b back onto the data stack
  • 0x85 .. 0x8f: NOP
  • 0x90: XCHG (exchange the values of the top 2 elements of the data stack)
  • 0x91: DUP (duplicate the top of the data stack)
  • 0x92 .. 0x9f: NOP
  • 0xa0: FUNDEF (define function: first pop function index from the data stack (should be 0x00 .. 0x1f, then pop code from the data stack the until it reaches 0xa1 = RET and store it in function slot)
  • 0xa1: RET in function definition (terminates function definition. NOP outside function definition)
  • 0xa2 .. 0xaf: NOP
  • 0xb0: OUT (pop an element from the data stack and push it onto the output stack)
  • 0xb1 .. 0xbf: NOP
  • 0xc0 .. 0xdf: CALL (copy the function definition onto the code stack)
  • 0xe0 .. 0xff: IFCALL (copy an element from the data stack, and if it is not 0, then copy the function definition onto the code stack)

First we wrote a python interpreter, to make it easier to understand the behaviour of the bytecode VM. Using the code below, we can successfully emulate the VM behaviour (with debug output).

class VM:
    def __init__(self, code):
        self.code = list(code[::-1])
        self.data = []
        self.out = []
        self.func = [[] for i in range(20)]
        self.debug = False

    def hexstr(self, x):
        return ' '.join(list(map(lambda x: '%02x'%x, x)))

    def printstate(self):
        print()
        print('code   ' + self.hexstr(self.code))
        print('data   ' + self.hexstr(self.data))
        print('out    ' + self.hexstr(self.out))

    def printfunc(self, i):
        print('func%2d %s' % (i, self.hexstr(self.func[i])))

    def run(self):
        while len(self.code) > 0:
            if self.debug:
                self.printstate()
            self.step()
        self.printstate()
        return self.out

    def step(self):
        c = self.code.pop()
        if c < 0x80: # PUSH IMM
            self.data.append(c)
        if c == 0x80: # XOR80
            self.data[-1] ^= 0x80
        if c == 0x81: # ZERO2FF
            if self.data[-1] == 0:
                self.data[-1] = 0xff
        if c == 0x82: # AND
            a = self.data.pop()
            self.data[-1] &= a
        if c == 0x83: # OR
            a = self.data.pop()
            self.data[-1] |= a
        if c == 0x84: # XOR
            a = self.data.pop()
            self.data[-1] ^= a
        if c == 0x90: # XCHG
            a = self.data[-1]
            self.data[-1] = self.data[-2]
            self.data[-2] = a
        if c == 0x91: # DUP
            self.data.append(self.data[-1])
        if c == 0xa0: # FUNCDEF
            index = self.data.pop()
            self.func[index] = []
            while True:
                a = self.data.pop()
                if a == 0xa1: break
                self.func[index].append(a)
            if self.debug:
                self.printfunc(index)
        if c == 0xb0: # OUT
            self.out.append(self.data.pop())
        if c >= 0xc0 and c < 0xe0:
            self.code.extend(self.func[c-0xc0][::-1])
        if c >= 0xe0:
            if self.data.pop() != 0:
                self.code.extend(self.func[c-0xe0][::-1])
Considering the strategy to generate quine

As explained above, there are a lot of quines for various programming languages; however, there are not so many quines for byte code (like JVM etc.). I searched for a quine for similar Turing Machine-like language, and found a blog entry by a Japanese researcher, who wrote a quine for Brainfuck language (it is one of obscure programming languages). He explains the quine is generated in three parts: (A) a program generating B+C (B) convert A to “A and a program of generating A”, (C) output A+B+C. We consider a similar way to find out the right strategy.

After several trial and error, we found out the strategy:

  1. Create a string output function (terminates at 0x00).
  2. Encode the function code in a specific pattern (say longer encoding), which can be converted to shorter and 0x00..0x7f only encoding (say shorter encoding).
  3. Create a function which takes shorter encoding and output longer encoding.
  4. Create a function which just push the representation of functions to the data stack in shorter encoding.
  5. Create a main function which calls the defined functions in this order:
    • push short encoding representation
    • output (converting to long encoding)
    • push short encoding representation
    • output (directly)
  6. Some adjustment (like function headers and footers)

The actual trial and error is described below.

Create quine code - trial and error

I have tried to write output function for null-terminated string. There is no direct IF or JMP opcode to write loops, but there is IFCALL opcode which will do both conditional execution and recursive call, and we can use this. Pseudo code is:

function out_string() {
  output(pop(stack_top));
  duplicate(stack_top);
  if (pop(stack_top) != 0x00) call out_string();
}

which is converted to (function slot 1):

(mnemonic) out dup ifcall1 ret
(opcode)   b0  91  e1      a1

The function definition needs to be pushed onto the data stack (in top-to-bottom, hence reverse order). Unfortunately all opcodes are >=0x80, and we have to push opcode-0x80 and then use XOR80.

(semantics) push(0xa1) push(0xe1) push(0x91) push(0xb0)  push(funcindex)  funcdefinition
(mnemonic)  0x21 xor80 0x61 xor80 0x11 xor80 0x30 xor80  0x01             FUNDEF
(opcode)    21   80    61   80    11   80    30   80     01               a0

Here we can see pattern of 0x?? 0x80 so frequently. I think this will make it easier to output the code itself, and I tried to express every function definition in a sequence of this 0x?? 0x80 pattern. Basically 0x80 0xaa 0x80 (xor80 nop xor80) does nothing, and we can safely insert it into the place which non-0x80 appears more than twice in row. We can also avoid 0x00 and 0x80 in 0x??. Thus the resulting sequence will be:

(opcode) 21 80 61 80 11 80 30 80 01 80 aa 80 a0 80

It is a tedious task to do this manually, and I wrote a function which converts function body (b0 91 e1 a1) into this sequence. See makefun function in the Python script. Note that we have to push a dummy element onto the stack before this definition, otherwise it generates stack underflow when executing dummy xor80.

Once we define this function, we can output some string as:

(mnemonic) 0x00 0x43 0x42 0x41 call1
(opcode)   00   43   42   41   c1

Next I think about shorter encoding of the function definition (21 80 61 80 ...). First I tried to write a function which takes 21 61 11 ... and interleave it with 0x80 to output 21 80 61 80 11 80 ...; This will work for the first function definition, but the remaining part remains unresolved.

Then I noticed the code expressed in 0x01..0x7f will push itself onto the data stack. If we define this as a function, we can use the above string-output function to output the code itself. Then I thought about encoding the function in only 0x01..0x7f code (0x00 is excluded, since it is a string terminator).

The function is currently expressed as (0x?? 0x80)+, but 0x?? might be >= 0x80. I encode this as: 0x01 0x?? for values < 0x80, and 0x02 (0x??-0x80) for values >= 0x80. I used 0x04 as the terminator here. See encode function to understand the algorithm.

Pseudo code for decoding & output:

function decode_output() {
  x = pop();
  y = pop();
  if (x == 0x02) y = y ^ 0x80;
  out(y);
  out(0x80);
  if (peek() != 0x04) decode_output();
}

to express in byte code (I could use another function call, but I simplify by using zero2ff instruction):

function decode_output() {
  push(and(0x01, pop()))  # 0x01 -> 0x01, 0x02 -> 0x00
  push(zero2ff(pop()))    # 0x01 -> 0x01, 0x02 -> 0xff
  push(and(0x80, pop()))  # 0x01 -> 0x00, 0x02 -> 0x80
  push(xor(pop(), pop())) # xor the following byte with 0x00 or 0x80
  out(pop())              # output the decoded value
  out(0x80)               # output 0x80
  dup()
  ifcall(and(0x03, pop()), decode_output)
}
(mnemonic) 0x01 AND Z2FF 0x00 0x80 AND XOR OUT 0x00 0x80 OUT DUP 0x03 AND IFCALL1 RET
(opcode)   01   82  81   00   80   82  84  b0  00   80   b0  91  03   82  e1      a1

Then we encode (1) decode+output function (2) output function, and put it in (3) string generating function.

  • function 1 (decoder output)
  • function 2 (plain output)
  • function 3 (push encoded representation of 1+2)
  • call function 3, call function 1, call function 3, call function 2

This program generates almost the same byte sequence as the original program. However you have to adjust some part: (a) the header/footer of function 3 is missing (b) the call chain is missing. Then I created function 4 (main function) to invoke all of them and adjust the above parts.

Finally, the code looks like:

  • push dummy data
  • function 4 (call3; call1; call3; call2; output footer of 3 (including 0x00); output call4)
  • function 1 (decoder output)
  • function 2 (plain output)
  • function 3 (encoded representation of [push dummy data + 4 + 1 + 2 + header of 3] + terminating 0x00)
  • call 4

Once you test this in simulator, you can run it against the CTF server and get the result.

[1, 1, 1, 33, 1, 48, 1, 1, 1, 1, 2, 4, 1, 68, 2, 42, 1, 48, 1, 1, 1, 1, 2, 4, 1, 32, 2, 42, 1, 48, 1, 3, 2, 42, 1, 48, 1, 1, 1, 2, 2, 2, 1, 66, 1, 67, 1, 65, 1, 67, 1, 4, 2, 42, 2, 32, 2, 42, 1, 33, 1, 97, 1, 2, 1, 3, 2, 42, 1, 17, 1, 48, 1, 1, 1, 1, 2, 4, 1, 1, 1, 2, 2, 2, 1, 48, 1, 4, 1, 2, 1, 1, 1, 1, 2, 4, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 42, 1, 1, 2, 42, 2, 32, 2, 42, 1, 33, 1, 98, 1, 17, 1, 48, 1, 2, 2, 42, 2, 32, 2, 42, 1, 33, 4, 0]
[+] Opening connection to 3.93.128.89 on port 1214: Done
[*] Switching to interactive mode
Congratulations!
AOTW{G0od_job_writing_y0ur_v3ry_0wN_quin3!}

Script to solve

from pwn import *

class VM:
    def __init__(self, code):
        self.code = list(code[::-1])
        self.data = []
        self.out = []
        self.func = [[] for i in range(20)]
        self.debug = False

    def hexstr(self, x):
        return ' '.join(list(map(lambda x: '%02x'%x, x)))

    def printstate(self):
        print()
        print('code   ' + self.hexstr(self.code))
        print('data   ' + self.hexstr(self.data))
        print('out    ' + self.hexstr(self.out))

    def printfunc(self, i):
        print('func%2d %s' % (i, self.hexstr(self.func[i])))

    def run(self):
        while len(self.code) > 0:
            if self.debug:
                self.printstate()
            self.step()
        self.printstate()
        return self.out

    def step(self):
        c = self.code.pop()
        if c < 0x80: # PUSH IMM
            self.data.append(c)
        if c == 0x80: # XOR80
            self.data[-1] ^= 0x80
        if c == 0x81: # ZERO2FF
            if self.data[-1] == 0:
                self.data[-1] = 0xff
        if c == 0x82: # AND
            a = self.data.pop()
            self.data[-1] &= a
        if c == 0x83: # OR
            a = self.data.pop()
            self.data[-1] |= a
        if c == 0x84: # XOR
            a = self.data.pop()
            self.data[-1] ^= a
        if c == 0x90: # XCHG
            a = self.data[-1]
            self.data[-1] = self.data[-2]
            self.data[-2] = a
        if c == 0x91: # DUP
            self.data.append(self.data[-1])
        if c == 0xa0: # FUNCDEF
            index = self.data.pop()
            self.func[index] = []
            while True:
                a = self.data.pop()
                if a == 0xa1: break
                self.func[index].append(a)
            if self.debug:
                self.printfunc(index)
        if c == 0xb0: # OUT
            self.out.append(self.data.pop())
        if c >= 0xc0 and c < 0xe0:
            self.code.extend(self.func[c-0xc0][::-1])
        if c >= 0xe0:
            if self.data.pop() != 0:
                self.code.extend(self.func[c-0xe0][::-1])

def pushimm80(imm):
    if imm >= 0x80:
        if imm == 0x80:
            return [0x01, 0x80, 0x01, 0x80, 0x84, 0x80]
        else:
            return [imm^0x80, 0x80]
    else:
        if imm == 0x0:
            return [0x01, 0x80, 0x02, 0x80, 0x82, 0x80]
        else:
            return [imm, 0x80, 0xaa, 0x80]

def pushimm(imm):
    if imm >= 0x80:
        if imm == 0x80:
            return [0x01, 0x01, 0x84, 0x80]
        else:
            return [imm^0x80, 0x80]
    else:
        if imm == 0x0:
            return [0x01, 0x01, 0x84]
        else:
            return [imm]

# convert code to function definition (xx 80 xx 80 ...)
def makefun(orig, index):
    rv = []
    orig = list(orig)[::-1]
    for i in range(len(orig)):
        rv.extend(pushimm80(orig[i]))
    rv.extend(pushimm80(index))
    rv.extend([0xa0, 0x80, 0xaa, 0x80])
    return rv

# encode with only 0x01..0x7f
# 0x01 0x?? -> output 0x?? 0x80; loop
# 0x02 0x?? -> output 0x??^0x80 0x80; loop
# 0x04 -> end
def encode(orig):
    rv = []
    orig = list(orig)[::2]
    for i in range(len(orig)):
        if orig[i] >= 0x80:
            rv += [0x02, orig[i]^0x80]
        else:
            rv += [0x01, orig[i]]
    rv += [0x4, 0x0]
    return rv

# 0..0x7f: push immediate
XOR80 = 0x80
ZERO2FF = 0x81
AND = 0x82
OR = 0x83
XOR = 0x84
# 0x85..0x8f: NOP
XCHG = 0x90
DUP = 0x91
# 0x92..0x9f: NOP
FUNDEF = 0xa0
RET = 0xa1
# 0xa1: func end
# 0xa2..0xaf: NOP
OUT = 0xb0
CALL = 0xc0
IFCALL = 0xe0

### main function
# function 4() { call3; call1; call3; call2; 0x00; out; 0x03; out; 0x20; xor80; out; 0x44; xor80; out }
### decode & print string
# function 1() { 0x01; and; z2ff; 0x00; xor80; and; xor; out; 0x00; xor80; out; dup; 0x03; and; ifcall1; ret }
### print string
# function 2() { out; dup; ifcall2; ret }
### encoded code
# function 3() { push encoded repr of 1,2; ret }
func4 = makefun(bytes.fromhex('c3 c1 c3 c2 00 b0 03 b0 20 80 b0 44 80 b0 a1'), 4)
func1 = makefun(bytes.fromhex('01 82 81 00 80 82 84 b0 00 80 b0 91 03 82 e1 a1'), 1)
func2 = makefun(bytes.fromhex('b0 91 e2 a1'), 2)
selfcode = encode([0x01, 0x80] + func4 + func1 + func2 + [0x21, 0x80])
print(selfcode)
func3 = [0x21, 0x80] + selfcode + [0x03, 0xa0]
code = [0x01, 0x80] + func4 + func1 + func2 + func3 + [0xc4]

test = False

if test:
    vm = VM(code)
    vm.debug = True
    out = vm.run()
    print()
    print('code = ' + vm.hexstr(code))
    print('out  = ' + vm.hexstr(out))
    if code == out:
        print("OK!!!")
else:
    #p = process('./vm')
    p = remote('3.93.128.89', 1214)
    p.sendlineafter(b'Length of your Assemblium sequence: ', str(len(code)))
    p.sendafter(b'Enter your Assemblium sequence:\n', bytes(code))
    p.interactive()

by Fukutomo

Updated: