OverTheWireAdvent Self-Replicating Toy
- Tags: rev
- Points: 350
- Solves: 33
Can you design your own self-replicating toy?
chal.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define MAX_STACK_SIZE 65536
typedef struct {
unsigned char data[MAX_STACK_SIZE];
int size;
} stack;
stack *new_stack() {
stack *st = malloc(sizeof(stack));
st->size = 0;
return st;
}
void push_stack(stack *st, char value) {
if (st->size >= MAX_STACK_SIZE) {
puts("Stack overflow!");
exit(1);
}
st->data[st->size] = value;
st->size++;
}
char pop_stack(stack *st) {
if (st->size == 0) {
puts("Stack underflow!");
exit(1);
}
st->size--;
return st->data[st->size];
}
typedef struct {
stack *data;
stack *code;
stack *output;
stack *functions[0x20];
} vm_state;
vm_state new_vm_state() {
vm_state state;
state.data = new_stack();
state.code = new_stack();
state.output = new_stack();
for (int i = 0; i < 0x20; i++) {
state.functions[i] = new_stack();
}
return state;
}
void execute_one_inst(vm_state *vm) {
unsigned char inst = pop_stack(vm->code);
if (inst < 0x80) {
push_stack(vm->data, inst);
return;
}
if (inst == 0x80) {
push_stack(vm->data, pop_stack(vm->data) ^ 0x80);
return;
}
if (inst == 0x81) {
unsigned char val = pop_stack(vm->data);
if (val == 0) {
push_stack(vm->data, 0xff);
} else {
push_stack(vm->data, 0);
}
return;
}
if (inst == 0x82) {
unsigned char a = pop_stack(vm->data);
unsigned char b = pop_stack(vm->data);
push_stack(vm->data, a & b);
return;
}
if (inst == 0x83) {
unsigned char a = pop_stack(vm->data);
unsigned char b = pop_stack(vm->data);
push_stack(vm->data, a | b);
return;
}
if (inst == 0x84) {
unsigned char a = pop_stack(vm->data);
unsigned char b = pop_stack(vm->data);
push_stack(vm->data, a ^ b);
return;
}
if (inst == 0x90) {
unsigned char a = pop_stack(vm->data);
unsigned char b = pop_stack(vm->data);
push_stack(vm->data, a);
push_stack(vm->data, b);
return;
}
if (inst == 0x91) {
unsigned char val = pop_stack(vm->data);
push_stack(vm->data, val);
push_stack(vm->data, val);
return;
}
if (inst == 0xa0) {
unsigned char index = pop_stack(vm->data);
if (index >= 0x20) {
puts("Invalid index");
exit(1);
}
vm->functions[index]->size = 0;
unsigned char c;
while ((c = pop_stack(vm->data)) != 0xa1) {
push_stack(vm->functions[index], c);
}
return;
}
if (inst == 0xb0) {
push_stack(vm->output, pop_stack(vm->data));
return;
}
if (inst >= 0xc0 && inst < 0xe0) {
unsigned char index = inst - 0xc0;
stack *func = vm->functions[index];
for (int i = func->size - 1; i >= 0; i--) {
push_stack(vm->code, func->data[i]);
}
return;
}
if (inst >= 0xe0) {
unsigned char index = inst - 0xe0;
if (pop_stack(vm->data)) {
stack *func = vm->functions[index];
for (int i = func->size - 1; i >= 0; i--) {
push_stack(vm->code, func->data[i]);
}
}
return;
}
}
int print_flag() {
puts("Congratulations!\n");
system("/bin/cat flag.txt");
}
int main() {
setvbuf(stdout, NULL, _IONBF, 0);
puts("We just discovered a strange element called Assemblium.");
puts("They are like mini robots. There are different isotopes, each having");
puts("different behavior... though we haven't figured that out exactly yet.");
puts("");
puts("But I've got a great idea - why don't we build a toy that replicates");
puts("itself? That would eliminate all our human, ahem, elvian costs.");
puts("");
puts("Let's do this!");
puts("");
printf("Length of your Assemblium sequence: ");
int length = 0;
scanf("%d", &length);
if (length <= 0 || length >= 65535) {
puts("Invalid length!");
exit(1);
}
puts("Enter your Assemblium sequence:");
unsigned char *user_code = malloc(length);
for (int i = 0; i < length; i++) {
read(0, user_code + i, 1);
}
vm_state vm = new_vm_state();
for (int i = length - 1; i >= 0; i--) {
push_stack(vm.code, user_code[i]);
}
while (vm.code->size > 0) {
execute_one_inst(&vm);
}
if (length != vm.output->size) {
goto fail;
}
for (int i = 0; i < length; i++) {
if (vm.output->data[i] != user_code[i]) {
goto fail;
}
}
print_flag();
exit(0);
fail:
puts("Nope. The Assemblium sequence you provided produced the following:");
write(1, vm.output->data, vm.output->size);
}
Solution
Summary
- Understanding the byte code meaning
- Considering the strategy to generate quine
- Create quine code
Understanding the byte code meaning
First, we looked at the code to see how we can get the flag. The program takes byte code from input, and executes byte code in its own VM, and then compare the VM output and VM bytecode, and if they are identical, then prints the flag. This means that we must create a bytecode, whose output is the same as the bytecode itself.
This type of programming challenge is called quine (See Quine - Wikipedia).
There are a lot of programming languages and many people are developing their quines (for fun) in many languages; however, the VM and bytecode are designed on their own; we must understand it first, and write our own quine.
The VM has 3 stacks and 32 function slots as follows:
- code stack: byte code is executed by popping one byte from this stack.
- data stack: data is pushed onto / popped from this stack.
- output stack: output instruction will output data into this stack.
- function slot: 32 function slots to define functions. each function slot has code stack.
Byte code meanings:
0x00
..0x7f
:IMM
(push the byte itself onto data stack)0x80
:XOR80
(xor the top of the data stack with0x80
)0x81
:Z2FF
(if the top of the data stack is0x00
, replace it with0xff
)0x82
:AND
(pop a, b from the data stack and push a&b back onto the data stack-
0x83
:OR
(pop a, b from the data stack and push ab back onto the data stack 0x84
:XOR
(pop a, b from the data stack and push a^b back onto the data stack0x85
..0x8f
:NOP
0x90
:XCHG
(exchange the values of the top 2 elements of the data stack)0x91
:DUP
(duplicate the top of the data stack)0x92
..0x9f
:NOP
0xa0
:FUNDEF
(define function: first pop function index from the data stack (should be 0x00 .. 0x1f, then pop code from the data stack the until it reaches0xa1
=RET
and store it in function slot)0xa1
:RET
in function definition (terminates function definition.NOP
outside function definition)0xa2
..0xaf
:NOP
0xb0
:OUT
(pop an element from the data stack and push it onto the output stack)0xb1
..0xbf
:NOP
0xc0
..0xdf
:CALL
(copy the function definition onto the code stack)0xe0
..0xff
:IFCALL
(copy an element from the data stack, and if it is not 0, then copy the function definition onto the code stack)
First we wrote a python interpreter, to make it easier to understand the behaviour of the bytecode VM. Using the code below, we can successfully emulate the VM behaviour (with debug output).
class VM:
def __init__(self, code):
self.code = list(code[::-1])
self.data = []
self.out = []
self.func = [[] for i in range(20)]
self.debug = False
def hexstr(self, x):
return ' '.join(list(map(lambda x: '%02x'%x, x)))
def printstate(self):
print()
print('code ' + self.hexstr(self.code))
print('data ' + self.hexstr(self.data))
print('out ' + self.hexstr(self.out))
def printfunc(self, i):
print('func%2d %s' % (i, self.hexstr(self.func[i])))
def run(self):
while len(self.code) > 0:
if self.debug:
self.printstate()
self.step()
self.printstate()
return self.out
def step(self):
c = self.code.pop()
if c < 0x80: # PUSH IMM
self.data.append(c)
if c == 0x80: # XOR80
self.data[-1] ^= 0x80
if c == 0x81: # ZERO2FF
if self.data[-1] == 0:
self.data[-1] = 0xff
if c == 0x82: # AND
a = self.data.pop()
self.data[-1] &= a
if c == 0x83: # OR
a = self.data.pop()
self.data[-1] |= a
if c == 0x84: # XOR
a = self.data.pop()
self.data[-1] ^= a
if c == 0x90: # XCHG
a = self.data[-1]
self.data[-1] = self.data[-2]
self.data[-2] = a
if c == 0x91: # DUP
self.data.append(self.data[-1])
if c == 0xa0: # FUNCDEF
index = self.data.pop()
self.func[index] = []
while True:
a = self.data.pop()
if a == 0xa1: break
self.func[index].append(a)
if self.debug:
self.printfunc(index)
if c == 0xb0: # OUT
self.out.append(self.data.pop())
if c >= 0xc0 and c < 0xe0:
self.code.extend(self.func[c-0xc0][::-1])
if c >= 0xe0:
if self.data.pop() != 0:
self.code.extend(self.func[c-0xe0][::-1])
Considering the strategy to generate quine
As explained above, there are a lot of quines for various programming languages; however, there are not so many quines for byte code (like JVM etc.). I searched for a quine for similar Turing Machine-like language, and found a blog entry by a Japanese researcher, who wrote a quine for Brainfuck language (it is one of obscure programming languages). He explains the quine is generated in three parts: (A) a program generating B+C (B) convert A to “A and a program of generating A”, (C) output A+B+C. We consider a similar way to find out the right strategy.
After several trial and error, we found out the strategy:
- Create a string output function (terminates at 0x00).
- Encode the function code in a specific pattern (say longer encoding), which can be converted to shorter and 0x00..0x7f only encoding (say shorter encoding).
- Create a function which takes shorter encoding and output longer encoding.
- Create a function which just push the representation of functions to the data stack in shorter encoding.
- Create a main function which calls the defined functions in this order:
- push short encoding representation
- output (converting to long encoding)
- push short encoding representation
- output (directly)
- Some adjustment (like function headers and footers)
The actual trial and error is described below.
Create quine code - trial and error
I have tried to write output function for null-terminated string. There is no direct IF or JMP opcode to write loops, but there is IFCALL opcode which will do both conditional execution and recursive call, and we can use this. Pseudo code is:
function out_string() {
output(pop(stack_top));
duplicate(stack_top);
if (pop(stack_top) != 0x00) call out_string();
}
which is converted to (function slot 1):
(mnemonic) out dup ifcall1 ret
(opcode) b0 91 e1 a1
The function definition needs to be pushed onto the data stack (in top-to-bottom, hence reverse order). Unfortunately all opcodes are >=0x80, and we have to push opcode-0x80 and then use XOR80.
(semantics) push(0xa1) push(0xe1) push(0x91) push(0xb0) push(funcindex) funcdefinition
(mnemonic) 0x21 xor80 0x61 xor80 0x11 xor80 0x30 xor80 0x01 FUNDEF
(opcode) 21 80 61 80 11 80 30 80 01 a0
Here we can see pattern of 0x?? 0x80
so frequently. I think this will make it easier to output the code itself, and I tried to express every function definition in a sequence of this 0x?? 0x80
pattern. Basically 0x80 0xaa 0x80
(xor80 nop xor80
) does nothing, and we can safely insert it into the place which non-0x80 appears more than twice in row. We can also avoid 0x00
and 0x80
in 0x??
. Thus the resulting sequence will be:
(opcode) 21 80 61 80 11 80 30 80 01 80 aa 80 a0 80
It is a tedious task to do this manually, and I wrote a function which converts function body (b0 91 e1 a1
) into this sequence. See makefun
function in the Python script. Note that we have to push a dummy element onto the stack before this definition, otherwise it generates stack underflow when executing dummy xor80
.
Once we define this function, we can output some string as:
(mnemonic) 0x00 0x43 0x42 0x41 call1
(opcode) 00 43 42 41 c1
Next I think about shorter encoding of the function definition (21 80 61 80 ...
). First I tried to write a function which takes 21 61 11 ...
and interleave it with 0x80
to output 21 80 61 80 11 80 ...
; This will work for the first function definition, but the remaining part remains unresolved.
Then I noticed the code expressed in 0x01
..0x7f
will push itself onto the data stack. If we define this as a function, we can use the above string-output function to output the code itself. Then I thought about encoding the function in only 0x01
..0x7f
code (0x00
is excluded, since it is a string terminator).
The function is currently expressed as (0x?? 0x80)+
, but 0x??
might be >= 0x80
. I encode this as: 0x01 0x??
for values < 0x80
, and 0x02 (0x??-0x80)
for values >= 0x80
. I used 0x04 as the terminator here. See encode function to understand the algorithm.
Pseudo code for decoding & output:
function decode_output() {
x = pop();
y = pop();
if (x == 0x02) y = y ^ 0x80;
out(y);
out(0x80);
if (peek() != 0x04) decode_output();
}
to express in byte code (I could use another function call, but I simplify by using zero2ff
instruction):
function decode_output() {
push(and(0x01, pop())) # 0x01 -> 0x01, 0x02 -> 0x00
push(zero2ff(pop())) # 0x01 -> 0x01, 0x02 -> 0xff
push(and(0x80, pop())) # 0x01 -> 0x00, 0x02 -> 0x80
push(xor(pop(), pop())) # xor the following byte with 0x00 or 0x80
out(pop()) # output the decoded value
out(0x80) # output 0x80
dup()
ifcall(and(0x03, pop()), decode_output)
}
(mnemonic) 0x01 AND Z2FF 0x00 0x80 AND XOR OUT 0x00 0x80 OUT DUP 0x03 AND IFCALL1 RET
(opcode) 01 82 81 00 80 82 84 b0 00 80 b0 91 03 82 e1 a1
Then we encode (1) decode+output function (2) output function, and put it in (3) string generating function.
- function 1 (decoder output)
- function 2 (plain output)
- function 3 (push encoded representation of 1+2)
- call function 3, call function 1, call function 3, call function 2
This program generates almost the same byte sequence as the original program. However you have to adjust some part: (a) the header/footer of function 3 is missing (b) the call chain is missing. Then I created function 4 (main function) to invoke all of them and adjust the above parts.
Finally, the code looks like:
- push dummy data
- function 4 (call3; call1; call3; call2; output footer of 3 (including 0x00); output call4)
- function 1 (decoder output)
- function 2 (plain output)
- function 3 (encoded representation of [push dummy data + 4 + 1 + 2 + header of 3] + terminating 0x00)
- call 4
Once you test this in simulator, you can run it against the CTF server and get the result.
[1, 1, 1, 33, 1, 48, 1, 1, 1, 1, 2, 4, 1, 68, 2, 42, 1, 48, 1, 1, 1, 1, 2, 4, 1, 32, 2, 42, 1, 48, 1, 3, 2, 42, 1, 48, 1, 1, 1, 2, 2, 2, 1, 66, 1, 67, 1, 65, 1, 67, 1, 4, 2, 42, 2, 32, 2, 42, 1, 33, 1, 97, 1, 2, 1, 3, 2, 42, 1, 17, 1, 48, 1, 1, 1, 1, 2, 4, 1, 1, 1, 2, 2, 2, 1, 48, 1, 4, 1, 2, 1, 1, 1, 1, 2, 4, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 42, 1, 1, 2, 42, 2, 32, 2, 42, 1, 33, 1, 98, 1, 17, 1, 48, 1, 2, 2, 42, 2, 32, 2, 42, 1, 33, 4, 0]
[+] Opening connection to 3.93.128.89 on port 1214: Done
[*] Switching to interactive mode
Congratulations!
AOTW{G0od_job_writing_y0ur_v3ry_0wN_quin3!}
Script to solve
from pwn import *
class VM:
def __init__(self, code):
self.code = list(code[::-1])
self.data = []
self.out = []
self.func = [[] for i in range(20)]
self.debug = False
def hexstr(self, x):
return ' '.join(list(map(lambda x: '%02x'%x, x)))
def printstate(self):
print()
print('code ' + self.hexstr(self.code))
print('data ' + self.hexstr(self.data))
print('out ' + self.hexstr(self.out))
def printfunc(self, i):
print('func%2d %s' % (i, self.hexstr(self.func[i])))
def run(self):
while len(self.code) > 0:
if self.debug:
self.printstate()
self.step()
self.printstate()
return self.out
def step(self):
c = self.code.pop()
if c < 0x80: # PUSH IMM
self.data.append(c)
if c == 0x80: # XOR80
self.data[-1] ^= 0x80
if c == 0x81: # ZERO2FF
if self.data[-1] == 0:
self.data[-1] = 0xff
if c == 0x82: # AND
a = self.data.pop()
self.data[-1] &= a
if c == 0x83: # OR
a = self.data.pop()
self.data[-1] |= a
if c == 0x84: # XOR
a = self.data.pop()
self.data[-1] ^= a
if c == 0x90: # XCHG
a = self.data[-1]
self.data[-1] = self.data[-2]
self.data[-2] = a
if c == 0x91: # DUP
self.data.append(self.data[-1])
if c == 0xa0: # FUNCDEF
index = self.data.pop()
self.func[index] = []
while True:
a = self.data.pop()
if a == 0xa1: break
self.func[index].append(a)
if self.debug:
self.printfunc(index)
if c == 0xb0: # OUT
self.out.append(self.data.pop())
if c >= 0xc0 and c < 0xe0:
self.code.extend(self.func[c-0xc0][::-1])
if c >= 0xe0:
if self.data.pop() != 0:
self.code.extend(self.func[c-0xe0][::-1])
def pushimm80(imm):
if imm >= 0x80:
if imm == 0x80:
return [0x01, 0x80, 0x01, 0x80, 0x84, 0x80]
else:
return [imm^0x80, 0x80]
else:
if imm == 0x0:
return [0x01, 0x80, 0x02, 0x80, 0x82, 0x80]
else:
return [imm, 0x80, 0xaa, 0x80]
def pushimm(imm):
if imm >= 0x80:
if imm == 0x80:
return [0x01, 0x01, 0x84, 0x80]
else:
return [imm^0x80, 0x80]
else:
if imm == 0x0:
return [0x01, 0x01, 0x84]
else:
return [imm]
# convert code to function definition (xx 80 xx 80 ...)
def makefun(orig, index):
rv = []
orig = list(orig)[::-1]
for i in range(len(orig)):
rv.extend(pushimm80(orig[i]))
rv.extend(pushimm80(index))
rv.extend([0xa0, 0x80, 0xaa, 0x80])
return rv
# encode with only 0x01..0x7f
# 0x01 0x?? -> output 0x?? 0x80; loop
# 0x02 0x?? -> output 0x??^0x80 0x80; loop
# 0x04 -> end
def encode(orig):
rv = []
orig = list(orig)[::2]
for i in range(len(orig)):
if orig[i] >= 0x80:
rv += [0x02, orig[i]^0x80]
else:
rv += [0x01, orig[i]]
rv += [0x4, 0x0]
return rv
# 0..0x7f: push immediate
XOR80 = 0x80
ZERO2FF = 0x81
AND = 0x82
OR = 0x83
XOR = 0x84
# 0x85..0x8f: NOP
XCHG = 0x90
DUP = 0x91
# 0x92..0x9f: NOP
FUNDEF = 0xa0
RET = 0xa1
# 0xa1: func end
# 0xa2..0xaf: NOP
OUT = 0xb0
CALL = 0xc0
IFCALL = 0xe0
### main function
# function 4() { call3; call1; call3; call2; 0x00; out; 0x03; out; 0x20; xor80; out; 0x44; xor80; out }
### decode & print string
# function 1() { 0x01; and; z2ff; 0x00; xor80; and; xor; out; 0x00; xor80; out; dup; 0x03; and; ifcall1; ret }
### print string
# function 2() { out; dup; ifcall2; ret }
### encoded code
# function 3() { push encoded repr of 1,2; ret }
func4 = makefun(bytes.fromhex('c3 c1 c3 c2 00 b0 03 b0 20 80 b0 44 80 b0 a1'), 4)
func1 = makefun(bytes.fromhex('01 82 81 00 80 82 84 b0 00 80 b0 91 03 82 e1 a1'), 1)
func2 = makefun(bytes.fromhex('b0 91 e2 a1'), 2)
selfcode = encode([0x01, 0x80] + func4 + func1 + func2 + [0x21, 0x80])
print(selfcode)
func3 = [0x21, 0x80] + selfcode + [0x03, 0xa0]
code = [0x01, 0x80] + func4 + func1 + func2 + func3 + [0xc4]
test = False
if test:
vm = VM(code)
vm.debug = True
out = vm.run()
print()
print('code = ' + vm.hexstr(code))
print('out = ' + vm.hexstr(out))
if code == out:
print("OK!!!")
else:
#p = process('./vm')
p = remote('3.93.128.89', 1214)
p.sendlineafter(b'Length of your Assemblium sequence: ', str(len(code)))
p.sendafter(b'Enter your Assemblium sequence:\n', bytes(code))
p.interactive()
by Fukutomo