Trying out a x64 emitter

This commit is contained in:
Krzosa Karol
2022-06-28 14:26:52 +02:00
parent a363a5e982
commit e7c14010f2

190
x64_funtime.cpp Normal file
View File

@@ -0,0 +1,190 @@
#include <stdio.h>
#include <stdint.h>
#include <windows.h>
// Debug assertion: breaks into the debugger (via __debugbreak) when `x` is false.
#define assert(x) do{if(!(x))__debugbreak();}while(0)
// Asserts a <= x <= b. Every argument is parenthesized so that expressions such
// as `c ? 1 : 2` or `lo | 1` can be passed as bounds without being re-associated
// with the comparison operators. Note that `x` is evaluated twice.
#define assert_in_range(x, a, b) assert(((x) >= (a)) && ((x) <= (b)))
// Asserts that x is a single bit value: 0 or 1.
#define assert_is_one(x) assert_in_range(x, 0, 1)
// First byte of the code buffer; test_x64_stuff() casts it to a function
// pointer once emission is finished.
uint8_t* code_start;
// Write cursor: address of the next byte to be emitted.
uint8_t* code;

// Appends one byte to the code buffer and advances the cursor.
// There is no bounds checking; the caller must provide a large enough buffer.
void emit(uint8_t c) {
    *code++ = c;
}

// Appends a 32-bit value in little-endian byte order and advances the cursor
// by four bytes.
// The value is written byte by byte on purpose: the cursor sits at an arbitrary
// offset inside the instruction stream, so storing through a casted uint32_t*
// would be a misaligned access through an incompatible pointer type, and the
// byte order would depend on the host.
void emit32(uint32_t c) {
    for (int shift = 0; shift < 32; shift += 8) {
        emit((uint8_t)(c >> shift));
    }
}
// Order of the parts of an x64 instruction encoding:
// 1. Legacy prefixes (optional)
// 2. REX prefix (optional)
// 3. Opcode (1 or 2 or 3 bytes)
// 4. ModR/M (1 byte if required)
// 5. SIB (1 byte if required)
// 6. Displacement (1 or 2 or 4 bytes)
// 7. Immediate (1 or 2 or 4 bytes)
// Basic op codes: complete one-byte instructions, emitted without prefix or ModR/M
const uint8_t OP_BREAK = 0xCC; // int3 - breakpoint
const uint8_t OP_RETURN = 0xC3; // ret - near return to the caller
// From manual:
// The operand-size override prefix allows a program to switch between 16-and 32-bit operand sizes. Either size can
// be the default; use of the prefix selects the non-default size.
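// In 64-bit mode the default operand size is 32 bits, so this prefix selects 16-bit operands.
// When the REX prefix has W set, this prefix is ignored and the operand size stays 64 bits.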
const uint8_t OPERAND_SIZE_OVERRIDE = 0x66; // legacy prefix byte: operand-size override
const uint8_t GS_SEGMENT_OVERRIDE = 0x65; // legacy prefix byte: GS segment override
// Registers
// The same register can be named at several widths: al(8lo), ah(8hi), ax(16), eax(32), rax(64)
// The enumerator value is the register number used by the encoder:
// 0 - 7 fit in a 3-bit ModR/M field, 8 - 15 also need an extension bit in the REX prefix.
enum {
    RAX = 0,
    RCX = 1,
    RDX = 2,
    RBX = 3,
    RSP = 4,
    RBP = 5,
    RSI = 6,
    RDI = 7,
    R8  = 8,
    R9  = 9,
    R10 = 10,
    R11 = 11,
    R12 = 12,
    R13 = 13,
    R14 = 14,
    R15 = 15,
};
// A register number in the range RAX..R15.
typedef uint8_t Register;
// REX - 64 bit mode prefix
// we write this when
// * instruction references extended(64bit) register
// * uses a 64 bit operand.
// W 1 bit When 1, a 64 - bit operand size is used. Otherwise, when 0, the default operand size is used(which is 32 - bit for most but not all instructions, see this table).
// R 1 bit This 1 - bit value is an extension to the MODRM.reg field.See Registers.
// X 1 bit This 1 - bit value is an extension to the SIB.index field.See 64 - bit addressing.
// B 1 bit This 1 - bit value is an extension to the MODRM.rm field or the SIB.base field.See 64 - bit addressing.
// Builds a REX prefix byte with the bit layout 0100WRXB.
// Each argument is a single flag and must be 0 or 1; anything else trips an assert.
uint8_t rex(uint8_t w, uint8_t r, uint8_t x, uint8_t b) {
    assert_is_one(w);
    assert_is_one(r);
    assert_is_one(x);
    assert_is_one(b);
    const int fixed_high_nibble = 0x40; // the constant 0100 pattern in bits 7-4
    const int flags = (w << 3) | (r << 2) | (x << 1) | b;
    return fixed_high_nibble | flags;
}
// REX prefix with only W set (0x48): 64-bit operand size, no register extension bits.
#define REX_W rex(1,0,0,0)
// ModR/M
//
// Addressing
// - Indirect : operand is stored under the address specified by one of the registers
// - Displaced: operand is stored under the displaced(offset) address specified by one of the registers, [rsp + 32h] other example: [rsp + rax*2 + 32], we use 2 registers - keyword SIB
// - Register : operand is stored in the register
// - Immediate: operand is stored right in the instruction; whether an immediate exists depends on the instruction
// Values for the `mode` argument of modrm() (the top two bits of the ModR/M byte).
enum {
    MODE_INDIRECT_ADDRESSING    = 0x0, // operand is at the address held in a register
    MODE_DISPLACED8_ADDRESSING  = 0x1, // as above, plus a 1-byte displacement
    MODE_DISPLACED32_ADDRESSING = 0x2, // as above, plus a 4-byte displacement
    MODE_DISPLACED16_ADDRESSING = 0x2, // shares its value with the 32-bit displacement mode
    MODE_REGISTER_ADDRESSING    = 0x3, // operand is the register itself
};
// SIB Addressing
// 7-6 bits for scale (1, 2, 4, 8)
// 5-3 register with index (index * scale)
// 2-0 register with base base + (index * scale)
// Values for the 2-bit scale field of the SIB byte.
const uint8_t SIB_SCALE1 = 0; // index * 1
const uint8_t SIB_SCALE2 = 1; // index * 2
const uint8_t SIB_SCALE4 = 2; // index * 4
const uint8_t SIB_SCALE8 = 3; // index * 8
// Same number as RSP. The emit_indirect_* functions refuse RSP/R12 as a base and
// tag those asserts "SIB"; presumably this is the rm field value that requests a
// SIB byte. NOTE(review): not referenced by any code visible here - confirm intent.
const uint8_t MODRM_RM_SIB = 4;
// Packs a ModR/M byte: mode in bits 7-6, reg in bits 5-3, rm in bits 2-0.
//   mode - addressing mode, 0 - 3 (one of the MODE_* values)
//   reg  - 3-bit register number, 0 - 7
//   rm   - 3-bit register number, 0 - 7
// Registers r8 - r15 do not fit in three bits: to address them the matching
// REX bit has to be set and the number reduced by 8 first.
uint8_t modrm(uint8_t mode, uint8_t reg, uint8_t rm) {
    assert_in_range(mode, 0, 3);
    assert_in_range(reg, 0, 7);
    assert_in_range(rm, 0, 7);
    uint8_t packed = rm;
    packed |= reg << 3;
    packed |= mode << 6;
    return packed;
}
// Prepares a pair of registers for encoding and returns the REX prefix to emit.
//
// Both registers come in as full numbers (0 - 15) and are rewritten in place to
// 3-bit numbers (0 - 7) that modrm() accepts. For a register that was 8 or
// above, the dropped bit is recorded in the returned prefix instead:
//   *rm >= 8  ->  bit 2 of the prefix (the `r` argument of rex())
//   *rx >= 8  ->  bit 0 of the prefix (the `b` argument of rex())
// The prefix always has W set.
//
// Mind the names: every caller in this file passes the adjusted *rm as the
// `reg` argument of modrm() and the adjusted *rx as its `rm` argument.
uint8_t adjust_registers_and_get_rex(Register *rm, Register *rx) {
    const uint8_t rex_r_bit = 1 << 2;
    const uint8_t rex_b_bit = 1 << 0;

    uint8_t prefix = REX_W;
    assert_in_range(*rm, 0, 15);
    assert_in_range(*rx, 0, 15);

    if (*rm > 7) {
        *rm -= 8;
        prefix |= rex_r_bit;
    }
    if (*rx > 7) {
        *rx -= 8;
        prefix |= rex_b_bit;
    }
    return prefix;
}
// add rax, rcx => emit_register_op(0x03, RAX, RCX)
void emit_register_op(uint8_t op_code, Register rm, Register rx) {
uint8_t rex_value = adjust_registers_and_get_rex(&rm, &rx);
emit(rex_value);
emit(op_code);
emit(modrm(MODE_REGISTER_ADDRESSING, rm, rx));
}
// add rax, [rcx] => emit_indirect_op(0x03, RAX, RCX);
void emit_indirect_op(uint8_t op_code, Register rx, Register rm) {
assert(rx != RSP); // SIB
assert(rx != 8 + RSP); // SIB // R12
assert(rx != RBP); // Displacement32
assert(rx != 8 + RBP); // Displacement32 // R13
uint8_t rex_value = adjust_registers_and_get_rex(&rm, &rx);
emit(rex_value);
emit(op_code);
emit(modrm(MODE_INDIRECT_ADDRESSING, rm, rx));
}
void emit_indirect_displaced8_op(uint8_t op_code, Register rx, Register rm, uint8_t displacement) {
assert(rx != RSP); // SIB
assert(rx != 8 + RSP); // SIB // R12
uint8_t rex_value = adjust_registers_and_get_rex(&rm, &rx);
emit(rex_value);
emit(op_code);
emit(modrm(MODE_DISPLACED8_ADDRESSING, rm, rx));
emit(displacement);
}
void emit_indirect_displaced32_op(uint8_t op_code, Register rx, Register rm, uint32_t displacement) {
assert(rx != RSP); // SIB
assert(rx != 8 + RSP); // SIB // R12
uint8_t rex_value = adjust_registers_and_get_rex(&rm, &rx);
emit(rex_value);
emit(op_code);
emit(modrm(MODE_DISPLACED32_ADDRESSING, rm, rx));
emit32(displacement);
}
// extern "C" void asm_test();
void test_x64_stuff() {
//asm_test();
code = code_start = (uint8_t*)VirtualAlloc(NULL, 4096, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
emit(OP_BREAK);
// REX.W + F7 /4
#if 0
emit(REX_W);
emit(0xF7);
emit(modrm(MODE_REGISTER_ADDRESSING, 0x4, RCX));
#endif
for (Register dst = RAX; dst <= R15; dst++) {
for (Register src = RAX; src <= R15; src++) {
if ((dst & 7) != RSP) {
emit_indirect_displaced32_op(0x03, dst, src, 0xffffff);
emit_indirect_displaced8_op(0x03, dst, src, 0x32);
}
if ((dst & 7) != RSP && (dst & 7) != RBP)
emit_indirect_op(0x03, dst, src);
emit_register_op(0x03, dst, src);
}
}
emit(OP_RETURN);
auto add = (uint64_t(*)(uint64_t, uint64_t))code_start;
uint64_t result = add(11, 22);
}