Trying out an x64 emitter

2022-06-28 14:26:52 +02:00
parent a363a5e982
commit e7c14010f2
1 changed files with 190 additions and 0 deletions
--- a/x64_funtime.cpp
+++ b/x64_funtime.cpp
@@ -0,0 +1,190 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <windows.h>
 // Debug assertion: breaks into the debugger when the condition is false.
 #define assert(x) do{if(!(x))__debugbreak();}while(0)
 // Asserts a <= x <= b (both bounds inclusive). Every argument is parenthesized
 // so that arbitrary expressions can be passed without precedence surprises.
 #define assert_in_range(x, a, b) assert(((x) >= (a)) && ((x) <= (b)))
 // Asserts that x is a single-bit value, i.e. either 0 or 1.
 #define assert_is_one(x) assert_in_range(x, 0, 1)
 // First byte of the buffer that machine code is emitted into; this is the
 // address that gets called once the code has been generated.
 uint8_t *code_start = nullptr;
 // Write cursor: where the next emitted byte will be stored.
 uint8_t *code = nullptr;
 // Stores one byte at the write cursor and moves the cursor past it.
 // No bounds checking is done; the caller must have set `code` to a buffer
 // with enough room.
 void emit(uint8_t c) {
  code[0] = c;
  code += 1;
 }
 // Stores a 32-bit value at the write cursor, least significant byte first,
 // and advances the cursor by 4.
 // The bytes are written one at a time on purpose: the cursor moves in byte
 // steps, so it is usually not 4-byte aligned, and a uint32_t store through a
 // cast pointer would be a misaligned access that also depends on the host's
 // byte order.
 void emit32(uint32_t c) {
  for (int shift = 0; shift < 32; shift += 8) {
    *code++ = static_cast<uint8_t>(c >> shift);
  }
 }
 // Instruction byte ordering in x64:
 // 1. Legacy prefixes (optional)
 // 2. REX prefix      (optional)
 // 3. Opcode          (1 or 2 or 3 bytes)
 // 4. ModR/M          (1 byte if required)
 // 5. SIB             (1 byte if required)
 // 6. Displacement    (1 or 2 or 4 bytes)
 // 7. Immediate       (1 or 2 or 4 bytes)
 // Single-byte opcodes that take no operands
 constexpr uint8_t OP_BREAK  = 0xCC; // int3, breakpoint
 constexpr uint8_t OP_RETURN = 0xC3; // ret
 // Legacy prefixes (these go in front of any REX prefix).
 // 0x66 - operand-size override: per the manual it switches an instruction
 //        from its default operand size to the non-default one.
 //        NOTE(review): in 64-bit mode the default operand size is 32 bits, so
 //        this prefix should select 16-bit operands, and REX.W=1 should take
 //        precedence over it - confirm against the Intel SDM.
 // 0x65 - GS segment override.
 constexpr uint8_t OPERAND_SIZE_OVERRIDE = 0x66;
 constexpr uint8_t GS_SEGMENT_OVERRIDE   = 0x65;
 // General purpose registers; the enumerator value is the register number
 // used in the instruction encoding (0 - 15).
 // Narrower views of the same register: al(8lo), ah(8hi), ax(16), eax(32), rax(64)
 enum {
  RAX = 0, RCX, RDX, RBX,
  RSP,     RBP, RSI, RDI,
  R8,      R9,  R10, R11,
  R12,     R13, R14, R15,
 };
 using Register = uint8_t;
 // REX - the 64-bit mode prefix byte, laid out as 0100WRXB.
 // It is written when the instruction
 // * references an extended register (r8 - r15), or
 // * uses a 64-bit operand.
 // W: when 1, a 64-bit operand size is used; when 0, the default operand size
 //    applies (32-bit for most, but not all, instructions).
 // R: extension of the MODRM.reg field.
 // X: extension of the SIB.index field.
 // B: extension of the MODRM.rm field or of the SIB.base field.
 // Each argument has to be 0 or 1.
 uint8_t rex(uint8_t w, uint8_t r, uint8_t x, uint8_t b) {
  assert_is_one(w);
  assert_is_one(r);
  assert_is_one(x);
  assert_is_one(b);
  const int fixed_high_nibble = 0b0100 << 4;
  return fixed_high_nibble | (w << 3) | (r << 2) | (x << 1) | b;
 }
 // REX prefix with only the W bit set: 64-bit operand size, no extended registers.
 #define REX_W rex(1,0,0,0)
 // ModR/M
 //
 // Ways an operand can be addressed
 // - Indirect : the operand is in memory, at the address held in a register
 // - Displaced: the operand is in memory, at a register plus a constant offset,
 //              e.g. [rsp + 32h]; with a SIB byte two registers take part,
 //              e.g. [rsp + rax*2 + 32]
 // - Register : the operand is the register itself
 // - Immediate: the operand is stored right in the instruction; whether an
 //              immediate exists seems to depend on the instruction
 enum { // values of the 2-bit ModR/M mode field
  MODE_INDIRECT_ADDRESSING    = 0b00,
  MODE_DISPLACED8_ADDRESSING  = 0b01,
  MODE_DISPLACED32_ADDRESSING = 0b10,
  MODE_DISPLACED16_ADDRESSING = 0b10, // same encoding as the 32-bit form
  MODE_REGISTER_ADDRESSING    = 0b11,
 };
 // SIB byte layout
 // bits 7-6: scale (1, 2, 4, 8)
 // bits 5-3: index register, contributes index * scale
 // bits 2-0: base register, giving base + (index * scale)
 constexpr uint8_t SIB_SCALE1 = 0;
 constexpr uint8_t SIB_SCALE2 = 1;
 constexpr uint8_t SIB_SCALE4 = 2;
 constexpr uint8_t SIB_SCALE8 = 3;
 // ModR/M rm value that requests a SIB byte. It equals RSP's register number,
 // which is why the indirect emitters refuse RSP/R12 as a plain base.
 constexpr uint8_t MODRM_RM_SIB = 4;
 // Packs a ModR/M byte: mode in bits 7-6, reg in bits 5-3, rm in bits 2-0.
 // reg and rm carry only a 3-bit register number; to address the extended
 // registers r8 - r15 the matching REX bit has to be set as well.
 uint8_t modrm(uint8_t mode, uint8_t reg, uint8_t rm) {
  assert_in_range(mode, 0, 3);
  assert_in_range(reg, 0, 7);
  assert_in_range(rm, 0, 7);
  const uint8_t mode_bits = mode << 6;
  const uint8_t reg_bits  = reg << 3;
  return mode_bits | reg_bits | rm;
 }
 // Builds the REX prefix (always with W set) for an operation on two registers
 // and rewrites both register numbers so they fit the 3-bit ModR/M fields.
 // *rm: the register the callers place in the ModR/M reg field; when it is one
 //      of r8 - r15, REX bit 2 (R) is set and 8 is subtracted from it.
 // *rx: the register the callers place in the ModR/M rm field; when it is one
 //      of r8 - r15, REX bit 0 (B) is set and 8 is subtracted from it.
 // Note that the parameter names are the reverse of the fields they end up in.
 // Returns the REX byte to emit in front of the opcode.
 uint8_t adjust_registers_and_get_rex(Register *rm, Register *rx) {
  const uint8_t rex_r_bit = 1 << 2;
  const uint8_t rex_b_bit = 1 << 0;
  uint8_t prefix = REX_W;
  assert_in_range(*rm, 0, 15);
  assert_in_range(*rx, 0, 15);
  if (*rm > 7) {
    prefix |= rex_r_bit;
    *rm = *rm - 8;
  }
  if (*rx > 7) {
    prefix |= rex_b_bit;
    *rx = *rx - 8;
  }
  return prefix;
 }
 // Emits a register-to-register instruction as REX.W prefix, opcode, ModR/M.
 // rm is placed in the ModR/M reg field and rx in the ModR/M rm field, e.g.
 // add rax, rcx  => emit_register_op(0x03, RAX, RCX)
 void emit_register_op(uint8_t op_code, Register rm, Register rx) {
  const uint8_t prefix = adjust_registers_and_get_rex(&rm, &rx);
  emit(prefix);
  emit(op_code);
  const uint8_t operands = modrm(MODE_REGISTER_ADDRESSING, rm, rx);
  emit(operands);
 }
 // Emits an instruction with a memory operand addressed by a register and no
 // displacement: REX.W prefix, opcode, ModR/M (mode 0).
 // rx is the base register of the memory operand (ModR/M rm field) and rm is
 // the plain register operand (ModR/M reg field), so with opcode 0x03:
 // add rcx, [rax] => emit_indirect_op(0x03, RAX, RCX);
 // rx must not be RSP/R12 (that rm value selects a SIB byte) nor RBP/R13
 // (that rm value selects the Displacement32 form).
 void emit_indirect_op(uint8_t op_code, Register rx, Register rm) {
  const bool base_selects_sib    = (rx == RSP) || (rx == 8 + RSP); // RSP, R12
  const bool base_selects_disp32 = (rx == RBP) || (rx == 8 + RBP); // RBP, R13
  assert(!base_selects_sib);
  assert(!base_selects_disp32);
  const uint8_t prefix = adjust_registers_and_get_rex(&rm, &rx);
  emit(prefix);
  emit(op_code);
  const uint8_t operands = modrm(MODE_INDIRECT_ADDRESSING, rm, rx);
  emit(operands);
 }
 // Emits an instruction whose memory operand is [rx + displacement] with a
 // one-byte displacement: REX.W prefix, opcode, ModR/M (mode 1), displacement.
 // rx is the base register (ModR/M rm field), rm the register operand
 // (ModR/M reg field). rx must not be RSP/R12: that rm value selects a SIB byte.
 // NOTE(review): the CPU sign-extends an 8-bit displacement, so values above
 // 0x7f presumably address negative offsets - confirm with callers.
 void emit_indirect_displaced8_op(uint8_t op_code, Register rx, Register rm, uint8_t displacement) {
  const bool base_selects_sib = (rx == RSP) || (rx == 8 + RSP); // RSP, R12
  assert(!base_selects_sib);
  const uint8_t prefix = adjust_registers_and_get_rex(&rm, &rx);
  emit(prefix);
  emit(op_code);
  const uint8_t operands = modrm(MODE_DISPLACED8_ADDRESSING, rm, rx);
  emit(operands);
  emit(displacement);
 }
 // Emits an instruction whose memory operand is [rx + displacement] with a
 // four-byte displacement: REX.W prefix, opcode, ModR/M (mode 2), displacement.
 // rx is the base register (ModR/M rm field), rm the register operand
 // (ModR/M reg field). rx must not be RSP/R12: that rm value selects a SIB byte.
 void emit_indirect_displaced32_op(uint8_t op_code, Register rx, Register rm, uint32_t displacement) {
  const bool base_selects_sib = (rx == RSP) || (rx == 8 + RSP); // RSP, R12
  assert(!base_selects_sib);
  const uint8_t prefix = adjust_registers_and_get_rex(&rm, &rx);
  emit(prefix);
  emit(op_code);
  const uint8_t operands = modrm(MODE_DISPLACED32_ADDRESSING, rm, rx);
  emit(operands);
  emit32(displacement);
 }
 // extern "C" void asm_test();
 void test_x64_stuff() {
  //asm_test();
  code = code_start = (uint8_t*)VirtualAlloc(NULL, 4096, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
  emit(OP_BREAK);
  // REX.W + F7 /4
  #if 0
    emit(REX_W);
    emit(0xF7);
    emit(modrm(MODE_REGISTER_ADDRESSING, 0x4, RCX));
  #endif
  for (Register dst = RAX; dst <= R15; dst++) {
    for (Register src = RAX; src <= R15; src++) {
      if ((dst & 7) != RSP) {
        emit_indirect_displaced32_op(0x03, dst, src, 0xffffff);
        emit_indirect_displaced8_op(0x03, dst, src, 0x32);
      }
      if ((dst & 7) != RSP && (dst & 7) != RBP)
        emit_indirect_op(0x03, dst, src);
      emit_register_op(0x03, dst, src);
    }
  }
  emit(OP_RETURN);
  auto add = (uint64_t(*)(uint64_t, uint64_t))code_start;
  uint64_t result = add(11, 22);
 }