#include <stdio.h>
#include <stdint.h>
#include <windows.h>

#define assert(x) do{if(!(x))__debugbreak();}while(0)
#define assert_in_range(x, a, b) assert(((x) >= a) && ((x) <= b))
#define assert_is_one(x) assert_in_range(x, 0, 1)

// First byte of the buffer that receives the generated machine code.
uint8_t* code_start;
// Write cursor: address of the next byte to be emitted.
uint8_t* code;

// Appends one byte at the write cursor and advances the cursor by one.
// No bounds checking is performed; `code` must point at writable memory.
void emit(uint8_t c) {
  *code++ = c;
}

// Appends a 32-bit value in little-endian byte order (least significant byte
// first) and advances the cursor by four.
// The bytes are written one at a time because the cursor sits at an arbitrary
// byte offset: storing through a uint32_t* there would be a misaligned,
// type-punned access.
void emit32(uint32_t c) {
  emit((uint8_t)(c & 0xFF));
  emit((uint8_t)((c >> 8) & 0xFF));
  emit((uint8_t)((c >> 16) & 0xFF));
  emit((uint8_t)((c >> 24) & 0xFF));
}

// Instruction encoding layout in x64 (fields appear in this order):
// 1. Legacy prefixes (optional)
// 2. REX prefix      (optional)
// 3. Opcode          (1 or 2 or 3 bytes)
// 4. ModR/M          (1 byte if required)
// 5. SIB             (1 byte if required)
// 6. Displacement    (1 or 2 or 4 bytes)
// 7. Immediate       (1 or 2 or 4 bytes)

// Single-byte opcodes that take no operands.
constexpr uint8_t OP_BREAK  = 0xCC; // int3: breakpoint trap
constexpr uint8_t OP_RETURN = 0xC3; // ret: near return

// From manual:
// The operand-size override prefix allows a program to switch between 16-and 32-bit operand sizes. Either size can
// be the default; use of the prefix selects the non-default size.
// In 64-bit mode the default operand size is 32 bits, so this prefix selects 16-bit operands;
// it is ignored when REX.W = 1 (the operand size stays 64 bits).
// Legacy prefix bytes; they are placed before the REX prefix.
constexpr uint8_t OPERAND_SIZE_OVERRIDE = 0x66; // operand-size override
constexpr uint8_t GS_SEGMENT_OVERRIDE   = 0x65; // use the GS segment for the memory operand

// General-purpose registers, numbered 0-15 as they are encoded in instructions.
// Values 8-15 (R8-R15) need an extension bit in the REX prefix.
// Narrower views of the first register, for reference:
// al (low 8 bits), ah (high 8 bits), ax (16), eax (32), rax (64)
enum {
  RAX = 0,
  RCX = 1,
  RDX = 2,
  RBX = 3,
  RSP = 4,
  RBP = 5,
  RSI = 6,
  RDI = 7,
  R8  = 8,
  R9  = 9,
  R10 = 10,
  R11 = 11,
  R12 = 12,
  R13 = 13,
  R14 = 14,
  R15 = 15
};
// A register number taken from the enum above.
using Register = uint8_t;

// REX - 64-bit mode prefix.
// It is written when an instruction
// * references an extended register (r8 - r15), or
// * uses a 64-bit operand.
//
// Layout: 0100WRXB
// W  1 bit  1 = 64-bit operand size, 0 = the instruction's default operand
//           size (32 bits for most, but not all, instructions).
// R  1 bit  Extension of the MODRM.reg field.
// X  1 bit  Extension of the SIB.index field.
// B  1 bit  Extension of the MODRM.rm field or the SIB.base field.
//
// Each argument must be 0 or 1; returns the assembled prefix byte.
uint8_t rex(uint8_t w, uint8_t r, uint8_t x, uint8_t b) {
  assert_is_one(w);
  assert_is_one(r);
  assert_is_one(x);
  assert_is_one(b);

  uint8_t prefix = 0x40; // fixed high nibble 0100
  prefix |= w << 3;
  prefix |= r << 2;
  prefix |= x << 1;
  prefix |= b;
  return prefix;
}
// REX prefix with only W set: 64-bit operand size, no extended registers.
#define REX_W rex(1,0,0,0)

// ModR/M
//
// Addressing
// - Indirect : operand is stored under the address specified by one of the registers
// - Displaced: operand is stored at the address held in a register plus a constant offset, e.g. [rsp + 32h].
//              With a SIB byte a second, scaled register can be added as well, e.g. [rsp + rax*2 + 32].
// - Register : operand is stored in the register
// - Immediate: operand is stored right in the instruction; whether an immediate exists depends on the instruction
// Values of the 2-bit `mode` field of the ModR/M byte (see modrm()).
enum {
  MODE_INDIRECT_ADDRESSING    = 0x0, // [register]
  MODE_DISPLACED8_ADDRESSING  = 0x1, // [register + 8-bit displacement]
  MODE_DISPLACED32_ADDRESSING = 0x2, // [register + 32-bit displacement]
  // Same encoding as the 32-bit form; which displacement width applies depends
  // on the address size in effect.
  MODE_DISPLACED16_ADDRESSING = MODE_DISPLACED32_ADDRESSING,
  MODE_REGISTER_ADDRESSING    = 0x3, // operand is the register itself
};

// SIB byte: address = base + index * scale
//   bits 7-6  scale (multiplier 1, 2, 4 or 8)
//   bits 5-3  index register
//   bits 2-0  base register
// The scale constants are the 2-bit field values; the multiplier is 1 << value.
constexpr uint8_t SIB_SCALE1 = 0;
constexpr uint8_t SIB_SCALE2 = 1;
constexpr uint8_t SIB_SCALE4 = 2;
constexpr uint8_t SIB_SCALE8 = 3;
// Value of the ModR/M rm field that announces a SIB byte.
constexpr uint8_t MODRM_RM_SIB = 4;

// Builds a ModR/M byte: mode in bits 7-6, reg in bits 5-3, rm in bits 2-0.
// reg and rm are 3-bit register numbers (0-7); reaching r8 - r15 additionally
// requires the matching extension bit in the REX prefix.
uint8_t modrm(uint8_t mode, uint8_t reg, uint8_t rm) {
  assert_in_range(mode, 0, 3);
  assert_in_range(reg, 0, 7);
  assert_in_range(rm, 0, 7);

  uint8_t encoded = rm;
  encoded |= reg << 3;
  encoded |= mode << 6;
  return encoded;
}

// Prepares two 4-bit register numbers (0-15) for encoding and returns the REX
// prefix that goes with them. REX.W is always set.
//
// A register numbered 8 or higher is reduced in place to its low 3-bit number
// and the corresponding REX extension bit is set instead:
//   *rm -> bit 2 of the prefix (REX.R, extends the ModR/M reg field)
//   *rx -> bit 0 of the prefix (REX.B, extends the ModR/M rm field)
// Mind the naming: the callers in this file pass the value returned through
// `rm` as the reg argument of modrm() and the one returned through `rx` as its
// rm argument.
uint8_t adjust_registers_and_get_rex(Register *rm, Register *rx) {
  uint8_t prefix = REX_W;
  assert_in_range(*rm, 0, 15);
  assert_in_range(*rx, 0, 15);

  if (*rm > 7) {
    *rm -= 8;
    prefix |= 0x04; // REX.R
  }

  if (*rx > 7) {
    *rx -= 8;
    prefix |= 0x01; // REX.B
  }

  return prefix;
}

// Emits a register-to-register instruction: REX prefix, opcode, ModR/M.
// `rm` is written to the ModR/M reg field and `rx` to the ModR/M rm field.
// Example: add rax, rcx  => emit_register_op(0x03, RAX, RCX)
void emit_register_op(uint8_t op_code, Register rm, Register rx) {
  // Reduces both registers to 3 bits; the 4th bits end up in the prefix.
  emit(adjust_registers_and_get_rex(&rm, &rx));
  emit(op_code);
  emit(modrm(MODE_REGISTER_ADDRESSING, rm, rx));
}

// Emits an instruction whose memory operand is addressed by a register alone:
// REX prefix, opcode, ModR/M with mode 00.
// Argument order: `rx` (FIRST) is the register holding the address, it is
// written to the ModR/M rm field; `rm` (SECOND) is the register operand, it is
// written to the ModR/M reg field.
// Example: add rcx, [rax] => emit_indirect_op(0x03, RAX, RCX);
void emit_indirect_op(uint8_t op_code, Register rx, Register rm) {
  // In mode 00 these rm encodings do not mean "address in that register":
  // RSP / R12 announce a SIB byte, RBP / R13 select the disp32 form.
  assert(rx != RSP && rx != 8 + RSP);
  assert(rx != RBP && rx != 8 + RBP);

  emit(adjust_registers_and_get_rex(&rm, &rx));
  emit(op_code);
  emit(modrm(MODE_INDIRECT_ADDRESSING, rm, rx));
}

// Emits an instruction whose memory operand is [register + 8-bit displacement]:
// REX prefix, opcode, ModR/M with mode 01, displacement byte.
// `rx` (first register argument) is the address register and goes to the
// ModR/M rm field; `rm` is the register operand and goes to the reg field.
// The displacement byte is emitted exactly as given.
void emit_indirect_displaced8_op(uint8_t op_code, Register rx, Register rm, uint8_t displacement) {
  // An rm encoding of RSP / R12 announces a SIB byte, which is not emitted here.
  assert(rx != RSP && rx != 8 + RSP);

  emit(adjust_registers_and_get_rex(&rm, &rx));
  emit(op_code);
  emit(modrm(MODE_DISPLACED8_ADDRESSING, rm, rx));
  emit(displacement);
}

// Emits an instruction whose memory operand is [register + 32-bit displacement]:
// REX prefix, opcode, ModR/M with mode 10, four displacement bytes.
// `rx` (first register argument) is the address register and goes to the
// ModR/M rm field; `rm` is the register operand and goes to the reg field.
void emit_indirect_displaced32_op(uint8_t op_code, Register rx, Register rm, uint32_t displacement) {
  // An rm encoding of RSP / R12 announces a SIB byte, which is not emitted here.
  assert(rx != RSP && rx != 8 + RSP);

  emit(adjust_registers_and_get_rex(&rm, &rx));
  emit(op_code);
  emit(modrm(MODE_DISPLACED32_ADDRESSING, rm, rx));
  emit32(displacement);
}


// extern "C" void asm_test();
void test_x64_stuff() {
  //asm_test();

  code = code_start = (uint8_t*)VirtualAlloc(NULL, 4096, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
  emit(OP_BREAK);

  // REX.W + F7 /4
  #if 0
    emit(REX_W);
    emit(0xF7);
    emit(modrm(MODE_REGISTER_ADDRESSING, 0x4, RCX));
  #endif

  for (Register dst = RAX; dst <= R15; dst++) {
    for (Register src = RAX; src <= R15; src++) {
      if ((dst & 7) != RSP) {
        emit_indirect_displaced32_op(0x03, dst, src, 0xffffff);
        emit_indirect_displaced8_op(0x03, dst, src, 0x32);
      }
      if ((dst & 7) != RSP && (dst & 7) != RBP)
        emit_indirect_op(0x03, dst, src);
      emit_register_op(0x03, dst, src);
    }
  }

  emit(OP_RETURN);

  auto add = (uint64_t(*)(uint64_t, uint64_t))code_start;
  uint64_t result = add(11, 22);
}