Test cases working, testing operator overloads vs no overloads
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
@echo off
|
||||
|
||||
pushd %~dp0
|
||||
clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
|
||||
clang main.cpp -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
|
||||
popd
|
||||
28
main.cpp
28
main.cpp
@@ -280,7 +280,8 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) {
|
||||
#define F32x8 __m256
|
||||
#define S32x8 __m256i
|
||||
|
||||
S32 render_triangle_test_case_number;
|
||||
S32 render_triangle_test_case_number = 3;
|
||||
S32 render_triangle_test_case_angle = 1;
|
||||
U64 filled_pixel_count;
|
||||
U64 filled_pixel_cycles;
|
||||
U64 triangle_count;
|
||||
@@ -800,9 +801,11 @@ main(int argc, char **argv) {
|
||||
r.depth320 = (F32 *)arena_push_size(os.perm_arena, sizeof(F32) * screen_x * screen_y);
|
||||
|
||||
String frame_data = {};
|
||||
String raster_details = {};
|
||||
UISetup setup[] = {
|
||||
UI_SIGNAL("Change scene"_s, scene_callback),
|
||||
UI_LABEL(&frame_data),
|
||||
UI_LABEL(&raster_details),
|
||||
UI_LABEL(&os.text),
|
||||
};
|
||||
UI ui = ui_make(setup, buff_cap(setup));
|
||||
@@ -879,14 +882,29 @@ main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
ui_end_frame(os.screen, &ui, &font);
|
||||
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f"
|
||||
"\nCycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
|
||||
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y,
|
||||
filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
|
||||
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f",
|
||||
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y);
|
||||
if(filled_pixel_count){
|
||||
raster_details = string_fmt(os.frame_arena, "\nAngle:%d Case:%d Cycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
|
||||
render_triangle_test_case_angle, render_triangle_test_case_number, filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
|
||||
|
||||
filled_pixel_count = 0;
|
||||
filled_pixel_cycles = 0;
|
||||
triangle_count = 0;
|
||||
}
|
||||
|
||||
if(os.frame % 4 == 0){
|
||||
render_triangle_test_case_number++;
|
||||
if(render_triangle_test_case_number == 6){
|
||||
render_triangle_test_case_number = 0;
|
||||
try_again: switch(render_triangle_test_case_angle){
|
||||
case 0: r.camera_pos = vec3(-228,94.5,-107); r.camera_yaw = vec2(-1.25, 0.21); break;
|
||||
case 1: r.camera_pos = vec3(-356,89.5,168); r.camera_yaw = vec2(0.2, 0); break;
|
||||
case 2: render_triangle_test_case_angle = 0; goto try_again; break;
|
||||
}
|
||||
render_triangle_test_case_angle += 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -532,18 +532,27 @@ void draw_triangle_nearest_simd_with_overloads(Bitmap* dst, F32 *depth_buffer, B
|
||||
dst_b.simd = {_mm256_sqrt_ps(dst_b.simd)};
|
||||
}
|
||||
|
||||
Vec8I result;
|
||||
for(S64 i = 0; i < 8; i++){
|
||||
if (should_fill[i]){
|
||||
U8 red = (U8)(dst_r[i] * 255);
|
||||
U8 green = (U8)(dst_g[i] * 255);
|
||||
U8 blue = (U8)(dst_b[i] * 255);
|
||||
U8 alpha = (U8)(dst_a[i] * 255);
|
||||
result.e[i] = (U32)(alpha << 24 | blue << 16 | green << 8 | red << 0);
|
||||
}
|
||||
}
|
||||
// Convert to integer format
|
||||
dst_r = dst_r * var255;
|
||||
dst_g = dst_g * var255;
|
||||
dst_b = dst_b * var255;
|
||||
dst_a = dst_a * var255;
|
||||
|
||||
_mm256_maskstore_epi32((int *)dst_memory, should_fill.simd, result.simd);
|
||||
Vec8I dst_r_int = convert_vec8_to_vec8i(dst_r);
|
||||
Vec8I dst_g_int = convert_vec8_to_vec8i(dst_g);
|
||||
Vec8I dst_b_int = convert_vec8_to_vec8i(dst_b);
|
||||
Vec8I dst_a_int = convert_vec8_to_vec8i(dst_a);
|
||||
|
||||
Vec8I dst_int_a_shifted = {_mm256_slli_epi32(dst_a_int.simd, 24)};
|
||||
Vec8I dst_int_b_shifted = {_mm256_slli_epi32(dst_b_int.simd, 16)};
|
||||
Vec8I dst_int_g_shifted = {_mm256_slli_epi32(dst_g_int.simd, 8)};
|
||||
Vec8I dst_int_r_shifted = dst_r_int;
|
||||
|
||||
Vec8I packed_abgr0 = {_mm256_or_si256(dst_int_a_shifted.simd, dst_int_b_shifted.simd)};
|
||||
Vec8I packed_abgr1 = {_mm256_or_si256(packed_abgr0.simd, dst_int_g_shifted.simd)};
|
||||
Vec8I packed_abgr2 = {_mm256_or_si256(packed_abgr1.simd, dst_int_r_shifted.simd)};
|
||||
|
||||
_mm256_maskstore_epi32((int *)dst_memory, should_fill.simd, packed_abgr2.simd);
|
||||
|
||||
}
|
||||
Cy0 -= dx10;
|
||||
|
||||
Reference in New Issue
Block a user