Test cases working, testing operator overloads vs no overloads

This commit is contained in:
Krzosa Karol
2022-07-09 16:09:21 +02:00
parent f1e0646269
commit 33f22effd4
3 changed files with 47 additions and 20 deletions

View File

@@ -1,5 +1,5 @@
@echo off
pushd %~dp0
clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
clang main.cpp -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
popd

View File

@@ -280,7 +280,8 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) {
#define F32x8 __m256
#define S32x8 __m256i
S32 render_triangle_test_case_number;
S32 render_triangle_test_case_number = 3;
S32 render_triangle_test_case_angle = 1;
U64 filled_pixel_count;
U64 filled_pixel_cycles;
U64 triangle_count;
@@ -800,9 +801,11 @@ main(int argc, char **argv) {
r.depth320 = (F32 *)arena_push_size(os.perm_arena, sizeof(F32) * screen_x * screen_y);
String frame_data = {};
String raster_details = {};
UISetup setup[] = {
UI_SIGNAL("Change scene"_s, scene_callback),
UI_LABEL(&frame_data),
UI_LABEL(&raster_details),
UI_LABEL(&os.text),
};
UI ui = ui_make(setup, buff_cap(setup));
@@ -879,14 +882,29 @@ main(int argc, char **argv) {
}
ui_end_frame(os.screen, &ui, &font);
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f"
"\nCycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y,
filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f",
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y);
if(filled_pixel_count){
raster_details = string_fmt(os.frame_arena, "\nAngle:%d Case:%d Cycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
render_triangle_test_case_angle, render_triangle_test_case_number, filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
filled_pixel_count = 0;
filled_pixel_cycles = 0;
triangle_count = 0;
}
if(os.frame % 4 == 0){
render_triangle_test_case_number++;
if(render_triangle_test_case_number == 6){
render_triangle_test_case_number = 0;
try_again: switch(render_triangle_test_case_angle){
case 0: r.camera_pos = vec3(-228,94.5,-107); r.camera_yaw = vec2(-1.25, 0.21); break;
case 1: r.camera_pos = vec3(-356,89.5,168); r.camera_yaw = vec2(0.2, 0); break;
case 2: render_triangle_test_case_angle = 0; goto try_again; break;
}
render_triangle_test_case_angle += 1;
}
}
}
}

View File

@@ -532,18 +532,27 @@ void draw_triangle_nearest_simd_with_overloads(Bitmap* dst, F32 *depth_buffer, B
dst_b.simd = {_mm256_sqrt_ps(dst_b.simd)};
}
Vec8I result;
for(S64 i = 0; i < 8; i++){
if (should_fill[i]){
U8 red = (U8)(dst_r[i] * 255);
U8 green = (U8)(dst_g[i] * 255);
U8 blue = (U8)(dst_b[i] * 255);
U8 alpha = (U8)(dst_a[i] * 255);
result.e[i] = (U32)(alpha << 24 | blue << 16 | green << 8 | red << 0);
}
}
// Convert to integer format
dst_r = dst_r * var255;
dst_g = dst_g * var255;
dst_b = dst_b * var255;
dst_a = dst_a * var255;
_mm256_maskstore_epi32((int *)dst_memory, should_fill.simd, result.simd);
Vec8I dst_r_int = convert_vec8_to_vec8i(dst_r);
Vec8I dst_g_int = convert_vec8_to_vec8i(dst_g);
Vec8I dst_b_int = convert_vec8_to_vec8i(dst_b);
Vec8I dst_a_int = convert_vec8_to_vec8i(dst_a);
Vec8I dst_int_a_shifted = {_mm256_slli_epi32(dst_a_int.simd, 24)};
Vec8I dst_int_b_shifted = {_mm256_slli_epi32(dst_b_int.simd, 16)};
Vec8I dst_int_g_shifted = {_mm256_slli_epi32(dst_g_int.simd, 8)};
Vec8I dst_int_r_shifted = dst_r_int;
Vec8I packed_abgr0 = {_mm256_or_si256(dst_int_a_shifted.simd, dst_int_b_shifted.simd)};
Vec8I packed_abgr1 = {_mm256_or_si256(packed_abgr0.simd, dst_int_g_shifted.simd)};
Vec8I packed_abgr2 = {_mm256_or_si256(packed_abgr1.simd, dst_int_r_shifted.simd)};
_mm256_maskstore_epi32((int *)dst_memory, should_fill.simd, packed_abgr2.simd);
}
Cy0 -= dx10;