From 0ff19b6ab4d4fa9c013b969b9cf93061a651b001 Mon Sep 17 00:00:00 2001 From: Krzosa Karol Date: Sat, 9 Jul 2022 14:49:39 +0200 Subject: [PATCH] Working on measurements --- build.bat | 2 +- main.cpp | 39 ++++++++++++-------------------- optimization_log.cpp | 27 +++++++++++----------- profile.cpp | 54 -------------------------------------------- 4 files changed, 29 insertions(+), 93 deletions(-) delete mode 100644 profile.cpp diff --git a/build.bat b/build.bat index 780abfa..469a4c8 100644 --- a/build.bat +++ b/build.bat @@ -1,5 +1,5 @@ @echo off pushd %~dp0 -clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib +clang main.cpp -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib popd \ No newline at end of file diff --git a/main.cpp b/main.cpp index c8f4222..d4e5452 100644 --- a/main.cpp +++ b/main.cpp @@ -80,9 +80,9 @@ // #include "obj_dump.cpp" #include "multimedia.cpp" -#include "profile.cpp" #include "obj.cpp" #include "vec.cpp" +#define PROFILE_SCOPE(x) struct Vertex { Vec3 pos; @@ -281,7 +281,8 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) { #define S32x8 __m256i U64 filled_pixel_count; -U64 filled_pixel_total_time; +U64 filled_pixel_cycles; +U64 triangle_count; #include "optimization_log.cpp" function @@ -291,7 +292,7 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig Vec3 norm0, Vec3 norm1, Vec3 norm2) { if(src->pixels == 0) return; - PROFILE_SCOPE(draw_triangle); + U64 fill_pixels_begin = __rdtsc(); F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x))); F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y))); @@ -362,7 +363,6 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig F32x8 p2_x = _mm256_set1_ps(p2.x); F32x8 p2_y = _mm256_set1_ps(p2.y); - U64 fill_pixels_begin = __rdtsc(); for (S64 y = min_y; y < max_y; y++) { F32x8 Y = _mm256_set1_ps(y); for (S64 x8 = min_x; x8 < max_x; x8+=8) { @@ -562,10 +562,9 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig } destination += dst->x; } - U64 end_time = __rdtsc(); - filled_pixel_total_time += end_time - fill_pixels_begin; - filled_pixel_count += (max_x - min_x)*(max_y - min_y); + filled_pixel_cycles += __rdtsc() - fill_pixels_begin; + filled_pixel_count += (max_x - min_x)*(max_y - min_y); } function @@ -708,9 +707,11 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh * } - draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm); + triangle_count++; + draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm); if (in_count > 3) { - draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm); + triangle_count++; + draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm); } } } @@ -1106,7 +1107,6 @@ main(int argc, char **argv) { thread_ctx.log_proc = windows_log; fprintf(global_file, "\n---------------------"); - os.window_size.x = 1280; os.window_size.y = 720; os.window_resizable = 1; @@ -1209,23 +1209,14 @@ main(int argc, char **argv) { ui_end_frame(os.screen, &ui, &font); frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f" - "\nAvg_Time: %llu Time:%llu Count:%llu", + "\nCycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu", os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y, - filled_pixel_total_time/filled_pixel_count, filled_pixel_total_time, filled_pixel_count); + filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count); + filled_pixel_count = 0; + filled_pixel_cycles = 0; + triangle_count = 0; - for(int i = 0; i < ProfileScopeName_Count; i++){ - auto *scope = &profile_scopes[i]; - if(scope->i == 0) continue; - - U64 total = 0; - for(int i = 0; i < scope->i; i++){ - total += scope->samples[i]; - } - - log_info("\n%s :: Total: %llu Hits: %llu, Avg: %llu", profile_scope_names[i], total, (U64)scope->i, total / scope->i); - scope->i = 0; - } } } diff --git a/optimization_log.cpp b/optimization_log.cpp index 50a80a7..7017422 100644 --- a/optimization_log.cpp +++ b/optimization_log.cpp @@ -106,8 +106,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l Vec4 p0, Vec4 p1, Vec4 p2, Vec2 tex0, Vec2 tex1, Vec2 tex2, Vec3 norm0, Vec3 norm1, Vec3 norm2) { - // if(os.frame > 10) PROFILE_BEGIN(draw_triangle); - // ZoneScopedN("draw_triangle"); + if(src->pixels == 0) return; + U64 fill_pixels_begin = __rdtsc(); + F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x))); F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y))); F32 max_x1 = (F32)(max(p0.x, max(p1.x, p2.x))); @@ -117,6 +118,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l S64 max_x = (S64)min((F32)dst->x, ceil(max_x1)); S64 max_y = (S64)min((F32)dst->y, ceil(max_y1)); + if (min_y >= max_y) return; + if (min_x >= max_x) return; + F32 dy10 = (p1.y - p0.y); F32 dy21 = (p2.y - p1.y); F32 dy02 = (p0.y - p2.y); @@ -135,7 +139,6 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l U32 *destination = dst->pixels + dst->x*min_y; F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y); - U64 fill_pixels_begin = __rdtsc(); for (S64 y = min_y; y < max_y; y++) { F32 Cx0 = Cy0; F32 Cx1 = Cy1; @@ -223,12 +226,10 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l Cy2 -= dx02; destination += dst->x; } + U64 end_time = __rdtsc(); -U64 end_time = __rdtsc(); - - filled_pixel_total_time += end_time - fill_pixels_begin; - filled_pixel_count += (max_x - min_x)*(max_y - min_y); - // if(os.frame > 10) PROFILE_END(draw_triangle); + filled_pixel_cycles += end_time - fill_pixels_begin; + filled_pixel_count += (max_x - min_x)*(max_y - min_y); } @@ -817,6 +818,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l Vec2 tex0, Vec2 tex1, Vec2 tex2, Vec3 norm0, Vec3 norm1, Vec3 norm2) { if(src->pixels == 0) return; + U64 fill_pixels_begin = __rdtsc(); PROFILE_SCOPE(draw_triangle); @@ -869,7 +871,6 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y); Vec8 area8 = vec8(area); - U64 fill_pixels_begin = __rdtsc(); for (S64 y = min_y; y < max_y; y++) { Vec8 Cx0 = vec8(Cy0); Vec8 Cx1 = vec8(Cy1); @@ -1025,7 +1026,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l } U64 end_time = __rdtsc(); - filled_pixel_total_time += end_time - fill_pixels_begin; + filled_pixel_cycles += end_time - fill_pixels_begin; filled_pixel_count += (max_x - min_x)*(max_y - min_y); } @@ -1035,6 +1036,7 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l Vec2 tex0, Vec2 tex1, Vec2 tex2, Vec3 norm0, Vec3 norm1, Vec3 norm2) { if(src->pixels == 0) return; + U64 fill_pixels_begin = __rdtsc(); PROFILE_SCOPE(draw_triangle); @@ -1109,7 +1111,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y); F32x8 area8 = _mm256_set1_ps(area); - U64 fill_pixels_begin = __rdtsc(); for (S64 y = min_y; y < max_y; y++) { F32x8 Cx0 = _mm256_set1_ps(Cy0); F32x8 Cx1 = _mm256_set1_ps(Cy1); @@ -1311,8 +1312,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l Cy2 -= dx02; destination += dst->x; } - U64 end_time = __rdtsc(); - - filled_pixel_total_time += end_time - fill_pixels_begin; + filled_pixel_cycles += __rdtsc() - fill_pixels_begin; filled_pixel_count += (max_x - min_x)*(max_y - min_y); } diff --git a/profile.cpp b/profile.cpp deleted file mode 100644 index 60bd012..0000000 --- a/profile.cpp +++ /dev/null @@ -1,54 +0,0 @@ -enum ProfileScopeName { - ProfileScopeName_draw_triangle, - ProfileScopeName_fill_triangle, - ProfileScopeName_draw_all_meshes, - ProfileScopeName_draw_mesh, - ProfileScopeName_draw_set_of_mesh_indices, - ProfileScopeName_main_loop, - ProfileScopeName_fill_triangle_after_depth_test, - ProfileScopeName_fill_triangle_inner, - ProfileScopeName_fill_triangle_outer, - ProfileScopeName_Count, -}; - -const char *profile_scope_names[] = { - "draw_triangle", - "fill_triangle", - "draw_all_meshes", - "draw_mesh", - "draw_set_of_mesh_indices", - "main_loop", - "fill_triangle_after_depth_test", - "fill_triangle_inner", - "fill_triangle_outer", -}; - -struct ProfileState { - U64 samples[5096*32]; - S32 i; -}; - -global ProfileState profile_scopes[ProfileScopeName_Count]; - -force_inline void -profile_begin(ProfileScopeName name){ - ProfileState *p = profile_scopes + name; - p->samples[p->i] = __rdtsc(); -} - -force_inline void -profile_end(ProfileScopeName name){ - ProfileState *p = profile_scopes + name; - p->samples[p->i] = __rdtsc() - p->samples[p->i]; - p->i = (p->i + 1) % buff_cap(p->samples); -} - -struct Profile_Scope{ - ProfileScopeName n; - force_inline Profile_Scope(ProfileScopeName name){ profile_begin(name); n=name; } - force_inline ~Profile_Scope(){ profile_end(n); } -}; - -#define PROFILE_BEGIN(name) profile_begin(ProfileScopeName_##name) -#define PROFILE_END(name) profile_end(ProfileScopeName_##name) -#define PROFILE_SCOPE(name) Profile_Scope profile_scope_##__LINE__(ProfileScopeName_##name)