Working on measurements

This commit is contained in:
Krzosa Karol
2022-07-09 14:49:39 +02:00
parent 77639d4178
commit 0ff19b6ab4
4 changed files with 29 additions and 93 deletions

View File

@@ -1,5 +1,5 @@
@echo off
pushd %~dp0
clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
clang main.cpp -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
popd

View File

@@ -80,9 +80,9 @@
// #include "obj_dump.cpp"
#include "multimedia.cpp"
#include "profile.cpp"
#include "obj.cpp"
#include "vec.cpp"
#define PROFILE_SCOPE(x)
struct Vertex {
Vec3 pos;
@@ -281,7 +281,8 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) {
#define S32x8 __m256i
U64 filled_pixel_count;
U64 filled_pixel_total_time;
U64 filled_pixel_cycles;
U64 triangle_count;
#include "optimization_log.cpp"
function
@@ -291,7 +292,7 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
PROFILE_SCOPE(draw_triangle);
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
@@ -362,7 +363,6 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
F32x8 p2_x = _mm256_set1_ps(p2.x);
F32x8 p2_y = _mm256_set1_ps(p2.y);
U64 fill_pixels_begin = __rdtsc();
for (S64 y = min_y; y < max_y; y++) {
F32x8 Y = _mm256_set1_ps(y);
for (S64 x8 = min_x; x8 < max_x; x8+=8) {
@@ -562,9 +562,8 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
}
destination += dst->x;
}
U64 end_time = __rdtsc();
filled_pixel_total_time += end_time - fill_pixels_begin;
filled_pixel_cycles += __rdtsc() - fill_pixels_begin;
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
}
@@ -708,9 +707,11 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh *
}
draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
triangle_count++;
draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
if (in_count > 3) {
draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
triangle_count++;
draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
}
}
}
@@ -1106,7 +1107,6 @@ main(int argc, char **argv) {
thread_ctx.log_proc = windows_log;
fprintf(global_file, "\n---------------------");
os.window_size.x = 1280;
os.window_size.y = 720;
os.window_resizable = 1;
@@ -1209,23 +1209,14 @@ main(int argc, char **argv) {
ui_end_frame(os.screen, &ui, &font);
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f"
"\nAvg_Time: %llu Time:%llu Count:%llu",
"\nCycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y,
filled_pixel_total_time/filled_pixel_count, filled_pixel_total_time, filled_pixel_count);
filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
filled_pixel_count = 0;
filled_pixel_cycles = 0;
triangle_count = 0;
for(int i = 0; i < ProfileScopeName_Count; i++){
auto *scope = &profile_scopes[i];
if(scope->i == 0) continue;
U64 total = 0;
for(int i = 0; i < scope->i; i++){
total += scope->samples[i];
}
log_info("\n%s :: Total: %llu Hits: %llu, Avg: %llu", profile_scope_names[i], total, (U64)scope->i, total / scope->i);
scope->i = 0;
}
}
}

View File

@@ -106,8 +106,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Vec4 p0, Vec4 p1, Vec4 p2,
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
// if(os.frame > 10) PROFILE_BEGIN(draw_triangle);
// ZoneScopedN("draw_triangle");
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
F32 max_x1 = (F32)(max(p0.x, max(p1.x, p2.x)));
@@ -117,6 +118,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
S64 max_x = (S64)min((F32)dst->x, ceil(max_x1));
S64 max_y = (S64)min((F32)dst->y, ceil(max_y1));
if (min_y >= max_y) return;
if (min_x >= max_x) return;
F32 dy10 = (p1.y - p0.y);
F32 dy21 = (p2.y - p1.y);
F32 dy02 = (p0.y - p2.y);
@@ -135,7 +139,6 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
U32 *destination = dst->pixels + dst->x*min_y;
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
U64 fill_pixels_begin = __rdtsc();
for (S64 y = min_y; y < max_y; y++) {
F32 Cx0 = Cy0;
F32 Cx1 = Cy1;
@@ -223,12 +226,10 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Cy2 -= dx02;
destination += dst->x;
}
U64 end_time = __rdtsc();
U64 end_time = __rdtsc();
filled_pixel_total_time += end_time - fill_pixels_begin;
filled_pixel_cycles += end_time - fill_pixels_begin;
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
// if(os.frame > 10) PROFILE_END(draw_triangle);
}
@@ -817,6 +818,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
PROFILE_SCOPE(draw_triangle);
@@ -869,7 +871,6 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
Vec8 area8 = vec8(area);
U64 fill_pixels_begin = __rdtsc();
for (S64 y = min_y; y < max_y; y++) {
Vec8 Cx0 = vec8(Cy0);
Vec8 Cx1 = vec8(Cy1);
@@ -1025,7 +1026,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
}
U64 end_time = __rdtsc();
filled_pixel_total_time += end_time - fill_pixels_begin;
filled_pixel_cycles += end_time - fill_pixels_begin;
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
}
@@ -1035,6 +1036,7 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
PROFILE_SCOPE(draw_triangle);
@@ -1109,7 +1111,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
F32x8 area8 = _mm256_set1_ps(area);
U64 fill_pixels_begin = __rdtsc();
for (S64 y = min_y; y < max_y; y++) {
F32x8 Cx0 = _mm256_set1_ps(Cy0);
F32x8 Cx1 = _mm256_set1_ps(Cy1);
@@ -1311,8 +1312,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Cy2 -= dx02;
destination += dst->x;
}
U64 end_time = __rdtsc();
filled_pixel_total_time += end_time - fill_pixels_begin;
filled_pixel_cycles += __rdtsc() - fill_pixels_begin;
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
}

View File

@@ -1,54 +0,0 @@
// Identifiers for every instrumented region. Each enumerator is used as an
// index into profile_scopes[] and profile_scope_names[], so
// ProfileScopeName_Count must remain the last entry.
enum ProfileScopeName {
  ProfileScopeName_draw_triangle,
  ProfileScopeName_fill_triangle,
  ProfileScopeName_draw_all_meshes,
  ProfileScopeName_draw_mesh,
  ProfileScopeName_draw_set_of_mesh_indices,
  ProfileScopeName_main_loop,
  ProfileScopeName_fill_triangle_after_depth_test,
  ProfileScopeName_fill_triangle_inner,
  ProfileScopeName_fill_triangle_outer,
  ProfileScopeName_Count,
};

// Printable label for each ProfileScopeName, listed in enumerator order.
const char *profile_scope_names[] = {
  "draw_triangle",
  "fill_triangle",
  "draw_all_meshes",
  "draw_mesh",
  "draw_set_of_mesh_indices",
  "main_loop",
  "fill_triangle_after_depth_test",
  "fill_triangle_inner",
  "fill_triangle_outer",
};

// The enum and the label table are maintained by hand in parallel; break the
// build if a scope is added to or removed from only one of them, instead of
// reading past the end of the table (or printing the wrong label) at runtime.
static_assert(sizeof(profile_scope_names) / sizeof(profile_scope_names[0]) == ProfileScopeName_Count,
              "profile_scope_names must have exactly one entry per ProfileScopeName");
// Sample storage for a single profiled scope.
struct ProfileState {
// Timing samples. profile_begin() writes the __rdtsc() start stamp into
// samples[i]; profile_end() overwrites that same slot with the elapsed
// counter delta.
// NOTE(review): 5096*32 looks like it may have been meant as 4096*32 - confirm.
U64 samples[5096*32];
// Index of the slot currently being written. profile_end() advances it and
// wraps it with buff_cap(samples) (presumably the array's element count -
// buff_cap is defined outside this file).
S32 i;
};
// One ProfileState per ProfileScopeName enumerator, indexed by the enumerator value.
global ProfileState profile_scopes[ProfileScopeName_Count];
// Opens a measurement for `name`: stores the current __rdtsc() value in the
// scope's active sample slot. The slot index is not advanced here; the
// matching profile_end() converts the stamp into a delta and moves on.
force_inline void
profile_begin(ProfileScopeName name){
  ProfileState &state = profile_scopes[name];
  state.samples[state.i] = __rdtsc();
}
// Closes the measurement opened by profile_begin() for `name`: replaces the
// start stamp in the active slot with the __rdtsc() delta since that stamp,
// then advances the slot index, wrapping at buff_cap(samples).
force_inline void
profile_end(ProfileScopeName name){
  ProfileState &state = profile_scopes[name];
  U64 *slot = &state.samples[state.i];
  *slot = __rdtsc() - *slot;
  state.i = (state.i + 1) % buff_cap(state.samples);
}
// Scope guard around profile_begin()/profile_end(): the constructor starts a
// measurement for the given scope name and the destructor finishes it, so the
// sample covers the lifetime of the object.
struct Profile_Scope{
  // Scope name remembered so the destructor can close the same measurement.
  ProfileScopeName n;

  force_inline Profile_Scope(ProfileScopeName name) : n(name) {
    profile_begin(name);
  }

  force_inline ~Profile_Scope(){
    profile_end(n);
  }
};
// Manual markers: PROFILE_BEGIN(x)/PROFILE_END(x) call profile_begin/profile_end
// with ProfileScopeName_x.
#define PROFILE_BEGIN(name) profile_begin(ProfileScopeName_##name)
#define PROFILE_END(name) profile_end(ProfileScopeName_##name)

// Two-step token paste. Operands of ## are not macro-expanded, so pasting
// __LINE__ directly yields the literal identifier "profile_scope___LINE__";
// going through one extra macro level expands __LINE__ to its number first.
#define PROFILE_CONCAT_(a, b) a##b
#define PROFILE_CONCAT(a, b) PROFILE_CONCAT_(a, b)

// Declares a Profile_Scope guard for ProfileScopeName_<name>. The variable is
// named after the source line so several guards can live in the same block
// without redefinition errors.
#define PROFILE_SCOPE(name) Profile_Scope PROFILE_CONCAT(profile_scope_, __LINE__)(ProfileScopeName_##name)