Working on measurements
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
@echo off
|
@echo off
|
||||||
|
|
||||||
pushd %~dp0
|
pushd %~dp0
|
||||||
clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
|
clang main.cpp -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib
|
||||||
popd
|
popd
|
||||||
37
main.cpp
37
main.cpp
@@ -80,9 +80,9 @@
|
|||||||
|
|
||||||
// #include "obj_dump.cpp"
|
// #include "obj_dump.cpp"
|
||||||
#include "multimedia.cpp"
|
#include "multimedia.cpp"
|
||||||
#include "profile.cpp"
|
|
||||||
#include "obj.cpp"
|
#include "obj.cpp"
|
||||||
#include "vec.cpp"
|
#include "vec.cpp"
|
||||||
|
#define PROFILE_SCOPE(x)
|
||||||
|
|
||||||
struct Vertex {
|
struct Vertex {
|
||||||
Vec3 pos;
|
Vec3 pos;
|
||||||
@@ -281,7 +281,8 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) {
|
|||||||
#define S32x8 __m256i
|
#define S32x8 __m256i
|
||||||
|
|
||||||
U64 filled_pixel_count;
|
U64 filled_pixel_count;
|
||||||
U64 filled_pixel_total_time;
|
U64 filled_pixel_cycles;
|
||||||
|
U64 triangle_count;
|
||||||
#include "optimization_log.cpp"
|
#include "optimization_log.cpp"
|
||||||
|
|
||||||
function
|
function
|
||||||
@@ -291,7 +292,7 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
|
|||||||
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
||||||
if(src->pixels == 0) return;
|
if(src->pixels == 0) return;
|
||||||
|
|
||||||
PROFILE_SCOPE(draw_triangle);
|
U64 fill_pixels_begin = __rdtsc();
|
||||||
|
|
||||||
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
|
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
|
||||||
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
|
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
|
||||||
@@ -362,7 +363,6 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
|
|||||||
F32x8 p2_x = _mm256_set1_ps(p2.x);
|
F32x8 p2_x = _mm256_set1_ps(p2.x);
|
||||||
F32x8 p2_y = _mm256_set1_ps(p2.y);
|
F32x8 p2_y = _mm256_set1_ps(p2.y);
|
||||||
|
|
||||||
U64 fill_pixels_begin = __rdtsc();
|
|
||||||
for (S64 y = min_y; y < max_y; y++) {
|
for (S64 y = min_y; y < max_y; y++) {
|
||||||
F32x8 Y = _mm256_set1_ps(y);
|
F32x8 Y = _mm256_set1_ps(y);
|
||||||
for (S64 x8 = min_x; x8 < max_x; x8+=8) {
|
for (S64 x8 = min_x; x8 < max_x; x8+=8) {
|
||||||
@@ -562,9 +562,8 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
|
|||||||
}
|
}
|
||||||
destination += dst->x;
|
destination += dst->x;
|
||||||
}
|
}
|
||||||
U64 end_time = __rdtsc();
|
|
||||||
|
|
||||||
filled_pixel_total_time += end_time - fill_pixels_begin;
|
filled_pixel_cycles += __rdtsc() - fill_pixels_begin;
|
||||||
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -708,9 +707,11 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh *
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
|
triangle_count++;
|
||||||
|
draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
|
||||||
if (in_count > 3) {
|
if (in_count > 3) {
|
||||||
draw_triangle_nearest(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
|
triangle_count++;
|
||||||
|
draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1106,7 +1107,6 @@ main(int argc, char **argv) {
|
|||||||
thread_ctx.log_proc = windows_log;
|
thread_ctx.log_proc = windows_log;
|
||||||
fprintf(global_file, "\n---------------------");
|
fprintf(global_file, "\n---------------------");
|
||||||
|
|
||||||
|
|
||||||
os.window_size.x = 1280;
|
os.window_size.x = 1280;
|
||||||
os.window_size.y = 720;
|
os.window_size.y = 720;
|
||||||
os.window_resizable = 1;
|
os.window_resizable = 1;
|
||||||
@@ -1209,23 +1209,14 @@ main(int argc, char **argv) {
|
|||||||
|
|
||||||
ui_end_frame(os.screen, &ui, &font);
|
ui_end_frame(os.screen, &ui, &font);
|
||||||
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f"
|
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f"
|
||||||
"\nAvg_Time: %llu Time:%llu Count:%llu",
|
"\nCycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
|
||||||
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y,
|
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y,
|
||||||
filled_pixel_total_time/filled_pixel_count, filled_pixel_total_time, filled_pixel_count);
|
filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
|
||||||
|
|
||||||
|
filled_pixel_count = 0;
|
||||||
|
filled_pixel_cycles = 0;
|
||||||
|
triangle_count = 0;
|
||||||
|
|
||||||
for(int i = 0; i < ProfileScopeName_Count; i++){
|
|
||||||
auto *scope = &profile_scopes[i];
|
|
||||||
if(scope->i == 0) continue;
|
|
||||||
|
|
||||||
U64 total = 0;
|
|
||||||
for(int i = 0; i < scope->i; i++){
|
|
||||||
total += scope->samples[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
log_info("\n%s :: Total: %llu Hits: %llu, Avg: %llu", profile_scope_names[i], total, (U64)scope->i, total / scope->i);
|
|
||||||
scope->i = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -106,8 +106,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
Vec4 p0, Vec4 p1, Vec4 p2,
|
Vec4 p0, Vec4 p1, Vec4 p2,
|
||||||
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
||||||
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
||||||
// if(os.frame > 10) PROFILE_BEGIN(draw_triangle);
|
if(src->pixels == 0) return;
|
||||||
// ZoneScopedN("draw_triangle");
|
U64 fill_pixels_begin = __rdtsc();
|
||||||
|
|
||||||
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
|
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
|
||||||
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
|
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
|
||||||
F32 max_x1 = (F32)(max(p0.x, max(p1.x, p2.x)));
|
F32 max_x1 = (F32)(max(p0.x, max(p1.x, p2.x)));
|
||||||
@@ -117,6 +118,9 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
S64 max_x = (S64)min((F32)dst->x, ceil(max_x1));
|
S64 max_x = (S64)min((F32)dst->x, ceil(max_x1));
|
||||||
S64 max_y = (S64)min((F32)dst->y, ceil(max_y1));
|
S64 max_y = (S64)min((F32)dst->y, ceil(max_y1));
|
||||||
|
|
||||||
|
if (min_y >= max_y) return;
|
||||||
|
if (min_x >= max_x) return;
|
||||||
|
|
||||||
F32 dy10 = (p1.y - p0.y);
|
F32 dy10 = (p1.y - p0.y);
|
||||||
F32 dy21 = (p2.y - p1.y);
|
F32 dy21 = (p2.y - p1.y);
|
||||||
F32 dy02 = (p0.y - p2.y);
|
F32 dy02 = (p0.y - p2.y);
|
||||||
@@ -135,7 +139,6 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
|
|
||||||
U32 *destination = dst->pixels + dst->x*min_y;
|
U32 *destination = dst->pixels + dst->x*min_y;
|
||||||
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
||||||
U64 fill_pixels_begin = __rdtsc();
|
|
||||||
for (S64 y = min_y; y < max_y; y++) {
|
for (S64 y = min_y; y < max_y; y++) {
|
||||||
F32 Cx0 = Cy0;
|
F32 Cx0 = Cy0;
|
||||||
F32 Cx1 = Cy1;
|
F32 Cx1 = Cy1;
|
||||||
@@ -223,12 +226,10 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
Cy2 -= dx02;
|
Cy2 -= dx02;
|
||||||
destination += dst->x;
|
destination += dst->x;
|
||||||
}
|
}
|
||||||
|
U64 end_time = __rdtsc();
|
||||||
|
|
||||||
U64 end_time = __rdtsc();
|
filled_pixel_cycles += end_time - fill_pixels_begin;
|
||||||
|
|
||||||
filled_pixel_total_time += end_time - fill_pixels_begin;
|
|
||||||
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
||||||
// if(os.frame > 10) PROFILE_END(draw_triangle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -817,6 +818,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
||||||
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
||||||
if(src->pixels == 0) return;
|
if(src->pixels == 0) return;
|
||||||
|
U64 fill_pixels_begin = __rdtsc();
|
||||||
|
|
||||||
PROFILE_SCOPE(draw_triangle);
|
PROFILE_SCOPE(draw_triangle);
|
||||||
|
|
||||||
@@ -869,7 +871,6 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
||||||
Vec8 area8 = vec8(area);
|
Vec8 area8 = vec8(area);
|
||||||
|
|
||||||
U64 fill_pixels_begin = __rdtsc();
|
|
||||||
for (S64 y = min_y; y < max_y; y++) {
|
for (S64 y = min_y; y < max_y; y++) {
|
||||||
Vec8 Cx0 = vec8(Cy0);
|
Vec8 Cx0 = vec8(Cy0);
|
||||||
Vec8 Cx1 = vec8(Cy1);
|
Vec8 Cx1 = vec8(Cy1);
|
||||||
@@ -1025,7 +1026,7 @@ void draw_triangle_nearest_f(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
}
|
}
|
||||||
U64 end_time = __rdtsc();
|
U64 end_time = __rdtsc();
|
||||||
|
|
||||||
filled_pixel_total_time += end_time - fill_pixels_begin;
|
filled_pixel_cycles += end_time - fill_pixels_begin;
|
||||||
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1035,6 +1036,7 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
Vec2 tex0, Vec2 tex1, Vec2 tex2,
|
||||||
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
|
||||||
if(src->pixels == 0) return;
|
if(src->pixels == 0) return;
|
||||||
|
U64 fill_pixels_begin = __rdtsc();
|
||||||
|
|
||||||
PROFILE_SCOPE(draw_triangle);
|
PROFILE_SCOPE(draw_triangle);
|
||||||
|
|
||||||
@@ -1109,7 +1111,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
|
||||||
F32x8 area8 = _mm256_set1_ps(area);
|
F32x8 area8 = _mm256_set1_ps(area);
|
||||||
|
|
||||||
U64 fill_pixels_begin = __rdtsc();
|
|
||||||
for (S64 y = min_y; y < max_y; y++) {
|
for (S64 y = min_y; y < max_y; y++) {
|
||||||
F32x8 Cx0 = _mm256_set1_ps(Cy0);
|
F32x8 Cx0 = _mm256_set1_ps(Cy0);
|
||||||
F32x8 Cx1 = _mm256_set1_ps(Cy1);
|
F32x8 Cx1 = _mm256_set1_ps(Cy1);
|
||||||
@@ -1311,8 +1312,6 @@ void draw_triangle_nearest_g(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
|
|||||||
Cy2 -= dx02;
|
Cy2 -= dx02;
|
||||||
destination += dst->x;
|
destination += dst->x;
|
||||||
}
|
}
|
||||||
U64 end_time = __rdtsc();
|
filled_pixel_cycles += __rdtsc() - fill_pixels_begin;
|
||||||
|
|
||||||
filled_pixel_total_time += end_time - fill_pixels_begin;
|
|
||||||
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
filled_pixel_count += (max_x - min_x)*(max_y - min_y);
|
||||||
}
|
}
|
||||||
|
|||||||
54
profile.cpp
54
profile.cpp
@@ -1,54 +0,0 @@
|
|||||||
enum ProfileScopeName {
|
|
||||||
ProfileScopeName_draw_triangle,
|
|
||||||
ProfileScopeName_fill_triangle,
|
|
||||||
ProfileScopeName_draw_all_meshes,
|
|
||||||
ProfileScopeName_draw_mesh,
|
|
||||||
ProfileScopeName_draw_set_of_mesh_indices,
|
|
||||||
ProfileScopeName_main_loop,
|
|
||||||
ProfileScopeName_fill_triangle_after_depth_test,
|
|
||||||
ProfileScopeName_fill_triangle_inner,
|
|
||||||
ProfileScopeName_fill_triangle_outer,
|
|
||||||
ProfileScopeName_Count,
|
|
||||||
};
|
|
||||||
|
|
||||||
const char *profile_scope_names[] = {
|
|
||||||
"draw_triangle",
|
|
||||||
"fill_triangle",
|
|
||||||
"draw_all_meshes",
|
|
||||||
"draw_mesh",
|
|
||||||
"draw_set_of_mesh_indices",
|
|
||||||
"main_loop",
|
|
||||||
"fill_triangle_after_depth_test",
|
|
||||||
"fill_triangle_inner",
|
|
||||||
"fill_triangle_outer",
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ProfileState {
|
|
||||||
U64 samples[5096*32];
|
|
||||||
S32 i;
|
|
||||||
};
|
|
||||||
|
|
||||||
global ProfileState profile_scopes[ProfileScopeName_Count];
|
|
||||||
|
|
||||||
force_inline void
|
|
||||||
profile_begin(ProfileScopeName name){
|
|
||||||
ProfileState *p = profile_scopes + name;
|
|
||||||
p->samples[p->i] = __rdtsc();
|
|
||||||
}
|
|
||||||
|
|
||||||
force_inline void
|
|
||||||
profile_end(ProfileScopeName name){
|
|
||||||
ProfileState *p = profile_scopes + name;
|
|
||||||
p->samples[p->i] = __rdtsc() - p->samples[p->i];
|
|
||||||
p->i = (p->i + 1) % buff_cap(p->samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Profile_Scope{
|
|
||||||
ProfileScopeName n;
|
|
||||||
force_inline Profile_Scope(ProfileScopeName name){ profile_begin(name); n=name; }
|
|
||||||
force_inline ~Profile_Scope(){ profile_end(n); }
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PROFILE_BEGIN(name) profile_begin(ProfileScopeName_##name)
|
|
||||||
#define PROFILE_END(name) profile_end(ProfileScopeName_##name)
|
|
||||||
#define PROFILE_SCOPE(name) Profile_Scope profile_scope_##__LINE__(ProfileScopeName_##name)
|
|
||||||
Reference in New Issue
Block a user