Update README, add images

This commit is contained in:
Krzosa Karol
2022-07-27 09:20:15 +02:00
parent 1ffb77d09d
commit 3deb94aad6
6 changed files with 92 additions and 83 deletions

4
.gitignore vendored
View File

@@ -18,4 +18,6 @@ data.txt
*.bin
*.4c
asset.log.txt
perfclocks.txt
perfclocks*
*.ipynb
zmiany

View File

@@ -1,4 +1,27 @@
# Realtime Software Renderer
![screenshot1](assets/Screenshot1.png)
![screenshot2](assets/Screenshot2.png)
## Clipping
There are 3 clipping stages, 2 clipping stages in 3D space against zfar and znear and 1 clipping
stage in 2D against left, bottom, right, top(2D image bounds).
First the triangles get clipped against the zfar plane,
if a triangle has even one vertex outside the clipping region, the entire triangle gets cut.
So far I didn't have problems with that. It simplifies the computations and splitting triangles
on zfar seems like a waste of power.
The second clipping stage is znear plane. Triangles get fully and nicely clipped against znear.
Every time a triangle gets partially outside the clipping region it gets cut to the znear and
either one or two new triangles get derived from the old one.
Last clipping stage is performed in the 2D image space. Every triangle has a corresponding AABB
box. In this box every pixel gets tested to see if it's in the triangle. In this clipping stage
the box is clipped to the image metrics - 0, 0, width, height.
### Things to do:
@@ -43,7 +66,7 @@
- [ ] Outlines
- [ ] Lightning
- [ ] Proper normal interpolation
* `https://hero.handmade.network/episode/code/day101/#105
* https://hero.handmade.network/episode/code/day101/#105
- [ ] Phong
- [x] diffuse
- [x] ambient
@@ -57,14 +80,15 @@
- [x] Simple profiling tooling
- [x] Statistics based on profiler data
- [x] Find cool profilers - ExtraSleepy, Vtune
- [ ] Optimizations
- [ ] Inline edge function
- [ ] Expand edge functions to more optimized version
- [ ] Test 4x2 bitmap layout?
- [ ] Edge function to integer
- [ ] Use integer bit operations to figure out if plus. (edge0|edge1|edge2)>=0
- [ ] SIMD
- [ ] Multithreading
- [x] Optimizations
- [x] Inline edge function
- [x] Expand edge functions to more optimized version
- [-] Test 4x2 bitmap layout?
- [-] Edge function to integer
- [-] Use integer bit operations to figure out if plus. (edge0|edge1|edge2)>=0
- [x] SIMD
- [x] Optimized SIMD
- [x] Multithreading
- [x] Text rendering
- [ ] UI
@@ -76,32 +100,9 @@
- [x] Gamma correct alpha blending for rectangles and bitmaps
- [ ] Plotting of profile data
- [x] Simple scatter plot
### Urgent:
- [ ] Simplify the code, especially for the 2d routines
- [x] Asset processor as second program
## Clipping
There are 3 clipping stages, 2 clipping stages in 3D space against zfar and znear and 1 clipping
stage in 2D against left, bottom, right, top(2D image bounds).
First the triangles get clipped against the zfar plane,
if a triangle has even one vertex outside the clipping region, the entire triangle gets cut.
So far I didn't have problems with that. It simplifies the computations and splitting triangles
on zfar seems like a waste of power.
The second clipping stage is znear plane. Triangles get fully and nicely clipped against znear.
Every time a triangle gets partially outside the clipping region it gets cut to the znear and
either one or two new triangles get derived from the old one.
Last clipping stage is performed in the 2D image space. Every triangle has a corresponding AABB
box. In this box every pixel gets tested to see if it's in the triangle. In this clipping stage
the box is clipped to the image metrics - 0, 0, width, height.
### Resources that helped me build the rasterizer (Might be helpful to you too):

BIN
assets/Screenshot1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 MiB

BIN
assets/Screenshot2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.4 MiB

View File

@@ -78,12 +78,13 @@
/// - [x] Simple scatter plot
/// - [x] Asset processor as second program
// #include "obj_dump.cpp"
#include "multimedia.cpp"
#include "obj.cpp"
#include "obj_dump.cpp"
// #include "multimedia.cpp"
// #include "obj.cpp"
#include "vec.cpp"
#include "work_queue.cpp"
#define PROFILE_SCOPE(x)
#define MULTITHREADING 1
struct Vertex {
Vec3 pos;
@@ -296,7 +297,7 @@ F32 edge_function(Vec4 vecp0, Vec4 vecp1, Vec4 p) {
#define S32x8 __m256i
S32 render_triangle_test_case_number = 5;
S32 render_triangle_test_case_angle = 1;
S32 render_triangle_test_case_angle = -1;
U64 filled_pixel_count;
U64 filled_pixel_cycles;
U64 triangle_count;
@@ -635,7 +636,7 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh *
Vec3 p0_to_p1 = vert[1].pos - vert[0].pos;
Vec3 p0_to_p2 = vert[2].pos - vert[0].pos;
Vec3 normal = normalize(cross(p0_to_p1, p0_to_p2));
// Vec3 light_direction = mat4_rotation_x(light_rotation) * vec3(0, 0, 1);
Vec3 light_direction = mat4_rotation_x(light_rotation) * vec3(0, 0, 1);
if (dot(normal, p0_to_camera) > 0) { //@Note: Backface culling
/// ## Clipping
@@ -731,6 +732,7 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh *
triangle_count++;
if (in_count > 3) triangle_count++;
#if MULTITHREADING
Render_Command *command = array_alloc(os.perm_arena, &r->commands);
command->src = image;
command->p0 = in[0].pos;
@@ -750,12 +752,12 @@ void draw_mesh(Render *r, String scene_name, Obj_Material *materials, Obj_Mesh *
command->tex2 = in[3].tex;
}
#if 0
#else
switch(render_triangle_test_case_number){
case 0: break;
case 1:
draw_triangle_nearest_a(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
if (in_count > 3) draw_triangle_nearest_a(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
break;
case 2:
draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[1].pos, in[2].pos, in[0].tex, in[1].tex, in[2].tex, in[0].norm, in[1].norm, in[2].norm);
if (in_count > 3) draw_triangle_nearest_b(&r->screen320, r->depth320, image, light_direction, in[0].pos, in[2].pos, in[3].pos, in[0].tex, in[2].tex, in[3].tex, in[0].norm, in[2].norm, in[3].norm);
@@ -797,8 +799,7 @@ UI_SIGNAL_CALLBACK(scene_callback) {
} break;
case Scene_Sponza: {
speed = 100;
r.camera_pos = vec3(-228,94.5,-107);
r.camera_yaw = vec2(-1.25, 0.21);
r.camera_pos = vec3(-1020, 687, -85); r.camera_yaw = vec2(-1.3, -0.44);
obj = sponza;
} break;
case Scene_Count:
@@ -810,36 +811,49 @@ UI_SIGNAL_CALLBACK(scene_callback) {
FILE *global_file;
function void
windows_log(Log_Kind kind, String string, char *file, int line){
// fprintf(global_file, "%s", string.str);
fprintf(global_file, "%s", string.str);
// OutputDebugStringA((const char *)string.str);
}
function void
next_test_case(B32 first_time){
render_triangle_test_case_number += 1;
if(first_time || render_triangle_test_case_number == 6){
render_triangle_test_case_angle += 1;
render_triangle_test_case_number = 1;
try_again: switch(render_triangle_test_case_angle){
case 0: r.camera_pos = vec3(-1020, 687, -85); r.camera_yaw = vec2(-1.3, -0.44); break;
case 1: r.camera_pos = vec3(-356,89.5,168); r.camera_yaw = vec2(0.2, 0); break;
case 2: render_triangle_test_case_angle = 0; goto try_again; break;
}
}
}
int
main(int argc, char **argv) {
global_file = fopen("perfclocks.txt", "a");
thread_ctx.log_proc = windows_log;
fprintf(global_file, "\n---------------------");
os.window_size.x = 1920;
os.window_size.y = 1080;
os.window_resizable = 1;
assert(os_init());
Font font = os_load_font(os.perm_arena, 12*os.dpi_scale, "Arial", 0);
test_array_list();
// test_array_list();
f22 = load_obj_dump(os.perm_arena, "plane.bin"_s);
sponza = load_obj_dump(os.perm_arena, "sponza.bin"_s);
// Obj sponza_obj = load_obj(&os_process_heap, "assets/sponza/sponza.obj"_s);
// sponza = &sponza_obj;
// f22 = load_obj_dump(os.perm_arena, "plane.bin"_s);
// sponza = load_obj_dump(os.perm_arena, "sponza.bin"_s);
Obj sponza_obj = load_obj(&os_process_heap, "assets/sponza/sponza.obj"_s);
sponza = &sponza_obj;
scene_callback();
next_test_case(true);
int screen_x = os.window_size.x;
int screen_y = os.window_size.y;
r.camera_pos = vec3(-228,94.5,-107);
r.camera_yaw = vec2(-1.25, 0.21);
r.screen320 = {(U32 *)arena_push_size(os.perm_arena, screen_x*screen_y*sizeof(U32)), screen_x, screen_y};
r.depth320 = (F32 *)arena_push_size(os.perm_arena, sizeof(F32) * screen_x * screen_y);
r.commands.block_size = 1024*1024;
ThreadStartupInfo thread_infos[16] = {};
init_work_queue(&r.work_queue, buff_cap(thread_infos), thread_infos);
@@ -911,8 +925,8 @@ main(int argc, char **argv) {
draw_mesh(&r, obj->name, obj->materials.data, mesh+i, vertices, tex_coords, normals);
}
Render_Tile_Job_Data tile_job_data[16];
#if MULTITHREADING
Render_Tile_Job_Data tile_job_data[32];
S32 x_tiles = 1;
S32 y_tiles = 16;
F32 block_size_x = r.screen320.x / x_tiles;
@@ -935,7 +949,7 @@ main(int argc, char **argv) {
wait_until_completion(&r.work_queue);
array_free_all_nodes(&r.commands);
#endif
// @Note: Draw 320screen to OS screen
U32* ptr = os.screen->pixels;
@@ -952,30 +966,23 @@ main(int argc, char **argv) {
ui_end_frame(os.screen, &ui, &font);
frame_data = string_fmt(os.frame_arena, "FPS:%f dt:%f frame:%u camera_pos: %f %f %f camera_yaw: %f %f",
os.fps, os.delta_time*1000, os.frame, r.camera_pos.x, r.camera_pos.y, r.camera_pos.z, r.camera_yaw.x, r.camera_yaw.y);
if(filled_pixel_count){
raster_details = string_fmt(os.frame_arena, "\nAngle:%d Case:%d Cycle per pixel: %llu Cycles:%llu Pixels:%llu Triangles:%llu",
render_triangle_test_case_angle, render_triangle_test_case_number, filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
#if MULTITHREADING
if(os.frame == 1) log_info("Angle;Frame_Time\n");
log_info("%d;%f\n", render_triangle_test_case_angle, os.delta_time*1000);
#else
if(os.frame == 1) log_info("Angle;Algorithm;Frame_Time;Cycles_Per_Pixel;Cycles_To_Process_Triangles;Pixels_Processed;Triangles\n");
log_info("%d;%d;%f;%llu;%llu;%llu;%llu\n", render_triangle_test_case_angle, render_triangle_test_case_number,
os.delta_time*1000, filled_pixel_cycles/filled_pixel_count, filled_pixel_cycles, filled_pixel_count, triangle_count);
#endif
filled_pixel_count = 0;
filled_pixel_cycles = 0;
triangle_count = 0;
}
filled_pixel_count = 0;
filled_pixel_cycles = 0;
triangle_count = 0;
// @Todo I think there is bug with test_case_number, after doing full round it
// skips a phase
if(os.frame % 60 == 0){
continue;
render_triangle_test_case_number++;
if(render_triangle_test_case_number == 6){
render_triangle_test_case_number = 0;
try_again: switch(render_triangle_test_case_angle){
case 0: r.camera_pos = vec3(-228,94.5,-107); r.camera_yaw = vec2(-1.25, 0.21); break;
case 1: r.camera_pos = vec3(-356,89.5,168); r.camera_yaw = vec2(0.2, 0); break;
case 2: r.camera_pos = vec3(-1020, 687, -85); r.camera_yaw = vec2(-1.3, -0.44); break;
case 3: render_triangle_test_case_angle = 0; goto try_again; break;
}
render_triangle_test_case_angle += 1;
}
if(os.frame % 15 == 0){
next_test_case(false);
}
}

View File

@@ -7,7 +7,6 @@ void draw_triangle_nearest_a(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
@@ -22,9 +21,10 @@ void draw_triangle_nearest_a(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
U32 *destination = dst->pixels + dst->x*min_y;
F32 area = (p1.y - p0.y) * (p2.x - p0.x) - (p1.x - p0.x) * (p2.y - p0.y);
F32 area = edge_function(p0, p1, p2);
for (S64 y = min_y; y < max_y; y++) {
for (S64 x = min_x; x < max_x; x++) {
F32 Cx0 = edge_function(p0, p1, { (F32)x,(F32)y });
@@ -116,7 +116,6 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
@@ -129,6 +128,7 @@ void draw_triangle_nearest_b(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 l
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
F32 dy10 = (p1.y - p0.y);
F32 dy21 = (p2.y - p1.y);
@@ -246,7 +246,6 @@ void draw_triangle_bilinear(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 li
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
F32 max_x1 = (F32)(max(p0.x, max(p1.x, p2.x)));
@@ -260,6 +259,7 @@ void draw_triangle_bilinear(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 li
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
F32 area = edge_function(p0, p1, p2);
for (S64 y = min_y; y < max_y; y++) {
@@ -347,7 +347,6 @@ void draw_triangle_nearest_simd_with_overloads(Bitmap* dst, F32 *depth_buffer, B
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
@@ -362,6 +361,7 @@ void draw_triangle_nearest_simd_with_overloads(Bitmap* dst, F32 *depth_buffer, B
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
F32 dy10 = (p1.y - p0.y);
F32 dy21 = (p2.y - p1.y);
F32 dy02 = (p0.y - p2.y);
@@ -570,9 +570,6 @@ void draw_triangle_nearest_simd_without_overloads(Bitmap* dst, F32 *depth_buffer
Vec2 tex0, Vec2 tex1, Vec2 tex2,
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
PROFILE_SCOPE(draw_triangle);
F32 min_x1 = (F32)(min(p0.x, min(p1.x, p2.x)));
F32 min_y1 = (F32)(min(p0.y, min(p1.y, p2.y)));
@@ -587,6 +584,7 @@ void draw_triangle_nearest_simd_without_overloads(Bitmap* dst, F32 *depth_buffer
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
F32 dy10 = (p1.y - p0.y);
F32 dy21 = (p2.y - p1.y);
F32 dy02 = (p0.y - p2.y);
@@ -858,7 +856,6 @@ void draw_triangle_nearest_final(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Ve
Vec3 norm0, Vec3 norm1, Vec3 norm2) {
if(src->pixels == 0) return;
U64 fill_pixels_begin = __rdtsc();
F32 region_min_x = 0;
F32 region_min_y = 0;
@@ -878,6 +875,8 @@ void draw_triangle_nearest_final(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Ve
if (min_y >= max_y) return;
if (min_x >= max_x) return;
U64 fill_pixels_begin = __rdtsc();
F32 dy10 = (p1.y - p0.y);
F32 dy21 = (p2.y - p1.y);
F32 dy02 = (p0.y - p2.y);