From 415a2122b4a1d87bd24a6ead6066ddee0db62fab Mon Sep 17 00:00:00 2001 From: Faerbit Date: Sun, 7 Jun 2020 12:10:51 +0200 Subject: [PATCH] Optimized performance. --- Makefile | 4 +-- wtracer.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index bd4a046..dbc800e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CXX = g++ -CXXFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -march=native -Ofast -fopenmp -flto +CXXFLAGS = -std=c++20 -Wall -Wextra -Wno-unused-parameter -march=native -Ofast -flto -fopenmp DEPS = util.h vec3.h color.h ray.h camera.h hittable.h hittable_list.h sphere.h material.h lodepng.h OBJ = wtracer.o material.o vec3.o lodepng.o @@ -20,5 +20,5 @@ $(TARGET): $(OBJ) $(CXX) $(CXXFLAGS) -o $@ $^ clean: - $(RM) $(TARGET) $(OBJ) image.ppm + $(RM) $(TARGET) $(OBJ) image.png diff --git a/wtracer.cpp b/wtracer.cpp index 5127941..ddfd00c 100644 --- a/wtracer.cpp +++ b/wtracer.cpp @@ -1,10 +1,15 @@ #include #include #include +#include #define LODEPNG_NO_COMPILE_DECODER #include "lodepng.h" +#ifdef _OPENMP +#include +#endif + #include "color.h" #include "vec3.h" #include "ray.h" @@ -78,11 +83,52 @@ Hittable_list setup_random_scene() { return world; } +struct render_params { + int image_width; + int image_height; + int samples_per_pixel; + int max_depth; + Hittable_list& world; + Camera& cam; +}; + +void split_line(const int& line_length, int& start, int& end) { +#ifdef _OPENMP + const int line_length_thread = static_cast(line_length/omp_get_num_threads()); + start = omp_get_thread_num() * line_length_thread; + end = std::min((omp_get_thread_num()+1) * line_length_thread, line_length); +#else + start = 0; + end = line_length; +#endif +} + +void render(const render_params& params, std::vector& image) { + int line_start, line_end; + split_line(params.image_width, line_start, line_end); + for (int i = params.image_height-1; i>=0; --i) { + #ifdef _OPENMP + if (omp_get_thread_num() == 0) + #endif + std::cerr << "\rScanline remaining: " << std::setw(4) << i << std::flush; + for (int j = line_start; j(image_width / aspect_ratio); //const int samples_per_pixel = 1000; const int samples_per_pixel = 400; @@ -98,21 +144,20 @@ int main() { Camera cam(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus); + render_params params = { + image_width, + image_height, + samples_per_pixel, + max_depth, + world, + cam, + }; + std::vector image(image_width * image_height); - for (int i = image_height-1; i>=0; --i) { - std::cerr << "\rScanline remaining: " << std::setw(4) << i << std::flush; - #pragma omp parallel for - for (int j = 0; j img_lode(image_width * image_height * 3); for (int i = 0; i