Optimized performance.

This commit is contained in:
Faerbit 2020-06-07 12:10:51 +02:00
parent 98a6c551c3
commit 415a2122b4
2 changed files with 63 additions and 18 deletions

View File

@ -1,6 +1,6 @@
CXX = g++
CXXFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -march=native -Ofast -fopenmp -flto
CXXFLAGS = -std=c++20 -Wall -Wextra -Wno-unused-parameter -march=native -Ofast -flto -fopenmp
DEPS = util.h vec3.h color.h ray.h camera.h hittable.h hittable_list.h sphere.h material.h lodepng.h
OBJ = wtracer.o material.o vec3.o lodepng.o
@ -20,5 +20,5 @@ $(TARGET): $(OBJ)
$(CXX) $(CXXFLAGS) -o $@ $^
clean:
$(RM) $(TARGET) $(OBJ) image.ppm
$(RM) $(TARGET) $(OBJ) image.png

View File

@ -1,10 +1,15 @@
#include <iostream>
#include <iomanip>
#include <memory>
#include <cstdio>
#define LODEPNG_NO_COMPILE_DECODER
#include "lodepng.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#include "color.h"
#include "vec3.h"
#include "ray.h"
@ -78,11 +83,52 @@ Hittable_list setup_random_scene() {
return world;
}
// Bundle of everything render() needs to produce the image, so that it can be
// handed to each rendering thread as one argument.
// Field order matters: the struct is filled by positional aggregate
// initialisation.
struct render_params {
// Width of the output image in pixels (columns per scanline).
int image_width;
// Height of the output image in pixels (number of scanlines).
int image_height;
// Number of randomly jittered rays accumulated for every pixel.
int samples_per_pixel;
// Forwarded unchanged to ray_color(); presumably the recursion/bounce limit
// for a single ray -- confirm against ray_color().
int max_depth;
// Scene to trace against. Held by reference: the referenced object must
// outlive this struct.
Hittable_list& world;
// Camera used to generate rays. Held by reference: the referenced object must
// outlive this struct.
Camera& cam;
};
/**
 * Computes the half-open column range [start, end) of a scanline that the
 * calling thread is responsible for.
 *
 * With OpenMP enabled the scanline is divided proportionally between the
 * threads of the current team: thread t receives
 * [t * len / n, (t + 1) * len / n). These ranges are contiguous, do not
 * overlap and together cover [0, line_length) exactly, even when line_length
 * is not a multiple of the number of threads or is smaller than it (some
 * threads then simply get an empty range). Without OpenMP the whole line is
 * returned.
 *
 * @param line_length number of columns in one scanline
 * @param start       receives the first column for the calling thread
 * @param end         receives one past the last column for the calling thread
 */
void split_line(const int& line_length, int& start, int& end) {
#ifdef _OPENMP
    // Widen before multiplying so thread_index * line_length cannot overflow int.
    const long long thread_count = omp_get_num_threads();
    const long long thread_index = omp_get_thread_num();
    // Proportional boundaries instead of a truncated per-thread width: the
    // truncated width left the trailing (line_length % thread_count) columns
    // unassigned, so they were never rendered.
    start = static_cast<int>(thread_index * line_length / thread_count);
    end = static_cast<int>((thread_index + 1) * line_length / thread_count);
#else
    start = 0;
    end = line_length;
#endif
}
void render(const render_params& params, std::vector<Color>& image) {
int line_start, line_end;
split_line(params.image_width, line_start, line_end);
for (int i = params.image_height-1; i>=0; --i) {
#ifdef _OPENMP
if (omp_get_thread_num() == 0)
#endif
std::cerr << "\rScanline remaining: " << std::setw(4) << i << std::flush;
for (int j = line_start; j<line_end; ++j) {
Color pixel_color(0, 0, 0);
for (int s = 0; s<params.samples_per_pixel; ++s) {
auto u = double(j + random_double(-0.5, 0.5)) / (params.image_width - 1);
auto v = double(i + random_double(-0.5, 0.5)) / (params.image_height - 1);
Ray r = params.cam.get_ray(u, v);
pixel_color += ray_color(r, params.world, params.max_depth);
}
image[i*params.image_width+j] = pixel_color;
}
}
}
int main() {
const auto aspect_ratio = 16.0 / 9.0;
//const int image_width = 1280;
const int image_width = 768;
//const int image_width = 384;
//const int image_width = 768;
const int image_width = 384;
const int image_height = static_cast<int>(image_width / aspect_ratio);
//const int samples_per_pixel = 1000;
const int samples_per_pixel = 400;
@ -98,21 +144,20 @@ int main() {
Camera cam(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus);
render_params params = {
image_width,
image_height,
samples_per_pixel,
max_depth,
world,
cam,
};
std::vector<Color> image(image_width * image_height);
for (int i = image_height-1; i>=0; --i) {
std::cerr << "\rScanline remaining: " << std::setw(4) << i << std::flush;
#pragma omp parallel for
for (int j = 0; j<image_width; ++j) {
Color pixel_color(0, 0, 0);
for (int s = 0; s<samples_per_pixel; ++s) {
auto u = double(j + random_double(-0.5, 0.5)) / (image_width - 1);
auto v = double(i + random_double(-0.5, 0.5)) / (image_height - 1);
Ray r = cam.get_ray(u, v);
pixel_color += ray_color(r, world, max_depth);
}
image.at(i*image_width+j) = pixel_color;
}
#pragma omp parallel
{
render(params, image);
}
std::cerr << "\nAssembling image.\n";
@ -120,7 +165,7 @@ int main() {
std::vector<unsigned char> img_lode(image_width * image_height * 3);
for (int i = 0; i<image_height; ++i) {
for (int j = 0; j<image_width; ++j) {
write_color_vec(img_lode, i*image_width*3+j*3, image.at((image_height-1-i)*image_width+j), samples_per_pixel);
write_color_vec(img_lode, i*image_width*3+j*3, image[(image_height-1-i)*image_width+j], samples_per_pixel);
}
}
std::cerr << "Writing file \"image.png\".\n";