// glava-ridged/glfft/glfft_common.hpp
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
*
* Permission is hereby granted, free of charge,
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// For the most part used by the implementation.
#ifndef GLFFT_COMMON_HPP__
#define GLFFT_COMMON_HPP__
#include "glfft_interface.hpp"
#include <functional>
#include <cstddef>
#include <cstdlib>
#include <stdexcept>
#include <string>
#include <cstring>
#include <memory>
#include <unordered_map>
namespace GLFFT
{
/// Direction of an FFT pass. The numeric values are fixed explicitly
/// (-1, 0, +1) and must stay as they are.
enum Direction
{
    /// Forward FFT transform.
    Forward = -1,

    /// Inverse FFT transform that takes two frequency-domain inputs and
    /// multiplies them together, i.e. performs a convolution.
    InverseConvolve = 0,

    /// Inverse FFT transform.
    Inverse = 1,
};
/// Kind of pass a shader program performs.
/// Enumerators are numbered consecutively starting at 0.
enum Mode
{
    Horizontal = 0,
    HorizontalDual = 1,
    Vertical = 2,
    VerticalDual = 3,
    ResolveRealToComplex = 4,
    ResolveComplexToReal = 5
};
/// Kind of transform to perform.
/// Enumerators are numbered consecutively starting at 0.
enum Type
{
    /// Regular complex-to-complex transform.
    ComplexToComplex = 0,

    /// Complex-to-complex dual transform: each element is four-dimensional,
    /// i.e. a vector of two complex values. Typically used to transform RGBA data.
    ComplexToComplexDual = 1,

    /// Complex-to-real transform.
    /// N / 2 + 1 complex values are used per row, with a stride of N complex samples.
    ComplexToReal = 2,

    /// Real-to-complex transform.
    /// N / 2 + 1 complex output samples are created per row, with a stride of N complex samples.
    RealToComplex = 3
};
/// Kind of resource a transform reads from or writes to.
/// Enumerators are numbered consecutively starting at 0.
enum Target
{
    /// GL_SHADER_STORAGE_BUFFER
    SSBO = 0,

    /// Textures. When used as output, the format follows the transform type:
    ///   ComplexToComplex / RealToComplex -> GL_RG16F
    ///   ComplexToComplexDual             -> GL_RGBA16F
    Image = 1,

    /// Real-valued (single component) textures. When used as output, the
    /// format follows the transform type:
    ///   ComplexToReal -> GL_R32F (because GLES 3.1 doesn't have a GL_R16F image type).
    ImageReal = 2
};
struct Parameters
{
unsigned workgroup_size_x;
unsigned workgroup_size_y;
unsigned workgroup_size_z;
unsigned radix;
unsigned vector_size;
Direction direction;
Mode mode;
Target input_target;
Target output_target;
bool p1;
bool shared_banked;
bool fft_fp16, input_fp16, output_fp16;
bool fft_normalize;
bool operator==(const Parameters &other) const
{
return std::memcmp(this, &other, sizeof(Parameters)) == 0;
}
};
/// @brief Options for the FFT implementation.
/// The performance defaults are deliberately conservative.
struct FFTOptions
{
    struct Performance
    {
        /// Workgroup size used in layout(local_size_x).
        /// Only affects performance; however, large values may make
        /// implementations of smaller sized FFTs impossible.
        /// The FFT constructor will throw in this case.
        unsigned workgroup_size_x{4};

        /// Workgroup size used in layout(local_size_y).
        /// Only affects performance; however, large values may make
        /// implementations of smaller sized FFTs impossible.
        /// The FFT constructor will throw in this case.
        unsigned workgroup_size_y{1};

        /// Vector size. Very GPU dependent: "scalar" GPUs prefer 2 here,
        /// vector GPUs prefer 4 (and maybe 8).
        unsigned vector_size{2};

        /// Whether to use banked shared memory or not.
        /// Desktop GPUs prefer true here; mobile GPUs in general prefer false.
        bool shared_banked{false};
    } performance;

    struct Type
    {
        /// Whether the internal shader should be mediump float.
        bool fp16{false};

        /// Whether the input SSBO is a packed 2xfp16 format. Otherwise, regular FP32.
        bool input_fp16{false};

        /// Whether the output SSBO is a packed 2xfp16 format. Otherwise, regular FP32.
        bool output_fp16{false};

        /// Whether to apply the 1 / N normalization factor.
        bool normalize{false};
    } type;
};
}
namespace std
{
/// std::hash specialization so GLFFT::Parameters can be used as a key in
/// unordered containers.
template<>
struct hash<GLFFT::Parameters>
{
    /// Combines the hash from the individual fields of @p params.
    ///
    /// Only the named fields contribute, never the raw object bytes, so
    /// padding inside the struct cannot influence the result and two objects
    /// whose fields are all equal always hash to the same value. The fields
    /// are folded in order with a shift/add/xor mixing step so the result
    /// spreads over the full std::size_t range instead of collapsing to the
    /// XOR of single bytes.
    ///
    /// @param params Value to hash.
    /// @return Hash value derived from every field of @p params.
    std::size_t operator()(const GLFFT::Parameters &params) const
    {
        std::size_t h = 0;
        // Order-sensitive combine step (same shape as the widely used
        // "hash_combine" idiom).
        const auto mix = [&h](std::size_t value) {
            h ^= value + static_cast<std::size_t>(0x9e3779b9u) + (h << 6) + (h >> 2);
        };

        mix(static_cast<std::size_t>(params.workgroup_size_x));
        mix(static_cast<std::size_t>(params.workgroup_size_y));
        mix(static_cast<std::size_t>(params.workgroup_size_z));
        mix(static_cast<std::size_t>(params.radix));
        mix(static_cast<std::size_t>(params.vector_size));
        mix(static_cast<std::size_t>(params.direction));
        mix(static_cast<std::size_t>(params.mode));
        mix(static_cast<std::size_t>(params.input_target));
        mix(static_cast<std::size_t>(params.output_target));
        mix(params.p1 ? 1u : 0u);
        mix(params.shared_banked ? 1u : 0u);
        mix(params.fft_fp16 ? 1u : 0u);
        mix(params.input_fp16 ? 1u : 0u);
        mix(params.output_fp16 ? 1u : 0u);
        mix(params.fft_normalize ? 1u : 0u);
        return h;
    }
};
}
namespace GLFFT
{
class ProgramCache
{
public:
Program* find_program(const Parameters &parameters) const;
void insert_program(const Parameters &parameters, std::unique_ptr<Program> program);
size_t cache_size() const { return programs.size(); }
private:
std::unordered_map<Parameters, std::unique_ptr<Program>> programs;
};
}
#endif