glava-ridged/glfft/glfft_wisdom.hpp

150 lines
4.8 KiB
C++
Raw Normal View History

2023-09-07 16:16:06 -06:00
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
*
* Permission is hereby granted, free of charge,
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef GLFFT_WISDOM_HPP__
#define GLFFT_WISDOM_HPP__
#include <unordered_map>
#include <utility>
#include <string>
#include "glfft_common.hpp"
#include "glfft_interface.hpp"
namespace GLFFT
{
// Describes one FFT pass configuration together with a cost value.
//
// Identity is defined purely by the raw bytes of `pass`: operator== below
// compares that block with memcmp, and `cost` takes no part in it.
struct WisdomPass
{
    // Parameters identifying the pass. Because comparison is bytewise, any
    // padding inside this struct would take part in the comparison as well.
    struct
    {
        unsigned Nx;
        unsigned Ny;
        unsigned radix;
        Mode mode;
        Target input_target;
        Target output_target;
        FFTOptions::Type type;
    } pass;

    // Cost value carried alongside the pass; ignored by operator==.
    double cost;

    // Two entries are equal when their `pass` blocks are byte-for-byte identical.
    bool operator==(const WisdomPass &other) const
    {
        const int difference = std::memcmp(&pass, &other.pass, sizeof(pass));
        return difference == 0;
    }
};
}
namespace std
{
// Hash for GLFFT::WisdomPass, covering exactly the bytes of `params.pass`
// (the same bytes WisdomPass::operator== compares), so equal keys always
// produce equal hashes. `cost` is deliberately not hashed.
//
// Uses FNV-1a instead of XOR-ing the per-byte hashes: a plain XOR fold is
// insensitive to byte order and, when hash<uint8_t> is the identity, can only
// produce 256 distinct values, which makes unordered_map lookups degrade
// into long bucket scans.
template<>
struct hash<GLFFT::WisdomPass>
{
    // Returns the FNV-1a hash of the object representation of `params.pass`.
    std::size_t operator()(const GLFFT::WisdomPass &params) const
    {
        // Select the 64-bit or 32-bit FNV parameters to match std::size_t.
        const bool wide = sizeof(std::size_t) >= 8;
        const std::size_t prime = wide ? static_cast<std::size_t>(0x100000001b3ull)
                                       : static_cast<std::size_t>(0x01000193u);
        std::size_t h = wide ? static_cast<std::size_t>(0xcbf29ce484222325ull)
                             : static_cast<std::size_t>(0x811c9dc5u);

        // unsigned char may alias any object, so reading the bytes is well-defined.
        const unsigned char *bytes = reinterpret_cast<const unsigned char *>(&params.pass);
        for (std::size_t i = 0; i < sizeof(params.pass); i++)
        {
            h ^= bytes[i];
            h *= prime; // unsigned arithmetic: wrap-around is intended
        }
        return h;
    }
};
}
namespace GLFFT
{
// Adds information which depends on the GPU vendor.
// This can speed up the learning process, since there will be fewer
// "obviously wrong" settings to test.
struct FFTStaticWisdom
{
    // Three-valued flag; the numeric values are fixed explicitly.
    enum Tristate
    {
        DontCare = -1,
        False = 0,
        True = 1
    };

    // Workgroup size limits.
    unsigned min_workgroup_size = 1;
    unsigned min_workgroup_size_shared = 1;
    unsigned max_workgroup_size = 128; // GLES 3.1 mandates support for this.

    // Vector size limits.
    unsigned min_vector_size = 2;
    unsigned max_vector_size = 4;

    // Defaults to DontCare.
    Tristate shared_banked = DontCare;
};
class FFTWisdom
{
public:
std::pair<double, FFTOptions::Performance> learn_optimal_options(Context *ctx,
unsigned Nx, unsigned Ny, unsigned radix,
Mode mode, Target input_target, Target output_target, const FFTOptions::Type &type);
void learn_optimal_options_exhaustive(Context *ctx,
unsigned Nx, unsigned Ny,
Type type, Target input_target, Target output_target, const FFTOptions::Type &fft_type);
const std::pair<const WisdomPass, FFTOptions::Performance>* find_optimal_options(unsigned Nx, unsigned Ny, unsigned radix,
Mode mode, Target input_target, Target output_target, const FFTOptions::Type &base_options) const;
const FFTOptions::Performance& find_optimal_options_or_default(unsigned Nx, unsigned Ny, unsigned radix,
Mode mode, Target input_target, Target output_target, const FFTOptions &base_options) const;
void set_static_wisdom(FFTStaticWisdom static_wisdom) { this->static_wisdom = static_wisdom; }
static FFTStaticWisdom get_static_wisdom_from_renderer(Context *context);
void set_bench_params(unsigned warmup,
unsigned iterations, unsigned dispatches, double timeout)
{
params.warmup = warmup;
params.iterations = iterations;
params.dispatches = dispatches;
params.timeout = timeout;
}
#ifdef GLFFT_SERIALIZATION
// Serialization interface.
std::string archive() const;
void extract(const char *json);
#endif
private:
std::unordered_map<WisdomPass, FFTOptions::Performance> library;
std::pair<double, FFTOptions::Performance> study(Context *context,
const WisdomPass &pass, FFTOptions::Type options) const;
double bench(Context *cmd, Resource *output, Resource *input,
const WisdomPass &pass, const FFTOptions &options,
const std::shared_ptr<ProgramCache> &cache) const;
FFTStaticWisdom static_wisdom;
struct
{
unsigned warmup = 2;
unsigned iterations = 20;
unsigned dispatches = 50;
double timeout = 1.0;
} params;
};
}
#endif