glava-ridged/glfft/glfft_wisdom.hpp

/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
 *
 * Permission is hereby granted, free of charge,
 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef GLFFT_WISDOM_HPP__
#define GLFFT_WISDOM_HPP__

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include "glfft_common.hpp"
#include "glfft_interface.hpp"

namespace GLFFT
{

// Describes one FFT pass together with a cost value.
// Equality looks only at `pass`; `cost` does not take part in the comparison.
struct WisdomPass
{
    // Parameters of the pass. operator== compares this sub-object as raw
    // bytes, so its member order and layout must stay as they are.
    // NOTE(review): a raw-byte comparison also covers any padding bytes;
    // presumably instances are fully initialized before use -- confirm.
    struct
    {
        unsigned Nx;
        unsigned Ny;
        unsigned radix;
        Mode mode;
        Target input_target;
        Target output_target;
        FFTOptions::Type type;
    } pass;

    // Ignored by operator==.
    double cost;

    // Returns true when the `pass` members of both objects are bytewise identical.
    bool operator==(const WisdomPass &rhs) const
    {
        const int difference = std::memcmp(&pass, &rhs.pass, sizeof(pass));
        return difference == 0;
    }
};

}

namespace std
{
    // std::hash specialization for GLFFT::WisdomPass.
    // Hashes the raw bytes of `params.pass` only, which is the same byte range
    // that WisdomPass::operator== compares, so equal keys always hash equally.
    template<>
    struct hash<GLFFT::WisdomPass>
    {
        std::size_t operator()(const GLFFT::WisdomPass &params) const
        {
            // FNV-1a, with the 64-bit or 32-bit parameters picked to match size_t.
            //
            // Why not simply XOR per-byte hashes together: XOR ignores byte order
            // and lets equal bytes cancel each other, so e.g. swapping Nx and Ny
            // always collides. Where std::hash of an 8-bit integer returns the
            // value unchanged (common in practice), the combined result also never
            // leaves the range 0..255. Multiplying by the FNV prime after each
            // byte makes the result depend on both position and value.
            constexpr bool wide = sizeof(std::size_t) >= 8;
            constexpr std::size_t fnv_offset = wide
                ? static_cast<std::size_t>(14695981039346656037ULL)
                : static_cast<std::size_t>(2166136261UL);
            constexpr std::size_t fnv_prime = wide
                ? static_cast<std::size_t>(1099511628211ULL)
                : static_cast<std::size_t>(16777619UL);

            const unsigned char *bytes = reinterpret_cast<const unsigned char*>(&params.pass);

            std::size_t h = fnv_offset;
            for (std::size_t i = 0; i < sizeof(params.pass); i++)
            {
                h ^= static_cast<std::size_t>(bytes[i]);
                h *= fnv_prime; // unsigned wrap-around is intended
            }

            return h;
        }
    };
}

namespace GLFFT
{

// Hints that depend on the GPU vendor.
// Providing them can shorten the learning process, because fewer "obviously wrong" settings have to be tested.
struct FFTStaticWisdom
{
    // Three-valued flag: explicitly on, explicitly off, or no preference.
    enum Tristate
    {
        DontCare = -1,
        False = 0,
        True = 1
    };

    unsigned min_workgroup_size{1};
    unsigned min_workgroup_size_shared{1};
    unsigned max_workgroup_size{128}; // GLES 3.1 mandates support for this.
    unsigned min_vector_size{2};
    unsigned max_vector_size{4};
    Tristate shared_banked{DontCare};
};

class FFTWisdom
{
    public:
        std::pair<double, FFTOptions::Performance> learn_optimal_options(Context *ctx,
                unsigned Nx, unsigned Ny, unsigned radix,
                Mode mode, Target input_target, Target output_target, const FFTOptions::Type &type);

        void learn_optimal_options_exhaustive(Context *ctx,
                unsigned Nx, unsigned Ny,
                Type type, Target input_target, Target output_target, const FFTOptions::Type &fft_type);

        const std::pair<const WisdomPass, FFTOptions::Performance>* find_optimal_options(unsigned Nx, unsigned Ny, unsigned radix,
                Mode mode, Target input_target, Target output_target, const FFTOptions::Type &base_options) const;

        const FFTOptions::Performance& find_optimal_options_or_default(unsigned Nx, unsigned Ny, unsigned radix,
                Mode mode, Target input_target, Target output_target, const FFTOptions &base_options) const;

        void set_static_wisdom(FFTStaticWisdom static_wisdom) { this->static_wisdom = static_wisdom; }
        static FFTStaticWisdom get_static_wisdom_from_renderer(Context *context);

        void set_bench_params(unsigned warmup,
                unsigned iterations, unsigned dispatches, double timeout)
        {
            params.warmup = warmup;
            params.iterations = iterations;
            params.dispatches = dispatches;
            params.timeout = timeout;
        }

#ifdef GLFFT_SERIALIZATION
        // Serialization interface.
        std::string archive() const;
        void extract(const char *json);
#endif

    private:
        std::unordered_map<WisdomPass, FFTOptions::Performance> library;

        std::pair<double, FFTOptions::Performance> study(Context *context,
                const WisdomPass &pass, FFTOptions::Type options) const;

        double bench(Context *cmd, Resource *output, Resource *input,
                const WisdomPass &pass, const FFTOptions &options,
                const std::shared_ptr<ProgramCache> &cache) const;

        FFTStaticWisdom static_wisdom;

        struct
        {
            unsigned warmup = 2;
            unsigned iterations = 20;
            unsigned dispatches = 50;
            double timeout = 1.0;
        } params;
};

}

#endif
Init 2023-09-07 16:16:06 -06:00			`/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>`
			`*`
			`* Permission is hereby granted, free of charge,`
			`* to any person obtaining a copy of this software and associated documentation files (the "Software"),`
			`* to deal in the Software without restriction, including without limitation the rights to`
			`* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,`
			`* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,`
			`* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.`
			`* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,`
			`* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
			`*/`

			`#ifndef GLFFT_WISDOM_HPP__`
			`#define GLFFT_WISDOM_HPP__`

			`#include <unordered_map>`
			`#include <utility>`
			`#include <string>`
			`#include "glfft_common.hpp"`
			`#include "glfft_interface.hpp"`

			`namespace GLFFT`
			`{`

			`struct WisdomPass`
			`{`
			`struct`
			`{`
			`unsigned Nx;`
			`unsigned Ny;`
			`unsigned radix;`
			`Mode mode;`
			`Target input_target;`
			`Target output_target;`
			`FFTOptions::Type type;`
			`} pass;`

			`double cost;`

			`bool operator==(const WisdomPass &other) const`
			`{`
			`return std::memcmp(&pass, &other.pass, sizeof(pass)) == 0;`
			`}`
			`};`

			`}`

			`namespace std`
			`{`
			`template<>`
			`struct hash<GLFFT::WisdomPass>`
			`{`
			`std::size_t operator()(const GLFFT::WisdomPass &params) const`
			`{`
			`std::size_t h = 0;`
			`hash<uint8_t> hasher;`
			`for (std::size_t i = 0; i < sizeof(params.pass); i++)`
			`{`
			`h ^= hasher(reinterpret_cast<const uint8_t*>(&params.pass)[i]);`
			`}`

			`return h;`
			`}`
			`};`
			`}`

			`namespace GLFFT`
			`{`

			`// Adds information which depends on the GPU vendor.`
			`// This can speed up learning process, since there will be fewer "obviously wrong" settings to test.`
			`struct FFTStaticWisdom`
			`{`
			`enum Tristate { True = 1, False = 0, DontCare = -1 };`

			`unsigned min_workgroup_size = 1;`
			`unsigned min_workgroup_size_shared = 1;`
			`unsigned max_workgroup_size = 128; // GLES 3.1 mandates support for this.`
			`unsigned min_vector_size = 2;`
			`unsigned max_vector_size = 4;`
			`Tristate shared_banked = DontCare;`
			`};`

			`class FFTWisdom`
			`{`
			`public:`
			`std::pair<double, FFTOptions::Performance> learn_optimal_options(Context *ctx,`
			`unsigned Nx, unsigned Ny, unsigned radix,`
			`Mode mode, Target input_target, Target output_target, const FFTOptions::Type &type);`

			`void learn_optimal_options_exhaustive(Context *ctx,`
			`unsigned Nx, unsigned Ny,`
			`Type type, Target input_target, Target output_target, const FFTOptions::Type &fft_type);`

			`const std::pair<const WisdomPass, FFTOptions::Performance>* find_optimal_options(unsigned Nx, unsigned Ny, unsigned radix,`
			`Mode mode, Target input_target, Target output_target, const FFTOptions::Type &base_options) const;`

			`const FFTOptions::Performance& find_optimal_options_or_default(unsigned Nx, unsigned Ny, unsigned radix,`
			`Mode mode, Target input_target, Target output_target, const FFTOptions &base_options) const;`

			`void set_static_wisdom(FFTStaticWisdom static_wisdom) { this->static_wisdom = static_wisdom; }`
			`static FFTStaticWisdom get_static_wisdom_from_renderer(Context *context);`

			`void set_bench_params(unsigned warmup,`
			`unsigned iterations, unsigned dispatches, double timeout)`
			`{`
			`params.warmup = warmup;`
			`params.iterations = iterations;`
			`params.dispatches = dispatches;`
			`params.timeout = timeout;`
			`}`

			`#ifdef GLFFT_SERIALIZATION`
			`// Serialization interface.`
			`std::string archive() const;`
			`void extract(const char *json);`
			`#endif`

			`private:`
			`std::unordered_map<WisdomPass, FFTOptions::Performance> library;`

			`std::pair<double, FFTOptions::Performance> study(Context *context,`
			`const WisdomPass &pass, FFTOptions::Type options) const;`

			`double bench(Context *cmd, Resource *output, Resource *input,`
			`const WisdomPass &pass, const FFTOptions &options,`
			`const std::shared_ptr<ProgramCache> &cache) const;`

			`FFTStaticWisdom static_wisdom;`

			`struct`
			`{`
			`unsigned warmup = 2;`
			`unsigned iterations = 20;`
			`unsigned dispatches = 50;`
			`double timeout = 1.0;`
			`} params;`
			`};`

			`}`

			`#endif`