| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | #pragma once |
| |
|
| | #include "constant_folder-generated.h" |
| | #include "model-generated.h" |
| | #include "model_interface.h" |
| | #include "raii_wrapper.h" |
| |
|
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <deque>
#include <future>
#include <memory>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
| |
|
| | namespace ait { |
| |
|
// State marker for the constants buffers; stored in
// ModelContainerBase::buffer_state_. The numeric values are written out
// explicitly so they do not shift if an enumerator is inserted or reordered.
// NOTE(review): the state transitions are implemented outside this header --
// presumably setting constants moves to CONSTANTS_UPDATED and folding moves
// to CONSTANTS_FOLDED; confirm against the implementation.
enum class BufferState {
  CLEAN = 0,
  CONSTANTS_UPDATED = 1,
  CONSTANTS_FOLDED = 2,
};
| |
|
| | |
| | |
| | |
| | |
| | class ModelContainerBase { |
| | public: |
| | ModelContainerBase( |
| | size_t num_inputs, |
| | size_t num_outputs, |
| | size_t num_bound_constants, |
| | size_t num_unbound_constants, |
| | size_t params_size, |
| | AITemplateAllocator& allocator); |
| |
|
| | protected: |
| | |
| | |
| | |
| | |
| | std::unordered_map<std::string, size_t> bound_constant_name_to_idx_; |
| |
|
| | |
| | |
| | |
| | std::unordered_map<std::string, size_t> unbound_constant_name_to_idx_; |
| |
|
| | |
| | |
| | |
| | |
| | std::unordered_set<std::string> constant_folding_inputs_; |
| | std::unordered_set<std::string> constant_folding_optional_inputs_; |
| |
|
| | |
| | |
| | |
| | std::vector<size_t> constant_folding_outputs_offsets_; |
| | |
| | std::vector<size_t> bound_constant_offsets_; |
| |
|
| | |
| | size_t constants_size_; |
| | |
| | |
| | GPUPtr constants_primary_; |
| | GPUPtr constants_secondary_; |
| | bool use_constants_primary_buffer_; |
| | |
| | BufferState buffer_state_; |
| | |
| | std::unordered_map<std::string, const void*> model_constants_; |
| |
|
| | |
| | size_t num_params_; |
| |
|
| | |
| | |
| | std::vector<const char*> param_names_; |
| | std::vector<std::vector<int64_t>> max_param_shapes_; |
| | std::vector<AITemplateDtype> param_dtypes_; |
| |
|
| | |
| | std::vector<size_t> bound_constant_size_; |
| | std::vector<AITemplateDtype> bound_constant_dtypes_; |
| |
|
| | |
| | |
| | std::vector<size_t> max_param_storage_bytes_; |
| | std::vector<size_t> max_param_numel_; |
| | }; |
| |
|
| | |
| | |
| | |
| | class ModelContainer; |
| | ModelContainer* CreateModelContainer( |
| | size_t num_runtimes, |
| | AITemplateAllocator& allocator); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | class ModelContainer : ModelContainerBase { |
| | public: |
| | ModelContainer( |
| | size_t num_models, |
| | size_t num_inputs, |
| | size_t num_outputs, |
| | size_t num_bound_constants, |
| | size_t num_unbound_constants, |
| | size_t params_size, |
| | AITemplateAllocator& allocator); |
| |
|
| | void Run( |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs, |
| | StreamType stream, |
| | bool sync, |
| | bool graph_mode, |
| | int64_t** output_shapes_out); |
| |
|
| | void RunWithOutputsOnHost( |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs, |
| | StreamType stream, |
| | bool graph_mode, |
| | int64_t** output_shapes_out); |
| |
|
| | void Profile( |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs, |
| | StreamType stream, |
| | size_t num_iters, |
| | const char* filename); |
| |
|
| | float Benchmark( |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs, |
| | StreamType stream, |
| | bool graph_mode, |
| | size_t count, |
| | size_t num_threads, |
| | bool use_unique_stream_per_thread, |
| | int64_t** output_shapes_out); |
| |
|
| | void SetConstant(const char* name, const AITData& tensor); |
| | void SetManyConstants( |
| | const char** names, |
| | const AITData* tensors, |
| | size_t num_tensors); |
| |
|
| | uint8_t* GetInactiveConstantsBuffer(); |
| | void SetDoubleBufferConstant( |
| | const char* name, |
| | const AITData& tensor, |
| | StreamType stream = 0); |
| | void SetManyDoubleBufferConstants( |
| | const char** names, |
| | const AITData* tensors, |
| | size_t num_tensors, |
| | StreamType stream = 0); |
| |
|
| | size_t NumInputs() const; |
| | size_t NumOutputs() const; |
| |
|
| | const char* InputName(size_t input_idx) const; |
| | const char* OutputName(size_t output_idx) const; |
| |
|
| | AITemplateParamShape MaxInputShape(size_t input_idx) const; |
| | AITemplateParamShape MaxOutputShape(size_t output_idx) const; |
| |
|
| | AITemplateDtype InputDtype(size_t input_idx) const; |
| | AITemplateDtype OutputDtype(size_t output_idx) const; |
| |
|
| | size_t MaxOutputStorageBytes(size_t output_idx) const; |
| |
|
| | size_t GetNumRuntimes() const { |
| | return models_.size(); |
| | } |
| |
|
| | void FoldConstants(StreamType stream, bool sync, bool double_buffer = false); |
| | void SwapConstants(); |
| |
|
| | size_t GetNumConstants(bool unbound_constants_only = true) const; |
| | size_t GetNumConstantFoldingInputs(bool unbound_constants_only = true) const; |
| |
|
| | |
| | |
| | |
| | |
| | void WriteAllConstantNamesTo( |
| | const char** names_out, |
| | bool unbound_constants_only, |
| | bool constant_folding_inputs_only) const; |
| |
|
| | private: |
| | void WaitForAllModels(bool include_constant_folder = false); |
| | void FoldConstantsImpl(StreamType stream, bool double_buffer = false); |
| | void SetConstantImpl( |
| | const char* name, |
| | const AITData& tensor, |
| | bool use_secondary_buffer = false, |
| | StreamType stream = 0); |
| | void SwapConstantFolderBuffer(); |
| |
|
| | void PrepareForRun( |
| | Model* model, |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs); |
| |
|
| | Model* GetAvailableModel(); |
| | void ReclaimFinishedModels(std::unique_lock<std::mutex>& lk); |
| | void ValidateParamDtype(AITemplateDtype dtype, size_t idx) const; |
| | void ValidateBoundConstantDtype(AITemplateDtype dtype, size_t idx) const; |
| |
|
| | float BenchmarkImpl( |
| | const AITData* inputs, |
| | size_t num_inputs, |
| | AITData* outputs, |
| | size_t num_outputs, |
| | StreamType stream, |
| | bool graph_mode, |
| | size_t count, |
| | int64_t** output_shapes_out); |
| |
|
| | AITemplateAllocator& allocator_; |
| |
|
| | std::vector<std::unique_ptr<Model>> models_; |
| | std::unique_ptr<ConstantFolder> constant_folder_; |
| | std::vector<Model*> available_models_; |
| | std::deque<Model*> pending_models_; |
| |
|
| | |
| | std::mutex models_mutex_; |
| | |
| | std::condition_variable pending_models_available_; |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | std::shared_mutex constants_sync_mutex_; |
| | |
| | |
| | std::shared_mutex constants_double_buffer_mutex_; |
| |
|
| | size_t num_inputs_; |
| | size_t num_outputs_; |
| |
|
| | bool constant_folded_once_ = false; |
| | }; |
| |
|
| | } |
| |
|