| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #pragma once |
| |
|
| | #include "AbstractConfig.h" |
| | #include "ActivityType.h" |
| |
|
| | #include <assert.h> |
| | #include <chrono> |
| | #include <functional> |
| | #include <set> |
| | #include <string> |
| | #include <vector> |
| |
|
| | namespace libkineto { |
| |
|
| | class Config : public AbstractConfig { |
| | public: |
| | Config(); |
| | Config& operator=(const Config&) = delete; |
| | Config(Config&&) = delete; |
| | Config& operator=(Config&&) = delete; |
| |
|
| | |
| | std::unique_ptr<Config> clone() const { |
| | auto cfg = std::unique_ptr<Config>(new Config(*this)); |
| | cloneFeaturesInto(*cfg); |
| | return cfg; |
| | } |
| |
|
| | bool handleOption(const std::string& name, std::string& val) override; |
| |
|
| | void setClientDefaults() override; |
| |
|
| | |
| | const std::string& eventLogFile() const { |
| | return eventLogFile_; |
| | } |
| |
|
| | bool activityProfilerEnabled() const { |
| | return activityProfilerEnabled_ || |
| | activitiesOnDemandTimestamp_.time_since_epoch().count() > 0; |
| | } |
| |
|
| | |
| | const std::string& activitiesLogFile() const { |
| | return activitiesLogFile_; |
| | } |
| |
|
| | |
| | const std::string& activitiesLogUrl() const { |
| | return activitiesLogUrl_; |
| | } |
| |
|
| | void setActivitiesLogUrl(const std::string& url) { |
| | activitiesLogUrl_ = url; |
| | } |
| |
|
| | bool activitiesLogToMemory() const { |
| | return activitiesLogToMemory_; |
| | } |
| |
|
| | bool eventProfilerEnabled() const { |
| | return !eventNames_.empty() || !metricNames_.empty(); |
| | } |
| |
|
| | |
| | bool eventProfilerEnabledForDevice(uint32_t dev) const { |
| | return 0 != (eventProfilerDeviceMask_ & (1 << dev)); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | std::chrono::milliseconds samplePeriod() const { |
| | return samplePeriod_; |
| | } |
| |
|
| | void setSamplePeriod(std::chrono::milliseconds period) { |
| | samplePeriod_ = period; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | std::chrono::milliseconds multiplexPeriod() const { |
| | return multiplexPeriod_; |
| | } |
| |
|
| | void setMultiplexPeriod(std::chrono::milliseconds period) { |
| | multiplexPeriod_ = period; |
| | } |
| |
|
| | |
| | |
| | std::chrono::milliseconds reportPeriod() const { |
| | return reportPeriod_; |
| | } |
| |
|
| | void setReportPeriod(std::chrono::milliseconds msecs); |
| |
|
| | |
| | |
| | |
| | int samplesPerReport() const { |
| | return samplesPerReport_; |
| | } |
| |
|
| | void setSamplesPerReport(int count) { |
| | samplesPerReport_ = count; |
| | } |
| |
|
| | |
| | const std::set<std::string>& eventNames() const { |
| | return eventNames_; |
| | } |
| |
|
| | |
| | void addEvents(const std::set<std::string>& names) { |
| | eventNames_.insert(names.begin(), names.end()); |
| | } |
| |
|
| | |
| | const std::set<std::string>& metricNames() const { |
| | return metricNames_; |
| | } |
| |
|
| | |
| | void addMetrics(const std::set<std::string>& names) { |
| | metricNames_.insert(names.begin(), names.end()); |
| | } |
| |
|
| | const std::vector<int>& percentiles() const { |
| | return eventReportPercentiles_; |
| | } |
| |
|
| | |
| | std::chrono::seconds eventProfilerOnDemandDuration() const { |
| | return eventProfilerOnDemandDuration_; |
| | } |
| |
|
| | void setEventProfilerOnDemandDuration(std::chrono::seconds duration) { |
| | eventProfilerOnDemandDuration_ = duration; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | int maxEventProfilersPerGpu() const { |
| | return eventProfilerMaxInstancesPerGpu_; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | std::chrono::seconds eventProfilerHeartbeatMonitorPeriod() const { |
| | return eventProfilerHeartbeatMonitorPeriod_; |
| | } |
| |
|
| | |
| | const std::set<ActivityType>& selectedActivityTypes() const { |
| | return selectedActivityTypes_; |
| | } |
| |
|
| | |
| | bool perThreadBufferEnabled() const { |
| | return perThreadBufferEnabled_; |
| | } |
| |
|
| | void setSelectedActivityTypes(const std::set<ActivityType>& types) { |
| | selectedActivityTypes_ = types; |
| | } |
| |
|
| | bool isReportInputShapesEnabled() const { |
| | return enableReportInputShapes_; |
| | } |
| |
|
| | bool isProfileMemoryEnabled() const { |
| | return enableProfileMemory_; |
| | } |
| |
|
| | bool isWithStackEnabled() const { |
| | return enableWithStack_; |
| | } |
| |
|
| | bool isWithFlopsEnabled() const { |
| | return enableWithFlops_; |
| | } |
| |
|
| | bool isWithModulesEnabled() const { |
| | return enableWithModules_; |
| | } |
| |
|
| | |
| | std::chrono::milliseconds activitiesDuration() const { |
| | return activitiesDuration_; |
| | } |
| |
|
| | |
| | int activitiesRunIterations() const { |
| | return activitiesRunIterations_; |
| | } |
| |
|
| | int activitiesMaxGpuBufferSize() const { |
| | return activitiesMaxGpuBufferSize_; |
| | } |
| |
|
| | std::chrono::seconds activitiesWarmupDuration() const { |
| | return activitiesWarmupDuration_; |
| | } |
| |
|
| | int activitiesWarmupIterations() const { |
| | return activitiesWarmupIterations_; |
| | } |
| |
|
| | |
| | bool activitiesCudaSyncWaitEvents() const { |
| | return activitiesCudaSyncWaitEvents_; |
| | } |
| |
|
| | void setActivitiesCudaSyncWaitEvents(bool enable) { |
| | activitiesCudaSyncWaitEvents_ = enable; |
| | } |
| |
|
| | |
| | const std::chrono::time_point<std::chrono::system_clock> requestTimestamp() |
| | const { |
| | if (profileStartTime_.time_since_epoch().count()) { |
| | return profileStartTime_; |
| | } |
| | |
| | if (requestTimestamp_.time_since_epoch().count() == 0) { |
| | return requestTimestamp_; |
| | } |
| |
|
| | |
| | return requestTimestamp_ + maxRequestAge() + activitiesWarmupDuration(); |
| | } |
| |
|
| | bool hasProfileStartTime() const { |
| | return requestTimestamp_.time_since_epoch().count() > 0 || |
| | profileStartTime_.time_since_epoch().count() > 0; |
| | } |
| |
|
| | int profileStartIteration() const { |
| | return profileStartIteration_; |
| | } |
| |
|
| | bool hasProfileStartIteration() const { |
| | return profileStartIteration_ >= 0 && activitiesRunIterations_ > 0; |
| | } |
| |
|
| | void setProfileStartIteration(int iter) { |
| | profileStartIteration_ = iter; |
| | } |
| |
|
| | int profileStartIterationRoundUp() const { |
| | return profileStartIterationRoundUp_; |
| | } |
| |
|
| | |
| | int startIterationIncludingWarmup() const { |
| | if (!hasProfileStartIteration()) { |
| | return -1; |
| | } |
| | return profileStartIteration_ - activitiesWarmupIterations_; |
| | } |
| |
|
| | const std::chrono::seconds maxRequestAge() const; |
| |
|
| | |
| | |
| | |
| | int verboseLogLevel() const { |
| | return verboseLogLevel_; |
| | } |
| |
|
| | |
| | |
| | const std::vector<std::string>& verboseLogModules() const { |
| | return verboseLogModules_; |
| | } |
| |
|
| | bool sigUsr2Enabled() const { |
| | return enableSigUsr2_; |
| | } |
| |
|
| | bool ipcFabricEnabled() const { |
| | return enableIpcFabric_; |
| | } |
| |
|
| | std::chrono::seconds onDemandConfigUpdateIntervalSecs() const { |
| | return onDemandConfigUpdateIntervalSecs_; |
| | } |
| |
|
| | static std::chrono::milliseconds alignUp( |
| | std::chrono::milliseconds duration, |
| | std::chrono::milliseconds alignment) { |
| | duration += alignment; |
| | return duration - (duration % alignment); |
| | } |
| |
|
| | std::chrono::time_point<std::chrono::system_clock> |
| | eventProfilerOnDemandStartTime() const { |
| | return eventProfilerOnDemandTimestamp_; |
| | } |
| |
|
| | std::chrono::time_point<std::chrono::system_clock> |
| | eventProfilerOnDemandEndTime() const { |
| | return eventProfilerOnDemandTimestamp_ + eventProfilerOnDemandDuration_; |
| | } |
| |
|
| | std::chrono::time_point<std::chrono::system_clock> |
| | activityProfilerRequestReceivedTime() const { |
| | return activitiesOnDemandTimestamp_; |
| | } |
| |
|
| | static constexpr std::chrono::milliseconds kControllerIntervalMsecs{1000}; |
| |
|
| | |
| | const std::string& requestTraceID() const { |
| | return requestTraceID_; |
| | } |
| |
|
| | void setRequestTraceID(const std::string& tid) { |
| | requestTraceID_ = tid; |
| | } |
| |
|
| | const std::string& requestGroupTraceID() const { |
| | return requestGroupTraceID_; |
| | } |
| |
|
| | void setRequestGroupTraceID(const std::string& gtid) { |
| | requestGroupTraceID_ = gtid; |
| | } |
| |
|
| | size_t cuptiDeviceBufferSize() const { |
| | return cuptiDeviceBufferSize_; |
| | } |
| |
|
| | size_t cuptiDeviceBufferPoolLimit() const { |
| | return cuptiDeviceBufferPoolLimit_; |
| | } |
| |
|
| | bool memoryProfilerEnabled() const { |
| | return memoryProfilerEnabled_; |
| | } |
| |
|
| | int profileMemoryDuration() const { |
| | return profileMemoryDuration_; |
| | } |
| | void updateActivityProfilerRequestReceivedTime(); |
| |
|
| | void printActivityProfilerConfig(std::ostream& s) const override; |
| | void setActivityDependentConfig() override; |
| |
|
| | void validate(const std::chrono::time_point<std::chrono::system_clock>& |
| | fallbackProfileStartTime) override; |
| |
|
| | static void addConfigFactory( |
| | std::string name, |
| | std::function<AbstractConfig*(Config&)> factory); |
| |
|
| | void print(std::ostream& s) const; |
| |
|
| | |
| | |
| | |
| | |
| | static std::shared_ptr<void> getStaticObjectsLifetimeHandle(); |
| |
|
| | bool getTSCTimestampFlag() const { |
| | return useTSCTimestamp_; |
| | } |
| |
|
| | void setTSCTimestampFlag(bool flag) { |
| | useTSCTimestamp_ = flag; |
| | } |
| |
|
| | const std::string& getCustomConfig() const { |
| | return customConfig_; |
| | } |
| |
|
| | uint32_t maxEvents() const { |
| | return maxEvents_; |
| | } |
| |
|
| | private: |
| | explicit Config(const Config& other) = default; |
| |
|
| | AbstractConfig* cloneDerived(AbstractConfig& parent) const override { |
| | |
| | assert(false); |
| | return nullptr; |
| | } |
| |
|
| | uint8_t createDeviceMask(const std::string& val); |
| |
|
| | |
| | |
| | void setActivityTypes(const std::vector<std::string>& selected_activities); |
| |
|
| | |
| | void selectDefaultActivityTypes() { |
| | |
| | for (ActivityType t : defaultActivityTypes()) { |
| | selectedActivityTypes_.insert(t); |
| | } |
| | } |
| |
|
| | int verboseLogLevel_; |
| | std::vector<std::string> verboseLogModules_; |
| |
|
| | |
| | |
| | std::chrono::milliseconds samplePeriod_; |
| | std::chrono::milliseconds reportPeriod_; |
| | int samplesPerReport_; |
| | std::set<std::string> eventNames_; |
| | std::set<std::string> metricNames_; |
| |
|
| | |
| | std::chrono::seconds eventProfilerOnDemandDuration_; |
| | |
| | std::chrono::time_point<std::chrono::system_clock> |
| | eventProfilerOnDemandTimestamp_; |
| |
|
| | int eventProfilerMaxInstancesPerGpu_; |
| |
|
| | |
| | |
| | std::chrono::seconds eventProfilerHeartbeatMonitorPeriod_; |
| |
|
| | |
| | std::string eventLogFile_; |
| | std::vector<int> eventReportPercentiles_ = {5, 25, 50, 75, 95}; |
| | uint8_t eventProfilerDeviceMask_ = ~0; |
| | std::chrono::milliseconds multiplexPeriod_; |
| |
|
| | |
| | bool activityProfilerEnabled_; |
| |
|
| | |
| | bool perThreadBufferEnabled_; |
| | std::set<ActivityType> selectedActivityTypes_; |
| |
|
| | |
| | std::string activitiesLogFile_; |
| |
|
| | std::string activitiesLogUrl_; |
| |
|
| | |
| | bool activitiesLogToMemory_{false}; |
| |
|
| | int activitiesMaxGpuBufferSize_; |
| | std::chrono::seconds activitiesWarmupDuration_; |
| | int activitiesWarmupIterations_; |
| | bool activitiesCudaSyncWaitEvents_; |
| |
|
| | |
| | |
| | |
| | bool enableReportInputShapes_{false}; |
| | bool enableProfileMemory_{false}; |
| | bool enableWithStack_{false}; |
| | bool enableWithFlops_{false}; |
| | bool enableWithModules_{false}; |
| |
|
| | |
| | std::chrono::milliseconds activitiesDuration_; |
| | int activitiesRunIterations_; |
| |
|
| | |
| | |
| | std::string activitiesExternalAPIIterationsTarget_; |
| | |
| | std::vector<std::string> activitiesExternalAPIFilter_; |
| | |
| | int activitiesExternalAPINetSizeThreshold_; |
| | |
| | int activitiesExternalAPIGpuOpCountThreshold_; |
| | |
| | std::chrono::time_point<std::chrono::system_clock> |
| | activitiesOnDemandTimestamp_; |
| |
|
| | |
| | |
| | std::chrono::time_point<std::chrono::system_clock> profileStartTime_; |
| | |
| | int profileStartIteration_; |
| | int profileStartIterationRoundUp_; |
| |
|
| | |
| | std::chrono::time_point<std::chrono::system_clock> requestTimestamp_; |
| |
|
| | |
| | bool enableSigUsr2_; |
| |
|
| | |
| | bool enableIpcFabric_; |
| | std::chrono::seconds onDemandConfigUpdateIntervalSecs_; |
| |
|
| | |
| | std::string requestTraceID_; |
| | std::string requestGroupTraceID_; |
| |
|
| | |
| | size_t cuptiDeviceBufferSize_; |
| | size_t cuptiDeviceBufferPoolLimit_; |
| |
|
| | |
| | bool useTSCTimestamp_{true}; |
| |
|
| | |
| | bool memoryProfilerEnabled_{false}; |
| | int profileMemoryDuration_{1000}; |
| |
|
| | |
| | |
| | std::string customConfig_; |
| |
|
| | |
| | uint32_t maxEvents_{1000000}; |
| | }; |
| |
|
// Environment variable name "KINETO_USE_DAEMON". Presumably this is the
// variable inspected by isDaemonEnvVarSet() - TODO confirm against the
// implementation, which is not part of this header.
| | constexpr char kUseDaemonEnvVar[] = "KINETO_USE_DAEMON"; |
| |
|
// Declared here and defined elsewhere. Judging by the name, reports whether
// the daemon environment variable is set - TODO confirm the exact criteria.
| | bool isDaemonEnvVarSet(); |
| |
|
| | } |
| |
|