|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
#include "AbstractConfig.h"
|
|
|
#include "ActivityType.h"
|
|
|
|
|
|
#include <assert.h>
|
|
|
#include <chrono>
|
|
|
#include <functional>
|
|
|
#include <set>
|
|
|
#include <string>
|
|
|
#include <vector>
|
|
|
|
|
|
namespace libkineto {
|
|
|
|
|
|
class Config : public AbstractConfig {
|
|
|
public:
|
|
|
Config();
|
|
|
Config& operator=(const Config&) = delete;
|
|
|
Config(Config&&) = delete;
|
|
|
Config& operator=(Config&&) = delete;
|
|
|
|
|
|
|
|
|
std::unique_ptr<Config> clone() const {
|
|
|
auto cfg = std::unique_ptr<Config>(new Config(*this));
|
|
|
cloneFeaturesInto(*cfg);
|
|
|
return cfg;
|
|
|
}
|
|
|
|
|
|
bool handleOption(const std::string& name, std::string& val) override;
|
|
|
|
|
|
void setClientDefaults() override;
|
|
|
|
|
|
|
|
|
const std::string& eventLogFile() const {
|
|
|
return eventLogFile_;
|
|
|
}
|
|
|
|
|
|
bool activityProfilerEnabled() const {
|
|
|
return activityProfilerEnabled_ ||
|
|
|
activitiesOnDemandTimestamp_.time_since_epoch().count() > 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::string& activitiesLogFile() const {
|
|
|
return activitiesLogFile_;
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::string& activitiesLogUrl() const {
|
|
|
return activitiesLogUrl_;
|
|
|
}
|
|
|
|
|
|
void setActivitiesLogUrl(const std::string& url) {
|
|
|
activitiesLogUrl_ = url;
|
|
|
}
|
|
|
|
|
|
bool activitiesLogToMemory() const {
|
|
|
return activitiesLogToMemory_;
|
|
|
}
|
|
|
|
|
|
bool eventProfilerEnabled() const {
|
|
|
return !eventNames_.empty() || !metricNames_.empty();
|
|
|
}
|
|
|
|
|
|
|
|
|
bool eventProfilerEnabledForDevice(uint32_t dev) const {
|
|
|
return 0 != (eventProfilerDeviceMask_ & (1 << dev));
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds samplePeriod() const {
|
|
|
return samplePeriod_;
|
|
|
}
|
|
|
|
|
|
void setSamplePeriod(std::chrono::milliseconds period) {
|
|
|
samplePeriod_ = period;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds multiplexPeriod() const {
|
|
|
return multiplexPeriod_;
|
|
|
}
|
|
|
|
|
|
void setMultiplexPeriod(std::chrono::milliseconds period) {
|
|
|
multiplexPeriod_ = period;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds reportPeriod() const {
|
|
|
return reportPeriod_;
|
|
|
}
|
|
|
|
|
|
void setReportPeriod(std::chrono::milliseconds msecs);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int samplesPerReport() const {
|
|
|
return samplesPerReport_;
|
|
|
}
|
|
|
|
|
|
void setSamplesPerReport(int count) {
|
|
|
samplesPerReport_ = count;
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::set<std::string>& eventNames() const {
|
|
|
return eventNames_;
|
|
|
}
|
|
|
|
|
|
|
|
|
void addEvents(const std::set<std::string>& names) {
|
|
|
eventNames_.insert(names.begin(), names.end());
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::set<std::string>& metricNames() const {
|
|
|
return metricNames_;
|
|
|
}
|
|
|
|
|
|
|
|
|
void addMetrics(const std::set<std::string>& names) {
|
|
|
metricNames_.insert(names.begin(), names.end());
|
|
|
}
|
|
|
|
|
|
const std::vector<int>& percentiles() const {
|
|
|
return eventReportPercentiles_;
|
|
|
}
|
|
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerOnDemandDuration() const {
|
|
|
return eventProfilerOnDemandDuration_;
|
|
|
}
|
|
|
|
|
|
void setEventProfilerOnDemandDuration(std::chrono::seconds duration) {
|
|
|
eventProfilerOnDemandDuration_ = duration;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int maxEventProfilersPerGpu() const {
|
|
|
return eventProfilerMaxInstancesPerGpu_;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerHeartbeatMonitorPeriod() const {
|
|
|
return eventProfilerHeartbeatMonitorPeriod_;
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::set<ActivityType>& selectedActivityTypes() const {
|
|
|
return selectedActivityTypes_;
|
|
|
}
|
|
|
|
|
|
|
|
|
bool perThreadBufferEnabled() const {
|
|
|
return perThreadBufferEnabled_;
|
|
|
}
|
|
|
|
|
|
void setSelectedActivityTypes(const std::set<ActivityType>& types) {
|
|
|
selectedActivityTypes_ = types;
|
|
|
}
|
|
|
|
|
|
bool isReportInputShapesEnabled() const {
|
|
|
return enableReportInputShapes_;
|
|
|
}
|
|
|
|
|
|
bool isProfileMemoryEnabled() const {
|
|
|
return enableProfileMemory_;
|
|
|
}
|
|
|
|
|
|
bool isWithStackEnabled() const {
|
|
|
return enableWithStack_;
|
|
|
}
|
|
|
|
|
|
bool isWithFlopsEnabled() const {
|
|
|
return enableWithFlops_;
|
|
|
}
|
|
|
|
|
|
bool isWithModulesEnabled() const {
|
|
|
return enableWithModules_;
|
|
|
}
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds activitiesDuration() const {
|
|
|
return activitiesDuration_;
|
|
|
}
|
|
|
|
|
|
|
|
|
int activitiesRunIterations() const {
|
|
|
return activitiesRunIterations_;
|
|
|
}
|
|
|
|
|
|
int activitiesMaxGpuBufferSize() const {
|
|
|
return activitiesMaxGpuBufferSize_;
|
|
|
}
|
|
|
|
|
|
std::chrono::seconds activitiesWarmupDuration() const {
|
|
|
return activitiesWarmupDuration_;
|
|
|
}
|
|
|
|
|
|
int activitiesWarmupIterations() const {
|
|
|
return activitiesWarmupIterations_;
|
|
|
}
|
|
|
|
|
|
|
|
|
bool activitiesCudaSyncWaitEvents() const {
|
|
|
return activitiesCudaSyncWaitEvents_;
|
|
|
}
|
|
|
|
|
|
void setActivitiesCudaSyncWaitEvents(bool enable) {
|
|
|
activitiesCudaSyncWaitEvents_ = enable;
|
|
|
}
|
|
|
|
|
|
|
|
|
const std::chrono::time_point<std::chrono::system_clock> requestTimestamp()
|
|
|
const {
|
|
|
if (profileStartTime_.time_since_epoch().count()) {
|
|
|
return profileStartTime_;
|
|
|
}
|
|
|
|
|
|
if (requestTimestamp_.time_since_epoch().count() == 0) {
|
|
|
return requestTimestamp_;
|
|
|
}
|
|
|
|
|
|
|
|
|
return requestTimestamp_ + maxRequestAge() + activitiesWarmupDuration();
|
|
|
}
|
|
|
|
|
|
bool hasProfileStartTime() const {
|
|
|
return requestTimestamp_.time_since_epoch().count() > 0 ||
|
|
|
profileStartTime_.time_since_epoch().count() > 0;
|
|
|
}
|
|
|
|
|
|
int profileStartIteration() const {
|
|
|
return profileStartIteration_;
|
|
|
}
|
|
|
|
|
|
bool hasProfileStartIteration() const {
|
|
|
return profileStartIteration_ >= 0 && activitiesRunIterations_ > 0;
|
|
|
}
|
|
|
|
|
|
void setProfileStartIteration(int iter) {
|
|
|
profileStartIteration_ = iter;
|
|
|
}
|
|
|
|
|
|
int profileStartIterationRoundUp() const {
|
|
|
return profileStartIterationRoundUp_;
|
|
|
}
|
|
|
|
|
|
|
|
|
int startIterationIncludingWarmup() const {
|
|
|
if (!hasProfileStartIteration()) {
|
|
|
return -1;
|
|
|
}
|
|
|
return profileStartIteration_ - activitiesWarmupIterations_;
|
|
|
}
|
|
|
|
|
|
const std::chrono::seconds maxRequestAge() const;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int verboseLogLevel() const {
|
|
|
return verboseLogLevel_;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const std::vector<std::string>& verboseLogModules() const {
|
|
|
return verboseLogModules_;
|
|
|
}
|
|
|
|
|
|
bool sigUsr2Enabled() const {
|
|
|
return enableSigUsr2_;
|
|
|
}
|
|
|
|
|
|
bool ipcFabricEnabled() const {
|
|
|
return enableIpcFabric_;
|
|
|
}
|
|
|
|
|
|
std::chrono::seconds onDemandConfigUpdateIntervalSecs() const {
|
|
|
return onDemandConfigUpdateIntervalSecs_;
|
|
|
}
|
|
|
|
|
|
static std::chrono::milliseconds alignUp(
|
|
|
std::chrono::milliseconds duration,
|
|
|
std::chrono::milliseconds alignment) {
|
|
|
duration += alignment;
|
|
|
return duration - (duration % alignment);
|
|
|
}
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock>
|
|
|
eventProfilerOnDemandStartTime() const {
|
|
|
return eventProfilerOnDemandTimestamp_;
|
|
|
}
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock>
|
|
|
eventProfilerOnDemandEndTime() const {
|
|
|
return eventProfilerOnDemandTimestamp_ + eventProfilerOnDemandDuration_;
|
|
|
}
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock>
|
|
|
activityProfilerRequestReceivedTime() const {
|
|
|
return activitiesOnDemandTimestamp_;
|
|
|
}
|
|
|
|
|
|
static constexpr std::chrono::milliseconds kControllerIntervalMsecs{1000};
|
|
|
|
|
|
|
|
|
const std::string& requestTraceID() const {
|
|
|
return requestTraceID_;
|
|
|
}
|
|
|
|
|
|
void setRequestTraceID(const std::string& tid) {
|
|
|
requestTraceID_ = tid;
|
|
|
}
|
|
|
|
|
|
const std::string& requestGroupTraceID() const {
|
|
|
return requestGroupTraceID_;
|
|
|
}
|
|
|
|
|
|
void setRequestGroupTraceID(const std::string& gtid) {
|
|
|
requestGroupTraceID_ = gtid;
|
|
|
}
|
|
|
|
|
|
size_t cuptiDeviceBufferSize() const {
|
|
|
return cuptiDeviceBufferSize_;
|
|
|
}
|
|
|
|
|
|
size_t cuptiDeviceBufferPoolLimit() const {
|
|
|
return cuptiDeviceBufferPoolLimit_;
|
|
|
}
|
|
|
|
|
|
bool memoryProfilerEnabled() const {
|
|
|
return memoryProfilerEnabled_;
|
|
|
}
|
|
|
|
|
|
int profileMemoryDuration() const {
|
|
|
return profileMemoryDuration_;
|
|
|
}
|
|
|
void updateActivityProfilerRequestReceivedTime();
|
|
|
|
|
|
void printActivityProfilerConfig(std::ostream& s) const override;
|
|
|
void setActivityDependentConfig() override;
|
|
|
|
|
|
void validate(const std::chrono::time_point<std::chrono::system_clock>&
|
|
|
fallbackProfileStartTime) override;
|
|
|
|
|
|
static void addConfigFactory(
|
|
|
std::string name,
|
|
|
std::function<AbstractConfig*(Config&)> factory);
|
|
|
|
|
|
void print(std::ostream& s) const;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static std::shared_ptr<void> getStaticObjectsLifetimeHandle();
|
|
|
|
|
|
bool getTSCTimestampFlag() const {
|
|
|
return useTSCTimestamp_;
|
|
|
}
|
|
|
|
|
|
void setTSCTimestampFlag(bool flag) {
|
|
|
useTSCTimestamp_ = flag;
|
|
|
}
|
|
|
|
|
|
const std::string& getCustomConfig() const {
|
|
|
return customConfig_;
|
|
|
}
|
|
|
|
|
|
uint32_t maxEvents() const {
|
|
|
return maxEvents_;
|
|
|
}
|
|
|
|
|
|
private:
|
|
|
explicit Config(const Config& other) = default;
|
|
|
|
|
|
AbstractConfig* cloneDerived(AbstractConfig& parent) const override {
|
|
|
|
|
|
assert(false);
|
|
|
return nullptr;
|
|
|
}
|
|
|
|
|
|
uint8_t createDeviceMask(const std::string& val);
|
|
|
|
|
|
|
|
|
|
|
|
void setActivityTypes(const std::vector<std::string>& selected_activities);
|
|
|
|
|
|
|
|
|
void selectDefaultActivityTypes() {
|
|
|
|
|
|
for (ActivityType t : defaultActivityTypes()) {
|
|
|
selectedActivityTypes_.insert(t);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
int verboseLogLevel_;
|
|
|
std::vector<std::string> verboseLogModules_;
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds samplePeriod_;
|
|
|
std::chrono::milliseconds reportPeriod_;
|
|
|
int samplesPerReport_;
|
|
|
std::set<std::string> eventNames_;
|
|
|
std::set<std::string> metricNames_;
|
|
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerOnDemandDuration_;
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock>
|
|
|
eventProfilerOnDemandTimestamp_;
|
|
|
|
|
|
int eventProfilerMaxInstancesPerGpu_;
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerHeartbeatMonitorPeriod_;
|
|
|
|
|
|
|
|
|
std::string eventLogFile_;
|
|
|
std::vector<int> eventReportPercentiles_ = {5, 25, 50, 75, 95};
|
|
|
uint8_t eventProfilerDeviceMask_ = ~0;
|
|
|
std::chrono::milliseconds multiplexPeriod_;
|
|
|
|
|
|
|
|
|
bool activityProfilerEnabled_;
|
|
|
|
|
|
|
|
|
bool perThreadBufferEnabled_;
|
|
|
std::set<ActivityType> selectedActivityTypes_;
|
|
|
|
|
|
|
|
|
std::string activitiesLogFile_;
|
|
|
|
|
|
std::string activitiesLogUrl_;
|
|
|
|
|
|
|
|
|
bool activitiesLogToMemory_{false};
|
|
|
|
|
|
int activitiesMaxGpuBufferSize_;
|
|
|
std::chrono::seconds activitiesWarmupDuration_;
|
|
|
int activitiesWarmupIterations_;
|
|
|
bool activitiesCudaSyncWaitEvents_;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool enableReportInputShapes_{false};
|
|
|
bool enableProfileMemory_{false};
|
|
|
bool enableWithStack_{false};
|
|
|
bool enableWithFlops_{false};
|
|
|
bool enableWithModules_{false};
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds activitiesDuration_;
|
|
|
int activitiesRunIterations_;
|
|
|
|
|
|
|
|
|
|
|
|
std::string activitiesExternalAPIIterationsTarget_;
|
|
|
|
|
|
std::vector<std::string> activitiesExternalAPIFilter_;
|
|
|
|
|
|
int activitiesExternalAPINetSizeThreshold_;
|
|
|
|
|
|
int activitiesExternalAPIGpuOpCountThreshold_;
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock>
|
|
|
activitiesOnDemandTimestamp_;
|
|
|
|
|
|
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock> profileStartTime_;
|
|
|
|
|
|
int profileStartIteration_;
|
|
|
int profileStartIterationRoundUp_;
|
|
|
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock> requestTimestamp_;
|
|
|
|
|
|
|
|
|
bool enableSigUsr2_;
|
|
|
|
|
|
|
|
|
bool enableIpcFabric_;
|
|
|
std::chrono::seconds onDemandConfigUpdateIntervalSecs_;
|
|
|
|
|
|
|
|
|
std::string requestTraceID_;
|
|
|
std::string requestGroupTraceID_;
|
|
|
|
|
|
|
|
|
size_t cuptiDeviceBufferSize_;
|
|
|
size_t cuptiDeviceBufferPoolLimit_;
|
|
|
|
|
|
|
|
|
bool useTSCTimestamp_{true};
|
|
|
|
|
|
|
|
|
bool memoryProfilerEnabled_{false};
|
|
|
int profileMemoryDuration_{1000};
|
|
|
|
|
|
|
|
|
|
|
|
std::string customConfig_;
|
|
|
|
|
|
|
|
|
uint32_t maxEvents_{1000000};
|
|
|
};
|
|
|
|
|
|
// String constant "KINETO_USE_DAEMON". Judging by its name this is the name
// of an environment variable - presumably the one isDaemonEnvVarSet() checks;
// confirm against the definition.
constexpr char kUseDaemonEnvVar[] = "KINETO_USE_DAEMON";
|
|
|
|
|
|
// Declared here, defined elsewhere. Presumably reports whether the
// environment variable named by kUseDaemonEnvVar is set - confirm against
// the definition.
bool isDaemonEnvVarSet();
|
|
|
|
|
|
}
|
|
|
|