File size: 4,599 Bytes
7fc5a59 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | #ifndef OPENPOSE_UTILITIES_PROFILER_HPP
#define OPENPOSE_UTILITIES_PROFILER_HPP
#include <chrono>
#include <string>
#include <openpose/core/macros.hpp>
#include <openpose/utilities/enumClasses.hpp>
namespace op
{
// The following functions provide basic utilities to measure time. Usage example:
// const auto timerInit = getTimerInit();
// // [Some code in here]
// const auto timeSeconds = getTimeSeconds(timerInit);
// printTime(timerInit, "Function X took ", " seconds.", priority); // `priority` is any Priority value
// Returns a `std::chrono::high_resolution_clock` time point that serves as the starting reference for
// getTimeSeconds() and printTime(). NOTE(review): presumably the current time; the definition is not
// in this header.
OP_API std::chrono::time_point<std::chrono::high_resolution_clock> getTimerInit();
// Returns a time in seconds, as a double, computed from `timerInit`. NOTE(review): per the usage
// example, this is presumably the time elapsed since `timerInit` was obtained from getTimerInit().
OP_API double getTimeSeconds(const std::chrono::time_point<std::chrono::high_resolution_clock>& timerInit);
// Prints a timing message for the measurement started at `timerInit`.
// NOTE(review): presumably the output is firstMessage + elapsed seconds + secondMessage, logged with
// the given `priority`; confirm against the definition, which is not in this header.
OP_API void printTime(
const std::chrono::time_point<std::chrono::high_resolution_clock>& timerInit, const std::string& firstMessage,
const std::string& secondMessage, const Priority priority);
// The following macros run the enclosed code REPS times and compute the average time per repetition, scaled by
// `factor` (1 = seconds). Usage example:
// const auto REPS = 1000;
// double time = 0.;
// OP_PROFILE_INIT(REPS);
// // [Some code in here]
// OP_PROFILE_END(time, 1e3, REPS); // Time in msec. 1 = sec, 1e3 = msec, 1e6 = usec, 1e9 = nsec, etc.
// opLog("Function X took " + std::to_string(time) + " milliseconds.");
// Opens a profiling scope: stores the starting time point and begins a loop that executes the code
// placed after the macro REPS times. It leaves two braces open, so every OP_PROFILE_INIT must be
// closed by a matching OP_PROFILE_END.
// - REPS: number of repetitions. It is compared against an `int` loop counter named `rep`.
// The starting time point is kept under a macro-specific name (opProfilerTimerInit) instead of
// `timerInit`, so the profiled code can still refer to a caller variable called `timerInit` without
// it being silently shadowed by the macro.
// getTimerInit() is called unqualified, so it must be reachable at the expansion site.
#define OP_PROFILE_INIT(REPS) \
    { \
        const auto opProfilerTimerInit = getTimerInit(); \
        for (auto rep = 0 ; rep < (REPS) ; ++rep) \
        {
// Closes the scope opened by OP_PROFILE_INIT and stores the averaged time.
// - finalTime: assignable expression that receives factor / REPS * (total time in seconds).
// - factor: unit scale applied to the result (1 = sec, 1e3 = msec, 1e6 = usec, 1e9 = nsec).
// - REPS: number of repetitions; it should be the same value given to OP_PROFILE_INIT.
// REPS is converted to double (not float) so that the division is carried out in double precision
// even when `factor` is an integer.
#define OP_PROFILE_END(finalTime, factor, REPS) \
        } \
        (finalTime) = (factor)/static_cast<double>(REPS)*getTimeSeconds(opProfilerTimerInit); \
    }
// The following macros run the enclosed code REPS times, wait for the CUDA kernels to finish, and then compute the
// average time per repetition, scaled by `factor` (1 = seconds). Usage example:
// const auto REPS = 1000;
// double time = 0.;
// OP_CUDA_PROFILE_INIT(REPS);
// // [Some code with CUDA calls in here]
// OP_CUDA_PROFILE_END(time, 1e3, REPS); // Time in msec. 1 = sec, 1e3 = msec, 1e6 = usec, 1e9 = nsec, etc.
// opLog("Function X took " + std::to_string(time) + " milliseconds.");
// Analogous to OP_PROFILE_INIT, but also waits for CUDA kernels to finish their asynchronous operations
// It requires: #include <cuda_runtime.h>
// Opens a CUDA profiling scope: calls cudaDeviceSynchronize() before storing the starting time
// point, then begins a loop that executes the code placed after the macro REPS times. It leaves two
// braces open, so it must be closed by a matching OP_CUDA_PROFILE_END.
// - REPS: number of repetitions. It is compared against an `int` loop counter named `rep`.
// The starting time point is kept under a macro-specific name (opProfilerTimerInit) instead of
// `timerInit`, so the profiled code can still refer to a caller variable called `timerInit` without
// it being silently shadowed by the macro.
// getTimerInit() is called unqualified, so it must be reachable at the expansion site.
#define OP_CUDA_PROFILE_INIT(REPS) \
    { \
        cudaDeviceSynchronize(); \
        const auto opProfilerTimerInit = getTimerInit(); \
        for (auto rep = 0 ; rep < (REPS) ; ++rep) \
        {
// Analogous to OP_PROFILE_END, but also waits for CUDA kernels to finish their asynchronous operations
// It requires: #include <cuda_runtime.h>
// - finalTime: assignable expression that receives factor / REPS * (total time in seconds).
// - factor: unit scale applied to the result (1 = sec, 1e3 = msec, 1e6 = usec, 1e9 = nsec).
// - REPS: number of repetitions; it should be the same value given to OP_CUDA_PROFILE_INIT.
// cudaDeviceSynchronize() is called before the time is read, and cudaCheck() is called after the
// result has been stored. REPS is converted to double (not float) so that the division is carried
// out in double precision even when `factor` is an integer.
// NOTE(review): cudaCheck is not declared in this header; confirm which include provides it.
#define OP_CUDA_PROFILE_END(finalTime, factor, REPS) \
        } \
        cudaDeviceSynchronize(); \
        (finalTime) = (factor)/static_cast<double>(REPS)*getTimeSeconds(opProfilerTimerInit); \
        cudaCheck(__LINE__, __FUNCTION__, __FILE__); \
    }
// Enable PROFILER_ENABLED in Makefile.config or CMake in order to use this class. Otherwise, nothing will be output.
// How to use - example:
// For GPU - It can only be applied in the main.cpp file:
// Profiler::profileGpuMemory(__LINE__, __FUNCTION__, __FILE__);
// For time:
// // ... inside continuous loop ...
// const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// // functions to do...
// Profiler::timerEnd(profilerKey);
// Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, NUMBER_ITERATIONS);
// Static-only profiling interface: every member is static, so it is used as Profiler::member(...)
// without creating an instance. Measurements are identified by the std::string key returned by
// timerInit().
class OP_API Profiler
{
public:
// Default value of the `x` argument of the two printAveragedTimeMs* functions below.
static unsigned long long DEFAULT_X;
// Non-thread safe, it must be performed at the beginning of the code before any parallelization occurs
// NOTE(review): presumably assigns `defaultX` to DEFAULT_X; the definition is not in this header.
static void setDefaultX(const unsigned long long defaultX);
// Begins a timing measurement tagged with the call site (`line`, `function`, `file`; the usage
// example passes __LINE__, __FUNCTION__, __FILE__) and returns the string key that identifies it.
// That key is what timerEnd() and the printAveragedTimeMs* functions take as `key`.
static const std::string timerInit(const int line, const std::string& function, const std::string& file);
// Ends the timing measurement identified by `key`, a value previously returned by timerInit().
static void timerEnd(const std::string& key);
// NOTE(review): judging by the name, prints the averaged time in milliseconds of the measurement
// `key` when iteration number `x` is reached; confirm against the definition.
// `line`, `function`, `file` describe the call site; `x` defaults to DEFAULT_X.
static void printAveragedTimeMsOnIterationX(
const std::string& key, const int line, const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
// NOTE(review): judging by the name, prints the averaged time in milliseconds of the measurement
// `key` once every `x` iterations; confirm against the definition.
// `line`, `function`, `file` describe the call site; `x` defaults to DEFAULT_X.
static void printAveragedTimeMsEveryXIterations(
const std::string& key, const int line, const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
// GPU memory profiling entry point, tagged with the call site (`line`, `function`, `file`).
// Per the usage notes preceding this class, it can only be applied in the main.cpp file.
static void profileGpuMemory(const int line, const std::string& function, const std::string& file);
};
}
#endif // OPENPOSE_UTILITIES_PROFILER_HPP
|