File size: 1,095 Bytes
c1af2fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// Original TunableOp is from onnxruntime.
// https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/framework/tunable.h
// https://github.com/microsoft/onnxruntime/tree/main/onnxruntime/core/providers/rocm/tunable
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
//
// Adapting TunableOp into PyTorch
// Copyright (c) Advanced Micro Devices, Inc.
//
#pragma once

#include <cuda_runtime.h>

#include <ATen/cuda/tunable/Tunable.h>

namespace at::cuda::tunable {

class StreamTimer : public ITimer {
  public:
    StreamTimer();
    ~StreamTimer() override;

    void Start() override;

    void End() override;

    float Duration() override;

  private:
    cudaEvent_t start_{};
    cudaEvent_t end_{};
};

class StreamTimerNoSync : public ITimer {
  public:
    StreamTimerNoSync();
    ~StreamTimerNoSync() override;

    void Start() override;

    void End() override;

    float Duration() override;

  private:
    cudaEvent_t start_{};
    cudaEvent_t end_{};
};

} // namespace at::cuda::tunable