File size: 6,940 Bytes
e05eed1
98a67a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
// SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0


#include "quad_rectify_cpu.h"

#include <iostream>

#include "../geometry.h"
#include "quad_rectify_shared.h"

using namespace std;

// Evaluates calc_quad_width for every quad along dimension 0 of `quads` and
// writes each result, converted with Convert<scalar_t, int64_t>::LeftToRight,
// into the slot of `output` that has the same index.
//
// quads        - accessor over the input quads; only size(0) and operator[] are used here.
// output       - accessor that receives one converted width per quad.
// outputHeight - forwarded unchanged as the 2nd argument of calc_quad_width.
// roundFactor  - forwarded unchanged as the 3rd argument of calc_quad_width.
// maxWidth     - forwarded unchanged as the 4th argument of calc_quad_width.
template<typename quads_accessor_t, typename output_accessor_t, typename scalar_t>
void quad_rectify_calc_quad_width_impl(const quads_accessor_t &quads,
                                       output_accessor_t output,
                                       const scalar_t outputHeight,
                                       const scalar_t roundFactor,
                                       const scalar_t maxWidth)
{
    const int64_t quadCount = quads.size(0);

    int64_t idx = 0;
    while (idx < quadCount) {
        // Keep the width in a named local so it is handed to the converter as an lvalue.
        auto width = calc_quad_width(quads[idx], outputHeight, roundFactor, maxWidth);
        output[idx] = Convert<scalar_t, int64_t>::LeftToRight(width);

        ++idx;
    }
}

template<typename quads_accessor_t, typename output_accessor_t, typename scalar_t>
void quad_rectify_cpu_forward_impl(const quads_accessor_t &quads,
                                   output_accessor_t output,
                                   const scalar_t imageHeight,
                                   const scalar_t imageWidth,
                                   bool isotropic)
{
    typedef Point_<scalar_t> Point_t;

    const int64_t numQuads = quads.size(0);
    const int64_t outputHeight = output.size(1);
    const int64_t outputWidth = output.size(2);

    for (int64_t quadIdx = 0; quadIdx < numQuads; ++quadIdx) {
        auto currQuad = quads[quadIdx];

        scalar_t quadWidth = isotropic ? calc_quad_width<scalar_t>(currQuad, outputHeight, 1, outputWidth) : scalar_t(outputWidth);

        for (int64_t row = 0; row < outputHeight; ++row) {
            for (int64_t col = 0; col < outputWidth; ++col) {
                Point_t outputPoint = calc_rect_value<scalar_t>(currQuad,
                                                                quadWidth,
                                                                outputHeight,
                                                                col,
                                                                row,
                                                                imageWidth,
                                                                imageHeight);

                auto currOutput = output[quadIdx][row][col];
                currOutput[0] = outputPoint.X;
                currOutput[1] = outputPoint.Y;
            }
        }
    }
}

/*template<typename scalar_t>
void quad_rectify_cpu_backward_impl(torch::Tensor quads,
                                    torch::Tensor gradOutput,
                                    torch::Tensor gradInput)
{
    const int64_t batchSize = gradOutput.size(0);
    const int64_t outputHeight = gradOutput.size(1);
    const int64_t outputWidth = gradOutput.size(2);

    auto gradInputAccess = gradInput.accessor<scalar_t, 3>();
    auto gradOutputAccess = gradOutput.accessor<scalar_t, 4>();

    for (int64_t batchIdx = 0; batchIdx < batchSize; ++batchIdx) {
        auto batchInputAccess = gradInputAccess[batchIdx];
        auto batchOutputAccess = gradOutputAccess[batchIdx];

        for (int64_t rowIdx = 0; rowIdx < outputHeight; ++rowIdx) {
            for (int64_t colIdx = 0; colIdx < outputWidth; ++colIdx) {

                const scalar_t fRow = scalar_t(rowIdx) / outputHeight;
                const scalar_t fCol = scalar_t(colIdx) / outputWidth;
                const scalar_t fRowCol = fRow * fCol;

                for (int64_t dim = 0; dim < 2; ++dim) {
                    const scalar_t dOut = batchOutputAccess[rowIdx][colIdx][dim];

                    const scalar_t gradIns[] = {
                        dOut * (fRowCol - fCol - fRow + 1),
                        dOut * (fCol - fRowCol),
                        dOut * fRowCol,
                        dOut * (fRow - fRowCol)
                    };

                    for (int64_t quadIdx = 0; quadIdx < 4; ++quadIdx) {
                        batchInputAccess[quadIdx][dim] += 2.0f * gradIns[quadIdx];
                    }
                }
            }
        }
    }
}*/

// Returns a 1-D int64 tensor holding one width per quad.
//
// The result has quads.size(0) elements and inherits the options of `quads`
// except for the dtype, which is forced to int64. The scalar arguments are
// converted to the floating type of `quads` with Convert<...>::RightToLeft
// before being handed to quad_rectify_calc_quad_width_impl.
//
// quads        - floating-point tensor accessed as 3-dimensional.
// outputHeight - converted and forwarded to the implementation.
// roundFactor  - converted and forwarded to the implementation.
// maxWidth     - converted and forwarded to the implementation.
torch::Tensor quad_rectify_cpu_calc_quad_width(torch::Tensor quads,
                                               int64_t outputHeight,
                                               int64_t roundFactor,
                                               float maxWidth)
{
    const auto widthOptions = quads.options().dtype(torch::kInt64);
    auto widths = torch::empty({ quads.size(0) }, widthOptions);

    AT_DISPATCH_FLOATING_TYPES(
        quads.scalar_type(),
        "quad_rectify_cpu_calc_quad_width",
        ([&] {
            // `auto` keeps whatever type the converter yields, so template
            // deduction in the implementation is unchanged.
            const auto height = Convert<scalar_t, int64_t>::RightToLeft(outputHeight);
            const auto rounding = Convert<scalar_t, int64_t>::RightToLeft(roundFactor);
            const auto widthLimit = Convert<scalar_t, float>::RightToLeft(maxWidth);

            quad_rectify_calc_quad_width_impl(
                quads.accessor<scalar_t, 3>(),
                widths.accessor<int64_t, 1>(),
                height,
                rounding,
                widthLimit
            );
        })
    );

    return widths;
}

// Allocates and fills the forward result for a batch of quads.
//
// The returned tensor has shape { quads.size(0), outputHeight, outputWidth, 2 }
// and the same options as `quads`. The image dimensions are converted to the
// floating type of `quads` with Convert<scalar_t, int64_t>::RightToLeft and the
// per-element work is delegated to quad_rectify_cpu_forward_impl.
//
// quads        - floating-point tensor accessed as 3-dimensional.
// imageHeight  - converted and forwarded to the implementation.
// imageWidth   - converted and forwarded to the implementation.
// outputHeight - size of dimension 1 of the result.
// outputWidth  - size of dimension 2 of the result.
// isotropic    - forwarded unchanged to the implementation.
torch::Tensor quad_rectify_cpu_forward(torch::Tensor quads,
                                       int64_t imageHeight,
                                       int64_t imageWidth,
                                       int64_t outputHeight,
                                       int64_t outputWidth,
                                       bool isotropic)
{
    const int64_t quadCount = quads.size(0);
    auto result = torch::empty({ quadCount, outputHeight, outputWidth, 2 },
                               quads.options());

    AT_DISPATCH_FLOATING_TYPES(
        quads.scalar_type(),
        "quad_rectify_cpu_forward",
        ([&] {
            // `auto` keeps whatever type the converter yields, so template
            // deduction in the implementation is unchanged.
            const auto height = Convert<scalar_t, int64_t>::RightToLeft(imageHeight);
            const auto width = Convert<scalar_t, int64_t>::RightToLeft(imageWidth);

            quad_rectify_cpu_forward_impl(
                quads.accessor<scalar_t, 3>(),
                result.accessor<scalar_t, 4>(),
                height,
                width,
                isotropic
            );
        })
    );

    return result;
}

// CPU backward pass for quad rectification. Not implemented.
//
// This function never returns: it always throws std::runtime_error. The
// parameters exist only so the signature stays compatible with callers; none of
// them is read.
//
// Earlier revisions allocated a zero-filled tensor shaped like `quads` before
// throwing and had an unreachable `return` after the throw. Both were dead work
// and have been removed, so the failure is now raised before anything is
// allocated.
//
// Throws: std::runtime_error, unconditionally.
torch::Tensor quad_rectify_cpu_backward(torch::Tensor quads,
                                        torch::Tensor gradOutput,
                                        int64_t imageHeight,
                                        int64_t imageWidth,
                                        bool isotropic)
{
    // Disabled implementation sketch, kept for reference. Re-enabling it requires
    // allocating `gradInput` (e.g. torch::zeros_like(quads)) and returning it.
    /*AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        quads.scalar_type(),
        "quad_rectify_cpu_backward",
        ([&] {
            quad_rectify_cpu_backward_impl<scalar_t>(quads,
                                                     gradOutput,
                                                     gradInput);
        })
    );*/

    throw std::runtime_error("Calling backward, and it's not implemented!");
}