# MIT License
#
# Copyright (c) 2020 Songyou Peng, Michael Niemeyer, Lars Mescheder, Marc Pollefeys, Andreas Geiger.
# Copyright (c) 2025 VAST-AI-Research and contributors.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# modified from https://github.com/autonomousvision/convolutional_occupancy_networks/blob/master/src/encoder/pointnet.py

import torch
import torch.nn as nn
from torch import Tensor
from torch_scatter import scatter_mean


def scale_tensor(dat, inp_scale=None, tgt_scale=None):
    ''' Affinely remap `dat` from the range `inp_scale` to `tgt_scale`,
    clamped 1e-6 inside the target range so a downstream floor()/indexing
    step never lands exactly on the upper bound.
    '''
    if inp_scale is None:
        inp_scale = (-0.5, 0.5)
    if tgt_scale is None:
        tgt_scale = (0, 1)
    assert tgt_scale[1] > tgt_scale[0] and inp_scale[1] > inp_scale[0]
    if isinstance(tgt_scale, Tensor):
        assert dat.shape[-1] == tgt_scale.shape[-1]
    dat = (dat - inp_scale[0]) / (inp_scale[1] - inp_scale[0])
    dat = dat * (tgt_scale[1] - tgt_scale[0]) + tgt_scale[0]
    return dat.clamp(tgt_scale[0] + 1e-6, tgt_scale[1] - 1e-6)
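
# Quick check of the default mapping (illustrative comment, not part of the
# module): the bounding box (-0.5, 0.5) is sent to [0, 1], with the endpoints
# clamped 1e-6 inside the range:
#
#   >>> scale_tensor(torch.tensor([-0.5, 0.0, 0.25]))
#   tensor([1.0000e-06, 5.0000e-01, 7.5000e-01])
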
# ResNet blocks for the PointNet encoder
class ResnetBlockFC(nn.Module):
    ''' Fully connected ResNet block.

    Args:
        size_in (int): input dimension
        size_out (int): output dimension
        size_h (int): hidden dimension
    '''

    def __init__(self, size_in, size_out=None, size_h=None):
        super().__init__()
        # Attributes
        if size_out is None:
            size_out = size_in
        if size_h is None:
            size_h = min(size_in, size_out)
        self.size_in = size_in
        self.size_h = size_h
        self.size_out = size_out

        # Submodules
        self.fc_0 = nn.Linear(size_in, size_h)
        self.fc_1 = nn.Linear(size_h, size_out)
        self.actvn = nn.GELU(approximate="tanh")
        if size_in == size_out:
            self.shortcut = None
        else:
            self.shortcut = nn.Linear(size_in, size_out, bias=False)

        # Initialization
        nn.init.xavier_uniform_(self.fc_0.weight)
        if self.fc_0.bias is not None:
            nn.init.constant_(self.fc_0.bias, 0)
        if self.shortcut is not None:
            nn.init.xavier_uniform_(self.shortcut.weight)
            if self.shortcut.bias is not None:
                nn.init.constant_(self.shortcut.bias, 0)
        nn.init.xavier_uniform_(self.fc_1.weight)
        if self.fc_1.bias is not None:
            nn.init.constant_(self.fc_1.bias, 0)

    def forward(self, x):
        net = self.fc_0(self.actvn(x))
        dx = self.fc_1(self.actvn(net))
        if self.shortcut is not None:
            x_s = self.shortcut(x)
        else:
            x_s = x
        return x_s + dx
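
# Shape sketch (illustrative): the block applies to the last dimension only,
# so LocalPoolPointnet below can feed it [B, Np, 2*hidden_dim] tensors as-is:
#
#   >>> block = ResnetBlockFC(size_in=256, size_out=128)
#   >>> block(torch.randn(2, 1024, 256)).shape
#   torch.Size([2, 1024, 128])
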
class LocalPoolPointnet(nn.Module):
    ''' PointNet-based encoder with local pooling over a sparse voxel grid. '''

    def __init__(self, in_channels=3, out_channels=128, hidden_dim=128,
                 scatter_type='mean', n_blocks=5):
        super().__init__()
        self.scatter_type = scatter_type
        self.in_channels = in_channels
        self.hidden_dim = hidden_dim
        self.out_channels = out_channels

        self.fc_pos = nn.Linear(in_channels, 2 * hidden_dim)
        self.blocks = nn.ModuleList([
            ResnetBlockFC(2 * hidden_dim, hidden_dim) for _ in range(n_blocks)
        ])
        self.fc_c = nn.Linear(hidden_dim, out_channels)

        if self.scatter_type == 'mean':
            self.scatter = scatter_mean
        else:
            raise ValueError('Incorrect scatter type')

        self.initialize_weights()

    def initialize_weights(self):
        nn.init.xavier_uniform_(self.fc_pos.weight)
        if self.fc_pos.bias is not None:
            nn.init.constant_(self.fc_pos.bias, 0)
        nn.init.xavier_uniform_(self.fc_c.weight)
        if self.fc_c.bias is not None:
            nn.init.constant_(self.fc_c.bias, 0)

    def convert_to_sparse_feats(self, c, sparse_coords):
        '''
        Input:
            c: Tensor [B, res, C], per-batch grid feats (padded to res rows)
            sparse_coords: Tensor [Nx, 4], sparse voxel coords ([batch_number, x, y, z])
        Output:
            feats_new: Tensor [Nx, C], grid feats of the occupied voxels, packed
        '''
        feats_new = torch.zeros((sparse_coords.shape[0], c.shape[-1]),
                                device=c.device, dtype=c.dtype)
        offsets = 0
        batch_nums = sparse_coords[..., 0]
        for i in range(len(c)):
            coords_num_i = (batch_nums == i).sum()
            feats_new[offsets: offsets + coords_num_i] = c[i, :coords_num_i]
            offsets += coords_num_i
        return feats_new

    def generate_sparse_grid_features(self, index, c, max_coord_num):
        # Scatter point features onto the flattened sparse grid
        bs = c.size(0)
        res = max_coord_num
        c_out = c.new_zeros(bs, self.out_channels, res)
        c_out = scatter_mean(c.permute(0, 2, 1), index, out=c_out)
        return c_out.permute(0, 2, 1)  # B x res x C

    def pool_sparse_local(self, index, c, max_coord_num):
        '''
        Input:
            index: Tensor [B, 1, Np], sparse index of each point
            c: Tensor [B, Np, C], input feats of each point
        Output:
            c_out: Tensor [B, Np, C], pooled grid feats gathered back per point
        '''
        bs, fea_dim = c.size(0), c.size(2)
        res = max_coord_num
        c_out = c.new_zeros(bs, fea_dim, res)
        c_out = self.scatter(c.permute(0, 2, 1), index, out=c_out)
        # Gather the pooled grid features back to the points
        c_out = c_out.gather(dim=2, index=index.expand(-1, fea_dim, -1))
        return c_out.permute(0, 2, 1)

    @torch.no_grad()
    def coordinate2sparseindex(self, x, sparse_coords, res):
        '''
        Input:
            x: Tensor [B, Np, 3], integer point coords scaled to [0, res)
            sparse_coords: Tensor [Nx, 4] ([batch_number, x, y, z]); within each
                batch, voxels must be sorted by flattened index for searchsorted
            res: int, resolution of the grid
        Output:
            sparse_index: Tensor [B, 1, Np], sparse index of each point
        '''
        B = x.shape[0]
        sparse_index = torch.zeros((B, x.shape[1]), device=x.device, dtype=torch.int64)
        index = (x[..., 0] * res + x[..., 1]) * res + x[..., 2]
        # Flatten the sparse voxel coords the same way, keeping the batch column
        sparse_indices = sparse_coords.clone()
        sparse_indices[..., 1] = (sparse_indices[..., 1] * res
                                  + sparse_indices[..., 2]) * res + sparse_indices[..., 3]
        sparse_indices = sparse_indices[..., :2]
        for i in range(B):
            mask_i = sparse_indices[..., 0] == i
            coords_i = sparse_indices[mask_i, 1]
            sparse_index[i] = torch.searchsorted(coords_i, index[i])
        return sparse_index[:, None, :]

    def forward(self, p, sparse_coords, res=64, bbox_size=(-0.5, 0.5)):
        '''
        Input:
            p: Tensor [B, Np, in_channels] (e.g. Np = 819_200), xyz first,
                optionally followed by extra channels such as normals
            sparse_coords: Tensor [Nx, 4] ([batch_number, x, y, z])
        Output:
            feats: Tensor [Nx, self.out_channels], features of the occupied voxels
        '''
        batch_size, T, D = p.size()
        # Upper bound on the number of occupied voxels in any batch element
        max_coord_num = 0
        for i in range(batch_size):
            max_coord_num = max(max_coord_num,
                                (sparse_coords[..., 0] == i).sum().item() + 5)

        if D == self.in_channels:
            p, normals = p[..., :3], p[..., 3:]
        coord = scale_tensor(p, inp_scale=bbox_size) * res
        p = 2 * (coord - (coord.floor() + 0.5))  # distance to the voxel centroid, in [-1, 1]
        index = self.coordinate2sparseindex(coord.long(), sparse_coords, res)
        if D == self.in_channels:
            p = torch.cat((p, normals), dim=-1)

        net = self.fc_pos(p)
        net = self.blocks[0](net)
        for block in self.blocks[1:]:
            pooled = self.pool_sparse_local(index, net, max_coord_num=max_coord_num)
            net = torch.cat([net, pooled], dim=2)
            net = block(net)
        c = self.fc_c(net)

        feats = self.generate_sparse_grid_features(index, c, max_coord_num=max_coord_num)
        feats = self.convert_to_sparse_feats(feats, sparse_coords)
        return feats
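
# A minimal smoke test (illustrative sketch, not from the original module; the
# sizes are made up and torch_scatter must be installed). It builds
# `sparse_coords` from the occupied voxels of random points, sorted per batch
# by flattened index as coordinate2sparseindex expects, then runs the encoder.
if __name__ == "__main__":
    torch.manual_seed(0)
    B, Np, res = 2, 4096, 64
    pts = torch.rand(B, Np, 3) - 0.5                # points inside the (-0.5, 0.5) box

    vox = (scale_tensor(pts) * res).long()          # [B, Np, 3] voxel coords in [0, res)
    coords = []
    for b in range(B):
        flat = (vox[b, :, 0] * res + vox[b, :, 1]) * res + vox[b, :, 2]
        flat = torch.unique(flat)                   # sorted, deduplicated flattened indices
        xyz = torch.stack((flat // (res * res), (flat // res) % res, flat % res), dim=-1)
        bcol = torch.full((xyz.shape[0], 1), b, dtype=xyz.dtype)
        coords.append(torch.cat((bcol, xyz), dim=-1))
    sparse_coords = torch.cat(coords, dim=0)        # [Nx, 4] = [batch, x, y, z]

    encoder = LocalPoolPointnet(in_channels=3, out_channels=128)
    feats = encoder(pts, sparse_coords, res=res)
    print(feats.shape)                              # torch.Size([Nx, 128])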