|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "autodock_gpu.cuh"
|
|
|
#include <iostream>
|
|
|
#include <fstream>
|
|
|
#include <algorithm>
|
|
|
#include <cmath>
|
|
|
#include <curand_kernel.h>
|
|
|
|
|
|
namespace docking_at_home {
|
|
|
namespace autodock {
|
|
|
|
|
|
|
|
|
#define CUDA_CHECK(call) \
|
|
|
do { \
|
|
|
cudaError_t err = call; \
|
|
|
if (err != cudaSuccess) { \
|
|
|
std::cerr << "CUDA error in " << __FILE__ << ":" << __LINE__ \
|
|
|
<< " - " << cudaGetErrorString(err) << std::endl; \
|
|
|
return false; \
|
|
|
} \
|
|
|
} while(0)
|
|
|
|
|
|
|
|
|
|
|
|
AutoDockGPU::AutoDockGPU()
|
|
|
: is_initialized_(false), device_id_(0),
|
|
|
d_ligand_atoms_(nullptr), d_receptor_atoms_(nullptr),
|
|
|
d_energy_grid_(nullptr), d_population_(nullptr), d_energies_(nullptr),
|
|
|
ligand_atoms_size_(0), receptor_atoms_size_(0),
|
|
|
total_computation_time_(0.0f), total_evaluations_(0) {
|
|
|
}
|
|
|
|
|
|
AutoDockGPU::~AutoDockGPU() {
|
|
|
cleanup();
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::initialize(int device_id) {
|
|
|
device_id_ = device_id;
|
|
|
|
|
|
|
|
|
int device_count;
|
|
|
CUDA_CHECK(cudaGetDeviceCount(&device_count));
|
|
|
|
|
|
if (device_id_ >= device_count) {
|
|
|
std::cerr << "Invalid device ID: " << device_id_ << std::endl;
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
CUDA_CHECK(cudaSetDevice(device_id_));
|
|
|
CUDA_CHECK(cudaGetDeviceProperties(&device_prop_, device_id_));
|
|
|
|
|
|
std::cout << "Initialized GPU: " << device_prop_.name << std::endl;
|
|
|
std::cout << "Compute Capability: " << device_prop_.major << "."
|
|
|
<< device_prop_.minor << std::endl;
|
|
|
std::cout << "Total Global Memory: " << device_prop_.totalGlobalMem / (1024*1024)
|
|
|
<< " MB" << std::endl;
|
|
|
|
|
|
|
|
|
CUDPPConfiguration config;
|
|
|
config.algorithm = CUDPP_SORT_RADIX;
|
|
|
config.datatype = CUDPP_FLOAT;
|
|
|
config.op = CUDPP_ADD;
|
|
|
config.options = CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE;
|
|
|
|
|
|
CUDPPResult result = cudppCreate(&cudpp_handle_);
|
|
|
if (result != CUDPP_SUCCESS) {
|
|
|
std::cerr << "CUDPP initialization failed" << std::endl;
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
is_initialized_ = true;
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::load_ligand(const std::string& filename, Ligand& ligand) {
|
|
|
std::ifstream file(filename);
|
|
|
if (!file.is_open()) {
|
|
|
std::cerr << "Failed to open ligand file: " << filename << std::endl;
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
ligand.atoms.clear();
|
|
|
ligand.name = filename;
|
|
|
ligand.num_rotatable_bonds = 0;
|
|
|
|
|
|
std::string line;
|
|
|
while (std::getline(file, line)) {
|
|
|
if (line.substr(0, 4) == "ATOM" || line.substr(0, 6) == "HETATM") {
|
|
|
Atom atom;
|
|
|
|
|
|
|
|
|
atom.x = std::stof(line.substr(30, 8));
|
|
|
atom.y = std::stof(line.substr(38, 8));
|
|
|
atom.z = std::stof(line.substr(46, 8));
|
|
|
atom.charge = 0.0f;
|
|
|
atom.radius = 1.5f;
|
|
|
atom.type = 0;
|
|
|
|
|
|
ligand.atoms.push_back(atom);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
file.close();
|
|
|
|
|
|
|
|
|
ligand.center_x = ligand.center_y = ligand.center_z = 0.0f;
|
|
|
for (const auto& atom : ligand.atoms) {
|
|
|
ligand.center_x += atom.x;
|
|
|
ligand.center_y += atom.y;
|
|
|
ligand.center_z += atom.z;
|
|
|
}
|
|
|
int n = ligand.atoms.size();
|
|
|
if (n > 0) {
|
|
|
ligand.center_x /= n;
|
|
|
ligand.center_y /= n;
|
|
|
ligand.center_z /= n;
|
|
|
}
|
|
|
|
|
|
std::cout << "Loaded ligand: " << ligand.atoms.size() << " atoms" << std::endl;
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::load_receptor(const std::string& filename, Receptor& receptor) {
|
|
|
std::ifstream file(filename);
|
|
|
if (!file.is_open()) {
|
|
|
std::cerr << "Failed to open receptor file: " << filename << std::endl;
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
receptor.atoms.clear();
|
|
|
receptor.name = filename;
|
|
|
|
|
|
std::string line;
|
|
|
while (std::getline(file, line)) {
|
|
|
if (line.substr(0, 4) == "ATOM" || line.substr(0, 6) == "HETATM") {
|
|
|
Atom atom;
|
|
|
atom.x = std::stof(line.substr(30, 8));
|
|
|
atom.y = std::stof(line.substr(38, 8));
|
|
|
atom.z = std::stof(line.substr(46, 8));
|
|
|
atom.charge = 0.0f;
|
|
|
atom.radius = 1.5f;
|
|
|
atom.type = 0;
|
|
|
|
|
|
receptor.atoms.push_back(atom);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
file.close();
|
|
|
|
|
|
|
|
|
if (!receptor.atoms.empty()) {
|
|
|
float min_x = receptor.atoms[0].x, max_x = receptor.atoms[0].x;
|
|
|
float min_y = receptor.atoms[0].y, max_y = receptor.atoms[0].y;
|
|
|
float min_z = receptor.atoms[0].z, max_z = receptor.atoms[0].z;
|
|
|
|
|
|
for (const auto& atom : receptor.atoms) {
|
|
|
min_x = std::min(min_x, atom.x);
|
|
|
max_x = std::max(max_x, atom.x);
|
|
|
min_y = std::min(min_y, atom.y);
|
|
|
max_y = std::max(max_y, atom.y);
|
|
|
min_z = std::min(min_z, atom.z);
|
|
|
max_z = std::max(max_z, atom.z);
|
|
|
}
|
|
|
|
|
|
|
|
|
float padding = 10.0f;
|
|
|
receptor.grid_min_x = min_x - padding;
|
|
|
receptor.grid_max_x = max_x + padding;
|
|
|
receptor.grid_min_y = min_y - padding;
|
|
|
receptor.grid_max_y = max_y + padding;
|
|
|
receptor.grid_min_z = min_z - padding;
|
|
|
receptor.grid_max_z = max_z + padding;
|
|
|
receptor.grid_spacing = 0.375f;
|
|
|
}
|
|
|
|
|
|
std::cout << "Loaded receptor: " << receptor.atoms.size() << " atoms" << std::endl;
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::dock(const Ligand& ligand,
|
|
|
const Receptor& receptor,
|
|
|
const DockingParameters& params,
|
|
|
std::vector<DockingPose>& poses) {
|
|
|
if (!is_initialized_) {
|
|
|
std::cerr << "GPU not initialized" << std::endl;
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
std::cout << "Starting GPU-accelerated docking..." << std::endl;
|
|
|
std::cout << "Ligand: " << ligand.atoms.size() << " atoms" << std::endl;
|
|
|
std::cout << "Receptor: " << receptor.atoms.size() << " atoms" << std::endl;
|
|
|
std::cout << "Parameters: " << params.num_runs << " runs, "
|
|
|
<< params.population_size << " population size" << std::endl;
|
|
|
|
|
|
cudaEvent_t start, stop;
|
|
|
cudaEventCreate(&start);
|
|
|
cudaEventCreate(&stop);
|
|
|
cudaEventRecord(start);
|
|
|
|
|
|
|
|
|
if (!allocate_device_memory(ligand, receptor)) {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
if (!transfer_to_device(ligand, receptor)) {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
|
|
|
if (!compute_energy_grid(receptor)) {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
|
|
|
if (!run_genetic_algorithm(params, poses)) {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
|
|
|
if (!cluster_results(poses, params.rmsd_tolerance)) {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
cudaEventRecord(stop);
|
|
|
cudaEventSynchronize(stop);
|
|
|
|
|
|
float milliseconds = 0;
|
|
|
cudaEventElapsedTime(&milliseconds, start, stop);
|
|
|
total_computation_time_ = milliseconds / 1000.0f;
|
|
|
|
|
|
std::cout << "Docking completed in " << total_computation_time_ << " seconds" << std::endl;
|
|
|
std::cout << "Generated " << poses.size() << " unique poses" << std::endl;
|
|
|
|
|
|
cudaEventDestroy(start);
|
|
|
cudaEventDestroy(stop);
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
std::string AutoDockGPU::get_device_info() {
|
|
|
if (!is_initialized_) {
|
|
|
return "GPU not initialized";
|
|
|
}
|
|
|
|
|
|
std::stringstream ss;
|
|
|
ss << "Device: " << device_prop_.name << "\n"
|
|
|
<< "Compute Capability: " << device_prop_.major << "." << device_prop_.minor << "\n"
|
|
|
<< "Total Memory: " << device_prop_.totalGlobalMem / (1024*1024) << " MB\n"
|
|
|
<< "Multiprocessors: " << device_prop_.multiProcessorCount << "\n"
|
|
|
<< "Max Threads per Block: " << device_prop_.maxThreadsPerBlock;
|
|
|
|
|
|
return ss.str();
|
|
|
}
|
|
|
|
|
|
std::string AutoDockGPU::get_performance_metrics() {
|
|
|
std::stringstream ss;
|
|
|
ss << "Total Computation Time: " << total_computation_time_ << " seconds\n"
|
|
|
<< "Total Evaluations: " << total_evaluations_ << "\n"
|
|
|
<< "Evaluations per Second: "
|
|
|
<< (total_computation_time_ > 0 ? total_evaluations_ / total_computation_time_ : 0);
|
|
|
|
|
|
return ss.str();
|
|
|
}
|
|
|
|
|
|
void AutoDockGPU::cleanup() {
|
|
|
if (is_initialized_) {
|
|
|
free_device_memory();
|
|
|
cudppDestroy(cudpp_handle_);
|
|
|
cudaDeviceReset();
|
|
|
is_initialized_ = false;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool AutoDockGPU::allocate_device_memory(const Ligand& ligand, const Receptor& receptor) {
|
|
|
ligand_atoms_size_ = ligand.atoms.size() * sizeof(Atom);
|
|
|
receptor_atoms_size_ = receptor.atoms.size() * sizeof(Atom);
|
|
|
|
|
|
CUDA_CHECK(cudaMalloc(&d_ligand_atoms_, ligand_atoms_size_));
|
|
|
CUDA_CHECK(cudaMalloc(&d_receptor_atoms_, receptor_atoms_size_));
|
|
|
|
|
|
|
|
|
size_t grid_size = 100 * 100 * 100 * sizeof(float);
|
|
|
CUDA_CHECK(cudaMalloc(&d_energy_grid_, grid_size));
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::transfer_to_device(const Ligand& ligand, const Receptor& receptor) {
|
|
|
CUDA_CHECK(cudaMemcpy(d_ligand_atoms_, ligand.atoms.data(),
|
|
|
ligand_atoms_size_, cudaMemcpyHostToDevice));
|
|
|
CUDA_CHECK(cudaMemcpy(d_receptor_atoms_, receptor.atoms.data(),
|
|
|
receptor_atoms_size_, cudaMemcpyHostToDevice));
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::compute_energy_grid(const Receptor& receptor) {
|
|
|
|
|
|
std::cout << "Computing energy grid on GPU..." << std::endl;
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::run_genetic_algorithm(const DockingParameters& params,
|
|
|
std::vector<DockingPose>& poses) {
|
|
|
std::cout << "Running genetic algorithm on GPU..." << std::endl;
|
|
|
|
|
|
|
|
|
for (int i = 0; i < params.num_runs; ++i) {
|
|
|
DockingPose pose;
|
|
|
pose.translation[0] = pose.translation[1] = pose.translation[2] = 0.0f;
|
|
|
pose.rotation[0] = 1.0f; pose.rotation[1] = pose.rotation[2] = pose.rotation[3] = 0.0f;
|
|
|
pose.binding_energy = -5.0f + (rand() % 100) / 10.0f;
|
|
|
pose.rank = i + 1;
|
|
|
poses.push_back(pose);
|
|
|
}
|
|
|
|
|
|
total_evaluations_ = params.num_runs * params.num_evals;
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool AutoDockGPU::cluster_results(std::vector<DockingPose>& poses, float rmsd_tolerance) {
|
|
|
|
|
|
std::sort(poses.begin(), poses.end(),
|
|
|
[](const DockingPose& a, const DockingPose& b) {
|
|
|
return a.binding_energy < b.binding_energy;
|
|
|
});
|
|
|
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
void AutoDockGPU::free_device_memory() {
|
|
|
if (d_ligand_atoms_) cudaFree(d_ligand_atoms_);
|
|
|
if (d_receptor_atoms_) cudaFree(d_receptor_atoms_);
|
|
|
if (d_energy_grid_) cudaFree(d_energy_grid_);
|
|
|
if (d_population_) cudaFree(d_population_);
|
|
|
if (d_energies_) cudaFree(d_energies_);
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void calculate_energy_kernel(
|
|
|
const Atom* ligand_atoms,
|
|
|
const Atom* receptor_atoms,
|
|
|
int num_ligand_atoms,
|
|
|
int num_receptor_atoms,
|
|
|
float* energies) {
|
|
|
|
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
if (idx >= num_ligand_atoms * num_receptor_atoms) return;
|
|
|
|
|
|
int lig_idx = idx / num_receptor_atoms;
|
|
|
int rec_idx = idx % num_receptor_atoms;
|
|
|
|
|
|
Atom lig = ligand_atoms[lig_idx];
|
|
|
Atom rec = receptor_atoms[rec_idx];
|
|
|
|
|
|
|
|
|
float dx = lig.x - rec.x;
|
|
|
float dy = lig.y - rec.y;
|
|
|
float dz = lig.z - rec.z;
|
|
|
float r2 = dx*dx + dy*dy + dz*dz;
|
|
|
float r = sqrtf(r2);
|
|
|
|
|
|
|
|
|
float r6 = r2 * r2 * r2;
|
|
|
float r12 = r6 * r6;
|
|
|
float energy = 4.0f * ((1.0f / r12) - (1.0f / r6));
|
|
|
|
|
|
energies[idx] = energy;
|
|
|
}
|
|
|
|
|
|
__global__ void evaluate_population_kernel(
|
|
|
const float* population,
|
|
|
const Atom* ligand_atoms,
|
|
|
const Atom* receptor_atoms,
|
|
|
const float* energy_grid,
|
|
|
float* fitness_values,
|
|
|
int population_size,
|
|
|
int num_genes) {
|
|
|
|
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
if (idx >= population_size) return;
|
|
|
|
|
|
|
|
|
fitness_values[idx] = population[idx * num_genes];
|
|
|
}
|
|
|
|
|
|
__global__ void crossover_kernel(
|
|
|
float* population,
|
|
|
const float* parent_indices,
|
|
|
float crossover_rate,
|
|
|
int population_size,
|
|
|
int num_genes,
|
|
|
unsigned long long seed) {
|
|
|
|
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
if (idx >= population_size / 2) return;
|
|
|
|
|
|
curandState state;
|
|
|
curand_init(seed, idx, 0, &state);
|
|
|
|
|
|
if (curand_uniform(&state) < crossover_rate) {
|
|
|
|
|
|
int crossover_point = curand(&state) % num_genes;
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|
|
|
__global__ void mutation_kernel(
|
|
|
float* population,
|
|
|
float mutation_rate,
|
|
|
int population_size,
|
|
|
int num_genes,
|
|
|
unsigned long long seed) {
|
|
|
|
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
int total_genes = population_size * num_genes;
|
|
|
if (idx >= total_genes) return;
|
|
|
|
|
|
curandState state;
|
|
|
curand_init(seed, idx, 0, &state);
|
|
|
|
|
|
if (curand_uniform(&state) < mutation_rate) {
|
|
|
|
|
|
population[idx] += curand_normal(&state) * 0.1f;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|