Initial upload: model weights, config, metrics, README, model_def.py, inference.py
Browse files- README.md +37 -0
- config.json +15 -0
- inference.py +18 -0
- metrics.json +14 -0
- model_def.py +28 -0
- pytorch_model.bin +3 -0
README.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-output DNA Structure Regressor (PyTorch)
|
| 2 |
+
|
| 3 |
+
This repository contains a PyTorch MLP that predicts multiple structural targets from engineered DNA features.
|
| 4 |
+
|
| 5 |
+
## Model
|
| 6 |
+
- **Architecture:** MLP with three hidden layers (512→256→128, ReLU) followed by a linear output layer; dropout 0.3 after the first two hidden layers
|
| 7 |
+
- **Inputs:** 109658 features
|
| 8 |
+
- **Outputs:** 6 targets → mfe_energy, num_pairs, stem_len_mean, num_stems, num_hairpins, num_internal_loops
|
| 9 |
+
- **Loss:** MSE
|
| 10 |
+
- **Optimizer:** Adam (lr=0.0001)
|
| 11 |
+
- **Epochs:** 15
|
| 12 |
+
|
| 13 |
+
## Metrics (test)
|
| 14 |
+
- Overall MSE: `15022.6787`
|
| 15 |
+
- Overall R²: `-34.0313`
|
| 16 |
+
- Training time (s): `131.85`
|
| 17 |
+
- Prediction time (s): `0.2694`
|
| 18 |
+
|
| 19 |
+
### MAE per target
|
| 20 |
+
```json
|
| 21 |
+
{
|
| 22 |
+
"mfe_energy": 139.4054718017578,
|
| 23 |
+
"num_pairs": 116.53337097167969,
|
| 24 |
+
"stem_len_mean": 2.4054114818573,
|
| 25 |
+
"num_stems": 69.17422485351562,
|
| 26 |
+
"num_hairpins": 14.115099906921387,
|
| 27 |
+
"num_internal_loops": 94.97564697265625
|
| 28 |
+
}
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## Usage
|
| 32 |
+
```bash
|
| 33 |
+
pip install torch numpy
|
| 34 |
+
python inference.py
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
Before running inference, apply the same preprocessing (e.g., scaling, SVD) that was used during training.
|
config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "ImprovedNet",
|
| 3 |
+
"input_size": 109658,
|
| 4 |
+
"output_size": 6,
|
| 5 |
+
"hidden": [
|
| 6 |
+
512,
|
| 7 |
+
256,
|
| 8 |
+
128
|
| 9 |
+
],
|
| 10 |
+
"dropout": 0.3,
|
| 11 |
+
"loss": "MSELoss",
|
| 12 |
+
"optimizer": "Adam",
|
| 13 |
+
"learning_rate": 0.0001,
|
| 14 |
+
"num_epochs": 15
|
| 15 |
+
}
|
inference.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch, json, numpy as np
|
| 2 |
+
from model_def import load_model
|
| 3 |
+
|
| 4 |
+
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# The config is read here only to learn the expected input width;
# load_model() opens the same file again on its own.
with open("config.json") as config_file:
    cfg = json.load(config_file)

model = load_model("pytorch_model.bin", "config.json", device=DEVICE)

# Placeholder input: a single row of standard-normal float32 values.
# This only exercises the forward pass; it is not real feature data.
x = np.random.randn(1, cfg["input_size"]).astype("float32")
x_t = torch.from_numpy(x).to(DEVICE)

# Forward pass with autograd disabled, then move the result back to NumPy.
with torch.no_grad():
    y_hat = model(x_t).cpu().numpy()

print("Pred shape:", y_hat.shape)
# Show at most the first five outputs of the first row.
n_shown = min(5, y_hat.shape[1])
print("Pred sample:", y_hat[0][:n_shown])
|
metrics.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overall_mse": 15022.6787109375,
|
| 3 |
+
"overall_r2": -34.03133010864258,
|
| 4 |
+
"training_time_seconds": 131.85455417633057,
|
| 5 |
+
"prediction_time_seconds": 0.2694435119628906,
|
| 6 |
+
"mae_per_target": {
|
| 7 |
+
"mfe_energy": 139.4054718017578,
|
| 8 |
+
"num_pairs": 116.53337097167969,
|
| 9 |
+
"stem_len_mean": 2.4054114818573,
|
| 10 |
+
"num_stems": 69.17422485351562,
|
| 11 |
+
"num_hairpins": 14.115099906921387,
|
| 12 |
+
"num_internal_loops": 94.97564697265625
|
| 13 |
+
}
|
| 14 |
+
}
|
model_def.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class ImprovedNet(nn.Module):
    """Fully connected network with three hidden layers and a linear head.

    The hidden widths are fixed at 512, 256 and 128. ReLU follows every
    hidden layer; dropout follows only the first two.

    Args:
        input_features: Size of the last dimension of the input tensor.
        output_features: Number of values produced per input row.
        dropout: Probability handed to ``nn.Dropout``.
    """

    def __init__(self, input_features, output_features, dropout=0.30):
        super().__init__()
        first_width, second_width, third_width = 512, 256, 128
        # Attribute names and creation order are kept stable because they
        # determine the state_dict keys of saved checkpoints.
        self.layer1 = nn.Linear(input_features, first_width)
        self.layer2 = nn.Linear(first_width, second_width)
        self.layer3 = nn.Linear(second_width, third_width)
        self.output_layer = nn.Linear(third_width, output_features)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return the raw (un-activated) output of the final linear layer."""
        hidden = self.dropout(F.relu(self.layer1(x)))
        hidden = self.dropout(F.relu(self.layer2(hidden)))
        # No dropout after the third hidden layer.
        hidden = F.relu(self.layer3(hidden))
        return self.output_layer(hidden)
|
| 19 |
+
|
| 20 |
+
def load_model(weights_path, config_path="config.json", device="cpu"):
    """Build an ``ImprovedNet`` from a JSON config and load saved weights.

    Args:
        weights_path: Path to a checkpoint holding the model's state dict.
        config_path: Path to a JSON file with ``input_size`` and
            ``output_size`` keys and an optional ``dropout`` key
            (defaults to 0.30 when absent).
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        The model on ``device``, switched to evaluation mode.

    Raises:
        KeyError: If ``input_size`` or ``output_size`` is missing from the
            config.
    """
    import json

    # Explicit encoding so parsing does not depend on the platform default.
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)

    model = ImprovedNet(
        cfg["input_size"],
        cfg["output_size"],
        dropout=cfg.get("dropout", 0.30),
    )

    # SECURITY: torch.load unpickles the file. Without weights_only=True a
    # crafted checkpoint can execute arbitrary code on load, which matters
    # for weights downloaded from a model hub. A plain state dict of tensors
    # loads fine under this restriction.
    # NOTE(review): the weights_only argument needs PyTorch >= 1.13 — confirm
    # the minimum supported version.
    state_dict = torch.load(weights_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    model.to(device)
    # Evaluation mode disables dropout for inference.
    model.eval()
    return model
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7636d3543892429806ac381d235df25751e679ca5c956b0b9ba8e3d588b086e
|
| 3 |
+
size 225245281
|