Spaces:

ankitt6174
/

dna-mutation-prediction

Sleeping

App Files Files Community

ankitt6174 committed on Sep 24, 2025

Commit

e883774

1 Parent(s): 6bd2415

Making it more efficient

Browse files

Files changed (2) hide show

app.py +15 -3
predict.py +68 -58

app.py CHANGED Viewed

@@ -1,8 +1,19 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from predict import predict
-app = FastAPI()
 class InputData(BaseModel):
     dnasequence: str
@@ -104,7 +115,8 @@ def home():
 @app.post("/predict")
 def prediction(data: InputData):
-    result = predict(
         seq = data.dnasequence,
         pos=101,
         ref = data.reference,

 from fastapi import FastAPI
 from pydantic import BaseModel
+from predict import PredictionModel
+from contextlib import asynccontextmanager
+ml_models = {}
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Load the ML model
+    ml_models["dna_mutation_predictor"] = PredictionModel("./model/model.pth")
+    yield
+    # Clean up the ML models and release the resources
+    ml_models.clear()
+app = FastAPI(lifespan=lifespan)
 class InputData(BaseModel):
     dnasequence: str
 @app.post("/predict")
 def prediction(data: InputData):
+    predictor = ml_models["dna_mutation_predictor"]
+    result = predictor.predict(
         seq = data.dnasequence,
         pos=101,
         ref = data.reference,

predict.py CHANGED Viewed

@@ -5,22 +5,6 @@ import torch
 import torch.nn as nn
 import math
-print("="*30)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("Using device:", device)
-print("="*30)
-checkpoint = torch.load("./model/model.pth", map_location=device, weights_only=False)
-feature_scaler = checkpoint['feature_scaler']
-hyperparameters = checkpoint['hyperparameters']
-vocab = checkpoint['vocab']
-mutation_type_encoder = checkpoint['encoders']['mutation_type']
-chromosome_encoder = checkpoint['encoders']['chromosome']
-ref_encoder = checkpoint['encoders']['ref']
-alt_encoder = checkpoint['encoders']['alt']
 chrom_lengths = {
     'chr1': 248956422,
     'chr2': 242193529,
@@ -46,7 +30,7 @@ chrom_lengths = {
     'chr22': 50818468,
 }
-def get_feature_data(seq, pos, ref, alt, chrom, genomic_pos, mutation_type):
     def gc_content(seq):
         seq = seq.upper()
         gc = seq.count('G') + seq.count('C')
@@ -81,15 +65,15 @@ def get_feature_data(seq, pos, ref, alt, chrom, genomic_pos, mutation_type):
         return genomic_pos / chrom_length
     def get_dummies(mutation_type, chrom, ref, alt):
-        mutation_type_df = pd.DataFrame([[mutation_type]], columns=mutation_type_encoder.feature_names_in_)
-        chromosome_df = pd.DataFrame([[chrom]], columns=chromosome_encoder.feature_names_in_)
-        ref_df = pd.DataFrame([[ref]], columns=ref_encoder.feature_names_in_)
-        alt_df = pd.DataFrame([[alt]], columns=alt_encoder.feature_names_in_)
-        mutation_type_encoded = mutation_type_encoder.transform(mutation_type_df).toarray()[0]
-        chromosome_encoded = chromosome_encoder.transform(chromosome_df).toarray()[0]
-        ref_encoded = ref_encoder.transform(ref_df).toarray()[0]
-        alt_encoded = alt_encoder.transform(alt_df).toarray()[0]
         return np.concatenate([mutation_type_encoded, chromosome_encoded, ref_encoded, alt_encoded])
@@ -125,11 +109,11 @@ def get_feature_data(seq, pos, ref, alt, chrom, genomic_pos, mutation_type):
     return result
-def get_codon(seq, k=hyperparameters['k-mers']):
     return [seq[i:i+k] for i in range(len(seq) - k + 1)]
-def get_tensor(text):
-    return [vocab[codons.lower()] for codons in get_codon(text)]
 class PositionalEncoding(nn.Module):
     def __init__(self, embed_dim, max_len=5000):
@@ -215,33 +199,59 @@ class CNNTransformerHybrid(nn.Module):
         output = self.fc_layers(combined_features)
         return output
-model = CNNTransformerHybrid(
-    vocab_size = len(vocab),
-    embed_dim = hyperparameters['embed_dim'],
-    num_classes = 2,
-    max_len = hyperparameters['max_len'],
-    dropout = hyperparameters['dropout'],
-    num_heads = hyperparameters['num_heads'],
-    num_transformer_layers = hyperparameters['num_transformer_layers'],
-    ff_dim = hyperparameters['ff_dim'],
-    cnn_out_channels = hyperparameters['cnn_out_channels'],
-    num_extra_features = 39,
-)
-model.load_state_dict(checkpoint['model_state_dict'])
-model.to(device)
-model.eval()
-def predict(seq, pos, ref, alt, chrom, genomic_pos, mutation_type):
-    features = get_feature_data(seq, pos, ref, alt, chrom, genomic_pos, mutation_type)['Array']
-    scaled_features = feature_scaler.transform(features.reshape(1, -1))
-    with torch.no_grad():
-        input_tensor = torch.tensor(get_tensor(seq)).unsqueeze(0).to(device)
-        features_tensor = torch.tensor(scaled_features, dtype=torch.float32).to(device)
-        output = model(input_tensor, features_tensor)
-    return {
-        'Prediction': torch.softmax(output, dim=1).argmax(dim=1).item(),
-        'Confidence': torch.softmax(output, dim=1)[0]
-    }

 import torch.nn as nn
 import math
 chrom_lengths = {
     'chr1': 248956422,
     'chr2': 242193529,
     'chr22': 50818468,
 }
+def get_feature_data(seq, pos, ref, alt, chrom, genomic_pos, mutation_type, encoders):
     def gc_content(seq):
         seq = seq.upper()
         gc = seq.count('G') + seq.count('C')
         return genomic_pos / chrom_length
     def get_dummies(mutation_type, chrom, ref, alt):
+        mutation_type_df = pd.DataFrame([[mutation_type]], columns=encoders['mutation_type'].feature_names_in_)
+        chromosome_df = pd.DataFrame([[chrom]], columns=encoders['chromosome'].feature_names_in_)
+        ref_df = pd.DataFrame([[ref]], columns=encoders['ref'].feature_names_in_)
+        alt_df = pd.DataFrame([[alt]], columns=encoders['alt'].feature_names_in_)
+        mutation_type_encoded = encoders['mutation_type'].transform(mutation_type_df).toarray()[0]
+        chromosome_encoded = encoders['chromosome'].transform(chromosome_df).toarray()[0]
+        ref_encoded = encoders['ref'].transform(ref_df).toarray()[0]
+        alt_encoded = encoders['alt'].transform(alt_df).toarray()[0]
         return np.concatenate([mutation_type_encoded, chromosome_encoded, ref_encoded, alt_encoded])
     return result
+def get_codon(seq, k):
     return [seq[i:i+k] for i in range(len(seq) - k + 1)]
+def get_tensor(text, vocab, k):
+    return [vocab[codons.lower()] for codons in get_codon(text, k)]
 class PositionalEncoding(nn.Module):
     def __init__(self, embed_dim, max_len=5000):
         output = self.fc_layers(combined_features)
         return output
+class PredictionModel:
+    def __init__(self, model_path: str):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print("="*30)
+        print(f"Loading model on device: {self.device}")
+        checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
+        self.feature_scaler = checkpoint['feature_scaler']
+        self.hyperparameters = checkpoint['hyperparameters']
+        self.vocab = checkpoint['vocab']
+        self.encoders = checkpoint['encoders']
+        self.model = CNNTransformerHybrid(
+            vocab_size=len(self.vocab),
+            embed_dim=self.hyperparameters['embed_dim'],
+            num_classes=2,
+            max_len=self.hyperparameters['max_len'],
+            dropout=self.hyperparameters['dropout'],
+            num_heads=self.hyperparameters['num_heads'],
+            num_transformer_layers=self.hyperparameters['num_transformer_layers'],
+            ff_dim=self.hyperparameters['ff_dim'],
+            cnn_out_channels=self.hyperparameters['cnn_out_channels'],
+            num_extra_features=39,
+        )
+        self.model.load_state_dict(checkpoint['model_state_dict'])
+        self.model.to(self.device)
+        self.model.eval()
+        print("Model loaded successfully.")
+        print("="*30)
+    def predict(self, seq, pos, ref, alt, chrom, genomic_pos, mutation_type):
+        features = get_feature_data(
+            seq, pos, ref, alt, chrom, genomic_pos, mutation_type, self.encoders
+        )['Array']
+        scaled_features = self.feature_scaler.transform(features.reshape(1, -1))
+        with torch.no_grad():
+            input_tensor = torch.tensor(
+                get_tensor(seq, self.vocab, self.hyperparameters['k-mers'])
+            ).unsqueeze(0).to(self.device)
+            features_tensor = torch.tensor(
+                scaled_features, dtype=torch.float32
+            ).to(self.device)
+            output = self.model(input_tensor, features_tensor)
+        confidence = torch.softmax(output, dim=1)[0]
+        prediction = confidence.argmax().item()
+        return {
+            'Prediction': prediction,
+            'Confidence': confidence
+        }