nileshhanotia
/

mutation-pathogenicity-predictor

+import gradio as gr
+import torch
+from huggingface_hub import hf_hub_download
+from encoder import MutationEncoder
+from model import MutationPredictorCNN
+# Load model
+MODEL_PATH = hf_hub_download(
+    repo_id="nileshhanotia/mutation-pathogenicity-predictor",
+    filename="pytorch_model.pth"
+)
+device = torch.device("cpu")
+model = MutationPredictorCNN().to(device)
+checkpoint = torch.load(MODEL_PATH, map_location=device)
+model.load_state_dict(checkpoint["model_state_dict"])
+model.eval()
+encoder = MutationEncoder()
+def generate_explainability(ref_seq, mut_seq, importance, encoded_tensor):
+    """
+    Generate explainability visualization using the encoded tensor
+    to match exactly what the model sees
+    """
+    # Extract mutation position from the encoding (positions 990:1089)
+    diff_mask = encoded_tensor[990:1089]
+    mutation_pos = torch.argmax(diff_mask).item()
+    # Check if mutation was detected
+    if diff_mask[mutation_pos].item() == 0:
+        return "No mutation detected in encoding"
+    # Clean sequences
+    ref_seq = ref_seq.strip().upper()
+    mut_seq = mut_seq.strip().upper()
+    # Create pointer aligned to mutation position
+    pointer = " " * mutation_pos + "^"
+    # Extract bases at mutation position
+    if mutation_pos < len(ref_seq) and mutation_pos < len(mut_seq):
+        ref_base = ref_seq[mutation_pos]
+        mut_base = mut_seq[mutation_pos]
+        substitution = f"{ref_base}>{mut_base}"
+    else:
+        substitution = "Unknown"
+    # Format explainability output
+    explainability_text = (
+        "Mutated sequence:\n"
+        + mut_seq + "\n"
+        + pointer + "\n\n"
+        + f"Mutation position: {mutation_pos}\n"
+        + f"Substitution: {substitution}\n"
+        + f"Importance score: {importance:.4f}"
+    )
+    return explainability_text
+def predict(ref_seq, mut_seq):
+    """
+    Predict pathogenicity and generate explainability
+    """
+    # Clean input sequences
+    ref_seq = ref_seq.strip().upper()
+    mut_seq = mut_seq.strip().upper()
+    # Validate sequences
+    if not ref_seq or not mut_seq:
+        return "Error", 0.0, 0.0, "Please provide both reference and mutated sequences"
+    if len(ref_seq) != len(mut_seq):
+        return "Error", 0.0, 0.0, f"Sequences must be same length (ref: {len(ref_seq)}, mut: {len(mut_seq)})"
+    try:
+        # Encode mutation
+        encoded = encoder.encode_mutation(ref_seq, mut_seq)
+        # Add batch dimension
+        tensor = encoded.unsqueeze(0).to(device)
+        # Get model predictions
+        with torch.no_grad():
+            logit, importance = model(tensor)
+            probability = logit.item()  # Model already outputs sigmoid
+            importance_val = importance.item()
+        # Determine label
+        label = "Pathogenic" if probability >= 0.5 else "Benign"
+        # Generate explainability using the encoded tensor
+        explain = generate_explainability(
+            ref_seq,
+            mut_seq,
+            importance_val,
+            encoded
+        )
+        return label, probability, importance_val, explain
+    except Exception as e:
+        error_msg = f"Error during prediction: {str(e)}"
+        return "Error", 0.0, 0.0, error_msg
+# UI
+with gr.Blocks(title="DNA Mutation Pathogenicity Predictor") as demo:
+    gr.Markdown("""
+    # 🧬 Explainable Mutation Pathogenicity Predictor
+    Predict whether a DNA mutation is pathogenic or benign with explainability
+    showing the mutation position and importance.
+    """)
+    with gr.Row():
+        with gr.Column():
+            ref_input = gr.Textbox(
+                label="Reference sequence (99bp)",
+                placeholder="Enter reference DNA sequence (A, T, G, C)",
+                lines=3
+            )
+            mut_input = gr.Textbox(
+                label="Mutated sequence (99bp)",
+                placeholder="Enter mutated DNA sequence (A, T, G, C)",
+                lines=3
+            )
+            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit = gr.Button("Predict", variant="primary")
+        with gr.Column():
+            prediction = gr.Textbox(
+                label="Prediction",
+                interactive=False
+            )
+            probability = gr.Number(
+                label="Pathogenic Probability",
+                interactive=False
+            )
+            importance = gr.Number(
+                label="Mutation Importance Score",
+                interactive=False
+            )
+            # Explainability visualization
+            explainability = gr.Textbox(
+                label="Explainability Visualization",
+                lines=8,
+                interactive=False
+            )
+    # Examples
+    gr.Markdown("### Examples")
+    gr.Examples(
+        examples=[
+            [
+                "AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+                "AAAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+            ],
+            [
+                "ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA",
+                "ATCGATCGATGGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA"
+            ]
+        ],
+        inputs=[ref_input, mut_input],
+        label="Click an example to load"
+    )
+    # Button actions
+    submit.click(
+        fn=predict,
+        inputs=[ref_input, mut_input],
+        outputs=[prediction, probability, importance, explainability]
+    )
+    clear_btn.click(
+        fn=lambda: ("", "", "", 0.0, 0.0, ""),
+        outputs=[ref_input, mut_input, prediction, probability, importance, explainability]
+    )
+if __name__ == "__main__":
+    demo.launch()