Spaces:

ullahi
/

demo-gradio

Runtime error

App Files Files Community

ullahi committed on May 11, 2025

Commit

b82eba8

verified ·

1 Parent(s): f9bce78

updated

Browse files

Files changed (1) hide show

app.py +29 -21

app.py CHANGED Viewed

@@ -2,18 +2,27 @@ import gradio as gr
 import torch
 import numpy as np
 import matplotlib.pyplot as plt
-from enformer_pytorch import Enformer, load_pretrained_from_url
 from einops import rearrange
-# Load pretrained Enformer model (or use from_pretrained if you're using HF model)
-model = load_pretrained_from_url("https://dl.fbaipublicfiles.com/enformer/enformer_pytorch.pt")
 model.eval()
-# Helper: one-hot encode DNA (A, C, G, T)
-def one_hot_encode(sequence, length=196_608):
     mapping = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
     one_hot = np.zeros((length, 4), dtype=np.float32)
-    sequence = sequence.upper().replace("N", "A")  # replace ambiguous bases
     for i, base in enumerate(sequence[:length]):
         if base in mapping:
             one_hot[i, mapping[base]] = 1.0
@@ -22,31 +31,30 @@ def one_hot_encode(sequence, length=196_608):
 # Prediction function
 def predict_expression(dna_sequence):
     encoded = one_hot_encode(dna_sequence)
-    input_tensor = torch.tensor(encoded).unsqueeze(0)  # (1, length, 4)
-    input_tensor = rearrange(input_tensor, 'b l c -> b c l')  # (1, 4, length)
     with torch.no_grad():
         output = model(input_tensor)
-        expression = output['human']  # shape: (1, 896, 5313)
-        avg_expr = expression[0].mean(dim=0).numpy()  # average across sequence positions
-    # Plot first 10 tissues (customize as needed)
-    plt.figure(figsize=(12, 4))
-    plt.bar(range(10), avg_expr[:10])
     plt.xticks(range(10), [f"Tissue {i}" for i in range(10)])
-    plt.ylabel("Predicted Expression")
-    plt.title("Gene Expression Prediction (avg across bins)")
     plt.tight_layout()
     return plt.gcf()
-# Gradio Interface
 demo = gr.Interface(
     fn=predict_expression,
-    inputs=gr.Textbox(lines=5, label="Paste DNA Sequence (A/C/G/T only, ~200kb)"),
-    outputs=gr.Plot(label="Predicted Gene Expression"),
-    title="Gene Expression Predictor (Enformer)",
-    description="Paste a DNA sequence to predict tissue-specific gene expression using a pretrained Enformer model."
 )
 demo.launch()

 import torch
 import numpy as np
 import matplotlib.pyplot as plt
+from enformer_pytorch import Enformer
 from einops import rearrange
+# Initialize Enformer with correct architecture (based on EleutherAI/enformer-191k)
+model = Enformer(
+    num_channels=1536,
+    num_classes=5313,
+    target_length=896,
+    depth=11,
+    heads=8
+)
 model.eval()
+# Optionally load pretrained weights from a local checkpoint (upload the file to the HF Space manually)
+# model.load_state_dict(torch.load("enformer-191k.pth"))  # optional for offline Spaces
+# Helper function to one-hot encode DNA
+def one_hot_encode(sequence, length=196608):
     mapping = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
     one_hot = np.zeros((length, 4), dtype=np.float32)
+    sequence = sequence.upper().replace("N", "A")
     for i, base in enumerate(sequence[:length]):
         if base in mapping:
             one_hot[i, mapping[base]] = 1.0
 # Prediction function
 def predict_expression(dna_sequence):
     encoded = one_hot_encode(dna_sequence)
+    input_tensor = torch.tensor(encoded).unsqueeze(0)  # shape: (1, length, 4)
+    input_tensor = rearrange(input_tensor, 'b l c -> b c l')  # shape: (1, 4, length)
     with torch.no_grad():
         output = model(input_tensor)
+        avg_expression = output[0].mean(dim=0).numpy()  # (5313,)
+    # Plot first 10 expression predictions
+    plt.figure(figsize=(10, 4))
+    plt.bar(range(10), avg_expression[:10])
     plt.xticks(range(10), [f"Tissue {i}" for i in range(10)])
+    plt.title("Predicted Gene Expression")
+    plt.ylabel("Signal")
     plt.tight_layout()
     return plt.gcf()
+# Gradio app
 demo = gr.Interface(
     fn=predict_expression,
+    inputs=gr.Textbox(lines=6, label="Paste DNA Sequence (200k bp)"),
+    outputs=gr.Plot(label="Predicted Expression Tracks (first 10 tissues)"),
+    title="Gene Expression Prediction with Enformer",
+    description="Paste a 200kb DNA sequence and see predicted expression levels using Enformer."
 )
 demo.launch()