Spaces:
Sleeping
Sleeping
File size: 4,594 Bytes
4205633 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import gradio as gr
import pandas as pd
import numpy as np
def predict_risk(file):
    """
    Process uploaded gene expression data and predict heart failure risk.

    The first column of the table is used as the sample identifier and every
    remaining column is passed to the model as a feature. Failures are never
    raised to the caller: they are reported as a DataFrame with a single
    ``Error`` column so the result can always be displayed as a table.

    Args:
        file: The uploaded CSV or XLSX file. Accepts a path string (or
            ``os.PathLike``), an object exposing its path as ``.name``, or
            ``None`` when no file is selected.

    Returns:
        DataFrame with ``Sample_ID``, ``Age`` and ``Heart_Failure_Risk``
        columns, or a DataFrame with a single ``Error`` column describing
        what went wrong.
    """
    import os

    def _error(message):
        # Every failure path reports through the same one-column table shape.
        return pd.DataFrame({"Error": [message]})

    # Nothing selected (for example the upload was cleared): answer with a
    # readable message instead of an AttributeError text.
    if file is None:
        return _error("No file provided. Please upload a .csv or .xlsx file.")

    try:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        extension = os.path.splitext(path)[1].lower()
        if extension == ".csv":
            df = pd.read_csv(path)
        elif extension == ".xlsx":
            df = pd.read_excel(path)
        else:
            return _error("Unsupported file format. Please upload .csv or .xlsx")

        # At least one feature column is required besides the sample IDs.
        if df.shape[1] < 2:
            return _error(
                "Expected a Sample ID column followed by at least one feature column."
            )

        # Step A: the first column (named or unnamed) holds the sample IDs.
        sample_ids = df.iloc[:, 0].values
        # Step B: all remaining columns are the model features.
        model_features = df.iloc[:, 1:].values

        # Load the model (ensure 'my_model.pkl' is in the Space's files).
        # If the model is named differently, change this filename.
        model_path = "my_model.pkl"
        if not os.path.exists(model_path):
            # Fallback if the model file is missing (prevents crashing during setup).
            return _error("Model file not found. Please upload 'my_model.pkl'.")

        # Imported lazily so the validation paths above work without joblib.
        # NOTE: joblib.load unpickles the file; only ship model files you trust.
        import joblib

        model = joblib.load(model_path)

        # The model is expected to return one [Age, Risk] pair per sample.
        # np.asarray makes the column slicing below work for a list of lists too.
        predictions = np.asarray(model.predict(model_features))
        if predictions.ndim != 2 or predictions.shape[1] < 2:
            return _error(
                "Unexpected model output shape "
                f"{predictions.shape}; expected one [Age, Risk] pair per sample."
            )

        age = predictions[:, 0]
        heart_failure_risk = predictions[:, 1]

        # Step 4: combine the results into a new DataFrame.
        return pd.DataFrame({
            'Sample_ID': sample_ids,
            'Age': age,
            'Heart_Failure_Risk': np.round(heart_failure_risk, 4),
        })
    except Exception as e:
        # Return the error message as a DataFrame so it is shown in the table.
        return _error(f"An error occurred: {str(e)}")
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------

# Browser/page title passed to the Blocks container.
_PAGE_TITLE = "Bioinformatics AI Agent - Heart Failure Risk Prediction"

# Markdown shown at the top of the page: heading plus expected input format.
_INTRO_TEXT = """
# 🧬 Bioinformatics AI Agent
## Heart Failure Risk Prediction from Gene Expression Data
Upload your gene expression data file (.csv or .xlsx) to predict heart failure risk.
**Expected Format:**
- First column: Sample IDs (can be named or unnamed)
- Remaining columns: Gene expression values (numeric features)
"""

# Markdown shown below the upload/results row: meaning of each output column.
_OUTPUT_NOTES_TEXT = """
### 📊 Output Columns:
- **Sample_ID**: Identifier from your input file
- **Age**: Predicted age (20-90 years)
- **Heart_Failure_Risk**: Risk score (0-1, where 1 is highest risk)
---
*Note: Current predictions are placeholder values. Replace the prediction logic in `app.py` with your trained model.*
"""

with gr.Blocks(title=_PAGE_TITLE) as demo:
    gr.Markdown(_INTRO_TEXT)

    with gr.Row():
        # Left column: file upload and the button that starts a prediction.
        with gr.Column():
            file_input = gr.File(
                label="Upload Gene Expression Data",
                file_types=[".csv", ".xlsx"],
                type="filepath",
            )
            predict_btn = gr.Button("Predict Risk", variant="primary")

        # Right column: table that receives the DataFrame from predict_risk.
        with gr.Column():
            output_dataframe = gr.Dataframe(
                label="Prediction Results",
                headers=["Sample_ID", "Age", "Heart_Failure_Risk"],
                datatype=["str", "number", "number"],
                row_count=10,
            )

    gr.Markdown(_OUTPUT_NOTES_TEXT)

    # Both the button click and a change of the uploaded file run the same
    # prediction function with the same input and output components.
    for register_event in (predict_btn.click, file_input.change):
        register_event(
            fn=predict_risk,
            inputs=file_input,
            outputs=output_dataframe,
        )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|