# HackAdamHealth's picture
# Upload 4 files
# 4205633 verified
import gradio as gr
import pandas as pd
import numpy as np
def predict_risk(file):
    """
    Process uploaded gene expression data and predict heart failure risk.

    Args:
        file: Uploaded CSV or XLSX file. Either a filesystem path (``str`` or
            ``os.PathLike`` -- what ``gr.File(type="filepath")`` passes) or an
            object exposing that path as ``.name``. ``None`` (nothing
            uploaded) is reported as an error row instead of raising.

    Returns:
        DataFrame with Sample IDs, Age, and Heart Failure Risk predictions.
        On any failure a one-column ``Error`` DataFrame is returned instead,
        so the UI always has a table to display.
    """
    import os

    # The UI also calls this when the upload is cleared or the button is
    # pressed with no file selected; report that explicitly.
    if file is None:
        return pd.DataFrame({"Error": ["No file uploaded. Please upload .csv or .xlsx"]})

    try:
        # Resolve the upload to a path. Path-like values are used directly
        # (a pathlib.Path's ``.name`` is only the basename, so it must not be
        # read here); anything else is expected to carry the path in ``.name``.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = os.fspath(file.name)

        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        extension = os.path.splitext(file_path)[1].lower()
        if extension == ".csv":
            df = pd.read_csv(file_path)
        elif extension == ".xlsx":
            df = pd.read_excel(file_path)
        else:
            return pd.DataFrame({"Error": ["Unsupported file format. Please upload .csv or .xlsx"]})

        # Step A: the first column (named or unnamed) holds the sample IDs.
        sample_ids = df.iloc[:, 0].values

        # Step B: every remaining column is a model feature.
        model_features = df.iloc[:, 1:].values

        # Load the model (ensure 'my_model.pkl' is in your Space's files).
        # If your model is named differently, change this filename!
        model_path = "my_model.pkl"
        if not os.path.exists(model_path):
            # Fallback if model file is missing (prevents crashing during setup)
            return pd.DataFrame({"Error": ["Model file not found. Please upload 'my_model.pkl'."]})

        # Imported only once a model file is known to exist, so a missing
        # joblib install cannot mask the clearer "model not found" message.
        import joblib

        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only ship a model file you created or fully trust.
        model = joblib.load(model_path)

        # The model is expected to return one [Age, Risk] pair per sample.
        # np.asarray lets list-of-lists outputs be column-indexed as well.
        predictions = np.asarray(model.predict(model_features))
        if predictions.ndim != 2 or predictions.shape[1] < 2:
            return pd.DataFrame({"Error": [
                f"Unexpected model output shape {predictions.shape}; "
                "expected one [Age, Risk] pair per sample."
            ]})

        # If your model orders its outputs differently, adjust these indices.
        age = predictions[:, 0]
        heart_failure_risk = predictions[:, 1]

        # Combine results into a new DataFrame
        return pd.DataFrame({
            'Sample_ID': sample_ids,
            'Age': age,
            'Heart_Failure_Risk': np.round(heart_failure_risk, 4),
        })
    except Exception as e:
        # UI boundary: surface any failure as a table row rather than a crash.
        return pd.DataFrame({"Error": [f"An error occurred: {str(e)}"]})
# Create Gradio Interface
# ``demo`` is the module-level Blocks app; it is launched at the bottom of the
# file when this module is run as a script.
with gr.Blocks(title="Bioinformatics AI Agent - Heart Failure Risk Prediction") as demo:
    # Markdown bodies are kept flush-left so the rendered text never depends
    # on how leading whitespace inside the string literal is treated.
    gr.Markdown(
        """
# 🧬 Bioinformatics AI Agent
## Heart Failure Risk Prediction from Gene Expression Data
Upload your gene expression data file (.csv or .xlsx) to predict heart failure risk.
**Expected Format:**
- First column: Sample IDs (can be named or unnamed)
- Remaining columns: Gene expression values (numeric features)
"""
    )

    with gr.Row():
        # Left column: file upload plus the explicit trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Gene Expression Data",
                file_types=[".csv", ".xlsx"],
                type="filepath",
            )
            predict_btn = gr.Button("Predict Risk", variant="primary")

        # Right column: table that receives the DataFrame from predict_risk.
        with gr.Column():
            output_dataframe = gr.Dataframe(
                label="Prediction Results",
                headers=["Sample_ID", "Age", "Heart_Failure_Risk"],
                datatype=["str", "number", "number"],
                row_count=10,
            )

    # The footer note previously claimed the predictions were placeholders;
    # predict_risk loads and runs 'my_model.pkl', so the note now says so.
    gr.Markdown(
        """
### 📊 Output Columns:
- **Sample_ID**: Identifier from your input file
- **Age**: Predicted age (20-90 years)
- **Heart_Failure_Risk**: Risk score (0-1, where 1 is highest risk)
---
*Note: Predictions are produced by the trained model stored in `my_model.pkl`, which must be uploaded with the app's files. If it is missing, an error is shown instead of results.*
"""
    )

    # Connect the button to the prediction function
    predict_btn.click(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe,
    )

    # Also run the prediction whenever the file input's value changes
    file_input.change(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe,
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()