"""Gradio app that predicts age and heart failure risk from gene expression data."""

import functools
import os

import gradio as gr
import numpy as np
import pandas as pd

# Serialized model, expected next to the app (e.g. in your Space's files).
# If your model is named differently, change this filename.
MODEL_PATH = "my_model.pkl"

# Supported upload extensions mapped to the pandas reader that parses them.
_READERS = {
    ".csv": pd.read_csv,
    ".xlsx": pd.read_excel,
}


def _error_frame(message):
    """Wrap *message* in the one-column DataFrame the UI uses to show errors."""
    return pd.DataFrame({"Error": [message]})


def _uploaded_path(file):
    """Return the filesystem path of an upload as a string.

    Accepts a plain path (``str`` / ``os.PathLike``, which is what
    ``gr.File(type="filepath")`` delivers) as well as a file-like object
    exposing the path through ``.name``.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


@functools.lru_cache(maxsize=1)
def _load_model(path, mtime):
    """Load the model stored at *path*, cached per file modification time.

    *mtime* is not used in the body; it is part of the cache key so that a
    replaced model file is reloaded instead of served from the cache.
    """
    # Imported lazily so a missing joblib installation is reported in the
    # results table by predict_risk instead of crashing the app at startup.
    import joblib

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only ever point MODEL_PATH at a model file you trust.
    return joblib.load(path)


def predict_risk(file):
    """
    Process uploaded gene expression data and predict heart failure risk.

    Args:
        file: Uploaded CSV or XLSX file, given either as a path or as an
            object with a ``.name`` path attribute. ``None`` (nothing
            uploaded, or the upload was cleared) is reported as an error.

    Returns:
        DataFrame with Sample IDs, Age, and Heart Failure Risk predictions.
        On any failure a DataFrame with a single "Error" column describing
        the problem is returned instead; this function does not raise.
    """
    # The file component's change event also fires when the upload is
    # cleared, so a missing file is an expected input, not an exception.
    if file is None:
        return _error_frame("No file uploaded. Please upload .csv or .xlsx")

    try:
        path = _uploaded_path(file)

        # Compare the extension case-insensitively so ".CSV" is accepted too.
        suffix = os.path.splitext(path)[1].lower()
        reader = _READERS.get(suffix)
        if reader is None:
            return _error_frame(
                "Unsupported file format. Please upload .csv or .xlsx"
            )
        df = reader(path)

        if df.shape[0] == 0 or df.shape[1] < 2:
            return _error_frame(
                "The file must contain at least one sample row, with Sample "
                "IDs in the first column and at least one feature column."
            )

        # Step A: the first column (named or unnamed) holds the sample IDs.
        sample_ids = df.iloc[:, 0].to_numpy()

        # Step B: every remaining column is a model feature.
        model_features = df.iloc[:, 1:].to_numpy()

        # Step C: load the model (cached between calls).
        try:
            model_mtime = os.path.getmtime(MODEL_PATH)
        except OSError:
            # Fallback if the model file is missing (prevents crashing
            # during setup).
            return _error_frame(
                f"Model file not found. Please upload '{MODEL_PATH}'."
            )
        model = _load_model(MODEL_PATH, model_mtime)

        # The model is expected to return one [Age, Risk] pair per sample.
        # np.asarray lets a plain list of lists be column-indexed as well.
        predictions = np.asarray(model.predict(model_features))
        if predictions.ndim != 2 or predictions.shape[1] < 2:
            return _error_frame(
                "Unexpected model output shape "
                f"{predictions.shape}; expected one [Age, Risk] pair per sample."
            )
        age = predictions[:, 0]
        heart_failure_risk = predictions[:, 1]

        # Step D: combine the results into a new DataFrame.
        return pd.DataFrame({
            'Sample_ID': sample_ids,
            'Age': age,
            'Heart_Failure_Risk': np.round(heart_failure_risk, 4),
        })

    except Exception as e:
        # UI boundary: surface any failure in the results table rather than
        # letting the exception escape into Gradio.
        return _error_frame(f"An error occurred: {e}")


# Create Gradio Interface
with gr.Blocks(title="Bioinformatics AI Agent - Heart Failure Risk Prediction") as demo:
    gr.Markdown(
        """
        # 🧬 Bioinformatics AI Agent
        ## Heart Failure Risk Prediction from Gene Expression Data

        Upload your gene expression data file (.csv or .xlsx) to predict heart failure risk.

        **Expected Format:**
        - First column: Sample IDs (can be named or unnamed)
        - Remaining columns: Gene expression values (numeric features)
        """
    )

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="Upload Gene Expression Data",
                file_types=[".csv", ".xlsx"],
                type="filepath"
            )
            predict_btn = gr.Button("Predict Risk", variant="primary")

        with gr.Column():
            output_dataframe = gr.Dataframe(
                label="Prediction Results",
                headers=["Sample_ID", "Age", "Heart_Failure_Risk"],
                datatype=["str", "number", "number"],
                row_count=10
            )

    gr.Markdown(
        """
        ### 📊 Output Columns:
        - **Sample_ID**: Identifier from your input file
        - **Age**: Predicted age (20-90 years)
        - **Heart_Failure_Risk**: Risk score (0-1, where 1 is highest risk)

        ---
        *Note: Predictions are produced by the trained model stored in `my_model.pkl`. If that file is missing, an error is shown instead.*
        """
    )

    # Connect the button to the prediction function
    predict_btn.click(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe
    )

    # Also allow prediction on file upload
    file_input.change(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()