File size: 4,594 Bytes
4205633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import gradio as gr
import pandas as pd
import numpy as np

def _error_frame(message):
    """Wrap *message* in the single-column ``Error`` DataFrame shown in the UI."""
    return pd.DataFrame({"Error": [message]})


def predict_risk(file):
    """
    Process uploaded gene expression data and predict heart failure risk.

    Args:
        file: The uploaded CSV or XLSX file. Either a filesystem path
            (``str`` / ``os.PathLike``, which is what a
            ``gr.File(type="filepath")`` input passes) or an object exposing
            the path as ``.name``. ``None`` means nothing is uploaded.

    Returns:
        DataFrame with columns ``Sample_ID``, ``Age`` and
        ``Heart_Failure_Risk`` (rounded to 4 decimals) on success. On any
        failure a single-column ``Error`` DataFrame is returned instead;
        exceptions derived from ``Exception`` are caught and reported there.
    """
    import os

    try:
        # Clearing the upload widget fires the callback with None.
        if file is None:
            return _error_frame("No file uploaded. Please upload .csv or .xlsx")

        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
        lowered = path.lower()
        if lowered.endswith(".csv"):
            df = pd.read_csv(path)
        elif lowered.endswith(".xlsx"):
            df = pd.read_excel(path)
        else:
            return _error_frame("Unsupported file format. Please upload .csv or .xlsx")

        # Without at least one feature column there is nothing to score.
        if df.shape[1] < 2:
            return _error_frame(
                "Input must contain a Sample ID column followed by at least one feature column."
            )

        # Step A: the first column (named or unnamed) holds the sample IDs.
        sample_ids = df.iloc[:, 0].values

        # Step B: every remaining column is passed to the model as a feature.
        model_features = df.iloc[:, 1:].values

        # The model is looked up relative to the current working directory.
        # If your model is named differently, change this filename!
        model_path = "my_model.pkl"
        if not os.path.exists(model_path):
            # Fallback if model file is missing (prevents crashing during setup)
            return _error_frame("Model file not found. Please upload 'my_model.pkl'.")

        # Imported only once a model is known to exist, so the checks above
        # work even where joblib is not installed.
        import joblib

        # NOTE: joblib.load unpickles the file; only load model files you trust.
        model = joblib.load(model_path)

        # np.asarray lets list-of-lists output be sliced like an ndarray.
        predictions = np.asarray(model.predict(model_features))
        if predictions.ndim != 2 or predictions.shape[1] < 2:
            return _error_frame(
                f"Model output has shape {predictions.shape}; "
                "expected (n_samples, 2) as [Age, Heart_Failure_Risk]."
            )

        # Column 0 is taken as Age and column 1 as the risk score.
        return pd.DataFrame({
            "Sample_ID": sample_ids,
            "Age": predictions[:, 0],
            "Heart_Failure_Risk": np.round(predictions[:, 1], 4),
        })

    except Exception as e:
        # UI boundary: report the failure in the output table, do not crash.
        return _error_frame(f"An error occurred: {str(e)}")


# Create Gradio Interface
# Build the page: an intro, an upload/predict column beside a results table,
# and a footer describing the output columns.
with gr.Blocks(title="Bioinformatics AI Agent - Heart Failure Risk Prediction") as demo:
    gr.Markdown(
        """
        # 🧬 Bioinformatics AI Agent
        ## Heart Failure Risk Prediction from Gene Expression Data
        
        Upload your gene expression data file (.csv or .xlsx) to predict heart failure risk.
        
        **Expected Format:**
        - First column: Sample IDs (can be named or unnamed)
        - Remaining columns: Gene expression values (numeric features)
        """
    )
    
    with gr.Row():
        with gr.Column():
            # type="filepath" makes the callback receive the upload's path.
            file_input = gr.File(
                label="Upload Gene Expression Data",
                file_types=[".csv", ".xlsx"],
                type="filepath"
            )
            predict_btn = gr.Button("Predict Risk", variant="primary")
        
        with gr.Column():
            # Headers mirror the columns predict_risk returns on success.
            output_dataframe = gr.Dataframe(
                label="Prediction Results",
                headers=["Sample_ID", "Age", "Heart_Failure_Risk"],
                datatype=["str", "number", "number"],
                row_count=10
            )
    
    gr.Markdown(
        """
        ### 📊 Output Columns:
        - **Sample_ID**: Identifier from your input file
        - **Age**: Predicted age (20-90 years)
        - **Heart_Failure_Risk**: Risk score (0-1, where 1 is highest risk)
        
        ---
        *Note: Predictions are produced by the trained model stored in `my_model.pkl`, which must be available in the app's working directory.*
        """
    )
    
    # Connect the button to the prediction function
    predict_btn.click(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe
    )
    
    # Also run the prediction whenever the uploaded file changes, so results
    # appear without pressing the button.
    file_input.change(
        fn=predict_risk,
        inputs=file_input,
        outputs=output_dataframe
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()