import pandas as pd
import gradio as gr
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load cured patients data from a CSV file
file_path = 'cured_patients.csv'  # Ensure this file is available in the working directory
cured_patients = pd.read_csv(file_path)

# Encode categorical data. Each column gets its own encoder: refitting a single
# LabelEncoder replaces its learned classes, which would leave no way to encode
# a query patient's gender consistently with the stored rows.
label_encoder = LabelEncoder()  # fitted on 'Gender' only
cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
disease_encoder = None  # stays None when the CSV has no 'Disease Type' column
if 'Disease Type' in cured_patients.columns:
    disease_encoder = LabelEncoder()
    cured_patients['Disease Type'] = disease_encoder.fit_transform(cured_patients['Disease Type'])

# Vectorize symptoms; row i of symptoms_matrix describes row i of cured_patients
vectorizer = CountVectorizer()
symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])

# Scale numerical features
scaler = StandardScaler()
cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])


# Define function to find similar patients
def find_similar_patients(disease_type, symptoms, age, gender, lab_results):
    """Return the cured patients most similar to the described patient.

    The query patient is turned into the same feature layout as the stored
    patients (symptom word counts, then Age, Gender, Lab Results) and compared
    with cosine similarity.

    Args:
        disease_type: Disease name used to narrow the candidates. Empty/None,
            an unknown disease, or a disease with no stored patients all fall
            back to searching every patient.
        symptoms: Free-text symptom description; None is treated as empty.
        age: Patient age; must be convertible with ``float()``.
        gender: Gender label as it appears in the CSV. A label that was not
            seen when the data was loaded is encoded as 0.
        lab_results: Lab result value; must be convertible with ``float()``.

    Returns:
        A list of at most five dicts, best match first, each holding
        'Patient ID', 'Remedial Measures' and 'Match Percentage'
        (cosine similarity * 100, rounded to two decimals).

    Raises:
        TypeError, ValueError: if ``age`` or ``lab_results`` cannot be
            converted to float.
    """
    # Single source of truth for the non-symptom column order, so the dataset
    # matrix and the query vector can never disagree on positions.
    feature_columns = ['Age', 'Gender', 'Lab Results']
    symptoms = symptoms or ''
    if isinstance(disease_type, str):
        disease_type = disease_type.strip()

    # Filter based on Disease Type if provided. The column holds encoded
    # integers, so the user's text must be encoded before comparing.
    candidates = cured_patients
    candidate_symptoms = symptoms_matrix
    if disease_type and disease_encoder is not None:
        try:
            disease_code = disease_encoder.transform([disease_type])[0]
        except ValueError:
            disease_code = None  # Disease never seen in the data: use all patients
        if disease_code is not None:
            row_mask = (cured_patients['Disease Type'] == disease_code).to_numpy()
            if row_mask.any():
                candidates = cured_patients[row_mask]
                # Apply the same positional mask so symptom rows stay aligned
                candidate_symptoms = symptoms_matrix[row_mask]

    # Work on a copy so the score columns never leak into the shared dataset
    candidates = candidates.copy()

    # Combine features; reset_index makes the concat purely positional
    features = pd.concat(
        [
            pd.DataFrame(candidate_symptoms.toarray()),
            candidates[feature_columns].reset_index(drop=True),
        ],
        axis=1,
    ).fillna(0)

    # Convert input patient data into feature format
    patient_symptoms = vectorizer.transform([symptoms]).toarray()
    try:
        patient_gender = label_encoder.transform([gender])[0]
    except ValueError:
        patient_gender = 0  # Default for unseen labels

    # Pass a named frame so the scaler sees the columns it was fitted on
    patient_scaled = scaler.transform(
        pd.DataFrame([[float(age), float(lab_results)]], columns=['Age', 'Lab Results'])
    )
    patient_combined = pd.concat(
        [
            pd.DataFrame(patient_symptoms),
            pd.DataFrame(
                [[patient_scaled[0, 0], patient_gender, patient_scaled[0, 1]]],
                columns=feature_columns,
            ),
        ],
        axis=1,
    ).fillna(0)

    # Compute similarity on plain arrays (columns are matched by position)
    similarity_scores = cosine_similarity(patient_combined.to_numpy(), features.to_numpy())
    candidates['Similarity'] = similarity_scores[0]
    candidates['Match Percentage'] = (candidates['Similarity'] * 100).round(2)

    # Retrieve top similar patients
    top_matches = candidates.sort_values(by='Similarity', ascending=False).head(5)
    return top_matches[['Patient ID', 'Remedial Measures', 'Match Percentage']].to_dict(orient='records')

# Gradio Interface
def gradio_interface(disease_type, symptoms, age, gender, lab_results):
    """Gradio callback: pass the form values to find_similar_patients.

    The arguments are forwarded unchanged and in the same order, and the
    lookup's return value is handed back as-is.
    """
    return find_similar_patients(disease_type, symptoms, age, gender, lab_results)

# Input widgets, listed in the same order as gradio_interface's parameters
_input_components = [
    gr.Textbox(label="Disease Type (leave blank for None)"),
    gr.Textbox(label="Symptoms (comma separated)"),
    gr.Number(label="Age"),
    gr.Radio(["Male", "Female", "Other"], label="Gender"),
    gr.Number(label="Lab Results Value"),
]

# Assemble the web UI around gradio_interface; results are rendered as JSON
demo = gr.Interface(
    title="Patient Similarity Finder",
    description="Finds similar cured patients based on disease, symptoms, and lab results.",
    fn=gradio_interface,
    inputs=_input_components,
    outputs=gr.JSON(label="Top Matching Patients"),
)

# Start the Gradio app
demo.launch()