Spaces:

byteforcegokul
/

sample

Runtime error

File size: 3,765 Bytes

bb05a2a
0377fe9
 
 
 
bb05a2a
0377fe9
 
bb05a2a
 
 
 
 
0377fe9
 
bb05a2a
 
 
 
 
 
 
 
 
0377fe9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb05a2a
0377fe9
 
 
 
bb05a2a
0377fe9
bb05a2a
0377fe9
bb05a2a
 
0377fe9
 
bb05a2a
0377fe9
 
bb05a2a
0377fe9
bb05a2a
0377fe9

import pandas as pd
import gradio as gr
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load cured patients data from a CSV file
file_path = 'cured_patients.csv'  # Ensure this file is available in the working directory
cured_patients = pd.read_csv(file_path)

# Encode categorical data.
# `label_encoder` is dedicated to Gender because find_similar_patients() reuses
# it to encode the query patient's gender. Disease Type gets its own encoder:
# refitting the shared one would replace the Gender classes and make every
# gender lookup fail.
label_encoder = LabelEncoder()
cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
disease_encoder = None  # stays None when the CSV has no 'Disease Type' column
if 'Disease Type' in cured_patients.columns:
    disease_encoder = LabelEncoder()
    cured_patients['Disease Type'] = disease_encoder.fit_transform(cured_patients['Disease Type'])

# Vectorize symptoms (one row per patient, in the same row order as cured_patients)
vectorizer = CountVectorizer()
symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])

# Scale numerical features
scaler = StandardScaler()
cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])


# Define function to find similar patients
def find_similar_patients(disease_type, symptoms, age, gender, lab_results):
    """Return the cured patients most similar to the described patient.

    The query is encoded with the module-level fitted transformers
    (``vectorizer``, ``label_encoder``, ``scaler``) and compared to the stored
    patients by cosine similarity over symptom counts, age, gender and lab
    results.

    Args:
        disease_type: Disease label as text. When empty/None, when the data
            has no 'Disease Type' column, or when no stored patient has this
            disease, all patients are considered.
        symptoms: Symptom text, tokenised by the fitted ``CountVectorizer``.
        age: Patient age; converted with ``float()``.
        gender: Gender label. A label the encoder has not seen is encoded as 0.
        lab_results: Lab result value; converted with ``float()``.

    Returns:
        A list of at most five dicts with the keys 'Patient ID',
        'Remedial Measures' and 'Match Percentage', ordered from most to least
        similar.

    Raises:
        TypeError, ValueError: If ``age`` or ``lab_results`` cannot be
            converted to ``float``.
    """
    # Translate the textual disease into the integer code stored in the
    # 'Disease Type' column; comparing the raw text against the encoded
    # column could never match.
    selected_rows = None
    if disease_type and disease_encoder is not None:
        try:
            disease_code = disease_encoder.transform([disease_type])[0]
        except ValueError:
            disease_code = None  # Disease not present in the data: use all patients
        if disease_code is not None:
            matches = (cured_patients['Disease Type'] == disease_code).to_numpy()
            if matches.any():
                selected_rows = matches.nonzero()[0]

    # Select the candidate rows and their symptom vectors by position so both
    # stay aligned row for row. Work on a copy so the per-query score columns
    # never leak into the shared cured_patients frame.
    if selected_rows is None:
        candidates = cured_patients.copy()
        candidate_symptoms = symptoms_matrix
    else:
        candidates = cured_patients.iloc[selected_rows].copy()
        candidate_symptoms = symptoms_matrix[selected_rows]

    # Combine features: symptom counts, then Age, Gender, Lab Results.
    # The index is reset so the concat pairs rows by position.
    features = pd.concat(
        [
            pd.DataFrame(candidate_symptoms.toarray()),
            candidates[['Age', 'Gender', 'Lab Results']].reset_index(drop=True),
        ],
        axis=1,
    ).fillna(0)

    # Convert input patient data into feature format
    try:
        patient_gender = label_encoder.transform([gender])[0]
    except ValueError:
        patient_gender = 0  # Default for unseen labels

    # Pass a named frame: the scaler was fitted on columns 'Age', 'Lab Results'.
    patient_numeric = pd.DataFrame(
        [[float(age), float(lab_results)]],
        columns=['Age', 'Lab Results'],
    )
    patient_scaled = scaler.transform(patient_numeric)

    # Same column order as `features`, otherwise the similarity would compare
    # mismatched dimensions (e.g. Gender against Lab Results).
    patient_features = pd.DataFrame(vectorizer.transform([symptoms]).toarray())
    patient_features['Age'] = patient_scaled[0, 0]
    patient_features['Gender'] = patient_gender
    patient_features['Lab Results'] = patient_scaled[0, 1]
    patient_features = patient_features.fillna(0)

    # Compute similarity on plain arrays (one query row vs. all candidate rows)
    similarity_scores = cosine_similarity(
        patient_features.to_numpy(dtype=float),
        features.to_numpy(dtype=float),
    )
    candidates['Similarity'] = similarity_scores[0]
    candidates['Match Percentage'] = (candidates['Similarity'] * 100).round(2)

    # Retrieve top similar patients
    top_matches = candidates.sort_values(by='Similarity', ascending=False).head(5)
    return top_matches[['Patient ID', 'Remedial Measures', 'Match Percentage']].to_dict(orient='records')

# Gradio Interface
def gradio_interface(disease_type, symptoms, age, gender, lab_results):
    """Gradio callback: hand the form values to find_similar_patients.

    The values are forwarded as-is and the lookup result is returned
    unchanged.
    """
    return find_similar_patients(
        disease_type=disease_type,
        symptoms=symptoms,
        age=age,
        gender=gender,
        lab_results=lab_results,
    )

# Web form: five inputs, passed to gradio_interface in the order listed,
# and a JSON panel showing the value it returns.
demo = gr.Interface(
    title="Patient Similarity Finder",
    description="Finds similar cured patients based on disease, symptoms, and lab results.",
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Disease Type (leave blank for None)"),
        gr.Textbox(label="Symptoms (comma separated)"),
        gr.Number(label="Age"),
        gr.Radio(choices=["Male", "Female", "Other"], label="Gender"),
        gr.Number(label="Lab Results Value"),
    ],
    outputs=gr.JSON(label="Top Matching Patients"),
)

# Start the app as soon as the module is executed.
demo.launch()