"""Gradio app that ranks cured patients by similarity to a new patient.

At import time the script loads ``cured_patients.csv``, label-encodes the
``Gender`` column (and ``Disease Type`` when that column exists), fits a
bag-of-words model on ``Symptoms`` and standardizes ``Age`` and
``Lab Results``. Queries are answered with cosine similarity over the
combined feature vector.
"""

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load cured patients data from a CSV file
file_path = 'cured_patients.csv'  # Ensure this file is available in the working directory
cured_patients = pd.read_csv(file_path)

# Encode categorical data. Each column gets its OWN encoder: refitting a
# single shared encoder on 'Disease Type' would discard the gender classes
# that are needed later to encode the query patient's gender.
label_encoder = LabelEncoder()  # gender encoder
cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])

disease_encoder = None  # stays None when the CSV has no 'Disease Type' column
if 'Disease Type' in cured_patients.columns:
    disease_encoder = LabelEncoder()
    cured_patients['Disease Type'] = disease_encoder.fit_transform(
        cured_patients['Disease Type']
    )

# Vectorize symptoms (one row per patient, in the same order as the CSV)
vectorizer = CountVectorizer()
symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])

# Scale numerical features
scaler = StandardScaler()
cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(
    cured_patients[['Age', 'Lab Results']]
)


def find_similar_patients(disease_type, symptoms, age, gender, lab_results):
    """Return the (up to) five cured patients most similar to the given one.

    Args:
        disease_type: Raw disease label as it appears in the CSV. A falsy
            value, an unknown label, or a label with no matching rows
            disables the disease filter and all patients are considered.
        symptoms: Free-text symptoms; ``None`` is treated as an empty string.
        age: Patient age (anything accepted by ``float``).
        gender: Raw gender label. Labels unseen in the CSV are encoded as 0.
        lab_results: Lab result value (anything accepted by ``float``).

    Returns:
        A list of dicts with the keys ``'Patient ID'``,
        ``'Remedial Measures'`` and ``'Match Percentage'``, sorted by
        descending similarity.

    Raises:
        ValueError: If ``age`` or ``lab_results`` is ``None`` or cannot be
            converted to ``float``.
    """
    if age is None or lab_results is None:
        raise ValueError("Age and Lab Results are required numeric values.")
    age = float(age)
    lab_results = float(lab_results)

    # Handle missing Disease Type
    if not disease_type:
        disease_type = None

    # The 'Disease Type' column holds encoded integers, so the raw label must
    # be encoded before it can be compared against the column.
    disease_code = None
    if disease_type is not None and disease_encoder is not None:
        try:
            disease_code = disease_encoder.transform([disease_type])[0]
        except (ValueError, TypeError):
            disease_code = None  # Unknown label: fall back to all patients

    # Select candidate rows by POSITION so the same selection can be applied
    # to both the DataFrame and the symptoms matrix, keeping them aligned.
    row_positions = np.arange(len(cured_patients))
    if disease_code is not None:
        disease_mask = (cured_patients['Disease Type'] == disease_code).to_numpy()
        if disease_mask.any():  # If no match, use all
            row_positions = np.flatnonzero(disease_mask)

    # Work on a copy so the result columns added below never leak into the
    # module-level DataFrame between calls.
    candidates = cured_patients.iloc[row_positions].copy()

    # Combine features: [symptom counts..., Age, Gender, Lab Results]
    features = np.hstack([
        symptoms_matrix[row_positions].toarray(),
        candidates[['Age', 'Gender', 'Lab Results']].fillna(0).to_numpy(dtype=float),
    ])

    # Convert input patient data into the same feature format
    patient_symptoms = vectorizer.transform([symptoms or '']).toarray()
    try:
        patient_gender = label_encoder.transform([gender])[0]
    except (ValueError, TypeError):
        patient_gender = 0  # Default for unseen labels
    # A DataFrame with the fitted column names avoids scikit-learn's
    # "X does not have valid feature names" warning.
    patient_numerical = scaler.transform(
        pd.DataFrame([[age, lab_results]], columns=['Age', 'Lab Results'])
    )
    # Column order must mirror `features` exactly, otherwise cosine
    # similarity compares unrelated dimensions.
    patient_vector = np.hstack([
        patient_symptoms,
        [[patient_numerical[0, 0], float(patient_gender), patient_numerical[0, 1]]],
    ])

    # Compute similarity
    similarity_scores = cosine_similarity(patient_vector, features)[0]
    candidates['Similarity'] = similarity_scores
    candidates['Match Percentage'] = (candidates['Similarity'] * 100).round(2)

    # Retrieve top similar patients
    top_matches = candidates.sort_values(by='Similarity', ascending=False).head(5)
    return top_matches[
        ['Patient ID', 'Remedial Measures', 'Match Percentage']
    ].to_dict(orient='records')


# Gradio Interface
def gradio_interface(disease_type, symptoms, age, gender, lab_results):
    """Gradio callback: forward the form values to find_similar_patients."""
    results = find_similar_patients(disease_type, symptoms, age, gender, lab_results)
    return results


demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Disease Type (leave blank for None)"),
        gr.Textbox(label="Symptoms (comma separated)"),
        gr.Number(label="Age"),
        gr.Radio(["Male", "Female", "Other"], label="Gender"),
        gr.Number(label="Lab Results Value"),
    ],
    outputs=gr.JSON(label="Top Matching Patients"),
    title="Patient Similarity Finder",
    description="Finds similar cured patients based on disease, symptoms, and lab results.",
)

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse find_similar_patients) does not launch the UI.
if __name__ == "__main__":
    demo.launch()