Spaces:

byteforcegokul
/

sample

Runtime error

App Files Files Community

byteforcegokul committed on Apr 2, 2025

Commit

0377fe9

verified ·

1 Parent(s): 115b0e5

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -108

app.py CHANGED Viewed

@@ -1,32 +1,18 @@
-#input values, note give None if disease_type is not mentioned.
-disease_type = input("Enter the Disease Type: ")
-symptoms = input("Enter the Symptoms (comma separated): ")
-age = float(input("Enter the Age: "))
-gender = input("Enter the Gender (Male/Female/Other): ")
-lab_results = float(input("Enter the Lab Results value: "))
-###################################################################
 import pandas as pd
-# Upload cured patients data from a CSV file
-# Assuming the CSV file is stored in the same directory as the Jupyter Notebook
-file_path = 'cured_patients.csv'  # Path to the CSV file
-# Load the data
 cured_patients = pd.read_csv(file_path)
-# Display the first few rows of the dataset to ensure it's loaded correctly
-from sklearn.preprocessing import LabelEncoder
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.preprocessing import StandardScaler
 # Encode categorical data
 label_encoder = LabelEncoder()
 cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
-cured_patients['Disease Type'] = label_encoder.fit_transform(cured_patients['Disease Type'])
 # Vectorize symptoms
 vectorizer = CountVectorizer()
@@ -36,97 +22,73 @@ symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])
 scaler = StandardScaler()
 cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])
-from sklearn.metrics.pairwise import cosine_similarity
-import pandas as pd
-# Define a function to find similar patients
-def find_similar_patients(current_patient, cured_patients, n=3):
-    # Handle NaN values in the cured_patients dataset
-    cured_patients.fillna({
-        'Age': cured_patients['Age'].median(),
-        'Gender': 'Unknown',
-        'Lab Results': cured_patients['Lab Results'].median(),
-        'Disease Type': 'Unknown',
-    }, inplace=True)
     # Filter based on Disease Type if provided
-    if 'Disease Type' in current_patient and current_patient['Disease Type']:
-        filtered_cured_patients = cured_patients[cured_patients['Disease Type'] == current_patient['Disease Type']]
-        if filtered_cured_patients.empty:
-            print("No matching Disease Type found. Returning results from all patients.")
-            filtered_cured_patients = cured_patients
     else:
-        print("Disease Type not provided. Using all patients for similarity calculation.")
-        filtered_cured_patients = cured_patients
-    # Combine features (Symptoms vector + numerical features)
     features = pd.concat([
         pd.DataFrame(symptoms_matrix.toarray()),
-        filtered_cured_patients[['Age', 'Gender', 'Lab Results']]
-    ], axis=1)
-    # Check and fill any remaining NaN values in features
-    features = features.fillna(0)
-    # Convert current patient into the same feature format
-    current_features = vectorizer.transform([current_patient['Symptoms']]).toarray()
-    # Handle unseen labels for 'Gender'
     try:
-        current_gender = label_encoder.transform([current_patient['Gender']])[0]
     except ValueError:
-        print(f"Warning: Unseen label '{current_patient['Gender']}' for Gender. Assigning default value 0.")
-        current_gender = 0
-    # The StandardScaler was fitted only on 'Age' and 'Lab Results'
-    current_numerical = scaler.transform([[current_patient['Age'], current_patient['Lab Results']]])
-    # Add 'Gender' to the transformed data
-    current_numerical_df = pd.DataFrame(current_numerical, columns=['Age', 'Lab Results'])
-    current_numerical_df['Gender'] = current_gender
-    # Combine current patient features
-    current_combined = pd.concat([pd.DataFrame(current_features), current_numerical_df], axis=1)
-    # Check and fill any NaN values in current_combined
-    current_combined = current_combined.fillna(0)
-    # Compute similarity scores
-    similarity_scores = cosine_similarity(current_combined, features)
-    filtered_cured_patients['Similarity'] = similarity_scores[0]
-    # Calculate match percentage
-    filtered_cured_patients['Match Percentage'] = (filtered_cured_patients['Similarity'] * 100).round(2)
-    # Retrieve top n similar patients
-    top_matches = filtered_cured_patients.sort_values(by='Similarity', ascending=False).head(8)
-    return top_matches[['Patient ID', 'Remedial Measures', 'Similarity', 'Match Percentage']]
-# Example current patient details
-current_patient = {
-    'Disease Type': disease_type,
-    'Symptoms': symptoms,
-    'Age': age,
-    'Gender': gender,
-    'Lab Results': lab_results,
-}
-# Find similar patients
-similar_patients = find_similar_patients(current_patient, cured_patients)
-print(similar_patients)
-##################################################
-# sample input:
-# Enter the Disease Type: COVID-19
-# Enter the Symptoms (comma separated): Dry cough, Fatigue, Loss of taste
-# Enter the Age: 30
-# Enter the Gender (Male/Female/Other): Male
-# Enter the Lab Results value: 27

 import pandas as pd
+import gradio as gr
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+# Load cured patients data from a CSV file
+file_path = 'cured_patients.csv'  # Ensure this file is available in the working directory
 cured_patients = pd.read_csv(file_path)
 # Encode categorical data
 label_encoder = LabelEncoder()
 cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
+if 'Disease Type' in cured_patients.columns:
+    cured_patients['Disease Type'] = label_encoder.fit_transform(cured_patients['Disease Type'])
 # Vectorize symptoms
 vectorizer = CountVectorizer()
 scaler = StandardScaler()
 cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])
+# Define function to find similar patients
+def find_similar_patients(disease_type, symptoms, age, gender, lab_results):
+    # Handle missing Disease Type
+    if not disease_type:
+        disease_type = None
+    # Prepare patient dictionary
+    current_patient = {
+        'Disease Type': disease_type,
+        'Symptoms': symptoms,
+        'Age': float(age),
+        'Gender': gender,
+        'Lab Results': float(lab_results),
+    }
     # Filter based on Disease Type if provided
+    if disease_type and 'Disease Type' in cured_patients.columns:
+        filtered_patients = cured_patients[cured_patients['Disease Type'] == disease_type]
+        if filtered_patients.empty:
+            filtered_patients = cured_patients  # If no match, use all
     else:
+        filtered_patients = cured_patients
+    # Combine features
     features = pd.concat([
         pd.DataFrame(symptoms_matrix.toarray()),
+        filtered_patients[['Age', 'Gender', 'Lab Results']]
+    ], axis=1).fillna(0)
+    # Convert input patient data into feature format
+    patient_symptoms = vectorizer.transform([current_patient['Symptoms']]).toarray()
     try:
+        patient_gender = label_encoder.transform([current_patient['Gender']])[0]
     except ValueError:
+        patient_gender = 0  # Default for unseen labels
+    patient_numerical = scaler.transform([[current_patient['Age'], current_patient['Lab Results']]])
+    patient_combined = pd.concat([pd.DataFrame(patient_symptoms), pd.DataFrame(patient_numerical)], axis=1)
+    patient_combined['Gender'] = patient_gender
+    patient_combined = patient_combined.fillna(0)
+    # Compute similarity
+    similarity_scores = cosine_similarity(patient_combined, features)
+    filtered_patients['Similarity'] = similarity_scores[0]
+    filtered_patients['Match Percentage'] = (filtered_patients['Similarity'] * 100).round(2)
+    # Retrieve top similar patients
+    top_matches = filtered_patients.sort_values(by='Similarity', ascending=False).head(5)
+    return top_matches[['Patient ID', 'Remedial Measures', 'Match Percentage']].to_dict(orient='records')
+# Gradio Interface
+def gradio_interface(disease_type, symptoms, age, gender, lab_results):
+    results = find_similar_patients(disease_type, symptoms, age, gender, lab_results)
+    return results
+demo = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.Textbox(label="Disease Type (leave blank for None)"),
+        gr.Textbox(label="Symptoms (comma separated)"),
+        gr.Number(label="Age"),
+        gr.Radio(["Male", "Female", "Other"], label="Gender"),
+        gr.Number(label="Lab Results Value")
+    ],
+    outputs=gr.JSON(label="Top Matching Patients"),
+    title="Patient Similarity Finder",
+    description="Finds similar cured patients based on disease, symptoms, and lab results."
+)
+demo.launch()