byteforcegokul committed on
Commit
0377fe9
·
verified ·
1 Parent(s): 115b0e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -108
app.py CHANGED
@@ -1,32 +1,18 @@
1
- #input values, note give None if disease_type is not mentioned.
2
-
3
- disease_type = input("Enter the Disease Type: ")
4
- symptoms = input("Enter the Symptoms (comma separated): ")
5
- age = float(input("Enter the Age: "))
6
- gender = input("Enter the Gender (Male/Female/Other): ")
7
- lab_results = float(input("Enter the Lab Results value: "))
8
-
9
- ###################################################################
10
-
11
  import pandas as pd
 
 
 
 
12
 
13
- # Upload cured patients data from a CSV file
14
- # Assuming the CSV file is stored in the same directory as the Jupyter Notebook
15
- file_path = 'cured_patients.csv' # Path to the CSV file
16
-
17
- # Load the data
18
  cured_patients = pd.read_csv(file_path)
19
 
20
- # Display the first few rows of the dataset to ensure it's loaded correctly
21
-
22
- from sklearn.preprocessing import LabelEncoder
23
- from sklearn.feature_extraction.text import CountVectorizer
24
- from sklearn.preprocessing import StandardScaler
25
-
26
  # Encode categorical data
27
  label_encoder = LabelEncoder()
28
  cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
29
- cured_patients['Disease Type'] = label_encoder.fit_transform(cured_patients['Disease Type'])
 
30
 
31
  # Vectorize symptoms
32
  vectorizer = CountVectorizer()
@@ -36,97 +22,73 @@ symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])
36
  scaler = StandardScaler()
37
  cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])
38
 
39
-
40
- from sklearn.metrics.pairwise import cosine_similarity
41
- import pandas as pd
42
-
43
- # Define a function to find similar patients
44
- def find_similar_patients(current_patient, cured_patients, n=3):
45
- # Handle NaN values in the cured_patients dataset
46
- cured_patients.fillna({
47
- 'Age': cured_patients['Age'].median(),
48
- 'Gender': 'Unknown',
49
- 'Lab Results': cured_patients['Lab Results'].median(),
50
- 'Disease Type': 'Unknown',
51
- }, inplace=True)
52
-
 
53
  # Filter based on Disease Type if provided
54
- if 'Disease Type' in current_patient and current_patient['Disease Type']:
55
- filtered_cured_patients = cured_patients[cured_patients['Disease Type'] == current_patient['Disease Type']]
56
- if filtered_cured_patients.empty:
57
- print("No matching Disease Type found. Returning results from all patients.")
58
- filtered_cured_patients = cured_patients
59
  else:
60
- print("Disease Type not provided. Using all patients for similarity calculation.")
61
- filtered_cured_patients = cured_patients
62
 
63
- # Combine features (Symptoms vector + numerical features)
64
  features = pd.concat([
65
  pd.DataFrame(symptoms_matrix.toarray()),
66
- filtered_cured_patients[['Age', 'Gender', 'Lab Results']]
67
- ], axis=1)
68
-
69
- # Check and fill any remaining NaN values in features
70
- features = features.fillna(0)
71
 
72
- # Convert current patient into the same feature format
73
- current_features = vectorizer.transform([current_patient['Symptoms']]).toarray()
74
-
75
- # Handle unseen labels for 'Gender'
76
  try:
77
- current_gender = label_encoder.transform([current_patient['Gender']])[0]
78
  except ValueError:
79
- print(f"Warning: Unseen label '{current_patient['Gender']}' for Gender. Assigning default value 0.")
80
- current_gender = 0
81
-
82
- # The StandardScaler was fitted only on 'Age' and 'Lab Results'
83
- current_numerical = scaler.transform([[current_patient['Age'], current_patient['Lab Results']]])
84
-
85
- # Add 'Gender' to the transformed data
86
- current_numerical_df = pd.DataFrame(current_numerical, columns=['Age', 'Lab Results'])
87
- current_numerical_df['Gender'] = current_gender
88
-
89
- # Combine current patient features
90
- current_combined = pd.concat([pd.DataFrame(current_features), current_numerical_df], axis=1)
91
-
92
- # Check and fill any NaN values in current_combined
93
- current_combined = current_combined.fillna(0)
94
-
95
- # Compute similarity scores
96
- similarity_scores = cosine_similarity(current_combined, features)
97
- filtered_cured_patients['Similarity'] = similarity_scores[0]
98
-
99
- # Calculate match percentage
100
- filtered_cured_patients['Match Percentage'] = (filtered_cured_patients['Similarity'] * 100).round(2)
101
-
102
- # Retrieve top n similar patients
103
- top_matches = filtered_cured_patients.sort_values(by='Similarity', ascending=False).head(8)
104
- return top_matches[['Patient ID', 'Remedial Measures', 'Similarity', 'Match Percentage']]
105
-
106
- # Example current patient details
107
- current_patient = {
108
- 'Disease Type': disease_type,
109
- 'Symptoms': symptoms,
110
- 'Age': age,
111
- 'Gender': gender,
112
- 'Lab Results': lab_results,
113
- }
114
-
115
- # Find similar patients
116
- similar_patients = find_similar_patients(current_patient, cured_patients)
117
- print(similar_patients)
118
-
119
-
120
-
121
-
122
-
123
-
124
- ##################################################
125
- # sample input:
126
-
127
- # Enter the Disease Type: COVID-19
128
- # Enter the Symptoms (comma separated): Dry cough, Fatigue, Loss of taste
129
- # Enter the Age: 30
130
- # Enter the Gender (Male/Female/Other): Male
131
- # Enter the Lab Results value: 27
132
-
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ import gradio as gr
3
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
 
7
+ # Load cured patients data from a CSV file
8
+ file_path = 'cured_patients.csv' # Ensure this file is available in the working directory
 
 
 
9
  cured_patients = pd.read_csv(file_path)
10
 
 
 
 
 
 
 
11
  # Encode categorical data
12
  label_encoder = LabelEncoder()
13
  cured_patients['Gender'] = label_encoder.fit_transform(cured_patients['Gender'])
14
+ if 'Disease Type' in cured_patients.columns:
15
+ cured_patients['Disease Type'] = label_encoder.fit_transform(cured_patients['Disease Type'])
16
 
17
  # Vectorize symptoms
18
  vectorizer = CountVectorizer()
 
22
  scaler = StandardScaler()
23
  cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])
24
 
25
+ # Define function to find similar patients
26
def find_similar_patients(disease_type, symptoms, age, gender, lab_results, top_n=5):
    """Return the cured patients most similar to the described patient.

    Similarity is the cosine similarity between the patient's feature vector
    and each candidate row. Both are laid out as
    ``[symptom counts..., Age, Gender, Lab Results]``.

    Args:
        disease_type: Disease name used to narrow the candidates. Blank or
            ``None`` (or a value that matches no row) means "use everyone".
        symptoms: Free-text symptom description, vectorized with the
            module-level ``vectorizer``.
        age: Patient age; converted with ``float()``.
        gender: Gender label as entered in the UI.
        lab_results: Lab result value; converted with ``float()``.
        top_n: Maximum number of matches to return (default 5).

    Returns:
        A list of dicts with the keys ``'Patient ID'``,
        ``'Remedial Measures'`` and ``'Match Percentage'``, best match first.

    Raises:
        TypeError / ValueError: if ``age`` or ``lab_results`` cannot be
            converted by ``float()``.
    """
    import numpy as np  # local import: the module itself does not import numpy

    numeric_columns = ['Age', 'Gender', 'Lab Results']

    # Normalise a blank / whitespace-only disease type to "not provided".
    disease_type = (disease_type or "").strip() or None
    patient_age = float(age)
    patient_lab = float(lab_results)

    # Start with every row selected; narrow down only on a real match.
    row_mask = np.ones(len(cured_patients), dtype=bool)
    if disease_type and 'Disease Type' in cured_patients.columns:
        # The 'Disease Type' column was label-encoded during module setup, so
        # the raw string has to be mapped to its code before comparing.
        # NOTE(review): setup reuses the single `label_encoder` for Gender and
        # Disease Type, so whenever this column exists the encoder holds the
        # Disease Type classes. Confirm if the setup code ever changes.
        disease_key = disease_type
        try:
            disease_key = label_encoder.transform([disease_type])[0]
        except ValueError:
            pass  # unknown disease: the comparison below simply finds nothing
        match_mask = (cured_patients['Disease Type'] == disease_key).to_numpy()
        if match_mask.any():
            row_mask = match_mask

    # Work on a copy so the shared dataset never gains per-request columns.
    candidates = cured_patients.loc[row_mask].copy()

    # Select symptom rows by position so they stay aligned with `candidates`.
    # Assumes symptoms_matrix has one row per cured_patients row, in the same
    # order (it is built from cured_patients['Symptoms']) -- TODO confirm.
    candidate_features = np.hstack([
        symptoms_matrix[np.flatnonzero(row_mask)].toarray(),
        candidates[numeric_columns].fillna(0).to_numpy(dtype=float),
    ])

    patient_symptoms = vectorizer.transform([symptoms or ""]).toarray()

    # NOTE(review): because `label_encoder` is refit on Disease Type during
    # setup, gender labels are normally "unseen" here and fall back to 0.
    # Fixing that needs a dedicated gender encoder in the setup code.
    try:
        patient_gender = label_encoder.transform([gender])[0]
    except (ValueError, TypeError):
        patient_gender = 0  # Default for unseen / missing labels

    # Pass named columns so the scaler sees the layout it was fitted on.
    scaled = np.asarray(scaler.transform(
        pd.DataFrame([[patient_age, patient_lab]], columns=['Age', 'Lab Results'])
    ))

    # Same column order as numeric_columns: Age, Gender, Lab Results.
    patient_vector = np.hstack([
        patient_symptoms,
        [[scaled[0, 0], float(patient_gender), scaled[0, 1]]],
    ])

    # Compute similarity
    candidates['Similarity'] = cosine_similarity(patient_vector, candidate_features)[0]
    candidates['Match Percentage'] = (candidates['Similarity'] * 100).round(2)

    # Retrieve top similar patients
    top_matches = candidates.sort_values(by='Similarity', ascending=False).head(top_n)
    return top_matches[['Patient ID', 'Remedial Measures', 'Match Percentage']].to_dict(orient='records')
74
+
75
+ # Gradio Interface
76
def gradio_interface(disease_type, symptoms, age, gender, lab_results):
    """Gradio callback: pass the form values to find_similar_patients and return its result unchanged."""
    return find_similar_patients(disease_type, symptoms, age, gender, lab_results)
79
+
80
# Input widgets, listed in the same order as gradio_interface's parameters.
_input_components = [
    gr.Textbox(label="Disease Type (leave blank for None)"),
    gr.Textbox(label="Symptoms (comma separated)"),
    gr.Number(label="Age"),
    gr.Radio(["Male", "Female", "Other"], label="Gender"),
    gr.Number(label="Lab Results Value"),
]

# The matches are shown as JSON because the callback returns a list of dicts.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=_input_components,
    outputs=gr.JSON(label="Top Matching Patients"),
    title="Patient Similarity Finder",
    description="Finds similar cured patients based on disease, symptoms, and lab results.",
)

# Start the web app.
demo.launch()