byteforcegokul committed on
Commit
bb05a2a
·
verified ·
1 Parent(s): 90a0520

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Recommend remedial measures by finding cured patients similar to a new one.

The script prompts for the current patient's details, loads the cured-patient
records from ``cured_patients.csv``, turns every record into a numeric feature
vector (symptom word counts + scaled age + scaled lab result + gender code) and
prints the most similar cured patients by cosine similarity.
"""

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Input values. Leave the disease type blank (or type "None") when it is not
# known; the search then runs over every cured patient.
disease_type = input("Enter the Disease Type: ")
symptoms = input("Enter the Symptoms (comma separated): ")
age = float(input("Enter the Age: "))
gender = input("Enter the Gender (Male/Female/Other): ")
lab_results = float(input("Enter the Lab Results value: "))

###################################################################

# Cured-patient data. The relative path is resolved against the current
# working directory.
file_path = 'cured_patients.csv'  # Path to the CSV file

# Load the data
cured_patients = pd.read_csv(file_path)

# Fill missing text values *before* encoding/vectorizing: once the columns are
# replaced by integer codes an 'Unknown' fill can no longer take effect, and
# CountVectorizer rejects NaN documents.
cured_patients['Gender'] = cured_patients['Gender'].fillna('Unknown')
cured_patients['Disease Type'] = cured_patients['Disease Type'].fillna('Unknown')
cured_patients['Symptoms'] = cured_patients['Symptoms'].fillna('')

# Encode categorical data. Each column gets its own encoder: refitting a
# single encoder on 'Disease Type' would discard the gender classes and make
# it impossible to encode the current patient's gender later on.
gender_encoder = LabelEncoder()
disease_encoder = LabelEncoder()
cured_patients['Gender'] = gender_encoder.fit_transform(cured_patients['Gender'])
cured_patients['Disease Type'] = disease_encoder.fit_transform(cured_patients['Disease Type'])
# Kept for backward compatibility: the original script exposed one encoder
# under this name, and its final state was the 'Disease Type' fit.
label_encoder = disease_encoder

# Vectorize symptoms. Row i of symptoms_matrix belongs to row i (by position)
# of cured_patients.
vectorizer = CountVectorizer()
symptoms_matrix = vectorizer.fit_transform(cured_patients['Symptoms'])

# Scale numerical features
scaler = StandardScaler()
cured_patients[['Age', 'Lab Results']] = scaler.fit_transform(cured_patients[['Age', 'Lab Results']])

# Non-symptom feature columns, in the order used for BOTH the dataset vectors
# and the current-patient vector. Cosine similarity compares by position, so
# the two orders must be identical.
NUMERIC_FEATURES = ['Age', 'Lab Results', 'Gender']

# Number of matches printed by the script.
TOP_MATCHES = 8


def find_similar_patients(current_patient: dict, cured_patients: pd.DataFrame, n: int = 3) -> pd.DataFrame:
    """Return the ``n`` cured patients most similar to ``current_patient``.

    Relies on the module-level ``vectorizer``, ``symptoms_matrix``, ``scaler``,
    ``gender_encoder`` and ``disease_encoder`` that were fitted on the
    cured-patient data; ``cured_patients`` must therefore be the preprocessed
    frame those objects were fitted on (same rows, same order).

    Args:
        current_patient: Mapping with the keys 'Symptoms', 'Age', 'Gender' and
            'Lab Results' holding raw (unencoded, unscaled) values, plus an
            optional 'Disease Type'. A missing, empty or "None" disease type
            disables the disease filter.
        cured_patients: Preprocessed cured-patient records. Must contain the
            columns 'Age', 'Gender', 'Lab Results', 'Disease Type',
            'Patient ID' and 'Remedial Measures'. The frame is not modified.
        n: Maximum number of matches to return.

    Returns:
        A DataFrame with the columns 'Patient ID', 'Remedial Measures',
        'Similarity' and 'Match Percentage', sorted by descending similarity.

    Raises:
        ValueError: If ``cured_patients`` does not have one row per row of
            ``symptoms_matrix``.
    """
    if symptoms_matrix.shape[0] != len(cured_patients):
        raise ValueError(
            "cured_patients must have exactly one row per row of symptoms_matrix "
            f"({len(cured_patients)} != {symptoms_matrix.shape[0]})."
        )

    # Work on a copy so the caller's DataFrame is never mutated.
    patients = cured_patients.copy()
    patients = patients.fillna({
        'Age': patients['Age'].median(),
        'Lab Results': patients['Lab Results'].median(),
    })

    # Filter based on Disease Type if provided
    requested_disease = current_patient.get('Disease Type')
    requested_disease = '' if requested_disease is None else str(requested_disease).strip()
    selected = pd.Series(True, index=patients.index)
    if requested_disease and requested_disease.lower() != 'none':
        # The column holds integer codes, so the raw name has to be encoded
        # before it can be compared.
        try:
            disease_code = disease_encoder.transform([requested_disease])[0]
        except ValueError:
            same_disease = None
        else:
            same_disease = patients['Disease Type'] == disease_code
        if same_disease is None or not same_disease.any():
            print("No matching Disease Type found. Returning results from all patients.")
        else:
            selected = same_disease
    else:
        print("Disease Type not provided. Using all patients for similarity calculation.")

    # Select rows by position so the symptom vectors stay aligned with the
    # patient records they were computed from.
    positions = selected.to_numpy().nonzero()[0]
    matched = patients.iloc[positions].copy()

    # Combine features (Symptoms vector + numerical features)
    features = pd.concat([
        pd.DataFrame(symptoms_matrix[positions].toarray()),
        matched[NUMERIC_FEATURES].reset_index(drop=True),
    ], axis=1).fillna(0)

    # Convert current patient into the same feature format
    current_symptoms = vectorizer.transform([current_patient['Symptoms']]).toarray()

    # Handle unseen labels for 'Gender'
    try:
        current_gender = gender_encoder.transform([current_patient['Gender']])[0]
    except ValueError:
        print(f"Warning: Unseen label '{current_patient['Gender']}' for Gender. Assigning default value 0.")
        current_gender = 0

    # The StandardScaler was fitted only on 'Age' and 'Lab Results'; pass a
    # frame with the same column names it was fitted with.
    raw_numeric = pd.DataFrame(
        [[current_patient['Age'], current_patient['Lab Results']]],
        columns=['Age', 'Lab Results'],
    )
    current_numeric = pd.DataFrame(scaler.transform(raw_numeric), columns=['Age', 'Lab Results'])
    current_numeric['Gender'] = current_gender

    # Combine current patient features in the same column order as `features`
    current_combined = pd.concat([
        pd.DataFrame(current_symptoms),
        current_numeric[NUMERIC_FEATURES],
    ], axis=1).fillna(0)

    # Compute similarity scores (plain arrays: comparison is positional)
    similarity_scores = cosine_similarity(current_combined.to_numpy(), features.to_numpy())
    matched['Similarity'] = similarity_scores[0]

    # Calculate match percentage
    matched['Match Percentage'] = (matched['Similarity'] * 100).round(2)

    # Retrieve top n similar patients
    top_matches = matched.sort_values(by='Similarity', ascending=False).head(n)
    return top_matches[['Patient ID', 'Remedial Measures', 'Similarity', 'Match Percentage']]


# Current patient details, as entered above
current_patient = {
    'Disease Type': disease_type,
    'Symptoms': symptoms,
    'Age': age,
    'Gender': gender,
    'Lab Results': lab_results,
}

# Find similar patients. The number of rows is requested explicitly so the
# script keeps printing eight matches now that `n` is honoured.
similar_patients = find_similar_patients(current_patient, cured_patients, n=TOP_MATCHES)
print(similar_patients)

##################################################
# sample input:

# Enter the Disease Type: COVID-19
# Enter the Symptoms (comma separated): Dry cough, Fatigue, Loss of taste
# Enter the Age: 30
# Enter the Gender (Male/Female/Other): Male
# Enter the Lab Results value: 27