QueenS5Ella commited on
Commit
e6a0ae8
·
verified ·
1 Parent(s): a5e0485

Delete medic_bot.py

Browse files
Files changed (1) hide show
  1. medic_bot.py +0 -359
medic_bot.py DELETED
@@ -1,359 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """Medic_bot.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/#fileId=https%3A//huggingface.co/spaces/QueenS5Ella/Royalty/blob/main/Medic_bot.ipynb
8
- """
9
-
10
- # IMPORT THE NECESSARY LIBRARIES 1
11
- #Import Python libraries: Numpy and Pandas
12
- import pandas as pd
13
- from sklearn.feature_extraction.text import TfidfVectorizer
14
- from sklearn.metrics.pairwise import cosine_similarity
15
- import openai
16
- import faiss
17
- import numpy as np
18
-
19
- #import libraries &modules for data visualization
20
- from pandas.plotting import scatter_matrix
21
- from matplotlib import pyplot
22
-
23
- #import scikit-learn module for algorithm/model: K-Nearest Neighbors regression
24
- from sklearn.neighbors import KNeighborsRegressor
25
-
26
- #import scikit learn module to split the dataset into train/test sub-datasets
27
- from sklearn.model_selection import train_test_split
28
-
29
- #Import scikit-learn module for K-fold cross validation - algorithm/model evaluation & validation
30
- from sklearn.model_selection import KFold
31
- from sklearn.model_selection import cross_val_score
32
-
33
- #Import scikit-learn module for classification report
34
- from sklearn.metrics import classification_report
35
-
36
- from sklearn.preprocessing import LabelEncoder
37
- from sklearn.preprocessing import OrdinalEncoder
38
-
39
- # IMPORTATION OF NECESSARY LIBRARIES 2
40
- import os # for handling data
41
- import re # for text preprocessing
42
-
43
- # For Natural Language Processing tasks
44
- import nltk
45
- from sklearn.model_selection import train_test_split
46
-
47
- nltk.download("punkt")
48
- nltk.download("stopwords")
49
-
50
- # Optional: for vectorization and building of the models
51
- from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
52
-
53
- #IMPORTATION OF THE DIFFERENT MODELS FOR THE CHATBOT
54
- from sklearn.linear_model import LogisticRegression
55
- from sklearn.ensemble import RandomForestRegressor
56
- import xgboost as xgb
57
- from sklearn.linear_model import Ridge
58
- from sklearn.neural_network import MLPRegressor
59
-
60
- import scipy
61
- print(scipy.__version__)
62
-
63
- import gradio as gr
64
-
65
# 🔑 OpenAI client used by the GPT fallback.
# The class has to be imported explicitly: the file-level `import openai` does
# not put the name `OpenAI` in scope, so the original call raised NameError.
from openai import OpenAI

# Use the OPENAI_API_KEY environment variable when it is set; the literal is
# only the original placeholder and must be replaced otherwise.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "sk-..."))

# 📄 Load dataset and drop rows that lack either text column
d1 = pd.read_csv("ai-medical-chatbot.csv")
d1.dropna(subset=["Description", "Doctor"], inplace=True)

vector1 = TfidfVectorizer()
# Keep the sparse matrix — don't convert to dense
qvs = vector1.fit_transform(d1["Description"])  # No .toarray()

d1.head()
77
-
78
def find_best_match(user_input):
    """Return the dataset row whose description is closest to *user_input*.

    The text is projected with the already-fitted ``vector1`` and compared with
    every row of ``qvs`` by cosine similarity.

    Returns:
        ``(description, doctor, score)`` taken from the highest-scoring row of
        ``d1``; ``score`` is a plain ``float``.
    """
    query_vec = vector1.transform([user_input])  # stays sparse
    scores = cosine_similarity(query_vec, qvs)[0]
    top = np.argmax(scores)
    winner = d1.iloc[top]
    return winner["Description"], winner["Doctor"], float(scores[top])
84
-
85
- # 🔍 Vectorize questions
86
- #vectorizer = TfidfVectorizer()
87
- #question_vectors = vectorizer.fit_transform(df["Question"]).toarray()
88
-
89
- # 🔎 Find the most similar FAQ match
90
- #def find_best_match(user_input):
91
- #user_vec = vectorizer.transform([user_input]).toarray()
92
- #similarities = cosine_similarity(user_vec, question_vectors)
93
- #best_idx = np.argmax(similarities[0])
94
- # best_score = float(similarities[0][best_idx])
95
- # return df.iloc[best_idx]["Question"], df.iloc[best_idx]["Answer"], best_score
96
-
97
- # 🤖 Query OpenAI if no good FAQ match
98
def query_gpt(user_input):
    """Ask the OpenAI chat model to answer *user_input*.

    Args:
        user_input: The question text, sent as the single user turn.

    Returns:
        The reply text, or a string starting with "⚠️ GPT Error: " when the
        request (or reading its result) raises.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4",  # or use "gpt-3.5-turbo"
            messages=[
                {"role": "system", "content": "You are a pediatric pulmonology expert."},
                # The user's question is the last turn. The original appended a
                # canned assistant "Hello" after it, which presents the question
                # to the model as already answered.
                {"role": "user", "content": user_input},
            ],
        )
        # The client returns message objects, not dicts: read the attribute.
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: the caller shows this string to the end user.
        return f"⚠️ GPT Error: {str(e)}"
111
-
112
- # 💬 Chatbot response logic
113
def chatbot_response(user_input):
    """Answer *user_input* from the dataset when a close match exists, else via GPT.

    Args:
        user_input: The question text. ``None`` and whitespace-only text are
            rejected with a prompt instead of raising on ``.strip()``.

    Returns:
        A formatted answer string, "Please enter a question." for blank input,
        or a string starting with "❌ Error processing your question: " when
        matching or the GPT call raises.
    """
    if user_input is None or not user_input.strip():
        return "Please enter a question."

    try:
        matched_q, matched_a, score = find_best_match(user_input)
        # Only trust the dataset answer when the cosine similarity is high.
        if score > 0.75:
            return f"📚 **Answer from FAQ**:\n\n**Q:** {matched_q}\n**A:** {matched_a}"
        gpt_answer = query_gpt(user_input)
        return f"🤖 **Answer from GPT-4**:\n\n{gpt_answer}"
    except Exception as e:
        # Deliberate catch-all: the UI should show a message, not a traceback.
        return f"❌ Error processing your question: {str(e)}"
126
-
127
- # 🌐 Launch Gradio interface
128
# Build the web form around chatbot_response, then serve it (share=True is
# passed through to launch unchanged).
question_box = gr.Textbox(label="Ask any pediatric pulmonology related questions")
answer_box = gr.Textbox(label="Response", lines=10)
gr.Interface(
    fn=chatbot_response,
    inputs=question_box,
    outputs=answer_box,
    title="Pediatric Pulmonology Medicbot",
    description="Answers common non-critical questions about pediatric pulmonology using a mix of FAQ and GPT-4.",
).launch(share=True)
135
-
136
- # Set your OpenAI key
137
- #openai.api_key = "sk-..." # <- Replace this with your actual API key
138
-
139
- # Load CSV
140
# Load CSV
chat = pd.read_csv("PedMedQA_final.csv")

# Notebook-style inspection of the raw table
chat.head()
chat.describe()
chat.isnull().sum()
chat.shape
chat.info()

chat["answer"].unique()
chat["answer"].value_counts()

# Missing answers default to "Reassurance"
chat["answer"] = chat["answer"].fillna("Reassurance")
print(chat["answer"])

# These two were bare attribute references (no parentheses), so the methods
# were never actually called.
chat["age_years"].unique()
chat["age_years"].value_counts()

chat.head()
chat.dtypes

# Keep only rows that have both texts, and one row per distinct question
chat.dropna(subset=["question", "answer"], inplace=True)
chat.drop_duplicates(subset=["question"], inplace=True)

chat.isnull().sum()
171
-
172
# Peek at the first three values of each column. (An OrdinalEncoder pass over
# these same columns is present in the notebook but left disabled.)
for column_name in ("index", "meta_info", "question", "answer_idx", "answer", "options"):
    chat[column_name].head(3)

chat.shape

chat.columns
195
-
196
- from sklearn.linear_model import LassoCV
197
- from sklearn.feature_selection import SelectFromModel
198
-
199
- #clf = LassoCV.fit(X_train, Y_trarin)
200
- #importance = np.abs(clf.coef)
201
- #print(importance)
202
-
203
# Console loop: keep answering until the user types "exit" (any letter case).
prompt_text = "You can ask me any pediatric pulmonology related question (or type 'exit'): "
user_input = input(prompt_text)
while user_input.lower() != "exit":
    response = chatbot_response(user_input)
    print(response)
    user_input = input(prompt_text)
211
-
212
- #response = chatbot_response(ui)
213
- #print(response)
214
# Rows without a question or an answer are removed BEFORE the vectorizer is
# fitted: the lookup functions index `chat` by matrix row number, so dropping
# rows afterwards would leave `chat` and `qvs` misaligned. The original repeated
# this step after fitting, on an undefined `df` with "Question"/"Answer"
# columns, which raised NameError.
chat.dropna(subset=["question", "answer"], inplace=True)

from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorize the questions using TF-IDF
# ✅ 1. Fit and transform your dataset questions
vector1 = TfidfVectorizer()
qvs = vector1.fit_transform(chat["question"]).toarray()

# ✅ 2. Later, transform user input using the same vectorizer
user_vec = vector1.transform([user_input]).toarray()

# 🔌 Connect to OpenAI
#openai.api_key = "your-openai-api-key" # Replace with your real key
231
-
232
- # 🧠 Step 2: Vectorize dataset questions
233
- #vectorizer = TfidfVectorizer()
234
- #question_vectors = vectorizer.fit_transform(df["Question"]).toarray()
235
-
236
- # 🔍 Step 3: Find most similar question
237
def find_best_match(user_input):
    """Return the `chat` row whose question is closest to *user_input*.

    The text is projected with the fitted ``vector1`` and compared with every
    row of ``qvs`` by cosine similarity.

    Returns:
        ``(question, answer, score)`` from the highest-scoring row of ``chat``;
        ``score`` is a plain ``float``.
    """
    user_vec = vector1.transform([user_input]).toarray()
    similarities = cosine_similarity(user_vec, qvs)[0]
    best_idx = int(np.argmax(similarities))
    # The original read the score through an undefined `answer_idx` and the
    # question through an undefined `df`; both belong to `best_idx` / `chat`.
    best_row = chat.iloc[best_idx]
    return best_row["question"], best_row["answer"], float(similarities[best_idx])
243
-
244
- # 🤖 Step 4: Fallback to GPT-4 if no good match
245
def query_gpt(user_input):
    """Ask the OpenAI chat model to answer *user_input*.

    Args:
        user_input: The question text, sent as the single user turn.

    Returns:
        The reply text, or a string starting with "⚠️ GPT Error: " when the
        request (or reading its result) raises. The original never returned
        the reply and called an undefined ``risky_function()`` instead.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a pediatric pulmonology expert."},
                {"role": "user", "content": user_input},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: the caller embeds this string in its reply.
        return f"⚠️ GPT Error: {str(e)}"
258
-
259
- # 💬 Step 5: Define chatbot logic
260
def chatbot_response(user_input):
    """Answer *user_input* from the dataset when a close match exists, else via GPT.

    Args:
        user_input: The question text. ``None`` and whitespace-only text get
            the prompt "Please enter a question." rather than a lookup.

    Returns:
        The formatted FAQ answer when the best match scores above 0.75,
        otherwise the formatted GPT-4 answer.
    """
    # Same blank-input guard as the earlier definition of this function.
    if user_input is None or not user_input.strip():
        return "Please enter a question."

    matched_q, matched_a, score = find_best_match(user_input)
    if score > 0.75:
        return f"📚 Answer from FAQ:\nQ: {matched_q}\nA: {matched_a}"
    return f"🤖 Answer from GPT-4:\n{query_gpt(user_input)}"
266
-
267
- # 🌐 Step 6: Launch Gradio interface
268
# Wrap chatbot_response in a single-textbox web form and start serving it.
royalty_inputs = gr.Textbox(label="Ask any pediatric pulmonology related question")
royalty_outputs = gr.Textbox(label="Response")
gr.Interface(
    fn=chatbot_response,
    inputs=royalty_inputs,
    outputs=royalty_outputs,
    title="Royalty Medic_bot",
    description="Get non-crtical answers to common pediatric respiratory health questions.",
).launch(share=True)
275
-
276
-
277
def find_best_match(user_input):
    """Return the `chat` row whose question is closest to *user_input*.

    Uses the fitted ``vector1`` / ``qvs`` pair. The original referred to
    ``vectorizer``, ``question_vectors`` and ``answer_idx``, none of which is
    assigned in the visible code, and to "Question"/"Answer" columns, whereas
    ``chat`` is cleaned on lowercase "question"/"answer".

    Returns:
        ``(question, answer, score)`` from the highest-scoring row of ``chat``;
        ``score`` is a plain ``float``.
    """
    input_vec = vector1.transform([user_input]).toarray()
    sims = cosine_similarity(input_vec, qvs)[0]
    idx = int(np.argmax(sims))
    match = chat.iloc[idx]
    return match["question"], match["answer"], float(sims[idx])
283
-
284
- while True:
285
- user_input = input("🧒 Ask a pediatric pulmonology question (or type 'exit'): ")
286
- if user_input.lower() == "exit":
287
- print("👋 Goodbye!")
288
- break
289
- print(chatbot_response(user_input))
290
-
291
-
292
def chatbot_gradio_interface(user_input):
    """Gradio callback: forward the typed text to chatbot_response and return its reply."""
    reply = chatbot_response(user_input)
    return reply


# Plain text-in / text-out form around the callback above.
gr.Interface(
    fn=chatbot_gradio_interface,
    inputs="text",
    outputs="text",
    title="Pediatric Pulmonology Medicbot",
    description="Ask any question related to pediatric lung health.",
).launch(share=True)
300
-
301
-
302
-
303
-
304
-
305
# Build FAISS index for similarity search.
# `question_vectors` is not assigned anywhere in the visible code (its
# definition is commented out), so the TF-IDF matrix `qvs` is indexed instead.
# FAISS takes float32 C-contiguous arrays, hence the explicit conversion;
# a sparse `qvs` is densified first.
_dense_qvs = qvs.toarray() if hasattr(qvs, "toarray") else qvs
_faiss_vectors = np.ascontiguousarray(_dense_qvs, dtype=np.float32)
index = faiss.IndexFlatL2(_faiss_vectors.shape[1])
index.add(_faiss_vectors)
308
-
309
- # Function to find the closest question
310
def find_most_similar_question(user_question, top_k=1):
    """Look up the indexed question nearest to *user_question*.

    Args:
        user_question: Text to search for.
        top_k: Number of neighbours requested from the FAISS index; only the
            first one is returned.

    Returns:
        ``(question, answer)`` from the matching row of ``chat``.
    """
    # The original used `vectorizer` and `df`, which are not assigned in the
    # visible code; the fitted objects are `vector1` and `chat`.
    # FAISS expects a float32 query.
    user_vec = np.ascontiguousarray(
        vector1.transform([user_question]).toarray(), dtype=np.float32
    )
    _distances, neighbours = index.search(user_vec, top_k)
    best_row = chat.iloc[neighbours[0][0]]
    return best_row["question"], best_row["answer"]
314
-
315
- # Function to query a language model
316
def ask_openai(question, model="gpt-4"):
    """Send *question* to an OpenAI chat model and return the reply text.

    Args:
        question: The user's question.
        model: Chat model name passed to the API.

    Returns:
        The reply text, or ``None`` after printing the error when the request
        (or reading its result) raises.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a pediatric pulmonology expert."},
                {"role": "user", "content": question},
            ],
            temperature=0.3,
        )
        # The client returns message objects, not dicts: read the attribute.
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: returning None lets the caller try another model.
        print(f"Error with {model}: {e}")
        return None
330
-
331
- # Main chatbot function
332
def pediatric_pulmonology_chatbot(user_input):
    """Answer from the knowledge base when the match is close, else ask OpenAI.

    The nearest stored question is fetched with find_most_similar_question and
    re-scored against *user_input* by TF-IDF cosine similarity. Above 0.7 the
    stored answer is returned; otherwise GPT-4 is tried, then GPT-3.5.

    Returns:
        The formatted answer, or "Sorry, I couldn't find an answer to that."
        when neither model returns a reply.
    """
    matched_question, matched_answer = find_most_similar_question(user_input)

    # The original used `vectorizer`, which is not assigned in the visible
    # code; the fitted TF-IDF object is `vector1`.
    similarity = cosine_similarity(
        vector1.transform([user_input]), vector1.transform([matched_question])
    )[0][0]

    if similarity > 0.7:
        return f"(From Knowledge Base)\nQ: {matched_question}\nA: {matched_answer}"

    # Try GPT-4 first, then fall back to GPT-3.5.
    for model_name, label in (("gpt-4", "GPT-4"), ("gpt-3.5-turbo", "GPT-3.5")):
        reply = ask_openai(user_input, model=model_name)
        if reply:
            return f"(From {label})\n{reply}"
    return "Sorry, I couldn't find an answer to that."
353
-
354
- # 🔁 Example interaction
355
# 🔁 Example interaction: answer questions until the user types "exit"
# (any letter case).
question_prompt = "\n👶 Ask a pediatric pulmonology question (or type 'exit'): "
user_input = input(question_prompt)
while user_input.lower() != "exit":
    print(pediatric_pulmonology_chatbot(user_input))
    user_input = input(question_prompt)