Santiago Valencia committed on
Commit 323dc50 · 1 Parent(s): 8eb7935

changed gpt-llm.py name to app.py

Files changed (1)
gpt-llm.py → app.py +587 -587
gpt-llm.py → app.py RENAMED
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import tqdm
from sentence_transformers import SentenceTransformer, util
import re
from datetime import datetime, date
import time
from openai import OpenAI
import json
import os
from typing import Dict, Any, List
import textwrap
from flask import Flask, request, jsonify
import gradio as gr
import streamlit as st

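# Note: matplotlib, requests, tqdm, time, Flask, and streamlit are imported
# above but never used in this script; only torch, numpy, pandas,
# sentence_transformers, re, datetime, openai, json, os, typing, textwrap,
# and gradio are exercised below.
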
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Phobos 🪐</h1>
<p>This is an open, tuned model fitted onto a RAG pipeline using <a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2"><b>all-mpnet-base-v2</b></a>.</p>
<h3 style="text-align: center;">To chat, say 'gen phobos' for a general question on any topic, or 'phobos' for specifically medical questions.</h3>
</div>
'''

# API keys
api_key = os.getenv('OPEN_AI_API_KEY')

# Load the pre-computed chunk embeddings and parse each stringified vector
df_embeds = pd.read_csv("chunks_tokenized.csv")
df_embeds["embeddings"] = df_embeds["embeddings"].apply(lambda x: np.fromstring(x.strip("[]"), sep=" "))

embeds_dict = df_embeds.to_dict(orient="records")

# Convert into tensors
embeddings = torch.tensor(np.array(df_embeds["embeddings"].to_list()), dtype=torch.float32).to('cuda')


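# Note: the hard-coded 'cuda' above assumes a GPU is present. A defensive
# sketch (an assumption, not part of the original pipeline) would select the
# device at runtime instead:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   embeddings = embeddings.to(device)
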
# Make a text wrapper
def text_wrapper(text):
    """
    Wraps the given text to 80 columns and returns it.
    """
    clean_text = textwrap.fill(text, 80)

    # Return (rather than print) so callers can embed the result in f-strings
    return clean_text

# Let's first get the embedding model
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2",
                                      device='cuda')


# Functionize the RAG pipeline

def rag_pipeline(query,
                 embedding_model,
                 embeddings,
                 device: str,
                 chunk_min_token: list):
    """
    Embeds a query, scores it against the stored passage embeddings, and
    prints the top 5 most relevant results ranked by dot score.
    """

    # Retrieval: embed the query
    query_embeddings = embedding_model.encode(query, convert_to_tensor=True).to(device)

    # Score the query against every stored passage embedding
    dot_scores = util.dot_score(a=query_embeddings, b=embeddings)[0]

    # Output the top 5 matches
    scores, indices = torch.topk(dot_scores, k=5)
    counting = 0
    for score, idx in zip(scores, indices):
        counting += 1
        clean_score = score.item() * 100
        print(f"Result ({counting}) has a score: {round(clean_score, 2)}%")
        print(f"On index: {idx}")
        print("Relevant Text:\n")
        print(f"{text_wrapper(chunk_min_token[idx]['sentence_chunk'])}\n")


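# Note: dot-product scores match cosine similarity only if the stored
# embeddings are L2-normalized; when that is not guaranteed,
# util.cos_sim(query_embeddings, embeddings) is the safer scoring choice.
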
# Message request to GPT
def message_request_to_model(input_text: str):
    """
    Builds the message list to pass to the API request.
    """
    message_to_model = [
        {"role": "system", "content": "You are a helpful assistant called 'Phobos'."},
        {"role": "user", "content": input_text},  # Must be a string or the request won't be successful
    ]

    return message_to_model


# Functionize the API request, as when calling GPT for the first time
def request_gpt_model(input_text,
                      temperature,
                      message_to_model_api,
                      model: str="gpt-3.5-turbo"):
    """
    Sends a chat-completion request to the GPT API with the given messages,
    using the full generated prompt as instructions to the model, and
    returns the response text along with its JSON dump.
    """
    # Create client
    client = OpenAI(api_key=api_key)

    # Make a request for the input prompt
    response = client.chat.completions.create(
        model=model,
        messages=message_to_model_api,
        temperature=temperature,
    )

    # Output the message in readable format
    output = response.choices[0].message.content
    json_response = json.dumps(json.loads(response.model_dump_json()), indent=4)
    # print(f"{text_wrapper(output)}")
    print(output)
    return output, json_response

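# Illustrative round trip (the question here is made up):
#   msgs = message_request_to_model("What is iron-deficiency anemia?")
#   answer, raw_json = request_gpt_model("What is iron-deficiency anemia?",
#                                        temperature=0,
#                                        message_to_model_api=msgs)
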
# Functionize saving output to file
def save_log_models_activity(query, prompt, continue_question, output, cont_output, embeds_dict, json_response,
                             model, rag_pipeline, message_request_to_model, indices, embedding_model, source_directed: str):
    """
    Saves the model's input/output interaction to a .txt file for each
    request, labeling the model that was used, the embedding process and
    pipeline that were used, and the date and time it was run.
    """
    # If there is a follow-up question, name the log file after it
    input_query = ""
    if continue_question != "":
        input_query += continue_question
    else:
        input_query += query

    clean_query = re.sub(r'[^\w\s]', '', input_query).replace(' ', '_')
    os.makedirs("./logfiles/may-2024/", exist_ok=True)  # ensure the log directory exists
    file_path = os.path.join("./logfiles/may-2024/", f"{clean_query}.txt")

    # Open the file in write mode
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(f"Original Query: {query}\n\n")
        if prompt != "":
            file.write(f"Base Prompt: {prompt}\n\n")
        if continue_question != "":
            file.write(f"Follow up question:\n\n{continue_question}\n\n")
            file.write(f"Output:\n\n {cont_output}")
        else:
            file.write(f"Output:\n\n{output}\n\n")

        # Json response
        file.write(f"\n\nJson format response: {json_response}\n\n")

        # Log only the top retrieved passage, then stop
        for idx in indices:
            if rag_pipeline:
                file.write(f"{source_directed}")
                file.write(f"\n\nPipeline Used: RAG\n")
                file.write(f"Embedding Model used on tokenizing pipeline:\n\n{embedding_model}\n")

                file.write(f"\nRelevant Passages: {embeds_dict[idx]['sentence_chunk']}\n\n")
            break
        file.write(f"Model used: {model}\n")
        # file.write(f"{message_request_to_model}")
        today = date.today()
        current_time = datetime.now().time()
        file.write(f"Date: {today.strftime('%B %d, %Y')}\nTime: {current_time.strftime('%H:%M:%S')}\n\n")


# Retrieve RAG resources such as scores and indices
def rag_resources(query: str,
                  device: str="cuda"):
    """
    Extracts only the scores and indices of the top 5 results,
    ranked by dot score against the query.
    """

    # Retrieval: embed the query
    query_embeddings = embedding_model.encode(query, convert_to_tensor=True).to(device)

    # Score against the stored embeddings
    dot_scores = util.dot_score(a=query_embeddings, b=embeddings)[0]

    # Output
    scores, indices = torch.topk(dot_scores, k=5)

    return scores, indices

# Format the RAG prompt
def rag_prompt_formatter(prompt: str,
                         prev_quest: list,
                         context_items: List[Dict[str, Any]]):
    """
    Formats the base prompt with the retrieved context items and the user query.
    """
    # Convert the list into a string so it can be formatted into base_prompt
    prev_questions_str = '\n'.join(prev_quest)

    context = "- " + "\n- ".join(i["sentence_chunk"] for i in context_items)

    base_prompt = """In this text, you will act as a supportive medical assistant.
Give yourself room to think.
Explain each topic with facts and also suggestions based on the user's needs.
Keep your answers thorough but practical.
\nHere are the past questions and answers you gave to the user, to serve as your memory:
{previous_questions}
\nYou as the assistant will receive context items for retrieving information.
\nNow use the following context items to answer the user query. Be advised: if the user does not give you
any query that seems medical, DO NOT extract the relevant passages:
{context}
\nRelevant passages: Please extract the context items that helped you answer the user's question
<extract relevant passages from the context here>
User query: {query}
Answer:"""

    prompt = base_prompt.format(previous_questions=prev_questions_str, context=context, query=prompt)
    return prompt

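# Illustrative call (all arguments are made up):
#   p = rag_prompt_formatter(prompt="What causes anemia?",
#                            prev_quest=["Q: ...  A: ..."],
#                            context_items=[{"sentence_chunk": "Anemia occurs when ..."}])
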
# Format a general prompt for any question
def general_prompt_formatter(prompt: str,
                             prev_quest: list):
    """
    Formats the prompt to just pass along the previous questions,
    without RAG.
    """
    # Convert the list into a string so it can be formatted into base_prompt
    prev_questions_str = '\n'.join(prev_quest)

    base_prompt = """In this text, you will act as a supportive assistant.
Give yourself room to think.
Explain each topic with facts and also suggestions based on the user's needs.
Keep your answers thorough but practical.
\nHere are the past questions and answers you gave to the user, to serve as your memory:
{previous_questions}
\nAnswer the user query regardless of whether there were past questions or not.
\nUser query: {query}
Answer:"""
    prompt = base_prompt.format(previous_questions=prev_questions_str, query=prompt)  # .format expects a string to substitute, not a list
    return prompt

# Saving previous questions and answers
def prev_recent_questions(input_text: str,
                          ai_output: list):
    """
    Saves the current question and the AI's answer to a .txt file and
    returns the file path. The caller stores these paths in a list and
    resets the list once it reaches its size limit, so the next batch
    of questions and answers can be collected.
    """
    formatted_response = f"Current Question: {input_text}\n\n"

    # Convert the tuple elements to strings and concatenate them with the formatted_response
    formatted_response += "".join(str(elem) for elem in ai_output)

    # Clean the query (input_text)
    clean_query = re.sub(r'[^\w\s]', '', input_text).replace(' ', '_')
    os.makedirs("./memory/may-2024", exist_ok=True)  # ensure the memory directory exists
    file_path = os.path.join("./memory/may-2024", f"{clean_query}.txt")

    # Let's save the content in the path for the .txt file
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(formatted_response)
            today = date.today()
            current_time = datetime.now().time()
            file.write(f"\n\nDate: {today.strftime('%B %d, %Y')}\nTime: {current_time.strftime('%H:%M:%S')}\n\n")
    except Exception as e:
        print(f"Error writing file: {e}")

    # Return the path so the caller can track it
    return file_path


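# Note: because the cleaned question doubles as the filename, asking the
# same question twice overwrites the earlier memory file instead of
# adding a new one.
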
# Function RAG-GPT
def rag_gpt(query: str,
            previous_quest: list,
            continue_question: str="",
            rag_pipeline: bool=True,
            temperature: float=0,
            model: str="gpt-3.5-turbo",
            embeds_dict=embeds_dict):
    """
    The RAG system implemented with OpenAI models. The query is processed
    through RAG, then formatted into an instructive prompt for the model,
    filled with examples, context items, and the query. That prompt is
    passed to the model's API endpoint, and the response is cleanly
    returned as output.
    """

    if continue_question == "":
        print(f"Your question: {query}\n")
    else:
        print(f"Your Question: {continue_question}\n")

    # Show query
    query_back = f"Your question: {query}\n"
    cont_query_back = f"Your Question: {continue_question}\n"
    top_score_back = ""
    # RAG resources
    if rag_pipeline:
        scores, indices = rag_resources(query)
        # Get context items for prompt generation
        context_items = [embeds_dict[idx] for idx in indices]

        # Augment the context items with the base prompt and user query
        prompt = rag_prompt_formatter(prompt=query, prev_quest=previous_quest, context_items=context_items)

        # Show analytics on response data
        top_score = [score.item() for score in scores]
        print(f"Highest Result: {round(top_score[0] * 100, 2)}%\n")
        top_score_back += f"Highest Result: {round(top_score[0] * 100, 2)}%\n"

    else:
        prompt = general_prompt_formatter(prompt=query, prev_quest=previous_quest)
        print(f"Here are the previous questions: {previous_quest}")
        print(f"This is the prompt: {prompt}")
        print("\nEnd of prompt")

    # All variables to return back as JSON on the API endpoint for Gradio
    cont_output_back = ""
    output_back = ""
    source_grabbed_back = ""
    url_source_back = ""
    pdf_source_back = ""
    link_or_pagnum_back = ""

    # LLM input prompt
    # If there is a follow-up question, log the model's activity in a txt file
    if continue_question != "":
        message_request = message_request_to_model(input_text=continue_question)
        cont_output, json_response = request_gpt_model(continue_question, temperature=temperature, message_to_model_api=message_request, model=model)
        cont_output_back += cont_output
        output = ""
        if rag_pipeline:  # retrieval indices only exist when the RAG pipeline ran
            index = embeds_dict[indices[0]]
            # Let's get the link or page number of the retrieval
            link_or_pagnum = index["link_or_page_number"]
            link_or_pagnum = str(link_or_pagnum)
            if link_or_pagnum.isdigit():
                link_or_pagnum_back += link_or_pagnum
                source = "The source originates from a PDF"
                save_log_models_activity(query=query,
                                         prompt=prompt,
                                         continue_question=continue_question,
                                         output=output,
                                         cont_output=cont_output,
                                         embeds_dict=embeds_dict,
                                         json_response=json_response,
                                         model=model,
                                         rag_pipeline=rag_pipeline,
                                         message_request_to_model=continue_question,
                                         indices=indices,
                                         embedding_model=embedding_model,
                                         source_directed=source)

            else:
                link = f"Source Directed : {index['link_or_page_number']}"
                save_log_models_activity(query=query,
                                         prompt=prompt,
                                         continue_question=continue_question,
                                         output=output,
                                         cont_output=cont_output,
                                         embeds_dict=embeds_dict,
                                         json_response=json_response,
                                         model=model,
                                         rag_pipeline=rag_pipeline,
                                         message_request_to_model=continue_question,
                                         indices=indices,
                                         embedding_model=embedding_model,
                                         source_directed=link)
        else:
            save_log_models_activity(query=query,
                                     prompt=prompt,
                                     continue_question=continue_question,
                                     output=output,
                                     cont_output=cont_output,
                                     embeds_dict=embeds_dict,
                                     json_response=json_response,
                                     model=model,
                                     rag_pipeline=rag_pipeline,
                                     message_request_to_model=continue_question,
                                     indices="",
                                     embedding_model=embedding_model,
                                     source_directed="")

    # If no follow-up question
    else:
        message_request = message_request_to_model(input_text=prompt)
        output, json_response = request_gpt_model(prompt, temperature=temperature, message_to_model_api=message_request, model=model)
        output_back += output
        cont_output = ""
        if rag_pipeline:
            index = embeds_dict[indices[0]]
            # Let's get the link or page number of the retrieval
            link_or_pagnum = index["link_or_page_number"]
            link_or_pagnum = str(link_or_pagnum)
            if link_or_pagnum.isdigit():
                link_or_pagnum_back += link_or_pagnum
                print("is digit\n")
                source = "The source originates from a PDF"
                save_log_models_activity(query=query,
                                         prompt=prompt,
                                         continue_question=continue_question,
                                         output=output,
                                         cont_output=cont_output,
                                         embeds_dict=embeds_dict,
                                         json_response=json_response,
                                         model=model,
                                         rag_pipeline=rag_pipeline,
                                         message_request_to_model=query,
                                         indices=indices,
                                         embedding_model=embedding_model,
                                         source_directed=source)

            else:
                link = f"Source Directed : {index['link_or_page_number']}"
                save_log_models_activity(query=query,
                                         prompt=prompt,
                                         continue_question=continue_question,
                                         output=output,
                                         cont_output=cont_output,
                                         embeds_dict=embeds_dict,
                                         json_response=json_response,
                                         model=model,
                                         rag_pipeline=rag_pipeline,
                                         message_request_to_model=query,
                                         indices=indices,
                                         embedding_model=embedding_model,
                                         source_directed=link)
        else:
            save_log_models_activity(query=query,
                                     prompt=prompt,
                                     continue_question="",
                                     output=output,
                                     cont_output="",
                                     embeds_dict=embeds_dict,
                                     json_response=json_response,
                                     model=model,
                                     rag_pipeline=rag_pipeline,
                                     message_request_to_model="",
                                     indices="",
                                     embedding_model=embedding_model,
                                     source_directed="")

    if rag_pipeline:
        # Report only the top retrieved source, then stop
        for idx in indices:
            print(f"\n\nOriginated Source:\n\n {embeds_dict[idx]['sentence_chunk']}\n")
            source_grabbed_back += f"\n\nOriginated Source:\n\n {embeds_dict[idx]['sentence_chunk']}\n"
            link_or_pagnum = embeds_dict[idx]['link_or_page_number']
            link_or_pagnum = str(link_or_pagnum)
            if link_or_pagnum.isdigit():
                link_or_pagnum = int(link_or_pagnum)
                print("The source originates from a PDF")
                pdf_source_back += "The source originates from a PDF"
            else:
                print(f"Source Directed : {embeds_dict[idx]['link_or_page_number']}")
                url_source_back += f"Source Directed : {embeds_dict[idx]['link_or_page_number']}"
            break

    if continue_question != "":
        return cont_output_back, source_grabbed_back, pdf_source_back, url_source_back

    else:
        return output_back, source_grabbed_back, pdf_source_back, url_source_back

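# Illustrative call (the question is made up):
#   answer, source, pdf_src, url_src = rag_gpt("What is anemia?",
#                                              previous_quest=[])
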
# Mode of the LLM
llm_mode = ""

# List of file paths for memory
memory_file_paths = []

# First-time condition
first_time = True

# Previous questions stored in a list for the memory of the LLM
prev_5_questions_list = []

def check_cuda_and_gpu_type():
    # Check CUDA availability and GPU type
    if torch.cuda.is_available():
        gpu_info = torch.cuda.get_device_name(0)  # Get info about the first GPU
        return f"CUDA is Available! GPU Info: {gpu_info}"
    else:
        return "CUDA is Not Available."


def bot_comms(message, history):
    """
    Communication between the Gradio UI and the rag_gpt model.
    """
    global llm_mode
    global memory_file_paths
    global prev_5_questions_list
    global first_time

    if message == "cuda info":
        output = check_cuda_and_gpu_type()
        return output

    state_mode = True
    # Input as 'gen phobos'
    if message == "gen phobos":
        output_text = "Great! Ask me any question. 🦧"
        llm_mode = message
        return output_text

    if message == "phobos":
        output_text = "Okay! What are your medical questions? ⚕️"
        llm_mode = message
        return output_text

    # Reset memory with a command
    if message == "reset memory":
        memory_file_paths = []
        output_text = "Manually Reset Memory! 🧠"
        return output_text

    if llm_mode == "gen phobos":
        # Read the stored previous question/answer files
        for path in memory_file_paths:
            with open(path, 'r', encoding='utf-8') as file:
                q_a = file.read()
            # Now we have the q/a in string format
            q_a = str(q_a)
            prev_5_questions_list.append(q_a)

        if first_time:
            state_mode = False
            # Pass the previous questions and answers to rag_gpt for the base prompt
            gen_gpt_output = rag_gpt(message, previous_quest=[], rag_pipeline=state_mode)
            first_time = False
        else:
            state_mode = False
            gen_gpt_output = rag_gpt(message, previous_quest=prev_5_questions_list, rag_pipeline=state_mode)

        # Reset the memory file paths
        if len(memory_file_paths) == 5:
            memory_file_paths = []

        file_path = prev_recent_questions(input_text=message, ai_output=gen_gpt_output)
        memory_file_paths.append(file_path)
        # Return this mode's answer directly rather than falling through
        return gen_gpt_output[0]

    if llm_mode == "phobos":
        for path in memory_file_paths:
            with open(path, 'r', encoding='utf-8') as file:
                q_a = file.read()
            # Now we have the q/a in string format
            q_a = str(q_a)
            prev_5_questions_list.append(q_a)

        if first_time:
            # Pass the previous questions and answers to rag_gpt for the base prompt
            rag_output_text = rag_gpt(message, previous_quest=[], rag_pipeline=state_mode)
            first_time = False
        else:
            rag_output_text = rag_gpt(message, previous_quest=prev_5_questions_list, rag_pipeline=state_mode)

        # Reset the memory file paths
        if len(memory_file_paths) == 5:
            memory_file_paths = []

        file_path = prev_recent_questions(input_text=message, ai_output=rag_output_text)
        memory_file_paths.append(file_path)
        # Return this mode's answer directly rather than falling through
        return rag_output_text[0]

    # No mode selected yet: answer without RAG
    output = rag_gpt(query=message,
                     previous_quest=[],
                     rag_pipeline=False)
    formatted_response = output[0]
    return formatted_response

# Gradio block
chatbot = gr.Chatbot(height=725, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=bot_comms,
        chatbot=chatbot,
        fill_height=True,
        examples=["gen phobos", "phobos", "reset memory", "cuda info"],
        cache_examples=False
    )

if __name__ == "__main__":
    demo.launch()
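# To reach the app beyond localhost, demo.launch(server_name="0.0.0.0") or
# demo.launch(share=True) are common Gradio options; whether either fits
# depends on the deployment, and the app as written uses the defaults.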
 