mohith96 commited on
Commit
55a80e5
·
verified ·
1 Parent(s): 9f2336f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -6
app.py CHANGED
@@ -1,15 +1,130 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
@st.cache_resource
def process_pdfs(files):
    """Placeholder for the expensive PDF parsing/embedding pipeline.

    Currently just echoes the uploaded file names; cached so reruns with the
    same upload set skip re-execution.
    """
    return [uploaded.name for uploaded in files]
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Collect one or more PDFs from the user.
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Temporary diagnostic: surface the raw uploader return value.
st.write("DEBUG:", uploaded_files)

retriever = None
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    retriever = process_pdfs(uploaded_files)
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
3
+ import json
4
+ import requests
5
+ from langchain_community.document_loaders import PyMuPDFLoader
6
+ from openai import OpenAI
7
+ import tiktoken
8
+ import pandas as pd
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
11
+ from langchain_community.vectorstores import Chroma
12
+ import tempfile
13
+
14
+
15
+ OPENAI_API_KEY = os.environ.get("API_KEY")
16
+ OPENAI_API_BASE = os.environ.get("API_BASE")
17
+
18
+ # Initialize OpenAI client
19
+ client = OpenAI(
20
+ api_key=OPENAI_API_KEY,
21
+ base_url=OPENAI_API_BASE
22
+ )
23
+
24
+ # Define the system prompt for the model
25
+ qna_system_message = """
26
+ You are an AI assistant designed to support professional doctors at St. Bernard's Medical Center. Your task is to provide evidence-based, concise, and relevant medical information to doctors' clinical questions based on the context provided.
27
+ User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of trusted medical literature and research articles relevant to the query, along with their source details.
28
+ When crafting your response:
29
+ 1. Use only the provided context to answer the question.
30
+ 2. If the answer is found in the context, respond with concise and actionable medical insights.
31
+ 3. Include the source reference with the page number, journal name, or publication, as provided in the context.
32
+ 4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."
33
+ Please adhere to the following response guidelines:
34
+ - Provide clear, direct answers using only the given context.
35
+ - Do not include any additional information outside of the context.
36
+ - Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
37
+ - If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
38
+ - If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."
39
+ Here is an example of how to structure your response:
40
+ Answer:
41
+ [Answer based on context]
42
+ Source:
43
+ [Source details with page or section]
44
+ """
45
+
46
+ # Define the user message template
47
+ qna_user_message_template = """
48
+ ###Context
49
+ Here are some excerpts from GEN AI Research Paper and their sources that are relevant to the Gen AI question mentioned below:
50
+ {context}
51
+ ###Question
52
+ {question}
53
+ """
54
 
55
@st.cache_resource
def load_and_process_pdfs(uploaded_files):
    """Parse uploaded PDFs into a similarity retriever.

    Each Streamlit UploadedFile is spooled to a temporary file (PyMuPDFLoader
    requires a filesystem path), loaded, chunked with a token-based splitter,
    embedded, and stored in an in-memory Chroma vector store.

    Args:
        uploaded_files: list of Streamlit UploadedFile objects (PDFs).

    Returns:
        A LangChain retriever performing top-5 similarity search.
    """
    all_documents = []
    for uploaded_file in uploaded_files:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        # Fix: the original removed the temp file only on success; if
        # loader.load() raised, the file leaked. try/finally guarantees cleanup.
        try:
            loader = PyMuPDFLoader(tmp_file_path)
            all_documents.extend(loader.load())
        finally:
            os.remove(tmp_file_path)

    # ~1000-token chunks using the cl100k_base encoding (matches OpenAI models).
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=1000,
    )
    document_chunks = text_splitter.split_documents(all_documents)

    embedding_model = OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY,
        openai_api_base=OPENAI_API_BASE
    )

    # In-memory vector store; switch to a persist_directory for durability.
    vectorstore = Chroma.from_documents(
        document_chunks,
        embedding_model
    )
    return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
84
def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
    """Answer a question via retrieval-augmented generation.

    Retrieves the top document chunks for *user_input*, splices them into the
    Q&A prompt template, and asks the chat model. On any API failure the error
    is returned as a user-facing string rather than raised.

    Args:
        user_input: the question to answer.
        retriever: LangChain retriever produced from the uploaded PDFs.
        max_tokens / temperature / top_p: completion sampling controls.

    Returns:
        The model's answer text, or an error message string.
    """
    # Fetch the relevant chunks and merge them into one context string.
    relevant_chunks = retriever.get_relevant_documents(query=user_input)
    context_for_query = ". ".join(chunk.page_content for chunk in relevant_chunks)

    # str.replace (not str.format) so literal braces in the context are safe.
    user_message = (
        qna_user_message_template
        .replace('{context}', context_for_query)
        .replace('{question}', user_input)
    )

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": qna_system_message},
                {"role": "user", "content": user_message}
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return f'Sorry, I encountered the following error: \n {e}'
+
113
# --- Streamlit UI ---
st.title("LLM-Powered Research Assistant")

uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Build the retriever once the user has uploaded at least one PDF.
retriever = None
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    retriever = load_and_process_pdfs(uploaded_files)
    st.success("PDFs processed and ready for questioning!")


# Only offer the question box after documents are indexed.
if retriever:
    user_question = st.text_input("Ask a question about the uploaded documents:")
    if user_question:
        with st.spinner("Generating response..."):
            rag_answer = generate_rag_response(user_question, retriever)
            st.write(rag_answer)