Update app.py
Browse files
app.py
CHANGED
|
@@ -1,130 +1,27 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import os
|
| 3 |
-
import json
|
| 4 |
-
import requests
|
| 5 |
-
from langchain_community.document_loaders import PyMuPDFLoader
|
| 6 |
-
from openai import OpenAI
|
| 7 |
-
import tiktoken
|
| 8 |
-
import pandas as pd
|
| 9 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 10 |
-
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
| 11 |
-
from langchain_community.vectorstores import Chroma
|
| 12 |
import tempfile
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# API credentials come from the environment — never hard-coded.
# NOTE(review): the variables are named OPENAI_* but the env vars read are the
# generic "API_KEY"/"API_BASE" — confirm the deployment sets those exact names.
OPENAI_API_KEY = os.environ.get("API_KEY")
OPENAI_API_BASE = os.environ.get("API_BASE")

# Initialize OpenAI client
# base_url is overridden so this also works against OpenAI-compatible proxies.
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_API_BASE
)

# Define the system prompt for the model.
# The RAG answer path sends this verbatim as the "system" message; it pins the
# model to the retrieved ###Context and defines the refusal phrase used when
# the context cannot answer the question.
qna_system_message = """
You are an AI assistant designed to support professional doctors at St. Bernard's Medical Center. Your task is to provide evidence-based, concise, and relevant medical information to doctors' clinical questions based on the context provided.
User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of trusted medical literature and research articles relevant to the query, along with their source details.
When crafting your response:
1. Use only the provided context to answer the question.
2. If the answer is found in the context, respond with concise and actionable medical insights.
3. Include the source reference with the page number, journal name, or publication, as provided in the context.
4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."
Please adhere to the following response guidelines:
- Provide clear, direct answers using only the given context.
- Do not include any additional information outside of the context.
- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."
Here is an example of how to structure your response:
Answer:
[Answer based on context]
Source:
[Source details with page or section]
"""

# Define the user message template.
# {context} / {question} are filled via str.replace (not str.format) elsewhere,
# so literal braces in retrieved text cannot break the substitution.
# NOTE(review): the example text mentions "GEN AI Research Paper" while the
# system prompt targets medical literature — likely a leftover from a template.
qna_user_message_template = """
###Context
Here are some excerpts from GEN AI Research Paper and their sources that are relevant to the Gen AI question mentioned below:
{context}
###Question
{question}
"""
| 54 |
|
| 55 |
@st.cache_resource
|
| 56 |
-
def
|
| 57 |
-
|
| 58 |
for uploaded_file in uploaded_files:
|
|
|
|
| 59 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
|
| 60 |
tmp_file.write(uploaded_file.getvalue())
|
| 61 |
tmp_file_path = tmp_file.name
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
| 67 |
-
encoding_name='cl100k_base',
|
| 68 |
-
chunk_size=1000,
|
| 69 |
-
)
|
| 70 |
-
document_chunks = text_splitter.split_documents(all_documents)
|
| 71 |
-
|
| 72 |
-
embedding_model = OpenAIEmbeddings(
|
| 73 |
-
openai_api_key=OPENAI_API_KEY,
|
| 74 |
-
openai_api_base=OPENAI_API_BASE
|
| 75 |
-
)
|
| 76 |
|
| 77 |
-
|
| 78 |
-
vectorstore = Chroma.from_documents(
|
| 79 |
-
document_chunks,
|
| 80 |
-
embedding_model
|
| 81 |
-
)
|
| 82 |
-
return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
context_list = [d.page_content for d in relevant_document_chunks]
|
| 88 |
-
|
| 89 |
-
# Combine document chunks into a single context
|
| 90 |
-
context_for_query = ". ".join(context_list)
|
| 91 |
-
|
| 92 |
-
user_message = qna_user_message_template.replace('{context}', context_for_query)
|
| 93 |
-
user_message = user_message.replace('{question}', user_input)
|
| 94 |
-
|
| 95 |
-
# Generate the response
|
| 96 |
-
try:
|
| 97 |
-
response = client.chat.completions.create(
|
| 98 |
-
model="gpt-4o-mini",
|
| 99 |
-
messages=[
|
| 100 |
-
{"role": "system", "content": qna_system_message},
|
| 101 |
-
{"role": "user", "content": user_message}
|
| 102 |
-
],
|
| 103 |
-
max_tokens=max_tokens,
|
| 104 |
-
temperature=temperature,
|
| 105 |
-
top_p=top_p
|
| 106 |
-
)
|
| 107 |
-
response = response.choices[0].message.content.strip()
|
| 108 |
-
except Exception as e:
|
| 109 |
-
response = f'Sorry, I encountered the following error: \n {e}'
|
| 110 |
-
|
| 111 |
-
return response
|
| 112 |
-
|
| 113 |
-
# Streamlit App
|
| 114 |
-
st.title("LLM-Powered Research Assistant")
|
| 115 |
-
|
| 116 |
-
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
|
| 117 |
|
| 118 |
-
retriever = None
|
| 119 |
if uploaded_files:
|
| 120 |
st.info("Processing uploaded PDFs...")
|
| 121 |
-
|
| 122 |
-
st.success("
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
if retriever:
|
| 126 |
-
user_question = st.text_input("Ask a question about the uploaded documents:")
|
| 127 |
-
if user_question:
|
| 128 |
-
with st.spinner("Generating response..."):
|
| 129 |
-
rag_response = generate_rag_response(user_question, retriever)
|
| 130 |
-
st.write(rag_response)
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import tempfile
|
| 3 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
@st.cache_resource
def process_pdfs(uploaded_files):
    """Write each uploaded PDF to a temporary file and return the file names.

    Each upload is written to a ``NamedTemporaryFile`` (``delete=False`` so
    the path survives the ``with`` block), then the temp file is removed
    immediately; only the original upload names are collected.

    Args:
        uploaded_files: iterable of Streamlit ``UploadedFile`` objects.

    Returns:
        list[str]: the ``.name`` of every uploaded file, in upload order.
    """
    # NOTE(review): @st.cache_resource keys on the argument; confirm a list of
    # UploadedFile objects is hashable/stable enough for the intended caching.
    file_names = []
    for uploaded_file in uploaded_files:
        # Save uploaded file to a temp file; delete=False keeps it on disk
        # past the `with` so a path-based consumer could read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            file_names.append(uploaded_file.name)
        finally:
            # Clean up immediately — guaranteed even if the bookkeeping above
            # raises, so orphaned temp files cannot accumulate (the original
            # leaked the file on any exception before os.remove).
            os.remove(tmp_file_path)
    return file_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
# --- Page layout ------------------------------------------------------------
st.title("PDF Upload Test")

uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Run the processing pass only once at least one file has been uploaded;
# an empty upload widget is falsy, so nothing below executes until then.
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    file_names = process_pdfs(uploaded_files)
    uploaded_count = len(file_names)
    st.success(f"Uploaded {uploaded_count} file(s): {file_names}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|