mohith96 commited on
Commit
1daeb77
·
verified ·
1 Parent(s): 55a80e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -117
app.py CHANGED
@@ -1,130 +1,27 @@
1
  import streamlit as st
2
- import os
3
- import json
4
- import requests
5
- from langchain_community.document_loaders import PyMuPDFLoader
6
- from openai import OpenAI
7
- import tiktoken
8
- import pandas as pd
9
- from langchain.text_splitter import RecursiveCharacterTextSplitter
10
- from langchain_community.embeddings.openai import OpenAIEmbeddings
11
- from langchain_community.vectorstores import Chroma
12
  import tempfile
13
-
14
-
15
- OPENAI_API_KEY = os.environ.get("API_KEY")
16
- OPENAI_API_BASE = os.environ.get("API_BASE")
17
-
18
- # Initialize OpenAI client
19
- client = OpenAI(
20
- api_key=OPENAI_API_KEY,
21
- base_url=OPENAI_API_BASE
22
- )
23
-
24
- # Define the system prompt for the model
25
- qna_system_message = """
26
- You are an AI assistant designed to support professional doctors at St. Bernard's Medical Center. Your task is to provide evidence-based, concise, and relevant medical information to doctors' clinical questions based on the context provided.
27
- User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of trusted medical literature and research articles relevant to the query, along with their source details.
28
- When crafting your response:
29
- 1. Use only the provided context to answer the question.
30
- 2. If the answer is found in the context, respond with concise and actionable medical insights.
31
- 3. Include the source reference with the page number, journal name, or publication, as provided in the context.
32
- 4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."
33
- Please adhere to the following response guidelines:
34
- - Provide clear, direct answers using only the given context.
35
- - Do not include any additional information outside of the context.
36
- - Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
37
- - If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
38
- - If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."
39
- Here is an example of how to structure your response:
40
- Answer:
41
- [Answer based on context]
42
- Source:
43
- [Source details with page or section]
44
- """
45
-
46
- # Define the user message template
47
- qna_user_message_template = """
48
- ###Context
49
- Here are some excerpts from GEN AI Research Paper and their sources that are relevant to the Gen AI question mentioned below:
50
- {context}
51
- ###Question
52
- {question}
53
- """
54
 
55
  @st.cache_resource
56
- def load_and_process_pdfs(uploaded_files):
57
- all_documents = []
58
  for uploaded_file in uploaded_files:
 
59
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
60
  tmp_file.write(uploaded_file.getvalue())
61
  tmp_file_path = tmp_file.name
62
- loader = PyMuPDFLoader(tmp_file_path)
63
- documents = loader.load()
64
- all_documents.extend(documents)
65
- os.remove(tmp_file_path) # Clean up the temporary file
66
- text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
67
- encoding_name='cl100k_base',
68
- chunk_size=1000,
69
- )
70
- document_chunks = text_splitter.split_documents(all_documents)
71
-
72
- embedding_model = OpenAIEmbeddings(
73
- openai_api_key=OPENAI_API_KEY,
74
- openai_api_base=OPENAI_API_BASE
75
- )
76
 
77
- # Create an in-memory vector store (or use a persistent one if needed)
78
- vectorstore = Chroma.from_documents(
79
- document_chunks,
80
- embedding_model
81
- )
82
- return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
83
 
84
- def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
85
- # Retrieve relevant document chunks
86
- relevant_document_chunks = retriever.get_relevant_documents(query=user_input)
87
- context_list = [d.page_content for d in relevant_document_chunks]
88
-
89
- # Combine document chunks into a single context
90
- context_for_query = ". ".join(context_list)
91
-
92
- user_message = qna_user_message_template.replace('{context}', context_for_query)
93
- user_message = user_message.replace('{question}', user_input)
94
-
95
- # Generate the response
96
- try:
97
- response = client.chat.completions.create(
98
- model="gpt-4o-mini",
99
- messages=[
100
- {"role": "system", "content": qna_system_message},
101
- {"role": "user", "content": user_message}
102
- ],
103
- max_tokens=max_tokens,
104
- temperature=temperature,
105
- top_p=top_p
106
- )
107
- response = response.choices[0].message.content.strip()
108
- except Exception as e:
109
- response = f'Sorry, I encountered the following error: \n {e}'
110
-
111
- return response
112
-
113
- # Streamlit App
114
- st.title("LLM-Powered Research Assistant")
115
-
116
- uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
117
 
118
- retriever = None
119
  if uploaded_files:
120
  st.info("Processing uploaded PDFs...")
121
- retriever = load_and_process_pdfs(uploaded_files)
122
- st.success("PDFs processed and ready for questioning!")
123
-
124
-
125
- if retriever:
126
- user_question = st.text_input("Ask a question about the uploaded documents:")
127
- if user_question:
128
- with st.spinner("Generating response..."):
129
- rag_response = generate_rag_response(user_question, retriever)
130
- st.write(rag_response)
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
2
  import tempfile
3
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
@st.cache_resource
def process_pdfs(uploaded_files):
    """Write each uploaded PDF to a temporary file, then return the upload names.

    Each file's bytes are written to a ``NamedTemporaryFile`` (suffix
    ``.pdf``) which is removed immediately afterwards, so the only lasting
    result is the list of original upload names.

    Args:
        uploaded_files: iterable of Streamlit ``UploadedFile`` objects
            (anything exposing ``.getvalue()`` and ``.name``).

    Returns:
        list[str]: the ``.name`` of every uploaded file, in input order.
    """
    # NOTE(review): st.cache_resource must hash its arguments; confirm that
    # Streamlit's hasher accepts a list of UploadedFile objects.
    file_names = []
    for uploaded_file in uploaded_files:
        # delete=False keeps the path valid until we remove it ourselves.
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
        try:
            tmp_file.write(uploaded_file.getvalue())
            file_names.append(uploaded_file.name)
        finally:
            # Guarantee cleanup even if write()/append raises, so no temp
            # file is leaked on disk (the original removed it only on the
            # success path).
            tmp_file.close()
            os.remove(tmp_file.name)
    return file_names
 
 
 
 
 
 
 
 
 
 
17
 
18
# --- Streamlit page: minimal PDF upload smoke test ---
st.title("PDF Upload Test")

uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Only process once the user has actually selected at least one file.
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    names = process_pdfs(uploaded_files)
    st.success(f"Uploaded {len(names)} file(s): {names}")