cryogenic22 committed on
Commit
48e07bb
·
verified ·
1 Parent(s): 5cf581b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -19,6 +19,8 @@ from googleapiclient.http import MediaIoBaseDownload
19
  from google.oauth2 import service_account
20
  import tempfile
21
  import os
 
 
22
 
23
 
24
  # SQLite Database Functions (database.py)
@@ -88,12 +90,16 @@ def upload_and_parse_documents(documents):
88
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
89
  for doc in documents:
90
  try:
 
 
 
 
91
  # Create a temporary file
92
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
93
  tmp_file.write(doc.read())
94
  tmp_file_path = tmp_file.name
95
 
96
- loader = PyPDFLoader(tmp_file_path) # Use the temporary file path
97
  pages = loader.load()
98
  document_names.append(doc.name)
99
  page_contents = []
@@ -104,13 +110,12 @@ def upload_and_parse_documents(documents):
104
  document_pages.append(page_contents)
105
 
106
  # Remove the temporary file
107
- os.remove(tmp_file_path)
108
 
109
  except Exception as e:
110
  st.error(f"Error parsing document {doc.name}: {e}")
111
  return all_texts, document_names, document_pages
112
 
113
-
114
  @st.cache_data
115
  def parse_pdf_from_url(url):
116
  try:
@@ -177,16 +182,19 @@ def get_embeddings_model():
177
  return None
178
 
179
  # QA System Initialization (qa_system.py)
 
180
  @st.cache_resource
181
- def initialize_qa_system(_vector_store): # Add a leading underscore to 'vector_store'
182
  try:
 
 
 
 
 
 
183
  qa_pipeline = RetrievalQA.from_chain_type(
184
- llm=pipeline(
185
- "text-davinci-003",
186
- model="gpt-4",
187
- api_key=os.environ.get('OPENAI_API_KEY'),
188
- prompt_template="Extract the specific details relevant to the query accurately from the document without adding additional information that is not present in the text. Provide concise, clear responses that stay within the boundaries of the document's content."),
189
- retriever=_vector_store.as_retriever() # Use '_vector_store' here as well
190
  )
191
  return qa_pipeline
192
  except Exception as e:
 
19
  from google.oauth2 import service_account
20
  import tempfile
21
  import os
22
+ from langchain.llms import OpenAI # Import the OpenAI class
23
+
24
 
25
 
26
  # SQLite Database Functions (database.py)
 
90
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
91
  for doc in documents:
92
  try:
93
+ if doc.name in document_names:
94
+ st.warning(f"Duplicate file name detected: {doc.name}. This file will be ignored.", icon="⚠️")
95
+ continue # Skip to the next file
96
+
97
  # Create a temporary file
98
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
99
  tmp_file.write(doc.read())
100
  tmp_file_path = tmp_file.name
101
 
102
+ loader = PyPDFLoader(tmp_file_path)
103
  pages = loader.load()
104
  document_names.append(doc.name)
105
  page_contents = []
 
110
  document_pages.append(page_contents)
111
 
112
  # Remove the temporary file
113
+ os.remove(tmp_file_path)
114
 
115
  except Exception as e:
116
  st.error(f"Error parsing document {doc.name}: {e}")
117
  return all_texts, document_names, document_pages
118
 
 
119
  @st.cache_data
120
  def parse_pdf_from_url(url):
121
  try:
 
182
  return None
183
 
184
  # QA System Initialization (qa_system.py)
185
+
186
  @st.cache_resource
187
+ def initialize_qa_system(_vector_store):
188
  try:
189
+ llm = OpenAI(
190
+ model_name="gpt-4", # Or another OpenAI model like "text-davinci-003"
191
+ api_key=st.secrets["OPENAI_API_KEY"],
192
+ prompt_template="Extract the specific details relevant to the query accurately from the document without adding additional information that is not present in the text. Provide concise, clear responses that stay within the boundaries of the document's content."
193
+ )
194
+
195
  qa_pipeline = RetrievalQA.from_chain_type(
196
+ llm=llm,
197
+ retriever=_vector_store.as_retriever()
 
 
 
 
198
  )
199
  return qa_pipeline
200
  except Exception as e: