RFP_Analyzer_Agent

Paused

App Files Community

cryogenic22 committed on Nov 30, 2024

Commit

48e07bb

verified ·

1 Parent(s): 5cf581b

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -19,6 +19,8 @@ from googleapiclient.http import MediaIoBaseDownload
 from google.oauth2 import service_account
 import tempfile
 import os
 # SQLite Database Functions (database.py)
@@ -88,12 +90,16 @@ def upload_and_parse_documents(documents):
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     for doc in documents:
         try:
             # Create a temporary file
             with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
                 tmp_file.write(doc.read())
                 tmp_file_path = tmp_file.name
-            loader = PyPDFLoader(tmp_file_path) # Use the temporary file path
             pages = loader.load()
             document_names.append(doc.name)
             page_contents = []
@@ -104,13 +110,12 @@ def upload_and_parse_documents(documents):
             document_pages.append(page_contents)
             # Remove the temporary file
-            os.remove(tmp_file_path)
         except Exception as e:
             st.error(f"Error parsing document {doc.name}: {e}")
     return all_texts, document_names, document_pages
 @st.cache_data
 def parse_pdf_from_url(url):
     try:
@@ -177,16 +182,19 @@ def get_embeddings_model():
         return None
 # QA System Initialization (qa_system.py)
 @st.cache_resource
-def initialize_qa_system(_vector_store):  # Add a leading underscore to 'vector_store'
     try:
         qa_pipeline = RetrievalQA.from_chain_type(
-            llm=pipeline(
-                "text-davinci-003",
-                model="gpt-4",
-                api_key=os.environ.get('OPENAI_API_KEY'),
-                prompt_template="Extract the specific details relevant to the query accurately from the document without adding additional information that is not present in the text. Provide concise, clear responses that stay within the boundaries of the document's content."),
-            retriever=_vector_store.as_retriever()  # Use '_vector_store' here as well
         )
         return qa_pipeline
     except Exception as e:

 from google.oauth2 import service_account
 import tempfile
 import os
+from langchain.llms import OpenAI  # Import the OpenAI class
 # SQLite Database Functions (database.py)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     for doc in documents:
         try:
+            if doc.name in document_names:
+                st.warning(f"Duplicate file name detected: {doc.name}. This file will be ignored.", icon="⚠️")
+                continue  # Skip to the next file
             # Create a temporary file
             with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
                 tmp_file.write(doc.read())
                 tmp_file_path = tmp_file.name
+            loader = PyPDFLoader(tmp_file_path)
             pages = loader.load()
             document_names.append(doc.name)
             page_contents = []
             document_pages.append(page_contents)
             # Remove the temporary file
+            os.remove(tmp_file_path)
         except Exception as e:
             st.error(f"Error parsing document {doc.name}: {e}")
     return all_texts, document_names, document_pages
 @st.cache_data
 def parse_pdf_from_url(url):
     try:
         return None
 # QA System Initialization (qa_system.py)
 @st.cache_resource
+def initialize_qa_system(_vector_store):
     try:
+        llm = OpenAI(
+            model_name="gpt-4",  # Or another OpenAI model like "text-davinci-003"
+            api_key=st.secrets["OPENAI_API_KEY"],
+            prompt_template="Extract the specific details relevant to the query accurately from the document without adding additional information that is not present in the text. Provide concise, clear responses that stay within the boundaries of the document's content."
+        )
         qa_pipeline = RetrievalQA.from_chain_type(
+            llm=llm,
+            retriever=_vector_store.as_retriever()
         )
         return qa_pipeline
     except Exception as e: