RFP_Analyzer_Agent_backup

Build error

App Files Files Community

cryogenic22 committed on Dec 1, 2024

Commit

c7f45b3

verified ·

1 Parent(s): fa9d843

Update backend.py

Browse files

Files changed (1) hide show

backend.py +54 -25

backend.py CHANGED Viewed

@@ -23,7 +23,10 @@ from langchain.llms import OpenAI  # Import the OpenAI class
 from langchain.chat_models import ChatOpenAI  # Import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.agents import create_openai_tools_agent, AgentExecutor
 # SQLite Database Functions (database.py)
@@ -35,18 +38,19 @@ def create_connection(db_file):
         st.error(f"Error: {e}")
     return None
 def create_tables(conn):
     try:
-        sql_create_documents_table = '''
         CREATE TABLE IF NOT EXISTS documents (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             name TEXT NOT NULL,
             content TEXT NOT NULL,
             upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
         );
-        '''
-        sql_create_queries_table = '''
         CREATE TABLE IF NOT EXISTS queries (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             query TEXT NOT NULL,
@@ -55,9 +59,9 @@ def create_tables(conn):
             query_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             FOREIGN KEY (document_id) REFERENCES documents (id)
         );
-        '''
-        sql_create_annotations_table = '''
         CREATE TABLE IF NOT EXISTS annotations (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             document_id INTEGER NOT NULL,
@@ -66,8 +70,8 @@ def create_tables(conn):
             annotation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             FOREIGN KEY (document_id) REFERENCES documents (id)
         );
-        '''
         c = conn.cursor()
         c.execute(sql_create_documents_table)
         c.execute(sql_create_queries_table)
@@ -75,15 +79,21 @@ def create_tables(conn):
     except Error as e:
         st.error(f"Error: {e}")
 # FAISS Initialization (faiss_initialization.py)
 def initialize_faiss(embeddings, documents, document_names):
     try:
-        vector_store = FAISS.from_texts(documents, embeddings, metadatas=[{"source": name} for name in document_names])
         return vector_store
     except Exception as e:
         st.error(f"Error initializing FAISS: {e}")
         return None
 # Document Upload & Parsing Functions (document_parsing.py)
 @st.cache_data
 def upload_and_parse_documents(documents):
@@ -94,7 +104,10 @@ def upload_and_parse_documents(documents):
     for doc in documents:
         try:
             if doc.name in document_names:
-                st.warning(f"Duplicate file name detected: {doc.name}. This file will be ignored.", icon="⚠️")
                 continue  # Skip to the next file
             # Create a temporary file
@@ -118,7 +131,8 @@ def upload_and_parse_documents(documents):
         except Exception as e:
             st.error(f"Error parsing document {doc.name}: {e}")
     return all_texts, document_names, document_pages
 @st.cache_data
 def parse_pdf_from_url(url):
     try:
@@ -130,7 +144,9 @@ def parse_pdf_from_url(url):
         pages = loader.load()
         all_texts = []
         document_name = url.split("/")[-1]
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
         for page in pages:
             chunks = text_splitter.split_text(page.page_content)
             all_texts.extend(chunks)
@@ -142,15 +158,16 @@ def parse_pdf_from_url(url):
         st.error(f"Error parsing PDF from URL: {e}")
         return None, None
 @st.cache_data
 def parse_pdf_from_google_drive(file_id):
     try:
         # Authenticate and create the drive service
         credentials = service_account.Credentials.from_service_account_info(
             st.secrets["gdrive_service_account"],
-            scopes=["https://www.googleapis.com/auth/drive"]
         )
-        service = build('drive', 'v3', credentials=credentials)
         request = service.files().get_media(fileId=file_id)
         fh = BytesIO()
         downloader = MediaIoBaseDownload(fh, request)
@@ -164,7 +181,9 @@ def parse_pdf_from_google_drive(file_id):
         pages = loader.load()
         all_texts = []
         document_name = f"GoogleDrive_{file_id}.pdf"
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
         for page in pages:
             chunks = text_splitter.split_text(page.page_content)
             all_texts.extend(chunks)
@@ -173,6 +192,7 @@ def parse_pdf_from_google_drive(file_id):
         st.error(f"Error downloading PDF from Google Drive: {e}")
         return None, None
 # Embeddings for Semantic Search (embeddings.py)
 @st.cache_resource
 def get_embeddings_model():
@@ -184,6 +204,7 @@ def get_embeddings_model():
         st.error(f"Error loading embeddings model: {e}")
         return None
 # QA System Initialization (qa_system.py)
@@ -193,29 +214,37 @@ def initialize_qa_system(_vector_store):
         llm = ChatOpenAI(
             temperature=0,
             model_name="gpt-4",  # Or another OpenAI model like "gpt-3.5-turbo"
-            api_key=os.environ.get('OPENAI_API_KEY'),
         )
         # Define the prompt template
-        prompt = ChatPromptTemplate.from_messages([
-            ("system", "You are a helpful assistant"),
-            MessagesPlaceholder(variable_name="chat_history"),
-            ("human", "{input}"),
-        ])
         # Define the tools
         tools = [
             Tool(
                 name="Search",
-                func=_vector_store.as_retriever(search_kwargs={"k": 2}).get_relevant_documents,
                 description="useful for when you need to answer questions about the documents you have been uploaded. Input should be a fully formed question.",
             )
         ]
         # Create the agent and executor
         agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
-        agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=ConversationBufferMemory(memory_key="chat_history"))
         return agent_executor  # Return the agent executor
     except Exception as e:

 from langchain.chat_models import ChatOpenAI  # Import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.agents import create_openai_tools_agent, AgentExecutor
+from langchain.prompts import (
+    ChatPromptTemplate,
+    MessagesPlaceholder,
+)  # Import necessary classes
 # SQLite Database Functions (database.py)
         st.error(f"Error: {e}")
     return None
 def create_tables(conn):
     try:
+        sql_create_documents_table = """
         CREATE TABLE IF NOT EXISTS documents (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             name TEXT NOT NULL,
             content TEXT NOT NULL,
             upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
         );
+        """
+        sql_create_queries_table = """
         CREATE TABLE IF NOT EXISTS queries (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             query TEXT NOT NULL,
             query_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             FOREIGN KEY (document_id) REFERENCES documents (id)
         );
+        """
+        sql_create_annotations_table = """
         CREATE TABLE IF NOT EXISTS annotations (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             document_id INTEGER NOT NULL,
             annotation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             FOREIGN KEY (document_id) REFERENCES documents (id)
         );
+        """
         c = conn.cursor()
         c.execute(sql_create_documents_table)
         c.execute(sql_create_queries_table)
     except Error as e:
         st.error(f"Error: {e}")
 # FAISS Initialization (faiss_initialization.py)
 def initialize_faiss(embeddings, documents, document_names):
     try:
+        vector_store = FAISS.from_texts(
+            documents,
+            embeddings,
+            metadatas=[{"source": name} for name in document_names],
+        )
         return vector_store
     except Exception as e:
         st.error(f"Error initializing FAISS: {e}")
         return None
 # Document Upload & Parsing Functions (document_parsing.py)
 @st.cache_data
 def upload_and_parse_documents(documents):
     for doc in documents:
         try:
             if doc.name in document_names:
+                st.warning(
+                    f"Duplicate file name detected: {doc.name}. This file will be ignored.",
+                    icon="⚠️",
+                )
                 continue  # Skip to the next file
             # Create a temporary file
         except Exception as e:
             st.error(f"Error parsing document {doc.name}: {e}")
     return all_texts, document_names, document_pages
 @st.cache_data
 def parse_pdf_from_url(url):
     try:
         pages = loader.load()
         all_texts = []
         document_name = url.split("/")[-1]
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000, chunk_overlap=100
+        )
         for page in pages:
             chunks = text_splitter.split_text(page.page_content)
             all_texts.extend(chunks)
         st.error(f"Error parsing PDF from URL: {e}")
         return None, None
 @st.cache_data
 def parse_pdf_from_google_drive(file_id):
     try:
         # Authenticate and create the drive service
         credentials = service_account.Credentials.from_service_account_info(
             st.secrets["gdrive_service_account"],
+            scopes=["https://www.googleapis.com/auth/drive"],
         )
+        service = build("drive", "v3", credentials=credentials)
         request = service.files().get_media(fileId=file_id)
         fh = BytesIO()
         downloader = MediaIoBaseDownload(fh, request)
         pages = loader.load()
         all_texts = []
         document_name = f"GoogleDrive_{file_id}.pdf"
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000, chunk_overlap=100
+        )
         for page in pages:
             chunks = text_splitter.split_text(page.page_content)
             all_texts.extend(chunks)
         st.error(f"Error downloading PDF from Google Drive: {e}")
         return None, None
 # Embeddings for Semantic Search (embeddings.py)
 @st.cache_resource
 def get_embeddings_model():
         st.error(f"Error loading embeddings model: {e}")
         return None
 # QA System Initialization (qa_system.py)
         llm = ChatOpenAI(
             temperature=0,
             model_name="gpt-4",  # Or another OpenAI model like "gpt-3.5-turbo"
+            api_key=os.environ.get("OPENAI_API_KEY"),
         )
         # Define the prompt template
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", "You are a helpful assistant"),
+                MessagesPlaceholder(variable_name="chat_history"),
+                ("human", "{input}"),
+            ]
+        )
         # Define the tools
         tools = [
             Tool(
                 name="Search",
+                func=_vector_store.as_retriever(
+                    search_kwargs={"k": 2}
+                ).get_relevant_documents,
                 description="useful for when you need to answer questions about the documents you have been uploaded. Input should be a fully formed question.",
             )
         ]
         # Create the agent and executor
         agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
+        agent_executor = AgentExecutor(
+            agent=agent,
+            tools=tools,
+            verbose=True,
+            memory=ConversationBufferMemory(memory_key="chat_history"),
+        )
         return agent_executor  # Return the agent executor
     except Exception as e: