cryogenic22 committed on
Commit
c7f45b3
·
verified ·
1 Parent(s): fa9d843

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +54 -25
backend.py CHANGED
@@ -23,7 +23,10 @@ from langchain.llms import OpenAI # Import the OpenAI class
23
  from langchain.chat_models import ChatOpenAI # Import ChatOpenAI
24
  from langchain.memory import ConversationBufferMemory
25
  from langchain.agents import create_openai_tools_agent, AgentExecutor
26
-
 
 
 
27
 
28
 
29
  # SQLite Database Functions (database.py)
@@ -35,18 +38,19 @@ def create_connection(db_file):
35
  st.error(f"Error: {e}")
36
  return None
37
 
 
38
  def create_tables(conn):
39
  try:
40
- sql_create_documents_table = '''
41
  CREATE TABLE IF NOT EXISTS documents (
42
  id INTEGER PRIMARY KEY AUTOINCREMENT,
43
  name TEXT NOT NULL,
44
  content TEXT NOT NULL,
45
  upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
46
  );
47
- '''
48
 
49
- sql_create_queries_table = '''
50
  CREATE TABLE IF NOT EXISTS queries (
51
  id INTEGER PRIMARY KEY AUTOINCREMENT,
52
  query TEXT NOT NULL,
@@ -55,9 +59,9 @@ def create_tables(conn):
55
  query_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
56
  FOREIGN KEY (document_id) REFERENCES documents (id)
57
  );
58
- '''
59
-
60
- sql_create_annotations_table = '''
61
  CREATE TABLE IF NOT EXISTS annotations (
62
  id INTEGER PRIMARY KEY AUTOINCREMENT,
63
  document_id INTEGER NOT NULL,
@@ -66,8 +70,8 @@ def create_tables(conn):
66
  annotation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
67
  FOREIGN KEY (document_id) REFERENCES documents (id)
68
  );
69
- '''
70
-
71
  c = conn.cursor()
72
  c.execute(sql_create_documents_table)
73
  c.execute(sql_create_queries_table)
@@ -75,15 +79,21 @@ def create_tables(conn):
75
  except Error as e:
76
  st.error(f"Error: {e}")
77
 
 
78
  # FAISS Initialization (faiss_initialization.py)
79
  def initialize_faiss(embeddings, documents, document_names):
80
  try:
81
- vector_store = FAISS.from_texts(documents, embeddings, metadatas=[{"source": name} for name in document_names])
 
 
 
 
82
  return vector_store
83
  except Exception as e:
84
  st.error(f"Error initializing FAISS: {e}")
85
  return None
86
 
 
87
  # Document Upload & Parsing Functions (document_parsing.py)
88
  @st.cache_data
89
  def upload_and_parse_documents(documents):
@@ -94,7 +104,10 @@ def upload_and_parse_documents(documents):
94
  for doc in documents:
95
  try:
96
  if doc.name in document_names:
97
- st.warning(f"Duplicate file name detected: {doc.name}. This file will be ignored.", icon="⚠️")
 
 
 
98
  continue # Skip to the next file
99
 
100
  # Create a temporary file
@@ -118,7 +131,8 @@ def upload_and_parse_documents(documents):
118
  except Exception as e:
119
  st.error(f"Error parsing document {doc.name}: {e}")
120
  return all_texts, document_names, document_pages
121
-
 
122
  @st.cache_data
123
  def parse_pdf_from_url(url):
124
  try:
@@ -130,7 +144,9 @@ def parse_pdf_from_url(url):
130
  pages = loader.load()
131
  all_texts = []
132
  document_name = url.split("/")[-1]
133
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
 
134
  for page in pages:
135
  chunks = text_splitter.split_text(page.page_content)
136
  all_texts.extend(chunks)
@@ -142,15 +158,16 @@ def parse_pdf_from_url(url):
142
  st.error(f"Error parsing PDF from URL: {e}")
143
  return None, None
144
 
 
145
  @st.cache_data
146
  def parse_pdf_from_google_drive(file_id):
147
  try:
148
  # Authenticate and create the drive service
149
  credentials = service_account.Credentials.from_service_account_info(
150
  st.secrets["gdrive_service_account"],
151
- scopes=["https://www.googleapis.com/auth/drive"]
152
  )
153
- service = build('drive', 'v3', credentials=credentials)
154
  request = service.files().get_media(fileId=file_id)
155
  fh = BytesIO()
156
  downloader = MediaIoBaseDownload(fh, request)
@@ -164,7 +181,9 @@ def parse_pdf_from_google_drive(file_id):
164
  pages = loader.load()
165
  all_texts = []
166
  document_name = f"GoogleDrive_{file_id}.pdf"
167
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
 
168
  for page in pages:
169
  chunks = text_splitter.split_text(page.page_content)
170
  all_texts.extend(chunks)
@@ -173,6 +192,7 @@ def parse_pdf_from_google_drive(file_id):
173
  st.error(f"Error downloading PDF from Google Drive: {e}")
174
  return None, None
175
 
 
176
  # Embeddings for Semantic Search (embeddings.py)
177
  @st.cache_resource
178
  def get_embeddings_model():
@@ -184,6 +204,7 @@ def get_embeddings_model():
184
  st.error(f"Error loading embeddings model: {e}")
185
  return None
186
 
 
187
  # QA System Initialization (qa_system.py)
188
 
189
 
@@ -193,29 +214,37 @@ def initialize_qa_system(_vector_store):
193
  llm = ChatOpenAI(
194
  temperature=0,
195
  model_name="gpt-4", # Or another OpenAI model like "gpt-3.5-turbo"
196
- api_key=os.environ.get('OPENAI_API_KEY'),
197
  )
198
 
199
  # Define the prompt template
200
- prompt = ChatPromptTemplate.from_messages([
201
- ("system", "You are a helpful assistant"),
202
- MessagesPlaceholder(variable_name="chat_history"),  
203
-
204
- ("human", "{input}"),
205
- ])
 
206
 
207
  # Define the tools
208
  tools = [
209
  Tool(
210
  name="Search",
211
- func=_vector_store.as_retriever(search_kwargs={"k": 2}).get_relevant_documents,
 
 
212
  description="useful for when you need to answer questions about the documents you have been uploaded. Input should be a fully formed question.",
213
  )
214
  ]
215
 
216
  # Create the agent and executor
217
  agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
218
- agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=ConversationBufferMemory(memory_key="chat_history"))
 
 
 
 
 
219
 
220
  return agent_executor # Return the agent executor
221
  except Exception as e:
 
23
  from langchain.chat_models import ChatOpenAI # Import ChatOpenAI
24
  from langchain.memory import ConversationBufferMemory
25
  from langchain.agents import create_openai_tools_agent, AgentExecutor
26
+ from langchain.prompts import (
27
+ ChatPromptTemplate,
28
+ MessagesPlaceholder,
29
+ ) # Import necessary classes
30
 
31
 
32
  # SQLite Database Functions (database.py)
 
38
  st.error(f"Error: {e}")
39
  return None
40
 
41
+
42
  def create_tables(conn):
43
  try:
44
+ sql_create_documents_table = """
45
  CREATE TABLE IF NOT EXISTS documents (
46
  id INTEGER PRIMARY KEY AUTOINCREMENT,
47
  name TEXT NOT NULL,
48
  content TEXT NOT NULL,
49
  upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
50
  );
51
+ """
52
 
53
+ sql_create_queries_table = """
54
  CREATE TABLE IF NOT EXISTS queries (
55
  id INTEGER PRIMARY KEY AUTOINCREMENT,
56
  query TEXT NOT NULL,
 
59
  query_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
60
  FOREIGN KEY (document_id) REFERENCES documents (id)
61
  );
62
+ """
63
+
64
+ sql_create_annotations_table = """
65
  CREATE TABLE IF NOT EXISTS annotations (
66
  id INTEGER PRIMARY KEY AUTOINCREMENT,
67
  document_id INTEGER NOT NULL,
 
70
  annotation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
71
  FOREIGN KEY (document_id) REFERENCES documents (id)
72
  );
73
+ """
74
+
75
  c = conn.cursor()
76
  c.execute(sql_create_documents_table)
77
  c.execute(sql_create_queries_table)
 
79
  except Error as e:
80
  st.error(f"Error: {e}")
81
 
82
+
83
  # FAISS Initialization (faiss_initialization.py)
84
  def initialize_faiss(embeddings, documents, document_names):
85
  try:
86
+ vector_store = FAISS.from_texts(
87
+ documents,
88
+ embeddings,
89
+ metadatas=[{"source": name} for name in document_names],
90
+ )
91
  return vector_store
92
  except Exception as e:
93
  st.error(f"Error initializing FAISS: {e}")
94
  return None
95
 
96
+
97
  # Document Upload & Parsing Functions (document_parsing.py)
98
  @st.cache_data
99
  def upload_and_parse_documents(documents):
 
104
  for doc in documents:
105
  try:
106
  if doc.name in document_names:
107
+ st.warning(
108
+ f"Duplicate file name detected: {doc.name}. This file will be ignored.",
109
+ icon="⚠️",
110
+ )
111
  continue # Skip to the next file
112
 
113
  # Create a temporary file
 
131
  except Exception as e:
132
  st.error(f"Error parsing document {doc.name}: {e}")
133
  return all_texts, document_names, document_pages
134
+
135
+
136
  @st.cache_data
137
  def parse_pdf_from_url(url):
138
  try:
 
144
  pages = loader.load()
145
  all_texts = []
146
  document_name = url.split("/")[-1]
147
+ text_splitter = RecursiveCharacterTextSplitter(
148
+ chunk_size=1000, chunk_overlap=100
149
+ )
150
  for page in pages:
151
  chunks = text_splitter.split_text(page.page_content)
152
  all_texts.extend(chunks)
 
158
  st.error(f"Error parsing PDF from URL: {e}")
159
  return None, None
160
 
161
+
162
  @st.cache_data
163
  def parse_pdf_from_google_drive(file_id):
164
  try:
165
  # Authenticate and create the drive service
166
  credentials = service_account.Credentials.from_service_account_info(
167
  st.secrets["gdrive_service_account"],
168
+ scopes=["https://www.googleapis.com/auth/drive"],
169
  )
170
+ service = build("drive", "v3", credentials=credentials)
171
  request = service.files().get_media(fileId=file_id)
172
  fh = BytesIO()
173
  downloader = MediaIoBaseDownload(fh, request)
 
181
  pages = loader.load()
182
  all_texts = []
183
  document_name = f"GoogleDrive_{file_id}.pdf"
184
+ text_splitter = RecursiveCharacterTextSplitter(
185
+ chunk_size=1000, chunk_overlap=100
186
+ )
187
  for page in pages:
188
  chunks = text_splitter.split_text(page.page_content)
189
  all_texts.extend(chunks)
 
192
  st.error(f"Error downloading PDF from Google Drive: {e}")
193
  return None, None
194
 
195
+
196
  # Embeddings for Semantic Search (embeddings.py)
197
  @st.cache_resource
198
  def get_embeddings_model():
 
204
  st.error(f"Error loading embeddings model: {e}")
205
  return None
206
 
207
+
208
  # QA System Initialization (qa_system.py)
209
 
210
 
 
214
  llm = ChatOpenAI(
215
  temperature=0,
216
  model_name="gpt-4", # Or another OpenAI model like "gpt-3.5-turbo"
217
+ api_key=os.environ.get("OPENAI_API_KEY"),
218
  )
219
 
220
  # Define the prompt template
221
+ prompt = ChatPromptTemplate.from_messages(
222
+ [
223
+ ("system", "You are a helpful assistant"),
224
+ MessagesPlaceholder(variable_name="chat_history"),
225
+ ("human", "{input}"),
226
+ ]
227
+ )
228
 
229
  # Define the tools
230
  tools = [
231
  Tool(
232
  name="Search",
233
+ func=_vector_store.as_retriever(
234
+ search_kwargs={"k": 2}
235
+ ).get_relevant_documents,
236
  description="useful for when you need to answer questions about the documents you have been uploaded. Input should be a fully formed question.",
237
  )
238
  ]
239
 
240
  # Create the agent and executor
241
  agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
242
+ agent_executor = AgentExecutor(
243
+ agent=agent,
244
+ tools=tools,
245
+ verbose=True,
246
+ memory=ConversationBufferMemory(memory_key="chat_history"),
247
+ )
248
 
249
  return agent_executor # Return the agent executor
250
  except Exception as e: