cryogenic22 committed on
Commit
61e7e62
·
verified ·
1 Parent(s): c9ae383

Update utils/database.py

Browse files
Files changed (1) hide show
  1. utils/database.py +114 -0
utils/database.py CHANGED
@@ -1108,6 +1108,30 @@ def process_document(file_path):
1108
  return chunks, full_content
1109
 
1110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1111
  def display_vector_store_info():
1112
  """
1113
  Display information about the current vector store state.
@@ -1154,6 +1178,96 @@ def display_vector_store_info():
1154
  st.error(traceback.format_exc())
1155
 
1156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1157
  def initialize_qa_system(vector_store):
1158
  """
1159
  Initialize QA system with optimized retrieval.
 
1108
  return chunks, full_content
1109
 
1110
 
1111
def delete_collection(conn: sqlite3.Connection, collection_id: int) -> bool:
    """
    Delete a collection and its document associations.

    Rows in ``document_collections`` that reference the collection are
    removed first, then the row in ``collections`` itself. Both statements
    run while holding ``conn_lock`` and are committed together; if either
    fails, the pending changes are rolled back so a later commit on the same
    connection cannot persist a half-finished delete.

    Args:
        conn: Database connection
        collection_id: ID of the collection to delete

    Returns:
        bool: True if the delete was committed, False if a database error
        occurred (the error is reported through ``st.error``)
    """
    try:
        with conn_lock:
            try:
                cursor = conn.cursor()

                # Delete the collection's document associations first
                cursor.execute('''
                    DELETE FROM document_collections
                    WHERE collection_id = ?
                ''', (collection_id,))

                # Then delete the collection itself
                cursor.execute('''
                    DELETE FROM collections
                    WHERE id = ?
                ''', (collection_id,))

                conn.commit()
            except sqlite3.Error:
                # Undo whatever part of the delete already ran, then let the
                # outer handler report the failure.
                conn.rollback()
                raise
        return True

    except sqlite3.Error as e:
        st.error(f"Error deleting collection: {e}")
        return False
1134
+
1135
  def display_vector_store_info():
1136
  """
1137
  Display information about the current vector store state.
 
1178
  st.error(traceback.format_exc())
1179
 
1180
 
1181
def process_and_store_document(uploaded_file) -> Optional[int]:
    """
    Process an uploaded document and store it in the database.

    The upload is written to a temporary ``.pdf`` file so that
    ``PyPDFLoader`` can read it from disk. The text of every loaded page is
    joined with newlines and inserted as one row into ``documents`` together
    with the upload's name and the current timestamp. The temporary file is
    removed afterwards whether or not processing succeeded.

    Args:
        uploaded_file: Streamlit's UploadedFile object (its ``getvalue()``
            bytes and ``name`` are used)

    Returns:
        Optional[int]: The ID of the stored document if successful, None otherwise
    """
    import os
    import traceback

    # Tracked separately so cleanup is safe even if the temp file was never created.
    tmp_path = None
    try:
        # Write the upload to disk and close the file before the loader opens it.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())

        # Load the PDF and extract full content for database storage
        documents = PyPDFLoader(tmp_path).load()
        full_content = "\n".join(doc.page_content for doc in documents)

        # Store in database
        with st.session_state.db_conn as conn:
            cursor = conn.cursor()

            # Insert document
            cursor.execute('''
                INSERT INTO documents (name, content, upload_date)
                VALUES (?, ?, ?)
            ''', (uploaded_file.name, full_content, datetime.now()))

            # Get the document ID
            document_id = cursor.lastrowid

            conn.commit()

        return document_id

    except Exception as e:
        st.error(f"Error processing document {uploaded_file.name}: {str(e)}")
        st.error(traceback.format_exc())
        return None
    finally:
        # Clean up temporary file; removal is best-effort, so only
        # filesystem errors are ignored.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
1244
+
1245
def get_document_content(conn: sqlite3.Connection, document_id: int) -> Optional[str]:
    """
    Look up the stored text of one document by its ID.

    Args:
        conn: Database connection
        document_id: ID of the document to retrieve

    Returns:
        Optional[str]: The ``content`` column of the matching ``documents``
        row, or None when no row matches or a database error occurs (the
        error is reported through ``st.error``)
    """
    try:
        row = conn.cursor().execute('''
            SELECT content
            FROM documents
            WHERE id = ?
        ''', (document_id,)).fetchone()
    except sqlite3.Error as e:
        st.error(f"Error retrieving document content: {e}")
        return None

    # No matching row means there is nothing to return.
    if not row:
        return None
    return row[0]
1270
+
1271
  def initialize_qa_system(vector_store):
1272
  """
1273
  Initialize QA system with optimized retrieval.