File size: 2,942 Bytes
5fffd14 2957871 2feba09 264c011 2feba09 264c011 2feba09 264c011 2feba09 2957871 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
from datetime import datetime
from dotenv import load_dotenv
from typing import List, Dict, Any
# Import local modules
from .db import SimpleDB
from .vector_db import ChromaVectorDB
from .query_engine import QueryEngine
from .document_parser import SimpleDocumentParser
# Load environment variables at import time, before DocumentAssistant reads
# CHROMA_DB_PATH and GROQ_API_KEY through os.getenv.
# NOTE(review): presumably sourced from a local .env file via python-dotenv —
# confirm the expected file location for deployments.
load_dotenv()
class DocumentAssistant:
    """Facade tying together document storage, vector search and answering.

    The assistant owns four collaborators, all created in ``__init__``:

    * ``db`` -- a ``SimpleDB`` used for document records and query logs.
    * ``vector_db`` -- a ``ChromaVectorDB`` used for indexing and search.
    * ``query_engine`` -- a ``QueryEngine`` that produces the responses.
    * ``document_parser`` -- a ``SimpleDocumentParser`` that splits files
      into text chunks.
    """

    def __init__(self) -> None:
        """Create the collaborators, reading configuration from the environment.

        ``CHROMA_DB_PATH`` selects the vector store location and falls back to
        ``./data/chroma_db``. ``GROQ_API_KEY`` is passed to the query engine
        as-is, so it is ``None`` when the variable is not set.
        """
        self.db = SimpleDB()
        self.vector_db = ChromaVectorDB(
            os.getenv("CHROMA_DB_PATH", "./data/chroma_db")
        )
        self.query_engine = QueryEngine(os.getenv("GROQ_API_KEY"))
        self.document_parser = SimpleDocumentParser()

    def process_query(self, query: str):
        """Answer *query* using the indexed documents.

        Args:
            query: The user's question.

        Returns:
            Whatever ``query_engine.generate_response`` returns for the query
            and the documents found by ``vector_db.search``.
        """
        # The query is logged up front so it is recorded even if retrieval or
        # generation raises; it is logged again below together with the answer.
        self.db.log_query(query)

        relevant_docs = self.vector_db.search(query)
        response = self.query_engine.generate_response(query, relevant_docs)

        self.db.log_query(query, response)
        return response

    def upload_document(self, file_path: str) -> Dict[str, Any]:
        """Parse a document, register it and add its chunks to the vector store.

        Args:
            file_path: Path of the file to index.

        Returns:
            A dict with ``status`` (always ``"success"``), a human-readable
            ``message`` and ``chunks``, the number of parsed text chunks.
        """
        filename = os.path.basename(file_path)
        # Extension is lower-cased so ".PDF" and ".pdf" are stored identically.
        file_type = os.path.splitext(filename)[1].lower()

        # Parse first: a parser failure leaves no record behind in the database.
        text_chunks = self.document_parser.parse_document(file_path)

        doc_id = self.db.add_document(filename, file_path, file_type)
        # The database id travels with the chunks as metadata.
        self.vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id})

        return {
            "status": "success",
            "message": f"Document {filename} indexed successfully",
            "chunks": len(text_chunks),
        }

    def get_all_documents(self):
        """Return every document record held by the database."""
        return self.db.get_all_documents()

    def reset_database(self):
        """Reset the vector collection and clear the document database.

        Clearing ``db`` is best-effort: a failure there is printed but does
        not change the result, which reflects the vector store reset only.

        Returns:
            The value returned by ``vector_db.reset_collection()``, or
            ``False`` when the vector database is missing or the reset raised.
        """
        try:
            # Guard clause: nothing to reset without a vector database.
            if getattr(self, "vector_db", None) is None:
                print("Vector database not initialized")
                return False

            success = self.vector_db.reset_collection()

            if getattr(self, "db", None) is not None:
                try:
                    self.db.clear_all()
                    print("SimpleDB cleared successfully")
                except Exception as db_error:
                    # Deliberately swallowed so the vector reset result is
                    # still reported to the caller.
                    print(f"Error clearing SimpleDB: {db_error}")

            return success
        except Exception as e:
            # Top-level boundary: callers get a plain False instead of a raise.
            print(f"Error resetting database: {e}")
            return False