# File size: 2,578 Bytes | 5fffd14 2957871
import os
from datetime import datetime
from dotenv import load_dotenv
from typing import List, Dict, Any
# Import local modules
from .db import SimpleDB
from .vector_db import ChromaVectorDB
from .query_engine import QueryEngine
from .document_parser import SimpleDocumentParser
# Load environment variables
load_dotenv()
class DocumentAssistant:
    """Facade that wires together the document store, vector store, query
    engine and document parser used to index documents and answer queries.

    Attributes:
        db: ``SimpleDB`` instance used for query logging and document records.
        vector_db: ``ChromaVectorDB`` instance used for indexing and search.
        query_engine: ``QueryEngine`` built with the ``GROQ_API_KEY``
            environment variable.
        document_parser: ``SimpleDocumentParser`` used to split files into
            text chunks.
    """

    # Fallback location of the Chroma store when CHROMA_DB_PATH is not set.
    DEFAULT_CHROMA_DB_PATH = "./data/chroma_db"

    def __init__(self):
        """Initialize the document assistant and its collaborators."""
        self.db = SimpleDB()
        self.initialize_vectorstore()
        self.query_engine = QueryEngine(os.getenv("GROQ_API_KEY"))
        self.document_parser = SimpleDocumentParser()

    def initialize_vectorstore(self):
        """Create a ``ChromaVectorDB`` handle and store it on ``self.vector_db``.

        The location is read from the ``CHROMA_DB_PATH`` environment variable,
        falling back to ``DEFAULT_CHROMA_DB_PATH``.

        Returns:
            The newly created ``ChromaVectorDB`` instance.
        """
        chroma_path = os.getenv("CHROMA_DB_PATH", self.DEFAULT_CHROMA_DB_PATH)
        # Build first, assign second: if construction raises, whatever handle
        # was previously stored on the instance is left untouched.
        store = ChromaVectorDB(chroma_path)
        self.vector_db = store
        return store

    def process_query(self, query: str):
        """Process a query and return the generated response.

        Args:
            query: The user's question.

        Returns:
            Whatever ``QueryEngine.generate_response`` returns for the query
            and the documents found by the vector search.
        """
        # Log the query before doing any work.
        # NOTE(review): log_query is called twice per request (here and again
        # with the response below); confirm SimpleDB does not store this as
        # two separate entries.
        self.db.log_query(query)

        # Get relevant documents
        relevant_docs = self.vector_db.search(query)

        # Generate response
        response = self.query_engine.generate_response(query, relevant_docs)

        # Log the query together with its response
        self.db.log_query(query, response)
        return response

    def upload_document(self, file_path: str) -> Dict[str, Any]:
        """Parse a document, record it in the database and index its chunks.

        Args:
            file_path: Path of the file to ingest.

        Returns:
            A dict with ``status``, a human-readable ``message`` and the number
            of parsed ``chunks``.
        """
        # Derive file metadata from the path; the extension is lower-cased.
        filename = os.path.basename(file_path)
        file_type = os.path.splitext(filename)[1].lower()

        # Parse document
        text_chunks = self.document_parser.parse_document(file_path)

        # Add to database
        doc_id = self.db.add_document(filename, file_path, file_type)

        # Add to vector database, tagging the chunks with the database id
        self.vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id})

        return {
            "status": "success",
            "message": f"Document {filename} indexed successfully",
            "chunks": len(text_chunks),
        }

    def get_all_documents(self):
        """Return all documents as reported by ``self.db.get_all_documents()``."""
        return self.db.get_all_documents()

    def reset_database(self):
        """Re-create the vector store handle.

        Returns:
            ``True`` when a new handle was created, ``False`` when creation
            failed. On failure the error is printed and the previous handle is
            kept, so the assistant remains usable.
        """
        # NOTE(review): this only re-creates the ChromaVectorDB handle. Whether
        # previously indexed data is cleared depends on ChromaVectorDB, which
        # is not visible here; confirm, and delete collections there if needed.
        previous = getattr(self, "vector_db", None)
        try:
            self.initialize_vectorstore()
            return True
        except Exception as e:
            # Never leave the instance without a vector store after a failed
            # reset; fall back to the handle that was in place before.
            self.vector_db = previous
            print(f"Error resetting database: {str(e)}")
            return False