import os
from datetime import datetime
from typing import List, Dict, Any

from dotenv import load_dotenv

# Import local modules
from .db import SimpleDB
from .vector_db import ChromaVectorDB
from .query_engine import QueryEngine
from .document_parser import SimpleDocumentParser

# Load environment variables
load_dotenv()


class DocumentAssistant:
    """Coordinate document ingestion and question answering.

    Wires together four collaborators: ``SimpleDB`` (document records and
    query log), ``ChromaVectorDB`` (chunk search), ``SimpleDocumentParser``
    (file -> text chunks) and ``QueryEngine`` (response generation).
    """

    def __init__(self):
        """Initialize the document assistant.

        Reads two environment variables:

        * ``CHROMA_DB_PATH`` -- passed to ``ChromaVectorDB``; defaults to
          ``./data/chroma_db`` when unset.
        * ``GROQ_API_KEY`` -- passed to ``QueryEngine``; ``None`` when unset.
        """
        self.db = SimpleDB()
        self.vector_db = ChromaVectorDB(
            os.getenv("CHROMA_DB_PATH", "./data/chroma_db")
        )
        self.query_engine = QueryEngine(os.getenv("GROQ_API_KEY"))
        self.document_parser = SimpleDocumentParser()

    def process_query(self, query: str):
        """Process a query and return the generated response.

        Args:
            query: The user's question.

        Returns:
            Whatever ``QueryEngine.generate_response`` returns for the query
            and the documents found by the vector search.
        """
        # Log the query up front, before search/generation run.
        # NOTE(review): log_query is called again below with the response;
        # if SimpleDB appends on every call this yields two records per
        # query -- confirm that is intended.
        self.db.log_query(query)

        # Get relevant documents
        relevant_docs = self.vector_db.search(query)

        # Generate response
        response = self.query_engine.generate_response(query, relevant_docs)

        # Log the response
        self.db.log_query(query, response)

        return response

    def upload_document(self, file_path: str) -> Dict[str, Any]:
        """Parse a document and index it in both stores.

        Args:
            file_path: Path of the file to parse and index.

        Returns:
            A dict with keys ``status``, ``message`` and ``chunks``.
            ``status`` is ``"success"`` when the document was indexed, or
            ``"error"`` (with ``chunks`` equal to 0) when the parser produced
            no chunks and nothing was stored.
        """
        # Get file metadata
        filename = os.path.basename(file_path)
        file_type = os.path.splitext(filename)[1].lower()

        # Parse document
        text_chunks = self.document_parser.parse_document(file_path)

        # Nothing to index: bail out before writing to either store so no
        # document record is created without matching vectors.
        if not text_chunks:
            return {
                "status": "error",
                "message": f"Document {filename} produced no text to index",
                "chunks": 0,
            }

        # Add to database
        doc_id = self.db.add_document(filename, file_path, file_type)

        # Add to vector database, tagging the chunks with the record id
        self.vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id})

        return {
            "status": "success",
            "message": f"Document {filename} indexed successfully",
            "chunks": len(text_chunks),
        }

    def get_all_documents(self):
        """Return the result of ``SimpleDB.get_all_documents()``."""
        return self.db.get_all_documents()

    def reset_database(self):
        """Reset the ChromaDB collection and clear SimpleDB.

        Never raises: any exception is printed and reported as ``False``.
        A failure while clearing SimpleDB is printed but does not change the
        return value.

        Returns:
            The value returned by ``vector_db.reset_collection()``, or
            ``False`` when the vector database is missing or the reset
            raised.
        """
        try:
            if getattr(self, "vector_db", None) is None:
                print("Vector database not initialized")
                return False

            # Try to reset the collection
            success = self.vector_db.reset_collection()

            # Also clear the SimpleDB (best effort)
            if getattr(self, "db", None) is not None:
                try:
                    self.db.clear_all()
                    print("SimpleDB cleared successfully")
                except Exception as db_error:
                    print(f"Error clearing SimpleDB: {db_error}")

            return success
        except Exception as e:
            print(f"Error resetting database: {e}")
            return False