|
|
import os |
|
|
from datetime import datetime |
|
|
from dotenv import load_dotenv |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
|
|
|
from .db import SimpleDB |
|
|
from .vector_db import ChromaVectorDB |
|
|
from .query_engine import QueryEngine |
|
|
from .document_parser import SimpleDocumentParser |
|
|
|
|
|
|
|
|
# Load settings from a .env file at import time so the os.getenv() lookups in
# DocumentAssistant.__init__ (CHROMA_DB_PATH, GROQ_API_KEY) can pick them up.
load_dotenv()
|
|
|
|
|
class DocumentAssistant:
    """Facade tying together document storage, vector search and answering.

    The instance owns four collaborators:

    * ``db`` -- a ``SimpleDB`` used for document records and the query log.
    * ``vector_db`` -- a ``ChromaVectorDB`` holding the indexed text chunks.
    * ``query_engine`` -- a ``QueryEngine`` that produces the answers.
    * ``document_parser`` -- a ``SimpleDocumentParser`` that turns a file
      into text chunks.
    """

    def __init__(self) -> None:
        """Initialize the document assistant and its collaborators."""
        self.db = SimpleDB()
        # Storage location is configurable through CHROMA_DB_PATH and falls
        # back to a directory relative to the current working directory.
        self.vector_db = ChromaVectorDB(
            os.getenv("CHROMA_DB_PATH", "./data/chroma_db")
        )
        # os.getenv returns None when GROQ_API_KEY is unset; the engine then
        # receives None as its key.
        self.query_engine = QueryEngine(os.getenv("GROQ_API_KEY"))
        self.document_parser = SimpleDocumentParser()

    def process_query(self, query: str):
        """Process a query and return the response.

        The query is logged, the vector store is searched with it, and the
        search results are handed to the query engine together with the
        query. The query is then logged again, this time with the response.

        Args:
            query: The user's question.

        Returns:
            Whatever ``query_engine.generate_response`` returns.
        """
        # Logged up front, so the query is recorded even if the search or
        # the generation below raises.
        # NOTE(review): log_query is called twice per request; if SimpleDB
        # appends rather than updates, each query yields two log entries --
        # confirm this is intended.
        self.db.log_query(query)

        relevant_docs = self.vector_db.search(query)
        response = self.query_engine.generate_response(query, relevant_docs)

        self.db.log_query(query, response)
        return response

    def upload_document(self, file_path: str) -> Dict[str, Any]:
        """Process and index a document.

        The file is parsed into text chunks, registered in the document
        database, and the chunks are added to the vector store tagged with
        the new document id.

        Args:
            file_path: Path of the file to parse and index.

        Returns:
            A dict with the keys ``"status"``, ``"message"`` and
            ``"chunks"``. ``"status"`` is ``"success"`` when the chunks were
            indexed, or ``"error"`` (with ``"chunks"`` equal to 0) when the
            parser produced no chunks; in that case neither store is
            modified.
        """
        filename = os.path.basename(file_path)
        # Lower-cased extension including the leading dot, e.g. ".pdf".
        file_type = os.path.splitext(filename)[1].lower()

        text_chunks = self.document_parser.parse_document(file_path)
        if not text_chunks:
            # Bail out before touching either store: registering a document
            # with nothing to index would leave a record that can never be
            # found by search, and would report a misleading success.
            return {
                "status": "error",
                "message": (
                    f"No text could be extracted from {filename}; "
                    "nothing was indexed"
                ),
                "chunks": 0,
            }

        doc_id = self.db.add_document(filename, file_path, file_type)
        self.vector_db.add_document(file_path, text_chunks, {"doc_id": doc_id})

        return {
            "status": "success",
            "message": f"Document {filename} indexed successfully",
            "chunks": len(text_chunks),
        }

    def get_all_documents(self):
        """Get all documents, as returned by ``db.get_all_documents()``."""
        return self.db.get_all_documents()

    def reset_database(self):
        """Reset the vector collection and clear the SimpleDB records.

        Clearing SimpleDB is best-effort: a failure there is printed but
        does not affect the return value.

        Returns:
            The result of ``vector_db.reset_collection()``, or ``False``
            when the vector database is missing or the reset raises.
        """
        try:
            vector_db = getattr(self, "vector_db", None)
            if vector_db is None:
                print("Vector database not initialized")
                return False

            success = vector_db.reset_collection()

            # SimpleDB is cleared regardless of the value of `success`.
            if getattr(self, "db", None) is not None:
                try:
                    self.db.clear_all()
                    print("SimpleDB cleared successfully")
                except Exception as db_error:
                    print(f"Error clearing SimpleDB: {db_error}")

            return success
        except Exception as e:
            print(f"Error resetting database: {e}")
            return False