File size: 2,942 Bytes
5fffd14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2957871
 
 
 
 
2feba09
 
264c011
 
2feba09
 
 
264c011
 
 
 
 
2feba09
264c011
2feba09
 
 
2957871
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
from datetime import datetime
from dotenv import load_dotenv
from typing import List, Dict, Any

# Import local modules
from .db import SimpleDB
from .vector_db import ChromaVectorDB
from .query_engine import QueryEngine
from .document_parser import SimpleDocumentParser

# Load environment variables
load_dotenv()

class DocumentAssistant:
    """Facade tying together document storage, vector search and answering.

    Holds four collaborators: a ``SimpleDB`` record store, a
    ``ChromaVectorDB`` index, a ``QueryEngine`` and a
    ``SimpleDocumentParser``.
    """

    def __init__(self):
        """Create the collaborators, reading settings from the environment."""
        self.db = SimpleDB()

        # CHROMA_DB_PATH falls back to a local data directory when unset.
        chroma_location = os.getenv("CHROMA_DB_PATH", "./data/chroma_db")
        self.vector_db = ChromaVectorDB(chroma_location)

        # GROQ_API_KEY is handed over as-is (None when the variable is unset).
        groq_key = os.getenv("GROQ_API_KEY")
        self.query_engine = QueryEngine(groq_key)

        self.document_parser = SimpleDocumentParser()

    def process_query(self, query: str):
        """Answer ``query`` and return the generated response.

        The query is logged once on arrival and a second time together with
        its response; in between, the vector index is searched and the hits
        are passed to the query engine.
        """
        self.db.log_query(query)

        matches = self.vector_db.search(query)
        answer = self.query_engine.generate_response(query, matches)

        self.db.log_query(query, answer)
        return answer

    def upload_document(self, file_path: str):
        """Parse the file at ``file_path`` and index its chunks.

        Returns a dict with ``status``, ``message`` and ``chunks`` (the
        number of parsed chunks).
        """
        name = os.path.basename(file_path)
        _, extension = os.path.splitext(name)
        extension = extension.lower()

        # Parse first so nothing is recorded when parsing raises.
        pieces = self.document_parser.parse_document(file_path)

        # The record id from the database is attached to the vector entries.
        record_id = self.db.add_document(name, file_path, extension)
        self.vector_db.add_document(file_path, pieces, {"doc_id": record_id})

        summary = {
            "status": "success",
            "message": f"Document {name} indexed successfully",
            "chunks": len(pieces)
        }
        return summary

    def get_all_documents(self):
        """Return whatever the record store reports as its documents."""
        documents = self.db.get_all_documents()
        return documents

    def reset_database(self):
        """Reset the vector collection and clear the record store.

        Returns the result of ``reset_collection()``; ``False`` when the
        vector database is missing/None or when the reset itself raises.
        A failure while clearing the record store is only reported and
        does not change the returned value.
        """
        try:
            # Guard: nothing to reset without a vector database.
            if getattr(self, "vector_db", None) is None:
                print("Vector database not initialized")
                return False

            outcome = self.vector_db.reset_collection()

            # Clearing the record store is best-effort.
            if getattr(self, "db", None) is not None:
                try:
                    self.db.clear_all()
                    print("SimpleDB cleared successfully")
                except Exception as db_error:
                    print(f"Error clearing SimpleDB: {db_error!s}")

            return outcome
        except Exception as e:
            print(f"Error resetting database: {e!s}")
            return False