File size: 4,538 Bytes
d9feb53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Resource manager to handle loading and persisting data across requests.
"""

import pickle
import faiss
import os
import sys
import traceback

class ResourceManager:
    """Singleton holder for the FAISS index, document chunks and embeddings.

    Every call to ``ResourceManager()`` returns the same instance, so data
    loaded once stays available to all later callers in the process.

    Attributes are plain instance attributes:
      * ``faiss_index``       - object returned by ``faiss.read_index``, or None
      * ``doc_chunks``        - unpickled content of ``DOC_CHUNKS_PATH``, or None
      * ``embedding_vectors`` - unpickled content of ``EMBEDDINGS_PATH``, or None
      * ``initialized``       - True once both critical resources are available
      * ``director``          - whatever was passed to ``set_director``, or None
    """

    _instance = None

    # Locations of the data files, relative to the current working directory.
    FAISS_INDEX_PATH = "embeddings/faiss_index.index"
    DOC_CHUNKS_PATH = "data/doc_chunks.pkl"
    EMBEDDINGS_PATH = "embeddings/embeddings.pkl"

    def __new__(cls):
        """Create the shared instance on first use and return it afterwards."""
        if cls._instance is None:
            instance = super().__new__(cls)
            instance.faiss_index = None
            instance.doc_chunks = None
            instance.embedding_vectors = None
            instance.initialized = False
            instance.director = None
            cls._instance = instance
        return cls._instance

    def load_resources(self):
        """Load every resource that is not available yet.

        The FAISS index and the document chunks are critical; the embeddings
        are optional and a failure to load them does not affect the result.
        A resource that is already set (for example from an earlier, partly
        failed attempt) is kept as-is instead of being read from disk again,
        so retries triggered by the ``get_*`` accessors stay cheap.

        Returns:
            True when both critical resources are available, False otherwise.
            Errors are reported on stdout and never raised.
        """
        if self.initialized:
            print("Resources already loaded, skipping...")
            return True

        success = True

        # Load FAISS index
        if self.faiss_index is None:
            try:
                print("Loading FAISS index...")
                self.faiss_index = faiss.read_index(self.FAISS_INDEX_PATH)
                print("FAISS index loaded successfully")
            except Exception as e:
                print(f"Error loading FAISS index: {e}")
                success = False

        # Load document chunks
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only use data files that come from a trusted source.
        if self.doc_chunks is None:
            try:
                print("Loading document chunks...")
                with open(self.DOC_CHUNKS_PATH, "rb") as f:
                    chunks = pickle.load(f)
                print(f"Loaded {len(chunks)} document chunks")
                # Publish only after the size report succeeded, so a value
                # that is set always went through the whole load step.
                self.doc_chunks = chunks
            except Exception as e:
                print(f"Error loading document chunks: {e}")
                success = False

        # Load embeddings if available
        # NOTE(security): same pickle caveat as for the document chunks.
        if self.embedding_vectors is None:
            try:
                print("Loading embeddings...")
                with open(self.EMBEDDINGS_PATH, "rb") as f:
                    self.embedding_vectors = pickle.load(f)
                print("Embeddings loaded successfully")
            except Exception as e:
                print(f"Error loading embeddings: {e}")
                # This is not critical, so don't set success to False

        if success:
            self.initialized = True
            print("All critical resources loaded successfully!")

        return success

    def get_faiss_index(self):
        """Return the FAISS index, attempting a load first if not initialized.

        May return None when loading failed.
        """
        if not self.initialized:
            self.load_resources()
        return self.faiss_index

    def get_doc_chunks(self):
        """Return the document chunks, attempting a load first if not initialized.

        May return None when loading failed.
        """
        if not self.initialized:
            self.load_resources()
        return self.doc_chunks

    def get_embedding_vectors(self):
        """Return the embedding vectors, attempting a load first if not initialized.

        May return None when the optional embeddings file could not be loaded.
        """
        if not self.initialized:
            self.load_resources()
        return self.embedding_vectors

    def get_director(self):
        """Return the agent director stored via ``set_director``.

        Nothing is created here: the result is None until a director was set.
        """
        return self.director

    def set_director(self, director):
        """Store the agent director on the shared instance."""
        self.director = director

def check_data_files():
    """Make sure the data files exist, then load them into the ResourceManager.

    When at least one file is missing, ``download_from_hub.py`` is run with
    the current interpreter to fetch or create the data. Its output and any
    failure are only reported on stdout: the result of this function depends
    solely on whether the files exist afterwards and on whether loading them
    succeeds.

    Returns:
        False when a required file is still missing after the download
        attempt; otherwise the result of ``ResourceManager().load_resources()``.
    """
    data_files = [
        "embeddings/faiss_index.index",
        "data/doc_chunks.pkl",
        "embeddings/embeddings.pkl",
    ]

    missing_files = [f for f in data_files if not os.path.exists(f)]

    if missing_files:
        print(f"Missing data files: {missing_files}")
        print("Downloading or creating required data files...")
        try:
            import subprocess
            # check=False: a non-zero exit is reported below instead of raised.
            result = subprocess.run(
                [sys.executable, "download_from_hub.py"],
                check=False,
                capture_output=True,
                text=True
            )
            print(result.stdout)
            if result.returncode != 0:
                print(f"Warning: Data preparation finished with return code {result.returncode}")
                print(f"Error output: {result.stderr}")
        except Exception as e:
            # Best effort: the existence check below decides the outcome.
            print(f"Error preparing data: {e}")
            traceback.print_exc()

    # Verify files exist before loading anything that needs them
    if not all(os.path.exists(f) for f in data_files):
        print("Warning: Some required data files are still missing.")
        print("The application may not function correctly.")
        return False

    # Load resources into the resource manager
    return ResourceManager().load_resources()