Executor-Tyrant-Framework committed on
Commit
1f4edd6
·
verified ·
1 Parent(s): c46fb41

Delete recursive_context.py

Browse files
Files changed (1) hide show
  1. recursive_context.py +0 -244
recursive_context.py DELETED
@@ -1,244 +0,0 @@
1
- """
2
- Recursive Context Manager for Clawdbot
3
- [Corrected version to fix SyntaxError and /.cache PermissionError]
4
- """
5
-
6
- from pathlib import Path
7
- from typing import List, Dict, Optional, Tuple
8
- import chromadb
9
- from chromadb.config import Settings
10
- from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
11
- import hashlib
12
- import json
13
- import os
14
- import time
15
- import threading
16
-
17
- def _select_chroma_path():
18
- data_path = Path("/data/chroma_db")
19
- try:
20
- data_path.mkdir(parents=True, exist_ok=True)
21
- test_file = data_path / ".write_test"
22
- test_file.write_text("test")
23
- test_file.unlink()
24
- return str(data_path)
25
- except (OSError, PermissionError):
26
- workspace_path = Path("/workspace/chroma_db")
27
- workspace_path.mkdir(parents=True, exist_ok=True)
28
- return str(workspace_path)
29
-
30
# Resolved once at import time so every client in this process shares one
# on-disk Chroma location (prefers /data, falls back to /workspace).
CHROMA_DB_PATH = _select_chroma_path()
31
-
32
class HFDatasetPersistence:
    """Best-effort backup of conversation data to a private HF dataset repo.

    Configuration comes from the environment (MEMORY_REPO plus one of the
    HF token variables). Every network failure is deliberately swallowed:
    persistence is an optional convenience and must never crash the caller.
    """

    def __init__(self, repo_id: str = None):
        # Deferred import keeps module load cheap when persistence is unused.
        from huggingface_hub import HfApi

        self.api = HfApi()
        self.repo_id = repo_id or os.getenv("MEMORY_REPO")

        # First non-empty token wins, in the original precedence order.
        token = None
        for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_TOKEN"):
            token = token or os.getenv(var)
        self.token = token

        self._repo_ready = False
        self._save_lock = threading.Lock()
        self._pending_save = False
        self._last_save_time = 0
        self.SAVE_DEBOUNCE_SECONDS = 10

        if not (self.repo_id and self.token):
            return
        self._ensure_repo_exists()
        self._verify_write_permissions()

    def _ensure_repo_exists(self):
        """Look up the dataset repo, creating it (private) on first use."""
        if self._repo_ready:
            return
        try:
            self.api.repo_info(repo_id=self.repo_id, repo_type="dataset", token=self.token)
        except Exception:
            try:
                self.api.create_repo(repo_id=self.repo_id, repo_type="dataset", private=True, token=self.token)
            except Exception:
                return  # still not ready; a later call may retry
            self._repo_ready = True
        else:
            self._repo_ready = True

    @property
    def is_configured(self):
        """True when both a target repo and an auth token are present."""
        return bool(self.repo_id) and bool(self.token)

    def _verify_write_permissions(self):
        """Ping whoami to warm/validate the token; failure is non-fatal."""
        try:
            self.api.whoami(token=self.token)
        except Exception:
            pass

    def save_conversations(self, conversations_data: List[Dict], force: bool = False):
        """Upload the full conversation list as conversations.json.

        Uploads are debounced to at most one per SAVE_DEBOUNCE_SECONDS
        unless *force* is set; a skipped save is remembered in
        _pending_save. Returns True only on a successful upload.
        """
        if not (self.is_configured and self._repo_ready):
            return False
        now = time.time()
        within_debounce = (now - self._last_save_time) < self.SAVE_DEBOUNCE_SECONDS
        if within_debounce and not force:
            self._pending_save = True
            return False
        with self._save_lock:
            try:
                staging = Path("/tmp/conversations_backup.json")
                staging.write_text(json.dumps(conversations_data, indent=2))
                self.api.upload_file(
                    path_or_fileobj=str(staging),
                    path_in_repo="conversations.json",
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    token=self.token,
                    commit_message=f"Backup {len(conversations_data)} conversations"
                )
            except Exception:
                return False
            self._last_save_time = now
            self._pending_save = False
            return True

    def load_conversations(self) -> List[Dict]:
        """Fetch conversations.json from the repo; [] when unavailable."""
        if not self.is_configured:
            return []
        try:
            from huggingface_hub import hf_hub_download
            local_path = hf_hub_download(
                repo_id=self.repo_id,
                filename="conversations.json",
                repo_type="dataset",
                token=self.token,
            )
            with open(local_path, 'r') as fh:
                return json.load(fh)
        except Exception:
            return []
102
-
103
class RecursiveContextManager:
    """Semantic index over a repository plus persisted conversation memory.

    Two Chroma collections back this class: one for codebase files and one
    for conversation turns. Conversation turns are mirrored to a Hugging
    Face dataset repo via HFDatasetPersistence so memory survives restarts.
    """

    def __init__(self, repo_path: str):
        # Root of the repository this manager indexes and reads from.
        self.repo_path = Path(repo_path)
        self.persistence = HFDatasetPersistence()

        # FIX: Explicitly configure embedding model path to prevent PermissionError
        # (the default ONNX download dir may live under a read-only $HOME/.cache).
        self.embedding_function = ONNXMiniLM_L6_V2()
        cache_dir = os.getenv("CHROMA_CACHE_DIR", "/tmp/.cache/chroma")
        self.embedding_function.DOWNLOAD_PATH = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

        self.chroma_client = chromadb.PersistentClient(
            path=CHROMA_DB_PATH,
            settings=Settings(anonymized_telemetry=False, allow_reset=True)
        )

        # Collection name is derived from a hash of repo_path, so each repo
        # gets its own isolated index inside the shared Chroma store.
        collection_name = self._get_collection_name()
        # Ensure the collection uses the custom embedding function
        self.collection = self.chroma_client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_function,
            metadata={"description": "E-T Systems codebase"}
        )

        # Pair the conversation collection with the same repo hash suffix.
        conversations_name = f"conversations_{collection_name.split('_')[1]}"
        self.conversations = self.chroma_client.get_or_create_collection(
            name=conversations_name,
            embedding_function=self.embedding_function,
            metadata={"description": "Clawdbot conversation history"}
        )

        # Empty local history means a fresh container: pull the cloud copy.
        if self.conversations.count() == 0:
            self._restore_from_cloud()

        self._saves_since_backup = 0
        self.BACKUP_EVERY_N_SAVES = 1  # Sync frequently for reliability
        self._is_first_save = True

    def _restore_from_cloud(self):
        """Re-insert every cloud-backed conversation into the local collection."""
        cloud_data = self.persistence.load_conversations()
        if not cloud_data: return
        for conv in cloud_data:
            try:
                self.conversations.add(documents=[conv["document"]], metadatas=[conv["metadata"]], ids=[conv["id"]])
            except Exception: pass  # e.g. duplicate id from a partial restore; skip it

    def _backup_to_cloud(self, force: bool = False):
        """Snapshot all local conversations and hand them to the persistence layer."""
        if self.conversations.count() == 0: return
        all_convs = self.conversations.get(include=["documents", "metadatas"])
        backup_data = [{"id": id_, "document": doc, "metadata": meta}
                       for doc, meta, id_ in zip(all_convs["documents"], all_convs["metadatas"], all_convs["ids"])]
        self.persistence.save_conversations(backup_data, force=force)

    def _get_collection_name(self) -> str:
        """Stable per-repo collection name: 'codebase_' + 8-char md5 of the path."""
        path_hash = hashlib.md5(str(self.repo_path).encode()).hexdigest()[:8]
        return f"codebase_{path_hash}"

    def _index_codebase(self):
        """Walk the repo and add every eligible text file to the code collection.

        Skips vendored/build directories, non-code extensions, empty files,
        and anything over 100 KB; inserts in batches of 100 documents.
        """
        code_extensions = {'.py', '.js', '.ts', '.tsx', '.jsx', '.md', '.txt', '.json', '.yaml', '.yml'}
        skip_dirs = {'node_modules', '.git', '__pycache__', 'venv', 'env', '.venv', 'dist', 'build'}
        documents, metadatas, ids = [], [], []
        for file_path in self.repo_path.rglob('*'):
            if file_path.is_dir() or any(skip in file_path.parts for skip in skip_dirs) or file_path.suffix not in code_extensions:
                continue
            try:
                content = file_path.read_text(encoding='utf-8', errors='ignore')
                if not content.strip() or len(content) > 100000: continue
                rel = str(file_path.relative_to(self.repo_path))
                documents.append(content); ids.append(rel)
                metadatas.append({"path": rel, "type": file_path.suffix[1:], "size": len(content)})
            except Exception: continue  # unreadable file: skip rather than abort the walk
        if documents:
            for i in range(0, len(documents), 100):
                self.collection.add(documents=documents[i:i+100], metadatas=metadatas[i:i+100], ids=ids[i:i+100])

    def search_code(self, query: str, n_results: int = 5) -> List[Dict]:
        """Semantic search over indexed files; returns file/snippet/relevance dicts."""
        if self.collection.count() == 0: return []
        results = self.collection.query(query_texts=[query], n_results=min(n_results, self.collection.count()))
        # relevance = 1 - distance, rounded; higher is a closer match
        return [{"file": m['path'], "snippet": d[:500], "relevance": round(1-dist, 3)}
                for d, m, dist in zip(results['documents'][0], results['metadatas'][0], results['distances'][0])]

    def read_file(self, path: str, lines: Optional[Tuple[int, int]] = None) -> str:
        """Read a repo-relative file; *lines* is an inclusive 1-based (start, end) range.

        Errors are reported as strings rather than raised, matching the
        other tool-style methods on this class.
        """
        full_path = self.repo_path / path
        if not full_path.exists(): return "Error: File not found"
        try:
            content = full_path.read_text(encoding='utf-8', errors='ignore')
            if lines:
                l_list = content.split('\n')
                return '\n'.join(l_list[lines[0]-1:lines[1]])
            return content
        except Exception as e: return str(e)

    def search_testament(self, query: str) -> str:
        """Return the '## ' sections of TESTAMENT.md whose text contains *query* (case-insensitive)."""
        t_path = self.repo_path / "TESTAMENT.md"
        if not t_path.exists(): return "Testament not found"
        try:
            sections = t_path.read_text(encoding='utf-8').split('\n## ')
            # Re-prefix the heading marker that split() consumed.
            relevant = [('## ' + s if not s.startswith('#') else s) for s in sections if query.lower() in s.lower()]
            return '\n\n'.join(relevant) if relevant else "No matches"
        except Exception as e: return str(e)

    def list_files(self, directory: str = ".") -> List[str]:
        """List non-hidden entries of a repo-relative directory; dirs get a '/' suffix."""
        d_path = self.repo_path / directory
        if not d_path.exists(): return ["Error: Not found"]
        try:
            return [(f.name + '/' if f.is_dir() else f.name) for f in sorted(d_path.iterdir()) if not f.name.startswith('.')]
        except Exception as e: return [str(e)]

    def save_conversation_turn(self, user_message: str, assistant_message: str, turn_id: int):
        """Store one user/assistant exchange and sync it to the cloud backup.

        The first save always forces an immediate backup; afterwards a
        backup runs every BACKUP_EVERY_N_SAVES saves (debounced upstream).
        """
        # FIX: Ensure all brackets and quotes are closed correctly
        combined = f"USER: {user_message}\n\nASSISTANT: {assistant_message}"
        u_id = f"turn_{int(time.time())}_{turn_id}"
        self.conversations.add(
            documents=[combined],
            metadatas=[{"user": user_message[:500], "assistant": assistant_message[:500], "turn": turn_id}],
            ids=[u_id]
        )
        if self._is_first_save:
            self._backup_to_cloud(force=True)
            self._is_first_save = False
        else:
            self._saves_since_backup += 1
            if self._saves_since_backup >= self.BACKUP_EVERY_N_SAVES:
                self._backup_to_cloud()
                self._saves_since_backup = 0

    def search_conversations(self, query: str, n_results: int = 5) -> List[Dict]:
        """Semantic search over stored conversation turns."""
        if self.conversations.count() == 0: return []
        res = self.conversations.query(query_texts=[query], n_results=min(n_results, self.conversations.count()))
        return [{"turn": m.get("turn"), "full_text": d} for d, m in zip(res['documents'][0], res['metadatas'][0])]

    def get_conversation_count(self) -> int:
        """Number of conversation turns currently stored locally."""
        return self.conversations.count()

    def get_stats(self) -> Dict:
        """Summary of index size, conversation count, and backup configuration."""
        return {"total_files": self.collection.count(), "conversations": self.conversations.count(), "storage_path": CHROMA_DB_PATH, "cloud_backup_configured": self.persistence.is_configured, "cloud_backup_repo": self.persistence.repo_id}

    def force_backup(self):
        """Push a backup immediately, bypassing the debounce window."""
        self._backup_to_cloud(force=True)

    def shutdown(self):
        """Flush conversation state to the cloud before the process exits."""
        self.force_backup()