JatinAutonomousLabs commited on
Commit
eaf0400
·
verified ·
1 Parent(s): 1af1949

Create artifact_registry.py

Browse files
Files changed (1) hide show
  1. artifact_registry.py +384 -0
artifact_registry.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ artifact_registry.py - Artifact Tracking & Management
3
+ ======================================================
4
+
5
+ Tracks all files created during execution to ensure user access.
6
+ Provides download manifests and file management.
7
+
8
+ Author: AI Lab Team
9
+ Last Updated: 2025-10-10
10
+ Version: 1.0
11
+ """
12
+
13
+ import json
14
+ import os
15
+ import shutil
16
+ import uuid
17
+ from datetime import datetime
18
+ from typing import List, Dict, Any, Optional
19
+ from logging_config import get_logger
20
+
21
+ # Import config
22
+ try:
23
+ import graph_config as cfg
24
+ except ImportError:
25
+ class cfg:
26
+ USER_ARTIFACTS_DIR = "outputs/user_artifacts"
27
+ ARTIFACT_REGISTRY_FILE = "outputs/artifact_registry.json"
28
+ ARTIFACT_RETENTION_DAYS = 30
29
+ MAX_ARTIFACT_SIZE_MB = 100
30
+
31
+ log = get_logger(__name__)
32
+
33
+
34
+ class ArtifactRegistry:
35
+ """
36
+ Registry for tracking all created artifacts.
37
+
38
+ Features:
39
+ - Tracks file creation
40
+ - Ensures user accessibility
41
+ - Provides download manifests
42
+ - Manages file lifecycle
43
+ """
44
+
45
+ def __init__(self, registry_path: str = None):
46
+ """
47
+ Initialize artifact registry.
48
+
49
+ Args:
50
+ registry_path: Path to registry JSON file
51
+ """
52
+ self.registry_path = registry_path or cfg.ARTIFACT_REGISTRY_FILE
53
+ self.artifacts = self._load_registry()
54
+
55
+ # Ensure user artifacts directory exists
56
+ os.makedirs(cfg.USER_ARTIFACTS_DIR, exist_ok=True)
57
+
58
+ log.info(f"Artifact Registry initialized: {len(self.artifacts)} artifacts")
59
+
60
+ def register_artifact(self, filepath: str, metadata: Dict[str, Any]) -> tuple:
61
+ """
62
+ Register an artifact in the system.
63
+
64
+ Args:
65
+ filepath: Original file path
66
+ metadata: Artifact metadata (type, session_id, user_request, etc.)
67
+
68
+ Returns:
69
+ Tuple of (artifact_id, user_accessible_path)
70
+ """
71
+ # Generate unique ID
72
+ artifact_id = uuid.uuid4().hex[:8]
73
+
74
+ # Ensure file is in user-accessible location
75
+ user_path = self._ensure_user_accessible(filepath)
76
+
77
+ # Get file stats
78
+ try:
79
+ size_bytes = os.path.getsize(user_path)
80
+ size_mb = size_bytes / 1_000_000
81
+
82
+ # Check size limit
83
+ if size_mb > cfg.MAX_ARTIFACT_SIZE_MB:
84
+ log.warning(
85
+ f"⚠️ Artifact exceeds size limit: {size_mb:.2f}MB > "
86
+ f"{cfg.MAX_ARTIFACT_SIZE_MB}MB"
87
+ )
88
+ except Exception:
89
+ size_bytes = 0
90
+ size_mb = 0.0
91
+
92
+ # Create registry entry
93
+ self.artifacts[artifact_id] = {
94
+ "id": artifact_id,
95
+ "original_path": filepath,
96
+ "user_path": user_path,
97
+ "filename": os.path.basename(user_path),
98
+ "created_at": datetime.utcnow().isoformat(),
99
+ "size_bytes": size_bytes,
100
+ "size_mb": round(size_mb, 2),
101
+ "type": metadata.get("type", "unknown"),
102
+ "session_id": metadata.get("session_id"),
103
+ "user_request": metadata.get("user_request", "")[:200],
104
+ "language": metadata.get("language"),
105
+ "tier": metadata.get("tier"),
106
+ "metadata": metadata
107
+ }
108
+
109
+ # Save registry
110
+ self._save_registry()
111
+
112
+ log.info(f"✅ Artifact registered: {artifact_id} -> {os.path.basename(user_path)}")
113
+
114
+ return artifact_id, user_path
115
+
116
+ def _ensure_user_accessible(self, filepath: str) -> str:
117
+ """
118
+ Copy file to user-accessible location if needed.
119
+
120
+ Args:
121
+ filepath: Original file path
122
+
123
+ Returns:
124
+ Path in user-accessible directory
125
+ """
126
+ # If already in user directory, return as-is
127
+ if filepath.startswith(cfg.USER_ARTIFACTS_DIR):
128
+ return filepath
129
+
130
+ # If file doesn't exist, return original path
131
+ if not os.path.exists(filepath):
132
+ log.warning(f"⚠️ File not found: {filepath}")
133
+ return filepath
134
+
135
+ # Copy to user directory
136
+ filename = os.path.basename(filepath)
137
+ user_path = os.path.join(cfg.USER_ARTIFACTS_DIR, filename)
138
+
139
+ # Handle name collisions
140
+ counter = 1
141
+ base, ext = os.path.splitext(filename)
142
+ while os.path.exists(user_path):
143
+ user_path = os.path.join(
144
+ cfg.USER_ARTIFACTS_DIR,
145
+ f"{base}_{counter}{ext}"
146
+ )
147
+ counter += 1
148
+
149
+ try:
150
+ shutil.copy2(filepath, user_path)
151
+ log.info(f"📋 Copied to user dir: {filename}")
152
+ except Exception as e:
153
+ log.error(f"Failed to copy {filepath}: {e}")
154
+ return filepath
155
+
156
+ return user_path
157
+
158
+ def get_session_artifacts(self, session_id: str) -> List[Dict[str, Any]]:
159
+ """
160
+ Get all artifacts for a session.
161
+
162
+ Args:
163
+ session_id: Session identifier
164
+
165
+ Returns:
166
+ List of artifact dicts
167
+ """
168
+ return [
169
+ artifact for artifact in self.artifacts.values()
170
+ if artifact.get("session_id") == session_id
171
+ ]
172
+
173
+ def get_download_manifest(self, session_id: str) -> Dict[str, Any]:
174
+ """
175
+ Create download manifest for UI.
176
+
177
+ Args:
178
+ session_id: Session identifier
179
+
180
+ Returns:
181
+ Dict with download information
182
+ """
183
+ artifacts = self.get_session_artifacts(session_id)
184
+
185
+ total_size_mb = sum(a.get("size_mb", 0) for a in artifacts)
186
+
187
+ return {
188
+ "session_id": session_id,
189
+ "total_files": len(artifacts),
190
+ "total_size_mb": round(total_size_mb, 2),
191
+ "files": [
192
+ {
193
+ "id": a["id"],
194
+ "filename": a["filename"],
195
+ "path": a["user_path"],
196
+ "type": a["type"],
197
+ "size_kb": round(a["size_bytes"] / 1000, 1),
198
+ "size_mb": a.get("size_mb", 0),
199
+ "created": a["created_at"],
200
+ "language": a.get("language")
201
+ }
202
+ for a in artifacts
203
+ ]
204
+ }
205
+
206
+ def get_artifact_by_id(self, artifact_id: str) -> Optional[Dict[str, Any]]:
207
+ """
208
+ Get artifact by ID.
209
+
210
+ Args:
211
+ artifact_id: Artifact identifier
212
+
213
+ Returns:
214
+ Artifact dict or None
215
+ """
216
+ return self.artifacts.get(artifact_id)
217
+
218
+ def delete_artifact(self, artifact_id: str) -> bool:
219
+ """
220
+ Delete an artifact.
221
+
222
+ Args:
223
+ artifact_id: Artifact identifier
224
+
225
+ Returns:
226
+ True if deleted successfully
227
+ """
228
+ artifact = self.artifacts.get(artifact_id)
229
+ if not artifact:
230
+ return False
231
+
232
+ # Delete file
233
+ user_path = artifact.get("user_path")
234
+ if user_path and os.path.exists(user_path):
235
+ try:
236
+ os.remove(user_path)
237
+ log.info(f"🗑️ Deleted file: {os.path.basename(user_path)}")
238
+ except Exception as e:
239
+ log.error(f"Failed to delete {user_path}: {e}")
240
+ return False
241
+
242
+ # Remove from registry
243
+ del self.artifacts[artifact_id]
244
+ self._save_registry()
245
+
246
+ return True
247
+
248
+ def cleanup_old_artifacts(self, days: int = None):
249
+ """
250
+ Delete artifacts older than specified days.
251
+
252
+ Args:
253
+ days: Age threshold in days (uses config default if None)
254
+ """
255
+ days = days or cfg.ARTIFACT_RETENTION_DAYS
256
+ cutoff = datetime.utcnow().timestamp() - (days * 86400)
257
+
258
+ deleted_count = 0
259
+
260
+ for artifact_id, artifact in list(self.artifacts.items()):
261
+ created_str = artifact.get("created_at", "")
262
+ try:
263
+ created_dt = datetime.fromisoformat(created_str)
264
+ created_ts = created_dt.timestamp()
265
+
266
+ if created_ts < cutoff:
267
+ if self.delete_artifact(artifact_id):
268
+ deleted_count += 1
269
+ except Exception as e:
270
+ log.debug(f"Cleanup skip {artifact_id}: {e}")
271
+
272
+ if deleted_count > 0:
273
+ log.info(f"🧹 Cleaned up {deleted_count} old artifacts")
274
+
275
+ def get_all_artifacts(self) -> List[Dict[str, Any]]:
276
+ """
277
+ Get all artifacts in registry.
278
+
279
+ Returns:
280
+ List of all artifact dicts
281
+ """
282
+ return list(self.artifacts.values())
283
+
284
+ def get_registry_stats(self) -> Dict[str, Any]:
285
+ """
286
+ Get registry statistics.
287
+
288
+ Returns:
289
+ Dict with stats
290
+ """
291
+ artifacts = self.get_all_artifacts()
292
+
293
+ total_size = sum(a.get("size_bytes", 0) for a in artifacts)
294
+
295
+ # Count by type
296
+ type_counts = {}
297
+ for a in artifacts:
298
+ atype = a.get("type", "unknown")
299
+ type_counts[atype] = type_counts.get(atype, 0) + 1
300
+
301
+ # Count by session
302
+ session_counts = {}
303
+ for a in artifacts:
304
+ session = a.get("session_id", "unknown")
305
+ session_counts[session] = session_counts.get(session, 0) + 1
306
+
307
+ return {
308
+ "total_artifacts": len(artifacts),
309
+ "total_size_mb": round(total_size / 1_000_000, 2),
310
+ "by_type": type_counts,
311
+ "by_session": len(session_counts),
312
+ "unique_sessions": list(session_counts.keys())
313
+ }
314
+
315
+ def _load_registry(self) -> Dict[str, Dict[str, Any]]:
316
+ """
317
+ Load registry from disk.
318
+
319
+ Returns:
320
+ Dict of artifacts
321
+ """
322
+ if not os.path.exists(self.registry_path):
323
+ return {}
324
+
325
+ try:
326
+ with open(self.registry_path, 'r', encoding='utf-8') as f:
327
+ return json.load(f)
328
+ except Exception as e:
329
+ log.error(f"Failed to load registry: {e}")
330
+ return {}
331
+
332
+ def _save_registry(self):
333
+ """Save registry to disk."""
334
+ # Ensure directory exists
335
+ os.makedirs(os.path.dirname(self.registry_path), exist_ok=True)
336
+
337
+ try:
338
+ with open(self.registry_path, 'w', encoding='utf-8') as f:
339
+ json.dump(self.artifacts, f, indent=2)
340
+ except Exception as e:
341
+ log.error(f"Failed to save registry: {e}")
342
+
343
+ def export_manifest(self, output_path: str = None) -> str:
344
+ """
345
+ Export complete manifest to JSON file.
346
+
347
+ Args:
348
+ output_path: Output file path
349
+
350
+ Returns:
351
+ Path to exported manifest
352
+ """
353
+ output_path = output_path or os.path.join(
354
+ cfg.USER_ARTIFACTS_DIR,
355
+ f"manifest_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
356
+ )
357
+
358
+ manifest = {
359
+ "exported_at": datetime.utcnow().isoformat(),
360
+ "stats": self.get_registry_stats(),
361
+ "artifacts": self.get_all_artifacts()
362
+ }
363
+
364
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
365
+
366
+ with open(output_path, 'w', encoding='utf-8') as f:
367
+ json.dump(manifest, f, indent=2)
368
+
369
+ log.info(f"📊 Manifest exported: {output_path}")
370
+ return output_path
371
+
372
+
373
+ # Global instance
374
+ artifact_registry = ArtifactRegistry()
375
+
376
+
377
+ # ============================================================================
378
+ # EXPORTS
379
+ # ============================================================================
380
+
381
+ __all__ = [
382
+ 'ArtifactRegistry',
383
+ 'artifact_registry'
384
+ ]