JatinAutonomousLabs committed on
Commit
d02b81b
·
verified ·
1 Parent(s): b3a6593

Create file_browser_manager.py

Browse files
Files changed (1) hide show
  1. file_browser_manager.py +414 -0
file_browser_manager.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ file_browser_manager.py - Integrated File Browser for Gradio
3
+ =============================================================
4
+
5
+ Provides real-time file access and management for all generated artifacts.
6
+ Works seamlessly with the existing artifact registry system.
7
+
8
+ Author: AI Lab Team
9
+ Version: 1.0
10
+ """
11
+
12
+ import os
13
+ import json
14
+ import shutil
15
+ import zipfile
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import List, Dict, Tuple, Optional
19
+ import mimetypes
20
+
21
+ from logging_config import get_logger
22
+
23
+ log = get_logger(__name__)
24
+
25
+
26
class FileBrowserManager:
    """
    Manages file browsing, preview, and download functionality.

    Files are discovered by walking every directory in ``base_dirs``.
    Hidden files (leading dot) and ``.json`` files are never listed, and a
    file reachable through more than one base directory is listed once.
    """

    # Directory that receives generated ZIP packages, and the filename prefix
    # those packages carry (used to keep old packages out of new ones).
    _PACKAGE_DIR = "outputs"
    _PACKAGE_PREFIX = "artifacts_"

    # Extensions that get_file_preview() renders as text.
    _TEXT_EXTENSIONS = (
        '.txt', '.md', '.py', '.js', '.ts', '.json', '.csv', '.html', '.css',
    )

    # Lower-cased extension -> human readable type label.
    _TYPE_MAP = {
        '.py': 'Python Script',
        '.ipynb': 'Jupyter Notebook',
        '.js': 'JavaScript',
        '.ts': 'TypeScript',
        '.html': 'HTML',
        '.css': 'CSS',
        '.json': 'JSON',
        '.txt': 'Text',
        '.md': 'Markdown',
        '.docx': 'Word Document',
        '.xlsx': 'Excel Spreadsheet',
        '.pdf': 'PDF Document',
        '.png': 'PNG Image',
        '.jpg': 'JPEG Image',
        '.jpeg': 'JPEG Image',
        '.gif': 'GIF Image',
        '.svg': 'SVG Image',
        '.csv': 'CSV Data',
        '.zip': 'ZIP Archive',
    }

    # Type label -> emoji icon. Labels without an entry use the generic icon.
    _ICON_MAP = {
        'Python Script': '🐍',
        'Jupyter Notebook': '📓',
        'JavaScript': '📜',
        'TypeScript': '📘',
        'HTML': '🌐',
        'Word Document': '📄',
        'Excel Spreadsheet': '📊',
        'PDF Document': '📕',
        'Markdown': '📝',
        'Text': '📃',
        'Image': '🖼️',
        'PNG Image': '🖼️',
        'JPEG Image': '🖼️',
        'GIF Image': '🖼️',
        'SVG Image': '🖼️',
        'CSV Data': '📈',
        'ZIP Archive': '📦',
    }

    def __init__(self, base_dirs: Optional[List[str]] = None):
        """
        Initialize file browser.

        Args:
            base_dirs: Directories to monitor. When omitted or empty the
                defaults are ``outputs``, ``outputs/user_artifacts``,
                ``uploads`` and ``/tmp``.
        """
        # Copy the caller's list so later external mutation cannot change
        # which directories this instance scans or deletes from.
        self.base_dirs = list(base_dirs) if base_dirs else [
            "outputs",
            "outputs/user_artifacts",
            "uploads",
            "/tmp",
        ]

        # Ensure all directories exist. A directory that cannot be created is
        # only logged: scan_all_files() already skips missing directories.
        for directory in self.base_dirs:
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError as e:
                log.warning(f"Could not create directory {directory}: {e}")

        log.info(f"📂 File Browser initialized: {len(self.base_dirs)} directories")

    def scan_all_files(self) -> List[Dict]:
        """
        Scan all monitored directories and return file information.

        Returns:
            List of file info dicts (``filename``, ``path``,
            ``relative_path``, ``directory``, ``size_bytes``, ``size_kb``,
            ``size_mb``, ``modified``, ``extension``, ``type``, ``base_dir``),
            newest modification time first.
        """
        seen_paths = set()
        dated_entries = []  # (st_mtime, file_info) pairs

        for base_dir in self.base_dirs:
            if not os.path.exists(base_dir):
                continue

            try:
                for root, _dirs, filenames in os.walk(base_dir):
                    for filename in filenames:
                        # Skip hidden files and system files
                        if filename.startswith('.') or filename.endswith('.json'):
                            continue

                        filepath = os.path.join(root, filename)

                        # Base directories may overlap (e.g. "outputs" and
                        # "outputs/user_artifacts"); report each file once,
                        # attributed to the first base directory reaching it.
                        identity = os.path.abspath(filepath)
                        if identity in seen_paths:
                            continue
                        seen_paths.add(identity)

                        try:
                            stat = os.stat(filepath)
                            file_info = {
                                'filename': filename,
                                'path': filepath,
                                'relative_path': os.path.relpath(filepath, base_dir),
                                'directory': os.path.dirname(filepath),
                                'size_bytes': stat.st_size,
                                'size_kb': round(stat.st_size / 1000, 1),
                                'size_mb': round(stat.st_size / (1000 * 1000), 2),
                                'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
                                'extension': os.path.splitext(filename)[1],
                                'type': self._get_file_type(filename),
                                'base_dir': base_dir,
                            }
                        except (OSError, OverflowError, ValueError) as e:
                            log.warning(f"Failed to stat {filepath}: {e}")
                            continue

                        dated_entries.append((stat.st_mtime, file_info))

            except Exception as e:
                # Best effort: one unreadable tree must not hide the others.
                log.error(f"Failed to scan {base_dir}: {e}")

        # Sort by modified time (newest first), using the numeric timestamp
        # rather than its formatted string.
        dated_entries.sort(key=lambda entry: entry[0], reverse=True)

        return [file_info for _mtime, file_info in dated_entries]

    def _get_file_type(self, filename: str) -> str:
        """Return the type label for *filename*'s extension, or 'File'."""
        ext = os.path.splitext(filename)[1].lower()
        return self._TYPE_MAP.get(ext, 'File')

    def get_file_tree_markdown(self, max_files: int = 50) -> str:
        """
        Generate markdown representation of file tree.

        Args:
            max_files: Maximum files to display

        Returns:
            Markdown with the total count, a per-type summary and the most
            recently modified files.
        """
        files = self.scan_all_files()

        if not files:
            return "📁 **No files found**\n\n*Upload files or generate artifacts to see them here.*"

        # Group by type
        by_type = {}
        for file in files:
            by_type.setdefault(file['type'], []).append(file)

        # Collect fragments and join once instead of repeated string +=.
        parts = [
            "## 📂 File Browser\n\n",
            f"**Total: {len(files)} files**\n\n",
            "### 📊 By Type\n\n",
        ]

        # Summary by type
        for ftype in sorted(by_type):
            type_files = by_type[ftype]
            # Sum exact byte counts; summing the per-file rounded KB values
            # would accumulate rounding error.
            total_size = sum(f['size_bytes'] for f in type_files) / 1000
            parts.append(f"- **{ftype}**: {len(type_files)} files ({total_size:.1f} KB)\n")

        parts.append("\n### 📄 Recent Files\n\n")

        # Show recent files (a negative limit shows none rather than slicing
        # from the end of the list).
        display_files = files[:max(max_files, 0)]

        for file in display_files:
            icon = self._get_file_icon(file['type'])
            parts.append(f"{icon} **{file['filename']}**\n")
            parts.append(f" - Type: {file['type']}\n")
            parts.append(f" - Size: {file['size_kb']} KB\n")
            parts.append(f" - Path: `{file['relative_path']}`\n")
            parts.append(f" - Modified: {file['modified'][:16]}\n\n")

        hidden = len(files) - len(display_files)
        if hidden > 0:
            parts.append(f"\n*... and {hidden} more files*\n")

        return "".join(parts)

    def _get_file_icon(self, file_type: str) -> str:
        """Get emoji icon for file type (generic document icon if unknown)."""
        return self._ICON_MAP.get(file_type, '📄')

    def get_files_for_download(self) -> List[str]:
        """
        Get list of file paths for Gradio Files component.

        Returns:
            The ``path`` of every scanned file, newest first. Paths are built
            from the configured base directories, so they are relative
            whenever the base directory is relative.
        """
        return [f['path'] for f in self.scan_all_files()]

    @classmethod
    def _is_download_package(cls, path: str) -> bool:
        """Return True if *path* names a ZIP written by create_download_package()."""
        name = os.path.basename(path)
        in_package_dir = (
            os.path.abspath(os.path.dirname(path)) == os.path.abspath(cls._PACKAGE_DIR)
        )
        return (
            in_package_dir
            and name.startswith(cls._PACKAGE_PREFIX)
            and name.endswith('.zip')
        )

    @staticmethod
    def _unique_arcname(file_info: Dict, used: set) -> str:
        """Return a ``<Type>/<filename>`` archive name not yet in *used*, and record it."""
        folder = file_info['type'].replace(' ', '_')
        stem, ext = os.path.splitext(file_info['filename'])
        candidate = os.path.join(folder, file_info['filename'])
        counter = 1
        # Same-named files from different directories get _1, _2, ... so no
        # archive member shadows another.
        while candidate in used:
            candidate = os.path.join(folder, f"{stem}_{counter}{ext}")
            counter += 1
        used.add(candidate)
        return candidate

    def create_download_package(self, session_id: Optional[str] = None) -> str:
        """
        Create a ZIP file containing all scanned files, grouped by type.

        Args:
            session_id: Accepted for interface compatibility; no filtering
                by session is applied.

        Returns:
            Path to created ZIP file

        Raises:
            ValueError: If there are no files to package.
        """
        # Leave out packages produced by earlier calls; otherwise every new
        # package would contain all previous ones.
        files = [
            file_info for file_info in self.scan_all_files()
            if not self._is_download_package(file_info['path'])
        ]

        if not files:
            raise ValueError("No files to package")

        # Create ZIP filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"artifacts_{timestamp}.zip"
        # The package directory may not be among the configured base_dirs.
        os.makedirs(self._PACKAGE_DIR, exist_ok=True)
        zip_path = os.path.join(self._PACKAGE_DIR, zip_filename)

        # Create ZIP
        used_names = set()
        packaged = 0
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_info in files:
                arcname = self._unique_arcname(file_info, used_names)
                try:
                    zipf.write(file_info['path'], arcname)
                except OSError as e:
                    # A file can vanish or become unreadable between the scan
                    # and the write; skip it instead of aborting the package.
                    log.warning(f"Skipped {file_info['path']}: {e}")
                    continue
                packaged += 1

        log.info(f"📦 Created download package: {zip_filename} ({packaged} files)")

        return zip_path

    def get_file_stats(self) -> Dict:
        """
        Get comprehensive file statistics.

        Returns:
            Dict with ``total_files``, ``total_size_bytes``,
            ``total_size_mb``, ``by_type`` and ``by_directory`` counts, and
            ``most_recent`` (newest file info dict, or None when empty).
        """
        files = self.scan_all_files()

        total_size = 0
        by_type = {}
        by_dir = {}
        for file in files:
            total_size += file['size_bytes']
            by_type[file['type']] = by_type.get(file['type'], 0) + 1
            by_dir[file['base_dir']] = by_dir.get(file['base_dir'], 0) + 1

        return {
            'total_files': len(files),
            'total_size_bytes': total_size,
            'total_size_mb': round(total_size / (1000 * 1000), 2),
            'by_type': by_type,
            'by_directory': by_dir,
            'most_recent': files[0] if files else None,
        }

    def _is_within_base_dirs(self, filepath: str) -> bool:
        """Return True if *filepath* lies strictly inside one of ``base_dirs``."""
        absolute = os.path.abspath(filepath)
        # Resolve symlinks in the parent directory only: os.remove() acts on
        # the directory entry itself, so a symlinked file inside a monitored
        # directory is still a legitimate target.
        target = os.path.join(
            os.path.realpath(os.path.dirname(absolute)),
            os.path.basename(absolute),
        )
        for base_dir in self.base_dirs:
            base = os.path.realpath(base_dir)
            try:
                inside = os.path.commonpath([base, target]) == base
            except ValueError:
                # Raised when the paths cannot be compared (e.g. different drives).
                continue
            if inside and target != base:
                return True
        return False

    def delete_file(self, filepath: str) -> Tuple[bool, str]:
        """
        Delete a specific file located inside one of the monitored directories.

        Args:
            filepath: Path to the file (absolute or relative).

        Returns:
            (success, message) tuple. Paths outside ``base_dirs`` are refused
            and nothing is deleted.
        """
        try:
            if not filepath or not os.path.exists(filepath):
                return False, "File not found"

            # Never remove anything outside the monitored directories, even
            # when the path reaches there through ".." components.
            if not self._is_within_base_dirs(filepath):
                return False, "Refused: path is outside the monitored directories"

            os.remove(filepath)

        except FileNotFoundError:
            # Removed by someone else between the check and the delete.
            return False, "File not found"
        except (OSError, TypeError, ValueError) as e:
            log.error(f"Failed to delete {filepath}: {e}")
            return False, f"Error: {e}"

        log.info(f"🗑️ Deleted file: {filepath}")
        return True, f"Deleted {os.path.basename(filepath)}"

    def clear_all_files(self) -> Tuple[int, str]:
        """
        Delete every file returned by scan_all_files().

        Hidden files and ``.json`` files are not scanned and therefore kept.

        Returns:
            (count, message) tuple
        """
        deleted = 0

        for file_info in self.scan_all_files():
            try:
                os.remove(file_info['path'])
            except OSError as e:
                log.warning(f"Failed to delete {file_info['path']}: {e}")
                continue
            deleted += 1

        log.info(f"🗑️ Cleared {deleted} files")
        return deleted, f"Cleared {deleted} files"

    def get_file_preview(self, filepath: str, max_lines: int = 50) -> str:
        """
        Generate preview of file content.

        Args:
            filepath: Path to file
            max_lines: Maximum lines to show for text files

        Returns:
            The first ``max_lines`` lines for known text extensions (with a
            count of the remaining lines), a size summary for any other
            file, or an error message.
        """
        # NOTE(review): unlike delete_file(), the path is not restricted to
        # base_dirs here - confirm callers never pass user-typed paths.
        if not os.path.exists(filepath):
            return "❌ File not found"

        ext = os.path.splitext(filepath)[1].lower()

        # Text-based files
        if ext in self._TEXT_EXTENSIONS:
            head = []
            remaining = 0
            try:
                # Stream the file: keep only the lines that are shown and
                # merely count the rest, so large files stay out of memory.
                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                    for line in f:
                        if len(head) < max_lines:
                            head.append(line)
                        else:
                            remaining += 1
            except Exception as e:
                return f"❌ Preview error: {e}"

            preview = ''.join(head)
            if remaining == 0:
                return preview
            return f"{preview}\n\n... ({remaining} more lines)"

        # Binary files
        size = os.path.getsize(filepath)
        return f"📦 Binary file ({size / 1000:.1f} KB)\n\nDownload to view content."
358
+
359
+
360
+ # Global instance: built once at import time and used by the module-level helper functions below.
361
+ file_browser = FileBrowserManager()
362
+
363
+
364
def get_file_browser() -> FileBrowserManager:
    """Return the module-level ``file_browser`` instance."""
    shared_browser = file_browser
    return shared_browser
367
+
368
+
369
+ # Convenience functions for Gradio integration
370
def refresh_file_list() -> str:
    """Rescan through the global browser and return its file tree markdown."""
    tree_markdown = file_browser.get_file_tree_markdown()
    return tree_markdown
373
+
374
+
375
def get_download_files() -> List[str]:
    """Return the downloadable file paths reported by the global browser."""
    download_paths = file_browser.get_files_for_download()
    return download_paths
378
+
379
+
380
def create_zip_package() -> str:
    """
    Build a ZIP package through the global browser.

    Returns:
        Path to the created ZIP, or an empty string if packaging failed
        (the failure is logged).
    """
    try:
        package_path = file_browser.create_download_package()
    except Exception as exc:
        log.error(f"Failed to create ZIP: {exc}")
        return ""
    return package_path
387
+
388
+
389
def get_stats_markdown() -> str:
    """
    Render the global browser's file statistics as markdown.

    Returns:
        A heading with total file count and size, followed by a per-type
        count list when at least one file type is present.
    """
    stats = file_browser.get_file_stats()

    fragments = [
        "### 📊 Statistics\n\n",
        f"- **Total Files**: {stats['total_files']}\n",
        f"- **Total Size**: {stats['total_size_mb']} MB\n\n",
    ]

    type_counts = stats['by_type']
    if type_counts:
        fragments.append("**By Type:**\n")
        fragments.extend(
            f"- {ftype}: {count}\n" for ftype, count in sorted(type_counts.items())
        )

    return "".join(fragments)
403
+
404
+
405
+ # Public API: the class, the shared instance and the helper functions exported by a star-import.
406
+ __all__ = [
407
+ 'FileBrowserManager',
408
+ 'file_browser',
409
+ 'get_file_browser',
410
+ 'refresh_file_list',
411
+ 'get_download_files',
412
+ 'create_zip_package',
413
+ 'get_stats_markdown'
414
+ ]