Translsis committed on
Commit
88c18c0
·
verified ·
1 Parent(s): a9475ec

Upload: app.py

Browse files
Files changed (1) hide show
  1. app.py +622 -0
app.py ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import logging
4
+ import asyncio
5
+ import tempfile
6
+ import git
7
+ import shutil
8
+ import zipfile
9
+ from pathlib import Path
10
+ from typing import Tuple, Optional
11
+ from huggingface_hub import login, HfApi, create_repo, upload_file
12
+ from urllib.parse import urlparse
13
+ from functools import partial
14
+ from datetime import datetime
15
+
16
# Configure logging once at import time: INFO and above, each record tagged
# with timestamp, level and the file/line that emitted it.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)
22
+
23
class HuggingFaceManager:
    """Coordinate Hugging Face Hub operations for the Gradio UI.

    Covers login, repository creation, downloading (wget / git), uploading a
    folder (file by file or as a single zip) and deleting remote files.
    Blocking library calls are run in the default executor so that the
    coroutines remain awaitable.
    """

    def __init__(self):
        self.api = HfApi()
        # Set by login_and_validate(); the other operations refuse to run
        # while it is still None.
        self.token: Optional[str] = None
        # Scratch directory for downloads and archives; removed by cleanup().
        self.temp_dir = Path(tempfile.mkdtemp())

    def cleanup(self):
        """Remove the temporary working directory, ignoring errors."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def validate_repo_name(self, repo_name: str) -> bool:
        """Validate that *repo_name* looks like ``username/repo-name``.

        Returns:
            True when the name is well formed.

        Raises:
            ValueError: if the name is empty, has no ``/``, or either side
                of the ``/`` is blank.  Callers in this class ignore the
                return value and rely on the exception, so a blank part
                must raise rather than return False.
        """
        if not repo_name or '/' not in repo_name:
            raise ValueError("Repository name must be in format 'username/repo-name'")
        username, repo = repo_name.split('/', 1)
        if not (username.strip() and repo.strip()):
            raise ValueError("Repository name must be in format 'username/repo-name'")
        return True

    def validate_url(self, url: str) -> bool:
        """Return True when *url* parses with both a scheme and a host."""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False

    async def create_zip_archive(
        self,
        source_path: Path,
        password: Optional[str] = None,
        progress_callback=None
    ) -> Optional[Path]:
        """Zip the files under *source_path* into the temp directory.

        Args:
            source_path: Folder whose files are archived (paths inside the
                archive are relative to it).
            password: When given, the external ``zip`` command is used to
                produce a password-protected archive; otherwise Python's
                ``zipfile`` module is used.
            progress_callback: Optional callable receiving a float in
                ``(0, 1]`` as files are added.

        Returns:
            Path of the created archive, or None if anything failed (the
            error is logged).
        """
        try:
            if not source_path.exists():
                raise ValueError(f"Source path {source_path} does not exist")
            timestamp = datetime.now().strftime("%S_%M_%H_%d_%m_%Y")
            # Build the zip file name.
            zip_path = self.temp_dir / f"archive_{timestamp}.zip"
            # A leftover archive with the same name would be updated in place
            # by the `zip` command instead of being recreated.
            if zip_path.exists():
                zip_path.unlink()

            if password:
                # NOTE(review): `-P` puts the password on the command line,
                # where other local users may see it in the process list.
                process = await asyncio.create_subprocess_exec(
                    'zip', '-r', '-P', password, str(zip_path), '.',
                    cwd=str(source_path),
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE
                )
                _, stderr = await process.communicate()
                if process.returncode != 0:
                    detail = stderr.decode(errors="replace").strip()
                    raise Exception(f"Failed to create password-protected zip: {detail}")

                if progress_callback:
                    progress_callback(1.0)  # Complete
            else:
                files = [p for p in source_path.rglob('*') if p.is_file()]
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for index, file_path in enumerate(files, start=1):
                        zipf.write(file_path, file_path.relative_to(source_path))
                        if progress_callback:
                            progress_callback(index / len(files))

            return zip_path
        except Exception as e:
            logger.error(f"Failed to create zip archive: {e}")
            return None

    async def login_and_validate(self, token: str) -> str:
        """Log in to Hugging Face with *token* and confirm it works.

        Returns a human-readable status string; on success the token is
        remembered on the instance.
        """
        if not token or not token.strip():
            return "Error: Token cannot be empty"
        # Pasted tokens often carry stray whitespace or a trailing newline.
        token = token.strip()

        try:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, login, token)
            await loop.run_in_executor(None, self.api.whoami)

            self.token = token
            return "Login successful!"
        except Exception as e:
            logger.error(f"Login failed: {e}")
            return f"Login failed: {str(e)}"

    async def download_from_url(
        self,
        url: str,
        download_type: str,
        progress: Optional["gr.Progress"] = None
    ) -> Tuple[str, Optional[str]]:
        """Download *url* into the temp directory using wget or git.

        Args:
            url: Source URL; must have a scheme and a host.
            download_type: ``"wget"`` or ``"git"``.
            progress: Optional Gradio progress tracker.

        Returns:
            ``(status message, download directory)``; the directory is None
            on failure.
        """
        if not self.validate_url(url):
            return "Invalid URL format!", None
        if download_type not in ("wget", "git"):
            # Previously an unknown type fell through and reported success
            # with an empty directory.
            return f"Unsupported download type: {download_type}", None

        output_path = self.temp_dir / "downloaded_content"
        if output_path.exists():
            shutil.rmtree(output_path)
        output_path.mkdir(parents=True)

        try:
            if download_type == "wget":
                if progress:
                    progress(0, desc="Downloading with wget...")

                process = await asyncio.create_subprocess_exec(
                    'wget', '-P', str(output_path), url,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE
                )

                try:
                    _, stderr = await asyncio.wait_for(process.communicate(), timeout=300)
                except asyncio.TimeoutError:
                    # Do not leave the child running after giving up on it.
                    try:
                        process.kill()
                    except ProcessLookupError:
                        pass
                    await process.wait()
                    raise Exception("Download timed out after 5 minutes") from None
                if process.returncode != 0:
                    raise Exception(f"wget failed: {stderr.decode(errors='replace')}")
            else:
                if progress:
                    progress(0, desc="Cloning git repository...")

                await asyncio.get_running_loop().run_in_executor(
                    None,
                    partial(git.Repo.clone_from, url, str(output_path), depth=1)
                )

            if progress:
                progress(1, desc="Download complete")

            return "Download successful!", str(output_path)

        except Exception as e:
            logger.error(f"Download failed: {e}")
            if output_path.exists():
                shutil.rmtree(output_path)
            return f"Download failed: {str(e)}", None

    async def create_repository(
        self,
        repo_name: str,
        repo_type: str,
        is_private: bool
    ) -> str:
        """Create *repo_name* on Hugging Face (no error if it already exists).

        Returns a human-readable status string.
        """
        if not self.token:
            return "Please login first!"

        try:
            self.validate_repo_name(repo_name)

            await asyncio.get_running_loop().run_in_executor(
                None,
                partial(
                    create_repo,
                    repo_name,
                    private=is_private,
                    repo_type=repo_type,
                    exist_ok=True
                )
            )

            return f"Repository '{repo_name}' created successfully!"
        except Exception as e:
            logger.error(f"Failed to create repository: {e}")
            return f"Failed to create repository: {str(e)}"

    async def upload_file_worker(
        self,
        file_path: Path,
        relative_path: Path,
        repo_name: str,
        repo_type: str,
        progress_callback=None
    ) -> Optional[str]:
        """Upload one file to the repository.

        Args:
            file_path: Local file to upload.
            relative_path: Destination path inside the repository.
            repo_name: Target repository id.
            repo_type: Target repository type.
            progress_callback: Optional zero-argument callable invoked after
                a successful upload.

        Returns:
            The path used in the repository, or None if the upload failed
            (the error is logged).
        """
        # Repository paths use forward slashes regardless of the local OS.
        repo_path = Path(relative_path).as_posix()
        try:
            await asyncio.get_running_loop().run_in_executor(
                None,
                partial(
                    upload_file,
                    path_or_fileobj=str(file_path),
                    path_in_repo=repo_path,
                    repo_id=repo_name,
                    repo_type=repo_type,
                    commit_message=f"Upload: {relative_path}"
                )
            )
            if progress_callback:
                progress_callback()
            return repo_path
        except Exception as e:
            logger.error(f"Error uploading {relative_path}: {e}")
            return None

    async def upload_folder(
        self,
        folder_path: str,
        repo_name: str,
        repo_type: str,
        should_zip: bool = False,
        zip_password: Optional[str] = None,
        progress: Optional["gr.Progress"] = None
    ) -> str:
        """Upload a folder to the repository, optionally as one zip archive.

        Args:
            folder_path: Local folder to upload.
            repo_name: Target repository id (``username/repo-name``).
            repo_type: Target repository type.
            should_zip: Upload a single archive instead of individual files.
            zip_password: Optional password for the archive.
            progress: Optional Gradio progress tracker.

        Returns:
            A human-readable status string.
        """
        if not self.token:
            return "Please login first!"

        # An empty path would resolve to the current working directory and
        # silently upload the application's own files.
        if not folder_path or not str(folder_path).strip():
            return "Please provide a folder path"

        folder_path = Path(folder_path)
        if not folder_path.exists():
            return f"Folder {folder_path} does not exist!"

        zip_path: Optional[Path] = None
        try:
            self.validate_repo_name(repo_name)

            if should_zip:
                if progress:
                    progress(0, desc="Creating zip archive...")

                zip_path = await self.create_zip_archive(
                    folder_path,
                    zip_password,
                    lambda p: progress(p * 0.5) if progress else None
                )

                if not zip_path:
                    return "Failed to create zip archive"

                if progress:
                    progress(0.5, desc="Uploading zip file...")

                # Upload single zip file
                uploaded = await self.upload_file_worker(
                    zip_path,
                    Path(zip_path.name),
                    repo_name,
                    repo_type,
                    lambda: progress(1.0) if progress else None
                )
                # The worker reports failure by returning None.
                if uploaded is None:
                    return "Failed to upload zip file"

                return "Zip file uploaded successfully!"

            # Upload file by file, skipping git metadata.
            files_to_upload = []
            for root, dirs, files in os.walk(folder_path):
                if '.git' in dirs:
                    dirs.remove('.git')

                for file in files:
                    file_path = Path(root) / file
                    files_to_upload.append((file_path, file_path.relative_to(folder_path)))

            if not files_to_upload:
                return "No files found to upload"

            if progress:
                progress(0, desc=f"Uploading {len(files_to_upload)} files...")

            # Track progress
            uploaded_count = 0

            def update_progress():
                nonlocal uploaded_count
                uploaded_count += 1
                if progress:
                    progress(uploaded_count / len(files_to_upload))

            # Upload files in parallel
            tasks = [
                self.upload_file_worker(
                    file_path,
                    relative_path,
                    repo_name,
                    repo_type,
                    update_progress
                )
                for file_path, relative_path in files_to_upload
            ]

            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Only a returned path string is a success; None and exception
            # objects (from return_exceptions=True) are failures.
            successful = sum(1 for r in results if isinstance(r, str))
            failed = len(files_to_upload) - successful

            return f"Uploaded {successful} files successfully. Failed: {failed}"

        except Exception as e:
            logger.error(f"Upload failed: {e}")
            return f"Upload failed: {str(e)}"
        finally:
            # Remove the archive that was actually created (its name carries
            # a timestamp, so it cannot be found under a fixed name).
            if zip_path is not None and zip_path.exists():
                zip_path.unlink()

    async def delete_files(
        self,
        folder_path: str,
        repo_name: str,
        repo_type: str,
        dry_run: bool = True,
        progress: Optional["gr.Progress"] = None
    ) -> str:
        """Delete repository files that also exist under a local folder.

        Args:
            folder_path: Local folder whose relative file paths select the
                remote files to delete.
            repo_name: Target repository id (``username/repo-name``).
            repo_type: Target repository type.
            dry_run: When True, only report how many files would be deleted.
            progress: Optional Gradio progress tracker.

        Returns:
            A human-readable status string.
        """
        if not self.token:
            return "Please login first!"

        # An empty path would resolve to the current working directory.
        if not folder_path or not str(folder_path).strip():
            return "Please provide a folder path"

        try:
            self.validate_repo_name(repo_name)

            folder_path = Path(folder_path)
            if not folder_path.exists():
                return f"Folder {folder_path} does not exist!"

            loop = asyncio.get_running_loop()

            # Get repository files (a set gives O(1) membership tests).
            existing_files = set(
                await loop.run_in_executor(
                    None,
                    partial(self.api.list_repo_files, repo_id=repo_name, repo_type=repo_type)
                )
            )

            # Find files to delete
            files_to_delete = []
            for root, dirs, files in os.walk(folder_path):
                if '.git' in dirs:
                    dirs.remove('.git')

                for file in files:
                    # Compare using forward slashes, as repository paths do.
                    relative_path = (Path(root) / file).relative_to(folder_path).as_posix()
                    if relative_path in existing_files:
                        files_to_delete.append(relative_path)

            if not files_to_delete:
                return "No matching files found to delete"

            if dry_run:
                return f"Dry run: Would delete {len(files_to_delete)} files"

            # Perform deletion
            if progress:
                progress(0, desc=f"Deleting {len(files_to_delete)} files...")

            deleted_count = 0
            for file_path in files_to_delete:
                try:
                    await loop.run_in_executor(
                        None,
                        partial(
                            self.api.delete_file,
                            repo_id=repo_name,
                            path_in_repo=file_path,
                            repo_type=repo_type
                        )
                    )
                    deleted_count += 1
                    if progress:
                        progress(deleted_count / len(files_to_delete))
                except Exception as e:
                    # Best effort: keep going and report the final count.
                    logger.error(f"Error deleting {file_path}: {e}")

            return f"Successfully deleted {deleted_count} out of {len(files_to_delete)} files"

        except Exception as e:
            logger.error(f"Delete operation failed: {e}")
            return f"Delete operation failed: {str(e)}"
406
+
407
def create_interface() -> gr.Blocks:
    """Build the Gradio UI for the repository manager.

    Creates one HuggingFaceManager shared by all tabs (Login, Download,
    Create Repository, Upload, Delete) and wires each button to the matching
    manager coroutine, run to completion with ``asyncio.run``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    import atexit

    manager = HuggingFaceManager()
    # The manager owns a temporary directory; remove it when the process
    # exits, since nothing else ever calls cleanup().
    atexit.register(manager.cleanup)

    with gr.Blocks(title="Hugging Face Repository Manager") as app:
        gr.Markdown("# Hugging Face Repository Manager")

        # Login Tab
        with gr.Tab("Login"):
            token_input = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="Enter your Hugging Face token"
            )
            login_btn = gr.Button("Login", variant="primary")
            login_output = gr.Textbox(label="Login Status", interactive=False)

            login_btn.click(
                fn=lambda x: asyncio.run(manager.login_and_validate(x)),
                inputs=[token_input],
                outputs=[login_output]
            )

        # Download Tab
        with gr.Tab("Download"):
            download_url = gr.Textbox(
                label="URL",
                placeholder="Enter URL to download"
            )
            download_type = gr.Radio(
                choices=["wget", "git"],
                label="Download Type",
                value="wget"
            )
            download_btn = gr.Button("Download", variant="primary")
            download_output = gr.Textbox(label="Status", interactive=False)
            # Hidden field carrying the download directory over to the
            # Upload tab ("Use Downloaded Content").
            download_path = gr.Textbox(
                label="Download Path",
                interactive=False,
                visible=False
            )

            download_btn.click(
                fn=lambda x, y: asyncio.run(manager.download_from_url(x, y)),
                inputs=[download_url, download_type],
                outputs=[download_output, download_path]
            )

        # Create Repository Tab
        with gr.Tab("Create Repository"):
            repo_name = gr.Textbox(
                label="Repository Name",
                placeholder="username/repo-name"
            )
            repo_type = gr.Radio(
                choices=["model", "dataset", "space"],
                label="Repository Type",
                value="model"
            )
            is_private = gr.Checkbox(label="Private Repository", value=True)
            create_btn = gr.Button("Create Repository", variant="primary")
            create_output = gr.Textbox(label="Status", interactive=False)

            create_btn.click(
                fn=lambda x, y, z: asyncio.run(manager.create_repository(x, y, z)),
                inputs=[repo_name, repo_type, is_private],
                outputs=[create_output]
            )

        # Upload Tab
        with gr.Tab("Upload"):
            with gr.Row():
                upload_folder_path = gr.Textbox(
                    label="Local Folder Path",
                    placeholder="Path to local folder"
                )
                use_downloaded = gr.Checkbox(
                    label="Use Downloaded Content",
                    value=False
                )

            upload_repo_name = gr.Textbox(
                label="Repository Name",
                placeholder="username/repo-name"
            )
            upload_repo_type = gr.Radio(
                choices=["model", "dataset", "space"],
                label="Repository Type",
                value="model"
            )

            with gr.Row():
                should_zip = gr.Checkbox(
                    label="Create Zip Archive",
                    value=False
                )
                zip_password = gr.Textbox(
                    label="Zip Password (Optional)",
                    type="password",
                    placeholder="Leave empty for no password",
                    visible=False
                )

            # Show/hide password field based on zip checkbox
            should_zip.change(
                fn=lambda x: gr.update(visible=x),
                inputs=[should_zip],
                outputs=[zip_password]
            )

            upload_btn = gr.Button("Upload Files", variant="primary")
            upload_output = gr.Textbox(label="Status", interactive=False)

            def prepare_upload_path(folder_path, use_downloaded, downloaded_path):
                """Prefer the downloaded folder when requested and available."""
                return downloaded_path if use_downloaded and downloaded_path else folder_path

            def run_upload(folder, from_download, downloaded, target_repo,
                           target_type, zip_enabled, password):
                """Resolve the source folder and run the upload to completion."""
                # A blank password field means "no password".
                effective_password = password if password and password.strip() else None
                return asyncio.run(
                    manager.upload_folder(
                        prepare_upload_path(folder, from_download, downloaded),
                        target_repo,
                        target_type,
                        zip_enabled,
                        effective_password
                    )
                )

            upload_btn.click(
                fn=run_upload,
                inputs=[
                    upload_folder_path,
                    use_downloaded,
                    download_path,
                    upload_repo_name,
                    upload_repo_type,
                    should_zip,
                    zip_password
                ],
                outputs=[upload_output]
            )

        # Delete Tab
        with gr.Tab("Delete"):
            delete_folder_path = gr.Textbox(
                label="Local Folder Path",
                placeholder="Path to local folder for reference"
            )
            delete_repo_name = gr.Textbox(
                label="Repository Name",
                placeholder="username/repo-name"
            )
            delete_repo_type = gr.Radio(
                choices=["model", "dataset", "space"],
                label="Repository Type",
                value="model"
            )
            dry_run = gr.Checkbox(
                label="Dry Run",
                value=True,
                info="Preview changes without making them"
            )
            delete_btn = gr.Button("Delete Files", variant="secondary")
            delete_output = gr.Textbox(label="Status", interactive=False)

            delete_btn.click(
                fn=lambda *args: asyncio.run(manager.delete_files(*args)),
                inputs=[
                    delete_folder_path,
                    delete_repo_name,
                    delete_repo_type,
                    dry_run
                ],
                outputs=[delete_output]
            )

        # NOTE: gr.Error is an exception class meant to be raised inside
        # event handlers, not a layout component, so it is not instantiated
        # here.  The manager methods already report failures through their
        # returned status strings.

        # Footer information
        gr.Markdown("""
        ### Information
        - Get your token from [Hugging Face Settings](https://huggingface.co/settings/tokens)
        - Repository names must be in format: username/repository-name
        - For files larger than 5GB, use Git LFS
        - Repository types:
          - Model: For ML models and weights
          - Dataset: For datasets and data files
          - Space: For demos and applications
        """)

    return app
595
+
596
def main():
    """Build the interface and serve it; log start-up failures and shutdown."""
    # NOTE(review): share=True together with auth=None and a 0.0.0.0 bind
    # makes a token-handling UI reachable without authentication - confirm
    # that this exposure is intended for the deployment target.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        max_threads=2,
        # Security configurations
        auth=None,
        ssl_keyfile=None,
        ssl_certfile=None,
        ssl_verify=True,
    )

    try:
        logger.info("Starting Hugging Face Repository Manager")
        interface = create_interface()

        # Enable request queuing, then start serving.
        interface.queue()
        interface.launch(**launch_options)
    except Exception as e:
        logger.error(f"Application failed to start: {e}")
        raise
    finally:
        logger.info("Shutting down Hugging Face Repository Manager")


if __name__ == "__main__":
    main()