Fred808 commited on
Commit
cb3dd33
·
verified ·
1 Parent(s): 82ac2d1

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +31 -0
  2. app.py +504 -0
  3. env.example +13 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Install system dependencies:
#   unrar-free - RAR extraction; app.py shells out to `unrar`
#     NOTE(review): Debian's unrar-free package may install the binary as
#     `unrar-free` rather than `unrar`, and it cannot extract some RAR v3/v5
#     archives — confirm `unrar` resolves inside this image.
#   curl       - useful for in-container health checks and debugging
RUN apt-get update && apt-get install -y --no-install-recommends \
    unrar-free \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first so the pip layer stays cached when only app.py changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create the directories app.py uses for downloaded archives and extraction scratch space
RUN mkdir -p downloads extracted_tmp

# Expose port (app.py binds uvicorn to 0.0.0.0:7860)
EXPOSE 7860

# Set environment variables: unbuffered stdout for live logs; HF cache inside the app dir
ENV PYTHONUNBUFFERED=1
ENV HF_HOME=/app/.cache/huggingface

# Run the application
CMD ["python", "app.py"]
31
+
app.py ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ import subprocess
5
+ import shutil
6
+ import asyncio
7
+ import threading
8
+ import time
9
+ import hashlib
10
+ from typing import Dict, List, Set
11
+ from fastapi import FastAPI, BackgroundTasks, HTTPException
12
+ from fastapi.responses import HTMLResponse, JSONResponse
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from huggingface_hub import HfApi, list_repo_files
15
+ import uvicorn
16
+
17
# ==== CONFIGURATION ====
# All settings documented in env.example. Fix: CHUNK_SIZE and PROCESSING_DELAY
# were hard-coded even though env.example advertises them as environment
# variables; they are now read from the environment with the same defaults.
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")
DEST_REPO_ID = os.getenv("DEST_REPO", "Fred808/BG2")

DOWNLOAD_FOLDER = "downloads"        # where source .rar files are downloaded
EXTRACT_FOLDER = "extracted_tmp"     # per-archive extraction scratch space

DOWNLOAD_STATE_FILE = "download_progress.json"   # next download index
PROCESS_STATE_FILE = "process_progress.json"     # names of fully processed archives
UPLOADED_FOLDERS_FILE = "uploaded_folders.json"  # hashes of folders already in the dest repo
FAILED_FILES_LOG = "failed_files.txt"            # append-only failure record

CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "3"))            # files downloaded per batch (small for Space environment)
PROCESSING_DELAY = int(os.getenv("PROCESSING_DELAY", "2"))  # seconds between processed files

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(EXTRACT_FOLDER, exist_ok=True)

api = HfApi(token=HF_TOKEN)

# Global state shared between the worker thread/task and the HTTP endpoints.
processing_status = {
    "is_running": False,
    "current_file": None,
    "total_files": 0,
    "processed_files": 0,
    "failed_files": 0,
    "uploaded_folders": 0,
    "last_update": None,
    "logs": []
}

app = FastAPI(title="RAR Processing Service", description="Automated RAR extraction and upload service")

# Add CORS middleware (open policy: the dashboard may be served from any origin)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
60
+
61
def log_message(message: str):
    """Echo *message* to stdout and append a timestamped entry to the shared log."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {message}"
    print(entry)
    processing_status["logs"].append(entry)
    processing_status["last_update"] = stamp
    # Cap the in-memory buffer at the 100 most recent entries.
    overflow = len(processing_status["logs"]) - 100
    if overflow > 0:
        processing_status["logs"] = processing_status["logs"][overflow:]
71
+
72
def log_failed_file(filename: str, error_msg: str):
    """Record a failed file in FAILED_FILES_LOG and the live log.

    Bug fix: both the log-file line and the live log entry previously emitted
    the literal text "(unknown)" instead of the failing filename, making the
    failure record useless for review.
    """
    with open(FAILED_FILES_LOG, "a") as f:
        f.write(f"{filename}: {error_msg}\n")
    log_message(f"❌ Failed: {filename} - {error_msg}")
77
+
78
def get_folder_hash(folder_name: str) -> str:
    """Return the hex MD5 digest of *folder_name*, used as a stable folder ID."""
    digest = hashlib.md5(folder_name.encode())
    return digest.hexdigest()
81
+
82
def load_uploaded_folders() -> Set[str]:
    """Return the set of already-uploaded folder hashes (empty if absent/corrupt)."""
    if not os.path.exists(UPLOADED_FOLDERS_FILE):
        return set()
    try:
        with open(UPLOADED_FOLDERS_FILE, "r") as fh:
            payload = json.load(fh)
    except json.JSONDecodeError:
        # A corrupt state file is not fatal: start over with an empty set.
        log_message(f"⚠️ Warning: Could not decode {UPLOADED_FOLDERS_FILE}. Starting with empty set.")
        return set()
    return set(payload.get("uploaded_folder_hashes", []))
93
+
94
def save_uploaded_folders(uploaded_set: Set[str]):
    """Persist the uploaded-folder hash set to UPLOADED_FOLDERS_FILE as JSON."""
    payload = {"uploaded_folder_hashes": list(uploaded_set)}
    with open(UPLOADED_FOLDERS_FILE, "w") as fh:
        json.dump(payload, fh)
98
+
99
def load_download_state() -> int:
    """Return the next download index from disk; 0 when the file is absent or corrupt."""
    if not os.path.exists(DOWNLOAD_STATE_FILE):
        return 0
    try:
        with open(DOWNLOAD_STATE_FILE, "r") as fh:
            state = json.load(fh)
    except json.JSONDecodeError:
        log_message(f"⚠️ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
        return 0
    return state.get("next_download_index", 0)
109
+
110
def save_download_state(next_index: int):
    """Persist *next_index* so downloads resume from the right place after a restart."""
    state = {"next_download_index": next_index}
    with open(DOWNLOAD_STATE_FILE, "w") as fh:
        json.dump(state, fh)
114
+
115
def load_processed_files_state() -> set:
    """Return the set of already-processed RAR names (empty if absent/corrupt)."""
    if not os.path.exists(PROCESS_STATE_FILE):
        return set()
    try:
        with open(PROCESS_STATE_FILE, "r") as fh:
            payload = json.load(fh)
    except json.JSONDecodeError:
        log_message(f"⚠️ Warning: Could not decode {PROCESS_STATE_FILE}. Starting with empty processed list.")
        return set()
    return set(payload.get("processed_rars", []))
126
+
127
def save_processed_files_state(processed_set: set):
    """Persist the processed-RAR name set to PROCESS_STATE_FILE as JSON."""
    payload = {"processed_rars": list(processed_set)}
    with open(PROCESS_STATE_FILE, "w") as fh:
        json.dump(payload, fh)
131
+
132
def download_rar_files(start_index: int, chunk_size: int) -> tuple:
    """Download a batch of RAR files from the source dataset.

    Lists all .rar files in SOURCE_REPO_ID (one known-bad archive excluded),
    takes the slice [start_index:start_index + chunk_size], and streams each
    file into DOWNLOAD_FOLDER. Files already present locally are kept as-is.

    Returns:
        (downloaded_paths, next_index) — local paths of the files in this
        batch and the index to resume from. On a listing error, or when the
        slice is empty, returns ([], start_index).

    Fixes: log messages previously printed the literal "(unknown)" instead of
    the filename; the HTTP request now has a timeout so a stalled connection
    cannot hang the worker forever.
    """
    try:
        all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        # Filter for .rar files and exclude the specific known-problematic archive.
        rar_files_in_repo = sorted([f for f in all_files if f.endswith(".rar") and "ZBrush/3DConceptArtist_TheUltimateZbrushGuide_DownloadPirate.com.rar" not in f])

        end_index = start_index + chunk_size
        files_to_download_metadata = rar_files_in_repo[start_index:end_index]

        if not files_to_download_metadata:
            log_message("✅ No more RAR files to download.")
            return [], start_index

        log_message(f"📥 Downloading RAR files {start_index + 1} to {end_index} from {SOURCE_REPO_ID}")

        downloaded_paths = []
        for file_path_in_repo in files_to_download_metadata:
            filename = os.path.basename(file_path_in_repo)
            dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
            file_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{file_path_in_repo}"
            headers = {"Authorization": f"Bearer {HF_TOKEN}"}

            if os.path.exists(dest_path):
                log_message(f"⏩ Already exists, skipping: {filename}")
                downloaded_paths.append(dest_path)
                continue

            log_message(f"🔽 Downloading: {file_path_in_repo}")
            try:
                # timeout applies to connect and to each streamed read, not the
                # whole download, so large archives are still fine.
                with requests.get(file_url, headers=headers, stream=True, timeout=60) as r:
                    r.raise_for_status()
                    with open(dest_path, "wb") as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            f.write(chunk)
                log_message(f"✅ Downloaded: {filename}")
                downloaded_paths.append(dest_path)
            except Exception as e:
                log_message(f"❌ Failed to download {file_path_in_repo}: {e}")
                log_failed_file(file_path_in_repo, f"Download failed: {e}")

        return downloaded_paths, end_index
    except Exception as e:
        log_message(f"❌ Error in download_rar_files: {e}")
        return [], start_index
177
+
178
def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_folders_set: Set[str]) -> bool:
    """Extract one RAR archive and upload its contents to DEST_REPO_ID.

    Skips work that is already recorded in *processed_rars_set* or (by folder
    hash) in *uploaded_folders_set*; both sets are mutated and persisted as
    progress is made. The extraction scratch folder is always removed, whether
    processing succeeds or fails.

    Returns:
        True on success or when the archive was already handled, False when
        extraction or any upload fails (failure is logged via log_failed_file).

    Fixes: log/error messages previously printed the literal "(unknown)"
    instead of the filename; the unrar destination now ends with a path
    separator (without it unrar treats the argument as a file mask rather than
    a destination directory); the stale comment about an -idq flag that was
    never passed is gone, and the unused `result`/`uploaded_files` locals are
    removed.
    """
    filename = os.path.basename(rar_path)
    processing_status["current_file"] = filename

    if filename in processed_rars_set:
        log_message(f"⏩ {filename} already processed, skipping extraction and upload.")
        return True

    # Folder name (archive name minus extension) and its hash for dedup tracking.
    folder_name = filename.replace(".rar", "")
    folder_hash = get_folder_hash(folder_name)

    # Check if this folder content has already been uploaded.
    if folder_hash in uploaded_folders_set:
        log_message(f"🔒 Folder '{folder_name}' already uploaded (hash: {folder_hash[:8]}...), skipping.")
        processed_rars_set.add(filename)
        save_processed_files_state(processed_rars_set)
        return True

    log_message(f"📦 Attempting to extract: {filename}")
    current_extract_folder = os.path.join(EXTRACT_FOLDER, f"{folder_name}_extracted")
    os.makedirs(current_extract_folder, exist_ok=True)

    try:
        if shutil.which("unrar") is None:
            raise RuntimeError("unrar command not found. Please install unrar.")

        # -o+ overwrites existing files without prompting. The trailing
        # separator is required: unrar only treats the final argument as a
        # destination path when it ends with a path separator.
        unrar_command = ["unrar", "x", "-o+", rar_path, current_extract_folder + os.sep]
        log_message(f"Running command: {' '.join(unrar_command)}")

        subprocess.run(
            unrar_command,
            check=True,
            capture_output=True,
            text=True,
            encoding='utf-8'
        )

        extracted_contents = os.listdir(current_extract_folder)
        if not extracted_contents:
            raise Exception("Extraction completed but no files were produced in the target directory.")
        log_message(f"Successfully extracted {len(extracted_contents)} items")

        # Upload every extracted file, preserving its path under folder_name/.
        upload_count = 0
        for root, _, files in os.walk(current_extract_folder):
            for file in files:
                local_path = os.path.join(root, file)
                # Construct path in repo relative to the extracted content's root.
                path_in_repo = os.path.join(folder_name, os.path.relpath(local_path, current_extract_folder))
                log_message(f"⬆️ Uploading: {path_in_repo}")

                try:
                    api.upload_file(
                        path_or_fileobj=local_path,
                        path_in_repo=path_in_repo,
                        repo_id=DEST_REPO_ID,
                        repo_type="dataset"
                    )
                    upload_count += 1
                except Exception as upload_error:
                    # A partial upload must not mark the folder as done; bubble up.
                    log_message(f"❌ Failed to upload {path_in_repo}: {upload_error}")
                    raise

        log_message(f"✅ Successfully uploaded {upload_count} files from {filename}")

        # Mark folder as uploaded using its hash and persist immediately.
        uploaded_folders_set.add(folder_hash)
        save_uploaded_folders(uploaded_folders_set)
        processing_status["uploaded_folders"] = len(uploaded_folders_set)

        log_message(f"🔒 Folder '{folder_name}' locked in BG2 repo (hash: {folder_hash[:8]}...)")

        return True

    except subprocess.CalledProcessError as e:
        error_msg = f"RAR extraction failed (exit {e.returncode}): {e.stderr.strip()}"
        log_failed_file(filename, error_msg)
        processing_status["failed_files"] += 1
        return False
    except Exception as e:
        error_msg = f"Unexpected error during processing {filename}: {str(e)}"
        log_failed_file(filename, error_msg)
        processing_status["failed_files"] += 1
        return False
    finally:
        # Always cleanup the extraction folder after processing (success or failure).
        if os.path.exists(current_extract_folder):
            log_message(f"🧹 Cleaning up extracted files in {current_extract_folder}")
            try:
                shutil.rmtree(current_extract_folder)
                log_message(f"✅ Cleaned up extraction folder")
            except Exception as e:
                log_message(f"⚠️ Could not clean up extraction folder {current_extract_folder}: {e}")
277
+
278
def continuous_processing():
    """Worker loop: download a batch, process every local RAR, repeat.

    Runs until processing_status["is_running"] is cleared (via /stop) or no
    downloadable/unprocessed work remains. Each successfully processed archive
    is deleted locally to save disk space. All progress is persisted through
    the state-file helpers so a restart resumes where it left off.

    Fix: the delete-log messages previously printed the literal "(unknown)"
    instead of the filename.
    """
    processing_status["is_running"] = True
    log_message("🚀 Starting continuous RAR processing...")

    try:
        # Load uploaded folders tracking.
        uploaded_folders = load_uploaded_folders()
        processing_status["uploaded_folders"] = len(uploaded_folders)

        while processing_status["is_running"]:
            # 1. Download a batch of RAR files.
            download_start_index = load_download_state()
            downloaded_rar_paths, next_download_index = download_rar_files(download_start_index, CHUNK_SIZE)
            save_download_state(next_download_index)

            if not downloaded_rar_paths:
                # Nothing new downloaded — check for leftover local RARs.
                all_local_rars = sorted([os.path.join(DOWNLOAD_FOLDER, f) for f in os.listdir(DOWNLOAD_FOLDER) if f.endswith(".rar")])
                processed_rars = load_processed_files_state()
                unprocessed_rars = [rar for rar in all_local_rars if os.path.basename(rar) not in processed_rars]

                if not unprocessed_rars:
                    log_message("✅ All RAR files have been processed. Stopping...")
                    break
                else:
                    log_message(f"📋 Found {len(unprocessed_rars)} unprocessed local RAR files")

            # 2. Process all available RAR files (downloaded + existing).
            all_local_rars = sorted([os.path.join(DOWNLOAD_FOLDER, f) for f in os.listdir(DOWNLOAD_FOLDER) if f.endswith(".rar")])
            processed_rars = load_processed_files_state()
            processing_status["total_files"] = len(all_local_rars)
            processing_status["processed_files"] = len(processed_rars)

            for rar_file_path in all_local_rars:
                # Honor a stop request between files.
                if not processing_status["is_running"]:
                    break

                filename = os.path.basename(rar_file_path)
                if filename not in processed_rars:
                    success = extract_and_upload_rar(rar_file_path, processed_rars, uploaded_folders)
                    if success:
                        processed_rars.add(filename)
                        save_processed_files_state(processed_rars)
                        processing_status["processed_files"] += 1

                        # Delete the RAR file after successful processing.
                        log_message(f"🗑️ Deleting processed RAR: {filename}")
                        try:
                            os.remove(rar_file_path)
                            log_message(f"✅ Deleted RAR file: {filename}")
                        except Exception as e:
                            log_message(f"⚠️ Could not delete {rar_file_path}: {e}")

                    # Pause between processed files to stay gentle on the Space.
                    time.sleep(PROCESSING_DELAY)

            # If no new files were downloaded and all local files are processed, we're done.
            if not downloaded_rar_paths:
                break

    except Exception as e:
        log_message(f"❌ Error in continuous processing: {e}")
    finally:
        processing_status["is_running"] = False
        processing_status["current_file"] = None
        log_message("🏁 Processing stopped")
345
+
346
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the single-page dashboard.

    The page shows live status/counters, start/stop/refresh buttons, and the
    log buffer; its JavaScript polls GET /status every 5 seconds and posts to
    /start and /stop. Everything is inlined here so the service has no static
    assets to serve.
    """
    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>RAR Processing Service</title>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }
            .container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
            .status-card { background: #e3f2fd; padding: 15px; border-radius: 5px; margin: 10px 0; }
            .logs { background: #f5f5f5; padding: 15px; border-radius: 5px; height: 400px; overflow-y: auto; font-family: monospace; font-size: 12px; }
            .button { background: #2196F3; color: white; padding: 10px 20px; border: none; border-radius: 5px; cursor: pointer; margin: 5px; }
            .button:hover { background: #1976D2; }
            .button:disabled { background: #ccc; cursor: not-allowed; }
            .stop-button { background: #f44336; }
            .stop-button:hover { background: #d32f2f; }
            .stats { display: flex; gap: 20px; margin: 20px 0; }
            .stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🔄 RAR Processing Service</h1>
            <p>Automated extraction and upload of RAR files from BG1 to BG2 dataset</p>

            <div class="status-card">
                <h3>Status: <span id="status">Stopped</span></h3>
                <p>Current File: <span id="current-file">None</span></p>
                <p>Last Update: <span id="last-update">Never</span></p>
            </div>

            <div class="stats">
                <div class="stat-item">
                    <h4>Total Files</h4>
                    <span id="total-files">0</span>
                </div>
                <div class="stat-item">
                    <h4>Processed</h4>
                    <span id="processed-files">0</span>
                </div>
                <div class="stat-item">
                    <h4>Uploaded Folders</h4>
                    <span id="uploaded-folders">0</span>
                </div>
                <div class="stat-item">
                    <h4>Failed</h4>
                    <span id="failed-files">0</span>
                </div>
            </div>

            <div>
                <button class="button" onclick="startProcessing()" id="start-btn">Start Processing</button>
                <button class="button stop-button" onclick="stopProcessing()" id="stop-btn" disabled>Stop Processing</button>
                <button class="button" onclick="refreshStatus()">Refresh Status</button>
            </div>

            <h3>Logs</h3>
            <div class="logs" id="logs">Loading...</div>
        </div>

        <script>
            async function startProcessing() {
                try {
                    const response = await fetch('/start', { method: 'POST' });
                    const result = await response.json();
                    alert(result.message);
                    refreshStatus();
                } catch (error) {
                    alert('Error starting processing: ' + error.message);
                }
            }

            async function stopProcessing() {
                try {
                    const response = await fetch('/stop', { method: 'POST' });
                    const result = await response.json();
                    alert(result.message);
                    refreshStatus();
                } catch (error) {
                    alert('Error stopping processing: ' + error.message);
                }
            }

            async function refreshStatus() {
                try {
                    const response = await fetch('/status');
                    const status = await response.json();

                    document.getElementById('status').textContent = status.is_running ? 'Running' : 'Stopped';
                    document.getElementById('current-file').textContent = status.current_file || 'None';
                    document.getElementById('last-update').textContent = status.last_update || 'Never';
                    document.getElementById('total-files').textContent = status.total_files;
                    document.getElementById('processed-files').textContent = status.processed_files;
                    document.getElementById('uploaded-folders').textContent = status.uploaded_folders;
                    document.getElementById('failed-files').textContent = status.failed_files;

                    document.getElementById('start-btn').disabled = status.is_running;
                    document.getElementById('stop-btn').disabled = !status.is_running;

                    const logsDiv = document.getElementById('logs');
                    logsDiv.innerHTML = status.logs.join('<br>');
                    logsDiv.scrollTop = logsDiv.scrollHeight;
                } catch (error) {
                    console.error('Error refreshing status:', error);
                }
            }

            // Auto-refresh every 5 seconds
            setInterval(refreshStatus, 5000);

            // Initial load
            refreshStatus();
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
467
+
468
@app.get("/status")
async def get_status():
    """Return the live processing state (counters, current file, log buffer)."""
    snapshot = processing_status
    return JSONResponse(content=snapshot)
472
+
473
@app.post("/start")
async def start_processing(background_tasks: BackgroundTasks):
    """Launch the worker loop in the background; a no-op while already running."""
    if not processing_status["is_running"]:
        background_tasks.add_task(continuous_processing)
        return {"message": "Processing started"}
    return {"message": "Processing is already running"}
481
+
482
@app.post("/stop")
async def stop_processing():
    """Request a graceful stop; the worker exits at its next flag check."""
    if not processing_status["is_running"]:
        return {"message": "Processing is not running"}
    # Clearing the flag is enough: the loop polls it between files.
    processing_status["is_running"] = False
    return {"message": "Processing stop requested"}
490
+
491
@app.get("/logs")
async def get_logs():
    """Return the in-memory log buffer only (subset of /status)."""
    entries = processing_status["logs"]
    return {"logs": entries}
495
+
496
@app.get("/uploaded-folders")
async def get_uploaded_folders():
    """Return how many folders have been uploaded, plus their tracking hashes."""
    hashes = load_uploaded_folders()
    return {"uploaded_folder_count": len(hashes), "folder_hashes": list(hashes)}
501
+
502
# Entry point: serve the API on the port the Dockerfile exposes (7860).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
504
+
env.example ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Token (required)
2
+ HF_TOKEN=hf_your_token_here
3
+
4
+ # Source dataset repository
5
+ SOURCE_REPO=Fred808/BG1
6
+
7
+ # Destination dataset repository
8
+ DEST_REPO=Fred808/BG2
9
+
10
+ # Optional: Adjust processing parameters
11
+ CHUNK_SIZE=3
12
+ PROCESSING_DELAY=2
13
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ huggingface_hub==0.19.4
4
+ requests==2.31.0
5
+ python-multipart==0.0.6
6
+