Fred808 commited on
Commit
1dff2e1
Β·
verified Β·
1 Parent(s): ed3a9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -28
app.py CHANGED
@@ -7,8 +7,8 @@ import asyncio
7
  import threading
8
  import time
9
  import hashlib
10
- from typing import Dict, List, Set
11
- from fastapi import FastAPI, BackgroundTasks, HTTPException
12
  from fastapi.responses import HTMLResponse, JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from huggingface_hub import HfApi, list_repo_files
@@ -101,7 +101,7 @@ def load_download_state() -> int:
101
  if os.path.exists(DOWNLOAD_STATE_FILE):
102
  try:
103
  with open(DOWNLOAD_STATE_FILE, "r") as f:
104
- return json.load(f).get("next_download_index", 3)
105
  except json.JSONDecodeError:
106
  log_message(f"⚠️ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
107
  return 0
@@ -129,7 +129,7 @@ def save_processed_files_state(processed_set: set):
129
  with open(PROCESS_STATE_FILE, "w") as f:
130
  json.dump({"processed_rars": list(processed_set)}, f)
131
 
132
- def download_rar_files(start_index: 4, chunk_size: int) -> tuple:
133
  """Downloads a batch of RAR files from the source dataset"""
134
  try:
135
  all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
@@ -143,7 +143,7 @@ def download_rar_files(start_index: 4, chunk_size: int) -> tuple:
143
  log_message("βœ… No more RAR files to download.")
144
  return [], start_index
145
 
146
- log_message(f"πŸ“₯ Downloading RAR files {start_index + 4} to {end_index} from {SOURCE_REPO_ID}")
147
 
148
  downloaded_paths = []
149
  for file_path_in_repo in files_to_download_metadata:
@@ -190,7 +190,7 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
190
 
191
  # Check if this folder content has already been uploaded
192
  if folder_hash in uploaded_folders_set:
193
- log_message(f"πŸ”’ Folder '{folder_name}' already uploaded (hash: {folder_hash[:8]}...), skipping.")
194
  processed_rars_set.add(filename)
195
  save_processed_files_state(processed_rars_set)
196
  return True
@@ -251,7 +251,7 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
251
  save_uploaded_folders(uploaded_folders_set)
252
  processing_status["uploaded_folders"] = len(uploaded_folders_set)
253
 
254
- log_message(f"πŸ”’ Folder '{folder_name}' locked in BG2 repo (hash: {folder_hash[:8]}...)")
255
 
256
  return True
257
 
@@ -275,8 +275,8 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
275
  except Exception as e:
276
  log_message(f"⚠️ Could not clean up extraction folder {current_extract_folder}: {e}")
277
 
278
- def continuous_processing():
279
- """Main processing loop that runs continuously"""
280
  processing_status["is_running"] = True
281
  log_message("πŸš€ Starting continuous RAR processing...")
282
 
@@ -285,6 +285,12 @@ def continuous_processing():
285
  uploaded_folders = load_uploaded_folders()
286
  processing_status["uploaded_folders"] = len(uploaded_folders)
287
 
 
 
 
 
 
 
288
  while processing_status["is_running"]:
289
  # 1. Download a batch of RAR files
290
  download_start_index = load_download_state()
@@ -298,7 +304,7 @@ def continuous_processing():
298
  unprocessed_rars = [rar for rar in all_local_rars if os.path.basename(rar) not in processed_rars]
299
 
300
  if not unprocessed_rars:
301
- log_message("βœ… All RAR files have been processed. Stopping...")
302
  break
303
  else:
304
  log_message(f"πŸ“‹ Found {len(unprocessed_rars)} unprocessed local RAR files")
@@ -365,6 +371,10 @@ async def root():
365
  .stop-button:hover { background: #d32f2f; }
366
  .stats { display: flex; gap: 20px; margin: 20px 0; }
367
  .stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
 
 
 
 
368
  </style>
369
  </head>
370
  <body>
@@ -397,8 +407,14 @@ async def root():
397
  </div>
398
  </div>
399
 
 
 
 
 
 
 
400
  <div>
401
- <button class="button" onclick="startProcessing()" id="start-btn">Start Processing</button>
402
  <button class="button stop-button" onclick="stopProcessing()" id="stop-btn" disabled>Stop Processing</button>
403
  <button class="button" onclick="refreshStatus()">Refresh Status</button>
404
  </div>
@@ -410,47 +426,67 @@ async def root():
410
  <script>
411
  async function startProcessing() {
412
  try {
413
- const response = await fetch('/start', { method: 'POST' });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  const result = await response.json();
415
  alert(result.message);
416
  refreshStatus();
417
  } catch (error) {
418
- alert('Error starting processing: ' + error.message);
419
  }
420
  }
421
 
422
  async function stopProcessing() {
423
  try {
424
- const response = await fetch('/stop', { method: 'POST' });
425
  const result = await response.json();
426
  alert(result.message);
427
  refreshStatus();
428
  } catch (error) {
429
- alert('Error stopping processing: ' + error.message);
430
  }
431
  }
432
 
433
  async function refreshStatus() {
434
  try {
435
- const response = await fetch('/status');
436
  const status = await response.json();
437
 
438
- document.getElementById('status').textContent = status.is_running ? 'Running' : 'Stopped';
439
- document.getElementById('current-file').textContent = status.current_file || 'None';
440
- document.getElementById('last-update').textContent = status.last_update || 'Never';
441
- document.getElementById('total-files').textContent = status.total_files;
442
- document.getElementById('processed-files').textContent = status.processed_files;
443
- document.getElementById('uploaded-folders').textContent = status.uploaded_folders;
444
- document.getElementById('failed-files').textContent = status.failed_files;
445
 
446
- document.getElementById('start-btn').disabled = status.is_running;
447
- document.getElementById('stop-btn').disabled = !status.is_running;
448
 
449
- const logsDiv = document.getElementById('logs');
450
- logsDiv.innerHTML = status.logs.join('<br>');
451
  logsDiv.scrollTop = logsDiv.scrollHeight;
452
  } catch (error) {
453
- console.error('Error refreshing status:', error);
454
  }
455
  }
456
 
@@ -479,6 +515,18 @@ async def start_processing(background_tasks: BackgroundTasks):
479
  background_tasks.add_task(continuous_processing)
480
  return {"message": "Processing started"}
481
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  @app.post("/stop")
483
  async def stop_processing():
484
  """Stop the processing"""
@@ -502,3 +550,4 @@ async def get_uploaded_folders():
502
  if __name__ == "__main__":
503
  uvicorn.run(app, host="0.0.0.0", port=7860)
504
 
 
 
7
  import threading
8
  import time
9
  import hashlib
10
+ from typing import Dict, List, Set, Optional
11
+ from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
12
  from fastapi.responses import HTMLResponse, JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from huggingface_hub import HfApi, list_repo_files
 
101
  if os.path.exists(DOWNLOAD_STATE_FILE):
102
  try:
103
  with open(DOWNLOAD_STATE_FILE, "r") as f:
104
+ return json.load(f).get("next_download_index", 0)
105
  except json.JSONDecodeError:
106
  log_message(f"⚠️ Warning: Could not decode {DOWNLOAD_STATE_FILE}. Starting download from index 0.")
107
  return 0
 
129
  with open(PROCESS_STATE_FILE, "w") as f:
130
  json.dump({"processed_rars": list(processed_set)}, f)
131
 
132
+ def download_rar_files(start_index: int, chunk_size: int) -> tuple:
133
  """Downloads a batch of RAR files from the source dataset"""
134
  try:
135
  all_files = list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset", token=HF_TOKEN)
 
143
  log_message("βœ… No more RAR files to download.")
144
  return [], start_index
145
 
146
+ log_message(f"πŸ“₯ Downloading RAR files {start_index + 1} to {end_index} from {SOURCE_REPO_ID}")
147
 
148
  downloaded_paths = []
149
  for file_path_in_repo in files_to_download_metadata:
 
190
 
191
  # Check if this folder content has already been uploaded
192
  if folder_hash in uploaded_folders_set:
193
+ log_message(f"πŸ”’ Folder \'{folder_name}\' already uploaded (hash: {folder_hash[:8]}...), skipping.")
194
  processed_rars_set.add(filename)
195
  save_processed_files_state(processed_rars_set)
196
  return True
 
251
  save_uploaded_folders(uploaded_folders_set)
252
  processing_status["uploaded_folders"] = len(uploaded_folders_set)
253
 
254
+ log_message(f"πŸ”’ Folder \'{folder_name}\' locked in BG2 repo (hash: {folder_hash[:8]}...)")
255
 
256
  return True
257
 
 
275
  except Exception as e:
276
  log_message(f"⚠️ Could not clean up extraction folder {current_extract_folder}: {e}")
277
 
278
+ def continuous_processing(start_download_index: Optional[int] = None):
279
+ """Main processing loop that runs continuously, with an optional starting download index"""
280
  processing_status["is_running"] = True
281
  log_message("πŸš€ Starting continuous RAR processing...")
282
 
 
285
  uploaded_folders = load_uploaded_folders()
286
  processing_status["uploaded_folders"] = len(uploaded_folders)
287
 
288
+ if start_download_index is not None:
289
+ log_message(f"Starting download from index: {start_download_index}")
290
+ save_download_state(start_download_index) # Set download state to start from this index
291
+ else:
292
+ log_message("Starting download from saved state or beginning.")
293
+
294
  while processing_status["is_running"]:
295
  # 1. Download a batch of RAR files
296
  download_start_index = load_download_state()
 
304
  unprocessed_rars = [rar for rar in all_local_rars if os.path.basename(rar) not in processed_rars]
305
 
306
  if not unprocessed_rars:
307
+ log_message("βœ… No more RAR files to download. Stopping...")
308
  break
309
  else:
310
  log_message(f"πŸ“‹ Found {len(unprocessed_rars)} unprocessed local RAR files")
 
371
  .stop-button:hover { background: #d32f2f; }
372
  .stats { display: flex; gap: 20px; margin: 20px 0; }
373
  .stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
374
+ .start-form { margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
375
+ .start-form input[type="number"] { width: calc(100% - 120px); padding: 8px; margin-right: 10px; border: 1px solid #ccc; border-radius: 4px; }
376
+ .start-form button { padding: 8px 15px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; }
377
+ .start-form button:hover { background: #45a049; }
378
  </style>
379
  </head>
380
  <body>
 
407
  </div>
408
  </div>
409
 
410
+ <div class="start-form">
411
+ <h3>Start Processing from Specific Download Index</h3>
412
+ <input type="number" id="start-index-input" placeholder="Enter start index (e.g., 0)" value="0">
413
+ <button onclick="startProcessingWithIndex()">Start from Index</button>
414
+ </div>
415
+
416
  <div>
417
+ <button class="button" onclick="startProcessing()" id="start-btn">Start Processing (from last saved index)</button>
418
  <button class="button stop-button" onclick="stopProcessing()" id="stop-btn" disabled>Stop Processing</button>
419
  <button class="button" onclick="refreshStatus()">Refresh Status</button>
420
  </div>
 
426
  <script>
427
  async function startProcessing() {
428
  try {
429
+ const response = await fetch(\"/start\", { method: \"POST\" });
430
+ const result = await response.json();
431
+ alert(result.message);
432
+ refreshStatus();
433
+ } catch (error) {
434
+ alert(\"Error starting processing: \" + error.message);
435
+ }
436
+ }
437
+
438
+ async function startProcessingWithIndex() {
439
+ const index = document.getElementById(\"start-index-input\").value;
440
+ if (index === "" || isNaN(index)) {
441
+ alert(\"Please enter a valid number for the start index.\");
442
+ return;
443
+ }
444
+ try {
445
+ const response = await fetch(\"/start_from_index\", {
446
+ method: \"POST\",
447
+ headers: { \"Content-Type\": \"application/x-www-form-urlencoded\" },
448
+ body: `start_index=${parseInt(index)}`
449
+ });
450
  const result = await response.json();
451
  alert(result.message);
452
  refreshStatus();
453
  } catch (error) {
454
+ alert(\"Error starting processing from index: \" + error.message);
455
  }
456
  }
457
 
458
  async function stopProcessing() {
459
  try {
460
+ const response = await fetch(\"/stop\", { method: \"POST\" });
461
  const result = await response.json();
462
  alert(result.message);
463
  refreshStatus();
464
  } catch (error) {
465
+ alert(\"Error stopping processing: \" + error.message);
466
  }
467
  }
468
 
469
  async function refreshStatus() {
470
  try {
471
+ const response = await fetch(\"/status\");
472
  const status = await response.json();
473
 
474
+ document.getElementById(\"status\").textContent = status.is_running ? \"Running\" : \"Stopped\";
475
+ document.getElementById(\"current-file\").textContent = status.current_file || \"None\";
476
+ document.getElementById(\"last-update\").textContent = status.last_update || \"Never\";
477
+ document.getElementById(\"total-files\").textContent = status.total_files;
478
+ document.getElementById(\"processed-files\").textContent = status.processed_files;
479
+ document.getElementById(\"uploaded-folders\").textContent = status.uploaded_folders;
480
+ document.getElementById(\"failed-files\").textContent = status.failed_files;
481
 
482
+ document.getElementById(\"start-btn\").disabled = status.is_running;
483
+ document.getElementById(\"stop-btn\").disabled = !status.is_running;
484
 
485
+ const logsDiv = document.getElementById(\"logs\");
486
+ logsDiv.innerHTML = status.logs.join(\"<br>\");
487
  logsDiv.scrollTop = logsDiv.scrollHeight;
488
  } catch (error) {
489
+ console.error(\"Error refreshing status:\", error);
490
  }
491
  }
492
 
 
515
  background_tasks.add_task(continuous_processing)
516
  return {"message": "Processing started"}
517
 
518
+ @app.post("/start_from_index")
519
+ async def start_processing_from_index(background_tasks: BackgroundTasks, start_index: int = Form(...)):
520
+ """Start the processing from a specific download index in background"""
521
+ if processing_status["is_running"]:
522
+ return {"message": "Processing is already running"}
523
+
524
+ if start_index < 0:
525
+ return {"message": "Start index cannot be negative."}
526
+
527
+ background_tasks.add_task(continuous_processing, start_download_index=start_index)
528
+ return {"message": f"Processing started from download index: {start_index}"}
529
+
530
  @app.post("/stop")
531
  async def stop_processing():
532
  """Stop the processing"""
 
550
  if __name__ == "__main__":
551
  uvicorn.run(app, host="0.0.0.0", port=7860)
552
 
553
+