Fred808 committed on
Commit
e79b1b8
·
verified ·
1 Parent(s): 04bc5ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -70
app.py CHANGED
@@ -11,8 +11,9 @@ import zipfile
11
  import uvicorn
12
  from typing import Dict, List, Set, Optional
13
  from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
14
- from fastapi.responses import HTMLResponse, JSONResponse
15
  from fastapi.middleware.cors import CORSMiddleware
 
16
  from huggingface_hub import HfApi, list_repo_files
17
  from huggingface_hub.utils import HfHubHTTPError
18
 
@@ -20,17 +21,17 @@ from huggingface_hub.utils import HfHubHTTPError
20
  HF_TOKEN = os.getenv("HF_TOKEN", "")
21
  SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1") # Source for RARs
22
  DEST_REPO_ID_RAR = os.getenv("DEST_REPO_RAR", "") # Destination for extracted RAR contents (set to empty string if not needed)
23
- DEST_REPO_ID_VIDEO = os.getenv("DEST_REPO_VIDEO", "Fred808/BG3") # Destination for zipped video frames
24
 
25
  DOWNLOAD_FOLDER = "downloads"
26
  EXTRACT_FOLDER = "extracted_tmp"
27
- VIDEO_FRAMES_EXTRACT_FOLDER = "video_frames_tmp"
28
- ZIPPED_FRAMES_FOLDER = "zipped_frames"
29
 
30
  DOWNLOAD_STATE_FILE = "download_progress.json"
31
  PROCESS_STATE_FILE = "process_progress.json"
32
  UPLOADED_FOLDERS_FILE = "uploaded_folders.json" # Track uploaded folder hashes for BG2
33
- PROCESSED_VIDEO_COURSES_FILE = "processed_video_courses.json" # Track processed video course folders for BG3
34
  FAILED_FILES_LOG = "failed_files.txt"
35
 
36
  CHUNK_SIZE = 3 # Smaller chunks for Space environment
@@ -40,7 +41,6 @@ VIDEO_FRAME_FPS = 3 # Frames per second to extract from videos
40
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
41
  os.makedirs(EXTRACT_FOLDER, exist_ok=True)
42
  os.makedirs(VIDEO_FRAMES_EXTRACT_FOLDER, exist_ok=True)
43
- os.makedirs(ZIPPED_FRAMES_FOLDER, exist_ok=True)
44
 
45
  api = HfApi(token=HF_TOKEN)
46
 
@@ -52,12 +52,12 @@ processing_status = {
52
  "processed_files": 0,
53
  "failed_files": 0,
54
  "uploaded_rar_folders": 0,
55
- "uploaded_video_courses": 0,
56
  "last_update": None,
57
  "logs": []
58
  }
59
 
60
- app = FastAPI(title="RAR & Video Processing Service", description="Automated RAR extraction and video frame upload service")
61
 
62
  # Add CORS middleware
63
  app.add_middleware(
@@ -107,7 +107,7 @@ def save_uploaded_folders(uploaded_set: Set[str]):
107
  json.dump({"uploaded_folder_hashes": list(uploaded_set)}, f)
108
 
109
  def load_processed_video_courses() -> Set[str]:
110
- """Loads the set of processed video course folder names for BG3."""
111
  if os.path.exists(PROCESSED_VIDEO_COURSES_FILE):
112
  try:
113
  with open(PROCESSED_VIDEO_COURSES_FILE, "r") as f:
@@ -118,7 +118,7 @@ def load_processed_video_courses() -> Set[str]:
118
  return set()
119
 
120
  def save_processed_video_courses(processed_set: set):
121
- """Saves the set of processed video course folder names for BG3 to a file."""
122
  with open(PROCESSED_VIDEO_COURSES_FILE, "w") as f:
123
  json.dump(list(processed_set), f)
124
 
@@ -227,22 +227,6 @@ def extract_frames(video_path: str, output_folder: str, fps: int) -> bool:
227
  log_message(f"❌ Error extracting frames from {os.path.basename(video_path)}: {e.stderr}")
228
  return False
229
 
230
- def zip_folder(folder_path: str, output_zip_path: str) -> bool:
231
- """Zips the contents of a folder."""
232
- log_message(f" compressing {folder_path} to {output_zip_path}...")
233
- try:
234
- with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
235
- for root, _, files in os.walk(folder_path):
236
- for file in files:
237
- file_path = os.path.join(root, file)
238
- arcname = os.path.relpath(file_path, folder_path)
239
- zipf.write(file_path, arcname)
240
- log_message(f"βœ… Successfully zipped {folder_path}")
241
- return True
242
- except Exception as e:
243
- log_message(f"❌ Error zipping {folder_path}: {e}")
244
- return False
245
-
246
  def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retries: int = 5, initial_delay: int = 5) -> bool:
247
  """Uploads a single file to Hugging Face Hub with retry logic and exponential backoff."""
248
  log_message(f"⬆️ Uploading {os.path.basename(local_path)} to {repo_id}/{path_in_repo}")
@@ -271,15 +255,15 @@ def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retr
271
  log_message(f"❌ Failed to upload {os.path.basename(local_path)} after {max_retries} attempts.")
272
  return False
273
 
274
- def process_video_frames_and_upload(extracted_rar_folder: str, processed_video_courses_set: Set[str]) -> bool:
275
- """Scans an extracted RAR folder for MP4s, extracts frames, zips, and uploads to BG3."""
276
  video_processed_successfully = False
277
 
278
  # Use the top-level folder name of the extracted RAR as the course folder name
279
  course_folder_name = os.path.basename(extracted_rar_folder)
280
 
281
  if course_folder_name in processed_video_courses_set:
282
- log_message(f"⏩ Video frames for course \'{course_folder_name}\' already processed. Skipping.")
283
  return True
284
 
285
  log_message(f"🎬 Processing videos in extracted RAR folder: {course_folder_name}")
@@ -314,45 +298,32 @@ def process_video_frames_and_upload(extracted_rar_folder: str, processed_video_c
314
 
315
  # Check if any frames were extracted for the entire course folder
316
  if frames_extracted_count == 0:
317
- log_message(f"⚠️ No frames extracted for any video in {course_folder_name}. Skipping zipping and upload to BG3.")
318
  if os.path.exists(course_video_extract_dir):
319
  shutil.rmtree(course_video_extract_dir)
320
  return False
321
 
322
- course_zip_path = os.path.join(ZIPPED_FRAMES_FOLDER, f"{course_folder_name}_frames.zip")
323
- if zip_folder(course_video_extract_dir, course_zip_path):
324
- path_in_repo = f"{course_folder_name}_frames.zip"
325
- if upload_file_to_hf(course_zip_path, path_in_repo, DEST_REPO_ID_VIDEO):
326
- log_message(f"βœ… Successfully processed video frames and uploaded {course_folder_name}_frames.zip to BG3")
327
- processed_video_courses_set.add(course_folder_name) # Mark as processed
328
- save_processed_video_courses(processed_video_courses_set) # Save state
329
- video_processed_successfully = True
330
- else:
331
- log_message(f"❌ Failed to upload zipped video frames for {course_folder_name} to BG3")
332
- else:
333
- log_message(f"❌ Failed to zip video frames for {course_folder_name}")
334
-
335
- # Cleanup local video frame files
336
- log_message(f"🧹 Cleaning up local video frame files for {course_folder_name}")
337
- if os.path.exists(course_video_extract_dir):
338
- shutil.rmtree(course_video_extract_dir)
339
- if os.path.exists(course_zip_path):
340
- os.remove(course_zip_path)
341
 
342
  return video_processed_successfully
343
 
344
  def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_folders_set: Set[str], processed_video_courses_set: Set[str]) -> bool:
345
- """Extracts a single RAR file, uploads its contents to BG2 (if DEST_REPO_ID_RAR is set), and then processes videos for BG3"""
346
  filename = os.path.basename(rar_path)
347
  processing_status["current_file"] = filename
348
 
349
  folder_name = filename.replace(".rar", "")
 
350
  current_extract_folder = os.path.join(EXTRACT_FOLDER, f"{folder_name}_extracted")
351
 
352
  # Check if RAR is already processed (uploaded to BG2 or video frames processed)
353
  # This logic needs to be careful. If BG2 is not set, we only care about video processing.
354
  # If video processing is not needed, we only care about BG2 upload.
355
- is_bg2_processed = (not DEST_REPO_ID_RAR) or (get_folder_hash(folder_name) in uploaded_folders_set)
356
  is_bg3_processed = (folder_name in processed_video_courses_set)
357
 
358
  if filename in processed_rars_set and is_bg2_processed and is_bg3_processed:
@@ -361,8 +332,8 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
361
 
362
  # If BG2 upload is enabled and folder already uploaded to BG2, skip RAR extraction/upload to BG2
363
  # but still proceed to video processing if not already done.
364
- if DEST_REPO_ID_RAR and get_folder_hash(folder_name) in uploaded_folders_set and not is_bg3_processed:
365
- log_message(f"πŸ”’ Folder \'{folder_name}\' already uploaded to BG2 (hash: {get_folder_hash(folder_name)[:8]}...), skipping RAR upload.")
366
  # If the extracted folder doesn't exist, we can't process videos from it.
367
  # This scenario might happen if the previous run was interrupted after BG2 upload but before video processing cleanup.
368
  if not os.path.exists(current_extract_folder):
@@ -371,7 +342,7 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
371
  else:
372
  # Proceed to video processing if not already done
373
  log_message(f"Continuing with video processing for {filename}.")
374
- video_processed = process_video_frames_and_upload(current_extract_folder, processed_video_courses_set)
375
  if video_processed:
376
  processed_rars_set.add(filename)
377
  save_processed_files_state(processed_rars_set)
@@ -433,18 +404,22 @@ def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_fold
433
  uploaded_folders_set.add(folder_hash)
434
  save_uploaded_folders(uploaded_folders_set)
435
  processing_status["uploaded_rar_folders"] = len(uploaded_folders_set)
436
- log_message(f"πŸ”’ Folder \'{folder_name}\' locked in BG2 repo (hash: {folder_hash[:8]}...)")
437
  else:
438
  log_message(f"⚠️ No files were successfully uploaded from {filename} to BG2.")
439
  else:
440
  log_message("Skipping upload to BG2 as DEST_REPO_ID_RAR is not set.")
441
 
442
- # Now process video frames from the extracted content and upload to BG3
443
- video_processed = process_video_frames_and_upload(current_extract_folder, processed_video_courses_set)
444
 
445
- # Mark RAR as processed only if both BG2 (if enabled) and BG3 processing are successful
446
- # Or if BG2 is not enabled, only BG3 processing needs to be successful
447
  if (not DEST_REPO_ID_RAR or (folder_hash in uploaded_folders_set)) and video_processed:
 
 
 
 
448
  return True
449
  elif DEST_REPO_ID_RAR and not (folder_hash in uploaded_folders_set):
450
  log_message(f"❌ RAR processing failed for {filename}: BG2 upload was not successful.")
@@ -487,9 +462,9 @@ def continuous_processing(start_download_index: Optional[int] = None):
487
  uploaded_folders = load_uploaded_folders()
488
  processing_status["uploaded_rar_folders"] = len(uploaded_folders)
489
 
490
- # Load processed video courses tracking for BG3
491
  processed_video_courses = load_processed_video_courses()
492
- processing_status["uploaded_video_courses"] = len(processed_video_courses)
493
 
494
  if start_download_index is not None:
495
  log_message(f"Starting download from index: {start_download_index}")
@@ -532,9 +507,6 @@ def continuous_processing(start_download_index: Optional[int] = None):
532
  filename = os.path.basename(rar_file_path)
533
  success = extract_and_upload_rar(rar_file_path, processed_rars, uploaded_folders, processed_video_courses)
534
  if success:
535
- # processed_rars.add(filename) is handled inside extract_and_upload_rar now for better atomicity
536
- # processing_status["processed_files"] += 1 is also handled inside
537
-
538
  # Delete the RAR file after successful processing
539
  log_message(f"πŸ—‘οΈ Deleting processed RAR: {filename}")
540
  try:
@@ -557,6 +529,9 @@ def continuous_processing(start_download_index: Optional[int] = None):
557
  processing_status["current_file"] = None
558
  log_message("🏁 Processing stopped")
559
 
 
 
 
560
  @app.get("/", response_class=HTMLResponse)
561
  async def root():
562
  """Serve the main HTML interface"""
@@ -577,18 +552,26 @@ async def root():
577
  .button:disabled { background: #ccc; cursor: not-allowed; }
578
  .stop-button { background: #f44336; }
579
  .stop-button:hover { background: #d32f2f; }
 
 
580
  .stats { display: flex; gap: 20px; margin: 20px 0; }
581
  .stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
582
  .start-form { margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
583
  .start-form input[type=\"number\"] { width: calc(100% - 120px); padding: 8px; margin-right: 10px; border: 1px solid #ccc; border-radius: 4px; }
584
  .start-form button { padding: 8px 15px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; }
585
  .start-form button:hover { background: #45a049; }
 
 
 
 
 
 
586
  </style>
587
  </head>
588
  <body>
589
  <div class=\"container\">
590
  <h1>πŸ”„ RAR & Video Processing Service</h1>
591
- <p>Automated extraction and upload of RAR files from BG1 to BG2 dataset, and video frame extraction/upload to BG3 dataset</p>
592
 
593
  <div class=\"status-card\">
594
  <h3>Status: <span id=\"status\">Stopped</span></h3>
@@ -610,8 +593,8 @@ async def root():
610
  <span id=\"uploaded-rar-folders\">0</span>
611
  </div>
612
  <div class=\"stat-item\">
613
- <h4>Uploaded Video Courses (BG3)</h4>
614
- <span id=\"uploaded-video-courses\">0</span>
615
  </div>
616
  <div class=\"stat-item\">
617
  <h4>Failed</h4>
@@ -629,6 +612,15 @@ async def root():
629
  <button class=\"button\" onclick=\"startProcessing()\" id=\"start-btn\">Start Processing (from last saved index)</button>
630
  <button class=\"button stop-button\" onclick=\"stopProcessing()\" id=\"stop-btn\" disabled>Stop Processing</button>
631
  <button class=\"button\" onclick=\"refreshStatus()\">Refresh Status</button>
 
 
 
 
 
 
 
 
 
632
  </div>
633
 
634
  <h3>Logs</h3>
@@ -689,7 +681,7 @@ async def root():
689
  document.getElementById(\"total-files\").textContent = status.total_files;
690
  document.getElementById(\"processed-files\").textContent = status.processed_files;
691
  document.getElementById(\"uploaded-rar-folders\").textContent = status.uploaded_rar_folders;
692
- document.getElementById(\"uploaded-video-courses\").textContent = status.uploaded_video_courses;
693
  document.getElementById(\"failed-files\").textContent = status.failed_files;
694
 
695
  document.getElementById(\"start-btn\").disabled = status.is_running;
@@ -703,11 +695,35 @@ async def root():
703
  }
704
  }
705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  // Auto-refresh every 5 seconds
707
  setInterval(refreshStatus, 5000);
 
708
 
709
  // Initial load
710
  refreshStatus();
 
711
  </script>
712
  </body>
713
  </html>
@@ -762,11 +778,47 @@ async def get_uploaded_folders():
762
 
763
  @app.get("/processed-video-courses")
764
  async def get_processed_video_courses():
765
- """Get list of processed video course folder names for BG3"""
766
  processed_video_courses = load_processed_video_courses()
767
  return {"processed_video_course_count": len(processed_video_courses), "course_names": list(processed_video_courses)}
768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
769
  if __name__ == "__main__":
770
  uvicorn.run(app, host="0.0.0.0", port=7860)
771
 
772
-
 
11
  import uvicorn
12
  from typing import Dict, List, Set, Optional
13
  from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
14
+ from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
15
  from fastapi.middleware.cors import CORSMiddleware
16
+ from fastapi.staticfiles import StaticFiles
17
  from huggingface_hub import HfApi, list_repo_files
18
  from huggingface_hub.utils import HfHubHTTPError
19
 
 
21
  HF_TOKEN = os.getenv("HF_TOKEN", "")
22
  SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1") # Source for RARs
23
  DEST_REPO_ID_RAR = os.getenv("DEST_REPO_RAR", "") # Destination for extracted RAR contents (set to empty string if not needed)
24
+ # DEST_REPO_ID_VIDEO = os.getenv("DEST_REPO_VIDEO", "Fred808/BG3") # Destination for zipped video frames - DISABLED FOR DOWNLOAD MODE
25
 
26
  DOWNLOAD_FOLDER = "downloads"
27
  EXTRACT_FOLDER = "extracted_tmp"
28
+ VIDEO_FRAMES_EXTRACT_FOLDER = "video_frames" # Changed to keep frames permanently
29
+ # ZIPPED_FRAMES_FOLDER = "zipped_frames" # No longer needed since we're not zipping
30
 
31
  DOWNLOAD_STATE_FILE = "download_progress.json"
32
  PROCESS_STATE_FILE = "process_progress.json"
33
  UPLOADED_FOLDERS_FILE = "uploaded_folders.json" # Track uploaded folder hashes for BG2
34
+ PROCESSED_VIDEO_COURSES_FILE = "processed_video_courses.json" # Track processed video course folders
35
  FAILED_FILES_LOG = "failed_files.txt"
36
 
37
  CHUNK_SIZE = 3 # Smaller chunks for Space environment
 
41
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
42
  os.makedirs(EXTRACT_FOLDER, exist_ok=True)
43
  os.makedirs(VIDEO_FRAMES_EXTRACT_FOLDER, exist_ok=True)
 
44
 
45
  api = HfApi(token=HF_TOKEN)
46
 
 
52
  "processed_files": 0,
53
  "failed_files": 0,
54
  "uploaded_rar_folders": 0,
55
+ "extracted_video_courses": 0, # Changed from uploaded_video_courses to extracted_video_courses
56
  "last_update": None,
57
  "logs": []
58
  }
59
 
60
+ app = FastAPI(title="RAR & Video Processing Service", description="Automated RAR extraction and video frame extraction service with download capability")
61
 
62
  # Add CORS middleware
63
  app.add_middleware(
 
107
  json.dump({"uploaded_folder_hashes": list(uploaded_set)}, f)
108
 
109
  def load_processed_video_courses() -> Set[str]:
110
+ """Loads the set of processed video course folder names."""
111
  if os.path.exists(PROCESSED_VIDEO_COURSES_FILE):
112
  try:
113
  with open(PROCESSED_VIDEO_COURSES_FILE, "r") as f:
 
118
  return set()
119
 
120
def save_processed_video_courses(processed_set: set):
    """Persist the set of processed video course folder names as a JSON list.

    The names are written in sorted order so the file content is
    deterministic for a given set. The data is first written to a temporary
    sibling file and then moved over the real state file with ``os.replace``,
    so an interruption in the middle of the write cannot leave a truncated
    state file behind.

    Args:
        processed_set: Course folder names to record.
    """
    tmp_path = f"{PROCESSED_VIDEO_COURSES_FILE}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(sorted(processed_set), f)
    # Swap the fully written file into place with a single rename.
    os.replace(tmp_path, PROCESSED_VIDEO_COURSES_FILE)
124
 
 
227
  log_message(f"❌ Error extracting frames from {os.path.basename(video_path)}: {e.stderr}")
228
  return False
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retries: int = 5, initial_delay: int = 5) -> bool:
231
  """Uploads a single file to Hugging Face Hub with retry logic and exponential backoff."""
232
  log_message(f"⬆️ Uploading {os.path.basename(local_path)} to {repo_id}/{path_in_repo}")
 
255
  log_message(f"❌ Failed to upload {os.path.basename(local_path)} after {max_retries} attempts.")
256
  return False
257
 
258
+ def process_video_frames_for_download(extracted_rar_folder: str, processed_video_courses_set: Set[str]) -> bool:
259
+ """Scans an extracted RAR folder for MP4s, extracts frames, and saves them for download (no zipping)."""
260
  video_processed_successfully = False
261
 
262
  # Use the top-level folder name of the extracted RAR as the course folder name
263
  course_folder_name = os.path.basename(extracted_rar_folder)
264
 
265
  if course_folder_name in processed_video_courses_set:
266
+ log_message(f"⏩ Video frames for course '{course_folder_name}' already processed. Skipping.")
267
  return True
268
 
269
  log_message(f"🎬 Processing videos in extracted RAR folder: {course_folder_name}")
 
298
 
299
  # Check if any frames were extracted for the entire course folder
300
  if frames_extracted_count == 0:
301
+ log_message(f"⚠️ No frames extracted for any video in {course_folder_name}.")
302
  if os.path.exists(course_video_extract_dir):
303
  shutil.rmtree(course_video_extract_dir)
304
  return False
305
 
306
+ # No zipping - just keep the frames in the folder for direct access
307
+ log_message(f"βœ… Successfully processed video frames for {course_folder_name} - frames available for download")
308
+ processed_video_courses_set.add(course_folder_name) # Mark as processed
309
+ save_processed_video_courses(processed_video_courses_set) # Save state
310
+ video_processed_successfully = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  return video_processed_successfully
313
 
314
  def extract_and_upload_rar(rar_path: str, processed_rars_set: set, uploaded_folders_set: Set[str], processed_video_courses_set: Set[str]) -> bool:
315
+ """Extracts a single RAR file, uploads its contents to BG2 (if DEST_REPO_ID_RAR is set), and then processes videos for download"""
316
  filename = os.path.basename(rar_path)
317
  processing_status["current_file"] = filename
318
 
319
  folder_name = filename.replace(".rar", "")
320
+ folder_hash = get_folder_hash(folder_name)
321
  current_extract_folder = os.path.join(EXTRACT_FOLDER, f"{folder_name}_extracted")
322
 
323
  # Check if RAR is already processed (uploaded to BG2 or video frames processed)
324
  # This logic needs to be careful. If BG2 is not set, we only care about video processing.
325
  # If video processing is not needed, we only care about BG2 upload.
326
+ is_bg2_processed = (not DEST_REPO_ID_RAR) or (folder_hash in uploaded_folders_set)
327
  is_bg3_processed = (folder_name in processed_video_courses_set)
328
 
329
  if filename in processed_rars_set and is_bg2_processed and is_bg3_processed:
 
332
 
333
  # If BG2 upload is enabled and folder already uploaded to BG2, skip RAR extraction/upload to BG2
334
  # but still proceed to video processing if not already done.
335
+ if DEST_REPO_ID_RAR and folder_hash in uploaded_folders_set and not is_bg3_processed:
336
+ log_message(f"πŸ”’ Folder '{folder_name}' already uploaded to BG2 (hash: {folder_hash[:8]}...), skipping RAR upload.")
337
  # If the extracted folder doesn't exist, we can't process videos from it.
338
  # This scenario might happen if the previous run was interrupted after BG2 upload but before video processing cleanup.
339
  if not os.path.exists(current_extract_folder):
 
342
  else:
343
  # Proceed to video processing if not already done
344
  log_message(f"Continuing with video processing for {filename}.")
345
+ video_processed = process_video_frames_for_download(current_extract_folder, processed_video_courses_set)
346
  if video_processed:
347
  processed_rars_set.add(filename)
348
  save_processed_files_state(processed_rars_set)
 
404
  uploaded_folders_set.add(folder_hash)
405
  save_uploaded_folders(uploaded_folders_set)
406
  processing_status["uploaded_rar_folders"] = len(uploaded_folders_set)
407
+ log_message(f"πŸ”’ Folder '{folder_name}' locked in BG2 repo (hash: {folder_hash[:8]}...)")
408
  else:
409
  log_message(f"⚠️ No files were successfully uploaded from {filename} to BG2.")
410
  else:
411
  log_message("Skipping upload to BG2 as DEST_REPO_ID_RAR is not set.")
412
 
413
+ # Now process video frames from the extracted content for download
414
+ video_processed = process_video_frames_for_download(current_extract_folder, processed_video_courses_set)
415
 
416
+ # Mark RAR as processed only if both BG2 (if enabled) and video processing are successful
417
+ # Or if BG2 is not enabled, only video processing needs to be successful
418
  if (not DEST_REPO_ID_RAR or (folder_hash in uploaded_folders_set)) and video_processed:
419
+ processed_rars_set.add(filename)
420
+ save_processed_files_state(processed_rars_set)
421
+ processing_status["processed_files"] = len(processed_rars_set)
422
+ processing_status["extracted_video_courses"] = len(processed_video_courses_set)
423
  return True
424
  elif DEST_REPO_ID_RAR and not (folder_hash in uploaded_folders_set):
425
  log_message(f"❌ RAR processing failed for {filename}: BG2 upload was not successful.")
 
462
  uploaded_folders = load_uploaded_folders()
463
  processing_status["uploaded_rar_folders"] = len(uploaded_folders)
464
 
465
+ # Load processed video courses tracking
466
  processed_video_courses = load_processed_video_courses()
467
+ processing_status["extracted_video_courses"] = len(processed_video_courses)
468
 
469
  if start_download_index is not None:
470
  log_message(f"Starting download from index: {start_download_index}")
 
507
  filename = os.path.basename(rar_file_path)
508
  success = extract_and_upload_rar(rar_file_path, processed_rars, uploaded_folders, processed_video_courses)
509
  if success:
 
 
 
510
  # Delete the RAR file after successful processing
511
  log_message(f"πŸ—‘οΈ Deleting processed RAR: {filename}")
512
  try:
 
529
  processing_status["current_file"] = None
530
  log_message("🏁 Processing stopped")
531
 
532
+ # Mount the video frames directory as static files for direct access
533
+ app.mount("/frames", StaticFiles(directory=VIDEO_FRAMES_EXTRACT_FOLDER), name="frames")
534
+
535
  @app.get("/", response_class=HTMLResponse)
536
  async def root():
537
  """Serve the main HTML interface"""
 
552
  .button:disabled { background: #ccc; cursor: not-allowed; }
553
  .stop-button { background: #f44336; }
554
  .stop-button:hover { background: #d32f2f; }
555
+ .download-button { background: #4CAF50; }
556
+ .download-button:hover { background: #45a049; }
557
  .stats { display: flex; gap: 20px; margin: 20px 0; }
558
  .stat-item { background: #f0f0f0; padding: 10px; border-radius: 5px; text-align: center; flex: 1; }
559
  .start-form { margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
560
  .start-form input[type=\"number\"] { width: calc(100% - 120px); padding: 8px; margin-right: 10px; border: 1px solid #ccc; border-radius: 4px; }
561
  .start-form button { padding: 8px 15px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; }
562
  .start-form button:hover { background: #45a049; }
563
+ .downloads-section { margin-top: 30px; padding: 20px; border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; }
564
+ .download-list { max-height: 300px; overflow-y: auto; }
565
+ .download-item { display: flex; justify-content: space-between; align-items: center; padding: 10px; border-bottom: 1px solid #eee; }
566
+ .download-item:last-child { border-bottom: none; }
567
+ .folder-link { color: #2196F3; text-decoration: none; }
568
+ .folder-link:hover { text-decoration: underline; }
569
  </style>
570
  </head>
571
  <body>
572
  <div class=\"container\">
573
  <h1>πŸ”„ RAR & Video Processing Service</h1>
574
+ <p>Automated extraction and upload of RAR files from BG1 to BG2 dataset, and video frame extraction for download</p>
575
 
576
  <div class=\"status-card\">
577
  <h3>Status: <span id=\"status\">Stopped</span></h3>
 
593
  <span id=\"uploaded-rar-folders\">0</span>
594
  </div>
595
  <div class=\"stat-item\">
596
+ <h4>Extracted Video Courses</h4>
597
+ <span id=\"extracted-video-courses\">0</span>
598
  </div>
599
  <div class=\"stat-item\">
600
  <h4>Failed</h4>
 
612
  <button class=\"button\" onclick=\"startProcessing()\" id=\"start-btn\">Start Processing (from last saved index)</button>
613
  <button class=\"button stop-button\" onclick=\"stopProcessing()\" id=\"stop-btn\" disabled>Stop Processing</button>
614
  <button class=\"button\" onclick=\"refreshStatus()\">Refresh Status</button>
615
+ <button class=\"button download-button\" onclick=\"refreshDownloads()\">Refresh Downloads</button>
616
+ </div>
617
+
618
+ <div class=\"downloads-section\">
619
+ <h3>Available Frame Folders</h3>
620
+ <p>Click on folder names to browse extracted video frames directly</p>
621
+ <div class=\"download-list\" id=\"download-list\">
622
+ <p>Loading...</p>
623
+ </div>
624
  </div>
625
 
626
  <h3>Logs</h3>
 
681
  document.getElementById(\"total-files\").textContent = status.total_files;
682
  document.getElementById(\"processed-files\").textContent = status.processed_files;
683
  document.getElementById(\"uploaded-rar-folders\").textContent = status.uploaded_rar_folders;
684
+ document.getElementById(\"extracted-video-courses\").textContent = status.extracted_video_courses;
685
  document.getElementById(\"failed-files\").textContent = status.failed_files;
686
 
687
  document.getElementById(\"start-btn\").disabled = status.is_running;
 
695
  }
696
  }
697
 
698
+ async function refreshDownloads() {
699
+ try {
700
+ const response = await fetch(\"/frame-folders\");
701
+ const folders = await response.json();
702
+
703
+ const downloadList = document.getElementById(\"download-list\");
704
+ if (folders.folders.length === 0) {
705
+ downloadList.innerHTML = \"<p>No frame folders available yet.</p>\";
706
+ } else {
707
+ downloadList.innerHTML = folders.folders.map(folder =>
708
+ `<div class=\"download-item\">
709
+ <span>${folder.name} (${folder.video_count} videos, ${folder.frame_count} frames)</span>
710
+ <a href=\"/frames/${folder.name}/\" class=\"folder-link\" target=\"_blank\">Browse Frames</a>
711
+ </div>`
712
+ ).join(\"\");
713
+ }
714
+ } catch (error) {
715
+ console.error(\"Error refreshing downloads:\", error);
716
+ document.getElementById(\"download-list\").innerHTML = \"<p>Error loading frame folders.</p>\";
717
+ }
718
+ }
719
+
720
  // Auto-refresh every 5 seconds
721
  setInterval(refreshStatus, 5000);
722
+ setInterval(refreshDownloads, 10000);
723
 
724
  // Initial load
725
  refreshStatus();
726
+ refreshDownloads();
727
  </script>
728
  </body>
729
  </html>
 
778
 
779
  @app.get("/processed-video-courses")
780
  async def get_processed_video_courses():
781
+ """Get list of processed video course folder names"""
782
  processed_video_courses = load_processed_video_courses()
783
  return {"processed_video_course_count": len(processed_video_courses), "course_names": list(processed_video_courses)}
784
 
785
@app.get("/frame-folders")
async def list_frame_folders():
    """Summarise the course folders found under VIDEO_FRAMES_EXTRACT_FOLDER.

    Each directory directly inside the frames folder becomes one entry with:
    its name, the number of sub-directories it contains (``video_count``),
    the number of entries in those sub-directories whose names end in
    .png/.jpg/.jpeg, case-insensitively (``frame_count``), and the joined
    path to the folder (``path``).

    Returns:
        ``{"folders": [...]}`` sorted by folder name; an empty list when the
        frames folder does not exist. If anything raises, the error is logged
        and ``{"folders": [], "error": <message>}`` is returned instead.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    try:
        if not os.path.exists(VIDEO_FRAMES_EXTRACT_FOLDER):
            return {"folders": []}

        summaries = []
        for course_name in os.listdir(VIDEO_FRAMES_EXTRACT_FOLDER):
            course_dir = os.path.join(VIDEO_FRAMES_EXTRACT_FOLDER, course_name)
            if not os.path.isdir(course_dir):
                continue

            # Only sub-directories count as videos; loose files are ignored.
            video_dirs = []
            for child in os.listdir(course_dir):
                child_path = os.path.join(course_dir, child)
                if os.path.isdir(child_path):
                    video_dirs.append(child_path)

            total_frames = sum(
                1
                for video_dir in video_dirs
                for entry in os.listdir(video_dir)
                if entry.lower().endswith(image_suffixes)
            )

            summaries.append({
                "name": course_name,
                "video_count": len(video_dirs),
                "frame_count": total_frames,
                "path": course_dir
            })

        summaries.sort(key=lambda item: item["name"])
        return {"folders": summaries}
    except Exception as e:
        log_message(f"❌ Error listing frame folders: {e}")
        return {"folders": [], "error": str(e)}
821
+
822
  if __name__ == "__main__":
823
  uvicorn.run(app, host="0.0.0.0", port=7860)
824