Fred808 committed on
Commit
e901ee5
·
verified ·
1 Parent(s): c64d671

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -59
app.py CHANGED
@@ -13,7 +13,6 @@ from typing import Dict, List, Set, Optional
13
  from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
14
  from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
15
  from fastapi.middleware.cors import CORSMiddleware
16
- from fastapi.staticfiles import StaticFiles
17
  from huggingface_hub import HfApi, list_repo_files
18
  from huggingface_hub.utils import HfHubHTTPError
19
 
@@ -25,13 +24,13 @@ DEST_REPO_ID_RAR = os.getenv("DEST_REPO_RAR", "") # Destination for extracted RA
25
 
26
  DOWNLOAD_FOLDER = "downloads"
27
  EXTRACT_FOLDER = "extracted_tmp"
28
- VIDEO_FRAMES_EXTRACT_FOLDER = "video_frames" # Changed to keep frames permanently
29
- # ZIPPED_FRAMES_FOLDER = "zipped_frames" # No longer needed since we're not zipping
30
 
31
  DOWNLOAD_STATE_FILE = "download_progress.json"
32
  PROCESS_STATE_FILE = "process_progress.json"
33
  UPLOADED_FOLDERS_FILE = "uploaded_folders.json" # Track uploaded folder hashes for BG2
34
- PROCESSED_VIDEO_COURSES_FILE = "processed_video_courses.json" # Track processed video course folders
35
  FAILED_FILES_LOG = "failed_files.txt"
36
 
37
  CHUNK_SIZE = 3 # Smaller chunks for Space environment
@@ -41,6 +40,7 @@ VIDEO_FRAME_FPS = 3 # Frames per second to extract from videos
41
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
42
  os.makedirs(EXTRACT_FOLDER, exist_ok=True)
43
  os.makedirs(VIDEO_FRAMES_EXTRACT_FOLDER, exist_ok=True)
 
44
 
45
  api = HfApi(token=HF_TOKEN)
46
 
@@ -227,6 +227,22 @@ def extract_frames(video_path: str, output_folder: str, fps: int) -> bool:
227
  log_message(f"❌ Error extracting frames from {os.path.basename(video_path)}: {e.stderr}")
228
  return False
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retries: int = 5, initial_delay: int = 5) -> bool:
231
  """Uploads a single file to Hugging Face Hub with retry logic and exponential backoff."""
232
  log_message(f"⬆️ Uploading {os.path.basename(local_path)} to {repo_id}/{path_in_repo}")
@@ -256,7 +272,7 @@ def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retr
256
  return False
257
 
258
  def process_video_frames_for_download(extracted_rar_folder: str, processed_video_courses_set: Set[str]) -> bool:
259
- """Scans an extracted RAR folder for MP4s, extracts frames, and saves them for download (no zipping)."""
260
  video_processed_successfully = False
261
 
262
  # Use the top-level folder name of the extracted RAR as the course folder name
@@ -298,16 +314,24 @@ def process_video_frames_for_download(extracted_rar_folder: str, processed_video
298
 
299
  # Check if any frames were extracted for the entire course folder
300
  if frames_extracted_count == 0:
301
- log_message(f"⚠️ No frames extracted for any video in {course_folder_name}.")
302
  if os.path.exists(course_video_extract_dir):
303
  shutil.rmtree(course_video_extract_dir)
304
  return False
305
 
306
- # No zipping - just keep the frames in the folder for direct access
307
- log_message(f"βœ… Successfully processed video frames for {course_folder_name} - frames available for download")
308
- processed_video_courses_set.add(course_folder_name) # Mark as processed
309
- save_processed_video_courses(processed_video_courses_set) # Save state
310
- video_processed_successfully = True
 
 
 
 
 
 
 
 
311
 
312
  return video_processed_successfully
313
 
@@ -529,9 +553,6 @@ def continuous_processing(start_download_index: Optional[int] = None):
529
  processing_status["current_file"] = None
530
  log_message("🏁 Processing stopped")
531
 
532
- # Mount the video frames directory as static files for direct access
533
- app.mount("/frames", StaticFiles(directory=VIDEO_FRAMES_EXTRACT_FOLDER), name="frames")
534
-
535
  @app.get("/", response_class=HTMLResponse)
536
  async def root():
537
  """Serve the main HTML interface"""
@@ -564,8 +585,6 @@ async def root():
564
  .download-list { max-height: 300px; overflow-y: auto; }
565
  .download-item { display: flex; justify-content: space-between; align-items: center; padding: 10px; border-bottom: 1px solid #eee; }
566
  .download-item:last-child { border-bottom: none; }
567
- .folder-link { color: #2196F3; text-decoration: none; }
568
- .folder-link:hover { text-decoration: underline; }
569
  </style>
570
  </head>
571
  <body>
@@ -607,7 +626,6 @@ async def root():
607
  <input type=\"number\" id=\"start-index-input\" placeholder=\"Enter start index (e.g., 0)\" value=\"0\">
608
  <button onclick=\"startProcessingWithIndex()\">Start from Index</button>
609
  </div>
610
-
611
  <div>
612
  <button class=\"button\" onclick=\"startProcessing()\" id=\"start-btn\">Start Processing (from last saved index)</button>
613
  <button class=\"button stop-button\" onclick=\"stopProcessing()\" id=\"stop-btn\" disabled>Stop Processing</button>
@@ -616,8 +634,7 @@ async def root():
616
  </div>
617
 
618
  <div class=\"downloads-section\">
619
- <h3>Available Frame Folders</h3>
620
- <p>Click on folder names to browse extracted video frames directly</p>
621
  <div class=\"download-list\" id=\"download-list\">
622
  <p>Loading...</p>
623
  </div>
@@ -638,7 +655,6 @@ async def root():
638
  alert(\"Error starting processing: \" + error.message);
639
  }
640
  }
641
-
642
  async function startProcessingWithIndex() {
643
  const index = document.getElementById(\"start-index-input\").value;
644
  if (index === \"\" || isNaN(index)) {
@@ -697,23 +713,23 @@ async def root():
697
 
698
  async function refreshDownloads() {
699
  try {
700
- const response = await fetch(\"/frame-folders\");
701
- const folders = await response.json();
702
 
703
  const downloadList = document.getElementById(\"download-list\");
704
- if (folders.folders.length === 0) {
705
- downloadList.innerHTML = \"<p>No frame folders available yet.</p>\";
706
  } else {
707
- downloadList.innerHTML = folders.folders.map(folder =>
708
  `<div class=\"download-item\">
709
- <span>${folder.name} (${folder.video_count} videos, ${folder.frame_count} frames)</span>
710
- <a href=\"/frames/${folder.name}/\" class=\"folder-link\" target=\"_blank\">Browse Frames</a>
711
  </div>`
712
  ).join(\"\");
713
  }
714
  } catch (error) {
715
  console.error(\"Error refreshing downloads:\", error);
716
- document.getElementById(\"download-list\").innerHTML = \"<p>Error loading frame folders.</p>\";
717
  }
718
  }
719
 
@@ -782,43 +798,66 @@ async def get_processed_video_courses():
782
  processed_video_courses = load_processed_video_courses()
783
  return {"processed_video_course_count": len(processed_video_courses), "course_names": list(processed_video_courses)}
784
 
785
- @app.get("/frame-folders")
786
- async def list_frame_folders():
787
- """List available frame folders with statistics"""
788
  try:
789
- if not os.path.exists(VIDEO_FRAMES_EXTRACT_FOLDER):
790
- return {"folders": []}
791
 
792
- folders = []
793
- for folder_name in os.listdir(VIDEO_FRAMES_EXTRACT_FOLDER):
794
- folder_path = os.path.join(VIDEO_FRAMES_EXTRACT_FOLDER, folder_name)
795
- if os.path.isdir(folder_path):
796
- # Count video folders and total frames
797
- video_count = 0
798
- frame_count = 0
799
-
800
- for video_folder in os.listdir(folder_path):
801
- video_folder_path = os.path.join(folder_path, video_folder)
802
- if os.path.isdir(video_folder_path):
803
- video_count += 1
804
- # Count frames in this video folder
805
- frame_files = [f for f in os.listdir(video_folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
806
- frame_count += len(frame_files)
807
 
808
- folders.append({
809
- "name": folder_name,
810
- "video_count": video_count,
811
- "frame_count": frame_count,
812
- "path": folder_path
813
  })
814
 
815
- # Sort by folder name
816
- folders.sort(key=lambda x: x["name"])
817
- return {"folders": folders}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  except Exception as e:
819
- log_message(f"❌ Error listing frame folders: {e}")
820
- return {"folders": [], "error": str(e)}
821
 
822
  if __name__ == "__main__":
823
  uvicorn.run(app, host="0.0.0.0", port=7860)
824
-
 
13
  from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
14
  from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
15
  from fastapi.middleware.cors import CORSMiddleware
 
16
  from huggingface_hub import HfApi, list_repo_files
17
  from huggingface_hub.utils import HfHubHTTPError
18
 
 
24
 
25
  DOWNLOAD_FOLDER = "downloads"
26
  EXTRACT_FOLDER = "extracted_tmp"
27
+ VIDEO_FRAMES_EXTRACT_FOLDER = "video_frames_tmp"
28
+ ZIPPED_FRAMES_FOLDER = "zipped_frames" # This will now store files for download instead of upload
29
 
30
  DOWNLOAD_STATE_FILE = "download_progress.json"
31
  PROCESS_STATE_FILE = "process_progress.json"
32
  UPLOADED_FOLDERS_FILE = "uploaded_folders.json" # Track uploaded folder hashes for BG2
33
+ PROCESSED_VIDEO_COURSES_FILE = "processed_video_courses.json" # Track processed video course folders for BG3
34
  FAILED_FILES_LOG = "failed_files.txt"
35
 
36
  CHUNK_SIZE = 3 # Smaller chunks for Space environment
 
40
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
41
  os.makedirs(EXTRACT_FOLDER, exist_ok=True)
42
  os.makedirs(VIDEO_FRAMES_EXTRACT_FOLDER, exist_ok=True)
43
+ os.makedirs(ZIPPED_FRAMES_FOLDER, exist_ok=True)
44
 
45
  api = HfApi(token=HF_TOKEN)
46
 
 
227
  log_message(f"❌ Error extracting frames from {os.path.basename(video_path)}: {e.stderr}")
228
  return False
229
 
230
def zip_folder(folder_path: str, output_zip_path: str) -> bool:
    """Zip the contents of a folder into a single archive.

    Every file found under ``folder_path`` (recursively) is stored in the
    archive under its path relative to ``folder_path``.

    Args:
        folder_path: Directory whose files should be archived.
        output_zip_path: Destination path of the zip file to create.

    Returns:
        True if the archive was written completely, False otherwise. On
        failure any partially written archive is removed so that an
        incomplete zip is never left behind at ``output_zip_path``.
    """
    log_message(f"📦 Compressing {folder_path} to {output_zip_path}...")

    # os.walk() silently yields nothing for a missing directory, which would
    # otherwise produce an empty archive and report success.
    if not os.path.isdir(folder_path):
        log_message(f"❌ Error zipping {folder_path}: source folder does not exist")
        return False

    try:
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _, files in os.walk(folder_path):
                for file in files:
                    file_path = os.path.join(root, file)
                    # Store paths relative to the zipped folder, not absolute ones.
                    arcname = os.path.relpath(file_path, folder_path)
                    zipf.write(file_path, arcname)
    except Exception as e:
        log_message(f"❌ Error zipping {folder_path}: {e}")
        # Drop the truncated archive; a corrupt .zip must not look like a
        # finished result to anything that scans the output folder.
        if os.path.exists(output_zip_path):
            try:
                os.remove(output_zip_path)
            except OSError as cleanup_error:
                log_message(f"⚠️ Could not remove partial archive {output_zip_path}: {cleanup_error}")
        return False

    log_message(f"✅ Successfully zipped {folder_path}")
    return True
245
+
246
  def upload_file_to_hf(local_path: str, path_in_repo: str, repo_id: str, max_retries: int = 5, initial_delay: int = 5) -> bool:
247
  """Uploads a single file to Hugging Face Hub with retry logic and exponential backoff."""
248
  log_message(f"⬆️ Uploading {os.path.basename(local_path)} to {repo_id}/{path_in_repo}")
 
272
  return False
273
 
274
  def process_video_frames_for_download(extracted_rar_folder: str, processed_video_courses_set: Set[str]) -> bool:
275
+ """Scans an extracted RAR folder for MP4s, extracts frames, zips, and saves for download."""
276
  video_processed_successfully = False
277
 
278
  # Use the top-level folder name of the extracted RAR as the course folder name
 
314
 
315
  # Check if any frames were extracted for the entire course folder
316
  if frames_extracted_count == 0:
317
+ log_message(f"⚠️ No frames extracted for any video in {course_folder_name}. Skipping zipping.")
318
  if os.path.exists(course_video_extract_dir):
319
  shutil.rmtree(course_video_extract_dir)
320
  return False
321
 
322
+ course_zip_path = os.path.join(ZIPPED_FRAMES_FOLDER, f"{course_folder_name}_frames.zip")
323
+ if zip_folder(course_video_extract_dir, course_zip_path):
324
+ log_message(f"βœ… Successfully processed video frames and saved {course_folder_name}_frames.zip for download")
325
+ processed_video_courses_set.add(course_folder_name) # Mark as processed
326
+ save_processed_video_courses(processed_video_courses_set) # Save state
327
+ video_processed_successfully = True
328
+
329
+ # Clean up the temporary extraction folder but keep the zip file for download
330
+ log_message(f"🧹 Cleaning up temporary video frame files for {course_folder_name}")
331
+ if os.path.exists(course_video_extract_dir):
332
+ shutil.rmtree(course_video_extract_dir)
333
+ else:
334
+ log_message(f"❌ Failed to zip video frames for {course_folder_name}")
335
 
336
  return video_processed_successfully
337
 
 
553
  processing_status["current_file"] = None
554
  log_message("🏁 Processing stopped")
555
 
 
 
 
556
  @app.get("/", response_class=HTMLResponse)
557
  async def root():
558
  """Serve the main HTML interface"""
 
585
  .download-list { max-height: 300px; overflow-y: auto; }
586
  .download-item { display: flex; justify-content: space-between; align-items: center; padding: 10px; border-bottom: 1px solid #eee; }
587
  .download-item:last-child { border-bottom: none; }
 
 
588
  </style>
589
  </head>
590
  <body>
 
626
  <input type=\"number\" id=\"start-index-input\" placeholder=\"Enter start index (e.g., 0)\" value=\"0\">
627
  <button onclick=\"startProcessingWithIndex()\">Start from Index</button>
628
  </div>
 
629
  <div>
630
  <button class=\"button\" onclick=\"startProcessing()\" id=\"start-btn\">Start Processing (from last saved index)</button>
631
  <button class=\"button stop-button\" onclick=\"stopProcessing()\" id=\"stop-btn\" disabled>Stop Processing</button>
 
634
  </div>
635
 
636
  <div class=\"downloads-section\">
637
+ <h3>Available Downloads</h3>
 
638
  <div class=\"download-list\" id=\"download-list\">
639
  <p>Loading...</p>
640
  </div>
 
655
  alert(\"Error starting processing: \" + error.message);
656
  }
657
  }
 
658
  async function startProcessingWithIndex() {
659
  const index = document.getElementById(\"start-index-input\").value;
660
  if (index === \"\" || isNaN(index)) {
 
713
 
714
  async function refreshDownloads() {
715
  try {
716
+ const response = await fetch(\"/downloads\");
717
+ const downloads = await response.json();
718
 
719
  const downloadList = document.getElementById(\"download-list\");
720
+ if (downloads.files.length === 0) {
721
+ downloadList.innerHTML = \"<p>No downloads available yet.</p>\";
722
  } else {
723
+ downloadList.innerHTML = downloads.files.map(file =>
724
  `<div class=\"download-item\">
725
+ <span>${file.name} (${file.size})</span>
726
+ <a href=\"/download/${file.name}\" class=\"button download-button\" download>Download</a>
727
  </div>`
728
  ).join(\"\");
729
  }
730
  } catch (error) {
731
  console.error(\"Error refreshing downloads:\", error);
732
+ document.getElementById(\"download-list\").innerHTML = \"<p>Error loading downloads.</p>\";
733
  }
734
  }
735
 
 
798
  processed_video_courses = load_processed_video_courses()
799
  return {"processed_video_course_count": len(processed_video_courses), "course_names": list(processed_video_courses)}
800
 
801
def _format_file_size(size_bytes: int) -> str:
    """Return *size_bytes* as a short human-readable string (B, KB, MB or GB)."""
    if size_bytes < 1024:
        return f"{size_bytes} B"
    size = size_bytes / 1024
    for unit in ("KB", "MB"):
        if size < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    return f"{size:.1f} GB"


@app.get("/downloads")
async def list_downloads():
    """List available frame downloads.

    Returns:
        A dict with a ``files`` list, sorted by file name. Each entry has
        ``name``, ``size`` (human-readable) and ``path`` for one ``.zip``
        file in ``ZIPPED_FRAMES_FOLDER``. If the listing itself fails, the
        list is empty and an ``error`` key carries the exception text.
    """
    try:
        if not os.path.exists(ZIPPED_FRAMES_FOLDER):
            return {"files": []}

        files = []
        # Sorting the names up front yields the same order as sorting the
        # resulting entries by their "name" key.
        for filename in sorted(os.listdir(ZIPPED_FRAMES_FOLDER)):
            if not filename.endswith('.zip'):
                continue
            file_path = os.path.join(ZIPPED_FRAMES_FOLDER, filename)
            # A directory that happens to end in ".zip" is not downloadable.
            if not os.path.isfile(file_path):
                continue
            try:
                file_size = os.path.getsize(file_path)
            except OSError as e:
                # The file may disappear between listdir() and getsize();
                # skip it instead of failing the whole listing.
                log_message(f"⚠️ Skipping {filename} in downloads listing: {e}")
                continue

            files.append({
                "name": filename,
                "size": _format_file_size(file_size),
                "path": file_path
            })

        return {"files": files}
    except Exception as e:
        log_message(f"❌ Error listing downloads: {e}")
        return {"files": [], "error": str(e)}
835
+
836
@app.get("/download/{filename}")
async def download_file(filename: str):
    """Download a specific frame zip file.

    Args:
        filename: Name of the zip file requested by the client. Only its
            base name is used; any directory components are discarded.

    Returns:
        A ``FileResponse`` streaming the zip from ``ZIPPED_FRAMES_FOLDER``.

    Raises:
        HTTPException: 400 if the name does not end in ``.zip``, 404 if no
            such regular file exists, 500 on any unexpected error.
    """
    try:
        # Sanitize filename to prevent directory traversal
        safe_filename = os.path.basename(filename)

        # Validate the extension before touching the filesystem so the
        # endpoint cannot be used to probe for non-zip files.
        if not safe_filename.endswith('.zip'):
            raise HTTPException(status_code=400, detail="Only zip files can be downloaded")

        file_path = os.path.join(ZIPPED_FRAMES_FOLDER, safe_filename)

        # isfile() rather than exists(): a directory cannot be served.
        if not os.path.isfile(file_path):
            raise HTTPException(status_code=404, detail="File not found")

        log_message(f"📥 Serving download: {safe_filename}")
        return FileResponse(
            path=file_path,
            filename=safe_filename,
            media_type='application/zip'
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        log_message(f"❌ Error serving download {filename}: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
861
 
862
  if __name__ == "__main__":
863
  uvicorn.run(app, host="0.0.0.0", port=7860)