Samfredoly committed on
Commit
8020386
·
verified ·
1 Parent(s): 6d01df4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -300,10 +300,12 @@ def upload_all_files(job: ProcessingJob, all_courses: List[Dict], combined_file_
300
 
301
 
302
  # ============================================================================
303
- # Main Processing Logic (Modified)
304
  # ============================================================================
305
 
306
- async def process_single_course(
 
 
307
  course_file: str,
308
  job: ProcessingJob,
309
  ato_files: List[str]
@@ -330,6 +332,8 @@ async def process_single_course(
330
  ato_path = download_file(DATASET_ATO, expected_ato_file)
331
  if ato_path:
332
  transcription_data = load_json_file(ato_path)
 
 
333
  if transcription_data:
334
  job.matched_transcriptions += 1
335
 
@@ -363,9 +367,11 @@ async def process_all_courses_background(job_id: str):
363
  print(f"{'='*70}\n")
364
 
365
  # Fetch file lists from all datasets
 
 
366
  print("[INIT] Fetching file lists from datasets...")
367
- helium_files = list_dataset_files(DATASET_HELIUM)
368
- ato_files = list_dataset_files(DATASET_ATO)
369
 
370
  # Filter to only _frames.json files from Helium
371
  course_files = [f for f in helium_files if f.endswith("_frames.json")]
@@ -380,7 +386,7 @@ async def process_all_courses_background(job_id: str):
380
 
381
  for idx, course_file in enumerate(course_files):
382
  try:
383
- # Use asyncio.to_thread for blocking operations like hf_hub_download
384
  course_data = await asyncio.to_thread(
385
  process_single_course,
386
  course_file,
@@ -415,7 +421,7 @@ async def process_all_courses_background(job_id: str):
415
 
416
  job.output_file = str(output_file)
417
 
418
- # NEW: Upload all files with intelligent fallback
419
  job.status = JobStatus.UPLOADING
420
  await asyncio.to_thread(upload_all_files, job, all_courses, output_file)
421
 
 
300
 
301
 
302
  # ============================================================================
303
+ # Main Processing Logic (Modified - FIX APPLIED HERE)
304
  # ============================================================================
305
 
306
+ # FIX: Changed from 'async def' to 'def' because this function contains blocking I/O
307
+ # and is intended to be run in a separate thread via asyncio.to_thread.
308
+ def process_single_course(
309
  course_file: str,
310
  job: ProcessingJob,
311
  ato_files: List[str]
 
332
  ato_path = download_file(DATASET_ATO, expected_ato_file)
333
  if ato_path:
334
  transcription_data = load_json_file(ato_path)
335
+ # NOTE: job.matched_transcriptions is a mutable attribute of the job object
336
+ # which is safe to modify here as it's running in a single thread per job.
337
  if transcription_data:
338
  job.matched_transcriptions += 1
339
 
 
367
  print(f"{'='*70}\n")
368
 
369
  # Fetch file lists from all datasets
370
+ # NOTE: list_dataset_files contains blocking I/O, so it should be run in a thread.
371
+ # However, since it's only called once at the start, we can use asyncio.to_thread.
372
  print("[INIT] Fetching file lists from datasets...")
373
+ helium_files = await asyncio.to_thread(list_dataset_files, DATASET_HELIUM)
374
+ ato_files = await asyncio.to_thread(list_dataset_files, DATASET_ATO)
375
 
376
  # Filter to only _frames.json files from Helium
377
  course_files = [f for f in helium_files if f.endswith("_frames.json")]
 
386
 
387
  for idx, course_file in enumerate(course_files):
388
  try:
389
+ # process_single_course is now synchronous and correctly run in a thread
390
  course_data = await asyncio.to_thread(
391
  process_single_course,
392
  course_file,
 
421
 
422
  job.output_file = str(output_file)
423
 
424
+ # Upload all files with intelligent fallback
425
  job.status = JobStatus.UPLOADING
426
  await asyncio.to_thread(upload_all_files, job, all_courses, output_file)
427