Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -300,10 +300,12 @@ def upload_all_files(job: ProcessingJob, all_courses: List[Dict], combined_file_
|
|
| 300 |
|
| 301 |
|
| 302 |
# ============================================================================
|
| 303 |
-
# Main Processing Logic (Modified)
|
| 304 |
# ============================================================================
|
| 305 |
|
| 306 |
-
async def process_single_course(
|
|
|
|
|
|
|
| 307 |
course_file: str,
|
| 308 |
job: ProcessingJob,
|
| 309 |
ato_files: List[str]
|
|
@@ -330,6 +332,8 @@ async def process_single_course(
|
|
| 330 |
ato_path = download_file(DATASET_ATO, expected_ato_file)
|
| 331 |
if ato_path:
|
| 332 |
transcription_data = load_json_file(ato_path)
|
|
|
|
|
|
|
| 333 |
if transcription_data:
|
| 334 |
job.matched_transcriptions += 1
|
| 335 |
|
|
@@ -363,9 +367,11 @@ async def process_all_courses_background(job_id: str):
|
|
| 363 |
print(f"{'='*70}\n")
|
| 364 |
|
| 365 |
# Fetch file lists from all datasets
|
|
|
|
|
|
|
| 366 |
print("[INIT] Fetching file lists from datasets...")
|
| 367 |
-
helium_files = list_dataset_files
|
| 368 |
-
ato_files = list_dataset_files
|
| 369 |
|
| 370 |
# Filter to only _frames.json files from Helium
|
| 371 |
course_files = [f for f in helium_files if f.endswith("_frames.json")]
|
|
@@ -380,7 +386,7 @@ async def process_all_courses_background(job_id: str):
|
|
| 380 |
|
| 381 |
for idx, course_file in enumerate(course_files):
|
| 382 |
try:
|
| 383 |
-
#
|
| 384 |
course_data = await asyncio.to_thread(
|
| 385 |
process_single_course,
|
| 386 |
course_file,
|
|
@@ -415,7 +421,7 @@ async def process_all_courses_background(job_id: str):
|
|
| 415 |
|
| 416 |
job.output_file = str(output_file)
|
| 417 |
|
| 418 |
-
#
|
| 419 |
job.status = JobStatus.UPLOADING
|
| 420 |
await asyncio.to_thread(upload_all_files, job, all_courses, output_file)
|
| 421 |
|
|
|
|
| 300 |
|
| 301 |
|
| 302 |
# ============================================================================
|
| 303 |
+
# Main Processing Logic (Modified - FIX APPLIED HERE)
|
| 304 |
# ============================================================================
|
| 305 |
|
| 306 |
+
# FIX: Changed from 'async def' to 'def' because this function contains blocking I/O
|
| 307 |
+
# and is run in a worker thread via asyncio.to_thread, which expects a regular (non-coroutine) callable.
|
| 308 |
+
def process_single_course(
|
| 309 |
course_file: str,
|
| 310 |
job: ProcessingJob,
|
| 311 |
ato_files: List[str]
|
|
|
|
| 332 |
ato_path = download_file(DATASET_ATO, expected_ato_file)
|
| 333 |
if ato_path:
|
| 334 |
transcription_data = load_json_file(ato_path)
|
| 335 |
+
# NOTE: job.matched_transcriptions is mutated in place on the shared job object;
|
| 336 |
+
# this relies on courses being processed one at a time (each to_thread call is awaited before the next).
|
| 337 |
if transcription_data:
|
| 338 |
job.matched_transcriptions += 1
|
| 339 |
|
|
|
|
| 367 |
print(f"{'='*70}\n")
|
| 368 |
|
| 369 |
# Fetch file lists from all datasets
|
| 370 |
+
# NOTE: list_dataset_files contains blocking I/O, so it is run via asyncio.to_thread
|
| 371 |
+
# to keep the event loop responsive; it is only called once per dataset at the start.
|
| 372 |
print("[INIT] Fetching file lists from datasets...")
|
| 373 |
+
helium_files = await asyncio.to_thread(list_dataset_files, DATASET_HELIUM)
|
| 374 |
+
ato_files = await asyncio.to_thread(list_dataset_files, DATASET_ATO)
|
| 375 |
|
| 376 |
# Filter to only _frames.json files from Helium
|
| 377 |
course_files = [f for f in helium_files if f.endswith("_frames.json")]
|
|
|
|
| 386 |
|
| 387 |
for idx, course_file in enumerate(course_files):
|
| 388 |
try:
|
| 389 |
+
# process_single_course is now synchronous and correctly run in a thread
|
| 390 |
course_data = await asyncio.to_thread(
|
| 391 |
process_single_course,
|
| 392 |
course_file,
|
|
|
|
| 421 |
|
| 422 |
job.output_file = str(output_file)
|
| 423 |
|
| 424 |
+
# Upload all files with intelligent fallback
|
| 425 |
job.status = JobStatus.UPLOADING
|
| 426 |
await asyncio.to_thread(upload_all_files, job, all_courses, output_file)
|
| 427 |
|