Samfredoly committed on
Commit
bcbbd0a
·
verified ·
1 Parent(s): 818a8c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +214 -193
app.py CHANGED
@@ -21,7 +21,8 @@ app = FastAPI(title="Audio Transcriber", description="Audio transcription and up
21
 
22
  # ==== CONFIGURATION ====
23
  # The new backend URL for state management and transcription upload
24
- BACKEND_URL = "https://samfredoly-acp.hf.space"
 
25
  # The original Hugging Face repo IDs are still needed for fetching the audio files
26
  # and the reference file list, as the backend only handles transcription storage.
27
  SOURCE_REPO_ID = "Samfredoly/BG_Vid" # Fetch audio files from here
@@ -256,248 +257,268 @@ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
256
  if chunk:
257
  f.write(chunk)
258
 
259
- log_message(f"✅ Download successful: {dest_path}", "INFO")
260
  return True
261
-
262
  except requests.exceptions.RequestException as e:
263
- log_message(f" Download attempt {attempt + 1} failed for {url}: {str(e)}", "WARNING")
264
- time.sleep(PROCESSING_DELAY)
 
 
 
 
265
  except Exception as e:
266
  log_message(f"❌ An unexpected error occurred during download: {str(e)}", "ERROR")
267
  return False
268
-
269
- log_message(f"❌ Failed to download {url} after {max_retries} attempts.", "ERROR")
270
  return False
271
 
272
def fetch_reference_files(repo_id: str) -> Dict[str, str]:
    """
    List every file of a Hugging Face dataset repo, keyed by bare file name.

    Args:
        repo_id: Dataset repo whose files are listed via `hf_api.list_repo_files`.

    Returns:
        Mapping of base name (directory and extension stripped) to the full
        repo path. No file type is filtered out. If two paths share a base
        name, the one listed later wins. An empty dict is returned when the
        listing (or anything after it) raises.
    """
    log_message(f"📋 Fetching file list from {repo_id}...", "INFO")

    try:
        # The listing still goes through the Hugging Face API client.
        repo_paths = hf_api.list_repo_files(repo_id=repo_id, repo_type="dataset")

        # Build base-name -> full-path in one pass; all extensions are kept.
        by_base_name = {
            os.path.splitext(os.path.basename(repo_path))[0]: repo_path
            for repo_path in repo_paths
        }

        log_message(f"✅ Found {len(by_base_name)} files in reference repo", "INFO")
        return by_base_name

    except Exception as e:
        log_message(f"❌ Failed to fetch reference files: {e!s}", "ERROR")
        return {}
295
 
296
def find_matching_filename(transcribed_filename: str, reference_map: Dict[str, str]) -> Optional[str]:
    """
    Look up the reference path that corresponds to an audio file name.

    The audio name's extension is stripped and the remainder is matched against
    the keys of `reference_map`: first exactly, then case-insensitively as a
    substring in either direction. The first substring hit in the map's
    iteration order is used.

    Args:
        transcribed_filename: Audio file name to match.
        reference_map: Base name -> full reference path.

    Returns:
        The matched reference path, or None when nothing matches.
    """
    stem = os.path.splitext(transcribed_filename)[0]

    # Exact key hit takes priority over any fuzzy match.
    if stem in reference_map:
        exact_path = reference_map[stem]
        print("\n✅ EXACT MATCH FOUND:")
        print(f" Audio: {transcribed_filename}")
        print(f" File: {exact_path}")
        log_message(f" Found exact match: {transcribed_filename} -> {exact_path}", "INFO")
        return exact_path

    # Fuzzy pass: either name contains the other, ignoring case.
    needle = stem.lower()
    first_partial = next(
        (
            entry
            for entry in reference_map.items()
            if needle in entry[0].lower() or entry[0].lower() in needle
        ),
        None,
    )

    if first_partial is not None:
        _, partial_path = first_partial
        print("\n✅ PARTIAL MATCH FOUND:")
        print(f" Audio: {transcribed_filename}")
        print(f" File: {partial_path}")
        log_message(f"✅ Found partial match: {transcribed_filename} -> {partial_path}", "INFO")
        return partial_path

    print("\n⚠️ NO EXACT/PARTIAL MATCH FOUND (will still process):")
    print(f" Audio: {transcribed_filename}")
    log_message(f"⚠️ No matching filename found for: {transcribed_filename}. Will use original filename.", "WARNING")
    return None
328
-
329
def transcribe_audio(wav_path: str) -> Optional[Dict[str, Any]]:
    """
    Transcribe one audio file with the Transformers Whisper pipeline.

    The audio is loaded with librosa at 16 kHz and passed to an
    "automatic-speech-recognition" pipeline for `openai/whisper-{WHISPER_MODEL}`.
    The pipeline is built on every call and runs on GPU 0 when CUDA is
    available, otherwise on CPU.

    Args:
        wav_path: Path of the audio file to transcribe.

    Returns:
        `{"text": ..., "segments": [{"text": ...}]}` (a minimal shape modelled
        on openai-whisper output, with a single segment holding the whole
        text), or None if a dependency is missing or transcription fails.
    """
    log_message(f"🎤 Transcribing audio file: {wav_path}", "INFO")

    try:
        # Imported lazily so a missing optional dependency is reported here
        # instead of breaking module import.
        from transformers import pipeline
        import librosa

        log_message(f"Loading audio file: {wav_path}", "INFO")
        waveform, _sample_rate = librosa.load(wav_path, sr=16000)

        log_message(f"Loading Whisper {WHISPER_MODEL} model from Transformers...", "INFO")
        import torch

        device_index = 0 if torch.cuda.is_available() else -1  # GPU if available, else CPU
        recognizer = pipeline(
            "automatic-speech-recognition",
            model=f"openai/whisper-{WHISPER_MODEL}",
            device=device_index,
        )

        log_message("Transcribing audio...", "INFO")
        text = recognizer(waveform)["text"]

        log_message(f"✅ Successfully transcribed: {wav_path}", "INFO")
        return {"text": text, "segments": [{"text": text}]}

    except ImportError as e:
        log_message("❌ Missing library. Install with: pip install transformers librosa torch torchaudio", "ERROR")
        log_message(f" Error: {e!s}", "ERROR")
        return None

    except Exception as e:
        log_message(f"❌ Failed to transcribe {wav_path}: {e!s}", "ERROR")
        return None
371
 
372
def process_audio_file(wav_path: str, reference_map: Dict[str, str], matched_filename: str) -> bool:
    """
    Transcribe one audio file and push the result to the backend.

    Steps:
        1. Transcribe with `transcribe_audio`.
        2. Save the transcription as `<matched base name>_transcription.json`
           under `TRANSCRIPTIONS_FOLDER`.
        3. Upload that JSON through `upload_transcription_to_api`.
        4. Delete the local JSON (best effort).

    Args:
        wav_path: Local path of the audio file.
        reference_map: Accepted for interface compatibility; not used here.
        matched_filename: Name the backend should associate with the upload;
            its extension-less form also names the JSON file.

    Returns:
        True when the transcription was uploaded. False when transcription,
        saving, or upload failed; each failure is recorded with
        `log_failed_file`.
    """
    wav_filename = os.path.basename(wav_path)

    # 1. Transcribe audio
    transcription = transcribe_audio(wav_path)
    if transcription is None:
        log_failed_file(wav_filename, "Transcription failed")
        return False

    # 2. Save transcription as JSON under the name the backend expects
    json_filename = os.path.splitext(matched_filename)[0] + "_transcription.json"
    json_output_path = os.path.join(TRANSCRIPTIONS_FOLDER, json_filename)

    try:
        os.makedirs(os.path.dirname(json_output_path), exist_ok=True)

        with open(json_output_path, "w", encoding="utf-8") as f:
            json.dump(transcription, f, indent=2, ensure_ascii=False)

        log_message(f"✅ Saved transcription: {json_output_path}", "INFO")

    except Exception as e:
        log_message(f"❌ Failed to save transcription JSON: {str(e)}", "ERROR")
        log_failed_file(wav_filename, f"Failed to save JSON: {str(e)}")
        return False

    # 3. Upload to backend API
    if not upload_transcription_to_api(json_output_path, matched_filename):
        log_message("❌ Failed to upload transcription via API.", "ERROR")
        log_failed_file(wav_filename, "Failed to upload via API")
        return False

    log_message(f"✅ Successfully uploaded transcription via API: {json_filename}", "INFO")
    processing_status["transcribed_files"] += 1

    # 4. Clean up local files. A leftover file must not fail the run, but the
    # reason is logged instead of being swallowed by a bare `except`, which
    # would also have hidden KeyboardInterrupt/SystemExit.
    try:
        os.remove(json_output_path)
    except OSError as e:
        log_message(f"⚠️ Could not remove local transcription file {json_output_path}: {e}", "WARNING")
    else:
        log_message(f"🗑️ Cleaned up local transcription file: {json_output_path}", "INFO")

    return True
423
-
424
def get_next_file_to_process(repo_id: str, state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Pick the next audio file of the source repo that still needs processing.

    The repo's `.wav`/`.mp3` paths are sorted in descending order and scanned
    from `state["next_download_index"]` (default 0). A file qualifies when it
    has no entry in `state["file_states"]` or is marked "failed". When the
    start index is already past the end of the list, the index is reset to 0
    in `state` and the state is uploaded.

    Returns:
        `{'filename': str, 'url': str, 'index': int}` for the chosen file, or
        None when nothing qualifies or any step raises.
    """
    log_message(f"🔍 Searching for next audio file to process in {repo_id}", "INFO")

    try:
        # The listing still goes through the Hugging Face API client.
        repo_paths = hf_api.list_repo_files(repo_id=repo_id, repo_type="dataset")

        # Keep audio files only, ordered descending by path.
        audio_files = [path for path in repo_paths if path.endswith(('.wav', '.mp3'))]
        audio_files.sort(reverse=True)

        if not audio_files:
            log_message("ℹ️ No audio files found in the source repository.", "INFO")
            return None

        total = len(audio_files)
        processing_status["total_files"] = total

        start_index = state.get("next_download_index", 0)

        for index in range(start_index, total):
            filename = audio_files[index]
            file_state = state["file_states"].get(filename)

            if file_state == "processing":
                log_message(f"⚠️ File {filename} is currently marked as 'processing'. Skipping for now.", "WARNING")
                continue

            if file_state == "processed":
                log_message(f"ℹ️ File {filename} already processed. Skipping.", "INFO")
                continue

            if file_state is not None and file_state != "failed":
                # Any other recorded state is skipped without a log line.
                continue

            # Never seen, or failed earlier: resolve its direct download URL.
            url = hf_hub_url(repo_id=repo_id, filename=filename, repo_type="dataset", subfolder=None)

            log_message(f"✅ Found next audio file: {filename} at index {index}", "INFO")
            return {'filename': filename, 'url': url, 'index': index}

        log_message("ℹ️ All files up to the current index have been processed or skipped.", "INFO")

        if start_index >= total:
            log_message("ℹ️ Reached end of file list. Resetting index to 0 for next loop.", "INFO")
            # Reset the index locally first, then persist it through the API.
            state["next_download_index"] = 0
            upload_state_to_api(state)

        return None

    except Exception as e:
        log_message(f"❌ Failed to list files from Hugging Face: {e!s}", "ERROR")
        return None
480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  def main_processing_loop():
482
- """The main loop that orchestrates the download, transcription, and upload cycle."""
 
483
 
484
  if processing_status["is_running"]:
485
- log_message("⚠️ Processing loop is already running.", "WARNING")
486
  return
487
 
488
  processing_status["is_running"] = True
 
489
 
490
- try:
491
- log_message("🚀 Starting audio transcription processing loop...", "INFO")
492
-
493
- # Fetch reference files from BG_Vid repo once at the start
494
- reference_map = fetch_reference_files(REFERENCE_REPO_ID)
495
-
496
- if not reference_map:
497
- log_message("❌ No reference files found. Cannot proceed.", "ERROR")
498
- return
499
 
 
500
  while processing_status["is_running"]:
 
501
 
502
  # 1. Download state from the new API
503
  current_state = download_state_from_api()
@@ -615,7 +636,7 @@ async def stop_processing():
615
  processing_status["is_running"] = False
616
  return JSONResponse(status_code=200, content={"message": "Processing stop requested. Will stop after current file."})
617
 
618
- # --- Main Execution (Unchanged) ---
619
 
620
  if __name__ == "__main__":
621
  # This block is for local testing and won't be used in the final sandbox execution
 
21
 
22
  # ==== CONFIGURATION ====
23
  # The new backend URL for state management and transcription upload
24
+ # It is now read from an environment variable, falling back to the default if not set.
25
+ BACKEND_URL = os.environ.get("BACKEND_URL", "https://samfredoly-acp.hf.space")
26
  # The original Hugging Face repo IDs are still needed for fetching the audio files
27
  # and the reference file list, as the backend only handles transcription storage.
28
  SOURCE_REPO_ID = "Samfredoly/BG_Vid" # Fetch audio files from here
 
257
  if chunk:
258
  f.write(chunk)
259
 
260
+ log_message(f"✅ Download successful: {os.path.basename(dest_path)}", "INFO")
261
  return True
 
262
  except requests.exceptions.RequestException as e:
263
+ log_message(f"⚠️ Download attempt {attempt + 1}/{max_retries} failed for {url}: {str(e)}", "WARNING")
264
+ if attempt < max_retries - 1:
265
+ time.sleep(2 ** attempt) # Exponential backoff
266
+ else:
267
+ log_message(f"❌ Download failed after {max_retries} attempts for {url}", "ERROR")
268
+ return False
269
  except Exception as e:
270
  log_message(f"❌ An unexpected error occurred during download: {str(e)}", "ERROR")
271
  return False
 
 
272
  return False
273
 
274
def get_reference_map(reference_repo_id: str) -> Dict[str, str]:
    """
    Build a lookup of reference files from a Hugging Face dataset repo.

    The repo is listed with `hf_api.list_repo_files`; only `.txt` and `.json`
    paths (extension compared case-insensitively) are kept.

    Args:
        reference_repo_id: Dataset repo that holds the reference files.

    Returns:
        Mapping of repo path without its extension (any directory part is
        kept) to the full repo path. If several kept paths share the same
        extension-less path, the one listed later wins. An empty dict is
        returned when listing fails.
    """
    log_message(f"Fetching reference file list from {reference_repo_id}...", "INFO")

    try:
        repo_files = hf_api.list_repo_files(repo_id=reference_repo_id, repo_type="dataset")

        # Split each path once, then keep only text/JSON entries.
        split_paths = ((path, os.path.splitext(path)) for path in repo_files)
        reference_map = {
            stem: path
            for path, (stem, suffix) in split_paths
            if suffix.lower() in ('.txt', '.json')
        }

        log_message(f"✅ Successfully created reference map with {len(reference_map)} entries.", "INFO")
        return reference_map

    except Exception as e:
        log_message(f"❌ Failed to fetch reference map from Hugging Face: {e!s}", "ERROR")
        return {}
305
 
306
def find_matching_filename(audio_filename: str, reference_map: Dict[str, str]) -> Optional[str]:
    """
    Return the reference file mapped to an audio file, if any.

    The audio name is stripped of its final extension (directory parts are
    kept) and looked up as an exact key in `reference_map`.

    Returns:
        The mapped reference filename, or None when the key is absent.
    """
    stem = os.path.splitext(audio_filename)[0]
    return reference_map.get(stem)
310
+
311
def get_next_file_to_process(source_repo_id: str, state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Choose the next audio file to process from the source dataset repo.

    The repo's `.wav`/`.mp3` paths (extension compared case-insensitively) are
    sorted ascending. Starting at `state["next_download_index"]` (default 0),
    the first file whose status in `state["file_states"]` is neither
    "processed" nor "processing" is chosen. If none is found, the files before
    that index are scanned for one marked "failed" so it can be retried.

    Returns:
        `{"filename": str, "url": str, "index": int}` for the chosen file, or
        None when there is nothing to do or any step raises.
    """
    log_message(f"Determining next file to process from {source_repo_id}...", "INFO")

    try:
        repo_files = hf_api.list_repo_files(repo_id=source_repo_id, repo_type="dataset")

        audio_files = [path for path in repo_files if path.lower().endswith(('.wav', '.mp3'))]
        audio_files.sort()

        # The total is published even when the list turns out to be empty.
        processing_status["total_files"] = len(audio_files)

        if not audio_files:
            log_message("No audio files found in the source repository.", "INFO")
            return None

        next_index = state.get("next_download_index", 0)
        file_states = state.get("file_states", {})

        def describe(position: int, found_prefix: str) -> Dict[str, Any]:
            """Resolve the download URL for a chosen file, log it, and build the result."""
            name = audio_files[position]
            file_url = hf_hub_url(repo_id=source_repo_id, filename=name, repo_type="dataset")
            log_message(f"{found_prefix} {position}: {name}", "INFO")
            return {"filename": name, "url": file_url, "index": position}

        # Forward scan: anything not finished and not locked by a worker qualifies.
        for i in range(next_index, len(audio_files)):
            if file_states.get(audio_files[i], "unprocessed") not in ("processed", "processing"):
                return describe(i, "Found next file at index")

        log_message("All files up to the current index have been processed or are locked.", "INFO")

        # Wrap-around scan: only files explicitly marked as failed are retried.
        for i in range(0, next_index):
            if file_states.get(audio_files[i], "unprocessed") == "failed":
                return describe(i, "Found failed file for retry at index")

        return None

    except Exception as e:
        log_message(f"❌ Failed to get next file to process: {e!s}", "ERROR")
        return None
375
 
376
def run_whisper_transcription(audio_path: str, output_dir: str, model: str) -> Optional[str]:
    """
    Transcribe an audio file with the `whisper` command-line tool.

    The audio file is moved into `output_dir` and `whisper` is run with that
    directory as its working directory, writing `<audio base name>.json` there.
    The moved audio file is always deleted afterwards, whether transcription
    succeeded or not, so failures no longer leave audio behind in `output_dir`.

    Args:
        audio_path: Path of the audio file; it is consumed by this call.
        output_dir: Directory that receives the JSON output; created if missing.
        model: Whisper model name passed to `--model`.

    Returns:
        Path of the generated JSON file, or None if the command failed, timed
        out after one hour, did not produce the expected file, or any other
        error occurred.
    """
    audio_name = os.path.basename(audio_path)
    log_message(f"🎙️ Starting transcription for {audio_name} with model {model}...", "INFO")

    temp_audio_path = os.path.join(output_dir, audio_name)

    # Paths are relative because the subprocess runs with cwd=output_dir.
    command = [
        "whisper",
        audio_name,
        "--model", model,
        "--output_dir", ".",
        "--output_format", "json",
    ]

    audio_moved = False
    try:
        os.makedirs(output_dir, exist_ok=True)
        shutil.move(audio_path, temp_audio_path)
        audio_moved = True

        result = subprocess.run(
            command,
            cwd=output_dir,
            capture_output=True,
            text=True,
            check=True,
            timeout=3600,  # 1 hour timeout for transcription
        )

        log_message(f"✅ Transcription successful. Output: {result.stdout.strip()}", "INFO")

        # whisper names its output after the audio file's base name.
        base_name, _ = os.path.splitext(audio_name)
        json_output_path = os.path.join(output_dir, f"{base_name}.json")

        if os.path.exists(json_output_path):
            return json_output_path

        log_message(f"❌ Whisper ran successfully but did not produce the expected JSON file: {json_output_path}", "ERROR")
        return None

    except subprocess.CalledProcessError as e:
        log_message(f"❌ Whisper command failed. Stderr: {(e.stderr or '').strip()}", "ERROR")
        log_message(f"❌ Command: {' '.join(command)}", "ERROR")
        return None
    except subprocess.TimeoutExpired:
        log_message("❌ Whisper command timed out.", "ERROR")
        return None
    except Exception as e:
        log_message(f"❌ An unexpected error occurred during transcription: {str(e)}", "ERROR")
        return None
    finally:
        # Remove the moved audio on every path. A failed removal is only
        # logged, so it can no longer discard a valid transcription result.
        if audio_moved:
            try:
                os.remove(temp_audio_path)
            except OSError as e:
                log_message(f"⚠️ Could not remove temporary audio file {temp_audio_path}: {e}", "WARNING")
438
 
439
def process_audio_file(audio_path: str, reference_map: Dict[str, str], output_filename: str) -> bool:
    """
    Transcribe an audio file, rename the result, and upload it to the API.

    The whisper JSON is produced in `TRANSCRIPTIONS_FOLDER`, renamed to
    `<output_filename without extension>.json`, and uploaded. The local JSON
    is deleted only after a successful upload; a failed deletion is logged
    but does not change the result.

    Args:
        audio_path: Local path of the audio file to transcribe.
        reference_map: Accepted for interface compatibility; not used here.
        output_filename: Name whose extension-less form names the uploaded
            JSON (for example a matched reference file such as 'file.txt').

    Returns:
        True when the transcription was uploaded, otherwise False.
    """
    # 1. Transcribe
    whisper_json_path = run_whisper_transcription(audio_path, TRANSCRIPTIONS_FOLDER, WHISPER_MODEL)
    if not whisper_json_path:
        return False

    # 2. Give the JSON its final name: the target's base name plus ".json"
    target_stem = os.path.splitext(output_filename)[0]
    final_json_filename = f"{target_stem}.json"
    final_json_path = os.path.join(TRANSCRIPTIONS_FOLDER, final_json_filename)

    try:
        if whisper_json_path != final_json_path:
            shutil.move(whisper_json_path, final_json_path)
            log_message(f"✅ Renamed transcription to: {final_json_filename}", "INFO")
    except Exception as e:
        log_message(f"❌ Failed to rename transcription file: {e!s}", "ERROR")
        return False

    # 3. Upload
    uploaded = upload_transcription_to_api(final_json_path, final_json_filename)
    if not uploaded:
        log_message(f"❌ Failed to upload transcription to API: {final_json_filename}", "ERROR")
        return False

    processing_status["transcribed_files"] += 1

    # The local copy is only needed until the upload has succeeded.
    try:
        os.remove(final_json_path)
        log_message(f"🗑️ Cleaned up local transcription file: {final_json_path}", "INFO")
    except Exception as e:
        log_message(f"❌ Failed to clean up transcription file: {e!s}", "ERROR")

    return True
500
+
501
  def main_processing_loop():
502
+ """The main loop that continuously checks for and processes new audio files."""
503
+ global processing_status
504
 
505
  if processing_status["is_running"]:
506
+ log_message("Processing loop is already running.", "WARNING")
507
  return
508
 
509
  processing_status["is_running"] = True
510
+ log_message("🚀 Audio transcription processing loop started.", "INFO")
511
 
512
+ # 1. Get the reference map once
513
+ reference_map = get_reference_map(REFERENCE_REPO_ID)
514
+ if not reference_map:
515
+ log_message("❌ Could not get reference map. Stopping loop.", "CRITICAL")
516
+ processing_status["is_running"] = False
517
+ return
 
 
 
518
 
519
+ try:
520
  while processing_status["is_running"]:
521
+ time.sleep(PROCESSING_DELAY)
522
 
523
  # 1. Download state from the new API
524
  current_state = download_state_from_api()
 
636
  processing_status["is_running"] = False
637
  return JSONResponse(status_code=200, content={"message": "Processing stop requested. Will stop after current file."})
638
 
639
+ # --- Main Execution ---
640
 
641
  if __name__ == "__main__":
642
  # This block is for local testing and won't be used in the final sandbox execution