Spaces:

factorstudios
/

switch

Sleeping

App Files Community

factorstudios committed on Jan 19

Commit

e330188

verified ·

1 Parent(s): 8b5393c

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -21

app.py CHANGED Viewed

@@ -32,8 +32,9 @@ LOCAL_STATE_FOLDER.mkdir(exist_ok=True)
 # Directory within the HF dataset where the audio files are located
 AUDIO_FILE_PREFIX = "audio/"
 WHISPER_SERVERS = [
-    f"https://makeitfr-mineo-{i}.hf.space/transcribe" for i in range(1, 21)
 ]
 # Temporary storage for audio files
@@ -48,7 +49,6 @@ class WhisperServer:
     def __init__(self, url: str):
         self.url = url
         self.is_processing = False
-        self.current_file_index: Optional[int] = None
         self.total_processed = 0
         self.total_time = 0.0
@@ -60,11 +60,11 @@ class WhisperServer:
     def release(self):
         """Release the server for a new file"""
         self.is_processing = False
-        self.current_file_index = None
 # Global state for whisper servers
 servers = [WhisperServer(url) for url in WHISPER_SERVERS]
 server_lock = asyncio.Lock()
 # --- Progress and State Management Functions ---
@@ -181,21 +181,22 @@ async def transcribe_with_server(server: WhisperServer, wav_path: Path) -> Optio
         print(f"[{FLOW_ID}] Error transcribing with {server.url}: {e}")
     return None
-async def process_file_task(wav_file: str, state: Dict, progress: Dict):
-    server = None
-    while server is None:
         async with server_lock:
-            for s in servers:
                 if not s.is_processing:
                     s.is_processing = True
-                    server = s
-                    break
-        if server is None:
-            await asyncio.sleep(1)
     try:
-        # FIX: Ensure we use the correct local path for the downloaded file
-        # hf_hub_download returns the absolute path to the downloaded file
         print(f"[{FLOW_ID}] Downloading {wav_file}...")
         downloaded_path_str = hf_hub_download(
             repo_id=HF_AUDIO_DATASET_ID,
@@ -233,7 +234,6 @@ async def process_file_task(wav_file: str, state: Dict, progress: Dict):
             state["file_states"][wav_file] = "failed_transcription"
             print(f"[{FLOW_ID}] ❌ Failed: {wav_file}")
-        # Cleanup
         if wav_path.exists():
             wav_path.unlink()
@@ -256,7 +256,6 @@ async def main_processing_loop():
                 await asyncio.sleep(60)
                 continue
-            # Check HF_OUTPUT_DATASET_ID for existing JSON outputs
             print(f"[{FLOW_ID}] Checking {HF_OUTPUT_DATASET_ID} for existing JSON outputs...")
             try:
                 api = HfApi(token=HF_TOKEN)
@@ -267,10 +266,7 @@ async def main_processing_loop():
                 print(f"[{FLOW_ID}] Warning: Could not fetch existing files: {e}")
                 existing_json_files = set()
-            # 1. Handpick failed_transcription files
             failed_files = [f for f, s in state.get("file_states", {}).items() if s == "failed_transcription"]
-            # 2. Check for new files
             next_idx = state.get("next_download_index", 0)
             new_files_chunk = file_list[next_idx:next_idx + 1000]
@@ -298,10 +294,7 @@ async def main_processing_loop():
                                 state["next_download_index"] = current_idx + 1
                     continue
-                # Found an UNPROCESSED file
                 print(f"[{FLOW_ID}] Found unprocessed file: {f}")
-                # Upload skipped state before processing
                 if state_changed_locally:
                     print(f"[{FLOW_ID}] Synchronizing skipped files to HF state...")
                     await upload_hf_state(state)

 # Directory within the HF dataset where the audio files are located
 AUDIO_FILE_PREFIX = "audio/"
+# FIX: Updated server list based on the logs showing 'eliasishere' prefix
 WHISPER_SERVERS = [
+    f"https://eliasishere-makeitfr-mineo-{i}.hf.space/transcribe" for i in range(1, 21)
 ]
 # Temporary storage for audio files
     def __init__(self, url: str):
         self.url = url
         self.is_processing = False
         self.total_processed = 0
         self.total_time = 0.0
     def release(self):
         """Release the server for a new file"""
         self.is_processing = False
 # Global state for whisper servers
 servers = [WhisperServer(url) for url in WHISPER_SERVERS]
 server_lock = asyncio.Lock()
+server_index = 0 # For round-robin selection
 # --- Progress and State Management Functions ---
         print(f"[{FLOW_ID}] Error transcribing with {server.url}: {e}")
     return None
+async def get_available_server() -> WhisperServer:
+    global server_index
+    while True:
         async with server_lock:
+            # Round-robin check
+            for _ in range(len(servers)):
+                s = servers[server_index]
+                server_index = (server_index + 1) % len(servers)
                 if not s.is_processing:
                     s.is_processing = True
+                    return s
+        await asyncio.sleep(1)
+async def process_file_task(wav_file: str, state: Dict, progress: Dict):
+    server = await get_available_server()
     try:
         print(f"[{FLOW_ID}] Downloading {wav_file}...")
         downloaded_path_str = hf_hub_download(
             repo_id=HF_AUDIO_DATASET_ID,
             state["file_states"][wav_file] = "failed_transcription"
             print(f"[{FLOW_ID}] ❌ Failed: {wav_file}")
         if wav_path.exists():
             wav_path.unlink()
                 await asyncio.sleep(60)
                 continue
             print(f"[{FLOW_ID}] Checking {HF_OUTPUT_DATASET_ID} for existing JSON outputs...")
             try:
                 api = HfApi(token=HF_TOKEN)
                 print(f"[{FLOW_ID}] Warning: Could not fetch existing files: {e}")
                 existing_json_files = set()
             failed_files = [f for f, s in state.get("file_states", {}).items() if s == "failed_transcription"]
             next_idx = state.get("next_download_index", 0)
             new_files_chunk = file_list[next_idx:next_idx + 1000]
                                 state["next_download_index"] = current_idx + 1
                     continue
                 print(f"[{FLOW_ID}] Found unprocessed file: {f}")
                 if state_changed_locally:
                     print(f"[{FLOW_ID}] Synchronizing skipped files to HF state...")
                     await upload_hf_state(state)