Update app.py
Browse files
app.py
CHANGED
|
@@ -94,7 +94,7 @@ class OptimizedModelManager:
|
|
| 94 |
feature_extractor=self.processor.feature_extractor,
|
| 95 |
chunk_length_s=CHUNK_LENGTH,
|
| 96 |
device=device,
|
| 97 |
-
|
| 98 |
model_kwargs={
|
| 99 |
"use_flash_attention_2": True if hasattr(self.model.config, 'use_flash_attention_2') else False
|
| 100 |
}
|
|
@@ -201,6 +201,11 @@ SUPPORTED_AUDIO_FORMATS = ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wm
|
|
| 201 |
def fast_audio_preprocessing(file_path):
|
| 202 |
"""پردازش سریع فایل صوتی"""
|
| 203 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
# استفاده از librosa برای بارگذاری سریعتر
|
| 205 |
audio, sr = librosa.load(file_path, sr=16000, mono=True)
|
| 206 |
|
|
@@ -214,9 +219,13 @@ def fast_audio_preprocessing(file_path):
|
|
| 214 |
except Exception as e:
|
| 215 |
logger.error(f"Error in fast audio preprocessing: {e}")
|
| 216 |
# بازگشت به روش قدیمی در صورت خطا
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
def extract_audio_from_video_fast(video_path, output_path):
|
| 222 |
"""استخراج سریع صدا از ویدیو"""
|
|
@@ -240,9 +249,48 @@ def extract_audio_from_video_fast(video_path, output_path):
|
|
| 240 |
return False
|
| 241 |
|
| 242 |
def parallel_chunk_processing(audio_chunks, pipe, task, language):
|
| 243 |
-
"""پردازش موازی چانکها
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
def chunks_to_srt(chunks):
|
| 248 |
"""تبدیل سریع چانکها به SRT"""
|
|
@@ -315,8 +363,15 @@ def process_audio_file_optimized(file_path, task="transcribe", language="auto",
|
|
| 315 |
start_time = time.time()
|
| 316 |
pipe = model_manager.get_model()
|
| 317 |
|
|
|
|
|
|
|
| 318 |
# پردازش سریع صدا
|
| 319 |
audio, sr = fast_audio_preprocessing(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
inputs = {"array": audio, "sampling_rate": sr}
|
| 321 |
|
| 322 |
# تنظیمات generation
|
|
@@ -378,8 +433,8 @@ def process_audio_file_optimized(file_path, task="transcribe", language="auto",
|
|
| 378 |
if return_timestamps:
|
| 379 |
return {
|
| 380 |
"text": result['text'],
|
| 381 |
-
"chunks": result
|
| 382 |
-
"srt": chunks_to_srt(result
|
| 383 |
}
|
| 384 |
else:
|
| 385 |
return {"text": result['text']}
|
|
|
|
| 94 |
feature_extractor=self.processor.feature_extractor,
|
| 95 |
chunk_length_s=CHUNK_LENGTH,
|
| 96 |
device=device,
|
| 97 |
+
dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # استفاده از dtype به جای torch_dtype
|
| 98 |
model_kwargs={
|
| 99 |
"use_flash_attention_2": True if hasattr(self.model.config, 'use_flash_attention_2') else False
|
| 100 |
}
|
|
|
|
| 201 |
def fast_audio_preprocessing(file_path):
|
| 202 |
"""پردازش سریع فایل صوتی"""
|
| 203 |
try:
|
| 204 |
+
# غیرفعال کردن کش librosa برای جلوگیری از خطا
|
| 205 |
+
import librosa.cache
|
| 206 |
+
librosa.cache.clear()
|
| 207 |
+
librosa.cache.set_cache(None)
|
| 208 |
+
|
| 209 |
# استفاده از librosa برای بارگذاری سریعتر
|
| 210 |
audio, sr = librosa.load(file_path, sr=16000, mono=True)
|
| 211 |
|
|
|
|
| 219 |
except Exception as e:
|
| 220 |
logger.error(f"Error in fast audio preprocessing: {e}")
|
| 221 |
# بازگشت به روش قدیمی در صورت خطا
|
| 222 |
+
try:
|
| 223 |
+
with open(file_path, "rb") as f:
|
| 224 |
+
inputs = f.read()
|
| 225 |
+
return ffmpeg_read(inputs, 16000), 16000
|
| 226 |
+
except Exception as ffmpeg_error:
|
| 227 |
+
logger.error(f"FFmpeg fallback also failed: {ffmpeg_error}")
|
| 228 |
+
raise Exception("Both librosa and ffmpeg audio processing failed")
|
| 229 |
|
| 230 |
def extract_audio_from_video_fast(video_path, output_path):
|
| 231 |
"""استخراج سریع صدا از ویدیو"""
|
|
|
|
| 249 |
return False
|
| 250 |
|
| 251 |
def parallel_chunk_processing(audio_chunks, pipe, task, language):
    """Run the ASR pipeline over pre-cut audio chunks and align timestamps.

    NOTE(review): despite the name, chunks are handled sequentially, one
    pipeline call per chunk. Each chunk arrives as ``(samples, start_offset)``;
    the offset is added to every returned timestamp pair so they line up with
    the original, uncut audio. A chunk that raises is replaced by an empty
    placeholder result so the overall run continues.
    """
    processed = []

    for segment, offset in audio_chunks:
        try:
            payload = {"array": segment, "sampling_rate": 16000}

            gen_opts = {
                "task": task,
                "do_sample": False,
                "num_beams": 1,
                "use_cache": True,
            }

            # Only force a language when the caller asked for a known one;
            # "auto" leaves detection to the model.
            if language != "auto" and language in SUPPORTED_LANGUAGES:
                gen_opts["language"] = f"<|{language}|>"

            outcome = pipe(
                payload,
                batch_size=BATCH_SIZE,
                generate_kwargs=gen_opts,
                return_timestamps=True,
            )

            # Shift every timestamp pair by this chunk's offset into the
            # full recording.
            for piece in outcome.get('chunks') or []:
                ts = piece.get('timestamp')
                if ts:
                    piece['timestamp'] = (ts[0] + offset, ts[1] + offset)

            processed.append(outcome)

        except Exception as e:
            logger.error(f"Error processing chunk: {e}")
            # Best-effort: keep going with an empty result for the failed chunk.
            processed.append({"text": "", "chunks": []})

    return processed
|
| 294 |
|
| 295 |
def chunks_to_srt(chunks):
|
| 296 |
"""تبدیل سریع چانکها به SRT"""
|
|
|
|
| 363 |
start_time = time.time()
|
| 364 |
pipe = model_manager.get_model()
|
| 365 |
|
| 366 |
+
logger.info(f"Starting audio processing for: {file_path}")
|
| 367 |
+
|
| 368 |
# پردازش سریع صدا
|
| 369 |
audio, sr = fast_audio_preprocessing(file_path)
|
| 370 |
+
logger.info(f"Audio loaded: {len(audio)} samples at {sr}Hz")
|
| 371 |
+
|
| 372 |
+
if audio is None:
|
| 373 |
+
raise Exception("Audio preprocessing returned None")
|
| 374 |
+
|
| 375 |
inputs = {"array": audio, "sampling_rate": sr}
|
| 376 |
|
| 377 |
# تنظیمات generation
|
|
|
|
| 433 |
if return_timestamps:
|
| 434 |
return {
|
| 435 |
"text": result['text'],
|
| 436 |
+
"chunks": result.get('chunks', []),
|
| 437 |
+
"srt": chunks_to_srt(result.get('chunks', []))
|
| 438 |
}
|
| 439 |
else:
|
| 440 |
return {"text": result['text']}
|