Spaces:

AlserFurma
/

Yapi

Build error

App Files Community

AlserFurma committed on Dec 1, 2025

Commit

f7d34b8

verified ·

1 Parent(s): 2da25f5

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -18

app.py CHANGED Viewed

@@ -17,11 +17,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 try:
-    # TTS модель казахского языка
     tts_model = VitsModel.from_pretrained("facebook/mms-tts-kaz").to(device)
     tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kaz")
-    # Модель перевода ru -> kk
     translator = pipeline(
         "translation",
         model="facebook/nllb-200-distilled-600M",
@@ -31,11 +31,11 @@ try:
     print("✅ Все модели успешно загружены!")
 except Exception as e:
-    raise RuntimeError(f"Ошибка загрузки моделей: {str(e)}")
 # =========================
-# Talking Head Space
 # =========================
 TALKING_HEAD_SPACE = "Skywork/skyreels-a1-talking-head"
@@ -53,7 +53,9 @@ def inference(image: Image.Image, text: str):
     img_path = None
     try:
-        # Проверки
         if image is None:
             raise ValueError("Загрузите изображение лектора!")
@@ -63,7 +65,7 @@ def inference(image: Image.Image, text: str):
         if len(text) > 500:
             raise ValueError("Текст превышает 500 символов!")
-        print("Ввод (RU):", text)
         # =========================
         # Шаг 1 — Перевод
@@ -75,7 +77,10 @@ def inference(image: Image.Image, text: str):
         )
         translated_text = translation[0]["translation_text"]
-        print("Перевод (KK):", translated_text)
         # =========================
         # Шаг 2 — Озвучка
@@ -86,6 +91,10 @@ def inference(image: Image.Image, text: str):
             output = tts_model(**inputs)
         waveform = output.waveform.squeeze().cpu().numpy()
         audio = (waveform * 32767).astype("int16")
         sampling_rate = tts_model.config.sampling_rate
@@ -93,8 +102,10 @@ def inference(image: Image.Image, text: str):
             wavfile.write(f.name, sampling_rate, audio)
             audio_path = f.name
         # =========================
-        # Шаг 3 — Сохранение изображения
         # =========================
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
             if image.mode != "RGB":
@@ -102,9 +113,12 @@ def inference(image: Image.Image, text: str):
             image.save(f.name)
             img_path = f.name
         # =========================
         # Шаг 4 — Генерация видео
         # =========================
         client = Client(TALKING_HEAD_SPACE)
         result = client.predict(
@@ -115,22 +129,52 @@ def inference(image: Image.Image, text: str):
             api_name="/process_image_audio"
         )
-        if isinstance(result, tuple):
-            video_path = result[0]
         else:
-            raise ValueError("Видео не получено!")
         error_msg = "✅ Видео успешно создано!"
     except Exception as e:
         error_msg = f"❌ Ошибка: {str(e)}"
         traceback.print_exc()
     finally:
         for p in [audio_path, img_path]:
             if p and os.path.exists(p):
                 try:
                     os.remove(p)
                 except:
                     pass
@@ -138,18 +182,18 @@ def inference(image: Image.Image, text: str):
 # =========================
-# Gradio Интерфейс
 # =========================
-title = "Бейне Оқытушы"
 description = """
-Суретіңізді жүктеп, дәріс мәтінін орыс тілінде енгізіңіз.
-Жүйе автоматты түрде қазақ тіліне аударады және бейне жасайды!
 **Талаптар:**
 - Фото: бет анық көрінетін
-- Мәтін: орыс тілінде (500 таңбаға дейін)
 """
 iface = gr.Interface(
@@ -159,12 +203,12 @@ iface = gr.Interface(
         gr.Textbox(
             lines=5,
             label="📝 Дәріс мәтіні (орыс тілінде)",
-            placeholder="500 таңбаға дейін..."
         )
     ],
     outputs=[
         gr.Video(label="🎬 Дайын бейне"),
-        gr.Textbox(label="ℹ️ Мәртебе")
     ],
     title=title,
     description=description,

 print(f"Using device: {device}")
 try:
+    # TTS модель (казахский)
     tts_model = VitsModel.from_pretrained("facebook/mms-tts-kaz").to(device)
     tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kaz")
+    # Перевод ru -> kk
     translator = pipeline(
         "translation",
         model="facebook/nllb-200-distilled-600M",
     print("✅ Все модели успешно загружены!")
 except Exception as e:
+    raise RuntimeError(f"❌ Ошибка загрузки моделей: {str(e)}")
 # =========================
+# Talking Head API
 # =========================
 TALKING_HEAD_SPACE = "Skywork/skyreels-a1-talking-head"
     img_path = None
     try:
+        # =========================
+        # Проверка входных данных
+        # =========================
         if image is None:
             raise ValueError("Загрузите изображение лектора!")
         if len(text) > 500:
             raise ValueError("Текст превышает 500 символов!")
+        print("📥 Ввод (RU):", text)
         # =========================
         # Шаг 1 — Перевод
         )
         translated_text = translation[0]["translation_text"]
+        print("🌍 Перевод (KK):", translated_text)
+        if not translated_text.strip():
+            raise ValueError("Перевод не удался!")
         # =========================
         # Шаг 2 — Озвучка
             output = tts_model(**inputs)
         waveform = output.waveform.squeeze().cpu().numpy()
+        if waveform.size == 0:
+            raise ValueError("TTS вернул пустое аудио!")
         audio = (waveform * 32767).astype("int16")
         sampling_rate = tts_model.config.sampling_rate
             wavfile.write(f.name, sampling_rate, audio)
             audio_path = f.name
+        print("🔊 Аудио создано:", audio_path)
         # =========================
+        # Шаг 3 — Сохранение фото
         # =========================
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
             if image.mode != "RGB":
             image.save(f.name)
             img_path = f.name
+        print("🖼 Фото сохранено:", img_path)
         # =========================
         # Шаг 4 — Генерация видео
         # =========================
+        print("🎥 Подключение к SkyReels...")
         client = Client(TALKING_HEAD_SPACE)
         result = client.predict(
             api_name="/process_image_audio"
         )
+        print("✅ RAW RESULT:", result)
+        # =========================
+        # Универсальный разбор результата
+        # =========================
+        if isinstance(result, tuple) and len(result) > 0:
+            video_data = result[0]
+        elif isinstance(result, dict):
+            video_data = result
         else:
+            raise ValueError(f"Неизвестный формат ответа API: {type(result)}")
+        if isinstance(video_data, dict):
+            video_path = (
+                video_data.get("video")
+                or video_data.get("path")
+                or video_data.get("file")
+            )
+        elif isinstance(video_data, str):
+            video_path = video_data
+        else:
+            raise ValueError(f"Не удалось извлечь видео: {type(video_data)}")
+        if not video_path:
+            raise ValueError("API не вернул путь к видео!")
+        print("✅ Видео создано:", video_path)
         error_msg = "✅ Видео успешно создано!"
     except Exception as e:
         error_msg = f"❌ Ошибка: {str(e)}"
+        print(error_msg)
         traceback.print_exc()
     finally:
+        # =========================
+        # Очистка временных файлов
+        # =========================
         for p in [audio_path, img_path]:
             if p and os.path.exists(p):
                 try:
                     os.remove(p)
+                    print("🗑 Удалён файл:", p)
                 except:
                     pass
 # =========================
+# Интерфейс Gradio
 # =========================
+title = "🎓 Бейне Оқытушы"
 description = """
+Суретіңізді жүктеп, дәріс мәтінін **орыс тілінде** енгізіңіз.
+Жүйе автоматты түрде қазақ тіліне аударады, озвучка жасайды және бейне шығарады!
 **Талаптар:**
 - Фото: бет анық көрінетін
+- Мәтін: 500 таңбаға дейін
 """
 iface = gr.Interface(
         gr.Textbox(
             lines=5,
             label="📝 Дәріс мәтіні (орыс тілінде)",
+            placeholder="Мәтінді енгізіңіз..."
         )
     ],
     outputs=[
         gr.Video(label="🎬 Дайын бейне"),
+        gr.Textbox(label="ℹ️ Мәртебе", interactive=False)
     ],
     title=title,
     description=description,