Spaces:
Build error
Build error
| import gradio as gr | |
| import os | |
| from PIL import Image | |
| import tempfile | |
| from gradio_client import Client, handle_file | |
| import torch | |
| from transformers import VitsModel, AutoTokenizer, pipeline | |
| import scipy.io.wavfile as wavfile | |
| import traceback | |
# =========================
# Model loading
# =========================
# Checkpoint identifiers, named once so the model and its tokenizer can never
# drift apart.
TTS_MODEL_ID = "facebook/mms-tts-kaz"
TRANSLATION_MODEL_ID = "facebook/nllb-200-distilled-600M"

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

try:
    # Text-to-speech model (Kazakh).
    tts_model = VitsModel.from_pretrained(TTS_MODEL_ID).to(device)
    tts_tokenizer = AutoTokenizer.from_pretrained(TTS_MODEL_ID)

    # Translation pipeline; source/target languages (ru -> kk) are supplied
    # per call. The pipeline takes a device index: 0 for the first GPU,
    # -1 for CPU.
    translator = pipeline(
        "translation",
        model=TRANSLATION_MODEL_ID,
        device=0 if device == "cuda" else -1,
    )
    print("✅ Все модели успешно загружены!")
except Exception as e:
    # Fail fast at import time: the app cannot work without its models.
    # Chain explicitly so the original failure is reported as the direct cause.
    raise RuntimeError(f"❌ Ошибка загрузки моделей: {e}") from e

# =========================
# Talking Head API
# =========================
# Name of the Space that is passed to gradio_client.Client in inference().
TALKING_HEAD_SPACE = "Skywork/skyreels-a1-talking-head"
# =========================
# Main pipeline
# =========================
def _translate_ru_to_kk(text: str) -> str:
    """Translate Russian *text* to Kazakh with the module-level ``translator``."""
    translation = translator(
        text,
        src_lang="rus_Cyrl",
        tgt_lang="kaz_Cyrl",
    )
    translated_text = translation[0]["translation_text"]
    print("🌍 Перевод (KK):", translated_text)
    if not translated_text.strip():
        raise ValueError("Перевод не удался!")
    return translated_text


def _synthesize_speech(translated_text: str):
    """Run the TTS model and return ``(int16 samples, sampling rate)``."""
    inputs = tts_tokenizer(translated_text, return_tensors="pt").to(device)
    with torch.no_grad():
        output = tts_model(**inputs)
    waveform = output.waveform.squeeze().cpu().numpy()
    if waveform.size == 0:
        raise ValueError("TTS вернул пустое аудио!")
    # Clip before scaling: a sample outside [-1, 1] would otherwise wrap
    # around when cast to int16 and produce loud clicks.
    audio = (waveform.clip(-1.0, 1.0) * 32767).astype("int16")
    return audio, tts_model.config.sampling_rate


def _reserve_temp_path(suffix: str) -> str:
    """Create an empty temp file with *suffix*, close it, and return its path."""
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        return handle.name


def _extract_video_path(result) -> str:
    """Pull the video file path out of the talking-head API response.

    Accepts a tuple/list (first element is used), a dict, or a bare string
    path, since the shape of the response depends on how many outputs the
    endpoint declares.

    Raises:
        ValueError: if the response shape is not recognised or carries no
            path.
    """
    if isinstance(result, (tuple, list)) and len(result) > 0:
        video_data = result[0]
    elif isinstance(result, (dict, str)):
        video_data = result
    else:
        raise ValueError(f"Неизвестный формат ответа API: {type(result)}")

    if isinstance(video_data, dict):
        video_path = (
            video_data.get("video")
            or video_data.get("path")
            or video_data.get("file")
        )
    elif isinstance(video_data, str):
        video_path = video_data
    else:
        raise ValueError(f"Не удалось извлечь видео: {type(video_data)}")

    if not video_path:
        raise ValueError("API не вернул путь к видео!")
    return video_path


def _remove_temp_file(path) -> None:
    """Best-effort removal of a temp file; never raises on I/O failure."""
    if not path:
        return
    try:
        os.remove(path)
        print("🗑 Удалён файл:", path)
    except FileNotFoundError:
        # Already gone; nothing to clean up.
        pass
    except OSError as exc:
        # Cleanup must not mask the real result, but do not hide the problem.
        print(f"⚠️ Не удалось удалить файл {path}: {exc}")


def inference(image: Image.Image, text: str):
    """Turn a lecturer photo and Russian text into a Kazakh talking-head video.

    Steps: validate input, translate ru -> kk, synthesize speech, save the
    photo, then call the remote talking-head Space with both files.

    Args:
        image: Lecturer photo as a PIL image (``None`` if nothing was uploaded).
        text: Lecture text in Russian, at most 500 characters.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is ``None``
        when any step fails; the status message then describes the error.
        Exceptions are reported through the status message, never raised.
    """
    status_msg = ""
    video_path = None
    audio_path = None
    img_path = None
    try:
        # --- Input validation ---
        if image is None:
            raise ValueError("Загрузите изображение лектора!")
        if not text or not text.strip():
            raise ValueError("Введите текст лекции!")
        if len(text) > 500:
            raise ValueError("Текст превышает 500 символов!")
        print("📥 Ввод (RU):", text)

        # --- Step 1: translation ---
        translated_text = _translate_ru_to_kk(text)

        # --- Step 2: speech synthesis ---
        audio, sampling_rate = _synthesize_speech(translated_text)
        # Record the path BEFORE writing so the finally-block removes the
        # file even when the write fails; the handle is already closed, so
        # writing by name does not reopen a file that is still held open.
        audio_path = _reserve_temp_path(".wav")
        wavfile.write(audio_path, sampling_rate, audio)
        print("🔊 Аудио создано:", audio_path)

        # --- Step 3: save the photo ---
        if image.mode != "RGB":
            image = image.convert("RGB")
        img_path = _reserve_temp_path(".png")
        image.save(img_path)
        print("🖼 Фото сохранено:", img_path)

        # --- Step 4: video generation ---
        print("🎥 Подключение к SkyReels...")
        client = Client(TALKING_HEAD_SPACE)
        result = client.predict(
            image_path=handle_file(img_path),
            audio_path=handle_file(audio_path),
            guidance_scale=3.0,
            steps=10,
            api_name="/process_image_audio",
        )
        print("✅ RAW RESULT:", result)

        video_path = _extract_video_path(result)
        print("✅ Видео создано:", video_path)
        status_msg = "✅ Видео успешно создано!"
    except Exception as e:
        # UI boundary: report every failure in the status box instead of
        # crashing the request; the full traceback goes to the server log.
        status_msg = f"❌ Ошибка: {e}"
        print(status_msg)
        traceback.print_exc()
    finally:
        # --- Temp file cleanup ---
        for path in (audio_path, img_path):
            _remove_temp_file(path)
    return video_path, status_msg
# =========================
# Gradio interface
# =========================
title = "🎓 Бейне Оқытушы"
description = """
Суретіңізді жүктеп, дәріс мәтінін **орыс тілінде** енгізіңіз.
Жүйе автоматты түрде қазақ тіліне аударады, озвучка жасайды және бейне шығарады!
**Талаптар:**
- Фото: бет анық көрінетін
- Мәтін: 500 таңбаға дейін
"""

# Input widgets: lecturer photo (delivered to inference() as a PIL image)
# and the lecture text.
lecturer_photo = gr.Image(type="pil", label="📸 Фото дәріскер")
lecture_text = gr.Textbox(
    lines=5,
    label="📝 Дәріс мәтіні (орыс тілінде)",
    placeholder="Мәтінді енгізіңіз...",
)

# Output widgets: the generated video and a read-only status line.
result_video = gr.Video(label="🎬 Дайын бейне")
status_box = gr.Textbox(label="ℹ️ Мәртебе", interactive=False)

# Wire inference() to the widgets; its (video_path, status) return value maps
# onto the two outputs in order.
iface = gr.Interface(
    fn=inference,
    inputs=[lecturer_photo, lecture_text],
    outputs=[result_video, status_box],
    title=title,
    description=description,
    cache_examples=False,
    flagging_mode="never",
)

if __name__ == "__main__":
    iface.launch()