# Avatar Streamlit app: chat with a local Ollama model, synthesize speech,
# and drive a SadTalker lip-sync video (no D-ID).
print("Avatar Streamlit App using SadTalker only (no D-ID)")

import os
import time
import json
import tempfile
import asyncio

import requests
import numpy as np
import matplotlib.pyplot as plt
from pydub import AudioSegment
import streamlit as st
from dotenv import load_dotenv
from TTS.api import TTS

# ========== Load environment and settings ==========
load_dotenv()
# DID_API_KEY = os.getenv("DID_API_KEY")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
# Endpoint is overridable so the app can talk to a non-default Ollama host.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
# (connect, read) timeouts in seconds; without them a stalled server would
# block the Streamlit script indefinitely.
OLLAMA_TIMEOUT = (5, 300)

# Make sure results folder exists
RESULTS_DIR = os.path.join(os.getcwd(), "results")
os.makedirs(RESULTS_DIR, exist_ok=True)


# ========== Helpers ==========
# --- Ollama chat ---
def _collect_ollama_text(raw: bytes) -> str:
    """Join the ``response`` fields of a newline-delimited JSON body.

    The body is parsed as bytes so that decoding is always UTF-8 (important
    for Hebrew output) and so that only real line breaks split records;
    ``str.splitlines`` would also split on Unicode separators that may occur
    inside a JSON string.

    Raises:
        ValueError: if a line is not valid JSON.
        RuntimeError: if a record carries an ``error`` field.
    """
    pieces = []
    for line in raw.splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        if not isinstance(record, dict):
            continue
        if record.get("error"):
            raise RuntimeError(record["error"])
        pieces.append(record.get("response", ""))
    return "".join(pieces).strip()


def ask_ollama(question: str) -> str:
    """Ask a local Ollama model and return its full answer as one string.

    Sends ``question`` as the prompt for ``OLLAMA_MODEL`` to ``OLLAMA_URL``
    and concatenates the streamed response fragments.

    On any request, HTTP, or parsing failure the error is shown in the
    Streamlit UI and a fixed Hebrew apology string is returned instead of
    raising, so the caller always receives text.
    """
    try:
        res = requests.post(
            OLLAMA_URL,
            json={"model": OLLAMA_MODEL, "prompt": question},
            timeout=OLLAMA_TIMEOUT,
        )
        res.raise_for_status()
        return _collect_ollama_text(res.content)
    except (requests.RequestException, ValueError, RuntimeError) as e:
        st.error(f"Ollama error: {e}")
        return "מצטער, לא הצלחתי להתחבר למודל המקומי."
# --- Text-to-Speech using Edge-TTS (disabled alternative, kept for reference) ---
# import edge_tts
# async def text_to_speech_edge(text, voice="en-US-GuyNeural"):
#     """Convert text to speech and return mp3 path."""
#     fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
#     os.close(fd)
#     communicate = edge_tts.Communicate(text, voice)
#     await communicate.save(mp3_path)
#     return mp3_path

XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"


@st.cache_resource(show_spinner=False)
def _load_tts_model(model_name, use_gpu):
    """Load a Coqui TTS model once and reuse it across Streamlit reruns."""
    return TTS(model_name, gpu=use_gpu)


def _gpu_available():
    """Return True when torch is importable and reports a CUDA device."""
    try:
        import torch
    except ImportError:
        return False
    return torch.cuda.is_available()


def synthesize_speech(text, lang="he"):
    """Synthesize ``text`` offline with Coqui XTTS v2 and return a wav path.

    Args:
        text: The text to speak.
        lang: Language code passed to the TTS model.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller owns the
        file; it is not deleted automatically.

    Raises:
        Whatever the TTS library raises on failure; the temporary file is
        removed before the exception propagates.
    """
    # NOTE(review): confirm that this model accepts lang="he" and can run
    # without a speaker reference; callers should be ready for it to raise.
    print("🗣️ Generating speech with Coqui TTS...")
    # The model is cached because loading it is by far the slowest step;
    # the GPU is used only when one is actually present.
    tts = _load_tts_model(XTTS_MODEL_NAME, _gpu_available())
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(text=text, file_path=wav_path, language=lang)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise
    print("✅ Saved audio to:", wav_path)
    return wav_path


# --- Check SadTalker availability ---
SADTALKER_AVAILABLE = True  # set False if not installed

# ========== Streamlit UI ==========
st.set_page_config(page_title="Avatar Lip Sync", page_icon="🎬", layout="centered")
st.title("🧠 Avatar Chatbot & Lip Sync Studio")

# --- Image Upload ---
st.subheader("🧑 העלאת תמונת אווטר")
avatar_image = st.file_uploader("בחר תמונה (JPG/PNG):", type=["jpg", "jpeg", "png"])
if avatar_image is not None:
    # Use only the final path component so a crafted upload name cannot
    # point outside the temp directory.
    avatar_file_name = os.path.basename(avatar_image.name) or "avatar.png"
    avatar_image_path = os.path.join(tempfile.gettempdir(), avatar_file_name)
    with open(avatar_image_path, "wb") as f:
        # getvalue() returns the whole upload regardless of read position.
        f.write(avatar_image.getvalue())
    st.image(avatar_image_path, caption="Avatar Image", width=250)
else:
    avatar_image_path = None

# --- User input & mode ---
st.subheader("💬 כתוב שאלה או טקסט לשיחה")
user_input = st.text_area("הקלד טקסט:", "")
talk_mode = st.radio("בחר מצב:", ["שיחה (טקסט חופשי)", "צ'אט עם מודל Ollama"])

# --- Voice selection ---
# NOTE(review): the selected voice is not passed to synthesize_speech below,
# which is always called with lang="he"; wire it up or remove the selector.
selected_voice = st.selectbox(
    "בחר קול לדיבור:",
    [
        "en-US-GuyNeural (Male)",
        "en-US-JennyNeural (Female)",
        "he-IL-AsafNeural (Male)",
        "he-IL-NoaNeural (Female)",
    ],
)
VOICE_CODE = selected_voice.split(" ")[0]

# --- Audio Upload Section ---
st.subheader("🎵 העלאת קובץ MP3 לסינכרון שפתיים")
uploaded_audio = st.file_uploader("העלה קובץ MP3:", type=["mp3"])
if uploaded_audio is not None:
    audio_bytes = uploaded_audio.getvalue()
    # Streamlit reruns the script on every interaction; remember the temp
    # file for this exact upload so it is not written again each time.
    audio_key = (uploaded_audio.name, hash(audio_bytes))
    remembered = st.session_state.get("_uploaded_audio")
    if remembered and remembered[0] == audio_key and os.path.exists(remembered[1]):
        mp3_path = remembered[1]
    else:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_audio:
            tmp_audio.write(audio_bytes)
            mp3_path = tmp_audio.name
        st.session_state["_uploaded_audio"] = (audio_key, mp3_path)

    # Decoding can fail (corrupt file, missing decoder); the upload is still
    # usable for video generation, so only the preview is skipped.
    try:
        audio = AudioSegment.from_file(mp3_path)
    except Exception as e:
        audio = None
        st.warning(f"Waveform preview unavailable: {e}")

    st.audio(mp3_path)

    if audio is not None:
        duration_seconds = len(audio) / 1000
        st.info(f"⏱ אורך הקובץ: {duration_seconds:.1f} שניות")

        # Waveform preview
        fig = None
        try:
            samples = np.array(audio.get_array_of_samples())
            fig, ax = plt.subplots(figsize=(6, 1.5))
            ax.plot(samples[::200])
            ax.set_title("Waveform Preview")
            ax.axis("off")
            st.pyplot(fig)
        except Exception as e:
            st.warning(f"Waveform preview unavailable: {e}")
        finally:
            # Close the figure explicitly so reruns do not accumulate figures.
            if fig is not None:
                plt.close(fig)

# --- Generate Button ---
if st.button("שלח"):
    # The video helper is expected to be defined earlier in this script;
    # look it up so a missing helper yields a clear message, not a NameError.
    make_video = globals().get("create_sadtalker_video")

    if not avatar_image_path:
        st.warning("אנא העלה תמונת אווטר תחילה.")
    elif not SADTALKER_AVAILABLE or not callable(make_video):
        st.error("SadTalker לא מותקן. אנא התקן לפי ההוראות.")
    else:
        if uploaded_audio is not None:
            st.success("✅ אודיו נטען בהצלחה! משתמש בקובץ שהעלית.")
        else:
            # Whitespace-only text counts as empty input.
            if not user_input.strip():
                st.warning("אנא הקלד שאלה או העלה קובץ אודיו.")
                st.stop()

            with st.spinner("🤖 מחכה לתגובה מהמוח המקומי..."):
                response = user_input if talk_mode.startswith("שיחה") else ask_ollama(user_input)
            st.success(response)

            with st.spinner("🎙️ יוצר קול..."):
                # mp3_path = synthesize_speech(response, VOICE_CODE)
                try:
                    mp3_path = synthesize_speech(response, lang="he")
                except Exception as e:
                    # Report the failure in the page and end this run.
                    st.error(f"TTS error: {e}")
                    st.stop()
            st.audio(mp3_path)

        # --- Generate video ---
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_video = os.path.join(RESULTS_DIR, f"avatar_{timestamp}.mp4")

        with st.spinner("🎬 יוצר וידאו עם סינכרון שפתיים..."):
            video_path = make_video(avatar_image_path, mp3_path, output_video)

        if video_path and os.path.exists(video_path):
            st.success(f"✅ הסרטון נשמר בתיקיית results!\n📁 {video_path}")
            st.video(video_path)
            with open(video_path, "rb") as vid_file:
                st.download_button(
                    label="⬇️ הורד את הסרטון",
                    data=vid_file,
                    file_name=os.path.basename(video_path),
                    mime="video/mp4",
                )
        else:
            st.error("❌ לא הצלחתי ליצור את הסרטון. בדוק את SadTalker.")