| |
# Startup banner; print() writes it to the process's stdout, not to the page.
print("Avatar Streamlit App using SadTalker only (no D-ID)")
import asyncio
import json
import os
import tempfile
import time

import matplotlib.pyplot as plt
import numpy as np
import requests
import streamlit as st
from dotenv import load_dotenv
from pydub import AudioSegment
from TTS.api import TTS
|
|
| |
# Load environment overrides via python-dotenv before any setting is read.
load_dotenv()

# Ollama model name; taken from the OLLAMA_MODEL variable, "llama3" if unset.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3")

# Generated videos are written to a "results" folder under the current
# working directory; the folder is created up front if it is missing.
RESULTS_DIR = os.path.join(os.getcwd(), "results")
os.makedirs(RESULTS_DIR, exist_ok=True)
|
|
| |
|
|
| |
def ask_ollama(question: str) -> str:
    """Send *question* to the local Ollama server and return its answer.

    Posts to ``/api/generate`` on ``localhost:11434`` with the model named by
    ``OLLAMA_MODEL``. The reply body is read as newline-delimited JSON and the
    ``"response"`` field of every record is concatenated.

    Args:
        question: Prompt text forwarded to the model unchanged.

    Returns:
        The concatenated answer with surrounding whitespace stripped. On any
        failure the error is shown with ``st.error`` and a fixed fallback
        sentence is returned instead, so this function never raises.
    """
    try:
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": OLLAMA_MODEL, "prompt": question},
            # (connect, read) seconds. Without a timeout requests waits
            # indefinitely, which would freeze the page if the server stalls.
            timeout=(5, 300),
        )
        res.raise_for_status()

        # Decode explicitly instead of using res.text, whose encoding is
        # guessed when the server sends no charset. Split on "\n" only:
        # str.splitlines() also breaks on characters such as U+0085 and
        # U+2028, which may legitimately occur inside a JSON string.
        body = res.content.decode("utf-8")
        fragments = []
        for line in body.split("\n"):
            if not line.strip():
                continue
            record = json.loads(line)
            # Surface an error record as a failure rather than silently
            # producing an empty answer.
            if record.get("error"):
                raise RuntimeError(record["error"])
            fragments.append(record.get("response", ""))
        return "".join(fragments).strip()
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the caller expects
        # a speakable string back whatever went wrong.
        st.error(f"Ollama error: {e}")
        # NOTE(review): this literal looks mis-decoded (UTF-8 text read with
        # the wrong code page); restore it from the original source file.
        return "ืืฆืืขืจ, ืื ืืฆืืืชื ืืืชืืืจ ืืืืื ืืืงืืื."
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_tts_model(model_name):
    """Build the Coqui TTS model once; Streamlit's resource cache reuses it."""
    # NOTE(review): a cached resource is shared by every session of the app;
    # confirm concurrent synthesis is acceptable if this is ever multi-user.
    return TTS(model_name, gpu=True)


def synthesize_speech(text, lang="he"):
    """Synthesize *text* to a temporary WAV file with Coqui XTTS v2.

    Args:
        text: The text to speak.
        lang: Language code passed to ``tts_to_file`` as ``language``.

    Returns:
        Path of the WAV file that was written. The file is created with
        ``tempfile.mkstemp`` and is not deleted here; the caller owns it.

    Raises:
        Whatever the TTS library raises. In that case the temporary file is
        removed before the exception propagates.
    """
    # NOTE(review): confirm that this XTTS v2 model accepts language="he" and
    # can run without a speaker reference -- not verifiable from this file.
    # NOTE(review): the literal below looks mis-decoded; restore from source.
    print("๐ฃ๏ธ Generating speech with Coqui TTS...")

    # Loading the model is the expensive part, so it goes through the cached
    # loader instead of being rebuilt on every call.
    tts = _load_tts_model("tts_models/multilingual/multi-dataset/xtts_v2")

    # mkstemp returns an open descriptor; close it so the TTS library can
    # open the path itself.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(text=text, file_path=wav_path, language=lang)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise

    # The original literal was cut in two by a stray line break after its
    # leading symbol, which made the statement unparsable; rejoined here.
    print("✅ Saved audio to:", wav_path)
    return wav_path
|
|
| |
# Feature flag checked by the generate button before a video is rendered.
SADTALKER_AVAILABLE = True

# Page metadata first, then the visible heading.
st.set_page_config(
    page_title="Avatar Lip Sync",
    page_icon="๐ฌ",
    layout="centered",
)
st.title("๐ง Avatar Chatbot & Lip Sync Studio")
|
|
| |
# --- Avatar image upload ---------------------------------------------------
# Leaves `avatar_image_path` set to the saved image's path, or None when no
# image has been uploaded.
# NOTE(review): the Hebrew literals in this section look mis-decoded; they
# are reproduced unchanged and should be restored from the original source.
st.subheader("๐ง ืืขืืืช ืชืืื ืช ืืืืืจ")
avatar_image = st.file_uploader("ืืืจ ืชืืื ื (JPG/PNG):", type=["jpg", "jpeg", "png"])

avatar_image_path = None
if avatar_image is not None:
    # The file name comes from the client; keep only its final component so
    # it cannot point outside the temp directory.
    safe_name = os.path.basename(avatar_image.name)
    avatar_image_path = os.path.join(tempfile.gettempdir(), safe_name)
    with open(avatar_image_path, "wb") as f:
        # getvalue() returns the whole buffer regardless of the current read
        # position, unlike read().
        f.write(avatar_image.getvalue())
    st.image(avatar_image_path, caption="Avatar Image", width=250)
|
|
| |
# --- Text input, talk mode and voice selection -----------------------------
st.subheader("๐ฌ ืืชืื ืฉืืื ืื ืืงืกื ืืฉืืื")
user_input = st.text_area("ืืงืื ืืงืกื:", "")

# The generate button compares the chosen mode against the prefix of the
# first entry: that entry speaks the text as typed, any other entry sends it
# through ask_ollama().
_TALK_MODES = ["ืฉืืื (ืืงืกื ืืืคืฉื)", "ืฆ'ืื ืขื ืืืื Ollama"]
talk_mode = st.radio("ืืืจ ืืฆื:", _TALK_MODES)

# Each label is "<voice id> (<gender>)".
_VOICE_LABELS = [
    "en-US-GuyNeural (Male)",
    "en-US-JennyNeural (Female)",
    "he-IL-AsafNeural (Male)",
    "he-IL-NoaNeural (Female)",
]
selected_voice = st.selectbox("ืืืจ ืงืื ืืืืืืจ:", _VOICE_LABELS)

# Keep only the voice id, i.e. everything before the first space.
# NOTE(review): VOICE_CODE is not read anywhere in the visible code, so the
# selection currently has no effect on synthesis -- confirm whether intended.
VOICE_CODE = selected_voice.partition(" ")[0]
|
|
| |
# --- Optional MP3 upload with duration and waveform preview -----------------
# When a file is uploaded, `mp3_path` is left pointing at a temp copy of it.
# NOTE(review): the Hebrew literals in this section look mis-decoded; they
# are reproduced unchanged and should be restored from the original source.
st.subheader("๐ต ืืขืืืช ืงืืืฅ MP3 ืืกืื ืืจืื ืฉืคืชืืื")
uploaded_audio = st.file_uploader("ืืขืื ืงืืืฅ MP3:", type=["mp3"])

if uploaded_audio is not None:
    # NOTE(review): delete=False and nothing here removes the file later, so
    # every script run with an upload leaves another temp file behind.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_audio:
        # getvalue() returns the whole buffer regardless of read position.
        tmp_audio.write(uploaded_audio.getvalue())
        mp3_path = tmp_audio.name

    audio = AudioSegment.from_file(mp3_path)
    # len() of the segment is divided by 1000 to display seconds.
    duration_seconds = len(audio) / 1000
    st.audio(mp3_path)
    st.info(f"โฑ ืืืจื ืืงืืืฅ: {duration_seconds:.1f} ืฉื ืืืช")

    # The preview is best-effort: a failure becomes a warning, not a crash.
    try:
        samples = np.array(audio.get_array_of_samples())
        # Use an explicit figure and close it afterwards. Drawing on the
        # implicit global figure left one more open figure per script run.
        fig, ax = plt.subplots(figsize=(6, 1.5))
        try:
            ax.plot(samples[::200])  # every 200th sample keeps the plot light
            ax.set_title("Waveform Preview")
            ax.axis("off")
            st.pyplot(fig)
        finally:
            plt.close(fig)
    except Exception as e:
        st.warning(f"Waveform preview unavailable: {e}")
|
|
| |
# --- Generate button: choose or synthesize audio, then render the video -----
# NOTE(review): the Hebrew literals in this section look mis-decoded; they
# are reproduced unchanged and should be restored from the original source.
if st.button("ืฉืื"):
    # create_sadtalker_video is neither defined nor imported in the visible
    # part of this file. Treat a missing renderer like an unavailable
    # SadTalker so the click shows the existing error instead of a NameError.
    sadtalker_ready = SADTALKER_AVAILABLE and "create_sadtalker_video" in globals()

    if not avatar_image_path:
        st.warning("ืื ื ืืขืื ืชืืื ืช ืืืืืจ ืชืืืื.")
    elif not sadtalker_ready:
        st.error("SadTalker ืื ืืืชืงื. ืื ื ืืชืงื ืืคื ืืืืจืืืช.")
    else:
        if uploaded_audio is not None:
            # An uploaded file wins; mp3_path was set by the upload section.
            # (This literal was split by a stray line break; rejoined.)
            st.success("✅ ืืืืื ื ืืขื ืืืฆืืื! ืืฉืชืืฉ ืืงืืืฅ ืฉืืขืืืช.")
        else:
            # Whitespace-only text counts as empty: there is nothing to speak.
            if not user_input.strip():
                st.warning("ืื ื ืืงืื ืฉืืื ืื ืืขืื ืงืืืฅ ืืืืื.")
                st.stop()
            with st.spinner("๐ค ืืืื ืืชืืืื ืืืืื ืืืงืืื..."):
                # First radio option: speak the text as typed.
                # Otherwise: ask the local model for an answer.
                if talk_mode.startswith("ืฉืืื"):
                    response = user_input
                else:
                    response = ask_ollama(user_input)
            st.success(response)
            with st.spinner("๐๏ธ ืืืฆืจ ืงืื..."):
                # Despite the variable name, synthesize_speech returns the
                # path of a .wav file.
                mp3_path = synthesize_speech(response, lang="he")
            st.audio(mp3_path)

        # A second-resolution timestamp distinguishes output file names.
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_video = os.path.join(RESULTS_DIR, f"avatar_{timestamp}.mp4")

        with st.spinner("๐ฌ ืืืฆืจ ืืืืื ืขื ืกืื ืืจืื ืฉืคืชืืื..."):
            video_path = create_sadtalker_video(avatar_image_path, mp3_path, output_video)

        if video_path and os.path.exists(video_path):
            # (This literal was split by a stray line break; rejoined.)
            st.success(f"✅ ืืกืจืืื ื ืฉืืจ ืืชืืงืืืช results!\n๐ {video_path}")
            st.video(video_path)
            with open(video_path, "rb") as vid_file:
                st.download_button(
                    label="โฌ๏ธ ืืืจื ืืช ืืกืจืืื",
                    data=vid_file,
                    file_name=os.path.basename(video_path),
                    mime="video/mp4",
                )
        else:
            st.error("โ ืื ืืฆืืืชื ืืืฆืืจ ืืช ืืกืจืืื. ืืืืง ืืช SadTalker.")
|
|