gadhalevy committed on
Commit ·
93d00d2
1
Parent(s): a3f0c24
Switch to offline pyttsx3 TTS for Hugging Face CPU Space
Browse files- Dockerfile +100 -62
- avatar_streamlit.py +37 -6
- requirements.txt +18 -8
Dockerfile
CHANGED
|
@@ -1,83 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
-
# Avatar App - SadTalker +
|
| 3 |
-
#
|
| 4 |
# ================================================================
|
| 5 |
|
| 6 |
-
# ---------- Base Image ----------
|
| 7 |
FROM python:3.10-slim
|
| 8 |
|
| 9 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
-
# ---------- System
|
| 13 |
RUN apt-get update && apt-get install -y \
|
| 14 |
-
ffmpeg git wget curl build-essential \
|
| 15 |
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
|
| 17 |
-
# ----------
|
|
|
|
|
|
|
|
|
|
| 18 |
ENV PYTHONUNBUFFERED=1
|
| 19 |
-
ENV COQUI_TOS_AGREED=1
|
| 20 |
ENV CUDA_VISIBLE_DEVICES=""
|
| 21 |
ENV DEVICE=cpu
|
| 22 |
-
ENV PYTORCH_ENABLE_MPS_FALLBACK=1
|
| 23 |
-
|
| 24 |
-
# ---------- Install Python Dependencies (Memory-optimized order) ----------
|
| 25 |
-
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
|
| 26 |
-
|
| 27 |
-
# Install NumPy with version that satisfies all dependencies
|
| 28 |
-
RUN pip install --no-cache-dir "numpy>=1.23.5,<1.24"
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
# Core dependencies - install in smaller batches
|
| 35 |
-
RUN pip install --no-cache-dir scipy==1.11.3
|
| 36 |
-
RUN pip install --no-cache-dir scikit-learn==1.5.0
|
| 37 |
-
RUN pip install --no-cache-dir librosa==0.10.2.post1
|
| 38 |
-
RUN pip install --no-cache-dir Cython==3.0.10
|
| 39 |
-
|
| 40 |
-
# Streamlit and lighter dependencies
|
| 41 |
-
RUN pip install --no-cache-dir \
|
| 42 |
-
streamlit \
|
| 43 |
-
pydub \
|
| 44 |
-
matplotlib \
|
| 45 |
-
requests \
|
| 46 |
-
python-dotenv
|
| 47 |
|
| 48 |
# ---------- Clone SadTalker ----------
|
| 49 |
-
RUN git clone
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
sed -i 's/numpy.*/numpy>=1.23.5,<1.24/' requirements.txt && \
|
| 54 |
-
pip install --no-cache-dir -r requirements.txt
|
| 55 |
-
|
| 56 |
-
# ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
|
| 57 |
-
RUN pip install --no-cache-dir transformers==4.36.2
|
| 58 |
-
RUN pip install --no-cache-dir sentencepiece==0.2.0
|
| 59 |
-
RUN pip install --no-cache-dir accelerate==0.25.0
|
| 60 |
-
RUN pip install --no-cache-dir TTS==0.22.0
|
| 61 |
-
|
| 62 |
-
# Fix any NumPy version conflicts
|
| 63 |
-
RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"
|
| 64 |
-
|
| 65 |
-
# ---------- Copy Application Files ----------
|
| 66 |
-
COPY avatar_streamlit.py /app/
|
| 67 |
-
|
| 68 |
-
# ---------- Fix NumPy Compatibility in SadTalker ----------
|
| 69 |
-
RUN cd /app/SadTalker && \
|
| 70 |
-
find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
|
| 71 |
-
sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
|
| 72 |
-
sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
|
| 73 |
-
echo "✅ SadTalker fixes applied!"
|
| 74 |
-
|
| 75 |
-
# ---------- Preload Coqui Model (Skip on build to save memory) ----------
|
| 76 |
-
# Model will download on first run instead
|
| 77 |
-
# RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true
|
| 78 |
|
| 79 |
-
# ---------- Expose
|
| 80 |
EXPOSE 8501
|
| 81 |
CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
| 82 |
|
| 83 |
-
# FORCE_REBUILD: 2025-
|
|
|
|
| 1 |
+
# # ================================================================
|
| 2 |
+
# # Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
|
| 3 |
+
# # Optimized for Hugging Face Spaces Free Tier
|
| 4 |
+
# # ================================================================
|
| 5 |
+
|
| 6 |
+
# # ---------- Base Image ----------
|
| 7 |
+
# FROM python:3.10-slim
|
| 8 |
+
|
| 9 |
+
# ENV DEBIAN_FRONTEND=noninteractive
|
| 10 |
+
# WORKDIR /app
|
| 11 |
+
|
| 12 |
+
# # ---------- System Dependencies ----------
|
| 13 |
+
# RUN apt-get update && apt-get install -y \
|
| 14 |
+
# ffmpeg git wget curl build-essential \
|
| 15 |
+
# && rm -rf /var/lib/apt/lists/*
|
| 16 |
+
|
| 17 |
+
# # ---------- Environment Variables ----------
|
| 18 |
+
# ENV PYTHONUNBUFFERED=1
|
| 19 |
+
# ENV COQUI_TOS_AGREED=1
|
| 20 |
+
# ENV CUDA_VISIBLE_DEVICES=""
|
| 21 |
+
# ENV DEVICE=cpu
|
| 22 |
+
# ENV PYTORCH_ENABLE_MPS_FALLBACK=1
|
| 23 |
+
|
| 24 |
+
# # ---------- Install Python Dependencies (Memory-optimized order) ----------
|
| 25 |
+
# RUN pip install --no-cache-dir --upgrade pip setuptools wheel
|
| 26 |
+
|
| 27 |
+
# # Install NumPy with version that satisfies all dependencies
|
| 28 |
+
# RUN pip install --no-cache-dir "numpy>=1.23.5,<1.24"
|
| 29 |
+
|
| 30 |
+
# # CPU-only PyTorch (use version compatible with TTS library)
|
| 31 |
+
# # PyTorch 2.6+ breaks TTS due to weights_only=True default, so use 2.5.1
|
| 32 |
+
# RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
| 33 |
+
|
| 34 |
+
# # Core dependencies - install in smaller batches
|
| 35 |
+
# RUN pip install --no-cache-dir scipy==1.11.3
|
| 36 |
+
# RUN pip install --no-cache-dir scikit-learn==1.5.0
|
| 37 |
+
# RUN pip install --no-cache-dir librosa==0.10.2.post1
|
| 38 |
+
# RUN pip install --no-cache-dir Cython==3.0.10
|
| 39 |
+
|
| 40 |
+
# # Streamlit and lighter dependencies
|
| 41 |
+
# RUN pip install --no-cache-dir \
|
| 42 |
+
# streamlit \
|
| 43 |
+
# pydub \
|
| 44 |
+
# matplotlib \
|
| 45 |
+
# requests \
|
| 46 |
+
# python-dotenv
|
| 47 |
+
|
| 48 |
+
# # ---------- Clone SadTalker ----------
|
| 49 |
+
# RUN git clone --depth 1 https://github.com/OpenTalker/SadTalker.git /app/SadTalker
|
| 50 |
+
|
| 51 |
+
# # Install SadTalker requirements with correct NumPy
|
| 52 |
+
# RUN cd /app/SadTalker && \
|
| 53 |
+
# sed -i 's/numpy.*/numpy>=1.23.5,<1.24/' requirements.txt && \
|
| 54 |
+
# pip install --no-cache-dir -r requirements.txt
|
| 55 |
+
|
| 56 |
+
# # ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
|
| 57 |
+
# RUN pip install --no-cache-dir transformers==4.36.2
|
| 58 |
+
# RUN pip install --no-cache-dir sentencepiece==0.2.0
|
| 59 |
+
# RUN pip install --no-cache-dir accelerate==0.25.0
|
| 60 |
+
# RUN pip install --no-cache-dir TTS==0.22.0
|
| 61 |
+
|
| 62 |
+
# # Fix any NumPy version conflicts
|
| 63 |
+
# RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"
|
| 64 |
+
|
| 65 |
+
# # ---------- Copy Application Files ----------
|
| 66 |
+
# COPY avatar_streamlit.py /app/
|
| 67 |
+
|
| 68 |
+
# # ---------- Fix NumPy Compatibility in SadTalker ----------
|
| 69 |
+
# RUN cd /app/SadTalker && \
|
| 70 |
+
# find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
|
| 71 |
+
# sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
|
| 72 |
+
# sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
|
| 73 |
+
# echo "✅ SadTalker fixes applied!"
|
| 74 |
+
|
| 75 |
+
# # ---------- Preload Coqui Model (Skip on build to save memory) ----------
|
| 76 |
+
# # Model will download on first run instead
|
| 77 |
+
# # RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true
|
| 78 |
+
|
| 79 |
+
# # ---------- Expose Port and Run ----------
|
| 80 |
+
# EXPOSE 8501
|
| 81 |
+
# CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
| 82 |
+
|
| 83 |
+
# # FORCE_REBUILD: 2025-01-04-v5
|
| 84 |
# ================================================================
|
| 85 |
+
# Avatar App - SadTalker + pyttsx3 (Offline CPU version)
|
| 86 |
+
# Works fully offline on Hugging Face (Free CPU tier)
|
| 87 |
# ================================================================
|
| 88 |
|
|
|
|
| 89 |
FROM python:3.10-slim
|
| 90 |
|
| 91 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 92 |
WORKDIR /app
|
| 93 |
|
| 94 |
+
# ---------- System dependencies ----------
|
| 95 |
RUN apt-get update && apt-get install -y \
|
| 96 |
+
ffmpeg git wget curl build-essential espeak \
|
| 97 |
&& rm -rf /var/lib/apt/lists/*
|
| 98 |
|
| 99 |
+
# ---------- Copy source files ----------
|
| 100 |
+
COPY . /app
|
| 101 |
+
|
| 102 |
+
# ---------- Environment ----------
|
| 103 |
ENV PYTHONUNBUFFERED=1
|
|
|
|
| 104 |
ENV CUDA_VISIBLE_DEVICES=""
|
| 105 |
ENV DEVICE=cpu
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
+
# ---------- Python dependencies ----------
|
| 108 |
+
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 109 |
+
&& pip install --no-cache-dir -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
# ---------- Clone SadTalker ----------
|
| 112 |
+
RUN git clone https://github.com/OpenTalker/SadTalker.git /app/SadTalker \
|
| 113 |
+
&& pip install --no-cache-dir -r /app/SadTalker/requirements.txt \
|
| 114 |
+
&& pip uninstall -y torch torchvision torchaudio \
|
| 115 |
+
&& pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
# ---------- Expose and run ----------
|
| 118 |
EXPOSE 8501
|
| 119 |
CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
| 120 |
|
| 121 |
+
# FORCE_REBUILD: 2025-10-30_21-00
|
avatar_streamlit.py
CHANGED
|
@@ -58,17 +58,48 @@ def ask_ollama(question: str) -> str:
|
|
| 58 |
# return mp3_path
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def synthesize_speech(text, lang="he"):
|
| 62 |
-
"""Offline
|
| 63 |
-
print("🗣️ Generating speech with
|
| 64 |
-
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
|
| 65 |
-
tts = TTS(model_name, gpu=False)
|
| 66 |
fd, wav_path = tempfile.mkstemp(suffix=".wav")
|
| 67 |
os.close(fd)
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return wav_path
|
| 71 |
|
|
|
|
| 72 |
# --- Check SadTalker availability ---
|
| 73 |
SADTALKER_AVAILABLE = True # set False if not installed
|
| 74 |
# ========== SadTalker Integration ==========
|
|
|
|
| 58 |
# return mp3_path
|
| 59 |
|
| 60 |
|
| 61 |
+
# def synthesize_speech(text, lang="he"):
|
| 62 |
+
# """Offline GPU TTS using Coqui XTTS v2."""
|
| 63 |
+
# print("🗣️ Generating speech with Coqui TTS...")
|
| 64 |
+
# model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
|
| 65 |
+
# tts = TTS(model_name, gpu=False)
|
| 66 |
+
# fd, wav_path = tempfile.mkstemp(suffix=".wav")
|
| 67 |
+
# os.close(fd)
|
| 68 |
+
# tts.tts_to_file(text=text, file_path=wav_path, language=lang)
|
| 69 |
+
# print("✅ Saved audio to:", wav_path)
|
| 70 |
+
# return wav_path
|
| 71 |
+
|
| 72 |
+
import pyttsx3
|
| 73 |
+
import tempfile
|
| 74 |
+
import os
|
| 75 |
+
|
| 76 |
def synthesize_speech(text, lang="he"):
|
| 77 |
+
"""Offline speech synthesis using pyttsx3 (no internet)."""
|
| 78 |
+
print("🗣️ Generating speech locally with pyttsx3...")
|
|
|
|
|
|
|
| 79 |
fd, wav_path = tempfile.mkstemp(suffix=".wav")
|
| 80 |
os.close(fd)
|
| 81 |
+
|
| 82 |
+
engine = pyttsx3.init()
|
| 83 |
+
voices = engine.getProperty("voices")
|
| 84 |
+
|
| 85 |
+
# Try to use Hebrew if available
|
| 86 |
+
selected = False
|
| 87 |
+
for voice in voices:
|
| 88 |
+
if "he" in voice.id.lower() or "hebrew" in voice.name.lower():
|
| 89 |
+
engine.setProperty("voice", voice.id)
|
| 90 |
+
selected = True
|
| 91 |
+
print(f"✅ Using Hebrew voice: {voice.name}")
|
| 92 |
+
break
|
| 93 |
+
|
| 94 |
+
if not selected:
|
| 95 |
+
print("⚠️ No Hebrew voice found, using default voice.")
|
| 96 |
+
|
| 97 |
+
engine.save_to_file(text, wav_path)
|
| 98 |
+
engine.runAndWait()
|
| 99 |
+
print("✅ Speech saved to:", wav_path)
|
| 100 |
return wav_path
|
| 101 |
|
| 102 |
+
|
| 103 |
# --- Check SadTalker availability ---
|
| 104 |
SADTALKER_AVAILABLE = True # set False if not installed
|
| 105 |
# ========== SadTalker Integration ==========
|
requirements.txt
CHANGED
|
@@ -1,13 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
numpy==1.23.5
|
| 2 |
scipy==1.11.3
|
| 3 |
scikit-learn==1.5.0
|
| 4 |
librosa==0.10.2.post1
|
| 5 |
-
streamlit
|
| 6 |
-
TTS==0.22.0
|
| 7 |
-
transformers==4.36.2
|
| 8 |
-
sentencepiece==0.2.0
|
| 9 |
-
accelerate==0.25.0
|
| 10 |
-
pydub
|
| 11 |
-
matplotlib
|
| 12 |
requests
|
| 13 |
-
python-dotenv
|
|
|
|
| 1 |
+
# numpy==1.23.5
|
| 2 |
+
# scipy==1.11.3
|
| 3 |
+
# scikit-learn==1.5.0
|
| 4 |
+
# librosa==0.10.2.post1
|
| 5 |
+
# streamlit
|
| 6 |
+
# TTS==0.22.0
|
| 7 |
+
# transformers==4.36.2
|
| 8 |
+
# sentencepiece==0.2.0
|
| 9 |
+
# accelerate==0.25.0
|
| 10 |
+
# pydub
|
| 11 |
+
# matplotlib
|
| 12 |
+
# requests
|
| 13 |
+
# python-dotenv
|
| 14 |
+
streamlit
|
| 15 |
+
pyttsx3
|
| 16 |
+
pydub
|
| 17 |
+
matplotlib
|
| 18 |
numpy==1.23.5
|
| 19 |
scipy==1.11.3
|
| 20 |
scikit-learn==1.5.0
|
| 21 |
librosa==0.10.2.post1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
requests
|
| 23 |
+
python-dotenv
|