gadhalevy committed on
Commit
93d00d2
·
1 Parent(s): a3f0c24

Switch to offline pyttsx3 TTS for Hugging Face CPU Space

Browse files
Files changed (3) hide show
  1. Dockerfile +100 -62
  2. avatar_streamlit.py +37 -6
  3. requirements.txt +18 -8
Dockerfile CHANGED
@@ -1,83 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ================================================================
2
- # Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
3
- # Optimized for Hugging Face Spaces Free Tier
4
  # ================================================================
5
 
6
- # ---------- Base Image ----------
7
  FROM python:3.10-slim
8
 
9
  ENV DEBIAN_FRONTEND=noninteractive
10
  WORKDIR /app
11
 
12
- # ---------- System Dependencies ----------
13
  RUN apt-get update && apt-get install -y \
14
- ffmpeg git wget curl build-essential \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
- # ---------- Environment Variables ----------
 
 
 
18
  ENV PYTHONUNBUFFERED=1
19
- ENV COQUI_TOS_AGREED=1
20
  ENV CUDA_VISIBLE_DEVICES=""
21
  ENV DEVICE=cpu
22
- ENV PYTORCH_ENABLE_MPS_FALLBACK=1
23
-
24
- # ---------- Install Python Dependencies (Memory-optimized order) ----------
25
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel
26
-
27
- # Install NumPy with version that satisfies all dependencies
28
- RUN pip install --no-cache-dir "numpy>=1.23.5,<1.24"
29
 
30
- # CPU-only PyTorch (use version compatible with TTS library)
31
- # PyTorch 2.6+ breaks TTS due to weights_only=True default, so use 2.5.1
32
- RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
33
-
34
- # Core dependencies - install in smaller batches
35
- RUN pip install --no-cache-dir scipy==1.11.3
36
- RUN pip install --no-cache-dir scikit-learn==1.5.0
37
- RUN pip install --no-cache-dir librosa==0.10.2.post1
38
- RUN pip install --no-cache-dir Cython==3.0.10
39
-
40
- # Streamlit and lighter dependencies
41
- RUN pip install --no-cache-dir \
42
- streamlit \
43
- pydub \
44
- matplotlib \
45
- requests \
46
- python-dotenv
47
 
48
  # ---------- Clone SadTalker ----------
49
- RUN git clone --depth 1 https://github.com/OpenTalker/SadTalker.git /app/SadTalker
50
-
51
- # Install SadTalker requirements with correct NumPy
52
- RUN cd /app/SadTalker && \
53
- sed -i 's/numpy.*/numpy>=1.23.5,<1.24/' requirements.txt && \
54
- pip install --no-cache-dir -r requirements.txt
55
-
56
- # ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
57
- RUN pip install --no-cache-dir transformers==4.36.2
58
- RUN pip install --no-cache-dir sentencepiece==0.2.0
59
- RUN pip install --no-cache-dir accelerate==0.25.0
60
- RUN pip install --no-cache-dir TTS==0.22.0
61
-
62
- # Fix any NumPy version conflicts
63
- RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"
64
-
65
- # ---------- Copy Application Files ----------
66
- COPY avatar_streamlit.py /app/
67
-
68
- # ---------- Fix NumPy Compatibility in SadTalker ----------
69
- RUN cd /app/SadTalker && \
70
- find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
71
- sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
72
- sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
73
- echo "✅ SadTalker fixes applied!"
74
-
75
- # ---------- Preload Coqui Model (Skip on build to save memory) ----------
76
- # Model will download on first run instead
77
- # RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true
78
 
79
- # ---------- Expose Port and Run ----------
80
  EXPOSE 8501
81
  CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
82
 
83
- # FORCE_REBUILD: 2025-01-04-v5
 
1
+ # # ================================================================
2
+ # # Avatar App - SadTalker + Coqui TTS (CPU version, HF-ready)
3
+ # # Optimized for Hugging Face Spaces Free Tier
4
+ # # ================================================================
5
+
6
+ # # ---------- Base Image ----------
7
+ # FROM python:3.10-slim
8
+
9
+ # ENV DEBIAN_FRONTEND=noninteractive
10
+ # WORKDIR /app
11
+
12
+ # # ---------- System Dependencies ----------
13
+ # RUN apt-get update && apt-get install -y \
14
+ # ffmpeg git wget curl build-essential \
15
+ # && rm -rf /var/lib/apt/lists/*
16
+
17
+ # # ---------- Environment Variables ----------
18
+ # ENV PYTHONUNBUFFERED=1
19
+ # ENV COQUI_TOS_AGREED=1
20
+ # ENV CUDA_VISIBLE_DEVICES=""
21
+ # ENV DEVICE=cpu
22
+ # ENV PYTORCH_ENABLE_MPS_FALLBACK=1
23
+
24
+ # # ---------- Install Python Dependencies (Memory-optimized order) ----------
25
+ # RUN pip install --no-cache-dir --upgrade pip setuptools wheel
26
+
27
+ # # Install NumPy with version that satisfies all dependencies
28
+ # RUN pip install --no-cache-dir "numpy>=1.23.5,<1.24"
29
+
30
+ # # CPU-only PyTorch (use version compatible with TTS library)
31
+ # # PyTorch 2.6+ breaks TTS due to weights_only=True default, so use 2.5.1
32
+ # RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
33
+
34
+ # # Core dependencies - install in smaller batches
35
+ # RUN pip install --no-cache-dir scipy==1.11.3
36
+ # RUN pip install --no-cache-dir scikit-learn==1.5.0
37
+ # RUN pip install --no-cache-dir librosa==0.10.2.post1
38
+ # RUN pip install --no-cache-dir Cython==3.0.10
39
+
40
+ # # Streamlit and lighter dependencies
41
+ # RUN pip install --no-cache-dir \
42
+ # streamlit \
43
+ # pydub \
44
+ # matplotlib \
45
+ # requests \
46
+ # python-dotenv
47
+
48
+ # # ---------- Clone SadTalker ----------
49
+ # RUN git clone --depth 1 https://github.com/OpenTalker/SadTalker.git /app/SadTalker
50
+
51
+ # # Install SadTalker requirements with correct NumPy
52
+ # RUN cd /app/SadTalker && \
53
+ # sed -i 's/numpy.*/numpy>=1.23.5,<1.24/' requirements.txt && \
54
+ # pip install --no-cache-dir -r requirements.txt
55
+
56
+ # # ---------- Install Coqui TTS Stack (in stages to manage memory) ----------
57
+ # RUN pip install --no-cache-dir transformers==4.36.2
58
+ # RUN pip install --no-cache-dir sentencepiece==0.2.0
59
+ # RUN pip install --no-cache-dir accelerate==0.25.0
60
+ # RUN pip install --no-cache-dir TTS==0.22.0
61
+
62
+ # # Fix any NumPy version conflicts
63
+ # RUN pip install --no-cache-dir --force-reinstall "numpy>=1.23.5,<1.24"
64
+
65
+ # # ---------- Copy Application Files ----------
66
+ # COPY avatar_streamlit.py /app/
67
+
68
+ # # ---------- Fix NumPy Compatibility in SadTalker ----------
69
+ # RUN cd /app/SadTalker && \
70
+ # find . -name "*.py" -type f -exec sed -i 's/\bnp\.float\b/np.float64/g' {} + && \
71
+ # sed -i 's/warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/# warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)/' src/face3d/util/preprocess.py && \
72
+ # sed -i 's/trans_params = np.array(\[w0, h0, s, t\[0\], t\[1\]\])/trans_params = np.array([w0, h0, s, float(t[0]), float(t[1])])/' src/face3d/util/preprocess.py && \
73
+ # echo "✅ SadTalker fixes applied!"
74
+
75
+ # # ---------- Preload Coqui Model (Skip on build to save memory) ----------
76
+ # # Model will download on first run instead
77
+ # # RUN python3 -c "from TTS.api import TTS; TTS('tts_models/multilingual/multi-dataset/xtts_v2', gpu=False)" || true
78
+
79
+ # # ---------- Expose Port and Run ----------
80
+ # EXPOSE 8501
81
+ # CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
82
+
83
+ # # FORCE_REBUILD: 2025-01-04-v5
84
  # ================================================================
85
+ # Avatar App - SadTalker + pyttsx3 (Offline CPU version)
86
+ # Works fully offline on Hugging Face (Free CPU tier)
87
  # ================================================================
88
 
 
89
  FROM python:3.10-slim
90
 
91
  ENV DEBIAN_FRONTEND=noninteractive
92
  WORKDIR /app
93
 
94
+ # ---------- System dependencies ----------
95
  RUN apt-get update && apt-get install -y \
96
+ ffmpeg git wget curl build-essential espeak \
97
  && rm -rf /var/lib/apt/lists/*
98
 
99
+ # ---------- Copy source files ----------
100
+ COPY . /app
101
+
102
+ # ---------- Environment ----------
103
  ENV PYTHONUNBUFFERED=1
 
104
  ENV CUDA_VISIBLE_DEVICES=""
105
  ENV DEVICE=cpu
 
 
 
 
 
 
 
106
 
107
+ # ---------- Python dependencies ----------
108
+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
109
+ && pip install --no-cache-dir -r requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  # ---------- Clone SadTalker ----------
112
+ RUN git clone https://github.com/OpenTalker/SadTalker.git /app/SadTalker \
113
+ && pip install --no-cache-dir -r /app/SadTalker/requirements.txt \
114
+ && pip uninstall -y torch torchvision torchaudio \
115
+ && pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # ---------- Expose and run ----------
118
  EXPOSE 8501
119
  CMD ["streamlit", "run", "avatar_streamlit.py", "--server.port=8501", "--server.address=0.0.0.0"]
120
 
121
+ # FORCE_REBUILD: 2025-10-30_21-00
avatar_streamlit.py CHANGED
@@ -58,17 +58,48 @@ def ask_ollama(question: str) -> str:
58
  # return mp3_path
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def synthesize_speech(text, lang="he"):
62
- """Offline GPU TTS using Coqui XTTS v2."""
63
- print("🗣️ Generating speech with Coqui TTS...")
64
- model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
65
- tts = TTS(model_name, gpu=False)
66
  fd, wav_path = tempfile.mkstemp(suffix=".wav")
67
  os.close(fd)
68
- tts.tts_to_file(text=text, file_path=wav_path, language=lang)
69
- print("✅ Saved audio to:", wav_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  return wav_path
71
 
 
72
  # --- Check SadTalker availability ---
73
  SADTALKER_AVAILABLE = True # set False if not installed
74
  # ========== SadTalker Integration ==========
 
58
  # return mp3_path
59
 
60
 
61
+ # def synthesize_speech(text, lang="he"):
62
+ # """Offline GPU TTS using Coqui XTTS v2."""
63
+ # print("🗣️ Generating speech with Coqui TTS...")
64
+ # model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
65
+ # tts = TTS(model_name, gpu=False)
66
+ # fd, wav_path = tempfile.mkstemp(suffix=".wav")
67
+ # os.close(fd)
68
+ # tts.tts_to_file(text=text, file_path=wav_path, language=lang)
69
+ # print("✅ Saved audio to:", wav_path)
70
+ # return wav_path
71
+
72
+ import pyttsx3
73
+ import tempfile
74
+ import os
75
+
76
  def synthesize_speech(text, lang="he"):
77
+ """Offline speech synthesis using pyttsx3 (no internet)."""
78
+ print("🗣️ Generating speech locally with pyttsx3...")
 
 
79
  fd, wav_path = tempfile.mkstemp(suffix=".wav")
80
  os.close(fd)
81
+
82
+ engine = pyttsx3.init()
83
+ voices = engine.getProperty("voices")
84
+
85
+ # Try to use Hebrew if available
86
+ selected = False
87
+ for voice in voices:
88
+ if "he" in voice.id.lower() or "hebrew" in voice.name.lower():
89
+ engine.setProperty("voice", voice.id)
90
+ selected = True
91
+ print(f"✅ Using Hebrew voice: {voice.name}")
92
+ break
93
+
94
+ if not selected:
95
+ print("⚠️ No Hebrew voice found, using default voice.")
96
+
97
+ engine.save_to_file(text, wav_path)
98
+ engine.runAndWait()
99
+ print("✅ Speech saved to:", wav_path)
100
  return wav_path
101
 
102
+
103
  # --- Check SadTalker availability ---
104
  SADTALKER_AVAILABLE = True # set False if not installed
105
  # ========== SadTalker Integration ==========
requirements.txt CHANGED
@@ -1,13 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  numpy==1.23.5
2
  scipy==1.11.3
3
  scikit-learn==1.5.0
4
  librosa==0.10.2.post1
5
- streamlit
6
- TTS==0.22.0
7
- transformers==4.36.2
8
- sentencepiece==0.2.0
9
- accelerate==0.25.0
10
- pydub
11
- matplotlib
12
  requests
13
- python-dotenv
 
1
+ # numpy==1.23.5
2
+ # scipy==1.11.3
3
+ # scikit-learn==1.5.0
4
+ # librosa==0.10.2.post1
5
+ # streamlit
6
+ # TTS==0.22.0
7
+ # transformers==4.36.2
8
+ # sentencepiece==0.2.0
9
+ # accelerate==0.25.0
10
+ # pydub
11
+ # matplotlib
12
+ # requests
13
+ # python-dotenv
14
+ streamlit
15
+ pyttsx3
16
+ pydub
17
+ matplotlib
18
  numpy==1.23.5
19
  scipy==1.11.3
20
  scikit-learn==1.5.0
21
  librosa==0.10.2.post1
 
 
 
 
 
 
 
22
  requests
23
+ python-dotenv