arifather51 committed on
Commit
bdd7316
·
verified ·
1 Parent(s): 3350704

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +21 -0
  2. LICENSE +13 -0
  3. README.md +11 -0
  4. app.py +1 -1
  5. config.py +119 -0
  6. dockerignore +3 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ FROM hadadrjt/pocket-tts:hf-20260121
7
+
8
+ WORKDIR /app
9
+
10
+ COPY . .
11
+
12
+ RUN mkdir build \
13
+ && cd build \
14
+ && cmake \
15
+ -DCMAKE_BUILD_TYPE=Release \
16
+ -DCMAKE_INSTALL_PREFIX=/app \
17
+ ../accelerator \
18
+ && make -j$(nproc) \
19
+ && make install \
20
+ && cd .. \
21
+ && rm -rf accelerator build
LICENSE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2025 Hadad <hadad@linuxmail.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: kyutai/pocket-tts
3
+ short_description: Pocket TTS optimized for Hugging Face Spaces on CPU
4
+ license: apache-2.0
5
+ emoji: ⚡
6
+ colorFrom: gray
7
+ colorTo: yellow
8
+ sdk: docker
9
+ app_port: 7860
10
+ pinned: false
11
+ ---
app.py CHANGED
@@ -170,7 +170,7 @@ with gr.Blocks(css=CSS, fill_height=False, fill_width=True) as app:
170
  label="Prompt",
171
  placeholder="Enter the text you want to convert to speech...",
172
  lines=2,
173
- max_lines=10000,
174
  max_length=MAXIMUM_INPUT_LENGTH,
175
  autoscroll=True
176
  )
 
170
  label="Prompt",
171
  placeholder="Enter the text you want to convert to speech...",
172
  lines=2,
173
+ max_lines=20,
174
  max_length=MAXIMUM_INPUT_LENGTH,
175
  autoscroll=True
176
  )
config.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import os
7
+
8
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
9
+
10
+ AVAILABLE_VOICES = [
11
+ "alba",
12
+ "marius",
13
+ "javert",
14
+ "jean",
15
+ "fantine",
16
+ "cosette",
17
+ "eponine",
18
+ "azelma"
19
+ ]
20
+
21
+ DEFAULT_VOICE = "alba"
22
+ DEFAULT_MODEL_VARIANT = "b6369a24"
23
+ DEFAULT_TEMPERATURE = 0.7
24
+ DEFAULT_LSD_DECODE_STEPS = 1
25
+ DEFAULT_EOS_THRESHOLD = -4.0
26
+ DEFAULT_NOISE_CLAMP = 0.0
27
+ DEFAULT_FRAMES_AFTER_EOS = 10
28
+
29
+ VOICE_MODE_PRESET = "Preset Voices"
30
+ VOICE_MODE_CLONE = "Voice Cloning"
31
+
32
+ VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
33
+ VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
34
+
35
+ BACKGROUND_CLEANUP_INTERVAL = 300
36
+
37
+ MAXIMUM_INPUT_LENGTH = 1000
38
+
39
+ TEMPORARY_FILE_LIFETIME_SECONDS = 7200
40
+
41
+ MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
42
+ MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
43
+ MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
44
+ MEMORY_CHECK_INTERVAL = 30
45
+ MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
46
+
47
+ SUPPORTED_AUDIO_EXTENSIONS = [
48
+ ".wav",
49
+ ".mp3",
50
+ ".flac",
51
+ ".ogg",
52
+ ".m4a",
53
+ ".aac",
54
+ ".wma",
55
+ ".aiff",
56
+ ".aif",
57
+ ".opus",
58
+ ".webm",
59
+ ".mp4",
60
+ ".mkv",
61
+ ".avi",
62
+ ".mov",
63
+ ".3gp"
64
+ ]
65
+
66
+ AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES = {
67
+ "m4a": "M4A/AAC",
68
+ "aif": "AIFF",
69
+ "3gp": "3GP"
70
+ }
71
+
72
+ EXAMPLE_PROMPTS = [
73
+ {
74
+ "text": "The quick brown fox jumps over the lazy dog near the riverbank.",
75
+ "voice": "alba"
76
+ },
77
+ {
78
+ "text": "Welcome to the future of text to speech technology powered by artificial intelligence.",
79
+ "voice": "marius"
80
+ },
81
+ {
82
+ "text": "Technology continues to push the boundaries of what we thought was possible.",
83
+ "voice": "javert"
84
+ },
85
+ {
86
+ "text": "The weather today is absolutely beautiful and perfect for a relaxing walk outside.",
87
+ "voice": "fantine"
88
+ },
89
+ {
90
+ "text": "Science and innovation are transforming how we interact with the world around us.",
91
+ "voice": "jean"
92
+ }
93
+ ]
94
+
95
+ KYUTAI_LOGO_URL = "https://cdn-avatars.huggingface.co/v1/production/uploads/6355a3c1805be5a8f30fea49/8xGdIOlfkopZfhbMitw_k.jpeg"
96
+ POCKET_TTS_LOGO_URL = "https://raw.githubusercontent.com/kyutai-labs/pocket-tts/refs/heads/main/docs/logo.png"
97
+
98
+ SPACE_INFO = "Pocket TTS"
99
+
100
+ HEADER_LINKS = [
101
+ {"icon": "🔊", "text": "Demo", "url": "https://kyutai.org/tts"},
102
+ {"icon": "🐱‍💻", "text": "GitHub", "url": "https://github.com/kyutai-labs/pocket-tts"},
103
+ {"icon": "🤗", "text": "Model Card", "url": "https://huggingface.co/kyutai/pocket-tts"},
104
+ {"icon": "🤗", "text": "Space", "url": "https://huggingface.co/spaces/hadadxyz/pocket-tts-hf-cpu-optimized"},
105
+ {"icon": "📄", "text": "Paper", "url": "https://arxiv.org/abs/2509.06926"},
106
+ {"icon": "📚", "text": "Docs", "url": "https://github.com/kyutai-labs/pocket-tts/tree/main/docs"},
107
+ ]
108
+
109
+ COPYRIGHT_NAME = "Hadad Darajat"
110
+ COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"
111
+
112
+ DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
113
+ DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"
114
+
115
+ ACCELERATOR_SOCKET_PATH = "/app/pocket_tts_accelerator.sock"
116
+ ACCELERATOR_BINARY_PATH = "/app/bin/pocket_tts_accelerator"
117
+ ACCELERATOR_WORKER_THREADS = 1
118
+ ACCELERATOR_MEMORY_POOL_MB = 64
119
+ ACCELERATOR_ENABLED = True
dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Dockerfile
2
+ LICENSE
3
+ README.md