sameerbanchhor committed on
Commit
d2cdbdb
·
verified ·
1 Parent(s): ea0c6d8

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +12 -8
  2. app.py +266 -0
  3. requirements.txt +16 -0
README.md CHANGED
@@ -1,10 +1,14 @@
1
  ---
2
- license: mit
3
- title: ChattisVani
4
- sdk: docker
5
- emoji: 🏆
6
  colorFrom: red
7
- colorTo: gray
8
- pinned: true
9
- short_description: a cg app
10
- ---
 
 
 
 
 
 
 
1
  ---
2
+ title: Chattisgarh Speech
3
+ emoji: 🐢
 
 
4
  colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.34.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: a Chhattisgarhi TTS model demo
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import soundfile as sf
5
+ import os
6
+ import tempfile
7
+ import logging
8
+ from pathlib import Path
9
+
10
+ # Set up logging
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Global variable to store TTS model
15
+ tts_model = None
16
+ model_loaded = False
17
+
18
def load_tts_model():
    """Load a TTS model into the module-level ``tts_model``.

    Sources are tried in order and the first one that succeeds wins:

    1. The ``SYSPIN/vits_Chhattisgarhi_Female`` checkpoint downloaded from
       the Hugging Face Hub.
    2. A ``best_model.pth`` / ``config.json`` pair found on local disk.
    3. The generic multilingual ``xtts_v2`` model as a last resort.

    On success the globals ``tts_model`` and ``model_loaded`` are updated, so
    later calls return immediately without reloading.

    Returns:
        bool: True if a model is available, False if every source failed.
        Failures are logged rather than raised.
    """
    global tts_model, model_loaded

    if model_loaded:
        return True

    # Every strategy below needs Coqui TTS, so a broken/missing install is
    # fatal for all of them: report it once with an accurate message.
    try:
        from TTS.api import TTS
    except Exception as e:
        logger.error(f"Critical error loading model: Coqui TTS could not be imported: {e}")
        return False

    # Method 1: download the custom checkpoint from the Hugging Face Hub.
    model_repo = "SYSPIN/vits_Chhattisgarhi_Female"
    try:
        from huggingface_hub import hf_hub_download

        logger.info(f"Attempting to load model from {model_repo}...")
        model_path = hf_hub_download(
            repo_id=model_repo,
            filename="best_model.pth",
            cache_dir="./model_cache",
        )
        config_path = hf_hub_download(
            repo_id=model_repo,
            filename="config.json",
            cache_dir="./model_cache",
        )
        tts_model = TTS(model_path=model_path, config_path=config_path)
    except ImportError:
        logger.warning("huggingface_hub not available, trying local files...")
    except Exception as e:
        logger.warning(f"Failed to load from HF Hub: {e}")
    else:
        model_loaded = True
        logger.info("✅ Model loaded successfully from Hugging Face Hub!")
        return True

    # Method 2: look for files uploaded to the Space or cloned next to it.
    local_paths = [
        ("./best_model.pth", "./config.json"),              # Current directory
        ("./model/best_model.pth", "./model/config.json"),  # Model subdirectory
        ("../best_model.pth", "../config.json"),            # Parent directory
    ]
    for model_path, config_path in local_paths:
        if not (os.path.exists(model_path) and os.path.exists(config_path)):
            continue
        logger.info(f"Found local model files at {model_path}")
        try:
            tts_model = TTS(model_path=model_path, config_path=config_path)
        except Exception as e:
            # A corrupt or incompatible checkpoint must not prevent trying
            # the remaining candidates or the generic fallback.
            logger.warning(f"Failed to load local model at {model_path}: {e}")
            continue
        model_loaded = True
        logger.info("✅ Model loaded successfully from local files!")
        return True

    # Method 3: generic multilingual model as a last resort.
    # NOTE(review): xtts_v2 may require accepting Coqui's model license
    # before it can be downloaded non-interactively -- confirm on the Space.
    logger.warning("Custom model not found, trying generic VITS model...")
    try:
        tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    except Exception as e:
        logger.error(f"Failed to load fallback model: {e}")
        return False

    model_loaded = True
    logger.info("✅ Loaded fallback multilingual model")
    return True
90
+
91
def generate_speech(text, speed=1.0):
    """Synthesize ``text`` to audio and return it together with a status message.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only input is
            rejected with a warning message instead of being synthesized.
        speed: Speaking-rate value forwarded to the model's ``tts_to_file``;
            it is dropped if the model rejects the keyword.

    Returns:
        tuple: ``((sample_rate, audio_data), status)`` on success, or
        ``(None, status)`` when the input is empty, no model can be loaded,
        or synthesis fails. Errors are reported through ``status`` and the
        log rather than raised.
    """
    global tts_model, model_loaded

    # The textbox can deliver None as well as "", so guard both.
    if not text or not text.strip():
        return None, "⚠️ Please enter some text to synthesize."

    # Try to load the model lazily if startup loading did not succeed.
    if not model_loaded and not load_tts_model():
        return None, "❌ Error: Could not load any TTS model. Please check the setup."

    output_path = None
    try:
        logger.info(f"Synthesizing: {text[:50]}...")

        # Only the unique name is needed: the handle is closed right away so
        # the TTS engine can write to the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            output_path = tmp_file.name

        _synthesize_to_file(text, output_path, speed)

        # Check that the file was created and has content.
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            return None, "❌ Error: Audio file was not generated properly."

        audio_data, sample_rate = sf.read(output_path)

        if len(audio_data) == 0:
            return None, "❌ Error: Generated audio is empty."

        logger.info("✅ Speech generated successfully!")
        return (sample_rate, audio_data), "✅ Speech generated successfully!"

    except Exception as e:
        error_msg = f"❌ Error during synthesis: {str(e)}"
        logger.error(error_msg)
        return None, error_msg

    finally:
        # The file is created with delete=False, so remove it on every exit
        # path (success, early return, exception) to avoid filling the disk.
        if output_path is not None and os.path.exists(output_path):
            try:
                os.unlink(output_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {output_path}: {cleanup_error}")


def _synthesize_to_file(text, output_path, speed):
    """Write speech for ``text`` to ``output_path``, adapting to the model's API."""
    try:
        # Preferred call for custom models.
        tts_model.tts_to_file(text=text, file_path=output_path, speed=speed)
    except TypeError:
        # Fallback for models that do not accept the speed parameter.
        try:
            tts_model.tts_to_file(text=text, file_path=output_path)
        except Exception as e:
            logger.warning(f"Plain synthesis failed ({e}); retrying with language='hi'")
            # For XTTS and other models that need different parameters.
            tts_model.tts_to_file(
                text=text,
                file_path=output_path,
                speaker_wav=None,  # Use default speaker
                language="hi",     # Hindi as closest language
            )
152
+
153
# Sample (text, speed) pairs shown both as quick-fill buttons and in the
# gr.Examples table below.
examples = [
    ["नमस्कार, का हाल बा?", 1.0],
    ["आज मोसम बहुत बढ़िया हे।", 1.0],
    ["तुमन कइसे हव?", 0.9],
    ["धन्यवाद।", 1.1],
    ["Hello, how are you?", 1.0],  # English fallback for testing
]

# Build the Gradio interface. Components and event bindings must be created
# inside the Blocks context so they are attached to ``demo``.
with gr.Blocks(
    title="Chhattisgarhi TTS",
    theme=gr.themes.Default(primary_hue="blue"),
) as demo:

    gr.HTML("""
    <div style="text-align: center; margin: 20px 0;">
    <h1>🗣️ Chhattisgarhi Text-to-Speech</h1>
    <p style="color: #666;">Generate natural Chhattisgarhi speech with AI</p>
    <p style="color: #888; font-size: 0.9em;">Powered by SySpin & Coqui TTS</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="📝 Enter Text",
                placeholder="छत्तीसगढ़ी में अपना टेक्स्ट लिखें... (Enter Chhattisgarhi text here)",
                lines=3,
                max_lines=6,
            )

            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="🎚️ Speech Speed",
                info="Adjust speaking rate (may not work with all models)",
            )

            generate_btn = gr.Button(
                "🎵 Generate Speech",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.Markdown("### Quick Examples")
            for text, _ in examples:
                btn = gr.Button(text, size="sm")
                # Bind ``text`` as a default argument so each button keeps
                # its own sentence instead of the loop's final value.
                btn.click(lambda x=text: x, outputs=text_input)

    with gr.Row():
        audio_output = gr.Audio(
            label="🔊 Generated Speech",
            type="numpy",
        )

        status_output = gr.Textbox(
            label="📊 Status",
            interactive=False,
            max_lines=3,
        )

    gr.Examples(
        examples=examples,
        inputs=[text_input, speed_slider],
        outputs=[audio_output, status_output],
        fn=generate_speech,
        cache_examples=False,
    )

    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        ### About This Model
        - **Language**: Chhattisgarhi (छत्तीसगढ़ी)
        - **Voice Type**: Female
        - **Training**: SySpin dataset
        - **Engine**: Coqui TTS

        ### Model Loading Strategy
        1. First tries to load the custom Chhattisgarhi model from Hugging Face
        2. Falls back to local files if available
        3. Uses a multilingual model as last resort

        ### How to Use
        1. Enter your text in Chhattisgarhi (Devanagari script preferred)
        2. Adjust speech speed if needed (may not work with all models)
        3. Click "Generate Speech"
        4. Listen to the generated audio

        ### Tips
        - Use proper punctuation for natural pauses
        - Shorter sentences often work better
        - If the custom model fails, a fallback model will be used
        """)

    # Event binding
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, speed_slider],
        outputs=[audio_output, status_output],
    )

    def _report_model_status():
        """Load the model when the page opens and describe the outcome.

        ``load_tts_model`` is synchronous, so a False result means loading
        has already failed -- it is reported as an error rather than as
        "still loading".
        """
        if load_tts_model():
            return "✅ Model ready!"
        return "❌ Error: Could not load any TTS model. Please check the setup."

    # Load model on startup
    demo.load(
        fn=_report_model_status,
        outputs=status_output,
    )

# Launch the app
if __name__ == "__main__":
    # NOTE(review): share=True requests a public tunnel link; it is believed
    # to be ignored when running on Hugging Face Spaces -- confirm whether it
    # is still wanted for local runs.
    demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TTS==0.22.0
2
+ torch>=1.9.0
3
+ torchaudio>=0.9.0
4
+ numpy>=1.21.0
5
+ soundfile>=0.12.1
6
+ librosa>=0.9.2
7
+ scipy>=1.7.0
8
+ pyyaml>=6.0
9
+ matplotlib>=3.5.0
10
+ Pillow>=8.3.0
11
+ numba>=0.56.0
12
+ inflect>=5.6.0
13
+ psutil>=5.8.0
14
+ pynndescent>=0.5.7
15
+ umap-learn>=0.5.3
16
+ huggingface_hub>=0.16.0