Spaces:
Running
Running
Upload 4 files
Browse files
- app.py +8 -26
- config.json +4 -2
app.py
CHANGED
|
@@ -27,13 +27,13 @@ DEFAULT_CONFIG = {
|
|
| 27 |
'model': 'google/gemma-3-27b-it',
|
| 28 |
'api_key_var': 'API_KEY',
|
| 29 |
'theme': 'Default',
|
| 30 |
-
'grounding_urls': [],
|
| 31 |
'enable_dynamic_urls': True,
|
| 32 |
'enable_file_upload': True,
|
| 33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
| 34 |
'language': 'Italian',
|
| 35 |
'enable_tts': True,
|
| 36 |
-
'tts_model': '
|
| 37 |
'tts_voice': 'default',
|
| 38 |
'locked': False
|
| 39 |
}
|
|
@@ -544,32 +544,14 @@ def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, n
|
|
| 544 |
# Limit text length for TTS
|
| 545 |
text = text[:500]
|
| 546 |
|
| 547 |
-
#
|
| 548 |
-
|
| 549 |
-
if TTS_MODEL == "microsoft/speecht5_tts":
|
| 550 |
-
# For SpeechT5, we need speaker embeddings
|
| 551 |
-
# Using a predefined speaker ID (7306 is a clear female voice)
|
| 552 |
-
speaker_id = {
|
| 553 |
-
"default": 7306,
|
| 554 |
-
"female": 7306,
|
| 555 |
-
"male": 5105,
|
| 556 |
-
"neutral": 6678
|
| 557 |
-
}.get(TTS_VOICE, 7306)
|
| 558 |
-
|
| 559 |
-
# Note: In production, you'd load actual embeddings from the dataset
|
| 560 |
-
# For now, we'll let the API handle default voice
|
| 561 |
-
speaker_embeddings = {"speaker_id": speaker_id}
|
| 562 |
|
| 563 |
for attempt in range(max_retries):
|
| 564 |
try:
|
| 565 |
headers = {"Authorization": f"Bearer {hf_token}"}
|
| 566 |
api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
|
| 567 |
|
| 568 |
-
# Prepare payload
|
| 569 |
-
payload = {"inputs": text}
|
| 570 |
-
if speaker_embeddings and TTS_MODEL == "microsoft/speecht5_tts":
|
| 571 |
-
# For models that support speaker embeddings
|
| 572 |
-
payload["parameters"] = speaker_embeddings
|
| 573 |
|
| 574 |
response = requests.post(
|
| 575 |
api_url,
|
|
@@ -1012,12 +994,12 @@ def create_interface():
|
|
| 1012 |
edit_tts_model = gr.Dropdown(
|
| 1013 |
label="TTS Model",
|
| 1014 |
choices=[
|
| 1015 |
-
"
|
| 1016 |
"facebook/mms-tts-eng",
|
| 1017 |
-
"
|
| 1018 |
-
"
|
| 1019 |
],
|
| 1020 |
-
value=config.get('tts_model', '
|
| 1021 |
allow_custom_value=True
|
| 1022 |
)
|
| 1023 |
edit_tts_voice = gr.Dropdown(
|
|
|
|
| 27 |
'model': 'google/gemma-3-27b-it',
|
| 28 |
'api_key_var': 'API_KEY',
|
| 29 |
'theme': 'Default',
|
| 30 |
+
'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
|
| 31 |
'enable_dynamic_urls': True,
|
| 32 |
'enable_file_upload': True,
|
| 33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
| 34 |
'language': 'Italian',
|
| 35 |
'enable_tts': True,
|
| 36 |
+
'tts_model': 'facebook/fastspeech2-en-ljspeech',
|
| 37 |
'tts_voice': 'default',
|
| 38 |
'locked': False
|
| 39 |
}
|
|
|
|
| 544 |
# Limit text length for TTS
|
| 545 |
text = text[:500]
|
| 546 |
|
| 547 |
+
# Prepare payload - most models just need the text
|
| 548 |
+
payload = {"inputs": text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
|
| 550 |
for attempt in range(max_retries):
|
| 551 |
try:
|
| 552 |
headers = {"Authorization": f"Bearer {hf_token}"}
|
| 553 |
api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
|
| 554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
|
| 556 |
response = requests.post(
|
| 557 |
api_url,
|
|
|
|
| 994 |
edit_tts_model = gr.Dropdown(
|
| 995 |
label="TTS Model",
|
| 996 |
choices=[
|
| 997 |
+
"facebook/fastspeech2-en-ljspeech",
|
| 998 |
"facebook/mms-tts-eng",
|
| 999 |
+
"espnet/kan-bayashi_ljspeech_vits",
|
| 1000 |
+
"microsoft/speecht5_tts"
|
| 1001 |
],
|
| 1002 |
+
value=config.get('tts_model', 'facebook/fastspeech2-en-ljspeech'),
|
| 1003 |
allow_custom_value=True
|
| 1004 |
)
|
| 1005 |
edit_tts_voice = gr.Dropdown(
|
config.json
CHANGED
|
@@ -15,11 +15,13 @@
|
|
| 15 |
"A che ora ti svegli la mattina?",
|
| 16 |
"Qual \u00e8 il tuo sport preferito?"
|
| 17 |
],
|
| 18 |
-
"grounding_urls": [],
|
|
|
|
|
|
|
| 19 |
"enable_dynamic_urls": true,
|
| 20 |
"enable_file_upload": true,
|
| 21 |
"enable_tts": true,
|
| 22 |
-
"tts_model": "
|
| 23 |
"tts_voice": "default",
|
| 24 |
"theme": "Default"
|
| 25 |
}
|
|
|
|
| 15 |
"A che ora ti svegli la mattina?",
|
| 16 |
"Qual \u00e8 il tuo sport preferito?"
|
| 17 |
],
|
| 18 |
+
"grounding_urls": [
|
| 19 |
+
"https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"
|
| 20 |
+
],
|
| 21 |
"enable_dynamic_urls": true,
|
| 22 |
"enable_file_upload": true,
|
| 23 |
"enable_tts": true,
|
| 24 |
+
"tts_model": "facebook/fastspeech2-en-ljspeech",
|
| 25 |
"tts_voice": "default",
|
| 26 |
"theme": "Default"
|
| 27 |
}
|