abhinav0231 committed on
Commit
efbc3d2
·
verified ·
1 Parent(s): acba535

Update tts.py

Browse files
Files changed (1) hide show
  1. tts.py +127 -127
tts.py CHANGED
@@ -1,128 +1,128 @@
1
- import os
2
- import base64
3
- import json
4
- import streamlit as st
5
- from sarvamai import SarvamAI
6
- from typing import List, Dict, Optional
7
-
8
-
9
- client = None
10
- try:
11
- api_key = os.environ("SARVAM_API_KEY")
12
- if api_key:
13
- client = SarvamAI(api_subscription_key=api_key)
14
- print("✅ Sarvam AI client for TTS (Bulbul) initialized successfully.")
15
- else:
16
- print("⚠️ Warning: SARVAM_API_KEY not found.")
17
- except Exception as e:
18
- print(f"❌ Error initializing Sarvam AI client: {e}")
19
-
20
- # --- Language Mapping ---
21
- LANGUAGE_CODE_MAP = {
22
- "hindi": "hi-IN", "bengali": "bn-IN", "tamil": "ta-IN", "telugu": "te-IN",
23
- "gujarati": "gu-IN", "kannada": "kn-IN", "malayalam": "ml-IN", "marathi": "mr-IN",
24
- "punjabi": "pa-IN", "odia": "od-IN", "english": "en-IN",
25
- }
26
-
27
- def get_language_code(language_name: str) -> Optional[str]:
28
- return LANGUAGE_CODE_MAP.get(language_name.lower())
29
-
30
- def generate_audio_from_text(
31
- text: str,
32
- language_name: str,
33
- gender: str,
34
- output_file_path: str
35
- ) -> bool:
36
- """
37
- Generates an audio file from a text string using the Sarvam "Bulbul" TTS API
38
- with a specified gender and pace for the voice.
39
- """
40
- if not client: return False
41
- lang_code = get_language_code(language_name)
42
- if not lang_code:
43
- print(f"❌ Language '{language_name}' is not supported. Skipping.")
44
- return False
45
-
46
- if gender.lower() == "male":
47
- speaker_name = "abhilash"
48
- pace_value = 1.0
49
- else:
50
- speaker_name = "anushka"
51
- pace_value = 0.9
52
-
53
- print(f"--- 🎀 Generating audio for chunk: '{text[:50]}...' in {language_name} (Voice: {speaker_name}, Pace: {pace_value}) ---")
54
-
55
- try:
56
- response = client.text_to_speech.convert(
57
- text=text,
58
- model="bulbul:v2",
59
- target_language_code=lang_code,
60
- speaker=speaker_name,
61
- pace=pace_value, # Use the selected pace
62
- speech_sample_rate=22050,
63
- enable_preprocessing=True
64
- )
65
-
66
- combined_audio_b64 = "".join(response.audios)
67
- audio_data = base64.b64decode(combined_audio_b64)
68
-
69
- with open(output_file_path, "wb") as f:
70
- f.write(audio_data)
71
-
72
- print(f"✅ Audio saved to {output_file_path}")
73
- return True
74
-
75
- except Exception as e:
76
- print(f"❌ An error occurred during the Sarvam TTS API call: {e}")
77
- return False
78
-
79
- def generate_all_audio_from_file(
80
- json_path: str,
81
- target_language: str,
82
- gender: str,
83
- output_dir: str = "generated_audio",
84
- output_json_path: str = "multimedia_data_final.json"
85
- ) -> List[Dict[str, str]]:
86
- """
87
- Reads data from a JSON, generates audio with a specific gender, and saves a final JSON.
88
- """
89
- try:
90
- with open(json_path, 'r', encoding='utf-8') as f:
91
- multimedia_data = json.load(f)
92
- except (FileNotFoundError, json.JSONDecodeError) as e:
93
- print(f"❌ Error reading or parsing {json_path}: {e}")
94
- return []
95
-
96
- os.makedirs(output_dir, exist_ok=True)
97
-
98
- for i, item in enumerate(multimedia_data):
99
- audio_text = item.get("audio_text")
100
- if not audio_text:
101
- item["audio_path"] = None
102
- continue
103
-
104
- file_path = os.path.join(output_dir, f"audio_{i:03d}.mp3")
105
- success = generate_audio_from_text(audio_text, target_language, gender, file_path)
106
- item["audio_path"] = file_path if success else None
107
-
108
- with open(output_json_path, 'w', encoding='utf-8') as f:
109
- json.dump(multimedia_data, f, indent=2, ensure_ascii=False)
110
- print(f"\n--- ✅ Audio generation finished. Final data saved to {output_json_path}. ---")
111
-
112
- return multimedia_data
113
-
114
- # Example Usage
115
- # if __name__ == '__main__':
116
- # json_input_file = "multimedia_data_with_images.json"
117
- # if not os.path.exists(json_input_file):
118
- # print(f"❌ Error: Input file '{json_input_file}' not found.")
119
- # print("Please run image_generation.py first to generate it.")
120
- # else:
121
- # target_language_for_story = "English"
122
- # target_gender_for_story = "male" # Change to "male" to test the other voice
123
-
124
- # generate_all_audio_from_file(
125
- # json_path=json_input_file,
126
- # target_language=target_language_for_story,
127
- # gender=target_gender_for_story
128
  # )
 
1
+ import os
2
+ import base64
3
+ import json
4
+ import streamlit as st
5
+ from sarvamai import SarvamAI
6
+ from typing import List, Dict, Optional
7
+
8
+
9
+ client = None
10
+ try:
11
+ api_key = st.secrets.get("SARVAM_API_KEY") or os.getenv("SARVAM_API_KEY")
12
+ if api_key:
13
+ client = SarvamAI(api_subscription_key=api_key)
14
+ print("✅ Sarvam AI client for TTS (Bulbul) initialized successfully.")
15
+ else:
16
+ print("⚠️ Warning: SARVAM_API_KEY not found.")
17
+ except Exception as e:
18
+ print(f"❌ Error initializing Sarvam AI client: {e}")
19
+
20
+ # --- Language Mapping ---
21
+ LANGUAGE_CODE_MAP = {
22
+ "hindi": "hi-IN", "bengali": "bn-IN", "tamil": "ta-IN", "telugu": "te-IN",
23
+ "gujarati": "gu-IN", "kannada": "kn-IN", "malayalam": "ml-IN", "marathi": "mr-IN",
24
+ "punjabi": "pa-IN", "odia": "od-IN", "english": "en-IN",
25
+ }
26
+
27
+ def get_language_code(language_name: str) -> Optional[str]:
28
+ return LANGUAGE_CODE_MAP.get(language_name.lower())
29
+
30
+ def generate_audio_from_text(
31
+ text: str,
32
+ language_name: str,
33
+ gender: str,
34
+ output_file_path: str
35
+ ) -> bool:
36
+ """
37
+ Generates an audio file from a text string using the Sarvam "Bulbul" TTS API
38
+ with a specified gender and pace for the voice.
39
+ """
40
+ if not client: return False
41
+ lang_code = get_language_code(language_name)
42
+ if not lang_code:
43
+ print(f"❌ Language '{language_name}' is not supported. Skipping.")
44
+ return False
45
+
46
+ if gender.lower() == "male":
47
+ speaker_name = "abhilash"
48
+ pace_value = 1.0
49
+ else:
50
+ speaker_name = "anushka"
51
+ pace_value = 0.9
52
+
53
+ print(f"--- 🎀 Generating audio for chunk: '{text[:50]}...' in {language_name} (Voice: {speaker_name}, Pace: {pace_value}) ---")
54
+
55
+ try:
56
+ response = client.text_to_speech.convert(
57
+ text=text,
58
+ model="bulbul:v2",
59
+ target_language_code=lang_code,
60
+ speaker=speaker_name,
61
+ pace=pace_value, # Use the selected pace
62
+ speech_sample_rate=22050,
63
+ enable_preprocessing=True
64
+ )
65
+
66
+ combined_audio_b64 = "".join(response.audios)
67
+ audio_data = base64.b64decode(combined_audio_b64)
68
+
69
+ with open(output_file_path, "wb") as f:
70
+ f.write(audio_data)
71
+
72
+ print(f"✅ Audio saved to {output_file_path}")
73
+ return True
74
+
75
+ except Exception as e:
76
+ print(f"❌ An error occurred during the Sarvam TTS API call: {e}")
77
+ return False
78
+
79
+ def generate_all_audio_from_file(
80
+ json_path: str,
81
+ target_language: str,
82
+ gender: str,
83
+ output_dir: str = "generated_audio",
84
+ output_json_path: str = "multimedia_data_final.json"
85
+ ) -> List[Dict[str, str]]:
86
+ """
87
+ Reads data from a JSON, generates audio with a specific gender, and saves a final JSON.
88
+ """
89
+ try:
90
+ with open(json_path, 'r', encoding='utf-8') as f:
91
+ multimedia_data = json.load(f)
92
+ except (FileNotFoundError, json.JSONDecodeError) as e:
93
+ print(f"❌ Error reading or parsing {json_path}: {e}")
94
+ return []
95
+
96
+ os.makedirs(output_dir, exist_ok=True)
97
+
98
+ for i, item in enumerate(multimedia_data):
99
+ audio_text = item.get("audio_text")
100
+ if not audio_text:
101
+ item["audio_path"] = None
102
+ continue
103
+
104
+ file_path = os.path.join(output_dir, f"audio_{i:03d}.mp3")
105
+ success = generate_audio_from_text(audio_text, target_language, gender, file_path)
106
+ item["audio_path"] = file_path if success else None
107
+
108
+ with open(output_json_path, 'w', encoding='utf-8') as f:
109
+ json.dump(multimedia_data, f, indent=2, ensure_ascii=False)
110
+ print(f"\n--- ✅ Audio generation finished. Final data saved to {output_json_path}. ---")
111
+
112
+ return multimedia_data
113
+
114
+ # Example Usage
115
+ # if __name__ == '__main__':
116
+ # json_input_file = "multimedia_data_with_images.json"
117
+ # if not os.path.exists(json_input_file):
118
+ # print(f"❌ Error: Input file '{json_input_file}' not found.")
119
+ # print("Please run image_generation.py first to generate it.")
120
+ # else:
121
+ # target_language_for_story = "English"
122
+ # target_gender_for_story = "male" # Change to "male" to test the other voice
123
+
124
+ # generate_all_audio_from_file(
125
+ # json_path=json_input_file,
126
+ # target_language=target_language_for_story,
127
+ # gender=target_gender_for_story
128
  # )