Spaces:

abhinav0231
/

SparrowTale

Sleeping

App Files Files Community

SparrowTale / tts.py

abhinav0231

Update tts.py

941449c verified 6 months ago

raw

history blame contribute delete

3.7 kB

	import os
	import base64
	import json
	import streamlit as st
	from sarvamai import SarvamAI
	from typing import List, Dict, Optional


	# Module-level Sarvam AI client shared by the TTS helpers below. It stays
	# None when no API key can be found or the client cannot be constructed, and
	# the helpers treat a None client as "TTS unavailable".
	client = None
	try:
	    # Reading st.secrets can itself raise (for example when no secrets file
	    # is present). Isolate that lookup so a failure there does not skip the
	    # environment-variable fallback.
	    try:
	        api_key = st.secrets.get("SARVAM_API_KEY")
	    except Exception:
	        api_key = None
	    if not api_key:
	        api_key = os.getenv("SARVAM_API_KEY")

	    if api_key:
	        client = SarvamAI(api_subscription_key=api_key)
	        print("✅ Sarvam AI client for TTS (Bulbul) initialized successfully.")
	    else:
	        print("⚠️ Warning: SARVAM_API_KEY not found.")
	except Exception as e:
	    # Best-effort initialisation: importing this module must never fail just
	    # because the TTS client could not be created.
	    print(f"❌ Error initializing Sarvam AI client: {e}")

	# --- Language Mapping ---
	# Lower-case language name -> language code passed to the TTS API as
	# `target_language_code`.
	LANGUAGE_CODE_MAP = {
	    "hindi": "hi-IN",
	    "bengali": "bn-IN",
	    "tamil": "ta-IN",
	    "telugu": "te-IN",
	    "gujarati": "gu-IN",
	    "kannada": "kn-IN",
	    "malayalam": "ml-IN",
	    "marathi": "mr-IN",
	    "punjabi": "pa-IN",
	    "odia": "od-IN",
	    "english": "en-IN",
	}


	def get_language_code(language_name: str) -> Optional[str]:
	    """Return the language code for *language_name*, or None if unsupported.

	    The lookup ignores letter case and surrounding whitespace. Non-string
	    input (including None) is reported as unsupported instead of raising,
	    so callers can rely on the Optional return value alone.
	    """
	    if not isinstance(language_name, str):
	        return None
	    return LANGUAGE_CODE_MAP.get(language_name.strip().lower())

	def generate_audio_from_text(
	    text: str,
	    language_name: str,
	    gender: str,
	    output_file_path: str
	) -> bool:
	    """
	    Generate an audio file from a text string using the Sarvam "Bulbul" TTS API.

	    The voice and pace are derived from *gender*: "male" (case-insensitive)
	    selects speaker "abhilash" at pace 1.0; any other value selects speaker
	    "anushka" at pace 0.9.

	    Args:
	        text: The text to synthesise.
	        language_name: Language name looked up via get_language_code().
	        gender: Voice gender selector, see above.
	        output_file_path: Path the decoded audio bytes are written to.

	    Returns:
	        True when audio was received and written to *output_file_path*.
	        False when the client is unavailable, the text is empty, the
	        language is unsupported, the API returned no audio, or any error
	        occurred during the API call or the file write.
	    """
	    if not client:
	        return False

	    # Reject empty input up front; there is nothing to synthesise.
	    if not isinstance(text, str) or not text.strip():
	        print("⚠️ Warning: empty text supplied for audio generation. Skipping.")
	        return False

	    lang_code = get_language_code(language_name)
	    if not lang_code:
	        print(f"❌ Language '{language_name}' is not supported. Skipping.")
	        return False

	    # Tolerate a missing or oddly formatted gender value; anything that is
	    # not "male" falls back to the default voice.
	    if str(gender or "").strip().lower() == "male":
	        speaker_name = "abhilash"
	        pace_value = 1.0
	    else:
	        speaker_name = "anushka"
	        pace_value = 0.9

	    print(f"--- 🎤 Generating audio for chunk: '{text[:50]}...' in {language_name} (Voice: {speaker_name}, Pace: {pace_value}) ---")

	    try:
	        response = client.text_to_speech.convert(
	            text=text,
	            model="bulbul:v2",
	            target_language_code=lang_code,
	            speaker=speaker_name,
	            pace=pace_value,  # Use the selected pace
	            speech_sample_rate=22050,
	            enable_preprocessing=True
	        )

	        audio_chunks = response.audios
	        if not audio_chunks:
	            # Do not leave a zero-byte file behind and report success.
	            print("❌ Sarvam TTS API returned no audio data.")
	            return False

	        # Decode every base64 chunk on its own and join the bytes. Joining
	        # the base64 strings first is unsafe: padding at the end of one
	        # chunk lands in the middle of the combined string and is not
	        # decoded reliably.
	        audio_data = b"".join(base64.b64decode(chunk) for chunk in audio_chunks)

	        with open(output_file_path, "wb") as f:
	            f.write(audio_data)

	        print(f"✅ Audio saved to {output_file_path}")
	        return True

	    except Exception as e:
	        print(f"❌ An error occurred during the Sarvam TTS API call: {e}")
	        return False

	def generate_all_audio_from_file(
	    json_path: str,
	    target_language: str,
	    gender: str,
	    output_dir: str = "generated_audio",
	    output_json_path: str = "multimedia_data_final.json"
	) -> List[Dict[str, str]]:
	    """
	    Read items from a JSON file, generate audio for each, and save a final JSON.

	    The input file is expected to contain a JSON list of objects. For every
	    object with a non-empty string under "audio_text", audio is generated
	    into *output_dir* as "audio_NNN.mp3" (NNN is the item's index) and the
	    path is stored under "audio_path". Items without usable text, or whose
	    generation fails, get "audio_path" set to None. The updated list is then
	    written to *output_json_path*.

	    Args:
	        json_path: Path of the input JSON file.
	        target_language: Language name forwarded to generate_audio_from_text().
	        gender: Voice gender forwarded to generate_audio_from_text().
	        output_dir: Directory for the audio files; created if missing.
	        output_json_path: Path the updated item list is written to.

	    Returns:
	        The updated list of items, or an empty list when the input file
	        cannot be read, cannot be parsed, or does not contain a JSON list.
	    """
	    try:
	        with open(json_path, 'r', encoding='utf-8') as f:
	            multimedia_data = json.load(f)
	    except (OSError, ValueError) as e:
	        # OSError covers missing/unreadable files; ValueError covers both
	        # json.JSONDecodeError and UnicodeDecodeError.
	        print(f"❌ Error reading or parsing {json_path}: {e}")
	        return []

	    if not isinstance(multimedia_data, list):
	        print(f"❌ Error reading or parsing {json_path}: expected a JSON list of items.")
	        return []

	    os.makedirs(output_dir, exist_ok=True)

	    for i, item in enumerate(multimedia_data):
	        if not isinstance(item, dict):
	            # Leave malformed entries untouched rather than aborting the run.
	            print(f"⚠️ Warning: item {i} is not a JSON object. Skipping.")
	            continue

	        audio_text = item.get("audio_text")
	        if not isinstance(audio_text, str) or not audio_text.strip():
	            item["audio_path"] = None
	            continue

	        # NOTE(review): the file is named ".mp3" but the bytes are written
	        # exactly as returned by the TTS API - confirm the container format
	        # the API produces matches this extension.
	        file_path = os.path.join(output_dir, f"audio_{i:03d}.mp3")
	        success = generate_audio_from_text(audio_text, target_language, gender, file_path)
	        item["audio_path"] = file_path if success else None

	    with open(output_json_path, 'w', encoding='utf-8') as f:
	        json.dump(multimedia_data, f, indent=2, ensure_ascii=False)
	    print(f"\n--- ✅ Audio generation finished. Final data saved to {output_json_path}. ---")

	    return multimedia_data