sozo-api / audio_gen.py
rairo's picture
Update audio_gen.py
08a5383 verified
raw
history blame
3.93 kB
# -----------------------
# Audio Generation Function
# -----------------------
import os
import re
import time
import tempfile
import requests
import json
import io
import base64
import cv2
import logging
import uuid
import subprocess
from pathlib import Path
import urllib.parse
from io import BytesIO
from PIL import Image
def generate_audio(text, voice_model, audio_model="deepgram"):
    """
    Generate speech audio from text using DeepGram or Pollinations OpenAI-Audio.

    Args:
        text (str): The text to convert to speech.
        voice_model (str): The voice/model to use.
            - For DeepGram, e.g., "aura-asteria-en" or "aura-helios-en".
            - For Pollinations, e.g., "sage" (female) or "echo" (male).
        audio_model (str): Which TTS service to use ("deepgram" or "openai-audio").

    Returns:
        str or None: Path to the generated .mp3 file, or None on any failure
        (missing API key, HTTP error, or unsupported audio_model).
    """
    # NOTE(review): the original called st.error() here, but streamlit is never
    # imported in this module, so every error path raised NameError. Errors are
    # now reported via logging, matching edit_section_text() below.
    if audio_model == "deepgram":
        deepgram_api_key = os.getenv("DeepGram")
        if not deepgram_api_key:
            logging.error("Deepgram API Key is missing.")
            return None
        headers_tts = {
            "Authorization": f"Token {deepgram_api_key}",
            "Content-Type": "text/plain"
        }
        url = f"https://api.deepgram.com/v1/speak?model={voice_model}"
        # timeout prevents an unresponsive API from hanging the caller forever
        response = requests.post(url, headers=headers_tts, data=text, timeout=60)
        if response.status_code == 200:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_file.write(response.content)
            return temp_file.name
        logging.error(f"DeepGram TTS error: {response.status_code}")
        return None
    elif audio_model == "openai-audio":
        # URL-encode the text and call the Pollinations TTS endpoint
        encoded_text = urllib.parse.quote(text)
        url = f"https://text.pollinations.ai/{encoded_text}?model=openai-audio&voice={voice_model}"
        response = requests.get(url, timeout=60)
        if response.status_code == 200:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_file.write(response.content)
            return temp_file.name
        logging.error(f"OpenAI Audio TTS error: {response.status_code}")
        return None
    else:
        logging.error("Unsupported audio model selected.")
        return None
def get_audio_duration(audio_file):
    """
    Return the duration of an audio file in seconds using ffprobe.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        float: Duration in seconds, or 5.0 as a fallback default when ffprobe
        is unavailable, fails, or its output cannot be parsed.
    """
    # Fallback used whenever the real duration cannot be determined;
    # downstream code apparently treats 5 seconds as a safe default.
    fallback = 5.0
    try:
        # subprocess is already imported at module level; the original
        # re-imported it locally, which was redundant.
        cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
               '-of', 'default=noprint_wrappers=1:nokey=1', audio_file]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:
            return fallback
        return float(result.stdout.strip())
    except Exception:
        # Covers ffprobe not being installed (FileNotFoundError) and
        # unparseable output (ValueError from float()).
        return fallback
#edit text and audio
def edit_section_text(original_text: str, new_text: str, voice_model: str, audio_model: str):
    """
    Replace a section's text with new_text and regenerate its audio.

    Args:
        original_text (str): The section's previous text (currently unused;
            kept for interface compatibility with existing callers).
        new_text (str): The replacement text.
        voice_model (str): Voice identifier passed through to generate_audio.
        audio_model (str): TTS backend passed through to generate_audio
            ("deepgram" or "openai-audio").

    Returns:
        tuple: (updated_text, audio_file_path), or (None, None) if new_text
        is empty/whitespace or an error occurs.
    """
    # NOTE(review): the original did `from audio_gen import generate_audio`,
    # a self-import of this very module. generate_audio is defined above in
    # this file and is already in scope, so the import was removed.
    try:
        updated_text = new_text.strip()
        if not updated_text:
            return None, None
        # Strip HTML/XML-style tags so markup is not read aloud by the TTS.
        updated_audio_text = re.sub(r"<.*?>", "", updated_text)
        audio_file_path = generate_audio(updated_audio_text, voice_model, audio_model=audio_model)
        return updated_text, audio_file_path
    except Exception as e:
        logging.error(f"Error editing section text/audio: {e}")
        return None, None