# -----------------------
# Audio Generation Function
# -----------------------
import base64
import io
import json
import logging
import os
import re
import subprocess
import tempfile
import time
import urllib.parse
import uuid
from io import BytesIO
from pathlib import Path

import cv2
import requests
from PIL import Image

try:
    # NOTE(review): the original code called `st.error` without importing `st`;
    # presumably this is Streamlit. It is imported optionally so the module
    # still works (and logs instead) where Streamlit is not installed.
    import streamlit as st
except ImportError:
    st = None

logger = logging.getLogger(__name__)

# Upper bound (seconds) for a single TTS HTTP request so a stalled service
# cannot block the caller forever.
_REQUEST_TIMEOUT_SECONDS = 60

# Duration (seconds) reported when ffprobe cannot determine the real one.
_DEFAULT_AUDIO_DURATION = 5.0

# Matches a single angle-bracket tag on one line; used to strip markup
# from text before it is sent to the speech service.
_MARKUP_TAG_RE = re.compile(r"<.*?>")


def _report_error(message):
    """Log *message* and, when Streamlit is importable, show it via ``st.error``."""
    logger.error(message)
    if st is not None:
        st.error(message)


def _write_temp_mp3(audio_bytes):
    """Write *audio_bytes* to a new persistent ``.mp3`` temp file and return its path."""
    # delete=False: the caller owns the file and is responsible for removing it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file.write(audio_bytes)
    return temp_file.name


def generate_audio(text, voice_model, audio_model="deepgram"):
    """
    Generate audio from text using either DeepGram or Pollinations OpenAI-Audio.

    Args:
        text (str): The text to convert to speech.
        voice_model (str): The voice/model to use.
            - For DeepGram, e.g., "aura-asteria-en" or "aura-helios-en".
            - For Pollinations, e.g., "sage" (female) or "echo" (male).
        audio_model (str): Which audio generation service to use
            ("deepgram" or "openai-audio").

    Returns:
        str or None: The path to the generated ``.mp3`` temp file, or None if
        generation failed (empty text, missing API key, unsupported service,
        network error, or a non-200 response). The file is not deleted
        automatically.
    """
    if not text or not text.strip():
        _report_error("No text provided for audio generation.")
        return None

    if audio_model == "deepgram":
        deepgram_api_key = os.getenv("DeepGram")
        if not deepgram_api_key:
            _report_error("Deepgram API Key is missing.")
            return None
        service_label = "DeepGram"
        headers_tts = {
            "Authorization": f"Token {deepgram_api_key}",
            "Content-Type": "text/plain",
        }

        def send_request():
            # Encode explicitly: a str body is encoded as latin-1 by the HTTP
            # layer, which fails or garbles any non-latin-1 text.
            return requests.post(
                "https://api.deepgram.com/v1/speak",
                params={"model": voice_model},
                headers=headers_tts,
                data=text.encode("utf-8"),
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )

    elif audio_model == "openai-audio":
        service_label = "OpenAI Audio"
        # safe="" so that "/" in the text is escaped too; otherwise it would
        # be interpreted as a path separator by the endpoint.
        encoded_text = urllib.parse.quote(text, safe="")

        def send_request():
            return requests.get(
                f"https://text.pollinations.ai/{encoded_text}",
                params={"model": "openai-audio", "voice": voice_model},
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )

    else:
        _report_error("Unsupported audio model selected.")
        return None

    try:
        response = send_request()
    except requests.RequestException as exc:
        _report_error(f"{service_label} TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        _report_error(f"{service_label} TTS error: {response.status_code}")
        return None

    return _write_temp_mp3(response.content)


def get_audio_duration(audio_file, default_duration=_DEFAULT_AUDIO_DURATION):
    """
    Return the duration of an audio file in seconds, as reported by ffprobe.

    Args:
        audio_file: Path to the audio file to inspect.
        default_duration (float): Value returned when the duration cannot be
            determined. Defaults to 5.0.

    Returns:
        float: The duration in seconds, or ``default_duration`` when ffprobe
        is unavailable, exits with a non-zero status, or prints something
        that is not a number.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_file,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            logger.warning(
                "ffprobe failed for %s: %s", audio_file, result.stderr.strip()
            )
            return default_duration
        return float(result.stdout.strip())
    except (OSError, ValueError, TypeError, subprocess.SubprocessError) as exc:
        # Best-effort lookup: a missing ffprobe binary or unparsable output
        # must not break the caller, so fall back to the default.
        logger.warning("Could not determine duration of %s: %s", audio_file, exc)
        return default_duration


# Edit text and audio
def edit_section_text(original_text: str, new_text: str, voice_model: str, audio_model: str):
    """
    Replace a section's text with ``new_text`` and regenerate its audio.

    Args:
        original_text (str): The previous text. Currently unused; kept for
            interface compatibility.
        new_text (str): The replacement text. Angle-bracket tags are removed
            from the copy sent to the speech service but kept in the returned
            text.
        voice_model (str): Voice passed through to ``generate_audio``.
        audio_model (str): Service passed through to ``generate_audio``.

    Returns:
        tuple: ``(updated_text, new_audio_path)``. Both are None when
        ``new_text`` is empty/whitespace or an unexpected error occurs;
        ``new_audio_path`` alone is None when audio generation fails.
    """
    try:
        # 1) The new text is just new_text, trimmed.
        updated_text = (new_text or "").strip()
        if not updated_text:
            return None, None

        # 2) Re-generate the audio from the text with markup tags removed.
        # generate_audio is defined in this module, so no import is needed.
        updated_audio_text = _MARKUP_TAG_RE.sub("", updated_text)
        audio_file_path = generate_audio(
            updated_audio_text, voice_model, audio_model=audio_model
        )
        return updated_text, audio_file_path
    except Exception as e:
        # Boundary handler: callers expect (None, None) rather than an exception.
        logger.exception(f"Error editing section text/audio: {e}")
        return None, None