Spaces:
Sleeping
Sleeping
| # ----------------------- | |
| # Audio Generation Function | |
| # ----------------------- | |
| import os | |
| import re | |
| import time | |
| import tempfile | |
| import requests | |
| import json | |
| import io | |
| import base64 | |
| import cv2 | |
| import logging | |
| import uuid | |
| import subprocess | |
| from pathlib import Path | |
| import urllib.parse | |
| from io import BytesIO | |
| from PIL import Image | |
# Seconds to wait for a TTS service before giving up; without a timeout
# `requests` would block indefinitely on a stalled connection.
_TTS_TIMEOUT_SECONDS = 120


def _write_temp_mp3(audio_bytes):
    """Write audio bytes to a new ``.mp3`` temp file and return its path.

    The file is created with ``delete=False``, so it is left on disk for the
    caller to use (and eventually remove).
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file.write(audio_bytes)
    return temp_file.name


def generate_audio(text, voice_model, audio_model="deepgram"):
    """
    Generate audio from text using either DeepGram or Pollinations OpenAI-Audio.

    Args:
        text (str): The text to convert to speech.
        voice_model (str): The voice/model to use.
            - For DeepGram, e.g., "aura-asteria-en" or "aura-helios-en".
            - For Pollinations, e.g., "sage" (female) or "echo" (male).
        audio_model (str): Which audio generation service to use
            ("deepgram" or "openai-audio").

    Returns:
        str or None: The path to the generated audio file, or None if
        generation failed (unsupported service, empty text, missing DeepGram
        API key, network error, or a non-200 response). Failures are reported
        through ``logging.error``.
    """
    if audio_model not in ("deepgram", "openai-audio"):
        logging.error("Unsupported audio model selected.")
        return None

    if not text or not text.strip():
        logging.error("No text provided for audio generation.")
        return None

    if audio_model == "deepgram":
        # The key is read from the "DeepGram" environment variable.
        deepgram_api_key = os.getenv("DeepGram")
        if not deepgram_api_key:
            logging.error("Deepgram API Key is missing.")
            return None
        service_label = "DeepGram TTS"
    else:
        service_label = "OpenAI Audio TTS"

    try:
        if audio_model == "deepgram":
            response = requests.post(
                "https://api.deepgram.com/v1/speak",
                # `params` URL-encodes the voice name for us.
                params={"model": voice_model},
                headers={
                    "Authorization": f"Token {deepgram_api_key}",
                    "Content-Type": "text/plain",
                },
                # Encode explicitly: a str body is sent as Latin-1 by the HTTP
                # layer and fails on characters outside that range.
                data=text.encode("utf-8"),
                timeout=_TTS_TIMEOUT_SECONDS,
            )
        else:
            # Pollinations takes the text as a URL path segment.
            encoded_text = urllib.parse.quote(text)
            response = requests.get(
                f"https://text.pollinations.ai/{encoded_text}",
                params={"model": "openai-audio", "voice": voice_model},
                timeout=_TTS_TIMEOUT_SECONDS,
            )
    except requests.RequestException as exc:
        logging.error(f"{service_label} request failed: {exc}")
        return None

    if response.status_code != 200:
        logging.error(f"{service_label} error: {response.status_code}")
        return None

    return _write_temp_mp3(response.content)
def get_audio_duration(audio_file):
    """Return the duration of ``audio_file`` in seconds as reported by ffprobe.

    Runs ``ffprobe`` and parses the single duration value it prints. If
    ffprobe exits with a non-zero status, cannot be run, or prints something
    that is not a float, a fallback of 5.0 seconds is returned instead.
    """
    import subprocess

    fallback_seconds = 5.0
    probe_command = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        audio_file,
    ]
    try:
        # capture_output=True pipes both stdout and stderr.
        probe = subprocess.run(probe_command, capture_output=True, text=True)
        if probe.returncode == 0:
            return float(probe.stdout.strip())
        return fallback_seconds
    except Exception:
        # Best effort: any failure yields the fallback duration.
        return fallback_seconds
| #edit text and audio | |
def edit_section_text(original_text: str, new_text: str, voice_model: str, audio_model: str):
    """
    Replace a section's text with ``new_text`` and re-generate its audio.

    Args:
        original_text: The previous text of the section. It is accepted for
            interface compatibility but is not used by this function.
        new_text: The replacement text. Leading/trailing whitespace is
            stripped.
        voice_model: Voice name passed through to ``generate_audio``.
        audio_model: Audio service name passed through to ``generate_audio``.

    Returns:
        tuple: ``(updated_text, new_audio_path)``. ``new_audio_path`` is
        whatever ``generate_audio`` returns. ``(None, None)`` is returned when
        the stripped text is empty or when any exception is raised (the
        exception is logged).
    """
    try:
        # 1) The new text fully replaces the old one.
        updated_text = new_text.strip()
        if not updated_text:
            return None, None

        # 2) Remove anything that looks like a markup tag (<...>) so it is
        #    not read aloud, then re-generate the audio. `generate_audio` is
        #    defined in this same module, so it is called directly rather
        #    than imported.
        updated_audio_text = re.sub(r"<.*?>", "", updated_text)
        audio_file_path = generate_audio(updated_audio_text, voice_model, audio_model=audio_model)
        return updated_text, audio_file_path
    except Exception as e:
        logging.error(f"Error editing section text/audio: {e}")
        return None, None