rairo committed on
Commit
4932c99
·
verified ·
1 Parent(s): 9903c4d

Update audio_gen.py

Browse files
Files changed (1) hide show
  1. audio_gen.py +81 -0
audio_gen.py CHANGED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -----------------------
2
+ # Audio Generation Function
3
+ # -----------------------
4
+ import os
5
+ import re
6
+ import time
7
+ import tempfile
8
+ import requests
9
+ import json
10
+ import io
11
+ import base64
12
+ import cv2
13
+ import logging
14
+ import uuid
15
+ import subprocess
16
+ from pathlib import Path
17
+ import urllib.parse
18
+ from io import BytesIO
19
+
20
+
21
+
22
+
23
def generate_audio(text, voice_model, audio_model="deepgram", timeout=60):
    """
    Generate audio from text using either DeepGram or Pollinations OpenAI-Audio.

    Failures (missing API key, unsupported service, network error, non-200
    response) are logged through the ``logging`` module and reported to the
    caller as ``None``; this function does not raise for them.

    Args:
        text (str): The text to convert to speech.
        voice_model (str): The voice/model to use.
            - For DeepGram, e.g., "aura-asteria-en" or "aura-helios-en".
            - For Pollinations, e.g., "sage" (female) or "echo" (male).
        audio_model (str): Which audio generation service to use
            ("deepgram" or "openai-audio").
        timeout (float): Seconds to wait for the HTTP request before
            treating it as failed.

    Returns:
        str or None: The path to the generated ".mp3" temporary file, or None
        if generation failed. The file is created with ``delete=False``, so it
        stays on disk until the caller removes it.
    """
    log = logging.getLogger(__name__)

    if audio_model == "deepgram":
        # The key is read from the environment variable literally named "DeepGram".
        deepgram_api_key = os.getenv("DeepGram")
        if not deepgram_api_key:
            log.error("Deepgram API Key is missing.")
            return None
        service = "DeepGram"
        method = "POST"
        url = "https://api.deepgram.com/v1/speak"
        request_kwargs = {
            # Passing the model via `params` lets requests URL-encode it.
            "params": {"model": voice_model},
            "headers": {
                "Authorization": f"Token {deepgram_api_key}",
                "Content-Type": "text/plain",
            },
            # Encode explicitly so non-ASCII text is always sent as UTF-8
            # instead of relying on the HTTP stack's default str encoding.
            "data": text.encode("utf-8") if isinstance(text, str) else text,
        }
    elif audio_model == "openai-audio":
        service = "OpenAI Audio"
        method = "GET"
        # safe="" also escapes "/" so the text stays a single path segment.
        url = "https://text.pollinations.ai/" + urllib.parse.quote(text, safe="")
        request_kwargs = {
            "params": {"model": "openai-audio", "voice": voice_model},
        }
    else:
        log.error("Unsupported audio model selected.")
        return None

    try:
        response = requests.request(method, url, timeout=timeout, **request_kwargs)
    except requests.RequestException as exc:
        # Connection errors and timeouts count as "generation failed".
        log.error("%s TTS request failed: %s", service, exc)
        return None

    if response.status_code != 200:
        log.error("%s TTS error: %s", service, response.status_code)
        return None

    return _save_audio_to_temp_file(response.content)


def _save_audio_to_temp_file(content, suffix=".mp3"):
    """Write raw audio bytes to a persistent temporary file and return its path."""
    # delete=False keeps the file after the handle closes; the `with` block
    # guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as audio_file:
        audio_file.write(content)
    return audio_file.name
70
+
71
def get_audio_duration(audio_file):
    """
    Return the duration of an audio file in seconds, as reported by ffprobe.

    This is a best-effort lookup: if ffprobe cannot be run, exits with a
    non-zero status, or prints something that does not parse as a float,
    a fallback of 5.0 seconds is returned instead of raising.

    Args:
        audio_file: Path of the file handed to ffprobe as its last argument.

    Returns:
        float: The parsed duration, or 5.0 when it could not be determined.
    """
    import subprocess

    fallback_seconds = 5.0
    # Ask ffprobe for only the container duration, printed as a bare number.
    probe_args = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        audio_file,
    ]
    try:
        probe = subprocess.run(probe_args, capture_output=True, text=True)
        succeeded = probe.returncode == 0
        return float(probe.stdout.strip()) if succeeded else fallback_seconds
    except Exception:
        # Any failure (ffprobe missing, bad argument, unparsable output)
        # falls back to the default duration.
        return fallback_seconds