commit
Browse files- speech_io.py +23 -4
speech_io.py
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def synthesize_speech(text: str, voice: str = "alloy") -> str:
|
| 2 |
print(">>> Synthesizing speech via OpenAI TTS …")
|
| 3 |
|
| 4 |
-
# OpenAI SDK 2.x
|
| 5 |
response = client.audio.speech.create(
|
| 6 |
model="gpt-4o-mini-tts",
|
| 7 |
voice=voice,
|
| 8 |
input=text
|
| 9 |
)
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
# Get raw audio bytes directly from the response object
|
| 13 |
audio_bytes = response.read()
|
| 14 |
|
| 15 |
-
# Save file
|
| 16 |
tmp = NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 17 |
tmp.write(audio_bytes)
|
| 18 |
tmp.close()
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from tempfile import NamedTemporaryFile
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
# Module-level OpenAI client used by the functions in this file; the API key
# is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def transcribe_audio(file_path: str, language: Optional[str] = None) -> str:
    """Transcribe an audio file via the OpenAI audio transcription endpoint.

    Args:
        file_path: Path of the audio file; it is opened in binary mode and
            handed to the API as the ``file`` argument.
        language: Optional language hint forwarded to the API. When it is
            ``None`` the parameter is left out of the request entirely.

    Returns:
        The ``text`` attribute of the transcription response.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
    """
    print(">>> Transkribiere Audio via OpenAI Audio API …")

    # Only forward ``language`` when the caller actually supplied one, so an
    # unset hint is omitted from the request instead of being sent as an
    # explicit ``None``.
    request_args = {"model": "gpt-4o-mini-transcribe"}
    if language is not None:
        request_args["language"] = language

    with open(file_path, "rb") as f:
        resp = client.audio.transcriptions.create(file=f, **request_args)

    return resp.text
|
| 20 |
+
|
| 21 |
+
|
| 22 |
def synthesize_speech(text: str, voice: str = "alloy") -> str:
|
| 23 |
print(">>> Synthesizing speech via OpenAI TTS …")
|
| 24 |
|
| 25 |
+
# OpenAI SDK 2.x returns HttpxBinaryResponseContent
|
| 26 |
response = client.audio.speech.create(
|
| 27 |
model="gpt-4o-mini-tts",
|
| 28 |
voice=voice,
|
| 29 |
input=text
|
| 30 |
)
|
| 31 |
|
| 32 |
+
# Correct extraction method
|
|
|
|
| 33 |
audio_bytes = response.read()
|
| 34 |
|
|
|
|
| 35 |
tmp = NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 36 |
tmp.write(audio_bytes)
|
| 37 |
tmp.close()
|