Update tts_subtitles.py
Browse files- tts_subtitles.py +28 -19
tts_subtitles.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
import os
|
|
|
|
|
|
|
| 2 |
import requests
|
| 3 |
|
| 4 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
@@ -6,6 +8,24 @@ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
|
|
| 6 |
|
| 7 |
HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
|
| 10 |
url = f"{OPENAI_BASE_URL}/audio/speech"
|
| 11 |
payload = {
|
|
@@ -14,29 +34,18 @@ def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model:
|
|
| 14 |
"input": text,
|
| 15 |
"format": format,
|
| 16 |
}
|
| 17 |
-
|
| 18 |
-
r = requests.post(url, headers=headers, json=payload, timeout=300)
|
| 19 |
-
r.raise_for_status()
|
| 20 |
with open(out_path, "wb") as f:
|
| 21 |
f.write(r.content)
|
| 22 |
return out_path
|
| 23 |
|
| 24 |
def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
|
| 25 |
url = f"{OPENAI_BASE_URL}/audio/transcriptions"
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
return r.text
|
| 33 |
-
|
| 34 |
-
# --- Optional: pyannote による VAD/クリーンアップ(重いモデルのためデフォルト無効) ---
|
| 35 |
-
# from pyannote.audio import Pipeline
|
| 36 |
-
# def refine_srt_with_pyannote(audio_path: str, srt_text: str) -> str:
|
| 37 |
-
# token = os.getenv("HUGGINGFACE_TOKEN")
|
| 38 |
-
# if not token:
|
| 39 |
-
# return srt_text
|
| 40 |
-
# pipeline = Pipeline.from_pretrained("pyannote/segmentation", use_auth_token=token)
|
| 41 |
-
# # ここで音声区間検出→SRTのタイミングを補正する処理を実装(省略)
|
| 42 |
-
# return srt_text
|
|
|
|
| 1 |
import os
|
| 2 |
+
import time
|
| 3 |
+
import random
|
| 4 |
import requests
|
| 5 |
|
| 6 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
|
| 8 |
|
| 9 |
HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
|
| 10 |
|
| 11 |
+
def _request_with_retry(method: str, url: str, *, json=None, files=None, data=None, stream=False):
    """Issue an HTTP request with bounded retries on transient failures.

    Retries on network-level errors (read timeouts, connection errors) and on
    retryable HTTP statuses (429, 500, 502, 503, 504) using exponential
    backoff with jitter. Any other HTTP error is raised immediately.

    Tunable via environment variables: OPENAI_TIMEOUT (seconds, default 600),
    OPENAI_RETRY_MAX (attempts, default 5), OPENAI_BACKOFF_BASE (default 1.7).

    Args:
        method: HTTP method name, e.g. "post".
        url: Full request URL.
        json / files / data / stream: Passed through to requests.request.

    Returns:
        The successful requests.Response.

    Raises:
        requests.exceptions.RequestException: the last retryable error once
            all attempts are exhausted, or a non-retryable HTTPError
            immediately. (The original returned None after exhausting
            retries on a retryable HTTP status — fixed here.)
    """
    timeout = int(os.getenv("OPENAI_TIMEOUT", "600"))
    max_retries = int(os.getenv("OPENAI_RETRY_MAX", "5"))
    backoff_base = float(os.getenv("OPENAI_BACKOFF_BASE", "1.7"))
    last_error = None
    for attempt in range(max_retries):
        try:
            r = requests.request(
                method, url, headers=HEADERS_AUTH,
                json=json, files=files, data=data,
                stream=stream, timeout=timeout,
            )
            r.raise_for_status()
            return r
        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
            last_error = e
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            if status not in (429, 500, 502, 503, 504):
                raise  # non-retryable (4xx other than 429): fail fast
            last_error = e
        # Exponential backoff with jitter; skip the pointless sleep after
        # the final attempt.
        if attempt < max_retries - 1:
            time.sleep((backoff_base ** attempt) + random.uniform(0, 0.5))
    # All attempts failed with a retryable error: surface the last one
    # instead of silently returning None.
    raise last_error
|
| 28 |
+
|
| 29 |
def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
    """Synthesize speech for *text* via the OpenAI TTS endpoint.

    Posts to {OPENAI_BASE_URL}/audio/speech and writes the returned audio
    bytes to *out_path*.

    Args:
        text: Text to synthesize.
        out_path: Destination file path for the audio.
        voice: TTS voice name.
        model: TTS model identifier.
        format: Output audio format (note: shadows the builtin; kept for
            backward compatibility with existing callers).

    Returns:
        out_path, for convenient chaining.
    """
    endpoint = f"{OPENAI_BASE_URL}/audio/speech"
    # NOTE(review): the "model"/"voice" payload entries were hidden by the
    # diff's context omission and are reconstructed from the signature —
    # confirm against the original file.
    payload = {
        "model": model,
        "voice": voice,
        "input": text,
        "format": format,
    }
    response = _request_with_retry("post", endpoint, json=payload, stream=False)
    with open(out_path, "wb") as audio_file:
        audio_file.write(response.content)
    return out_path
|
| 41 |
|
| 42 |
def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
    """Transcribe an audio file to SRT subtitles via the OpenAI API.

    Posts the file to {OPENAI_BASE_URL}/audio/transcriptions with
    response_format=srt and returns the raw SRT text.

    Args:
        audio_path: Path to the audio file to transcribe.
        model: Transcription model identifier.

    Returns:
        The SRT-formatted transcript as a string.

    Raises:
        OSError: if the audio file cannot be read.
        requests.exceptions.RequestException: on unrecoverable HTTP failure.
    """
    url = f"{OPENAI_BASE_URL}/audio/transcriptions"
    # Read the file into memory once: a bytes payload can be re-sent on
    # every retry, whereas an open file handle is consumed by the first
    # attempt — the original passed the live handle into the retry loop,
    # so any retry posted an empty body.
    with open(audio_path, "rb") as af:
        audio_bytes = af.read()
    files = {"file": (os.path.basename(audio_path), audio_bytes)}
    data = {"model": model, "response_format": "srt"}
    r = _request_with_retry("post", url, files=files, data=data)
    return r.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|