Corin1998 committed on
Commit
71bd215
·
verified ·
1 Parent(s): 13fec14

Update tts_subtitles.py

Browse files
Files changed (1) hide show
  1. tts_subtitles.py +28 -19
tts_subtitles.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import requests
3
 
4
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -6,6 +8,24 @@ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
6
 
7
  HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
10
  url = f"{OPENAI_BASE_URL}/audio/speech"
11
  payload = {
@@ -14,29 +34,18 @@ def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model:
14
  "input": text,
15
  "format": format,
16
  }
17
- headers = {**HEADERS_AUTH, "Content-Type": "application/json"}
18
- r = requests.post(url, headers=headers, json=payload, timeout=300)
19
- r.raise_for_status()
20
  with open(out_path, "wb") as f:
21
  f.write(r.content)
22
  return out_path
23
 
24
  def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
25
  url = f"{OPENAI_BASE_URL}/audio/transcriptions"
26
- headers = HEADERS_AUTH
27
- with open(audio_path, "rb") as af:
28
- files = {"file": af}
29
- data = {"model": model, "response_format": "srt"}
30
- r = requests.post(url, headers=headers, files=files, data=data, timeout=600)
31
- r.raise_for_status()
 
32
  return r.text
33
-
34
- # --- Optional: pyannote による VAD/クリーンアップ(重いモデルのためデフォルト無効) ---
35
- # from pyannote.audio import Pipeline
36
- # def refine_srt_with_pyannote(audio_path: str, srt_text: str) -> str:
37
- # token = os.getenv("HUGGINGFACE_TOKEN")
38
- # if not token:
39
- # return srt_text
40
- # pipeline = Pipeline.from_pretrained("pyannote/segmentation", use_auth_token=token)
41
- # # ここで音声区間検出→SRTのタイミングを補正する処理を実装(省略)
42
- # return srt_text
 
1
  import os
2
+ import time
3
+ import random
4
  import requests
5
 
6
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
8
 
9
  HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
10
 
11
def _request_with_retry(method: str, url: str, *, json=None, files=None, data=None, stream=False):
    """Issue an HTTP request to the OpenAI API with retries and backoff.

    Retries transient network failures (read timeout, connection error) and
    retryable HTTP statuses (429, 500, 502, 503, 504), sleeping with
    exponential backoff plus jitter between attempts. Tunables come from the
    environment: OPENAI_TIMEOUT (seconds), OPENAI_RETRY_MAX (attempt count),
    OPENAI_BACKOFF_BASE (backoff exponent base).

    Returns:
        The successful ``requests.Response``.

    Raises:
        requests.exceptions.RequestException: the last error once all
        attempts are exhausted, or immediately for non-retryable HTTP
        statuses (e.g. 400/401/404).
    """
    timeout = int(os.getenv("OPENAI_TIMEOUT", "600"))
    max_retries = int(os.getenv("OPENAI_RETRY_MAX", "5"))
    backoff_base = float(os.getenv("OPENAI_BACKOFF_BASE", "1.7"))
    for attempt in range(max_retries):
        try:
            r = requests.request(
                method, url, headers=HEADERS_AUTH,
                json=json, files=files, data=data,
                stream=stream, timeout=timeout,
            )
            r.raise_for_status()
            return r
        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
            # Transient network failure: retry unless attempts are exhausted.
            if attempt == max_retries - 1:
                raise
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            # Non-retryable client/server status: surface immediately.
            if status not in (429, 500, 502, 503, 504):
                raise
            # BUG FIX: the original fell out of the loop after a retryable
            # HTTP error on the final attempt and implicitly returned None,
            # making the caller crash later on `r.content`. Re-raise instead.
            if attempt == max_retries - 1:
                raise
        # Exponential backoff with jitter before the next attempt (only
        # reached when another attempt remains).
        time.sleep((backoff_base ** attempt) + random.uniform(0, 0.5))
29
  def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
30
  url = f"{OPENAI_BASE_URL}/audio/speech"
31
  payload = {
 
34
  "input": text,
35
  "format": format,
36
  }
37
+ r = _request_with_retry("post", url, json=payload, stream=False)
 
 
38
  with open(out_path, "wb") as f:
39
  f.write(r.content)
40
  return out_path
41
 
42
def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
    """Transcribe an audio file to SRT subtitle text via the OpenAI API.

    Args:
        audio_path: Path to the audio file to upload.
        model: Transcription model name (default ``whisper-1``).

    Returns:
        The SRT-formatted transcription returned by the API.

    Raises:
        OSError: if the audio file cannot be read.
        requests.exceptions.RequestException: propagated from the HTTP layer
        once retries are exhausted.
    """
    url = f"{OPENAI_BASE_URL}/audio/transcriptions"
    # BUG FIX: the original wrapped the call in a `_call()` closure with a
    # comment claiming the file is "reopened on each retry" — but `_call()`
    # was invoked exactly once, while retries happen *inside*
    # _request_with_retry with the same open handle. After the first attempt
    # reads the file, the handle is exhausted, so a retried upload would send
    # an empty body. Buffer the bytes once so every retry re-sends the full
    # payload.
    with open(audio_path, "rb") as af:
        audio_bytes = af.read()
    files = {"file": (os.path.basename(audio_path), audio_bytes)}
    data = {"model": model, "response_format": "srt"}
    r = _request_with_retry("post", url, files=files, data=data)
    return r.text