Ander1 committed on
Commit
3df7fbe
·
verified ·
1 Parent(s): 24ddc97

Upload elevenlabs_stt.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. elevenlabs_stt.py +119 -0
elevenlabs_stt.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 核心依賴
2
+ import requests
3
+ from requests.adapters import HTTPAdapter
4
+ from urllib3.util.retry import Retry
5
+ from typing import Optional, Dict, Any
6
+ import ssl
7
+ import logging
8
+ from elevenlabs.client import ElevenLabs
9
+ from io import BytesIO
10
+ import time
11
+
12
+
13
# Logging setup: configure the root logger at INFO level, then create the
# module-scoped logger used by the functions in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
16
+
17
+
18
class TLSAdapter(HTTPAdapter):
    """Custom TLS adapter that works around SSL protocol problems.

    The pool manager is given a default SSL context whose cipher
    security level has been lowered to 1.
    """

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with the relaxed SSL context injected.

        All positional and keyword arguments are forwarded to the parent
        implementation; ``ssl_context`` is always overridden.
        """
        tls_context = ssl.create_default_context()
        # Lower the security level for compatibility with older protocols.
        tls_context.set_ciphers('DEFAULT@SECLEVEL=1')
        # Disable the insecure SSL versions (SSLv2 and SSLv3).
        tls_context.options = (
            tls_context.options | ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3
        )
        pool_kwargs = {**kwargs, 'ssl_context': tls_context}
        return super().init_poolmanager(*args, **pool_kwargs)
26
+
27
+
28
def create_retry_session():
    """Build a ``requests.Session`` with an automatic retry policy.

    The retry policy is attached only to the ``https://`` prefix.

    Returns:
        The configured ``requests.Session``.
    """
    # NOTE(review): this mounts the stock HTTPAdapter, not TLSAdapter --
    # confirm whether the custom TLS adapter was meant to be used here.
    retry_policy = Retry(
        total=5,  # total number of retries
        backoff_factor=1,  # backoff factor between retries
        status_forcelist=[500, 502, 503, 504],  # status codes that trigger a retry
        allowed_methods=["POST"],  # only POST requests are retried
    )
    http_session = requests.Session()
    http_session.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return http_session
40
+
41
+
42
def transcribe_audio(
    api_key: str,
    file_path: str,
    language_code: Optional[str] = None,
    diarize: bool = False,
    max_retries: int = 5,
    timeout: int = 600  # 10-minute timeout
) -> Optional[Dict[str, Any]]:
    """
    Transcribe an audio file with the ElevenLabs speech-to-text API,
    retrying failed API calls.

    Args:
        api_key: ElevenLabs API key.
        file_path: Path to the audio file.
        language_code: Optional language code (ISO-639-1 or ISO-639-3).
            ``None``, empty, or whitespace-only values are not sent.
        diarize: Whether to enable speaker diarization (limits the audio
            length to at most 8 minutes).
        max_retries: Maximum number of attempts. Values below 1 are
            treated as 1 so that the API is always tried at least once.
        timeout: Request timeout in seconds, applied to the ElevenLabs
            client.

    Returns:
        A dict with the keys ``text``, ``language_code`` and
        ``language_probability`` when the response has a ``text``
        attribute; otherwise the raw response object. ``None`` is
        returned when the file cannot be read or every attempt fails.
    """
    # Read the audio once, before any network work. A missing or unreadable
    # file is not a transient error, so retrying (and sleeping) is pointless.
    try:
        with open(file_path, 'rb') as audio_file:
            audio_bytes = audio_file.read()
    except OSError as e:
        logger.error(f"無法讀取音訊檔案 {file_path}:{str(e)}")
        return None

    # Initialise the client with the caller's timeout so that long
    # transcriptions are not cut off by the SDK's default.
    client = ElevenLabs(
        api_key=api_key,
        timeout=timeout,
    )

    requested_language = language_code.strip() if language_code else ""
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        # Build a fresh stream per attempt: a previous attempt may have
        # consumed the old one.
        params = {
            "file": BytesIO(audio_bytes),
            "model_id": "scribe_v1",
            "diarize": diarize,
            "tag_audio_events": True,
            "timestamps_granularity": "word"
        }

        # Only send the language code when it is neither None nor blank.
        if requested_language:
            params["language_code"] = requested_language

        try:
            response = client.speech_to_text.convert(**params)
        except Exception as e:
            # Retry boundary: any API/transport failure is logged and retried.
            logger.error(f"第 {attempt + 1} 次嘗試失敗:{str(e)}")
            if attempt < attempts - 1:
                wait_time = min((attempt + 1) * 5, 30)  # wait at most 30 seconds
                logger.info(f"{wait_time} 秒後重試...")
                time.sleep(wait_time)
                continue
            logger.error("已達最大重試次數,轉換失敗")
            return None

        # Normalise the response into a plain dict when it has the expected shape.
        if hasattr(response, 'text'):
            return {
                'text': response.text,
                'language_code': getattr(response, 'language_code', None),
                'language_probability': getattr(
                    response, 'language_probability', None
                )
            }
        return response
112
+
113
+ # Example usage:
114
+ # transcription = transcribe_audio(
115
+ # api_key="YOUR_API_KEY",
116
+ # file_path="audio.mp3",
117
+ # language_code="en",
118
+ # diarize=True
119
+ # )