Ander1 committed on
Commit
bcd4554
·
verified ·
1 Parent(s): 3df7fbe

Upload whisper_stt.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. whisper_stt.py +84 -0
whisper_stt.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import logging
3
+ from typing import Optional, Dict, Any
4
+ import torch
5
+
6
+ # Configure logging: set up root logging at INFO level and create this module's logger.
7
+ logging.basicConfig(level=logging.INFO)
8
+ logger = logging.getLogger(__name__)
9
+
10
def transcribe_audio_whisper(
    file_path: str,
    model_name: str = "base",
    language: Optional[str] = None,
    initial_prompt: Optional[str] = None,
    task: str = "transcribe"
) -> Optional[Dict[str, Any]]:
    """Convert an audio file to text with a Whisper model.

    The model is loaded on CUDA when available, otherwise on the CPU.

    Args:
        file_path: Path to the audio file.
        model_name: Whisper model name ("tiny", "base", "small", "medium",
            "large").
        language: Language of the audio as an ISO 639-1 code (e.g. "zh" for
            Chinese). Not passed to the model when empty or None.
        initial_prompt: Initial prompt text. Not passed to the model when
            empty or None.
        task: Task type, "transcribe" or "translate".

    Returns:
        A dict with the keys "text", "language" and "segments", or None if
        any step fails. Failures are logged together with their traceback.
    """
    try:
        # Prefer the GPU whenever CUDA is available.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info("使用設備: %s", device)

        # Load the requested model onto the selected device.
        logger.info("載入 Whisper %s 模型...", model_name)
        model = whisper.load_model(model_name, device=device)

        # Transcription options; optional ones are added only when supplied.
        options: Dict[str, Any] = {
            "task": task,
            "verbose": True,
            # Whisper defaults to fp16 and falls back to fp32 on CPU with a
            # warning on every call; request the right precision up front.
            "fp16": device == "cuda",
        }
        if language:
            options["language"] = language
        if initial_prompt:
            options["initial_prompt"] = initial_prompt

        # Run the transcription.
        logger.info("開始轉錄...")
        result = model.transcribe(file_path, **options)

        # Keep only the fields callers rely on.
        response = {
            "text": result["text"],
            "language": result.get("language", "unknown"),
            "segments": result.get("segments", []),
        }

        logger.info("轉錄完成")
        return response

    except Exception as e:
        # Best-effort contract: report the failure (with traceback, so the
        # root cause is not lost) and signal it to the caller with None.
        logger.exception("轉錄失敗:%s", e)
        return None
66
+
67
def get_available_models() -> list:
    """Return the list of available Whisper model names.

    A new list object is created on every call.
    """
    model_names = ("tiny", "base", "small", "medium", "large")
    return list(model_names)
72
+
73
def get_model_description(model_name: str) -> str:
    """Return the description text for a Whisper model name.

    Names that are not in the description table yield a fallback
    "unknown model" string instead of raising.
    """
    known = dict(
        tiny="最小的模型,速度最快但準確度較低",
        base="基礎模型,平衡速度和準確度",
        small="小型模型,準確度較好",
        medium="中型模型,準確度高",
        large="最大的模型,準確度最高但需要較多資源",
    )
    if model_name in known:
        return known[model_name]
    return "未知模型"