Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from dotenv import load_dotenv | |
| import os | |
| from elevenlabs_stt import transcribe_audio as transcribe_audio_elevenlabs | |
| from whisper_stt import transcribe_audio_whisper, get_available_models, get_model_description | |
| from transcript_refiner import refine_transcript, OPENAI_MODELS | |
| from utils import check_file_size, split_large_audio | |
| import logging | |
# Load environment variables (python-dotenv).
load_dotenv()
# Configure logging: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Available OpenAI models, mapping each model id to itself.
# NOTE(review): this rebinds the OPENAI_MODELS name that is also imported from
# transcript_refiner at the top of the file - confirm which one is intended.
OPENAI_MODELS = {
    model_id: model_id
    for model_id in ("gpt-4o", "gpt-4o-mini", "o3-mini", "o1-mini")
}

# Pricing table in USD per 1M tokens, one row per model:
# (model id, input price, cached-input price, output price)
_PRICING_ROWS = (
    ("gpt-4o", 2.50, 1.25, 10.00),
    ("gpt-4o-mini", 0.15, 0.075, 0.60),
    ("o1-mini", 1.10, 0.55, 4.40),
    ("o3-mini", 1.10, 0.55, 4.40),
)

# Model settings and prices (USD per 1M tokens), keyed by model id.
MODEL_CONFIG = {
    model_id: {
        "display_name": model_id,
        "input": input_price,
        "cached_input": cached_price,
        "output": output_price,
    }
    for model_id, input_price, cached_price, output_price in _PRICING_ROWS
}

# Exchange rate used to convert USD costs to NTD.
USD_TO_NTD = 31.5
def calculate_cost(input_tokens, output_tokens, model_name, is_cached=False):
    """Compute the API usage cost for a given token count.

    Args:
        input_tokens (int): Number of input tokens.
        output_tokens (int): Number of output tokens.
        model_name (str): Model id; must be a key of MODEL_CONFIG
            (gpt-4o, gpt-4o-mini, o1-mini, o3-mini).
        is_cached (bool, optional): Price the input tokens at the
            cached-input rate instead of the regular rate. Defaults to False.

    Returns:
        tuple: (cost in USD, cost in NTD, human-readable breakdown string).
        For a model that is not in MODEL_CONFIG the result is
        (0, 0, "未支援的模型").
    """
    pricing = MODEL_CONFIG.get(model_name)
    if pricing is None:
        return 0, 0, "未支援的模型"

    # Pick the per-1M-token unit prices for this request.
    unit_in = pricing["cached_input"] if is_cached else pricing["input"]
    unit_out = pricing["output"]

    # Prices are quoted per 1M tokens, so scale the token counts first.
    cost_in = (input_tokens / 1_000_000) * unit_in
    cost_out = (output_tokens / 1_000_000) * unit_out
    usd_total = cost_in + cost_out
    ntd_total = usd_total * USD_TO_NTD

    # Human-readable breakdown shown in the UI.
    details = (
        "\n"
        "計算明細 (USD):\n"
        f"- 輸入: {input_tokens:,} tokens × ${unit_in}/1M = ${cost_in:.4f}\n"
        f"- 輸出: {output_tokens:,} tokens × ${unit_out}/1M = ${cost_out:.4f}\n"
        f"- 總計 (USD): ${usd_total:.4f}\n"
        f"- 總計 (NTD): NT${ntd_total:.2f}\n"
    )
    return usd_total, ntd_total, details
# Show the cost information in the Streamlit interface.
def display_cost_info(input_tokens, output_tokens, model_name, is_cached=False):
    """Render token usage and the cost breakdown in a sidebar expander.

    Args:
        input_tokens (int): Number of input tokens.
        output_tokens (int): Number of output tokens.
        model_name (str): Model id passed through to calculate_cost.
        is_cached (bool, optional): Whether cached-input pricing was used;
            also triggers an informational note. Defaults to False.
    """
    # Only the breakdown text is displayed here; the numeric totals are unused.
    _, _, breakdown = calculate_cost(input_tokens, output_tokens, model_name, is_cached)
    token_total = input_tokens + output_tokens

    # NOTE(review): SOURCE lost its indentation; everything below is assumed to
    # live inside the expander - confirm against the original file.
    with st.sidebar.expander("💰 成本計算", expanded=True):
        st.write("### Token 使用量")
        st.write(f"- 輸入: {input_tokens:,} tokens")
        st.write(f"- 輸出: {output_tokens:,} tokens")
        st.write(f"- 總計: {token_total:,} tokens")
        if token_total == 0:
            # Zero usage usually means the token counts were never filled in.
            st.warning("目前 token 使用量為 0,請確認是否已正確計算 token 數量!")

        st.write("### 費用明細")
        st.text(breakdown)
        if is_cached:
            st.info("✨ 使用快取價格計算")
def _remove_temp_file(path):
    """Delete a temporary file; log instead of raising if that fails."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already removed (e.g. a segment cleaned up right after use).
        pass
    except OSError:
        logger.warning("Could not remove temporary file: %s", path)


def _transcribe_file(
    file_path,
    transcription_service,
    *,
    whisper_model=None,
    language_code=None,
    context_prompt=None,
    elevenlabs_api_key=None,
    enable_diarization=False,
):
    """Transcribe one audio file with the selected service and return its result."""
    if transcription_service == "Whisper":
        return transcribe_audio_whisper(
            file_path,
            model_name=whisper_model,
            language=language_code,
            initial_prompt=context_prompt,
        )
    return transcribe_audio_elevenlabs(
        api_key=elevenlabs_api_key,
        file_path=file_path,
        diarize=enable_diarization,
    )


def _transcribe_upload(uploaded_file, transcription_service, **transcribe_options):
    """Save the upload to a temp file, transcribe it, and always clean up.

    Returns:
        str | None: The transcript text ("" when nothing was transcribed), or
        None when the file had to be split but splitting produced no segments.
    """
    import tempfile

    # Keep only the extension of the user-supplied name: the name itself is
    # untrusted and would collide between concurrent sessions.
    suffix = os.path.splitext(uploaded_file.name)[1]
    pending_cleanup = []
    try:
        with tempfile.NamedTemporaryFile(
            prefix="temp_", suffix=suffix, delete=False
        ) as tmp:
            temp_path = tmp.name
            pending_cleanup.append(temp_path)
            tmp.write(uploaded_file.getbuffer())

        # check_file_size() returning a truthy value means the file must be split.
        if not check_file_size(temp_path):
            result = _transcribe_file(
                temp_path, transcription_service, **transcribe_options
            )
            return result["text"] if result else ""

        audio_segments = split_large_audio(temp_path)
        if not audio_segments:
            return None
        pending_cleanup.extend(audio_segments)

        progress_bar = st.progress(0)
        pieces = []
        for i, segment_path in enumerate(audio_segments):
            result = _transcribe_file(
                segment_path, transcription_service, **transcribe_options
            )
            if result:
                pieces.append(result["text"] + "\n")
            progress_bar.progress((i + 1) / len(audio_segments))
            # Free disk space as soon as a segment has been processed.
            _remove_temp_file(segment_path)
        return "".join(pieces)
    finally:
        # Runs on success, early return and exceptions alike, so no temp file
        # is left behind when transcription fails part-way.
        for path in pending_cleanup:
            _remove_temp_file(path)


def main():
    """Run the Streamlit page: configure, upload, transcribe, refine, show cost.

    The sidebar collects the transcription service (Whisper or ElevenLabs), the
    API keys, the OpenAI model and a few options. When a file is uploaded and
    the button is pressed, the audio is transcribed (split into segments first
    when check_file_size() says so), refined with refine_transcript(), and the
    token usage and cost are displayed.

    Temporary files are written through the tempfile module and removed in a
    finally block, so a failed transcription no longer leaves files on disk.
    """
    st.title("音訊轉文字與優化系統")

    # Initialise the token counters kept across Streamlit reruns.
    for counter_name in ("input_tokens", "output_tokens", "total_tokens"):
        if counter_name not in st.session_state:
            st.session_state[counter_name] = 0

    # Reset the stored OpenAI model to the default (o3-mini) when it is missing
    # or not one of the models offered in the selectbox below.
    openai_model_options = ["gpt-4o", "gpt-4o-mini", "o1-mini", "o3-mini"]
    if (
        "openai_model" not in st.session_state
        or st.session_state["openai_model"] not in openai_model_options
    ):
        st.session_state["openai_model"] = "o3-mini"
    if "whisper_model" not in st.session_state:
        st.session_state["whisper_model"] = "small"

    with st.sidebar:
        st.header("設定")

        # Choose the transcription service.
        transcription_service = st.selectbox(
            "選擇轉錄服務",
            ["Whisper", "ElevenLabs"],
            index=0,
            help="選擇要使用的語音轉文字服務",
        )

        # Bind the Whisper-only settings up front so they exist on every path.
        whisper_model = st.session_state["whisper_model"]
        language_code = None

        # Whisper settings.
        # NOTE(review): SOURCE lost its indentation; the language controls are
        # assumed to belong to the Whisper branch because only the Whisper call
        # consumes language_code - confirm against the original file.
        if transcription_service == "Whisper":
            whisper_model = st.selectbox(
                "選擇 Whisper 模型",
                options=["tiny", "base", "small", "medium", "large"],
                index=2,  # default is "small" (third option)
            )
            st.session_state["whisper_model"] = whisper_model
            st.caption(get_model_description(whisper_model))

            # Language settings.
            language_mode = st.radio(
                "語言設定",
                options=["自動偵測", "指定語言", "混合語言"],
                help="選擇音訊的語言處理模式",
            )
            if language_mode == "指定語言":
                languages = {
                    "中文 (繁體/簡體)": "zh",
                    "英文": "en",
                    "日文": "ja",
                    "韓文": "ko",
                    "其他": "custom",
                }
                selected_lang = st.selectbox(
                    "選擇語言",
                    options=list(languages.keys()),
                )
                if selected_lang == "其他":
                    custom_lang = st.text_input(
                        "輸入語言代碼",
                        placeholder="例如:fr 代表法文",
                        help="請輸入 ISO 639-1 語言代碼",
                    )
                    language_code = custom_lang if custom_lang else None
                else:
                    language_code = languages[selected_lang]

        # ElevenLabs settings.
        elevenlabs_api_key = None
        if transcription_service == "ElevenLabs":
            elevenlabs_api_key = st.text_input(
                "ElevenLabs API 金鑰",
                type="password",
            )

        # OpenAI API key and model selection.
        openai_api_key = st.text_input(
            "OpenAI API 金鑰",
            type="password",
        )
        model_choice = st.selectbox(
            "選擇 OpenAI 模型",
            options=openai_model_options,
            index=openai_model_options.index("o3-mini"),  # default: o3-mini
            help="選擇要使用的 OpenAI 模型",
        )
        st.session_state["openai_model"] = model_choice

        # Other settings.
        enable_diarization = st.checkbox("啟用說話者辨識", value=False)
        temperature = st.slider("創意程度", 0.0, 1.0, 0.5)

        # Author information.
        st.markdown("---")
        st.markdown(
            "\n"
            "### Created by\n"
            "**Tseng Yao Hsien**\n"
            "Endocrinologist\n"
            "Tungs' Taichung MetroHarbor Hospital\n"
        )

    # Pricing reference.
    with st.sidebar.expander("💡 模型價格說明(USD per 1M tokens)"):
        st.write(
            "\n"
            "### gpt-4o\n"
            "- 輸入:$2.50 / 1M tokens\n"
            "- 快取輸入:$1.25 / 1M tokens\n"
            "- 輸出:$10.00 / 1M tokens\n"
            "### gpt-4o-mini\n"
            "- 輸入:$0.15 / 1M tokens\n"
            "- 快取輸入:$0.075 / 1M tokens\n"
            "- 輸出:$0.60 / 1M tokens\n"
            "### o1-mini & o3-mini\n"
            "- 輸入:$1.10 / 1M tokens\n"
            "- 快取輸入:$0.55 / 1M tokens\n"
            "- 輸出:$4.40 / 1M tokens\n"
            "### 匯率\n"
            "- 1 USD = 31.5 NTD\n"
        )

    # Optional context prompt.
    with st.expander("提示詞設定(選填)", expanded=False):
        context_prompt = st.text_area(
            "請輸入相關提示詞",
            placeholder="例如:\n- 這是一段醫學演講\n- 包含專有名詞:糖尿病、胰島素\n- 主要討論糖尿病的治療方法",
            help="提供音訊內容的相關資訊,可以幫助 AI 更準確地理解和轉錄內容",
        )

    # File upload.
    uploaded_file = st.file_uploader("上傳音訊檔案", type=["mp3", "wav", "ogg", "m4a"])
    if not (uploaded_file and st.button("處理音訊")):
        return

    if not openai_api_key:
        st.error("請提供 OpenAI API 金鑰")
        return
    if transcription_service == "ElevenLabs" and not elevenlabs_api_key:
        st.error("請提供 ElevenLabs API 金鑰")
        return

    try:
        with st.spinner("處理中..."):
            full_transcript = _transcribe_upload(
                uploaded_file,
                transcription_service,
                whisper_model=whisper_model,
                language_code=language_code,
                context_prompt=context_prompt,
                elevenlabs_api_key=elevenlabs_api_key,
                enable_diarization=enable_diarization,
            )
            if full_transcript is None:
                st.error("檔案分割失敗")
                return
            if not full_transcript:
                st.error("轉錄失敗")
                return

            # Show the raw transcript.
            st.subheader("原始轉錄文字")
            st.text_area("原始文字", full_transcript, height=200)

            # Refine the transcript with the chosen OpenAI model.
            refined = refine_transcript(
                raw_text=full_transcript,
                api_key=openai_api_key,
                model=model_choice,
                temperature=temperature,
                context=context_prompt,
            )
            if not refined:
                st.error("文字優化失敗")
                return

            st.subheader("優化後的文字")
            st.text_area("修正後的文字", refined["corrected"], height=200)
            st.subheader("文字摘要")
            st.text_area("摘要", refined["summary"], height=200)

            # Update the token statistics from the "usage" entry of the result;
            # "or {}" also covers a usage value of None.
            current_usage = refined.get("usage") or {}
            st.session_state.input_tokens = current_usage.get("total_input_tokens", 0)
            st.session_state.output_tokens = current_usage.get("total_output_tokens", 0)
            st.session_state.total_tokens = (
                st.session_state.input_tokens + st.session_state.output_tokens
            )

            # Cost summary in the main area.
            st.markdown("---")
            st.markdown("### 💰 費用統計")
            st.markdown("#### 總計")
            st.markdown(f"總 Tokens: **{st.session_state.total_tokens:,}**")
            _, total_cost_ntd, _ = calculate_cost(
                st.session_state.input_tokens,
                st.session_state.output_tokens,
                model_choice,
                is_cached=False,
            )
            st.markdown(f"總費用: **NT$ {total_cost_ntd:.2f}**")

            # Detailed cost information in the sidebar.
            display_cost_info(
                st.session_state.input_tokens,
                st.session_state.output_tokens,
                model_choice,
                is_cached=False,
            )
    except Exception as e:
        # Top-level UI boundary: report to the user and keep the traceback in
        # the log for debugging.
        st.error(f"處理失敗:{str(e)}")
        logger.exception("處理失敗:%s", e)
# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()