|
|
import gradio as gr |
|
|
import os |
|
|
from elevenlabs_stt import transcribe_audio_elevenlabs |
|
|
from whisper_stt import transcribe_audio_whisper |
|
|
from transcript_refiner import refine_transcript |
|
|
from utils import calculate_tokens_and_cost, OPENAI_MODELS, MODEL_PRICES |
|
|
|
|
|
def process_audio( |
|
|
audio_file, |
|
|
openai_api_key, |
|
|
elevenlabs_api_key, |
|
|
service_choice, |
|
|
openai_model, |
|
|
language, |
|
|
speaker_detection=False, |
|
|
creativity=0.5 |
|
|
): |
|
|
try: |
|
|
if not openai_api_key or len(openai_api_key) < 20: |
|
|
return "請輸入有效的 OpenAI API 金鑰", "", "", "" |
|
|
|
|
|
if service_choice == "ElevenLabs" and (not elevenlabs_api_key or len(elevenlabs_api_key) < 20): |
|
|
return "請輸入有效的 ElevenLabs API 金鑰", "", "", "" |
|
|
|
|
|
|
|
|
if service_choice == "ElevenLabs": |
|
|
transcript = transcribe_audio_elevenlabs( |
|
|
audio_file, |
|
|
elevenlabs_api_key, |
|
|
language=language, |
|
|
speaker_detection=speaker_detection |
|
|
) |
|
|
else: |
|
|
transcript = transcribe_audio_whisper( |
|
|
audio_file, |
|
|
language=language |
|
|
) |
|
|
|
|
|
|
|
|
refined_text = refine_transcript( |
|
|
transcript, |
|
|
openai_api_key, |
|
|
openai_model, |
|
|
creativity |
|
|
) |
|
|
|
|
|
|
|
|
tokens_info, cost_info = calculate_tokens_and_cost( |
|
|
transcript, |
|
|
refined_text, |
|
|
openai_model |
|
|
) |
|
|
|
|
|
return transcript, refined_text, tokens_info, cost_info |
|
|
|
|
|
except Exception as e: |
|
|
return f"錯誤:{str(e)}", "", "", "" |
|
|
|
|
|
finally: |
|
|
|
|
|
if 'openai_api_key' in locals(): |
|
|
del openai_api_key |
|
|
if 'elevenlabs_api_key' in locals(): |
|
|
del elevenlabs_api_key |
|
|
|
|
|
|
|
|
with gr.Blocks() as demo: |
|
|
gr.Markdown("# 音訊轉文字與優化系統") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
audio_input = gr.Audio( |
|
|
label="上傳音訊檔案", |
|
|
type="filepath" |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
openai_key = gr.Textbox( |
|
|
label="OpenAI API 金鑰", |
|
|
placeholder="輸入您的 OpenAI API 金鑰", |
|
|
type="password", |
|
|
value="", |
|
|
every=None |
|
|
) |
|
|
elevenlabs_key = gr.Textbox( |
|
|
label="ElevenLabs API 金鑰", |
|
|
placeholder="輸入您的 ElevenLabs API 金鑰(如果使用 ElevenLabs)", |
|
|
type="password", |
|
|
value="", |
|
|
every=None |
|
|
) |
|
|
|
|
|
service = gr.Radio( |
|
|
choices=["Whisper", "ElevenLabs"], |
|
|
label="選擇轉錄服務", |
|
|
value="Whisper" |
|
|
) |
|
|
|
|
|
model = gr.Dropdown( |
|
|
choices=list(OPENAI_MODELS.keys()), |
|
|
label="選擇 OpenAI 模型", |
|
|
value="gpt-3.5-turbo" |
|
|
) |
|
|
|
|
|
language = gr.Textbox( |
|
|
label="語言(可選)", |
|
|
placeholder="輸入語言代碼,例如:zh-TW、en、ja", |
|
|
value="" |
|
|
) |
|
|
|
|
|
speaker = gr.Checkbox( |
|
|
label="啟用說話者辨識(僅限 ElevenLabs)", |
|
|
value=False |
|
|
) |
|
|
|
|
|
creativity = gr.Slider( |
|
|
minimum=0, |
|
|
maximum=1, |
|
|
value=0.5, |
|
|
label="創意程度" |
|
|
) |
|
|
|
|
|
process_btn = gr.Button("處理音訊") |
|
|
|
|
|
with gr.Column(): |
|
|
original_output = gr.Textbox( |
|
|
label="原始轉錄文字", |
|
|
lines=10 |
|
|
) |
|
|
refined_output = gr.Textbox( |
|
|
label="優化後文字", |
|
|
lines=10 |
|
|
) |
|
|
token_info = gr.Textbox( |
|
|
label="Token 使用資訊", |
|
|
lines=3 |
|
|
) |
|
|
cost_info = gr.Textbox( |
|
|
label="費用資訊", |
|
|
lines=3 |
|
|
) |
|
|
|
|
|
gr.Markdown(""" |
|
|
### 安全性說明 |
|
|
- API 金鑰僅在當前處理中使用 |
|
|
- 不會儲存任何敏感資訊 |
|
|
- 每次使用需重新輸入 API 金鑰 |
|
|
""") |
|
|
|
|
|
|
|
|
process_btn.click( |
|
|
fn=process_audio, |
|
|
inputs=[ |
|
|
audio_input, |
|
|
openai_key, |
|
|
elevenlabs_key, |
|
|
service, |
|
|
model, |
|
|
language, |
|
|
speaker, |
|
|
creativity |
|
|
], |
|
|
outputs=[ |
|
|
original_output, |
|
|
refined_output, |
|
|
token_info, |
|
|
cost_info |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
demo.launch() |