# Dibiddo's picture
# Update app.py
# bcd57fb verified
# raw
# history blame
# 2.82 kB
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
import spacy
import gradio as gr
# Select the compute device and matching precision: half precision on GPU,
# full precision on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Whisper model initialisation (speech-to-text).
whisper_model_id = "openai/whisper-large-v3"
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    whisper_model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
whisper_model.to(device)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    # Hand the dtype to the pipeline as well, as the model card's usage
    # example does, so the audio features are cast to the same precision as
    # the model weights when running in float16 on GPU.
    torch_dtype=torch_dtype,
    device=device,
)
# spaCy initialisation (entity tagging of the generated text).
# Loading is best-effort: on any failure the error is printed and `nlp` is
# left as None.
try:
    nlp = spacy.load("en_core_web_sm")
except Exception as e:
    nlp = None
    print(f"加載spaCy模型時出現錯誤:{e}")
# Lazily created T5 pipeline, shared by every request so the model is loaded
# once instead of on each call.
_t5_pipe = None


def _get_t5_pipe():
    """Return the shared T5 pipeline, building it on first use."""
    global _t5_pipe
    if _t5_pipe is None:
        # T5 is an encoder-decoder model, so it must be served through the
        # "text2text-generation" task; "text-generation" only loads
        # decoder-only (causal) models and fails for t5-base.
        _t5_pipe = pipeline("text2text-generation", model="t5-base")
    return _t5_pipe


def process_audio(audio_file):
    """Transcribe an audio file, generate a T5 response and tag its entities.

    Args:
        audio_file: Path of the recorded audio file, or None/empty when the
            user submitted without recording anything.

    Returns:
        A dict for the JSON output component. On success it holds the
        keys "Transcription (Whisper)", "AI Response (T5)" and
        "Extracted Entities (spaCy)". If the T5 or spaCy step fails, or no
        audio was supplied, it holds "Transcription (Whisper)" and "Error".
    """
    if not audio_file:
        # Nothing was recorded; report that instead of letting the speech
        # pipeline fail on a missing path.
        return {
            "Transcription (Whisper)": "",
            "Error": "No audio input was provided.",
        }

    # Speech to text.
    result = whisper_pipe(audio_file)["text"]

    try:
        # T5 takes plain text (it has no chat template), so pass the
        # transcription string directly.
        t5_response = _get_t5_pipe()(result)[0]["generated_text"]

        # Entity extraction is skipped when the spaCy model failed to load.
        doc = nlp(t5_response) if nlp is not None else None
        entities = (
            [(ent.text, ent.label_) for ent in doc.ents] if doc is not None else []
        )
    except Exception as e:
        # Keep the transcription, but surface the failure instead of
        # swallowing it silently.
        print(f"Error while generating or analysing the response: {e}")
        return {
            "Transcription (Whisper)": result,
            "Error": str(e),
        }

    return {
        "Transcription (Whisper)": result,
        "AI Response (T5)": t5_response,
        "Extracted Entities (spaCy)": entities,
    }
# Gradio UI: a microphone input and a JSON result panel side by side, with a
# submit button that runs the audio through process_audio.
with gr.Blocks() as app:
    with gr.Row():
        try:
            # Gradio 3.x keyword.
            audio_input = gr.Audio(source="microphone", type="filepath", label="上傳語音")
        except TypeError:
            # Gradio 4+ replaced `source` with the list-valued `sources`
            # and rejects the old keyword.
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="上傳語音")
        output_text = gr.JSON(label="結果")
    submit_button = gr.Button("提交")
    submit_button.click(fn=process_audio, inputs=[audio_input], outputs=[output_text])

if __name__ == "__main__":
    app.launch()