Spaces:

wang0507
/

zhuanti

Runtime error

App Files Files Community

zhuanti / app.py

wang0507

Update app.py

88ba053 over 2 years ago

raw

history blame contribute delete

2.29 kB

	import os
	import gradio as gr
	import whisper
	import youtube_dl

	# Install the Whisper library from OpenAI's GitHub repository at startup.
	# NOTE(review): `import whisper` at the top of the file has already executed
	# by the time this command runs - confirm the install is still needed here.
	_whisper_install_cmd = "pip install git+https://github.com/openai/whisper.git"
	os.system(_whisper_install_cmd)

	# Load the pretrained Whisper model named "base"; the inference code below
	# reads it through the module-level name `model`.
	_whisper_model_name = "base"
	model = whisper.load_model(_whisper_model_name)

	# Inference function that transcribes an audio file with Whisper.
	def inference_audio(audio):
	    """Transcribe an audio file with the module-level Whisper ``model``.

	    Args:
	        audio: Path to the audio file to transcribe. ``None`` or an empty
	            string is treated as "no audio submitted".

	    Returns:
	        The decoded text, or an empty string when no audio was provided.
	    """
	    if not audio:
	        # Nothing to transcribe; avoid handing an empty path to the loader.
	        return ""

	    samples = whisper.load_audio(audio)
	    # Pad or cut the waveform to the fixed-length window the model expects,
	    # so only that window of a longer recording is transcribed.
	    samples = whisper.pad_or_trim(samples)
	    mel = whisper.log_mel_spectrogram(samples).to(model.device)

	    # No language is set in the options, so decoding detects it on its own;
	    # a separate detect_language pass (whose result was unused) is not needed.
	    # fp16=False keeps decoding out of half precision.
	    options = whisper.DecodingOptions(fp16=False)
	    result = whisper.decode(model, mel, options)
	    return result.text

	def inference_text(text):
	    """Return the submitted text prefixed with a fixed result label.

	    Placeholder: real text processing (for example converting the text to
	    audio and running Whisper on it) has not been implemented yet.
	    """
	    result_label = "文本推斷結果："
	    labelled_text = result_label + text
	    return labelled_text

	# Function that downloads a YouTube video's audio with youtube-dl and
	# transcribes it with Whisper.
	def extract_text_from_youtube(url):
	    """Download the audio track of a video and return its transcription.

	    The audio is fetched through the youtube-dl Python API (no shell command
	    is built from the URL), converted to WAV by the FFmpegExtractAudio
	    post-processor, and passed to ``inference_audio``.

	    Args:
	        url: Address of the video. ``None``, empty, or whitespace-only input
	            is treated as "nothing submitted".

	    Returns:
	        The transcribed text, or an empty string when no URL was provided.

	    Errors raised by youtube-dl or by the transcription step propagate to
	    the caller.
	    """
	    import tempfile

	    if not url or not url.strip():
	        return ""

	    # A per-call temporary directory keeps concurrent requests from sharing
	    # one file name and guarantees cleanup even when a step fails.
	    with tempfile.TemporaryDirectory() as workdir:
	        ydl_opts = {
	            'format': 'bestaudio/best',
	            # Single video only, so exactly one output file is produced.
	            'noplaylist': True,
	            'outtmpl': os.path.join(workdir, 'audio.%(ext)s'),
	            'postprocessors': [{
	                'key': 'FFmpegExtractAudio',
	                'preferredcodec': 'wav',
	                'preferredquality': '192',
	            }],
	        }
	        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url.strip()])

	        # The post-processor rewrites the download as <outtmpl stem>.wav.
	        wav_path = os.path.join(workdir, 'audio.wav')
	        return inference_audio(wav_path)

	# Combine the three interfaces into one tabbed page.
	# gr.TabbedInterface takes the interfaces and their tab titles as two
	# parallel lists, in the same order.
	tab_group = gr.TabbedInterface(
	    [
	        gr.Interface(inference_audio, inputs=gr.Audio(type="filepath"), outputs="text"),
	        gr.Interface(inference_text, inputs="text", outputs="text"),
	        gr.Interface(extract_text_from_youtube, inputs=gr.Textbox(placeholder="輸入 YouTube 影片的網址"), outputs="text"),
	    ],
	    ["音訊推斷", "文字推斷", "YouTube影片"],
	)

	# Launch the Gradio app.
	tab_group.launch()