Spaces:

DeepLearning101
/

Speech-Quality-Inspection_Meta-Denoiser

Paused

App Files Files Community

Speech-Quality-Inspection_Meta-Denoiser / app.py

DeepLearning101's picture

DeepLearning101

Update app.py

e86bd31 verified almost 2 years ago

2.69 kB

	import os
	import time
	import json
	import gradio as gr
	import torch
	import torchaudio
	import numpy as np
	from denoiser.demucs import Demucs
	from pydub import AudioSegment

	modelpath = './denoiser/master64.th'

	def transcribe(file_upload, microphone):
	file = microphone if microphone is not None else file_upload
	model = Demucs(hidden=64)
	state_dict = torch.load(modelpath, map_location='cpu')
	model.load_state_dict(state_dict)
	demucs = model
	x, sr = torchaudio.load(file)
	out = demucs(x[None])[0]
	out = out / max(out.abs().max().item(), 1)
	torchaudio.save('enhanced.wav', out, sr)
	enhanced = AudioSegment.from_wav('enhanced.wav') # 只有去完噪的需要降 bitrate 再做語音識別
	enhanced.export('enhanced.wav', format="wav", bitrate="256k")
	return "enhanced.wav"

	# Gradio UI: a single audio input (passed to `transcribe` as a file path)
	# and a single audio output that plays the file path `transcribe` returns.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=[
	        gr.Audio(type="filepath", label="語音質檢原始音檔"),
	    ],
	    outputs=gr.Audio(type="filepath", label="Output"),
	    title="語音質檢/噪音去除 (語音增強)",
	    # `\|` is not a valid escape in a normal string literal, so the
	    # backslash is doubled; the resulting text is unchanged.
	    description="""<a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br>
	為了提升語音識別的效果，可以在識別前先進行噪音去除<br>
	<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> \\| <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
	<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> \\| <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
	<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a><br>
	<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> \\| <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
	基於<a href='https://github.com/facebookresearch/denoiser' target='_blank'> Real Time Speech Enhancement in the Waveform Domain (Interspeech 2020)</a>""",
	    allow_flagging="never",
	    # Sample recordings offered in the UI; paths are relative to the
	    # working directory.
	    examples=[
	        ["exampleAudio/15s_2020-03-27_sep1.wav"],
	        ["exampleAudio/13s_2020-03-27_sep2.wav"],
	        ["exampleAudio/30s_2020-04-23_sep1.wav"],
	        ["exampleAudio/15s_2020-04-23_sep2.wav"],
	    ],
	)

	# Start the web server for the interface with Gradio's debug mode enabled.
	demo.launch(debug=True)