# Offex's picture
# Update app.py
# 402b5a8 verified
import gradio as gr
import torch
import torchaudio
# Load the Silero VAD model (and its bundled helpers) from PyTorch Hub.
model, utils = torch.hub.load(
    repo_or_dir="snakers4/silero-vad",
    model="silero_vad",
    force_reload=False,
    trust_repo=True,
)

# Pull the required helper functions out of the utils tuple.
get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks = utils
def remove_silence(audio_path):
    """Remove non-speech segments from an audio file using Silero VAD.

    Args:
        audio_path: Path of the input audio file, or None when no audio
            was supplied.

    Returns:
        None when ``audio_path`` is None; the unchanged ``audio_path`` when
        no speech was detected; otherwise the path of a newly written WAV
        file that contains only the detected speech segments.
    """
    if audio_path is None:
        return None

    # Imported here so the function is self-contained.
    import tempfile

    # The audio is read, analysed and written back at 16000 Hz
    # (Silero VAD works best at a 16000 Hz sample rate).
    sampling_rate = 16000
    wav = read_audio(audio_path, sampling_rate=sampling_rate)

    # Ask the model for the time spans in which someone is speaking.
    speech_timestamps = get_speech_timestamps(
        wav, model, sampling_rate=sampling_rate
    )

    # No speech at all in the audio: hand the input back untouched.
    if not speech_timestamps:
        return audio_path

    # Write each result to its own unique file. A single fixed file name
    # would be overwritten by concurrent or subsequent requests, so one
    # caller could receive another caller's audio.
    # delete=False keeps the file on disk after the handle is closed so it
    # can be written below and read by the caller afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="processed_audio_", suffix=".wav", delete=False
    ) as tmp:
        output_file = tmp.name

    # Join only the speech segments and save them as the new audio file.
    save_audio(
        output_file,
        collect_chunks(speech_timestamps, wav),
        sampling_rate=sampling_rate,
    )
    return output_file
# Build a simple user interface (UI) with Gradio.
_audio_input = gr.Audio(type="filepath", label="Raw Audio Upload Karein")
_audio_output = gr.Audio(
    type="filepath", label="Processed Audio (Without Silence)"
)

iface = gr.Interface(
    fn=remove_silence,
    inputs=_audio_input,
    outputs=_audio_output,
    title="AI Silence Remover (Silero VAD)",
    description="Apni audio upload karein aur AI automatically non-speech parts ko cut kar dega.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()