import gradio as gr
import torch
import torchaudio

# Load the Silero VAD model from PyTorch Hub. force_reload=False means a
# fresh download is not forced on every start.
model, utils = torch.hub.load(
    "snakers4/silero-vad",
    "silero_vad",
    force_reload=False,
    trust_repo=True,
)

# Unpack the helper functions that are returned alongside the model.
(
    get_speech_timestamps,
    save_audio,
    read_audio,
    VADIterator,
    collect_chunks,
) = utils

def remove_silence(audio_path):
    """Cut the non-speech parts out of an audio file using Silero VAD.

    Args:
        audio_path: Path of the audio file to process, or ``None`` when no
            audio was supplied.

    Returns:
        ``None`` if ``audio_path`` is ``None``; ``audio_path`` unchanged if
        no speech was detected; otherwise the path of a newly written
        ``.wav`` file holding only the detected speech segments joined
        together.
    """
    if audio_path is None:
        return None

    # Function-scope import so this block does not depend on the file's
    # top-level import list; tempfile is part of the standard library.
    import tempfile

    # Single source of truth for the rate used for reading, detection and
    # saving (the original note says Silero VAD works best at 16000 Hz).
    sampling_rate = 16000

    # Read the audio file at the chosen sample rate.
    wav = read_audio(audio_path, sampling_rate=sampling_rate)

    # Ask the model for the time spans in which someone is speaking.
    speech_timestamps = get_speech_timestamps(
        wav, model, sampling_rate=sampling_rate
    )

    # No speech detected at all: hand back the input untouched.
    if not speech_timestamps:
        return audio_path

    # Reserve a unique output path per call. A fixed file name would let
    # overlapping requests overwrite each other's result. The handle is
    # closed before writing so the path can be reopened on every platform;
    # delete=False leaves the file on disk for the caller to serve.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name

    # Join only the speech spans and save them as the new audio file.
    save_audio(
        output_file,
        collect_chunks(speech_timestamps, wav),
        sampling_rate=sampling_rate,
    )

    return output_file

# Build a simple user interface (UI) with Gradio: one audio upload in, one
# processed audio file out.
iface = gr.Interface(
    title="AI Silence Remover (Silero VAD)",
    description="Apni audio upload karein aur AI automatically non-speech parts ko cut kar dega.",
    fn=remove_silence,
    inputs=gr.Audio(label="Raw Audio Upload Karein", type="filepath"),
    outputs=gr.Audio(label="Processed Audio (Without Silence)", type="filepath"),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()