"""Gradio app that transcribes an uploaded audio clip, filters out unwanted
words with a fine-tuned seq2seq model, and returns the cleaned-up audio.

Pipeline per request:
    audio -> WAV on disk -> faster-whisper word timestamps -> transcription
    -> text filtering (seq2seq model) -> timestamp pruning -> cut audio -> WAV
"""

import os
import tempfile

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import gradio as gr
from faster_whisper import WhisperModel
import whisper_timestamped as whisper
import soundfile as sf

# NOTE(review): star import pulls in get_word_timestamps, get_transcription,
# filterText, get_modified_timestamps, cut_audio — consider importing them
# explicitly so the module's dependencies are visible.
from utils import *

# Models are loaded once at import time so every request reuses them.
model = WhisperModel("large-v2")
tokenizer = AutoTokenizer.from_pretrained("FinetuneModel/VoidFilter")
filter_model = AutoModelForSeq2SeqLM.from_pretrained("FinetuneModel/VoidFilter")


def VoidFilter(audio_file):
    """Remove unwanted words from an uploaded audio clip.

    Parameters
    ----------
    audio_file : tuple
        ``(sample_rate, samples)`` pair as delivered by gradio's ``"audio"``
        input component (samples is a numpy array).

    Returns
    -------
    str
        Path to the filtered WAV file, consumed by gradio's ``"audio"``
        output component.
    """
    sr, data = audio_file

    # Use a unique temp path per request so concurrent gradio requests do not
    # clobber each other (the original wrote to a fixed "new_audio.wav").
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_in:
        in_path = tmp_in.name
    sf.write(in_path, data, sr)

    try:
        audio = whisper.load_audio(in_path)
        segments, _ = model.transcribe(in_path, word_timestamps=True)
    finally:
        # The on-disk copy is only needed for loading/transcription.
        os.remove(in_path)

    word_timestamps = get_word_timestamps(segments)
    transcription = get_transcription(word_timestamps)
    print(transcription)  # debug: show the raw transcription in the server log

    filtered_text = filterText(transcription, filter_model, tokenizer)
    modified_timestamps = get_modified_timestamps(word_timestamps, filtered_text)
    final_audio, sample_rate = cut_audio(audio, modified_timestamps)

    # Unique output path as well, for the same concurrency reason.
    with tempfile.NamedTemporaryFile(suffix="_filtered.wav", delete=False) as tmp_out:
        out_path = tmp_out.name
    sf.write(out_path, final_audio, sample_rate)
    return out_path


if __name__ == "__main__":
    # Guarded so importing this module (e.g. from tests) does not start a server.
    iface = gr.Interface(fn=VoidFilter, inputs="audio", outputs="audio")
    iface.launch()