Spaces:
Sleeping
Sleeping
| import warnings | |
| warnings.filterwarnings("ignore") | |
| import librosa # Library for loading and processing audio files. | |
| import numpy as np # Library for numerical computations, used for signal processing. | |
| import gradio as gr # Library for creating a web-based user interface for inference. | |
| from transformers import pipeline # Import pipeline for automatic speech recognition (ASR). | |
| # Importing custom utility functions for text processing. | |
| from text2int import text_to_int # Converts text numbers (e.g., "one") into integers (e.g., 1). | |
| from Text2List import text_to_list # Converts a text string into a list of words. | |
| from convert2list import convert_to_list # Converts processed text into a structured list. | |
| from processDoubles import process_doubles # Handles repeated words or numbers in speech recognition output. | |
| from replaceWords import replace_words # Replaces specific words in the recognized text with alternatives. | |
| from highPassFilter import high_pass_filter # High-pass filter: suppresses low-frequency noise while letting high-frequency content through. | |
| from waveletDenoise import wavelet_denoise # used for signal Denoising. | |
| from applyWienerFilter import apply_wiener_filter # for Signal Denoising. | |
# Speech-recognition pipeline, created once at import time so that every
# call to recognize_speech() reuses the same loaded model.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-punjabi",
)
def recognize_speech(audio_file):
    """Transcribe an audio file and post-process the recognized text.

    The audio is loaded at 16 kHz, run through the high-pass, Wiener and
    wavelet denoising helpers (in that order) and handed to ``asr_model``.
    ``[PAD]`` markers are stripped from the transcript, which is then passed
    through the list-conversion, doubles, word-replacement and text-to-int
    helpers.

    Args:
        audio_file: Audio source accepted by ``librosa.load``.

    Returns:
        The value produced by ``text_to_int`` for the cleaned transcript.
    """
    samples, sample_rate = librosa.load(audio_file, sr=16000)

    # Denoising chain: each stage consumes the previous stage's output.
    samples = apply_wiener_filter(high_pass_filter(samples, sample_rate))
    transcript = asr_model(wavelet_denoise(samples))["text"].replace("[PAD]", "")

    # Text normalisation chain applied to the raw transcript.
    tokens = convert_to_list(transcript, text_to_list())
    return text_to_int(replace_words(process_doubles(tokens)))
def sel_lng(lng, mic=None, file=None):
    """Dispatch an audio input to the recognizer selected by ``lng``.

    Args:
        lng: Identifier of the model to use; only ``"model_1"`` is supported.
        mic: Microphone recording; takes precedence over ``file`` when both
            are given.
        file: Uploaded audio, used only when ``mic`` is ``None``.

    Returns:
        The recognized text on success, or a human-readable message when no
        audio was supplied or the requested model is not supported.
    """
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"

    if lng == "model_1":
        return recognize_speech(audio)

    # Previously an unrecognized selection fell through and returned None,
    # which shows up as an empty textbox; report the problem explicitly.
    return f"Unsupported model selection: {lng!r}"
# Gradio front end. Only two inputs are wired up, so the audio path (from
# either the microphone or an upload) reaches sel_lng as its second
# positional argument, `mic`; `file` keeps its default.
model_choice = gr.Dropdown(["model_1"], label="Select Model")
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=sel_lng,
    inputs=[model_choice, audio_input],
    outputs=["textbox"],
    title="Automatic Speech Recognition",
)

# Start the web server for the interface.
demo.launch()