# Hugging Face Spaces app (the Space page reported "Runtime error" when this file was captured).
# --- Imports ---
import gradio as gr
import torch
import librosa
import numpy as np
import noisereduce as nr
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# --- Model setup ---
# Assamese fine-tuned XLSR-53 checkpoint; runs on GPU when available.
MODEL_ID = "infinitejoy/Wav2Vec2-Large-XLSR-53-Assamese"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model {MODEL_ID} on {DEVICE}...")
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID).to(DEVICE)
print("✅ Model loaded.")
| # ------------------------------- | |
| # Audio Preprocessing Function | |
| # ------------------------------- | |
def preprocess_audio(audio_path):
    """Load an audio file and clean it up for the ASR model.

    Steps: resample to 16 kHz mono, trim leading/trailing silence,
    apply spectral noise reduction, then peak-normalize the amplitude
    (skipped when the signal is all zeros).

    Returns a tuple of (waveform as a float ndarray, sample rate).
    """
    # Force 16 kHz mono, the rate the Wav2Vec2 model expects.
    waveform, sample_rate = librosa.load(audio_path, sr=16000, mono=True)
    # Drop silence at both ends.
    waveform, _ = librosa.effects.trim(waveform)
    # Spectral-gating noise reduction.
    denoised = nr.reduce_noise(y=waveform, sr=sample_rate)
    # Peak-normalize; guard against division by zero on silent input.
    peak = np.max(np.abs(denoised))
    if peak > 0:
        denoised = denoised / peak
    return denoised, sample_rate
| # ------------------------------- | |
| # Transcription Function | |
| # ------------------------------- | |
def transcribe(audio_path):
    """Transcribe an uploaded/recorded audio file to Assamese text.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the Gradio ``Audio`` component.
        Gradio passes ``None`` when the user submits without audio.

    Returns
    -------
    str
        The decoded transcription, or a human-readable error message
        (the UI expects a string either way, so errors are returned,
        not raised).
    """
    # Gradio sends None when no audio was provided; librosa.load(None)
    # would raise, so handle it up front with a clear message.
    if audio_path is None:
        return "⚠️ Please upload or record an audio file first."
    try:
        # Resample/trim/denoise/normalize the input audio.
        audio, sr = preprocess_audio(audio_path)
        # Build model inputs and move tensors to the target device.
        inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        # Greedy CTC decoding: argmax over the logits, then let the
        # processor collapse repeats/blanks into text.
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription
    except Exception as e:
        # Surface the failure in the UI instead of crashing the Space.
        return f"❌ Error: {str(e)}"
| # ------------------------------- | |
| # Gradio Interface | |
| # ------------------------------- | |
# --- Gradio UI ---
# Single-input interface: audio file in, transcribed text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload audio (wav/mp3)"),
    outputs="text",
    title="Assamese Transcription by Tanuja and Kritika",
    description="Upload an audio file (16kHz recommended). The model will transcribe it to Assamese.",
)

if __name__ == "__main__":
    demo.launch()