Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import librosa | |
| import numpy as np | |
| from transformers import WhisperForConditionalGeneration, WhisperProcessor | |
| from peft import PeftModel | |
# Load everything once at import time so each request only pays for inference.
# The processor and the base weights come from the same checkpoint id; the
# PEFT adapter is then attached on top of the base model.
_BASE_CHECKPOINT = "openai/whisper-large-v3"
_ADAPTER_REPO = "Dougsworth/caribbean-whisper-asr"

print("Loading model...")
processor = WhisperProcessor.from_pretrained(_BASE_CHECKPOINT)
base_model = WhisperForConditionalGeneration.from_pretrained(
    _BASE_CHECKPOINT,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
)
# `model` is the object used for generation further down in the file.
model = PeftModel.from_pretrained(base_model, _ADAPTER_REPO)
print("Model loaded!")
def _peak_normalize(samples):
    """Scale *samples* so the largest absolute value is 1; leave silence as is."""
    peak = np.max(np.abs(samples))
    # An all-zero signal has no peak to scale by; dividing would produce NaNs.
    if peak > 0:
        return samples / peak
    return samples


def enhance_phone_audio(audio):
    """Gentle enhancement for phone audio - just normalize, no filtering.

    Steps, in order: subtract the mean (DC offset), peak-normalize to
    [-1, 1], apply a mild power-law compression (exponent 0.9) that lifts
    quiet samples relative to the peak, then peak-normalize again.

    Args:
        audio: Array-like of audio samples.

    Returns:
        A ``numpy.float32`` array with the same shape as the input. A
        zero-length input is returned as an empty ``float32`` array instead
        of raising.
    """
    audio = np.asarray(audio)
    # np.max() raises ValueError on a zero-size array (and np.mean() yields
    # NaN), so there is nothing meaningful to enhance: return early.
    if audio.size == 0:
        return audio.astype(np.float32)

    # Remove DC offset
    audio = audio - np.mean(audio)
    # Normalize to [-1, 1]
    audio = _peak_normalize(audio)
    # Gentle compression to boost quiet parts; sign() restores polarity after
    # the power is taken on magnitudes.
    audio = np.sign(audio) * np.power(np.abs(audio), 0.9)
    # Normalize again
    audio = _peak_normalize(audio)
    return audio.astype(np.float32)
def transcribe(audio_path):
    """Transcribe an uploaded voice note to text.

    Args:
        audio_path: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.

    Returns:
        The decoded transcription string, or a short user-facing message
        when there is no audio to transcribe.
    """
    if audio_path is None:
        return "Please upload an audio file."

    # Resample to 16 kHz on load; the same rate is declared to the processor.
    audio, _ = librosa.load(audio_path, sr=16000)
    # A file that decodes to zero samples cannot be normalized or transcribed.
    if audio.size == 0:
        return "The uploaded audio file is empty."

    # Apply gentle enhancement
    audio = enhance_phone_audio(audio)

    # NOTE(review): the Whisper feature extractor pads/truncates to a fixed
    # 30 s window by default, so longer voice notes may be cut off - confirm
    # and switch to long-form/chunked decoding if that matters.
    inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

    with torch.no_grad():
        # Passed by keyword: some PEFT wrapper classes define generate() as
        # keyword-only, so a positional tensor would raise TypeError there.
        predicted_ids = model.generate(
            input_features=inputs.input_features,
            language="en",
            task="transcribe",
        )

    # A single clip goes in, so the batch holds exactly one decoded string.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Build the web UI: one audio upload in, one text box with the transcript out.
# type="filepath" makes Gradio hand `transcribe` a path rather than raw samples.
_voice_note_input = gr.Audio(type="filepath", label="Upload Voice Note")
_transcript_output = gr.Textbox(label="Job Listing Transcription")

demo = gr.Interface(
    title="Linkup - Caribbean Speech to Text",
    description="Upload a voice note describing a job and get it transcribed. Built for Caribbean accents.",
    fn=transcribe,
    inputs=_voice_note_input,
    outputs=_transcript_output,
    examples=[],
)

# Start serving as soon as the module is executed.
demo.launch()