Spaces:
Sleeping
Sleeping
| import os | |
| import torch | |
| import librosa | |
| from transformers import AutoProcessor, AutoModelForCTC | |
# Hugging Face model id of the Arabic wav2vec2 CTC checkpoint.
# It can run on CPU, but it is a "large" model, so it is heavy to load and run.
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"

# Audio file that gets transcribed.
AUDIO_PATH = "sample_trim.wav"

# Text file that receives the raw transcription.
OUT_TXT = os.path.join("output", "asr_raw.txt")
def main():
    """Transcribe ``AUDIO_PATH`` with the wav2vec2 CTC model and save the text.

    Loads the processor and model named by ``MODEL_ID``, reads the audio as
    16 kHz mono, decodes the CTC logits greedily (argmax per frame), writes
    the resulting text to ``OUT_TXT`` and prints it.

    Raises:
        FileNotFoundError: If ``AUDIO_PATH`` does not exist.
        ValueError: If the audio file contains no samples.
    """
    # Fail fast: loading the model is expensive, so make sure the input
    # exists before doing any heavy work.
    if not os.path.isfile(AUDIO_PATH):
        raise FileNotFoundError(f"Audio file not found: {AUDIO_PATH}")

    # Derive the directory from OUT_TXT so the two can never drift apart.
    out_dir = os.path.dirname(OUT_TXT)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    print("Loading:", MODEL_ID)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    model = AutoModelForCTC.from_pretrained(MODEL_ID)
    model.eval()  # switch to evaluation mode for inference

    # Resample to 16 kHz and down-mix to a single channel while loading.
    audio, sr = librosa.load(AUDIO_PATH, sr=16000, mono=True)
    if len(audio) == 0:
        raise ValueError(f"Audio file contains no samples: {AUDIO_PATH}")
    print("Audio sec:", round(len(audio) / sr, 2))

    inputs = processor(
        audio, sampling_rate=sr, return_tensors="pt", padding=True
    )

    # No gradients are needed for inference.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy CTC decoding: pick the most likely token for every frame and
    # let the processor turn the token ids into text.
    pred_ids = torch.argmax(logits, dim=-1)
    text = processor.batch_decode(pred_ids)[0].strip()

    # Save to file for downstream steps
    with open(OUT_TXT, "w", encoding="utf-8") as f:
        f.write(text + "\n")

    print("\n--- RAW TRANSCRIPTION ---")
    print(text)
    print(f"\nOK ✅ wrote {OUT_TXT}")
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()