# Page metadata captured with this file (not part of the program):
# Spaces: Sleeping | File size: 1,120 Bytes | 4ca6263
import os
import torch
import librosa
from transformers import AutoProcessor, AutoModelForCTC
# Identifier of the Arabic wav2vec2 CTC checkpoint passed to from_pretrained()
# in main(). Runs on CPU, but the model is heavy to load.
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
# Input audio file; main() loads it resampled to 16 kHz mono.
AUDIO_PATH = "sample_trim.wav"
# Destination file for the raw transcription text written by main().
OUT_TXT = os.path.join("output", "asr_raw.txt")
def main():
    """Transcribe AUDIO_PATH with the MODEL_ID CTC model and save the text.

    Loads the audio as 16 kHz mono, runs the model without gradient
    tracking, decodes the per-frame argmax token ids, then writes the
    stripped transcription plus a trailing newline to OUT_TXT (UTF-8) and
    echoes it to stdout.

    No exceptions are caught here: a failure to read the audio file or to
    load the model propagates to the caller.
    """
    # Create the directory OUT_TXT actually lives in instead of a separately
    # hard-coded "output", so the two cannot drift apart. dirname is "" for a
    # bare filename, and os.makedirs("") would raise, hence the guard.
    out_dir = os.path.dirname(OUT_TXT)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Read the audio before touching the model: it is the cheap step, so a
    # missing or unreadable file fails right away rather than after the
    # heavy model load.
    audio, sr = librosa.load(AUDIO_PATH, sr=16000, mono=True)

    print("Loading:", MODEL_ID)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    model = AutoModelForCTC.from_pretrained(MODEL_ID)
    model.eval()  # evaluation mode for inference

    print("Audio sec:", round(len(audio) / sr, 2))
    inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding=True)

    # Inference only, so skip gradient bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decode: pick the highest-scoring token id at every frame.
    pred_ids = torch.argmax(logits, dim=-1)
    # A single clip was passed in, so the decoded batch has one entry.
    text = processor.batch_decode(pred_ids)[0].strip()

    # Save to file for downstream steps
    with open(OUT_TXT, "w", encoding="utf-8") as f:
        f.write(text + "\n")

    print("\n--- RAW TRANSCRIPTION ---")
    print(text)
    print(f"\nOK ✅ wrote {OUT_TXT}")
# Run the transcription only when executed as a script, not on import.
if __name__ == "__main__":
    main()