from pathlib import Path

import nemo.collections.asr as nemo_asr
import scipy.io.wavfile as wav
import sounddevice as sd

# ===== SETTINGS =====
SAMPLE_RATE = 16000  # Hz
DURATION = 10  # seconds
OUTPUT_FILE = "arabic_recording.wav"
# Local NeMo checkpoint. The file name indicates a hybrid (RNNT + CTC)
# FastConformer model, so it is restored with the hybrid model class below.
MODEL_PATH = (
    "C:/Users/thegh/Python_Projects/Expertflow/UnderProgress/Peter_Projects/"
    "nvidia_asr_eg_conformer_better_than_whisper/"
    "stt_ar_fastconformer_hybrid_large_pcd_v1.0.nemo"
)


def record_audio(output_file=OUTPUT_FILE, duration=DURATION, sample_rate=SAMPLE_RATE):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        output_file: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        The path the recording was written to (``output_file``).
    """
    print("Recording... Speak Arabic now!")
    frame_count = int(sample_rate * duration)
    audio = sd.rec(frame_count, samplerate=sample_rate, channels=1, dtype="int16")
    sd.wait()  # sd.rec() returns immediately; block until the buffer is filled
    wav.write(output_file, sample_rate, audio)
    print(f"Recording finished. Saved as {output_file}")
    return output_file


def load_asr_model(model_path=MODEL_PATH):
    """Restore the Arabic ASR model from a local ``.nemo`` checkpoint.

    Args:
        model_path: Path to the ``.nemo`` checkpoint file.

    Returns:
        The restored NeMo ASR model.
    """
    print("Loading Arabic ASR model...")
    # The checkpoint is a hybrid RNNT/CTC model; restoring it through the
    # plain CTC class does not match the architecture stored in the file.
    return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(
        restore_path=str(model_path)
    )


def transcript_text(result):
    """Return the first transcript in ``result`` as a plain string.

    Depending on the NeMo version and model type, ``transcribe()`` may return
    a list of strings, a list of hypothesis objects exposing ``.text``, or a
    ``(best_hypotheses, all_hypotheses)`` tuple. All of these are reduced to
    the text of the first best hypothesis.

    Args:
        result: The value returned by ``asr_model.transcribe([...])``.

    Returns:
        The transcript text, or ``""`` when ``result`` holds no hypotheses.
    """
    if isinstance(result, tuple) and result:
        result = result[0]  # keep only the best-hypothesis list
    if not result:
        return ""
    first = result[0]
    if isinstance(first, str):
        return first
    text = getattr(first, "text", None)
    return text if text is not None else str(first)


def main():
    """Record a short clip from the microphone and print its transcription.

    Raises:
        FileNotFoundError: If ``MODEL_PATH`` does not point to an existing file.
    """
    # Fail fast: verify the checkpoint exists before asking the user to speak.
    if not Path(MODEL_PATH).is_file():
        raise FileNotFoundError(f"ASR model checkpoint not found: {MODEL_PATH}")

    # ===== STEP 1: Record audio =====
    recording = record_audio()

    # ===== STEP 2: Load ASR model =====
    asr_model = load_asr_model()

    # ===== STEP 3: Transcribe =====
    print("Transcribing...")
    transcription = asr_model.transcribe([recording])
    print("Transcription:", transcript_text(transcription))


if __name__ == "__main__":
    main()