import sounddevice as sd
import scipy.io.wavfile as wav
import nemo.collections.asr as nemo_asr

# ===== SETTINGS =====
SAMPLE_RATE = 16000  # Hz; used for both the recording and the WAV header
DURATION = 10  # seconds
OUTPUT_FILE = "arabic_recording.wav"
# Local .nemo checkpoint to restore. The file name indicates a *hybrid*
# FastConformer model, which is why it is loaded through the generic
# ASRModel entry point below rather than a CTC-only class.
MODEL_PATH = "C:/Users/thegh/Python_Projects/Expertflow/UnderProgress/Peter_Projects/nvidia_asr_eg_conformer_better_than_whisper/stt_ar_fastconformer_hybrid_large_pcd_v1.0.nemo"


def _first_transcript_text(result):
    """Return the text of the first transcription in *result*.

    NOTE(review): the shape returned by ``transcribe()`` appears to depend on
    the NeMo version and model type -- a plain list of strings, a
    ``(best_hypotheses, all_hypotheses)`` tuple, or a list of hypothesis
    objects exposing ``.text``. This helper accepts all three; confirm
    against the installed NeMo version.
    """
    if isinstance(result, tuple):
        # Tuple form: the first element holds the best hypotheses per file.
        result = result[0]
    first = result[0]
    # Hypothesis-like objects carry the string in ``.text``; plain strings
    # have no such attribute and are returned unchanged.
    return getattr(first, "text", first)


# ===== STEP 1: Record audio =====
print("🎙️ Recording... Speak Arabic now!")
audio = sd.rec(
    int(SAMPLE_RATE * DURATION),
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype='int16',
)
sd.wait()  # called right after rec() so the buffer is complete before saving
wav.write(OUTPUT_FILE, SAMPLE_RATE, audio)
print(f"✅ Recording finished. Saved as {OUTPUT_FILE}")

# ===== STEP 2: Load ASR model =====
print("📥 Loading Arabic ASR model...")
# ASRModel.restore_from is the generic loader; it is expected to build the
# model class recorded in the checkpoint, so the loader no longer names a
# class (CTC-only) that disagrees with the hybrid checkpoint.
asr_model = nemo_asr.models.ASRModel.restore_from(restore_path=MODEL_PATH)

# ===== STEP 3: Transcribe =====
print("🔍 Transcribing...")
transcription = asr_model.transcribe([OUTPUT_FILE])
print("📝 Transcription:", _first_transcript_text(transcription))