Spaces:

Muhammadidrees
/

RiayatechChatDoctor

Sleeping

App Files Files Community

Muhammadidrees committed on Sep 29

Commit

0849418

verified ·

1 Parent(s): b1a4c93

Update PaitentVoiceToText.py

Browse files

Files changed (1) hide show

PaitentVoiceToText.py +67 -70

PaitentVoiceToText.py CHANGED Viewed

@@ -1,70 +1,67 @@
-# stt.py
-import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-import sounddevice as sd
-import numpy as np
-import scipy.io.wavfile as wav
-# Machine-specific local directory that the loader below tries first.
-save_dir = r"C:\Users\JAY\Downloads\model\OpenAIWhisper"
-# Detect GPU: CUDA with float16 when available, otherwise CPU with float32.
-use_cuda = torch.cuda.is_available()
-device_index = 0 if use_cuda else -1  # passed as `device=` to pipeline() below
-device_str = "cuda" if use_cuda else "cpu"  # passed to model.to() below
-dtype = torch.float16 if use_cuda else torch.float32
-# Load model: try the local copy first (local_files_only=True, so no download
-# is attempted); on any failure fall back to the "openai/whisper-small" Hub id.
-try:
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        save_dir,
-        torch_dtype=dtype,
-        low_cpu_mem_usage=True,
-        use_safetensors=True,
-        local_files_only=True
-    ).to(device_str)
-    processor = AutoProcessor.from_pretrained(save_dir, local_files_only=True)
-except Exception as e:
-    # Broad catch is deliberate best-effort: the reason is printed, then the
-    # Hub model is loaded instead.
-    print("Warning: Local model load failed, falling back to online model:", e)
-    hub_id = "openai/whisper-small"
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        hub_id,
-        torch_dtype=dtype,
-        low_cpu_mem_usage=True,
-        use_safetensors=True,
-    ).to(device_str)
-    processor = AutoProcessor.from_pretrained(hub_id)
-# Build the speech-recognition pipeline from the loaded model and the
-# processor's tokenizer / feature extractor; `pipe` is used by
-# record_and_transcribe().
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    torch_dtype=dtype,
-    device=device_index
-)
-print("Whisper pipeline ready.")
-def record_and_transcribe(duration=5, samplerate=16000, filename="mic_input.wav") -> str:
-    """
-    Record audio from the microphone, save it as a WAV file,
-    and return the transcribed text using Whisper.
-    Args:
-        duration: Recording length in seconds.
-        samplerate: Capture sample rate in Hz; also written to the WAV file.
-        filename: Path of the WAV file that is written and then transcribed.
-    Returns:
-        The "text" entry of the result returned by the module-level `pipe`.
-    """
-    # 1️⃣ Record audio
-    print(f"🎙️ Recording for {duration} seconds...")
-    audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype="float32")
-    sd.wait()  # block until the recording has finished
-    audio = np.squeeze(audio)  # drop the single-channel axis
-    # 2️⃣ Save as WAV
-    # Scale float samples to 16-bit integers; values outside [-1, 1] are not clipped here.
-    wav.write(filename, samplerate, (audio * 32767).astype(np.int16))
-    print(f"✅ Recording saved as {filename}")
-    # 3️⃣ Transcribe
-    result = pipe(filename)
-    text = result["text"]
-    print(f"📝 Transcribed text: {text}")
-    return text

+# stt.py
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import sounddevice as sd
+import numpy as np
+import scipy.io.wavfile as wav
+# -------------------
+# 1️⃣ Detect GPU
+# -------------------
+# CUDA with float16 when a GPU is visible, otherwise CPU with float32.
+use_cuda = torch.cuda.is_available()
+device_index = 0 if use_cuda else -1  # `device=` value for pipeline(); -1 means CPU
+device_str = "cuda" if use_cuda else "cpu"  # target for model.to()
+dtype = torch.float16 if use_cuda else torch.float32
+# -------------------
+# 2️⃣ Load Whisper model from Hugging Face
+# -------------------
+hub_id = "Muhammadidrees/WispherVOICE"
+# The model is placed with an explicit .to(device_str) instead of
+# device_map="auto". A model dispatched through device_map is managed by
+# accelerate, and pipeline() then rejects (or, in newer transformers releases,
+# ignores) the explicit `device` argument passed below. Loading and pipeline
+# now agree on a single device.
+# NOTE(review): trust_remote_code=True allows Python code shipped in the Hub
+# repo to run on this machine. Confirm the repo actually needs it; if it is a
+# standard Whisper checkpoint the flag can be dropped.
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    hub_id,
+    torch_dtype=dtype,
+    trust_remote_code=True,
+).to(device_str)
+processor = AutoProcessor.from_pretrained(hub_id, trust_remote_code=True)
+# -------------------
+# 3️⃣ Setup ASR pipeline
+# -------------------
+# `pipe` is the module-level entry point used by record_and_transcribe().
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=dtype,
+    device=device_index,
+)
+print("🎧 Whisper pipeline ready using Muhammadidrees/WispherVOICE.")
+# -------------------
+# 4️⃣ Record & Transcribe Function
+# -------------------
+def record_and_transcribe(duration=5, samplerate=16000, filename="mic_input.wav") -> str:
+    """
+    Record audio from the microphone, save it as a WAV file,
+    and return the transcribed text using Whisper.
+
+    Args:
+        duration: Recording length in seconds.
+        samplerate: Capture sample rate in Hz; also written to the WAV file.
+        filename: Path of the WAV file that is written and then transcribed.
+    Returns:
+        The "text" entry of the result returned by the module-level `pipe`.
+    Raises:
+        ValueError: If duration * samplerate does not yield at least one frame.
+    """
+    frame_count = int(duration * samplerate)
+    if frame_count <= 0:
+        # Fail early with a clear message instead of deep inside the audio
+        # or decoding libraries.
+        raise ValueError(
+            f"duration * samplerate must give at least one frame, got {frame_count}"
+        )
+    # 1️⃣ Record audio
+    print(f"🎙️ Recording for {duration} seconds...")
+    audio = sd.rec(frame_count, samplerate=samplerate, channels=1, dtype="float32")
+    sd.wait()  # block until the recording has finished
+    # Drop only the channel axis so that even a one-frame take stays 1-D.
+    audio = np.squeeze(audio, axis=1)
+    # 2️⃣ Save as WAV
+    # Clip before scaling: float samples outside [-1, 1] would otherwise
+    # overflow the int16 range and wrap around.
+    pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
+    wav.write(filename, samplerate, pcm)
+    print(f"✅ Recording saved as {filename}")
+    # 3️⃣ Transcribe
+    result = pipe(filename)
+    text = result["text"]
+    print(f"📝 Transcribed text: {text}")
+    return text