Spaces:

Muhammadidrees
/

RiayatechChatDoctor

Sleeping

App Files Files Community

Muhammadidrees committed on Sep 29

Commit

4921c24

verified ·

1 Parent(s): e1b1f98

Update PaitentVoiceToText.py

Browse files

Files changed (1) hide show

PaitentVoiceToText.py +20 -31

PaitentVoiceToText.py CHANGED Viewed

@@ -1,34 +1,27 @@
-# stt.py
 import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-import sounddevice as sd
-import numpy as np
-import scipy.io.wavfile as wav
 # -------------------
 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
 device_index = 0 if use_cuda else -1
-device_str = "cuda" if use_cuda else "cpu"
 dtype = torch.float16 if use_cuda else torch.float32
 # -------------------
-# 2️⃣ Load Whisper model from Hugging Face
 # -------------------
 hub_id = "Muhammadidrees/WispherVOICE"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     hub_id,
     torch_dtype=dtype,
-    device_map="auto",  # automatically assigns to GPU if available
     trust_remote_code=True
 )
 processor = AutoProcessor.from_pretrained(hub_id, trust_remote_code=True)
-# -------------------
-# 3️⃣ Setup ASR pipeline
-# -------------------
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model,
@@ -38,30 +31,26 @@ pipe = pipeline(
     device=device_index
 )
-print("🎧 Whisper pipeline ready using Muhammadidrees/WispherVOICE.")
 # -------------------
-# 4️⃣ Record & Transcribe Function
 # -------------------
-def record_and_transcribe(duration=5, samplerate=16000, filename="mic_input.wav") -> str:
-    """
-    Record audio from the microphone, save it as a WAV file,
-    and return the transcribed text using Whisper.
-    """
-    # 1️⃣ Record audio
-    print(f"🎙️ Recording for {duration} seconds...")
-    audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype="float32")
-    sd.wait()
-    audio = np.squeeze(audio)
-    # 2️⃣ Save as WAV
-    wav.write(filename, samplerate, (audio * 32767).astype(np.int16))
-    print(f"✅ Recording saved as {filename}")
-    # 3️⃣ Transcribe
-    result = pipe(filename)
-    text = result["text"]
-    print(f"📝 Transcribed text: {text}")
-    return text

+# stt_gradio.py
 import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import gradio as gr
 # -------------------
 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
 device_index = 0 if use_cuda else -1
 dtype = torch.float16 if use_cuda else torch.float32
 # -------------------
+# 2️⃣ Load Whisper model
 # -------------------
 hub_id = "Muhammadidrees/WispherVOICE"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     hub_id,
     torch_dtype=dtype,
+    device_map="auto",
     trust_remote_code=True
 )
 processor = AutoProcessor.from_pretrained(hub_id, trust_remote_code=True)
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model,
     device=device_index
 )
+print("🎧 Whisper pipeline ready.")
 # -------------------
+# 3️⃣ Function to Transcribe Uploaded/Recorded Audio
 # -------------------
+def transcribe(audio):
+    # audio is a file path: the gr.Audio input below uses type="filepath", so it is passed to the pipeline as-is
+    result = pipe(audio)
+    return result["text"]
+# -------------------
+# 4️⃣ Gradio Interface
+# -------------------
+demo = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+    outputs="text"
+)
+if __name__ == "__main__":
+    demo.launch()