Spaces:

Muhammadidrees
/

RiayatechChatDoctor

Build error

App Files Community

Muhammadidrees committed on Sep 29, 2025

Commit

dda9115

verified ·

1 Parent(s): 2d0cff7

Update PaitentVoiceToText.py

Browse files

Files changed (1) hide show

PaitentVoiceToText.py +22 -12

PaitentVoiceToText.py CHANGED Viewed

@@ -7,49 +7,59 @@ import gradio as gr
 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
-device_index = 0 if use_cuda else -1
 dtype = torch.float16 if use_cuda else torch.float32
 # -------------------
 # 2️⃣ Load Whisper model
 # -------------------
 hub_id = "Muhammadidrees/WispherVOICE"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     hub_id,
     torch_dtype=dtype,
-    device_map="auto",
     trust_remote_code=True
 )
-processor = AutoProcessor.from_pretrained(hub_id, trust_remote_code=True)
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model,
     tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    torch_dtype=dtype,
-    device=device_index
 )
 print("🎧 Whisper pipeline ready.")
 # -------------------
-# 3️⃣ Function to Transcribe Uploaded/Recorded Audio
 # -------------------
 def transcribe(audio):
-    # audio = (sr, data) from Gradio microphone
     result = pipe(audio)
     return result["text"]
 # -------------------
-# 4️⃣ Gradio Interface
 # -------------------
 demo = gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
-    outputs="text"
 )
 if __name__ == "__main__":

 # 1️⃣ Detect GPU
 # -------------------
 use_cuda = torch.cuda.is_available()
 dtype = torch.float16 if use_cuda else torch.float32
+print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}, dtype={dtype}")
 # -------------------
 # 2️⃣ Load Whisper model
 # -------------------
 hub_id = "Muhammadidrees/WispherVOICE"
+print("⏳ Loading model...")
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     hub_id,
     torch_dtype=dtype,
+    device_map="auto",          # accelerate handles device placement
+    trust_remote_code=True
+)
+processor = AutoProcessor.from_pretrained(
+    hub_id,
     trust_remote_code=True
 )
+# -------------------
+# 3️⃣ Create pipeline (no device argument!)
+# -------------------
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model,
     tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor
 )
 print("🎧 Whisper pipeline ready.")
 # -------------------
+# 4️⃣ Transcription Function
 # -------------------
 def transcribe(audio):
+    # Gradio audio input returns a file path
+    if audio is None:
+        return "No audio provided."
     result = pipe(audio)
     return result["text"]
 # -------------------
+# 5️⃣ Gradio Interface
 # -------------------
 demo = gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+    outputs="text",
+    title="🎤 Whisper Speech-to-Text",
+    description="Record or upload audio and get real-time transcription using Whisper."
 )
 if __name__ == "__main__":