Muhammadidrees committed on
Commit
c7de4ce
·
verified ·
1 Parent(s): 93dd230

Update PaitentVoiceToText.py

Browse files
Files changed (1) hide show
  1. PaitentVoiceToText.py +11 -23
PaitentVoiceToText.py CHANGED
@@ -1,10 +1,9 @@
1
- # stt_gradio.py
2
  import torch
3
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
- import gradio as gr
5
 
6
  # -------------------
7
- # 1️⃣ Detect GPU
8
  # -------------------
9
  use_cuda = torch.cuda.is_available()
10
  dtype = torch.float16 if use_cuda else torch.float32
@@ -20,7 +19,7 @@ print("⏳ Loading model...")
20
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
21
  hub_id,
22
  torch_dtype=dtype,
23
- device_map="auto", # accelerate handles device placement
24
  trust_remote_code=True
25
  )
26
 
@@ -42,25 +41,14 @@ pipe = pipeline(
42
  print("🎧 Whisper pipeline ready.")
43
 
44
  # -------------------
45
- # 4️⃣ Transcription Function
46
  # -------------------
47
- def transcribe(audio):
48
- # Gradio audio input returns a file path
49
- if audio is None:
 
 
 
50
  return "No audio provided."
51
- result = pipe(audio)
52
  return result["text"]
53
-
54
- # -------------------
55
- # 5️⃣ Gradio Interface
56
- # -------------------
57
- demo = gr.Interface(
58
- fn=transcribe,
59
- inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
60
- outputs="text",
61
- title="🎤 Whisper Speech-to-Text",
62
- description="Record or upload audio and get real-time transcription using Whisper."
63
- )
64
-
65
- if __name__ == "__main__":
66
- demo.launch()
 
1
+ # PaitentVoiceToText.py
2
  import torch
3
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 
4
 
5
  # -------------------
6
+ # 1️⃣ Detect device
7
  # -------------------
8
  use_cuda = torch.cuda.is_available()
9
  dtype = torch.float16 if use_cuda else torch.float32
 
19
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
20
  hub_id,
21
  torch_dtype=dtype,
22
+ device_map="auto", # accelerate manages device placement
23
  trust_remote_code=True
24
  )
25
 
 
41
  print("🎧 Whisper pipeline ready.")
42
 
43
  # -------------------
44
+ # 4️⃣ Function for external import
45
  # -------------------
46
+ def record_and_transcribe(audio_file):
47
+ """
48
+ Transcribe an audio file (path) or an existing recording by passing it to the Whisper pipeline; no recording is done here.
49
+ Returns the transcribed text, or the string "No audio provided." when audio_file is None.
50
+ """
51
+ if audio_file is None:
52
  return "No audio provided."
53
+ result = pipe(audio_file)
54
  return result["text"]