Muhammadidrees committed on
Commit
dda9115
·
verified ·
1 Parent(s): 2d0cff7

Update PaitentVoiceToText.py

Browse files
Files changed (1) hide show
  1. PaitentVoiceToText.py +22 -12
PaitentVoiceToText.py CHANGED
@@ -7,49 +7,59 @@ import gradio as gr
7
  # 1️⃣ Detect GPU
8
  # -------------------
9
  use_cuda = torch.cuda.is_available()
10
- device_index = 0 if use_cuda else -1
11
  dtype = torch.float16 if use_cuda else torch.float32
12
 
 
 
13
  # -------------------
14
  # 2️⃣ Load Whisper model
15
  # -------------------
16
  hub_id = "Muhammadidrees/WispherVOICE"
 
 
17
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
18
  hub_id,
19
  torch_dtype=dtype,
20
- device_map="auto",
 
 
 
 
 
21
  trust_remote_code=True
22
  )
23
- processor = AutoProcessor.from_pretrained(hub_id, trust_remote_code=True)
24
 
 
 
 
25
  pipe = pipeline(
26
  "automatic-speech-recognition",
27
  model=model,
28
  tokenizer=processor.tokenizer,
29
- feature_extractor=processor.feature_extractor,
30
- torch_dtype=dtype,
31
- device=device_index
32
  )
33
 
34
  print("🎧 Whisper pipeline ready.")
35
 
36
-
37
  # -------------------
38
- # 3️⃣ Function to Transcribe Uploaded/Recorded Audio
39
  # -------------------
40
  def transcribe(audio):
41
- # audio = (sr, data) from Gradio microphone
 
 
42
  result = pipe(audio)
43
  return result["text"]
44
 
45
-
46
  # -------------------
47
- # 4️⃣ Gradio Interface
48
  # -------------------
49
  demo = gr.Interface(
50
  fn=transcribe,
51
  inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
52
- outputs="text"
 
 
53
  )
54
 
55
  if __name__ == "__main__":
 
7
  # 1️⃣ Detect GPU
8
  # -------------------
9
  use_cuda = torch.cuda.is_available()
 
10
  dtype = torch.float16 if use_cuda else torch.float32
11
 
12
+ print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}, dtype={dtype}")
13
+
14
  # -------------------
15
  # 2️⃣ Load Whisper model
16
  # -------------------
17
  hub_id = "Muhammadidrees/WispherVOICE"
18
+
19
+ print("⏳ Loading model...")
20
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
21
  hub_id,
22
  torch_dtype=dtype,
23
+ device_map="auto", # accelerate handles device placement
24
+ trust_remote_code=True
25
+ )
26
+
27
+ processor = AutoProcessor.from_pretrained(
28
+ hub_id,
29
  trust_remote_code=True
30
  )
 
31
 
32
+ # -------------------
33
+ # 3️⃣ Create pipeline (no device argument!)
34
+ # -------------------
35
  pipe = pipeline(
36
  "automatic-speech-recognition",
37
  model=model,
38
  tokenizer=processor.tokenizer,
39
+ feature_extractor=processor.feature_extractor
 
 
40
  )
41
 
42
  print("🎧 Whisper pipeline ready.")
43
 
 
44
  # -------------------
45
+ # 4️⃣ Transcription Function
46
  # -------------------
47
  def transcribe(audio):
48
+ # Gradio audio input returns a file path
49
+ if audio is None:
50
+ return "No audio provided."
51
  result = pipe(audio)
52
  return result["text"]
53
 
 
54
  # -------------------
55
+ # 5️⃣ Gradio Interface
56
  # -------------------
57
  demo = gr.Interface(
58
  fn=transcribe,
59
  inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
60
+ outputs="text",
61
+ title="🎤 Whisper Speech-to-Text",
62
+ description="Record or upload audio and get real-time transcription using Whisper."
63
  )
64
 
65
  if __name__ == "__main__":