Spaces:
Sleeping
Sleeping
Update PaitentVoiceToText.py
Browse files
- PaitentVoiceToText.py +11 -23
PaitentVoiceToText.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
-
#
|
| 2 |
import torch
|
| 3 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
| 4 |
-
import gradio as gr
|
| 5 |
|
| 6 |
# -------------------
|
| 7 |
-
# 1️⃣ Detect
|
| 8 |
# -------------------
|
| 9 |
use_cuda = torch.cuda.is_available()
|
| 10 |
dtype = torch.float16 if use_cuda else torch.float32
|
|
@@ -20,7 +19,7 @@ print("⏳ Loading model...")
|
|
| 20 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 21 |
hub_id,
|
| 22 |
torch_dtype=dtype,
|
| 23 |
-
device_map="auto",
|
| 24 |
trust_remote_code=True
|
| 25 |
)
|
| 26 |
|
|
@@ -42,25 +41,14 @@ pipe = pipeline(
|
|
| 42 |
print("🎧 Whisper pipeline ready.")
|
| 43 |
|
| 44 |
# -------------------
|
| 45 |
-
# 4️⃣
|
| 46 |
# -------------------
|
| 47 |
-
def
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
return "No audio provided."
|
| 51 |
-
result = pipe(
|
| 52 |
return result["text"]
|
| 53 |
-
|
| 54 |
-
# -------------------
|
| 55 |
-
# 5️⃣ Gradio Interface
|
| 56 |
-
# -------------------
|
| 57 |
-
demo = gr.Interface(
|
| 58 |
-
fn=transcribe,
|
| 59 |
-
inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
|
| 60 |
-
outputs="text",
|
| 61 |
-
title="🎤 Whisper Speech-to-Text",
|
| 62 |
-
description="Record or upload audio and get real-time transcription using Whisper."
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
if __name__ == "__main__":
|
| 66 |
-
demo.launch()
|
|
|
|
| 1 |
+
# PaitentVoiceToText.py
|
| 2 |
import torch
|
| 3 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
|
|
| 4 |
|
| 5 |
# -------------------
|
| 6 |
+
# 1️⃣ Detect device
|
| 7 |
# -------------------
|
| 8 |
use_cuda = torch.cuda.is_available()
|
| 9 |
dtype = torch.float16 if use_cuda else torch.float32
|
|
|
|
| 19 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 20 |
hub_id,
|
| 21 |
torch_dtype=dtype,
|
| 22 |
+
device_map="auto", # accelerate manages device placement
|
| 23 |
trust_remote_code=True
|
| 24 |
)
|
| 25 |
|
|
|
|
| 41 |
print("🎧 Whisper pipeline ready.")
|
| 42 |
|
| 43 |
# -------------------
|
| 44 |
+
# 4️⃣ Function for external import
|
| 45 |
# -------------------
|
| 46 |
+
def record_and_transcribe(audio_file):
|
| 47 |
+
"""
|
| 48 |
+
Transcribe an audio file (path) or recording.
|
| 49 |
+
Returns the transcribed text.
|
| 50 |
+
"""
|
| 51 |
+
if audio_file is None:
|
| 52 |
return "No audio provided."
|
| 53 |
+
result = pipe(audio_file)
|
| 54 |
return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|