Spaces:
Sleeping
Sleeping
update app.py
Browse files
app.py
CHANGED
|
@@ -21,46 +21,36 @@ if LANGUAGE not in SUPPORTED_LANGUAGES:
|
|
| 21 |
else:
|
| 22 |
MODEL_PATH = MODEL_PATHS[LANGUAGE]
|
| 23 |
|
| 24 |
-
|
| 25 |
-
def init_pipeline():
    """Build the automatic-speech-recognition pipeline for the configured model.

    Returns:
        A Hugging Face `pipeline` configured for long-form audio: 30 s chunks
        with (4, 2) s stride overlap, batched decoding, running on GPU when
        CUDA is available (device 0) and CPU otherwise (device -1).
    """
    # Pick the GPU if one is present; -1 tells transformers to stay on CPU.
    device_index = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_PATH,
        device=device_index,
        chunk_length_s=30,
        stride_length_s=(4, 2),
        batch_size=8,
        # Token is needed when MODEL_PATH points at a gated/private repo.
        token=os.getenv("HF_TOKEN"),
    )


# Eagerly load the model once at import time so requests don't pay the cost.
wave2vec_pipeline = init_pipeline()
|
| 37 |
-
|
| 38 |
|
|
|
|
| 39 |
def transcribe_gradio(audio_path: str | None) -> str:
|
| 40 |
-
"""
|
| 41 |
-
Transcribe an uploaded or recorded audio file and report inference time.
|
| 42 |
-
|
| 43 |
-
Args:
|
| 44 |
-
audio_path: Local filesystem path to the audio file provided by Gradio;
|
| 45 |
-
None or empty if the user hasn't recorded/uploaded anything.
|
| 46 |
-
|
| 47 |
-
Returns:
|
| 48 |
-
A string containing either:
|
| 49 |
-
- A warning if no file was provided,
|
| 50 |
-
- An error message if loading/decoding failed,
|
| 51 |
-
- Or the transcript followed by the elapsed inference time.
|
| 52 |
-
"""
|
| 53 |
if not audio_path:
|
| 54 |
return "⚠️ Please record something or choose a file first."
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
start = time.time()
|
| 57 |
try:
|
| 58 |
-
|
| 59 |
-
|
|
|
|
| 60 |
return f"❌ {err}"
|
| 61 |
runtime = time.time() - start
|
| 62 |
-
return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s"
|
| 63 |
|
|
|
|
| 64 |
|
| 65 |
# gradio interface
|
| 66 |
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
|
|
|
|
| 21 |
else:
|
| 22 |
MODEL_PATH = MODEL_PATHS[LANGUAGE]
|
| 23 |
|
| 24 |
+
# Module-level cache for the lazily constructed ASR pipeline; created on the
# first request so the Space can start without loading the model up front.
_asr_pipeline = None


@spaces.GPU
def transcribe_gradio(audio_path: str | None) -> str:
    """Transcribe an uploaded or recorded audio file and report inference time.

    Args:
        audio_path: Local filesystem path to the audio file provided by
            Gradio; None or empty if the user hasn't recorded/uploaded
            anything.

    Returns:
        A string containing either:
        - A warning if no file was provided,
        - An error message if transcription failed,
        - Or the transcript followed by the elapsed inference time.
    """
    if not audio_path:
        return "⚠️ Please record something or choose a file first."

    global _asr_pipeline
    if _asr_pipeline is None:
        # First call: build the pipeline once and reuse it for later requests.
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=MODEL_PATH,
            device=0 if torch.cuda.is_available() else -1,
            chunk_length_s=30,
            stride_length_s=(4, 2),
            batch_size=8,
            token=os.getenv("HF_TOKEN"),
        )

    # Time only the inference itself, not the one-off model load above.
    start = time.time()
    try:
        output = _asr_pipeline(audio_path)
        # ASR pipelines normally return {"text": ...}; fall back to str() just in case.
        transcript = output["text"] if isinstance(output, dict) else str(output)
    except Exception as err:
        return f"❌ {err}"
    elapsed = time.time() - start

    return f"{transcript}\n\n⌛ Inference time: {elapsed:.2f} s"
|
| 54 |
|
| 55 |
# gradio interface
|
| 56 |
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
|