Simona-AI / app.py
Elormiden's picture
Update app.py
5533d63 verified
Raw
History Blame Contribute Delete
2.46 kB
# app.py
from __future__ import annotations
import os
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
import kenlm_asr_pipeline
# Model repository and the KenLM language-model file handed to the pipeline.
MODEL_ID = "Elormiden/wav2vec2-cypriot-dialect"
KENLM_FILE = "cypriot.klm"

# Hub access token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Built once at import time and shared by every request.
# NOTE(review): "automatic-speech-recognition-kenlm" is not a stock
# transformers task — presumably it is registered by the
# `kenlm_asr_pipeline` import at the top of the file; confirm there.
ASR = pipeline(
    "automatic-speech-recognition-kenlm",
    # The same repository id is passed under both keyword names.
    model=MODEL_ID,
    model_id_or_path=MODEL_ID,
    token=HF_TOKEN,
    # Decoder settings — presumably alpha is the LM weight and beta the
    # word-insertion bonus; verify against the custom pipeline.
    kenlm_filename=KENLM_FILE,
    alpha=0.4,
    beta=0.9,
    # NOTE(review): device index 0 is normally the first CUDA GPU in
    # transformers, so this likely requires a GPU host — confirm.
    device=0,
)
def _to_mono_float32(data: np.ndarray) -> np.ndarray:
    """Return *data* as a mono float32 waveform.

    Signed-integer PCM is scaled into [-1.0, 1.0); floating-point input is
    only cast. A 2-D array is averaged over axis 1, i.e. it is treated as
    (samples, channels).
    """
    samples = np.asarray(data)
    # Remember the integer dtype before casting so the full-scale value
    # (e.g. 32768 for int16) is still known afterwards.
    pcm_dtype = (
        samples.dtype if np.issubdtype(samples.dtype, np.signedinteger) else None
    )
    samples = samples.astype(np.float32)
    if samples.ndim == 2:
        samples = samples.mean(axis=1)
    if pcm_dtype is not None:
        samples /= np.float32(-np.iinfo(pcm_dtype).min)
    return samples


def transcribe(audio: tuple[int, np.ndarray] | None):
    """Transcribe one Gradio audio payload and return text for the UI.

    Args:
        audio: ``(sample_rate, samples)`` as produced by a ``gr.Audio``
            component with ``type="numpy"``, or ``None`` when no audio is
            present.

    Returns:
        The stripped transcription, or a human-readable message when there
        is no audio, the audio is empty or non-finite, nothing was
        recognised, or recognition failed. Never raises for recognition
        errors; the error text is embedded in the returned message.
    """
    if audio is None:
        return "Upload or record audio to get a transcription."

    sr, data = audio
    # Always hand the model the same representation (mono float32 in
    # [-1, 1]) regardless of whether resampling is needed.
    samples = _to_mono_float32(data)

    # Validate before resampling so empty or NaN/inf input never reaches
    # the resampler or the model.
    if samples.size == 0 or not np.isfinite(samples).all():
        return "Could not read this audio. Try another file or recording."

    target_sr = 16000
    try:
        # Resampling is inside the try so a resampler failure is reported
        # to the user like any other recognition failure.
        if sr != target_sr:
            samples = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)
            sr = target_sr

        out = ASR(samples, sampling_rate=sr)

        # The pipeline result may be a dict, a list of dicts, or a bare
        # string; anything else is stringified.
        if isinstance(out, dict):
            text = out.get("text", "")
        elif isinstance(out, list) and out and isinstance(out[0], dict):
            text = out[0].get("text", "")
        elif isinstance(out, str):
            text = out
        else:
            text = str(out)
        return (text or "").strip() or "No speech detected."
    except Exception as e:  # UI boundary: surface the failure as text.
        return f"Something went wrong. Please try again. ({e})"
# Markdown intro for the page; passed to gr.Markdown when the UI is built.
DESCRIPTION = """
# Simona AI
**Speech-to-text for Cypriot Greek (Κυπριακά).**
Generic recognizers often miss how people actually speak in Cyprus. Simona turns your recording into text tuned for the **Cypriot dialect** — upload a file or use the microphone.
*Μιλήστε ή ανεβάστε ήχο· λάβετε κείμενο στα Κυπριακά.*
"""
# Page layout: intro text, one audio input, a button and the result box.
with gr.Blocks(title="Simona AI — Cypriot Greek ASR") as demo:
    gr.Markdown(DESCRIPTION)

    # type="numpy" makes Gradio pass a (sample_rate, samples) tuple.
    audio = gr.Audio(sources=["microphone", "upload"], type="numpy", label="Audio")
    btn = gr.Button("Transcribe", variant="primary")
    txt = gr.Textbox(
        label="Transcription", lines=4, placeholder="Your text will appear here…"
    )

    # The same handler runs on an explicit button press and whenever the
    # audio value changes.
    for _bind in (btn.click, audio.change):
        _bind(fn=transcribe, inputs=audio, outputs=txt)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()