Spaces:

Elormiden
/

Simona-AI

Running

App Files Files Community

Simona-AI / app.py

Elormiden

Update app.py

5533d63 verified about 1 month ago

Raw

History Blame Contribute Delete

2.46 kB

	# app.py

	from __future__ import annotations
	import os
	import numpy as np
	import librosa
	import gradio as gr
	from transformers import pipeline
	import kenlm_asr_pipeline

	# Hub access token taken from the environment; None when HF_TOKEN is unset.
	HF_TOKEN = os.getenv("HF_TOKEN")

	# KenLM file name and the model repository handed to the pipeline below.
	KENLM_FILE = "cypriot.klm"
	MODEL_ID = "Elormiden/wav2vec2-cypriot-dialect"

	# Built once at import time and reused for every request.
	# NOTE(review): the task name is presumably registered by the
	# `kenlm_asr_pipeline` import at the top of the file — confirm.
	ASR = pipeline(
	    "automatic-speech-recognition-kenlm",
	    model=MODEL_ID,
	    kenlm_filename=KENLM_FILE,
	    # NOTE(review): alpha/beta look like language-model decoding weights;
	    # verify their meaning against kenlm_asr_pipeline.
	    alpha=0.4,
	    beta=0.9,
	    token=HF_TOKEN,
	    # NOTE(review): device index 0 is hard-coded; this presumably requires
	    # an accelerator to be present — confirm the deployment hardware.
	    device=0,
	    model_id_or_path=MODEL_ID,
	)


	def transcribe(audio: tuple[int, np.ndarray] | None) -> str:
	    """Transcribe one Gradio audio value and return text for display.

	    Args:
	        audio: ``(sample_rate, samples)`` as delivered by the audio input,
	            or ``None`` when nothing has been provided. Two-dimensional
	            sample arrays are averaged over axis 1 to obtain one channel.

	    Returns:
	        The recognised text, or a short user-facing message when there is
	        no input, the samples are unusable, nothing was recognised, or the
	        recogniser raised an exception.
	    """
	    unreadable = "Could not read this audio. Try another file or recording."

	    if audio is None:
	        return "Upload or record audio to get a transcription."

	    sr, data = audio
	    data = np.asarray(data)

	    # Reject empty clips up front so they never reach the resampler.
	    if data.size == 0:
	        return unreadable

	    # Integer PCM is rescaled to float32 in [-1, 1]; anything else is only
	    # cast, so the recogniser always receives float32 samples.
	    if np.issubdtype(data.dtype, np.integer):
	        limits = np.iinfo(data.dtype)
	        full_scale = float(max(abs(int(limits.min)), int(limits.max)))
	        data = data.astype(np.float32) / full_scale
	    else:
	        data = data.astype(np.float32)

	    # Down-mix multi-channel input by averaging across axis 1.
	    if data.ndim == 2:
	        data = data.mean(axis=1)

	    if not np.isfinite(data).all():
	        return unreadable

	    target_sr = 16000
	    if sr != target_sr:
	        data = librosa.resample(data, orig_sr=sr, target_sr=target_sr)
	        sr = target_sr

	    # UI boundary: any failure in recognition or in unpacking its result is
	    # reported to the user as text instead of propagating.
	    try:
	        out = ASR(data, sampling_rate=sr)
	        if isinstance(out, dict):
	            text = out.get("text", "")
	        elif isinstance(out, list) and out and isinstance(out[0], dict):
	            text = out[0].get("text", "")
	        elif isinstance(out, str):
	            text = out
	        else:
	            text = str(out)

	        return (text or "").strip() or "No speech detected."
	    except Exception as e:
	        return f"Something went wrong. Please try again. ({e})"


	# Markdown introduction rendered at the top of the page via gr.Markdown.
	# This is user-facing runtime text (English and Greek); edit with care.
	DESCRIPTION = """
	# Simona AI

	Speech-to-text for Cypriot Greek (Κυπριακά).

	Generic recognizers often miss how people actually speak in Cyprus. Simona turns your recording into text tuned for the Cypriot dialect — upload a file or use the microphone.

	Μιλήστε ή ανεβάστε ήχο· λάβετε κείμενο στα Κυπριακά.
	"""

	# Page layout: intro text, one audio input, a button and an output box.
	with gr.Blocks(title="Simona AI — Cypriot Greek ASR") as demo:
	    gr.Markdown(DESCRIPTION)

	    # type="numpy" makes the component hand `transcribe` a
	    # (sample_rate, samples) tuple.
	    audio = gr.Audio(
	        sources=["microphone", "upload"],
	        type="numpy",
	        label="Audio",
	    )
	    btn = gr.Button("Transcribe", variant="primary")
	    txt = gr.Textbox(
	        label="Transcription",
	        lines=4,
	        placeholder="Your text will appear here…",
	    )

	    # Same handler for both triggers: the explicit button press and any
	    # change of the audio value.
	    btn.click(**{"fn": transcribe, "inputs": audio, "outputs": txt})
	    audio.change(**{"fn": transcribe, "inputs": audio, "outputs": txt})

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()