Spaces:

Vikhrmodels
/

Russian_ASR_Leaderboard

Running

App Files Files Community

Russian_ASR_Leaderboard / app.py

Anonumous

Update app.py

fa68614 verified 7 months ago

raw

history blame contribute delete

9.47 kB

	import html
	import json

	import gradio as gr

	from constants import INTRODUCTION_TEXT, DATASETS
	from utils import (
	    init_repo,
	    load_data,
	    process_submit,
	    get_datasets_description,
	    get_metrics_html,
	    compute_wer_cer,
	    get_submit_html,
	)
	from styles import LEADERBOARD_CSS

	# Front-end assets for the leaderboard table: the DataTables stylesheet,
	# jQuery 3.6.0 and DataTables 1.13.4 from their CDNs, plus an init script.
	# The script defines window.initLeaderboardDT, which turns every
	# `.leaderboard-table table` not yet marked `dt-ready` into a DataTable
	# (25 rows per page, ascending sort on column index 3, Russian UI strings).
	# Initialization is retried on DOMContentLoaded, on every DOM mutation and
	# once after 300 ms, so tables rendered later are picked up as well.
	# The guard on the first line of the try block uses a plain `||`; the escaped
	# `\|\|` form is neither valid JavaScript nor a valid Python string escape.
	ASSETS_HTML = """
	<link rel="stylesheet" href="https://cdn.datatables.net/1.13.4/css/jquery.dataTables.min.css">
	<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
	<script src="https://cdn.datatables.net/1.13.4/js/jquery.dataTables.min.js"></script>
	<script>
	window.initLeaderboardDT = function(){
	try{
	if (!window.jQuery || !$.fn.DataTable) return;
	document.querySelectorAll('.leaderboard-table table:not(.dt-ready)').forEach(function(tbl){
	$(tbl).addClass('dt-ready').DataTable({
	pageLength: 25,
	order: [[3, 'asc']],
	language: { url: 'https://cdn.datatables.net/plug-ins/1.13.4/i18n/ru.json' }
	});
	});
	}catch(e){}
	};
	const tryInit = () => window.initLeaderboardDT && window.initLeaderboardDT();
	document.addEventListener('DOMContentLoaded', tryInit);
	const mo = new MutationObserver(tryInit);
	mo.observe(document.documentElement, { childList: true, subtree: true });
	setTimeout(tryInit, 300);
	</script>
	"""

	# Static HTML card rendered at the top of the datasets tab. Its Russian text
	# states that all results are computed on the RuASRBenchmark dataset and
	# links to it on the Hugging Face Hub.
	dataset_info_html = """
	<div class="dataset-info-card">
	<h3>📦 Тестовый набор</h3>
	<p>Все результаты рассчитываются на
	<a href="https://huggingface.co/datasets/Vikhrmodels/RuASRBenchmark" target="_blank">RuASRBenchmark</a>.
	В нём собраны разные сеты с русской речью, чтобы проверять модели на разных типах аудио.</p>
	</div>
	"""

	# Run the project's repository setup before the UI is built.
	# NOTE(review): what init_repo() actually does lives in utils and is not visible here.
	init_repo()
	# Register the current working directory as a static path so local files such
	# as Logo.png can be referenced by URL from the page.
	# NOTE(review): "." makes every file in the working directory servable — consider
	# listing only the assets that are really needed; confirm nothing sensitive is stored here.
	gr.set_static_paths(paths=["."])

	# Root of the UI: a Blocks app using the project CSS and the Soft theme.
	# NOTE(review): nesting indentation is missing in this copy of the file; every
	# statement below a `with ...:` header belongs inside that context manager.
	with gr.Blocks(css=LEADERBOARD_CSS, theme=gr.themes.Soft()) as demo:
	# Centered logo, loaded from the app's file route (relies on the static path registered above).
	gr.HTML('<img src="/gradio_api/file=Logo.png" style="display:block; margin:0 auto; width:34%; height:auto;">')
	# DataTables assets and init script.
	# NOTE(review): browsers do not run <script> tags inserted through innerHTML; if
	# gr.HTML renders that way these scripts never execute and would need to be passed
	# via gr.Blocks(head=...) instead — confirm against the installed Gradio version.
	gr.HTML(ASSETS_HTML)
	gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	# Container for the four tabs defined below.
	with gr.Tabs():
	# Leaderboard tab: a single HTML component holding the rendered table.
	with gr.Tab("🏅 Лидерборд"):
	    # Pass the loader itself rather than its result: `every` only re-evaluates
	    # `value` when it is callable, so `value=load_data()` would freeze the
	    # table at the HTML produced when the app was built.
	    leaderboard_html = gr.HTML(value=load_data, every=60)

	# Metrics tab: the HTML returned by get_metrics_html() followed by an
	# interactive sandbox for computing WER/CER on user-supplied strings.
	with gr.Tab("📈 Метрики"):
	gr.HTML(get_metrics_html())
	with gr.Group():
	gr.Markdown("### Песочница: посчитайте WER/CER на своих строках")
	with gr.Row():
	# Sandbox inputs: reference text and recognized (hypothesis) text.
	ref = gr.Textbox(label="Референсный текст", lines=2)
	hyp = gr.Textbox(label="Распознанный текст", lines=2)
	with gr.Row():
	# Normalization toggle (label: lowercase, no punctuation), on by default.
	normalize = gr.Checkbox(value=True, label="Нормализовать (нижний регистр, без пунктуации)")
	btn_calc = gr.Button("Посчитать")
	with gr.Row():
	# Result fields; precision=2 limits the displayed value to two decimals.
	out_wer = gr.Number(label="WER, %", precision=2)
	out_cer = gr.Number(label="CER, %", precision=2)

	def _ui_compute(ref_text, hyp_text, norm):
	    """Compute WER and CER for the sandbox inputs.

	    A missing or empty text is forwarded to ``compute_wer_cer`` as ``""``.
	    Returns the ``(wer, cer)`` pair in the order of the two output fields.
	    """
	    reference = ref_text if ref_text else ""
	    hypothesis = hyp_text if hyp_text else ""
	    word_error, char_error = compute_wer_cer(reference, hypothesis, norm)
	    return word_error, char_error

	# Clicking the button feeds both texts and the normalization flag to
	# _ui_compute and writes its two results into the WER and CER fields.
	btn_calc.click(_ui_compute, inputs=[ref, hyp, normalize], outputs=[out_wer, out_cer])

	# Datasets tab: the static benchmark card followed by the HTML returned
	# by get_datasets_description().
	with gr.Tab("📊 Датасеты"):
	gr.HTML(dataset_info_html)
	gr.HTML(get_datasets_description())

	# Submission tab: login button, an auth notice, and a form that is created hidden.
	with gr.Tab("✉️ Отправить результат"):
	# 1) Hugging Face login button
	login = gr.LoginButton(size="md")
	# Placeholder for the "please sign in" notice filled by gate_visibility.
	auth_note = gr.HTML()

	# 2) Form panel, hidden until the user has signed in
	submit_panel = gr.Group(visible=False)
	with submit_panel:
	gr.HTML(get_submit_html())
	with gr.Row():
	# Left column: model name, model link and license (all marked required with "*").
	with gr.Column():
	model_name = gr.Textbox(label="Название модели *", placeholder="MyAwesomeASRModel")
	link = gr.Textbox(label="Ссылка на модель *", placeholder="https://huggingface.co/username/model")
	license_field = gr.Textbox(label="Лицензия *", placeholder="MIT / Apache-2.0 / Closed")
	# Right column: per-dataset metrics pasted as a JSON object.
	with gr.Column():
	metrics_json = gr.TextArea(
	label="Метрики JSON *",
	placeholder='{"Russian_LibriSpeech": {"wer": 0.1234, "cer": 0.0567}, ...}',
	lines=16,
	)

	submit_btn = gr.Button("🚀 Отправить")
	# Receives the success/error alert produced by the submit handler.
	output_msg = gr.HTML()

	def _alert(kind, text):
	return f'<div class="alert {kind}">{text}</div>'

	# 3) Gate: show or hide the form depending on the OAuth profile
	def gate_visibility(profile: gr.OAuthProfile | None):
	    """Decide whether the submission form is visible for the current visitor.

	    Returns a pair matching the ``[submit_panel, auth_note]`` outputs: a
	    visibility update for the form panel and the HTML for the notice. Without
	    a profile the panel is hidden and an error alert asks the user to sign in;
	    with a profile the panel is shown and the notice is cleared.
	    """
	    if profile is None:
	        return gr.update(visible=False), _alert("error", "Чтобы отправить результат, войдите через Hugging Face выше.")
	    return gr.update(visible=True), ""

	# Checked on page load and after the LoginButton has been clicked
	demo.load(gate_visibility, inputs=None, outputs=[submit_panel, auth_note])
	login.click(lambda: None, outputs=None) # the click itself redirects to OAuth; demo.load fires again on return

	def _metrics_error(metrics, datasets):
	    """Return the first validation error message for *metrics*, or ``None`` if valid."""
	    if not isinstance(metrics, dict):
	        return "Метрики должны быть объектом JSON с датасетами верхнего уровня."
	    missing = [ds for ds in datasets if ds not in metrics]
	    if missing:
	        return f"Отсутствуют датасеты: {', '.join(missing)}"
	    extra = [key for key in metrics if key not in datasets]
	    if extra:
	        return f"Лишние ключи в метриках: {', '.join(extra)}"
	    for ds in datasets:
	        entry = metrics[ds]
	        if not isinstance(entry, dict):
	            return f"{ds}: значение должно быть объектом с полями wer и cer"
	        for field in ("wer", "cer"):
	            value = entry.get(field)
	            # bool is a subclass of int, so JSON true/false must be rejected explicitly.
	            if isinstance(value, bool) or not isinstance(value, (int, float)):
	                return f"{ds}: поле {field} должно быть числом"
	            # NaN fails this comparison too, so it is rejected as out of range.
	            if not 0 <= value <= 1:
	                return f"{ds}: поле {field} должно быть в диапазоне [0, 1]"
	    return None

	# 4) Authorized submit: the profile is injected automatically
	def build_json_and_submit(name, link_, lic, metrics_str, profile: gr.OAuthProfile | None):
	    """Validate the submission form and forward it to ``process_submit``.

	    Returns a 6-tuple matching the click outputs: leaderboard HTML (or a no-op
	    update), status alert HTML, and the new values of the four form fields.
	    On any validation or submission error the leaderboard is left untouched
	    and the user's input is handed back; on success the form is cleared.

	    Checks, in order: signed-in profile, non-empty name, http(s) link,
	    non-empty license, parseable JSON, exactly the datasets listed in
	    ``DATASETS``, and numeric ``wer``/``cer`` within [0, 1] for each of them.
	    """
	    if profile is None:
	        return gr.update(), _alert("error", "Нужен вход через Hugging Face."), name, link_, lic, metrics_str

	    name = (name or "").strip()
	    link_ = (link_ or "").strip()
	    lic = (lic or "").strip()

	    def _reject(message):
	        # Keep the leaderboard as is and return the (stripped) inputs to the form.
	        return gr.update(), _alert("error", message), name, link_, lic, metrics_str

	    if not name:
	        return _reject("Укажите название модели.")
	    if not link_.startswith(("http://", "https://")):
	        return _reject("Ссылка должна начинаться с http:// или https://")
	    if not lic:
	        return _reject("Укажите лицензию модели.")
	    try:
	        metrics = json.loads(metrics_str)
	    except (TypeError, ValueError, RecursionError) as e:
	        # ValueError covers JSONDecodeError; TypeError a missing value;
	        # RecursionError pathologically deep nesting.
	        return _reject(f"Невалидный JSON метрик: {e}")
	    error = _metrics_error(metrics, DATASETS)
	    if error is not None:
	        return _reject(error)

	    payload = json.dumps({"model_name": name, "link": link_, "license": lic, "metrics": metrics}, ensure_ascii=False)
	    updated_html, status_msg, _ = process_submit(payload)
	    if updated_html is None:
	        # A missing leaderboard HTML signals failure; tolerate an absent status text.
	        msg = (status_msg or "").replace("Ошибка:", "").strip()
	        return _reject(f"Не удалось добавить: {msg}")

	    return updated_html, _alert("success", "✅ Результат добавлен в лидерборд."), "", "", "", ""

	# Submit wiring: the four form fields go in; the handler's six return values
	# update the leaderboard, the status alert and the same four fields.
	# `profile` is not listed in `inputs`; per the comment on the handler it is
	# injected automatically.
	submit_btn.click(
	build_json_and_submit,
	inputs=[model_name, link, license_field, metrics_json],
	outputs=[leaderboard_html, output_msg, model_name, link, license_field, metrics_json],
	)

	# Start the server only when the file is executed as a script, so importing
	# the module (e.g. by tooling that looks up `demo`) does not launch it.
	if __name__ == "__main__":
	    demo.launch()