Spaces:

archivartaunik
/

DatasetChecker

Sleeping

App Files Files Community

DatasetChecker / ui /dashboard.py

archivartaunik

Upload 35 files

e82eaee verified about 1 month ago

raw

history blame contribute delete

16.9 kB

	"""Dashboard HTML generation."""
	import html
	import pandas as pd

	from core.state import get_global_results
	from core.comparison import get_all_model_comparison, find_best_model_pair
	from ui.audio import array_to_b64_audio


	def _e(x) -> str:
	"""HTML-escape helper."""
	return html.escape("" if x is None else str(x), quote=True)


	def generate_dashboard_outputs(similarity_threshold: int, *, max_display: int = 50):
	    """
	    Build the dashboard outputs from the global analysis results.

	    Shared by all analysis functions: reads the results via
	    ``get_global_results()`` and renders them.

	    Args:
	        similarity_threshold: Rows whose ``score`` is below this value are
	            flagged as problematic unless their ``verification_status`` is
	            ``'correct'``.
	        max_display: Maximum number of flagged items rendered as full cards.
	            Rendering is capped to keep the browser from being overloaded.

	    Returns:
	        A ``(stats_html, flagged_html, table_df)`` tuple:
	        the summary tiles, the flagged-item cards followed by up to five
	        manually verified rows, and the full results table sorted by score.
	        When there are no results, ``("", "", <empty DataFrame>)``.

	    Raises:
	        KeyError: If the results have no ``score`` column, or lack one of
	            the columns shown in the full table.
	    """
	    df = pd.DataFrame(get_global_results())

	    if df.empty:
	        return "", "", pd.DataFrame()

	    # Optional columns: create them so the masks below never hit a KeyError.
	    if 'verification_status' not in df.columns:
	        df['verification_status'] = None
	    if 'model_used' not in df.columns:
	        df['model_used'] = "unknown"

	    # --- Statistics ---
	    # total_files is > 0 here: an empty frame returned early above.
	    total_files = len(df)
	    is_correct = df['verification_status'] == 'correct'

	    # Problematic: below threshold AND NOT verified as correct.
	    flagged_mask = (df['score'] < similarity_threshold) & (df['verification_status'] != 'correct')
	    flagged_count = int(flagged_mask.sum())
	    flagged_pct = flagged_count / total_files * 100
	    avg_score = df['score'].mean()

	    # --- Per-model counts, with the number verified as correct ---
	    model_counts = df['model_used'].value_counts().to_dict()
	    verified_counts = df.loc[is_correct, 'model_used'].value_counts().to_dict()
	    stats_items = [
	        f"{_e(model)}: {count} <span style='color: #4ade80; font-weight: bold;'>(✅ {verified_counts.get(model, 0)})</span>"
	        for model, count in model_counts.items()
	    ]
	    # Plain " | " separator: the previous "\|" was an invalid escape
	    # sequence and rendered a stray backslash in the page.
	    model_stats_str = " | ".join(stats_items)

	    model_stats = f"""
	<div style="background: rgba(30, 41, 59, 0.8); padding: 15px; border-radius: 12px; margin-bottom: 20px;">
	<p style="color: #94a3b8; margin: 0;">🤖 <strong>Мадэлі:</strong> {model_stats_str}</p>
	</div>
	"""

	    stats_html = f"""
	<div style="display: flex; gap: 20px; margin-bottom: 20px;">
	<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 20px; border-radius: 12px; text-align: center; flex: 1; color: white;">
	<h3 style="margin: 0; font-size: 2em;">{total_files}</h3>
	<p style="margin: 5px 0 0 0; opacity: 0.9;">📁 Усяго файлаў</p>
	</div>
	<div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	padding: 20px; border-radius: 12px; text-align: center; flex: 1; color: white;">
	<h3 style="margin: 0; font-size: 2em;">{flagged_count} <span style="font-size: 0.5em; opacity: 0.8;">({flagged_pct:.1f}%)</span></h3>
	<p style="margin: 5px 0 0 0; opacity: 0.9;">🚩 Праблемных</p>
	</div>
	<div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
	padding: 20px; border-radius: 12px; text-align: center; flex: 1; color: white;">
	<h3 style="margin: 0; font-size: 2em;">{avg_score:.1f}%</h3>
	<p style="margin: 5px 0 0 0; opacity: 0.9;">📊 Сярэдні скор</p>
	</div>
	</div>
	{model_stats}
	"""

	    # --- Flagged items, worst score first ---
	    flagged_df = df[flagged_mask].sort_values(by="score")
	    # Fragments are collected and joined once instead of repeated "+=".
	    flagged_parts = []

	    if flagged_df.empty:
	        flagged_parts.append("""
	<div style="background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
	padding: 30px; border-radius: 12px; text-align: center;">
	<h3 style="color: #2d3748; margin: 0;">✅ Усё добра!</h3>
	<p style="color: #4a5568; margin: 10px 0 0 0;">Файлаў ніжэй парогу не знойдзена (або ўсе правераны).</p>
	</div>
	""")
	    else:
	        # A negative cap would make head() drop rows from the end instead.
	        max_display = max(0, max_display)

	        if len(flagged_df) > max_display:
	            flagged_parts.append(f"""
	<div style="background: #f59e0b; padding: 15px; border-radius: 8px; margin-bottom: 20px; color: white; text-align: center;">
	⚠️ Паказана першыя {max_display} з {len(flagged_df)} праблемных файлаў, каб пазбегнуць перагрузкі браўзера.
	</div>
	""")

	        flagged_parts.extend(
	            _generate_flagged_item_html(row, similarity_threshold)
	            for _, row in flagged_df.head(max_display).iterrows()
	        )

	    # --- Minimized rows for manually verified items (latest five by id) ---
	    manual_mask = df['verification_status'].notnull() & (df['model_used'] == 'manual')
	    manual_df = df[manual_mask]
	    # Sort only when ids are available; the item renderer likewise
	    # tolerates records without an id.
	    if 'id' in manual_df.columns:
	        manual_df = manual_df.sort_values(by="id", ascending=False)
	    manual_df = manual_df.head(5)

	    if not manual_df.empty:
	        flagged_parts.append("""<h4 style="color: #94a3b8; margin: 20px 0 10px 0;">🕒 Апошнія правераныя:</h4>""")
	        for _, row in manual_df.iterrows():
	            verified_ok = row['verification_status'] == 'correct'
	            status_icon = "✅" if verified_ok else "❌"
	            status_color = "#10b981" if verified_ok else "#ef4444"
	            # A missing (NaN) score cannot be rounded to int; show 0 instead.
	            raw_score = row.get('score')
	            score_pct = int(round(float(raw_score))) if pd.notnull(raw_score) else 0
	            flagged_parts.append(f"""
	<div style="background: rgba(30, 41, 59, 0.4); border-radius: 8px; padding: 10px 15px;
	margin-bottom: 8px; display: flex; justify-content: space-between; align-items: center;
	border-left: 3px solid {status_color};">
	<span style="color: #cbd5e1; font-size: 0.9em;">{_e(row.get('path'))}</span>
	<span style="color: {status_color}; font-weight: bold; font-size: 0.9em;">
	{status_icon} {_e(row.get('verification_status'))} (Score: {score_pct}%)
	</span>
	</div>
	""")

	    flagged_html = "".join(flagged_parts)

	    # --- Full table ---
	    cols = ['path', 'score', 'model_used', 'verification_status', 'ref_text', 'hyp_text']

	    def _status_icon(status):
	        """Map a verification status to its icon ('' when not verified)."""
	        if status == 'correct':
	            return "✅"
	        if status == 'incorrect':
	            return "❌"
	        return ""

	    # Copy only the displayed columns so the icon mapping does not touch df.
	    table_df = df[cols].copy()
	    table_df['verification_status'] = table_df['verification_status'].apply(_status_icon)
	    table_df = table_df.sort_values(by="score")

	    return stats_html, flagged_html, table_df


	def _generate_flagged_item_html(row, similarity_threshold: int) -> str:
	    """Render the HTML card for a single flagged record.

	    The card shows the file path, the reference and recognized texts, an
	    optional "best result" box, the normalized texts, an optional per-model
	    comparison table, the audio player and the verification buttons.

	    Args:
	        row: The record to render, read through ``row.get(...)``. In this
	            file it is a DataFrame row produced by ``iterrows()``.
	        similarity_threshold: Scores at or above this value get the green
	            badge color.

	    Returns:
	        The HTML fragment for the card.
	    """
	    raw_id = row.get('id')
	    rid = int(raw_id) if pd.notnull(raw_id) else -1

	    raw_score = row.get('score')
	    score = float(raw_score) if pd.notnull(raw_score) else 0.0
	    score_int = int(round(score))
	    score_color = "#f5576c" if score < 50 else "#fbbf24" if score < 75 else "#34d399"

	    audio_html = array_to_b64_audio(row.get('audio_array'), row.get('sampling_rate'))

	    model_used = row.get('model_used', 'unknown')
	    model_badge = "🖐️ Ручная праверка" if model_used == 'manual' else _e(model_used)

	    # A DataFrame row holds NaN (truthy, no len()) when the record has no
	    # per-model results, so anything that is not a dict counts as "none".
	    model_results = row.get('model_results', {})
	    if not isinstance(model_results, dict):
	        model_results = {}
	    model_count = len(model_results)

	    # Build the model-comparison table (only when several models ran).
	    model_comparison_html = ""
	    if model_count > 1:
	        comparison_data = get_all_model_comparison(row)
	        best_model = comparison_data.get('best_model', '')

	        # Sort by score, best first.
	        sorted_models = sorted(model_results.items(), key=lambda item: -item[1].get('score', 0))

	        row_parts = []
	        for m_name, m_data in sorted_models:
	            m_score = m_data.get('score', 0)
	            m_hyp = m_data.get('hyp_text', '')
	            is_best = "✅" if m_name == best_model else ""
	            score_bg = _score_badge_color(m_score, similarity_threshold)

	            row_parts.append(f"""
	<tr style="border-bottom: 1px solid #334155;">
	<td style="color: #e2e8f0; padding: 8px 10px; vertical-align: top; white-space: nowrap;">
	<div style="font-weight: bold; margin-bottom: 4px;">{is_best} {_e(m_name)}</div>
	<span style="background: {score_bg}; color: white; padding: 2px 8px; border-radius: 10px; font-size: 0.8em;">{int(m_score)}%</span>
	</td>
	<td style="color: #cbd5e1; padding: 8px 10px; font-family: monospace; font-size: 0.85em; vertical-align: top;">
	{_e(m_hyp)}
	</td>
	</tr>
	""")
	        model_scores_rows = "".join(row_parts)

	        model_comparison_html = f"""
	<details style="color: #94a3b8; margin-bottom: 15px;">
	<summary style="cursor: pointer; color: #60a5fa; margin-bottom: 5px;">📊 Параўнанне мадэлей ({model_count})</summary>
	<div style="background: #0f172a; border-radius: 8px; overflow: hidden; border: 1px solid #334155; margin-top: 5px;">
	<table style="width: 100%; border-collapse: collapse;">
	<thead>
	<tr style="background: #1e293b; border-bottom: 1px solid #475569;">
	<th style="text-align: left; color: #94a3b8; padding: 10px; width: 30%;">Мадэль / Скор</th>
	<th style="text-align: left; color: #94a3b8; padding: 10px;">Тэкст</th>
	</tr>
	</thead>
	<tbody>
	{model_scores_rows}
	</tbody>
	</table>
	</div>
	</details>
	"""

	    # Best result: the pair of sources that agree best with each other.
	    best_text_html = ""
	    ref_text = row.get('ref_text', '')

	    if model_count >= 2:
	        # find_best_model_pair looks for the best match BETWEEN SOURCES.
	        best_pair = find_best_model_pair(row, ref_text)
	        if best_pair:
	            m1_name = best_pair.get('model1', '')
	            m2_name = best_pair.get('model2', '')
	            pair_sim = best_pair.get('pair_similarity', 0)
	            best_hyp_pair = best_pair.get('best_hyp', '')

	            # Pair agreement uses stricter fixed cut-offs than model scores.
	            pair_sim_bg = _score_badge_color(pair_sim, 95, 80)

	            best_text_html = f"""
	<div style="background: linear-gradient(135deg, #1e3a5f 0%, #0f172a 100%); border-radius: 8px; padding: 15px; margin-bottom: 10px; border: 1px solid #3b82f6;">
	<p style="color: #60a5fa; margin: 0 0 10px 0; font-size: 0.9em;">
	🏆 Найлепшае супадзенне паміж крыніцамі:
	<span style="background: #475569; color: #e2e8f0; padding: 2px 8px; border-radius: 6px; margin: 0 5px;">{_e(m1_name)}</span>
	↔
	<span style="background: #475569; color: #e2e8f0; padding: 2px 8px; border-radius: 6px; margin: 0 5px;">{_e(m2_name)}</span>
	<span style="background: {pair_sim_bg}; color: white; padding: 2px 8px; border-radius: 10px; font-weight: bold;">{int(pair_sim)}%</span>
	</p>
	<p style="color: #93c5fd; margin: 0; font-family: monospace; font-weight: bold;">{_e(best_hyp_pair)}</p>
	<button type="button" onclick="verifyRecord({rid}, 'update_match')" class="verify-btn" style="background: #3b82f6; margin-top: 10px; width: 100%; font-size: 0.9em;">📝 Замяніць арыгінал і пацвердзіць</button>
	</div>
	"""
	    elif model_count == 1:
	        # Only one model: show its own result. Local names are kept
	        # distinct from the record-level `score` to avoid shadowing it.
	        m_name, single_result = next(iter(model_results.items()))
	        single_score = single_result.get('score', 0)
	        single_hyp = single_result.get('hyp_text', '')
	        single_score_bg = _score_badge_color(single_score, similarity_threshold)

	        best_text_html = f"""
	<div style="background: linear-gradient(135deg, #1e3a5f 0%, #0f172a 100%); border-radius: 8px; padding: 15px; margin-bottom: 10px; border: 1px solid #3b82f6;">
	<p style="color: #60a5fa; margin: 0 0 5px 0; font-size: 0.9em;">
	🏆 Вынік ({_e(m_name)})
	<span style="background: {single_score_bg}; color: white; padding: 2px 8px; border-radius: 10px; margin-left: 8px;">{int(single_score)}%</span>
	</p>
	<p style="color: #93c5fd; margin: 0; font-family: monospace; font-weight: bold;">{_e(single_hyp)}</p>
	<button type="button" onclick="verifyRecord({rid}, 'update_match')" class="verify-btn" style="background: #3b82f6; margin-top: 10px; width: 100%; font-size: 0.9em;">📝 Замяніць арыгінал і пацвердзіць</button>
	</div>
	"""

	    # The "recognized" box shows the record's own score and model. It reuses
	    # the guarded, rounded score so both badges on the card show the same
	    # number and a missing score cannot raise.
	    current_hyp = row.get('hyp_text', '')
	    current_score_bg = _score_badge_color(score, similarity_threshold)

	    return f"""
	<div style="background: #1e293b; border-radius: 12px; padding: 20px; margin-bottom: 15px;
	border-left: 4px solid {score_color};">
	<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
	<div style="display: flex; flex-direction: column;">
	<span style="color: #e2e8f0; font-weight: bold;">📄 {_e(row.get('path'))}</span>
	<div style="margin-top: 5px;">
	<span style="background: #475569; color: #e2e8f0; padding: 3px 8px;
	border-radius: 10px; font-size: 0.8em; margin-right: 8px;">🤖 {model_badge}</span>
	</div>
	</div>
	<span style="background: {score_color}; color: white; padding: 5px 12px;
	border-radius: 20px; font-weight: bold;">{score_int}%</span>
	</div>
	<div style="background: #0f172a; border-radius: 8px; padding: 15px; margin-bottom: 10px;">
	<p style="color: #94a3b8; margin: 0 0 5px 0; font-size: 0.85em;">📝 Арыгінал:</p>
	<p style="color: #f1f5f9; margin: 0; font-family: monospace;">{_e(row.get('ref_text'))}</p>
	</div>
	<div style="background: #0f172a; border-radius: 8px; padding: 15px; margin-bottom: 10px;">
	<p style="color: #94a3b8; margin: 0 0 5px 0; font-size: 0.85em;">
	🎤 Распазнана
	<span style="background: #475569; color: #e2e8f0; padding: 2px 6px; border-radius: 6px; font-size: 0.9em; margin-left: 5px;">{_e(model_used)}</span>
	<span style="background: {current_score_bg}; color: white; padding: 2px 6px; border-radius: 6px; font-size: 0.9em; margin-left: 5px;">{score_int}%</span>
	</p>
	<p style="color: #f1f5f9; margin: 0; font-family: monospace;">{_e(current_hyp)}</p>
	</div>
	{best_text_html}
	<details style="color: #94a3b8; margin-bottom: 10px;">
	<summary style="cursor: pointer; color: #60a5fa;">🔍 Нармалізаваны тэкст</summary>
	<div style="background: #0f172a; border-radius: 8px; padding: 10px; margin-top: 10px;">
	<p style="margin: 5px 0;"><strong>Ref:</strong> {_e(row.get('norm_ref'))}</p>
	<p style="margin: 5px 0;"><strong>Hyp:</strong> {_e(row.get('norm_hyp'))}</p>
	</div>
	</details>
	{model_comparison_html}
	{audio_html}
	<div style="display: flex; gap: 10px; margin-top: 15px; justify-content: flex-end;">
	<button type="button" onclick="verifyRecord({rid}, 'correct')" class="verify-btn correct-btn">✅ Правільна</button>
	<button type="button" onclick="verifyRecord({rid}, 'incorrect')" class="verify-btn incorrect-btn">❌ Няправільна</button>
	</div>
	</div>
	"""


	def _score_badge_color(value, ok_threshold, warn_threshold=70) -> str:
	    """Pick a badge color: green at/above ok, amber at/above warn, else red."""
	    if value >= ok_threshold:
	        return "#10b981"
	    if value >= warn_threshold:
	        return "#f59e0b"
	    return "#ef4444"