Spaces:
Sleeping
Sleeping
Yoad
commited on
Commit
·
2f5cf2f
1
Parent(s):
c66d9f1
First commit with actual logic
Browse files- .dockerignore +5 -0
- .gitignore +13 -0
- .python-version +1 -0
- Dockerfile +31 -7
- README.md +15 -6
- pyproject.toml +15 -0
- requirements.txt +0 -3
- src/app.py +407 -0
- src/sample_inputs/eval_results.csv +1 -0
- src/sample_inputs/ivrit_ai_eval_d1.csv +6 -0
- src/st_fixed_container.py +212 -0
- src/streamlit_app.py +0 -40
- src/visual_eval/__init__.py +0 -0
- src/visual_eval/evaluator.py +56 -0
- src/visual_eval/visualization.py +279 -0
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv
|
| 2 |
+
.streamlit
|
| 3 |
+
.git
|
| 4 |
+
.gitignore
|
| 5 |
+
sample_inputs/
|
.gitignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv
|
| 2 |
+
.streamlit
|
| 3 |
+
|
| 4 |
+
# python
|
| 5 |
+
__pycache__
|
| 6 |
+
*.pyc
|
| 7 |
+
*.pyo
|
| 8 |
+
*.pyd
|
| 9 |
+
*.so
|
| 10 |
+
*.egg-info
|
| 11 |
+
dist
|
| 12 |
+
build
|
| 13 |
+
eggs
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.11.9
|
Dockerfile
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
-
FROM
|
| 2 |
-
|
| 3 |
-
WORKDIR /app
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
build-essential \
|
|
@@ -9,13 +7,39 @@ RUN apt-get update && apt-get install -y \
|
|
| 9 |
git \
|
| 10 |
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
EXPOSE 8501
|
| 18 |
|
| 19 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
|
|
|
|
|
|
|
| 2 |
|
| 3 |
RUN apt-get update && apt-get install -y \
|
| 4 |
build-essential \
|
|
|
|
| 7 |
git \
|
| 8 |
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
+
RUN useradd -m -u 1000 user
|
| 11 |
+
|
| 12 |
+
USER user
|
| 13 |
+
|
| 14 |
+
ENV HOME=/home/user \
|
| 15 |
+
PATH=/home/user/.local/bin:$PATH
|
| 16 |
+
|
| 17 |
+
WORKDIR $HOME/app
|
| 18 |
+
|
| 19 |
+
ENV UV_COMPILE_BYTECODE=1
|
| 20 |
|
| 21 |
+
# Install the project's dependencies using the lockfile and settings
|
| 22 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 23 |
+
--mount=type=bind,source=uv.lock,target=uv.lock \
|
| 24 |
+
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
| 25 |
+
uv sync --frozen --no-install-project --no-dev
|
| 26 |
+
|
| 27 |
+
ADD . $HOME/app
|
| 28 |
+
|
| 29 |
+
# Use uv sync to resolve and install dependencies
|
| 30 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 31 |
+
uv sync --frozen --no-dev
|
| 32 |
+
|
| 33 |
+
# Place executables in the environment at the front of the path
|
| 34 |
+
ENV PATH="$HOME/app/.venv/bin:$PATH"
|
| 35 |
|
| 36 |
EXPOSE 8501
|
| 37 |
|
| 38 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 39 |
|
| 40 |
+
# Reset the entrypoint, don't invoke `uv`
|
| 41 |
+
ENTRYPOINT []
|
| 42 |
+
|
| 43 |
+
ENV PYTHONPATH="$HOME/app/src:$PYTHONPATH"
|
| 44 |
+
|
| 45 |
+
CMD ["uv", "run", "streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: Visualize Eval Results
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
|
@@ -8,12 +8,21 @@ app_port: 8501
|
|
| 8 |
tags:
|
| 9 |
- streamlit
|
| 10 |
pinned: false
|
| 11 |
-
short_description: Visualize ivrit.ai
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
|
| 18 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
| 1 |
---
|
| 2 |
title: Visualize Eval Results
|
| 3 |
+
emoji: 🔍
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
|
|
|
| 8 |
tags:
|
| 9 |
- streamlit
|
| 10 |
pinned: false
|
| 11 |
+
short_description: Visualize ivrit.ai ASR eval results
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Hi There 👋
|
| 15 |
+
|
| 16 |
+
Load ivrit.ai ASR eval results CSV file to visualize the results.
|
| 17 |
+
Known Datasets will also allow loading the Audio directly from the HF Hub.
|
| 18 |
+
Supported known datasets are:
|
| 19 |
+
|
| 20 |
+
| Dataset Repo ID + split + reference text feature name | Dataset Config | CSV Output Name |
|
| 21 |
+
| --------------- | -------------- | ----------- |
|
| 22 |
+
| ivrit-ai/eval-d1:test:text | None | ivrit_ai_eval_d1 |
|
| 23 |
+
| upai-inc/saspeech:test:text | None | saspeech |
|
| 24 |
+
| google/fleurs:test:transcription | he_il | fleurs |
|
| 25 |
+
| mozilla-foundation/common_voice_17_0:test:sentence | he | common_voice_17 |
|
| 26 |
+
| imvladikon/hebrew_speech_kan:validation:sentence | None | hebrew_speech_kan |
|
| 27 |
|
|
|
|
| 28 |
|
|
|
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "vis-asr-eval-results"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11.9"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"hebrew>=0.8.1",
|
| 9 |
+
"huggingface-hub>=0.30.2",
|
| 10 |
+
"jiwer>=3.1.0",
|
| 11 |
+
"pandas>=2.2.3",
|
| 12 |
+
"soundfile>=0.13.1",
|
| 13 |
+
"streamlit>=1.45.0",
|
| 14 |
+
"transformers>=4.51.3",
|
| 15 |
+
]
|
requirements.txt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
altair
|
| 2 |
-
pandas
|
| 3 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
src/app.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import jiwer
|
| 5 |
+
import requests
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from st_fixed_container import st_fixed_container
|
| 9 |
+
from visual_eval.visualization import render_visualize_jiwer_result_html
|
| 10 |
+
from visual_eval.evaluator import HebrewTextNormalizer
|
| 11 |
+
|
| 12 |
+
HF_API_TOKEN = None
|
| 13 |
+
try:
|
| 14 |
+
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
|
| 15 |
+
except FileNotFoundError:
|
| 16 |
+
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
|
| 17 |
+
has_api_token = HF_API_TOKEN is not None
|
| 18 |
+
|
| 19 |
+
known_datasets = [
|
| 20 |
+
("ivrit-ai/eval-d1:test:text", None, "ivrit_ai_eval_d1"),
|
| 21 |
+
("upai-inc/saspeech:test:text", None, "saspeech"),
|
| 22 |
+
("google/fleurs:test:transcription", "he_il", "fleurs"),
|
| 23 |
+
("mozilla-foundation/common_voice_17_0:test:sentence", "he", "common_voice_17"),
|
| 24 |
+
("imvladikon/hebrew_speech_kan:validation:sentence", None, "hebrew_speech_kan"),
|
| 25 |
+
]
|
| 26 |
+
|
| 27 |
+
# Initialize session state for audio cache if it doesn't exist
|
| 28 |
+
if "audio_cache" not in st.session_state:
|
| 29 |
+
st.session_state.audio_cache = {}
|
| 30 |
+
|
| 31 |
+
if "audio_preview_active" not in st.session_state:
|
| 32 |
+
st.session_state.audio_preview_active = {}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def on_file_upload():
|
| 36 |
+
st.session_state.audio_cache = {}
|
| 37 |
+
st.session_state.audio_preview_active = {}
|
| 38 |
+
st.session_state.selected_entry_idx = 0
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def display_rtl(html):
|
| 42 |
+
"""Render an RTL container with the provided HTML string"""
|
| 43 |
+
st.markdown(
|
| 44 |
+
f"""
|
| 45 |
+
<div dir="rtl" lang="he">
|
| 46 |
+
{html}
|
| 47 |
+
</div>
|
| 48 |
+
""",
|
| 49 |
+
unsafe_allow_html=True,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@st.cache_data
|
| 54 |
+
def calculate_final_metrics(uploaded_file, _df):
|
| 55 |
+
"""Calculate final metrics for all entries
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
uploaded_file: The uploaded file object (For cache hash gen)
|
| 59 |
+
_df: The dataframe containing the evaluation results (not included in cache hash)
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
A dictionary containing the final metrics
|
| 63 |
+
"""
|
| 64 |
+
_df = _df.sort_values(by=["id"])
|
| 65 |
+
_df["reference_text"] = _df["reference_text"].fillna("")
|
| 66 |
+
_df["predicted_text"] = _df["predicted_text"].fillna("")
|
| 67 |
+
|
| 68 |
+
# convert to list of dicts
|
| 69 |
+
entries_data = _df.to_dict(orient="records")
|
| 70 |
+
|
| 71 |
+
htn = HebrewTextNormalizer()
|
| 72 |
+
|
| 73 |
+
# Calculate final metrics
|
| 74 |
+
results = jiwer.process_words(
|
| 75 |
+
[htn(entry["reference_text"]) for entry in entries_data],
|
| 76 |
+
[htn(entry["predicted_text"]) for entry in entries_data],
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
return results
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def get_known_dataset_by_output_name(output_name):
|
| 83 |
+
for dataset in known_datasets:
|
| 84 |
+
if dataset[2] == output_name:
|
| 85 |
+
return dataset
|
| 86 |
+
return None
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_dataset_entries_audio_urls(dataset, offset=0, max_entries=100):
|
| 90 |
+
if dataset is None or not has_api_token:
|
| 91 |
+
return None
|
| 92 |
+
|
| 93 |
+
dataset_repo_id, dataset_config, _ = dataset
|
| 94 |
+
if not dataset_config:
|
| 95 |
+
dataset_config = "default"
|
| 96 |
+
if ":" in dataset_repo_id:
|
| 97 |
+
dataset_repo_id, split, _ = dataset_repo_id.split(":")
|
| 98 |
+
else:
|
| 99 |
+
split = "test"
|
| 100 |
+
|
| 101 |
+
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
| 102 |
+
api_query_params = {
|
| 103 |
+
"dataset": dataset_repo_id,
|
| 104 |
+
"config": dataset_config,
|
| 105 |
+
"split": split,
|
| 106 |
+
"offset": offset,
|
| 107 |
+
"length": max_entries,
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
query_params_str = "&".join([f"{k}={v}" for k, v in api_query_params.items()])
|
| 111 |
+
API_URL = f"https://datasets-server.huggingface.co/rows?{query_params_str}"
|
| 112 |
+
|
| 113 |
+
def query():
|
| 114 |
+
response = requests.get(API_URL, headers=headers)
|
| 115 |
+
return response.json()
|
| 116 |
+
|
| 117 |
+
data = query()
|
| 118 |
+
|
| 119 |
+
def get_audio_url(row):
|
| 120 |
+
audio_feature_list = row["row"]["audio"]
|
| 121 |
+
first_audio = audio_feature_list[0]
|
| 122 |
+
return first_audio["src"]
|
| 123 |
+
|
| 124 |
+
if "rows" in data and len(data["rows"]) > 0:
|
| 125 |
+
return [get_audio_url(row) for row in data["rows"]]
|
| 126 |
+
else:
|
| 127 |
+
return None
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def get_audio_url_for_entry(
|
| 131 |
+
dataset, entry_idx, cache_neighbors=True, neighbor_range=20
|
| 132 |
+
):
|
| 133 |
+
"""
|
| 134 |
+
Get audio URL for a specific entry and optionally cache neighbors
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
dataset: Dataset tuple (repo_id, config, output_name)
|
| 138 |
+
entry_idx: Index of the entry to get audio URL for
|
| 139 |
+
cache_neighbors: Whether to cache audio URLs for neighboring entries
|
| 140 |
+
neighbor_range: Range of neighboring entries to cache
|
| 141 |
+
|
| 142 |
+
Returns:
|
| 143 |
+
Audio URL for the specified entry
|
| 144 |
+
"""
|
| 145 |
+
# Calculate the range of entries to load
|
| 146 |
+
if cache_neighbors:
|
| 147 |
+
start_idx = max(0, entry_idx - neighbor_range)
|
| 148 |
+
max_entries = neighbor_range * 2 + 1
|
| 149 |
+
else:
|
| 150 |
+
start_idx = entry_idx
|
| 151 |
+
max_entries = 1
|
| 152 |
+
|
| 153 |
+
# Get audio URLs for the range of entries
|
| 154 |
+
audio_urls = get_dataset_entries_audio_urls(dataset, start_idx, max_entries)
|
| 155 |
+
|
| 156 |
+
if not audio_urls:
|
| 157 |
+
return None
|
| 158 |
+
|
| 159 |
+
# Cache the audio URLs
|
| 160 |
+
for i, url in enumerate(audio_urls):
|
| 161 |
+
idx = start_idx + i
|
| 162 |
+
# Extract expiration time from URL if available
|
| 163 |
+
expires = None
|
| 164 |
+
if "expires=" in url:
|
| 165 |
+
try:
|
| 166 |
+
expires_param = url.split("expires=")[1].split("&")[0]
|
| 167 |
+
expires = datetime.fromtimestamp(int(expires_param))
|
| 168 |
+
except (ValueError, IndexError):
|
| 169 |
+
expires = None
|
| 170 |
+
|
| 171 |
+
st.session_state.audio_cache[idx] = {"url": url, "expires": expires}
|
| 172 |
+
|
| 173 |
+
# Return the URL for the requested entry
|
| 174 |
+
relative_idx = entry_idx - start_idx
|
| 175 |
+
if 0 <= relative_idx < len(audio_urls):
|
| 176 |
+
return audio_urls[relative_idx]
|
| 177 |
+
return None
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def get_cached_audio_url(entry_idx):
|
| 181 |
+
"""
|
| 182 |
+
Get audio URL from cache if available and not expired
|
| 183 |
+
|
| 184 |
+
Args:
|
| 185 |
+
entry_idx: Index of the entry to get audio URL for
|
| 186 |
+
|
| 187 |
+
Returns:
|
| 188 |
+
Audio URL if available in cache and not expired, None otherwise
|
| 189 |
+
"""
|
| 190 |
+
if entry_idx not in st.session_state.audio_cache:
|
| 191 |
+
return None
|
| 192 |
+
|
| 193 |
+
cache_entry = st.session_state.audio_cache[entry_idx]
|
| 194 |
+
|
| 195 |
+
# Check if the URL is expired
|
| 196 |
+
if cache_entry["expires"] and datetime.now() > cache_entry["expires"]:
|
| 197 |
+
return None
|
| 198 |
+
|
| 199 |
+
return cache_entry["url"]
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def main():
|
| 203 |
+
st.set_page_config(
|
| 204 |
+
page_title="ASR Evaluation Visualizer", page_icon="🎤", layout="wide"
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
if not has_api_token:
|
| 208 |
+
st.warning("No Hugging Face API token found. Audio previews will not work.")
|
| 209 |
+
|
| 210 |
+
st.title("ASR Evaluation Visualizer")
|
| 211 |
+
|
| 212 |
+
# File uploader
|
| 213 |
+
uploaded_file = st.file_uploader(
|
| 214 |
+
"Upload evaluation results CSV", type=["csv"], on_change=on_file_upload
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
if uploaded_file is not None:
|
| 218 |
+
# Load the data
|
| 219 |
+
try:
|
| 220 |
+
eval_results = pd.read_csv(uploaded_file)
|
| 221 |
+
st.success("File uploaded successfully!")
|
| 222 |
+
|
| 223 |
+
with st.sidebar:
|
| 224 |
+
# Toggle for calculating total metrics
|
| 225 |
+
show_total_metrics = st.toggle("Show total metrics", value=False)
|
| 226 |
+
|
| 227 |
+
if show_total_metrics:
|
| 228 |
+
total_metrics = calculate_final_metrics(uploaded_file, eval_results)
|
| 229 |
+
|
| 230 |
+
# Display total metrics in a nice format
|
| 231 |
+
with st.container():
|
| 232 |
+
st.metric("WER", f"{total_metrics.wer * 100:.4f}%")
|
| 233 |
+
st.table(
|
| 234 |
+
{
|
| 235 |
+
"Hits": total_metrics.hits,
|
| 236 |
+
"Subs": total_metrics.substitutions,
|
| 237 |
+
"Dels": total_metrics.deletions,
|
| 238 |
+
"Insrt": total_metrics.insertions,
|
| 239 |
+
}
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
+
# Create sidebar for entry selection
|
| 243 |
+
st.sidebar.header("Select Entry")
|
| 244 |
+
|
| 245 |
+
# Add Next/Prev buttons at the top of the sidebar
|
| 246 |
+
col1, col2 = st.sidebar.columns(2)
|
| 247 |
+
|
| 248 |
+
# Define navigation functions
|
| 249 |
+
def go_prev():
|
| 250 |
+
if st.session_state.selected_entry_idx > 0:
|
| 251 |
+
st.session_state.selected_entry_idx -= 1
|
| 252 |
+
|
| 253 |
+
def go_next():
|
| 254 |
+
if st.session_state.selected_entry_idx < len(eval_results) - 1:
|
| 255 |
+
st.session_state.selected_entry_idx += 1
|
| 256 |
+
|
| 257 |
+
# Add navigation buttons
|
| 258 |
+
col1.button("← Prev", on_click=go_prev, use_container_width=True)
|
| 259 |
+
col2.button("Next →", on_click=go_next, use_container_width=True)
|
| 260 |
+
|
| 261 |
+
# Create a data table with entries and their WER
|
| 262 |
+
entries_data = []
|
| 263 |
+
for i in range(len(eval_results)):
|
| 264 |
+
wer_value = eval_results.iloc[i].get("wer", 0)
|
| 265 |
+
# Format WER as percentage
|
| 266 |
+
wer_formatted = (
|
| 267 |
+
f"{wer_value*100:.2f}%"
|
| 268 |
+
if isinstance(wer_value, (int, float))
|
| 269 |
+
else wer_value
|
| 270 |
+
)
|
| 271 |
+
entries_data.append({"Entry": f"Entry #{i+1}", "WER": wer_formatted})
|
| 272 |
+
|
| 273 |
+
# Create a selection mechanism using radio buttons that look like a table
|
| 274 |
+
st.sidebar.write("Select an entry:")
|
| 275 |
+
|
| 276 |
+
# Use a container for better styling
|
| 277 |
+
entry_container = st.sidebar.container()
|
| 278 |
+
|
| 279 |
+
# Create a radio button for each entry, styled to look like a table row
|
| 280 |
+
entry_container.radio(
|
| 281 |
+
"Select an entry",
|
| 282 |
+
options=list(range(len(eval_results))),
|
| 283 |
+
format_func=lambda i: f"Entry #{i+1} ({entries_data[i]['WER']})",
|
| 284 |
+
label_visibility="collapsed",
|
| 285 |
+
key="selected_entry_idx",
|
| 286 |
+
)
|
| 287 |
+
|
| 288 |
+
# Use the selected entry
|
| 289 |
+
selected_entry = st.session_state.selected_entry_idx
|
| 290 |
+
|
| 291 |
+
# Toggle for normalized vs raw text
|
| 292 |
+
use_normalized = st.sidebar.toggle("Use normalized text", value=True)
|
| 293 |
+
|
| 294 |
+
# Get the text columns based on the toggle
|
| 295 |
+
if use_normalized:
|
| 296 |
+
ref_col, hyp_col = "norm_reference_text", "norm_predicted_text"
|
| 297 |
+
else:
|
| 298 |
+
ref_col, hyp_col = "reference_text", "predicted_text"
|
| 299 |
+
|
| 300 |
+
# Get the reference and hypothesis texts
|
| 301 |
+
ref, hyp = eval_results.iloc[selected_entry][[ref_col, hyp_col]].values
|
| 302 |
+
|
| 303 |
+
st.header("Visualization")
|
| 304 |
+
|
| 305 |
+
# Check if the CSV file is from a known dataset
|
| 306 |
+
dataset_name = None
|
| 307 |
+
|
| 308 |
+
# If no dataset column, try to infer from filename
|
| 309 |
+
if uploaded_file is not None:
|
| 310 |
+
filename_stem = Path(uploaded_file.name).stem
|
| 311 |
+
dataset_name = filename_stem
|
| 312 |
+
|
| 313 |
+
if not dataset_name and "dataset" in eval_results.columns:
|
| 314 |
+
dataset_name = eval_results.iloc[selected_entry]["dataset"]
|
| 315 |
+
|
| 316 |
+
# Get the known dataset if available
|
| 317 |
+
known_dataset = get_known_dataset_by_output_name(dataset_name)
|
| 318 |
+
|
| 319 |
+
# Display audio preview button if from a known dataset
|
| 320 |
+
if known_dataset:
|
| 321 |
+
# Check if we have the audio URL in cache
|
| 322 |
+
audio_url = get_cached_audio_url(selected_entry)
|
| 323 |
+
|
| 324 |
+
audio_preview_active = st.session_state.audio_preview_active.get(
|
| 325 |
+
selected_entry, False
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
preview_audio = False
|
| 329 |
+
if not audio_preview_active:
|
| 330 |
+
# Create a button to preview audio
|
| 331 |
+
preview_audio = st.button("Preview Audio", key="preview_audio")
|
| 332 |
+
|
| 333 |
+
if preview_audio or audio_url:
|
| 334 |
+
st.session_state.audio_preview_active[selected_entry] = True
|
| 335 |
+
with st_fixed_container(
|
| 336 |
+
mode="sticky", position="top", border=True, margin=0
|
| 337 |
+
):
|
| 338 |
+
# If button clicked or we already have the URL, get/use the audio URL
|
| 339 |
+
if not audio_url:
|
| 340 |
+
with st.spinner("Loading audio..."):
|
| 341 |
+
audio_url = get_audio_url_for_entry(
|
| 342 |
+
known_dataset, selected_entry
|
| 343 |
+
)
|
| 344 |
+
|
| 345 |
+
# Display the audio player in the sticky container at the top
|
| 346 |
+
if audio_url:
|
| 347 |
+
st.audio(audio_url)
|
| 348 |
+
else:
|
| 349 |
+
st.error("Failed to load audio for this entry.")
|
| 350 |
+
|
| 351 |
+
# Display the visualization
|
| 352 |
+
html = render_visualize_jiwer_result_html(ref, hyp)
|
| 353 |
+
display_rtl(html)
|
| 354 |
+
|
| 355 |
+
# Display metadata
|
| 356 |
+
st.header("Metadata")
|
| 357 |
+
metadata_cols = [
|
| 358 |
+
"metadata_uuid",
|
| 359 |
+
"model",
|
| 360 |
+
"dataset",
|
| 361 |
+
"dataset_split",
|
| 362 |
+
"engine",
|
| 363 |
+
]
|
| 364 |
+
metadata = eval_results.iloc[selected_entry][metadata_cols]
|
| 365 |
+
|
| 366 |
+
# Create a DataFrame for better display
|
| 367 |
+
metadata_df = pd.DataFrame(
|
| 368 |
+
{"Field": metadata_cols, "Value": metadata.values}
|
| 369 |
+
)
|
| 370 |
+
st.table(metadata_df)
|
| 371 |
+
|
| 372 |
+
# If we have audio URL, display it in the sticky container
|
| 373 |
+
if "audio_url" in locals() and audio_url:
|
| 374 |
+
pass # CSS is now applied globally
|
| 375 |
+
|
| 376 |
+
except Exception as e:
|
| 377 |
+
st.error(f"Error processing file: {str(e)}")
|
| 378 |
+
else:
|
| 379 |
+
st.info(
|
| 380 |
+
"Please upload an evaluation results CSV file to visualize the results."
|
| 381 |
+
)
|
| 382 |
+
st.markdown(
|
| 383 |
+
"""
|
| 384 |
+
### Expected CSV Format
|
| 385 |
+
The CSV should have the following columns:
|
| 386 |
+
- id
|
| 387 |
+
- reference_text
|
| 388 |
+
- predicted_text
|
| 389 |
+
- norm_reference_text
|
| 390 |
+
- norm_predicted_text
|
| 391 |
+
- wer
|
| 392 |
+
- wil
|
| 393 |
+
- substitutions
|
| 394 |
+
- deletions
|
| 395 |
+
- insertions
|
| 396 |
+
- hits
|
| 397 |
+
- metadata_uuid
|
| 398 |
+
- model
|
| 399 |
+
- dataset
|
| 400 |
+
- dataset_split
|
| 401 |
+
- engine
|
| 402 |
+
"""
|
| 403 |
+
)
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
if __name__ == "__main__":
|
| 407 |
+
main()
|
src/sample_inputs/eval_results.csv
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
|
src/sample_inputs/ivrit_ai_eval_d1.csv
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
|
| 2 |
+
1,"אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון","אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון",0.2,0.2,1,0,0,4,12345,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
| 3 |
+
2,"שלום עולם","שלום עולם","שלום עולם","שלום עולם",0.0,0.0,0,0,0,2,67890,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
| 4 |
+
3,"ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל",0.0,0.0,0,0,0,3,13579,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
| 5 |
+
4,"תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד","תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד",0.2,0.2,0,0,1,5,24680,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
| 6 |
+
5,"אני גר בירושלים","אני גר בירושלים","אני גר בירושלים","אני גר בירושלים",0.0,0.0,0,0,0,3,97531,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
src/st_fixed_container.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Literal
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from streamlit.components.v1 import html
|
| 5 |
+
|
| 6 |
+
"""
|
| 7 |
+
st_fixed_container consist of two parts - fixed container and opaque container.
|
| 8 |
+
Fixed container is a container that is fixed to the top or bottom of the screen.
|
| 9 |
+
|
| 10 |
+
When transparent is set to True, the container is typical `st.container`, which is transparent by default.
|
| 11 |
+
When transparent is set to False, the container is custom opaque_container, that updates its background color to match the background color of the app.
|
| 12 |
+
|
| 13 |
+
Opaque container is a helper class, but can be used to create more custom views. See main for examples.
|
| 14 |
+
|
| 15 |
+
"""
|
| 16 |
+
OPAQUE_CONTAINER_CSS = """
|
| 17 |
+
|
| 18 |
+
:root {{
|
| 19 |
+
--background-color: #ffffff; /* Default background color */
|
| 20 |
+
}}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="stVerticalBlockBorderWrapper"] {{
|
| 24 |
+
background-color: var(--background-color);
|
| 25 |
+
width: 100%;
|
| 26 |
+
}}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="element-container"] {{
|
| 31 |
+
display: none;
|
| 32 |
+
}}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-opaque-container):not(:has(div[class^='opaque-container-'])) {{
|
| 36 |
+
display: none;
|
| 37 |
+
}}
|
| 38 |
+
""".strip()
|
| 39 |
+
|
| 40 |
+
OPAQUE_CONTAINER_JS = """
|
| 41 |
+
const root = parent.document.querySelector('.stApp');
|
| 42 |
+
let lastBackgroundColor = null;
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
function updateContainerBackground(currentBackground) {
|
| 46 |
+
parent.document.documentElement.style.setProperty('--background-color', currentBackground);
|
| 47 |
+
;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
function checkForBackgroundColorChange() {
|
| 51 |
+
const style = window.getComputedStyle(root);
|
| 52 |
+
const currentBackgroundColor = style.backgroundColor;
|
| 53 |
+
if (currentBackgroundColor !== lastBackgroundColor) {
|
| 54 |
+
lastBackgroundColor = currentBackgroundColor; // Update the last known value
|
| 55 |
+
updateContainerBackground(lastBackgroundColor);
|
| 56 |
+
}
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
const observerCallback = (mutationsList, observer) => {
|
| 60 |
+
for(let mutation of mutationsList) {
|
| 61 |
+
if (mutation.type === 'attributes' && (mutation.attributeName === 'class' || mutation.attributeName === 'style')) {
|
| 62 |
+
checkForBackgroundColorChange();
|
| 63 |
+
}
|
| 64 |
+
}
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
const main = () => {
|
| 68 |
+
checkForBackgroundColorChange();
|
| 69 |
+
|
| 70 |
+
const observer = new MutationObserver(observerCallback);
|
| 71 |
+
observer.observe(root, { attributes: true, childList: false, subtree: false });
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
// main();
|
| 75 |
+
document.addEventListener("DOMContentLoaded", main);
|
| 76 |
+
""".strip()
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def st_opaque_container(
|
| 80 |
+
*,
|
| 81 |
+
height: int | None = None,
|
| 82 |
+
border: bool | None = None,
|
| 83 |
+
key: str | None = None,
|
| 84 |
+
):
|
| 85 |
+
global opaque_counter
|
| 86 |
+
|
| 87 |
+
opaque_container = st.container()
|
| 88 |
+
non_opaque_container = st.container()
|
| 89 |
+
css = OPAQUE_CONTAINER_CSS.format(id=key)
|
| 90 |
+
with opaque_container:
|
| 91 |
+
html(f"<script>{OPAQUE_CONTAINER_JS}</script>", scrolling=False, height=0)
|
| 92 |
+
st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
| 93 |
+
st.markdown(
|
| 94 |
+
f"<div class='opaque-container-{key}'></div>",
|
| 95 |
+
unsafe_allow_html=True,
|
| 96 |
+
)
|
| 97 |
+
with non_opaque_container:
|
| 98 |
+
st.markdown(
|
| 99 |
+
f"<div class='not-opaque-container'></div>",
|
| 100 |
+
unsafe_allow_html=True,
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
return opaque_container.container(height=height, border=border)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
FIXED_CONTAINER_CSS = """
|
| 107 |
+
|
| 108 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)){{
|
| 109 |
+
background-color: transparent;
|
| 110 |
+
position: {mode};
|
| 111 |
+
width: inherit;
|
| 112 |
+
background-color: inherit;
|
| 113 |
+
{position}: {margin};
|
| 114 |
+
z-index: 999;
|
| 115 |
+
|
| 116 |
+
}}
|
| 117 |
+
|
| 118 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="element-container"] {{
|
| 119 |
+
display: none;
|
| 120 |
+
}}
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-fixed-container):not(:has(div[class^='fixed-container-'])) {{
|
| 124 |
+
display: none;
|
| 125 |
+
}}
|
| 126 |
+
""".strip()
|
| 127 |
+
|
| 128 |
+
MARGINS = {
|
| 129 |
+
"top": "2.875rem",
|
| 130 |
+
"bottom": "0",
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def st_fixed_container(
    *,
    height: int | None = None,
    border: bool | None = None,
    mode: Literal["fixed", "sticky"] = "fixed",
    position: Literal["top", "bottom"] = "top",
    margin: str | None = None,
    transparent: bool = False,
    key: str | None = None,
):
    """Create a Streamlit container pinned to the top or bottom of the viewport.

    Args:
        height: Fixed pixel height for the inner container, or None for auto.
        border: Whether the inner container draws a border (None = Streamlit default).
        mode: CSS positioning mode — "fixed" (always pinned) or "sticky"
            (pins only once the page scrolls past it).
        position: Viewport edge to pin to, "top" or "bottom".
        margin: CSS offset from that edge; defaults to ``MARGINS[position]``.
        transparent: If True the pinned container keeps a transparent
            background; otherwise it is wrapped in ``st_opaque_container``.
        key: Identifier used to scope the generated CSS selectors; must be
            unique across fixed containers on the same page.

    Returns:
        A Streamlit container; use it as a context manager to place widgets
        inside the pinned area.
    """
    if margin is None:
        margin = MARGINS[position]
    # NOTE(review): the original declared `global fixed_counter`, but no such
    # global exists anywhere and it was never read or written — removed as
    # dead code.
    fixed_container = st.container()
    non_fixed_container = st.container()
    css = FIXED_CONTAINER_CSS.format(
        mode=mode,
        position=position,
        margin=margin,
        id=key,
    )

    def render_content():
        # The pinned container that actually hosts the caller's widgets.
        with fixed_container:
            if transparent:
                return st.container(height=height, border=border)

            return st_opaque_container(
                height=height, border=border, key=f"opaque_{key}"
            )

    def render_non_content():
        # Emits the scoped CSS plus the marker divs the selectors key off of;
        # the markers themselves are hidden again by the injected CSS.
        with fixed_container:
            st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
            st.markdown(
                f"<div class='fixed-container-{key}'></div>",
                unsafe_allow_html=True,
            )
        with non_fixed_container:
            st.markdown(
                "<div class='not-fixed-container'></div>",
                unsafe_allow_html=True,
            )

    # Render order matters: the content container comes first when pinned to
    # the top and last when pinned to the bottom, so stacking matches the edge.
    result = None

    if position == "top":
        result = render_content()
        render_non_content()
    else:
        render_non_content()
        result = render_content()

    return result
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
if __name__ == "__main__":
    # Demo page: filler lines above and below two pinned containers.
    for line_no in range(30):
        st.write(f"Line {line_no}")

    # Other pinning modes to try:
    #   st_fixed_container(mode="sticky", position="bottom", border=True)
    #   st_fixed_container(mode="sticky", position="top", border=True)
    #   st_fixed_container(mode="fixed", position="bottom", border=True)
    with st_fixed_container(mode="fixed", position="top", border=True):
        for _ in range(3):
            st.write("This is a fixed container.")

    # A transparent strip pinned to the bottom, holding a small opaque
    # control panel with two buttons on the right-hand side.
    with st_fixed_container(mode="fixed", position="bottom", transparent=True):
        _, button_column = st.columns([0.7, 0.3])
        with button_column:
            with st_opaque_container(border=True):
                st.button("Feedback", use_container_width=True)
                st.button("Clean up", use_container_width=True)

    st.container(border=True).write("This is a regular container.")
    for line_no in range(30):
        st.write(f"Line {line_no}")
|
src/streamlit_app.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import streamlit as st
|
| 5 |
-
|
| 6 |
-
"""
|
| 7 |
-
# Welcome to Streamlit!
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/visual_eval/__init__.py
ADDED
|
File without changes
|
src/visual_eval/evaluator.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluator module.
|
| 3 |
+
Provides functions to evaluate a given model on a dataset sample using the Faster Whisper model,
|
| 4 |
+
and generate HTML visualization blocks of the word alignment.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import concurrent.futures
|
| 8 |
+
import gc
|
| 9 |
+
import io
|
| 10 |
+
import queue
|
| 11 |
+
import threading
|
| 12 |
+
from typing import Dict, Generator, List
|
| 13 |
+
|
| 14 |
+
import soundfile as sf
|
| 15 |
+
from hebrew import Hebrew
|
| 16 |
+
from tqdm import tqdm
|
| 17 |
+
from transformers.models.whisper.english_normalizer import BasicTextNormalizer
|
| 18 |
+
|
| 19 |
+
from visual_eval.visualization import render_visualize_jiwer_result_html
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class HebrewTextNormalizer(BasicTextNormalizer):
    """Text normalizer for Hebrew ASR transcripts.

    Strips niqqud (vowel diacritics), invisible Unicode bidi/formatting
    control characters, and quote characters, then applies the generic
    ``BasicTextNormalizer`` cleanup.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Invisible formatting / direction-control characters that sometimes
        # leak into Hebrew text and should never count as transcript content.
        invisible_chars = (
            "\u061c"  # Arabic letter mark
            "\u200b\u200c\u200d"  # zero-width space, non-joiner, joiner
            "\u200e\u200f"  # LTR and RTL marks
            "\u202a\u202b\u202c\u202d\u202e"  # LTR/RTL embedding, pop, override
            "\u2066\u2067\u2068\u2069"  # isolate controls
            "\ufeff"  # zero-width no-break space
        )
        # str.maketrans("", "", chars) maps every char in `chars` to None,
        # i.e. deletion on translate().
        self.superfluous_hebrew_unicode_symbols_translator = str.maketrans(
            "", "", invisible_chars
        )
        self.quotes_translator = str.maketrans("", "", "\"'")

    def __remove_niqqud(self, text: str) -> str:
        # The `hebrew` package performs the diacritic stripping.
        return Hebrew(text).no_niqqud().string

    def __remove_superfluous_hebrew_unicode_symbols(self, text: str) -> str:
        return text.translate(self.superfluous_hebrew_unicode_symbols_translator)

    def __remove_quotes(self, text: str) -> str:
        return text.translate(self.quotes_translator)

    def __call__(self, text):
        # Hebrew-specific cleanup first, generic normalization last.
        for step in (
            self.__remove_niqqud,
            self.__remove_superfluous_hebrew_unicode_symbols,
            self.__remove_quotes,
        ):
            text = step(text)
        return super().__call__(text)
|
src/visual_eval/visualization.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Visualization module.
|
| 3 |
+
Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts,
|
| 4 |
+
and to generate the complete results HTML page with an embedded audio element and progress status.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from itertools import zip_longest
|
| 8 |
+
from jiwer import process_words
|
| 9 |
+
import hashlib
|
| 10 |
+
|
| 11 |
+
def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str | None = None) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: A unique identifier for the model (used in word IDs).

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Derive a stable id from a hash of the title when none is provided.
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]

    # Process word alignment via jiwer
    word_output = process_words(ref, hyp)
    alignment_chunks = word_output.alignments[0]

    # Each entry is (ref_cell_html, hyp_cell_html, reference_position).
    columns: list = []
    ref_position = 0  # This tracks the position in the reference text

    for chunk in alignment_chunks:
        if chunk.type == "equal":
            # Matching words: rendered identically in both rows.
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "delete":
            # Reference word missing from the hypothesis: pink placeholder cell.
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;"> </span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "insert":
            words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]
            # For inserted words, they are linked to the previous reference position
            # If we're at the beginning, use position 0
            last_ref_pos = max(0, ref_position - 1) if ref_position > 0 else 0
            for word in words:
                ref_cell = '<span> </span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{word}</span>'
                columns.append((ref_cell, hyp_cell, last_ref_pos))
                # Note: ref_position is NOT incremented for inserts

        elif chunk.type == "substitute":
            ref_words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            hyp_words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]

            # zip_longest handles unequal-length substitution runs: leftover
            # hypothesis words fall through to the `elif hyp_word` branch below.
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # Only increment position for actual reference words
                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{ref_word}" style="background-color: #dddddd;">{ref_word}</span>'
                    if hyp_word:
                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{hyp_word}</span>'
                    else:
                        hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;"> </span>'
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:  # Extra hypothesis words with no reference pair
                    # Link to previous reference position
                    last_ref_pos = max(0, ref_position - 1)
                    ref_cell = '<span> </span>'
                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{hyp_word}</span>'
                    columns.append((ref_cell, hyp_cell, last_ref_pos))

    # Create HTML visualization
    html_blocks = []
    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

    # Header row: metrics on the left, title in the middle, edit counts right.
    html_blocks.append(
        f"<div dir='ltr' class='model-result' data-model-id='{model_id}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    # One flex column per aligned word pair (reference above hypothesis).
    flex_container = f'<div class="word-alignment-container" data-model-id="{model_id}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
    for ref_cell, hyp_cell, ref_pos in columns:
        cell_html = (
            f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
            'padding-left: 1em; font-family: monospace;">'
            f'<div style="text-align: center;">{ref_cell}</div>'
            f'<div style="text-align: center;">{hyp_cell}</div>'
            '</div>'
        )
        flex_container += cell_html
    flex_container += '</div>'
    html_blocks.append(flex_container)

    html_string = f'<div class="model-block" data-model-id="{model_id}" style="background: white; color: black; margin-bottom: 20px;">' + "\n".join(html_blocks) + '</div>'

    return html_string
|
| 111 |
+
|
| 112 |
+
def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
|
| 113 |
+
"""
|
| 114 |
+
Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.
|
| 115 |
+
|
| 116 |
+
Args:
|
| 117 |
+
dataset_description: A string describing the dataset.
|
| 118 |
+
html_blocks: A list of HTML strings (one per model evaluation).
|
| 119 |
+
audio_file: The filename of the saved audio sample.
|
| 120 |
+
timestamp: The timestamp string used in titles.
|
| 121 |
+
progress: A tuple (done, total) indicating the number of models evaluated so far.
|
| 122 |
+
|
| 123 |
+
Returns:
|
| 124 |
+
A complete HTML document as a string.
|
| 125 |
+
"""
|
| 126 |
+
progress_html = ""
|
| 127 |
+
auto_scroll_to_bottom_on_load = ""
|
| 128 |
+
if progress:
|
| 129 |
+
done, total = progress
|
| 130 |
+
progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
|
| 131 |
+
if done < total:
|
| 132 |
+
auto_scroll_to_bottom_on_load = """
|
| 133 |
+
<script type="text/javascript">
|
| 134 |
+
document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
|
| 135 |
+
</script>
|
| 136 |
+
"""
|
| 137 |
+
|
| 138 |
+
refresh_page_control = """
|
| 139 |
+
<button onclick="location.reload();">Refresh Page</button>
|
| 140 |
+
"""
|
| 141 |
+
audio_element = f"""
|
| 142 |
+
<div style="margin-bottom: 20px;">
|
| 143 |
+
<audio controls>
|
| 144 |
+
<source src="{audio_file}" type="audio/mp3">
|
| 145 |
+
Your browser does not support the audio element.
|
| 146 |
+
</audio>
|
| 147 |
+
</div>
|
| 148 |
+
"""
|
| 149 |
+
|
| 150 |
+
# Add JavaScript for reference-based word highlighting with sticky functionality
|
| 151 |
+
highlighting_js = """
|
| 152 |
+
<script type="text/javascript">
|
| 153 |
+
document.addEventListener('DOMContentLoaded', function() {
|
| 154 |
+
// Track the currently selected reference position
|
| 155 |
+
let selectedRefPos = null;
|
| 156 |
+
|
| 157 |
+
// Helper function to apply highlighting
|
| 158 |
+
function highlightPosition(refPos, isSticky = false) {
|
| 159 |
+
// Apply highlighting style
|
| 160 |
+
const highlightStyle = 'underline';
|
| 161 |
+
|
| 162 |
+
// Highlight all elements with the matching reference position
|
| 163 |
+
document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
|
| 164 |
+
el.style.textDecoration = highlightStyle;
|
| 165 |
+
el.style.textDecorationThickness = '2px';
|
| 166 |
+
el.style.textDecorationColor = isSticky ? 'red' : 'blue';
|
| 167 |
+
});
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
// Helper function to remove highlighting
|
| 171 |
+
function removeHighlighting(refPos) {
|
| 172 |
+
// Don't remove highlighting if this is the selected position
|
| 173 |
+
if (refPos === selectedRefPos) return;
|
| 174 |
+
|
| 175 |
+
document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
|
| 176 |
+
el.style.textDecoration = 'none';
|
| 177 |
+
});
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
// Helper function to clear all sticky highlighting
|
| 181 |
+
function clearStickyHighlighting() {
|
| 182 |
+
if (selectedRefPos !== null) {
|
| 183 |
+
document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
|
| 184 |
+
el.style.textDecoration = 'none';
|
| 185 |
+
});
|
| 186 |
+
|
| 187 |
+
selectedRefPos = null;
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
// Use event delegation for all word-alignment-containers
|
| 192 |
+
document.querySelectorAll('.word-alignment-container').forEach(container => {
|
| 193 |
+
// Mouseover (replaces mouseenter on individual elements)
|
| 194 |
+
container.addEventListener('mouseover', function(event) {
|
| 195 |
+
const target = event.target.closest('.word-item');
|
| 196 |
+
if (!target) return;
|
| 197 |
+
|
| 198 |
+
const refPos = target.dataset.refPos;
|
| 199 |
+
if (!refPos) return;
|
| 200 |
+
|
| 201 |
+
highlightPosition(refPos, false);
|
| 202 |
+
});
|
| 203 |
+
|
| 204 |
+
// Mouseout (replaces mouseleave on individual elements)
|
| 205 |
+
container.addEventListener('mouseout', function(event) {
|
| 206 |
+
const target = event.target.closest('.word-item');
|
| 207 |
+
if (!target) return;
|
| 208 |
+
|
| 209 |
+
const refPos = target.dataset.refPos;
|
| 210 |
+
if (!refPos) return;
|
| 211 |
+
|
| 212 |
+
removeHighlighting(refPos);
|
| 213 |
+
});
|
| 214 |
+
|
| 215 |
+
// Click for sticky highlighting
|
| 216 |
+
container.addEventListener('click', function(event) {
|
| 217 |
+
const target = event.target.closest('.word-item');
|
| 218 |
+
if (!target) return;
|
| 219 |
+
|
| 220 |
+
const refPos = target.dataset.refPos;
|
| 221 |
+
if (!refPos) return;
|
| 222 |
+
|
| 223 |
+
// If this position is already selected, clear it
|
| 224 |
+
if (selectedRefPos === refPos) {
|
| 225 |
+
clearStickyHighlighting();
|
| 226 |
+
} else {
|
| 227 |
+
// Clear any existing sticky highlighting
|
| 228 |
+
clearStickyHighlighting();
|
| 229 |
+
|
| 230 |
+
// Set new selected position
|
| 231 |
+
selectedRefPos = refPos;
|
| 232 |
+
|
| 233 |
+
// Apply sticky highlighting
|
| 234 |
+
highlightPosition(refPos, true);
|
| 235 |
+
}
|
| 236 |
+
});
|
| 237 |
+
});
|
| 238 |
+
|
| 239 |
+
// Add a click handler on the document to clear sticky highlighting when clicking elsewhere
|
| 240 |
+
document.addEventListener('click', function(e) {
|
| 241 |
+
// If the click wasn't on a word item or word pair, clear sticky highlighting
|
| 242 |
+
if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
|
| 243 |
+
clearStickyHighlighting();
|
| 244 |
+
}
|
| 245 |
+
});
|
| 246 |
+
});
|
| 247 |
+
</script>
|
| 248 |
+
"""
|
| 249 |
+
|
| 250 |
+
# Add CSS for hover effects
|
| 251 |
+
highlighting_css = """
|
| 252 |
+
<style>
|
| 253 |
+
.word-item {
|
| 254 |
+
cursor: pointer;
|
| 255 |
+
transition: all 0.2s;
|
| 256 |
+
}
|
| 257 |
+
</style>
|
| 258 |
+
"""
|
| 259 |
+
|
| 260 |
+
results_html = f"""
|
| 261 |
+
<html dir="rtl" lang="he">
|
| 262 |
+
<head>
|
| 263 |
+
<meta charset="utf-8">
|
| 264 |
+
<title>Evaluation Results - {dataset_description} - {timestamp}</title>
|
| 265 |
+
{highlighting_css}
|
| 266 |
+
</head>
|
| 267 |
+
<body>
|
| 268 |
+
<h3>Evaluation Results - {dataset_description} - {timestamp}</h3>
|
| 269 |
+
{progress_html}{refresh_page_control}
|
| 270 |
+
{audio_element}
|
| 271 |
+
<div id="results-container" style="max-height: 80vh; overflow-y: auto;">
|
| 272 |
+
{''.join(html_blocks)}
|
| 273 |
+
</div>
|
| 274 |
+
{highlighting_js}
|
| 275 |
+
{auto_scroll_to_bottom_on_load}
|
| 276 |
+
</body>
|
| 277 |
+
</html>
|
| 278 |
+
"""
|
| 279 |
+
return results_html
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|