Upload 3 files
Browse files- README.md +211 -6
- app.py +783 -0
- requirements.txt +3 -0
README.md
CHANGED
|
@@ -1,13 +1,218 @@
|
|
| 1 |
---
|
| 2 |
-
title: MOS Evaluation
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
python_version: '3.13'
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: TTS MOS Evaluation
|
| 3 |
+
emoji: 🎧
|
| 4 |
+
colorFrom: blue
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "5.49.1"
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🎧 Plotweaver AI — TTS MOS Evaluation Platform
|
| 13 |
+
|
| 14 |
+
A multi-user [Gradio](https://gradio.app) application for collecting **Mean Opinion Score (MOS)**
|
| 15 |
+
ratings of synthesised speech across **multiple languages**. Reviewers create accounts,
|
| 16 |
+
listen to audio samples in the languages they are competent in, and rate each sample on
|
| 17 |
+
**7 criteria (1–5)**. An admin uploads audio per language and reads off aggregated MOS results
|
| 18 |
+
per language and per model, with one-click export for your paper.
|
| 19 |
+
|
| 20 |
+
Built for the Plotweaver AI African-language TTS validation workflow (Yoruba, Hausa, Igbo,
|
| 21 |
+
Nigerian English, Akan, Swahili, Nigerian Pidgin, … add more any time).
|
| 22 |
+
|
| 23 |
+
> **Note on the front-matter above:** Hugging Face Spaces reads the YAML block at the top of this
|
| 24 |
+
> file to configure the Space (SDK, app file, etc.). Keep it as the very first thing in the file.
|
| 25 |
+
> Set `sdk_version` to whatever current Gradio 5.x the Space build accepts — if it complains, the
|
| 26 |
+
> build log lists valid versions.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## What it does
|
| 31 |
+
|
| 32 |
+
- **Accounts & roles.** Reviewers self-register; passwords are hashed (PBKDF2-HMAC-SHA256,
|
| 33 |
+
200k iterations, per-user salt). Two roles: `reviewer` and `admin`.
|
| 34 |
+
- **Per-language gating.** A reviewer is assigned a set of languages and only ever sees / rates
|
| 35 |
+
samples in those languages. They can update their language set themselves.
|
| 36 |
+
- **Add languages any time.** New languages appear instantly in the signup form, the reviewer
|
| 37 |
+
profile, and the admin upload/results dropdowns — no code change, no redeploy.
|
| 38 |
+
- **Blind evaluation.** The model / system name is stored with each sample but is **never shown
|
| 39 |
+
to reviewers** — they only see "Sample N", so ratings aren't biased by branding.
|
| 40 |
+
- **7-criterion MOS form** matching `TTS_Evaluation_Criteria.docx`:
|
| 41 |
+
Naturalness · Intelligibility · Pronunciation Accuracy · Prosody & Expressiveness · Fluency ·
|
| 42 |
+
Audio Quality · Overall Quality — plus a free-text comments box.
|
| 43 |
+
- **Resumable rating.** Ratings are upserted (one per reviewer per sample); reviewers can revisit
|
| 44 |
+
and change a rating, and a "Next unrated ▶" button walks them through the queue.
|
| 45 |
+
- **Results dashboard.** Per-model MOS, per-sample MOS, reviewer/sample counts, standard
|
| 46 |
+
deviation, and the separate **reference-anchor MOS** (see below). Export to `.xlsx`.
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Environment variables
|
| 51 |
+
|
| 52 |
+
| Variable | Default | Purpose |
|
| 53 |
+
|----------|---------|---------|
|
| 54 |
+
| `MOS_DATA_DIR` | `./data` | Where the SQLite DB, audio, and exports live. Point at your persistent mount (e.g. `/data` on a Space bucket). |
|
| 55 |
+
| `ADMIN_CODE` | `plotweaver-admin` | Entered on the signup form to create an admin account. **Override this.** |
|
| 56 |
+
| `MOS_JOURNAL_MODE` | `DELETE` | SQLite journal mode. Leave as `DELETE` on a bucket/network mount; set `WAL` only on a real local disk for extra concurrency. |
|
| 57 |
+
| `PORT` | `7860` | Port the app binds to. |
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## Quick start (local)
|
| 62 |
+
|
| 63 |
+
```bash
|
| 64 |
+
pip install -r requirements.txt
|
| 65 |
+
python app.py
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
Open http://localhost:7860.
|
| 69 |
+
|
| 70 |
+
**Create the first admin:** go to *Create account*, fill in a username/password, and enter the
|
| 71 |
+
admin code in the *Admin code* field. The default code is printed in the console on startup
|
| 72 |
+
(`plotweaver-admin` unless you override it). Override it with an environment variable:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
export ADMIN_CODE="something-only-you-know"
|
| 76 |
+
export MOS_JOURNAL_MODE=WAL # optional: a real local disk supports WAL
|
| 77 |
+
python app.py
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Anyone who signs up **without** the code becomes a normal reviewer.
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
## Deploying on a Hugging Face Space
|
| 85 |
+
|
| 86 |
+
This app writes its database and uploaded audio to disk, so it needs **persistent storage**. Space
|
| 87 |
+
filesystems are otherwise ephemeral and get wiped on rebuilds, restarts, and after the free CPU
|
| 88 |
+
tier sleeps. The current way to persist data on a Space is a **Storage Bucket** (HF's replacement
|
| 89 |
+
for the older fixed `/data` storage tier).
|
| 90 |
+
|
| 91 |
+
1. **Create the Space.** New → Space → SDK **Gradio**, hardware **CPU basic (free)**, visibility
|
| 92 |
+
**Private**. ZeroGPU is *not* needed — there's no inference here, only file serving and SQLite.
|
| 93 |
+
2. **Mount a Storage Bucket.** Space → Settings → **Storage Buckets** → *Mount a bucket*. Create a
|
| 94 |
+
private bucket (e.g. `mos-eval-data`), mode read-write, and set the **mount path** to `/data`.
|
| 95 |
+
Your audio clips are tiny, so this stays within the free private-storage allowance.
|
| 96 |
+
3. **Set variables / secrets.** Space → Settings → *Variables and secrets*:
|
| 97 |
+
- Variable `MOS_DATA_DIR` = `/data`
|
| 98 |
+
- Secret `ADMIN_CODE` = `<your secret>`
|
| 99 |
+
- (Do **not** set `MOS_JOURNAL_MODE` — the bucket-safe `DELETE` default is what you want here.)
|
| 100 |
+
4. **Upload `app.py`, `requirements.txt`, `README.md`** (this file, with its YAML front-matter) via
|
| 101 |
+
the Files tab or `git push`.
|
| 102 |
+
5. **First boot.** Watch the **Logs** tab — the active admin code is printed on startup. Open the
|
| 103 |
+
app, *Create account*, enter the admin code, and you're the admin.
|
| 104 |
+
|
| 105 |
+
Notes:
|
| 106 |
+
- The app already calls `launch(allowed_paths=[MOS_DATA_DIR])`, so Gradio is permitted to serve the
|
| 107 |
+
audio files stored on the bucket. Without this, the audio player can't load clips from `/data`.
|
| 108 |
+
- Buckets are object storage mounted as a filesystem (FUSE). SQLite's **WAL** mode relies on shared
|
| 109 |
+
memory that doesn't work on such mounts, which is why the default journal mode is `DELETE`. At a
|
| 110 |
+
few dozen reviewers this is comfortable; the only risk is a small chance of DB corruption if the
|
| 111 |
+
Space is killed mid-write. For a bulletproof setup, keep the DB on local disk and sync periodic
|
| 112 |
+
backups to the bucket (not built in — easy to add if you need it).
|
| 113 |
+
|
| 114 |
+
### Self-hosted (AlmaLinux / AWS, behind nginx)
|
| 115 |
+
|
| 116 |
+
Run it under `nohup` and reverse-proxy `127.0.0.1:7860`. A real local disk supports WAL:
|
| 117 |
+
|
| 118 |
+
```bash
|
| 119 |
+
export MOS_DATA_DIR=/srv/mos/data ADMIN_CODE=... MOS_JOURNAL_MODE=WAL
|
| 120 |
+
nohup python app.py > mos.log 2>&1 &
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
Point your nginx `location /` block at `http://127.0.0.1:7860;` with the usual
|
| 124 |
+
`proxy_set_header Upgrade/Connection` lines for websockets.
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## How an evaluation round works
|
| 129 |
+
|
| 130 |
+
1. **Admin → Languages.** Add the languages in scope (e.g. `yo / Yoruba`, `ha / Hausa`).
|
| 131 |
+
2. **Admin → Upload audio samples.** Pick a language, set the **Model / system name**
|
| 132 |
+
(e.g. `F5-TTS`, `XTTS-v2`, `MMS-TTS`, or `human`), drag in the wav/mp3/flac/ogg files, upload.
|
| 133 |
+
Tick **reference / human anchor** for ground-truth human recordings (see below).
|
| 134 |
+
3. **Reviewers** sign up, choose their languages, and rate. They see a blind "Sample N", an audio
|
| 135 |
+
player, the 7 radios, and a comments box.
|
| 136 |
+
4. **Admin → Results.** Pick a language, click **Compute MOS**, read the per-model and per-sample
|
| 137 |
+
tables, then **Export XLSX** (sheets: *Per Model*, *Per Sample*, *Raw Ratings*).
|
| 138 |
+
|
| 139 |
+
### Final MOS per language
|
| 140 |
+
|
| 141 |
+
The summary line reports two headline numbers per language:
|
| 142 |
+
|
| 143 |
+
- **System MOS (Overall criterion)** — the mean of the dedicated *Overall Quality* ratings. This is
|
| 144 |
+
the conventional single MOS number to quote.
|
| 145 |
+
- **System MOS (mean of all criteria)** — the mean across all 7 criteria, useful when you want a
|
| 146 |
+
composite that weights pronunciation/prosody equally with overall impression.
|
| 147 |
+
|
| 148 |
+
Both are broken down per model so you can compare systems within a language directly
|
| 149 |
+
(e.g. F5-TTS vs XTTS-v2 for Yoruba).
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
## Data model (SQLite)
|
| 154 |
+
|
| 155 |
+
```
|
| 156 |
+
users(id, username, email, password_hash, salt, role, native_langs, is_active, created_at)
|
| 157 |
+
languages(id, code, name, created_at)
|
| 158 |
+
user_languages(user_id, language_id) -- which languages a reviewer may rate
|
| 159 |
+
samples(id, language_id, sample_name, model_name, file_path, is_reference, transcript, created_at)
|
| 160 |
+
ratings(id, user_id, sample_id, naturalness, intelligibility, pronunciation,
|
| 161 |
+
prosody, fluency, audio_quality, overall, comments, updated_at,
|
| 162 |
+
UNIQUE(user_id, sample_id))
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
Everything lives under `MOS_DATA_DIR`: `mos.db`, `audio/<language_id>/<file>`, and `exports/`.
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
|
| 169 |
+
## Suggested additions (beyond what you asked for, already built in where noted)
|
| 170 |
+
|
| 171 |
+
These come from standard MOS / listening-test practice and matter for a defensible result:
|
| 172 |
+
|
| 173 |
+
1. **Reference / human anchors (built in).** Upload a few real human recordings flagged as
|
| 174 |
+
*reference*. They are mixed blindly into the reviewer's queue but excluded from the system MOS,
|
| 175 |
+
and their mean is reported separately. If your anchors don't score ~4.5–5.0, that reviewer (or
|
| 176 |
+
the whole batch) is mis-calibrated and you can discount them. This is the single most important
|
| 177 |
+
safeguard for credible MOS.
|
| 178 |
+
2. **Blind model names (built in).** Already enforced — reviewers never see which system produced
|
| 179 |
+
a clip.
|
| 180 |
+
3. **Inter-rater spread (built in).** The per-model/per-sample tables include the standard
|
| 181 |
+
deviation of the Overall score, so you can spot samples reviewers disagree on.
|
| 182 |
+
|
| 183 |
+
Worth considering for a v2 (not yet built — happy to add):
|
| 184 |
+
|
| 185 |
+
4. **Minimum ratings per sample** before a sample counts as "final" (e.g. require ≥5 reviewers),
|
| 186 |
+
and surface samples that are under-rated.
|
| 187 |
+
5. **Randomised, balanced presentation order** per reviewer (Latin-square style) so position bias
|
| 188 |
+
averages out, instead of always-ascending sample id.
|
| 189 |
+
6. **Listening-setup gate** β a one-time confirmation that the reviewer is on headphones in a quiet
|
| 190 |
+
room, stored with their profile.
|
| 191 |
+
7. **Native-speaker / experience metadata** on reviewers, so you can filter MOS to native speakers
|
| 192 |
+
only for the paper.
|
| 193 |
+
8. **Attention-trap clips** (e.g. "rate this one 1") to catch click-through reviewers.
|
| 194 |
+
9. **Krippendorff's α / ICC** for formal inter-rater reliability, reported per language.
|
| 195 |
+
10. **Pairwise / MUSHRA / CMOS modes** if you later want preference tests rather than absolute MOS.
|
| 196 |
+
11. **Per-reviewer rate limiting / session timing** to flag implausibly fast ratings.
|
| 197 |
+
12. **CI / standard error** on each MOS (the raw export already lets you compute this; could be
|
| 198 |
+
shown inline).
|
| 199 |
+
13. **DB backup-to-bucket** (recommended if you go to production scale on a Space) — periodic
|
| 200 |
+
snapshot of `mos.db` to a separate bucket path, with restore-on-startup, to remove the
|
| 201 |
+
mid-write corruption risk noted above.
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## Notes & limitations
|
| 206 |
+
|
| 207 |
+
- **SQLite journal mode.** Default is `DELETE` (safe on bucket/FUSE mounts). Set
|
| 208 |
+
`MOS_JOURNAL_MODE=WAL` only on a genuine local disk for better concurrency. Comfortable for a few
|
| 209 |
+
dozen concurrent reviewers; for larger crowdsourcing, move to Postgres — the data layer is
|
| 210 |
+
isolated in a handful of functions and easy to swap.
|
| 211 |
+
- **Audio serving.** Files are served from disk via Gradio's file mechanism, enabled by
|
| 212 |
+
`launch(allowed_paths=[MOS_DATA_DIR])`. Keep clips short (a few seconds) as is normal for MOS.
|
| 213 |
+
- Deactivating a user (`Admin → Users → active = no`) blocks login but keeps their ratings.
|
| 214 |
+
- Deleting a sample also deletes its ratings (cascade) and the audio file on disk.
|
| 215 |
+
|
| 216 |
+
---
|
| 217 |
+
|
| 218 |
+
*Generated for Afolabi Abeeb, Plotweaver AI.*
|
app.py
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Plotweaver AI — TTS MOS Evaluation Platform
|
| 3 |
+
============================================
|
| 4 |
+
|
| 5 |
+
A multi-user Gradio application for collecting Mean Opinion Score (MOS) ratings
|
| 6 |
+
of synthesised speech across multiple languages.
|
| 7 |
+
|
| 8 |
+
Roles
|
| 9 |
+
-----
|
| 10 |
+
- Reviewer: signs up, selects the language(s) they are competent in, listens to
|
| 11 |
+
audio samples and rates each on 7 criteria (1-5) plus free-text comments.
|
| 12 |
+
- Admin: uploads audio per language, manages languages/users, and views the
|
| 13 |
+
aggregated MOS results per language and per model, with CSV/XLSX export.
|
| 14 |
+
|
| 15 |
+
Persistence
|
| 16 |
+
-----------
|
| 17 |
+
Everything (users, languages, samples, ratings) lives in a single SQLite
|
| 18 |
+
database. Audio files are stored on disk under the data directory. Point
|
| 19 |
+
MOS_DATA_DIR at a persistent location (on Hugging Face Spaces enable persistent
|
| 20 |
+
storage and set MOS_DATA_DIR=/data) so data survives restarts.
|
| 21 |
+
|
| 22 |
+
Bootstrapping the first admin
|
| 23 |
+
-----------------------------
|
| 24 |
+
On the signup form there is an optional "Admin code" field. If it matches the
|
| 25 |
+
ADMIN_CODE environment variable, the new account is created as an admin.
|
| 26 |
+
The effective admin code is printed to the logs on startup.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
import os
|
| 30 |
+
import re
|
| 31 |
+
import sqlite3
|
| 32 |
+
import hashlib
|
| 33 |
+
import secrets
|
| 34 |
+
import datetime as dt
|
| 35 |
+
import shutil
|
| 36 |
+
|
| 37 |
+
import pandas as pd
|
| 38 |
+
import gradio as gr
|
| 39 |
+
|
| 40 |
+
# --------------------------------------------------------------------------- #
|
| 41 |
+
# Configuration
|
| 42 |
+
# --------------------------------------------------------------------------- #
|
| 43 |
+
# Root directory for all persistent state. MOS_DATA_DIR overrides the default,
# which is a "data" folder next to this file.
DATA_DIR = os.environ.get("MOS_DATA_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"))
# Uploaded audio files are copied under this directory.
AUDIO_DIR = os.path.join(DATA_DIR, "audio")
# NOTE(review): presumably where result exports are written — confirm against the export code.
EXPORT_DIR = os.path.join(DATA_DIR, "exports")
# Path of the single SQLite database file.
DB_PATH = os.path.join(DATA_DIR, "mos.db")
# Code typed into the signup form's "Admin code" field to create an admin
# account (see the module docstring). ADMIN_CODE overrides the built-in default.
ADMIN_CODE = os.environ.get("ADMIN_CODE", "plotweaver-admin")

# Import-time side effect: make sure every storage directory exists.
for d in (DATA_DIR, AUDIO_DIR, EXPORT_DIR):
    os.makedirs(d, exist_ok=True)
|
| 51 |
+
|
| 52 |
+
# The 7 MOS criteria, in the order they appear on the evaluation form.
# Each entry is (db_column, display_label, short_definition); db_column doubles
# as the column name in the ratings table.
CRITERIA = [
    ("naturalness", "Naturalness", "How human-like and natural the speech sounds."),
    ("intelligibility", "Intelligibility", "How easy it is to understand the spoken content."),
    ("pronunciation", "Pronunciation Accuracy", "Whether words, phonemes, tones and language-specific sounds are correct."),
    ("prosody", "Prosody & Expressiveness", "Rhythm, stress, pitch, intonation and speaking style."),
    ("fluency", "Fluency", "Smoothness without awkward pauses, repetitions or glitches."),
    ("audio_quality", "Audio Quality", "Technical quality: noise, distortion, clipping, artifacts."),
    ("overall", "Overall Quality", "Overall impression considering all aspects of synthesis quality."),
]
# Just the database column names, in form order.
CRITERIA_KEYS = [c[0] for c in CRITERIA]

# Legend for the 1-5 rating scale. The separators are middle dots; the garbled
# two-character sequence previously in this literal was an encoding artefact.
SCALE_HINT = "1 = Very Poor · 2 = Poor · 3 = Fair · 4 = Good · 5 = Excellent"
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# --------------------------------------------------------------------------- #
|
| 69 |
+
# Database helpers
|
| 70 |
+
# --------------------------------------------------------------------------- #
|
| 71 |
+
def get_conn():
    """Open a new SQLite connection to the application database.

    The connection uses a 30-second busy timeout, enforces foreign keys,
    and returns rows as ``sqlite3.Row`` so columns can be read by name.
    The caller owns the returned connection.
    """
    db = sqlite3.connect(DB_PATH, timeout=30)
    # Foreign-key enforcement is per connection in SQLite, so enable it each time.
    db.execute("PRAGMA foreign_keys = ON;")
    db.row_factory = sqlite3.Row
    return db
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def init_db():
    """Create the database schema if it is missing and set the journal mode.

    The journal mode comes from the ``MOS_JOURNAL_MODE`` environment variable
    and defaults to ``DELETE``, the mode the README recommends for
    bucket/network mounts. Unrecognised values fall back to ``DELETE``.
    All tables are created with ``IF NOT EXISTS``, so calling this on an
    existing database leaves its data untouched.
    """
    # PRAGMA arguments cannot be bound as SQL parameters, so only a
    # whitelisted mode name is ever interpolated into the statement.
    journal_mode = os.environ.get("MOS_JOURNAL_MODE", "DELETE").strip().upper()
    if journal_mode not in {"DELETE", "TRUNCATE", "PERSIST", "MEMORY", "WAL", "OFF"}:
        journal_mode = "DELETE"

    conn = get_conn()
    try:
        # "with conn" commits or rolls back; it does not close the connection.
        with conn:
            conn.execute(f"PRAGMA journal_mode = {journal_mode};")
            conn.execute("""
                CREATE TABLE IF NOT EXISTS users (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    username TEXT UNIQUE NOT NULL,
                    email TEXT,
                    password_hash TEXT NOT NULL,
                    salt TEXT NOT NULL,
                    role TEXT NOT NULL DEFAULT 'reviewer',
                    native_langs TEXT DEFAULT '',
                    is_active INTEGER NOT NULL DEFAULT 1,
                    created_at TEXT NOT NULL
                );
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS languages (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    code TEXT UNIQUE NOT NULL,
                    name TEXT NOT NULL,
                    created_at TEXT NOT NULL
                );
            """)
            # user <-> language assignment (which languages a reviewer is eligible for)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS user_languages (
                    user_id INTEGER NOT NULL,
                    language_id INTEGER NOT NULL,
                    PRIMARY KEY (user_id, language_id),
                    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
                    FOREIGN KEY (language_id) REFERENCES languages(id) ON DELETE CASCADE
                );
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS samples (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    language_id INTEGER NOT NULL,
                    sample_name TEXT NOT NULL,
                    model_name TEXT NOT NULL DEFAULT 'unspecified',
                    file_path TEXT NOT NULL,
                    is_reference INTEGER NOT NULL DEFAULT 0,
                    transcript TEXT DEFAULT '',
                    created_at TEXT NOT NULL,
                    FOREIGN KEY (language_id) REFERENCES languages(id) ON DELETE CASCADE
                );
            """)
            # One INTEGER column per MOS criterion, generated from CRITERIA_KEYS.
            cols = ",\n".join(f"{k} INTEGER" for k in CRITERIA_KEYS)
            conn.execute(f"""
                CREATE TABLE IF NOT EXISTS ratings (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id INTEGER NOT NULL,
                    sample_id INTEGER NOT NULL,
                    {cols},
                    comments TEXT DEFAULT '',
                    updated_at TEXT NOT NULL,
                    UNIQUE (user_id, sample_id),
                    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
                    FOREIGN KEY (sample_id) REFERENCES samples(id) ON DELETE CASCADE
                );
            """)
    finally:
        conn.close()
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def now_iso():
    """Return the current UTC time as a naive ISO-8601 string, to the second.

    The result has the form ``YYYY-MM-DDTHH:MM:SS`` with no UTC-offset suffix.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the stored format stays exactly the same.
    return dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat(timespec="seconds")
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# --------------------------------------------------------------------------- #
|
| 146 |
+
# Auth
|
| 147 |
+
# --------------------------------------------------------------------------- #
|
| 148 |
+
def hash_password(password, salt=None):
    """Derive a PBKDF2-HMAC-SHA256 hash for *password*.

    If *salt* is None, a new random 16-byte salt is generated and used as a
    32-character hex string. Returns ``(hex_digest, salt)`` so the caller can
    store both.
    """
    salt = secrets.token_hex(16) if salt is None else salt
    digest = hashlib.pbkdf2_hmac(
        "sha256",
        password.encode("utf-8"),
        salt.encode("utf-8"),
        200_000,
    )
    return digest.hex(), salt
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def create_user(username, email, password, role="reviewer", language_ids=None):
    """Validate and insert a new user, returning the new user's id.

    Args:
        username: 3-32 characters from letters, digits, ``_``, ``.``, ``-``
            (surrounding whitespace is stripped first).
        email: Optional e-mail address, stored stripped; ``None`` becomes "".
        password: Plain-text password of at least 6 characters; only its
            salted hash is stored.
        role: Role string stored on the user row.
        language_ids: Optional iterable of language ids to assign to the user.

    Raises:
        ValueError: If the username or password is invalid, or the username
            is already taken.
    """
    username = (username or "").strip()
    if not re.fullmatch(r"[A-Za-z0-9_.\-]{3,32}", username):
        raise ValueError("Username must be 3-32 chars: letters, numbers, _ . - only.")
    if not password or len(password) < 6:
        raise ValueError("Password must be at least 6 characters.")
    pw_hash, salt = hash_password(password)

    conn = get_conn()
    try:
        # "with conn" commits on success and rolls back on error; the
        # connection itself is closed in the finally clause.
        with conn:
            try:
                cur = conn.execute(
                    "INSERT INTO users (username, email, password_hash, salt, role, created_at) "
                    "VALUES (?,?,?,?,?,?)",
                    (username, (email or "").strip(), pw_hash, salt, role, now_iso()),
                )
            except sqlite3.IntegrityError:
                # username is the only UNIQUE column on users; hide the raw
                # SQLite error behind the user-facing message.
                raise ValueError(f"Username '{username}' is already taken.") from None
            uid = cur.lastrowid
            conn.executemany(
                "INSERT OR IGNORE INTO user_languages (user_id, language_id) VALUES (?,?)",
                [(uid, lid) for lid in (language_ids or [])],
            )
        return uid
    finally:
        conn.close()
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def authenticate(username, password):
    """Check a username/password pair.

    Returns the full user row as a dict when the user exists, is active and
    the password matches; otherwise returns None. A ``None`` password is
    treated as a failed login rather than raising.
    """
    conn = get_conn()
    try:
        row = conn.execute(
            "SELECT * FROM users WHERE username = ?", ((username or "").strip(),)
        ).fetchone()
    finally:
        # The sqlite3 context manager never closes the connection; do it here.
        conn.close()

    if row is None or not row["is_active"] or password is None:
        return None
    pw_hash, _ = hash_password(password, row["salt"])
    # Constant-time comparison of the two hex digests.
    if secrets.compare_digest(pw_hash, row["password_hash"]):
        return dict(row)
    return None
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def user_session(uid):
    """Return a lightweight session dict used in gr.State.

    The dict holds the user's ``id``, ``username``, ``role`` and a
    ``languages`` list of ``{"id", "code", "name"}`` dicts ordered by
    language name. Raises TypeError if no user with *uid* exists, because
    the missing row is indexed directly.
    """
    conn = get_conn()
    try:
        row = conn.execute("SELECT * FROM users WHERE id = ?", (uid,)).fetchone()
        langs = conn.execute(
            "SELECT l.id, l.code, l.name FROM user_languages ul "
            "JOIN languages l ON l.id = ul.language_id WHERE ul.user_id = ? ORDER BY l.name", (uid,)
        ).fetchall()
    finally:
        # Read-only queries: nothing to commit, but the connection must be closed.
        conn.close()
    return {
        "id": row["id"],
        "username": row["username"],
        "role": row["role"],
        "languages": [{"id": l["id"], "code": l["code"], "name": l["name"]} for l in langs],
    }
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# --------------------------------------------------------------------------- #
|
| 205 |
+
# Languages
|
| 206 |
+
# --------------------------------------------------------------------------- #
|
| 207 |
+
def add_language(code, name):
    """Insert a new language.

    *code* is stripped and lower-cased, *name* is stripped.

    Raises:
        ValueError: If either value is empty, or the code already exists.
    """
    code = (code or "").strip().lower()
    name = (name or "").strip()
    if not code or not name:
        raise ValueError("Both language code and name are required.")
    conn = get_conn()
    try:
        with conn:
            conn.execute(
                "INSERT INTO languages (code, name, created_at) VALUES (?,?,?)",
                (code, name, now_iso()),
            )
    except sqlite3.IntegrityError:
        # code is the only UNIQUE column on languages.
        raise ValueError(f"Language code '{code}' already exists.") from None
    finally:
        conn.close()
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def list_languages():
    """Return every language row as a dict, ordered by name."""
    conn = get_conn()
    try:
        return [dict(r) for r in conn.execute("SELECT * FROM languages ORDER BY name").fetchall()]
    finally:
        # The sqlite3 context manager does not close connections; close explicitly.
        conn.close()
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def set_user_languages(uid, language_ids):
    """Replace the set of languages assigned to user *uid*.

    Existing assignments are deleted and the new ones inserted in a single
    transaction. ``None`` or an empty iterable clears all assignments.
    """
    conn = get_conn()
    try:
        with conn:
            conn.execute("DELETE FROM user_languages WHERE user_id = ?", (uid,))
            conn.executemany(
                "INSERT OR IGNORE INTO user_languages (user_id, language_id) VALUES (?,?)",
                # Tolerate None the same way create_user does.
                [(uid, lid) for lid in (language_ids or [])],
            )
    finally:
        conn.close()
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# --------------------------------------------------------------------------- #
|
| 232 |
+
# Samples
|
| 233 |
+
# --------------------------------------------------------------------------- #
|
| 234 |
+
def add_sample(language_id, src_path, sample_name=None, model_name="unspecified",
               is_reference=False, transcript=""):
    """Copy an audio file into the language's audio folder and register it.

    Args:
        language_id: Id of the language the sample belongs to.
        src_path: Path of the audio file to copy.
        sample_name: Display name; defaults to the source file's base name
            without its extension.
        model_name: Model / system name; blank values become "unspecified".
        is_reference: Truthy to flag the sample as a reference recording.
        transcript: Optional transcript text, stored stripped.

    Raises:
        ValueError: If *src_path* is empty or does not exist.
        sqlite3.Error: If the database insert fails; the copied file is
            removed again before the error propagates.
    """
    if not src_path or not os.path.exists(src_path):
        raise ValueError("Audio file not found.")
    ext = os.path.splitext(src_path)[1].lower() or ".wav"
    lang_dir = os.path.join(AUDIO_DIR, str(language_id))
    os.makedirs(lang_dir, exist_ok=True)
    # A random stored file name avoids collisions between uploads.
    fname = f"{secrets.token_hex(8)}{ext}"
    dst = os.path.join(lang_dir, fname)
    shutil.copyfile(src_path, dst)
    sample_name = (sample_name or "").strip() or os.path.splitext(os.path.basename(src_path))[0]
    # Strip first, then fall back, so a whitespace-only name is not stored as "".
    model_name = (model_name or "").strip() or "unspecified"

    conn = get_conn()
    try:
        with conn:
            conn.execute(
                "INSERT INTO samples (language_id, sample_name, model_name, file_path, is_reference, transcript, created_at) "
                "VALUES (?,?,?,?,?,?,?)",
                (language_id, sample_name, model_name, dst,
                 1 if is_reference else 0, (transcript or "").strip(), now_iso()),
            )
    except sqlite3.Error:
        # The row was not stored, so do not leave an orphaned audio file behind.
        try:
            os.remove(dst)
        except OSError:
            pass
        raise
    finally:
        conn.close()
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def list_samples(language_id=None):
    """Return sample rows as dicts, each with its language name and code.

    With a truthy *language_id* only that language's samples are returned;
    otherwise all samples. Rows are ordered by language id, then sample id.
    """
    q = ("SELECT s.*, l.name AS language_name, l.code AS language_code "
         "FROM samples s JOIN languages l ON l.id = s.language_id")
    args = ()
    if language_id:
        q += " WHERE s.language_id = ?"
        args = (language_id,)
    q += " ORDER BY s.language_id, s.id"
    conn = get_conn()
    try:
        return [dict(r) for r in conn.execute(q, args).fetchall()]
    finally:
        # The sqlite3 context manager does not close connections; close explicitly.
        conn.close()
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def delete_sample(sample_id):
    """Delete a sample row, then remove its audio file on a best-effort basis.

    The row is deleted first: if the DELETE raises, the audio file is still on
    disk and the sample stays playable, instead of leaving a row that points
    at a file that has already been removed.
    """
    with get_conn() as conn:
        row = conn.execute("SELECT file_path FROM samples WHERE id = ?", (sample_id,)).fetchone()
        conn.execute("DELETE FROM samples WHERE id = ?", (sample_id,))
    # NOTE(review): ratings that reference this sample are not touched here;
    # confirm the schema cascades (or that orphaned ratings are acceptable).
    file_path = row["file_path"] if row else None
    if file_path:
        try:
            os.remove(file_path)
        except OSError:
            # Best effort: a missing or locked file must not fail the delete.
            pass
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# --------------------------------------------------------------------------- #
|
| 278 |
+
# Ratings
|
| 279 |
+
# --------------------------------------------------------------------------- #
|
| 280 |
+
def upsert_rating(user_id, sample_id, scores, comments=""):
    """Insert a reviewer's rating for a sample, or overwrite the existing one.

    ``scores`` is indexed by every key in ``CRITERIA_KEYS`` and each value is
    converted with ``int``. The (user_id, sample_id) conflict clause turns a
    repeat submission into an update of the scores, comments and timestamp.
    """
    column_list = ", ".join(CRITERIA_KEYS)
    marks = ", ".join(["?"] * len(CRITERIA_KEYS))
    assignments = ", ".join(f"{key}=excluded.{key}" for key in CRITERIA_KEYS)
    score_values = [int(scores[key]) for key in CRITERIA_KEYS]
    sql = (
        f"INSERT INTO ratings (user_id, sample_id, {column_list}, comments, updated_at) "
        f"VALUES (?,?,{marks},?,?) "
        f"ON CONFLICT(user_id, sample_id) DO UPDATE SET {assignments}, "
        "comments=excluded.comments, updated_at=excluded.updated_at"
    )
    with get_conn() as conn:
        params = [user_id, sample_id]
        params.extend(score_values)
        params.append((comments or "").strip())
        params.append(now_iso())
        conn.execute(sql, params)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def get_rating(user_id, sample_id):
    """Return the rating row for (user_id, sample_id) as a dict, or None if absent."""
    sql = "SELECT * FROM ratings WHERE user_id=? AND sample_id=?"
    with get_conn() as conn:
        found = conn.execute(sql, (user_id, sample_id)).fetchone()
        if not found:
            return None
        return dict(found)
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def samples_for_reviewer(user_id, language_id):
    """List a language's samples, each flagged with whether this reviewer rated it.

    Every returned dict has ``id``, ``sample_name``, ``is_reference`` and
    ``rated`` (1 when a rating by ``user_id`` exists, else 0), ordered by
    sample id.
    """
    sql = (
        "SELECT s.id, s.sample_name, s.is_reference, "
        "       CASE WHEN r.id IS NULL THEN 0 ELSE 1 END AS rated "
        "FROM samples s "
        "LEFT JOIN ratings r ON r.sample_id = s.id AND r.user_id = ? "
        "WHERE s.language_id = ? ORDER BY s.id"
    )
    with get_conn() as conn:
        records = conn.execute(sql, (user_id, language_id)).fetchall()
    return [dict(record) for record in records]
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# --------------------------------------------------------------------------- #
|
| 317 |
+
# Aggregation
|
| 318 |
+
# --------------------------------------------------------------------------- #
|
| 319 |
+
def ratings_dataframe(language_id=None, include_reference=False):
    """Load ratings joined with sample and language columns into a DataFrame.

    A truthy ``language_id`` limits the query to that language. Unless
    ``include_reference`` is truthy, rows whose ``is_reference`` is non-zero
    are dropped from a non-empty result.
    """
    sql = (
        "SELECT r.*, s.language_id, s.sample_name, s.model_name, s.is_reference, "
        "       l.name AS language_name, l.code AS language_code "
        "FROM ratings r "
        "JOIN samples s ON s.id = r.sample_id "
        "JOIN languages l ON l.id = s.language_id"
    )
    params = ()
    if language_id:
        sql += " WHERE s.language_id = ?"
        params = (language_id,)
    with get_conn() as conn:
        frame = pd.read_sql_query(sql, conn, params=params)
    if include_reference or frame.empty:
        return frame
    return frame[frame["is_reference"] == 0]
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def compute_results(language_id, by_model=True):
    """Return (per_model_df, per_sample_df, summary_text) for a language.

    Args:
        language_id: Language whose ratings are aggregated.
        by_model: Accepted for compatibility; the current implementation does
            not read it (both tables are always produced).

    Returns:
        A 3-tuple. The two DataFrames aggregate non-reference ratings by model
        and by (model, sample); either is empty when there is nothing to
        aggregate. The summary is a newline-joined text block. When the
        language has no ratings at all, both frames are empty and the summary
        says so.
    """
    df = ratings_dataframe(language_id=language_id, include_reference=True)
    if df.empty:
        return pd.DataFrame(), pd.DataFrame(), "No ratings collected yet for this language."

    # Split system vs reference (human anchor) samples.
    sys_df = df[df["is_reference"] == 0]
    ref_df = df[df["is_reference"] == 1]

    def agg_block(frame, group_cols):
        """Aggregate per-criterion means, MOS, spread and counts per group."""
        if frame.empty:
            return pd.DataFrame()
        g = frame.groupby(group_cols, dropna=False)
        means = g[CRITERIA_KEYS].mean()
        out = means.round(3)
        # Average the unrounded means: averaging the already-rounded columns
        # would round twice and can shift the last displayed digit.
        out["MOS (mean of criteria)"] = means.mean(axis=1).round(3)
        out["overall_std"] = g["overall"].std().round(3)
        out["n_ratings"] = g.size()
        out["n_reviewers"] = g["user_id"].nunique()
        out["n_samples"] = g["sample_id"].nunique()
        return out.reset_index()

    per_model = agg_block(sys_df, ["model_name"])
    per_sample = agg_block(sys_df, ["model_name", "sample_id", "sample_name"])

    # Friendly column labels.
    label_map = {k: lbl for k, lbl, _ in CRITERIA}
    per_model = per_model.rename(columns=label_map)
    per_sample = per_sample.rename(columns=label_map)

    lang_name = df["language_name"].iloc[0]
    lines = [f"Language: {lang_name}",
             f"Total ratings: {len(sys_df)} | Reviewers: {sys_df['user_id'].nunique()} | "
             f"Samples: {sys_df['sample_id'].nunique()} | Models: {sys_df['model_name'].nunique()}"]
    if not sys_df.empty:
        overall_mos = sys_df["overall"].mean()
        mean_mos = sys_df[CRITERIA_KEYS].mean(axis=1).mean()
        lines.append(f"System MOS (Overall criterion): {overall_mos:.3f} | "
                     f"System MOS (mean of all criteria): {mean_mos:.3f}")
    if not ref_df.empty:
        lines.append(f"Reference/anchor MOS (Overall): {ref_df['overall'].mean():.3f} "
                     f"({ref_df['sample_id'].nunique()} anchor samples) — sanity check on reviewer calibration.")
    return per_model, per_sample, "\n".join(lines)
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def export_results(language_id):
    """Write the MOS results for a language to an .xlsx file and return its path.

    The workbook has three sheets ("Per Model", "Per Sample", "Raw Ratings");
    a sheet with no data holds a single "no data" placeholder row. The file
    name carries the language code (or the id when the language is unknown)
    and a UTC timestamp.
    """
    per_model, per_sample, _ = compute_results(language_id)
    raw = ratings_dataframe(language_id=language_id, include_reference=True)
    langs = {l["id"]: l for l in list_languages()}
    code = langs.get(language_id, {}).get("code", str(language_id))
    # Language codes are admin-entered text; keep only characters that are
    # safe in a file name so a code cannot introduce path separators.
    safe_code = "".join(ch if (ch.isalnum() or ch in "-_") else "_" for ch in str(code))
    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d_%H%M%S")
    os.makedirs(EXPORT_DIR, exist_ok=True)
    path = os.path.join(EXPORT_DIR, f"mos_results_{safe_code}_{stamp}.xlsx")
    sheets = (("Per Model", per_model), ("Per Sample", per_sample), ("Raw Ratings", raw))
    with pd.ExcelWriter(path, engine="openpyxl") as xw:
        for sheet_name, frame in sheets:
            if frame.empty:
                frame = pd.DataFrame({"info": ["no data"]})
            frame.to_excel(xw, sheet_name=sheet_name, index=False)
    return path
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
# --------------------------------------------------------------------------- #
|
| 397 |
+
# Startup
|
| 398 |
+
# --------------------------------------------------------------------------- #
|
| 399 |
+
# Runs at import time: initialise the database (see init_db), then print a
# startup banner with the storage locations in use.
init_db()
print("=" * 64)
print("Plotweaver AI — TTS MOS Evaluation Platform")
print(f"Data directory : {DATA_DIR}")
print(f"Database : {DB_PATH}")
# NOTE(review): this writes the admin signup code to stdout, so it ends up in
# the process logs. Confirm that everyone who can read the logs may become admin.
print(f"Admin code : {ADMIN_CODE} (use on signup to create an admin)")
print("=" * 64)
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
# ===========================================================================
|
| 409 |
+
# GRADIO UI
|
| 410 |
+
# ===========================================================================
|
| 411 |
+
def lang_choices():
    """Return (label, id) pairs for every language, labelled as "Name (code)"."""
    options = []
    for lang in list_languages():
        label = f"{lang['name']} ({lang['code']})"
        options.append((label, lang["id"]))
    return options
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def reviewer_lang_choices(session):
    """Return (name, id) pairs for the languages in a session; [] without a session."""
    if session:
        return [(lang["name"], lang["id"]) for lang in session["languages"]]
    return []
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
with gr.Blocks(title="Plotweaver AI — TTS MOS Evaluation", theme=gr.themes.Soft()) as demo:
    # State components; both start as None.
    session = gr.State(None)  # logged-in user session dict
    current_sample = gr.State(None)  # sample id currently being rated

    # The title and heading literals were mis-decoded ("β", "π§"); restored here.
    gr.Markdown("# 🎧 Plotweaver AI — TTS MOS Evaluation Platform")
|
| 426 |
+
|
| 427 |
+
# ----------------------------- AUTH ------------------------------------ #
|
| 428 |
+
with gr.Column(visible=True) as auth_col:
|
| 429 |
+
gr.Markdown("Sign in or create a reviewer account to begin.")
|
| 430 |
+
with gr.Tabs():
|
| 431 |
+
with gr.Tab("Sign in"):
|
| 432 |
+
li_user = gr.Textbox(label="Username")
|
| 433 |
+
li_pw = gr.Textbox(label="Password", type="password")
|
| 434 |
+
li_btn = gr.Button("Sign in", variant="primary")
|
| 435 |
+
li_msg = gr.Markdown()
|
| 436 |
+
with gr.Tab("Create account"):
|
| 437 |
+
su_user = gr.Textbox(label="Username", info="3-32 chars: letters, numbers, _ . -")
|
| 438 |
+
su_email = gr.Textbox(label="Email (optional)")
|
| 439 |
+
su_pw = gr.Textbox(label="Password", type="password", info="At least 6 characters")
|
| 440 |
+
su_langs = gr.CheckboxGroup(label="Languages you can evaluate", choices=lang_choices())
|
| 441 |
+
su_code = gr.Textbox(label="Admin code (optional)", type="password",
|
| 442 |
+
info="Leave blank for a normal reviewer account.")
|
| 443 |
+
su_btn = gr.Button("Create account", variant="primary")
|
| 444 |
+
su_msg = gr.Markdown()
|
| 445 |
+
|
| 446 |
+
# ----------------------------- APP ------------------------------------- #
|
| 447 |
+
with gr.Column(visible=False) as app_col:
|
| 448 |
+
with gr.Row():
|
| 449 |
+
greeting = gr.Markdown()
|
| 450 |
+
logout_btn = gr.Button("Log out", scale=0)
|
| 451 |
+
|
| 452 |
+
with gr.Tabs() as app_tabs:
|
| 453 |
+
# ---------- Rate tab ----------
|
| 454 |
+
with gr.Tab("Rate samples"):
|
| 455 |
+
gr.Markdown(
|
| 456 |
+
"Please listen to **the whole clip** with headphones in a quiet room before rating. "
|
| 457 |
+
f"Scale: {SCALE_HINT}"
|
| 458 |
+
)
|
| 459 |
+
with gr.Row():
    # Selection row of the rating tab. The check-mark and arrow glyphs in the
    # two labels below were mis-decoded and are restored here.
    rate_lang = gr.Dropdown(label="Language", choices=[], interactive=True)
    rate_sample = gr.Dropdown(label="Sample (✓ = already rated)", choices=[], interactive=True)
    next_btn = gr.Button("Next unrated ▶", scale=0)
|
| 463 |
+
rate_audio = gr.Audio(label="Audio sample", type="filepath", interactive=False)
|
| 464 |
+
|
| 465 |
+
criterion_inputs = {}
|
| 466 |
+
with gr.Row():
|
| 467 |
+
for key, label, definition in CRITERIA[:4]:
|
| 468 |
+
criterion_inputs[key] = gr.Radio(
|
| 469 |
+
choices=[1, 2, 3, 4, 5], label=label, info=definition, value=None
|
| 470 |
+
)
|
| 471 |
+
with gr.Row():
|
| 472 |
+
for key, label, definition in CRITERIA[4:]:
|
| 473 |
+
criterion_inputs[key] = gr.Radio(
|
| 474 |
+
choices=[1, 2, 3, 4, 5], label=label, info=definition, value=None
|
| 475 |
+
)
|
| 476 |
+
rate_comments = gr.Textbox(label="Comments (pronunciation errors, artifacts, etc.)", lines=2)
|
| 477 |
+
with gr.Row():
|
| 478 |
+
submit_btn = gr.Button("Submit / update rating", variant="primary")
|
| 479 |
+
rate_msg = gr.Markdown()
|
| 480 |
+
|
| 481 |
+
# ---------- Progress tab ----------
|
| 482 |
+
with gr.Tab("My progress"):
|
| 483 |
+
refresh_prog_btn = gr.Button("Refresh")
|
| 484 |
+
progress_md = gr.Markdown()
|
| 485 |
+
progress_tbl = gr.Dataframe(headers=["Language", "Rated", "Total", "Remaining"], interactive=False)
|
| 486 |
+
|
| 487 |
+
# ---------- Profile tab ----------
|
| 488 |
+
with gr.Tab("My languages"):
|
| 489 |
+
gr.Markdown("Update the set of languages you are eligible to evaluate.")
|
| 490 |
+
prof_langs = gr.CheckboxGroup(label="Languages", choices=lang_choices())
|
| 491 |
+
prof_save = gr.Button("Save", variant="primary")
|
| 492 |
+
prof_msg = gr.Markdown()
|
| 493 |
+
|
| 494 |
+
# ---------- Admin tab ----------
|
| 495 |
+
with gr.Tab("Admin", visible=False) as admin_tab:
|
| 496 |
+
gr.Markdown("### Languages")
|
| 497 |
+
with gr.Row():
|
| 498 |
+
al_code = gr.Textbox(label="Code", info="e.g. yo, ha, ig, pcm, en-NG")
|
| 499 |
+
al_name = gr.Textbox(label="Name", info="e.g. Yoruba")
|
| 500 |
+
al_btn = gr.Button("Add language", scale=0)
|
| 501 |
+
al_msg = gr.Markdown()
|
| 502 |
+
langs_tbl = gr.Dataframe(headers=["id", "code", "name"], interactive=False, label="Existing languages")
|
| 503 |
+
|
| 504 |
+
gr.Markdown("### Upload audio samples")
|
| 505 |
+
with gr.Row():
|
| 506 |
+
up_lang = gr.Dropdown(label="Language", choices=lang_choices())
|
| 507 |
+
up_model = gr.Textbox(label="Model / system name", value="",
|
| 508 |
+
info="e.g. F5-TTS, XTTS-v2, MMS-TTS, human. Hidden from reviewers.")
|
| 509 |
+
up_files = gr.File(label="Audio files (wav/mp3/flac/ogg)", file_count="multiple",
|
| 510 |
+
file_types=["audio"])
|
| 511 |
+
with gr.Row():
|
| 512 |
+
up_isref = gr.Checkbox(label="These are reference / human anchor samples", value=False)
|
| 513 |
+
up_transcript = gr.Textbox(label="Transcript (optional, applies to all uploaded)", scale=2)
|
| 514 |
+
up_btn = gr.Button("Upload", variant="primary")
|
| 515 |
+
up_msg = gr.Markdown()
|
| 516 |
+
samples_tbl = gr.Dataframe(
|
| 517 |
+
headers=["id", "language", "sample_name", "model", "reference"],
|
| 518 |
+
interactive=False, label="Samples")
|
| 519 |
+
with gr.Row():
|
| 520 |
+
del_sample_id = gr.Number(label="Delete sample by id", precision=0)
|
| 521 |
+
del_btn = gr.Button("Delete", variant="stop", scale=0)
|
| 522 |
+
|
| 523 |
+
gr.Markdown("### Users")
|
| 524 |
+
refresh_users_btn = gr.Button("Refresh users")
|
| 525 |
+
users_tbl = gr.Dataframe(
|
| 526 |
+
headers=["id", "username", "role", "active", "languages", "ratings"],
|
| 527 |
+
interactive=False)
|
| 528 |
+
with gr.Row():
|
| 529 |
+
promote_id = gr.Number(label="User id", precision=0)
|
| 530 |
+
role_choice = gr.Dropdown(label="Set role", choices=["reviewer", "admin"], value="reviewer")
|
| 531 |
+
active_choice = gr.Dropdown(label="Set active", choices=["yes", "no"], value="yes")
|
| 532 |
+
update_user_btn = gr.Button("Apply", scale=0)
|
| 533 |
+
user_admin_msg = gr.Markdown()
|
| 534 |
+
|
| 535 |
+
gr.Markdown("### Results")
|
| 536 |
+
with gr.Row():
|
| 537 |
+
res_lang = gr.Dropdown(label="Language", choices=lang_choices())
|
| 538 |
+
res_btn = gr.Button("Compute MOS", variant="primary")
|
| 539 |
+
export_btn = gr.Button("Export XLSX")
|
| 540 |
+
res_summary = gr.Markdown()
|
| 541 |
+
res_model_tbl = gr.Dataframe(label="MOS by model", interactive=False)
|
| 542 |
+
res_sample_tbl = gr.Dataframe(label="MOS by sample", interactive=False)
|
| 543 |
+
res_file = gr.File(label="Exported file")
|
| 544 |
+
|
| 545 |
+
# ===================================================================== #
|
| 546 |
+
# Handlers
|
| 547 |
+
# ===================================================================== #
|
| 548 |
+
rate_outputs = [criterion_inputs[k] for k in CRITERIA_KEYS]
|
| 549 |
+
|
| 550 |
+
def do_login(username, password):
    """Authenticate and, on success, switch the UI from the auth view to the app.

    Returns an 8-tuple in the order of the click outputs wired below:
    auth_col, app_col, session, li_msg, greeting, admin_tab, rate_lang,
    prof_langs.
    """
    user = authenticate(username, password)
    if not user:
        # Leave every component as it is, reset the session, show the error.
        return (gr.update(), gr.update(), None, "❌ Invalid username or password.",
                gr.update(), gr.update(), gr.update(), gr.update())
    sess = user_session(user["id"])
    is_admin = sess["role"] == "admin"
    rl = reviewer_lang_choices(sess)
    return (
        gr.update(visible=False),                                    # auth_col
        gr.update(visible=True),                                     # app_col
        sess,                                                        # session state
        "",                                                          # li_msg
        gr.update(value=f"Signed in as **{sess['username']}** ({sess['role']})"),  # greeting
        gr.update(visible=is_admin),                                 # admin_tab
        gr.update(choices=rl, value=(rl[0][1] if rl else None)),     # rate_lang
        gr.update(choices=lang_choices(),
                  value=[l["id"] for l in sess["languages"]]),       # prof_langs
    )

li_btn.click(
    do_login, [li_user, li_pw],
    [auth_col, app_col, session, li_msg, greeting, admin_tab, rate_lang, prof_langs],
)
|
| 574 |
+
|
| 575 |
+
def do_signup(username, email, password, lang_ids, code):
    """Create an account; a matching admin code makes it an admin account.

    Returns (status markdown, update for the password box). The password box
    is cleared on success and left untouched when ``create_user`` raises
    ``ValueError``.
    """
    # Compare the supplied code in constant time so response timing does not
    # reveal how much of the admin code matched. A blank or non-string code
    # never grants admin.
    supplied = code if isinstance(code, str) else ""
    expected = ADMIN_CODE if isinstance(ADMIN_CODE, str) else ""
    is_admin = bool(supplied) and secrets.compare_digest(
        supplied.encode("utf-8"), expected.encode("utf-8"))
    role = "admin" if is_admin else "reviewer"
    try:
        create_user(username, email, password, role=role, language_ids=lang_ids or [])
    except ValueError as e:
        return f"❌ {e}", gr.update()
    note = " (admin)" if role == "admin" else ""
    return f"✅ Account created{note}. Switch to **Sign in** to continue.", gr.update(value="")

su_btn.click(do_signup, [su_user, su_email, su_pw, su_langs, su_code], [su_msg, su_pw])
|
| 584 |
+
|
| 585 |
+
def do_logout():
    """Show the auth view, hide the app view, clear the session and the greeting."""
    show_auth = gr.update(visible=True)
    hide_app = gr.update(visible=False)
    return (show_auth, hide_app, None, "")

logout_btn.click(do_logout, None, [auth_col, app_col, session, greeting])
|
| 588 |
+
|
| 589 |
+
# ---- Rating flow ----
|
| 590 |
+
def load_samples_for_lang(sess, language_id):
    """Fill the sample dropdown for a language and select its first sample.

    Labels are prefixed with "✓ " for samples this reviewer already rated and
    "• " otherwise. Without a session or a language the dropdown is emptied.
    """
    if not sess or not language_id:
        return gr.update(choices=[], value=None)
    items = samples_for_reviewer(sess["id"], language_id)
    choices = [(("✓ " if s["rated"] else "• ") + s["sample_name"], s["id"]) for s in items]
    return gr.update(choices=choices, value=(choices[0][1] if choices else None))

rate_lang.change(load_samples_for_lang, [session, rate_lang], [rate_sample])
|
| 597 |
+
|
| 598 |
+
def load_sample(sess, sample_id):
    """Load audio + any existing rating into the form.

    Returns, in the order of the outputs wired below: the audio file path, the
    sample id (stored in ``current_sample``), one score per criterion, and the
    comments. Every value is blank when there is no signed-in session, no
    sample is selected, or the sample does not exist.
    """
    blank = (None, None, *[None] * len(CRITERIA_KEYS), "")
    # Require a session: otherwise this handler would hand out a sample's file
    # path to a caller who never signed in.
    if not sess or not sample_id:
        return blank
    with get_conn() as conn:
        row = conn.execute("SELECT * FROM samples WHERE id=?", (sample_id,)).fetchone()
    if not row:
        return blank
    existing = get_rating(sess["id"], sample_id)
    scores = [existing[k] if existing else None for k in CRITERIA_KEYS]
    comments = existing["comments"] if existing else ""
    return (row["file_path"], sample_id, *scores, comments)

rate_sample.change(
    load_sample, [session, rate_sample],
    [rate_audio, current_sample, *rate_outputs, rate_comments],
)
|
| 614 |
+
|
| 615 |
+
def go_next_unrated(sess, language_id):
    """Select the first sample in the language that this reviewer has not rated.

    Returns a no-op update when there is no session, no language, or nothing
    left to rate.
    """
    if sess and language_id:
        pending_ids = [item["id"]
                       for item in samples_for_reviewer(sess["id"], language_id)
                       if not item["rated"]]
        if pending_ids and pending_ids[0] is not None:
            return gr.update(value=pending_ids[0])
    return gr.update()

next_btn.click(go_next_unrated, [session, rate_lang], [rate_sample])
|
| 624 |
+
|
| 625 |
+
def score_lang(sess, sample_id):
    """Return the language id of a sample, or None when the sample is unknown.

    ``sess`` is accepted for signature compatibility and is not read.
    """
    with get_conn() as conn:
        row = conn.execute("SELECT language_id FROM samples WHERE id=?", (sample_id,)).fetchone()
    return row["language_id"] if row else None


def submit_rating(sess, sample_id, comments, *scores):
    """Validate and store a rating, then refresh the sample dropdown in place.

    ``scores`` arrive in ``CRITERIA_KEYS`` order. Returns (status markdown,
    update for the sample dropdown).
    """
    if not sess:
        return "❌ Not signed in.", gr.update()
    if not sample_id:
        return "❌ No sample selected.", gr.update()
    score_map = dict(zip(CRITERIA_KEYS, scores))
    missing = [lbl for (k, lbl, _) in CRITERIA if score_map.get(k) in (None, "")]
    if missing:
        return f"❌ Please rate every criterion. Missing: {', '.join(missing)}", gr.update()
    upsert_rating(sess["id"], sample_id, score_map, comments)
    items = samples_for_reviewer(sess["id"], score_lang(sess, sample_id))
    rated = sum(1 for s in items if s["rated"])
    message = f"✅ Saved. You have rated {rated}/{len(items)} samples in this language."
    if not items:
        return message, gr.update()
    # Refresh the labels so the rated marker appears, but keep the current
    # sample selected. Reloading the whole dropdown after each submit reset the
    # selection to the first sample of the language.
    choices = [(("✓ " if s["rated"] else "• ") + s["sample_name"], s["id"]) for s in items]
    return message, gr.update(choices=choices, value=sample_id)


submit_btn.click(
    submit_rating,
    [session, current_sample, rate_comments, *rate_outputs],
    [rate_msg, rate_sample],
)
|
| 652 |
+
|
| 653 |
+
# ---- Progress ----
|
| 654 |
+
def load_progress(sess):
    """Return (heading markdown, rows) describing the reviewer's progress.

    Each row is [language name, rated, total, remaining] for one of the
    session's languages.
    """
    if not sess:
        return "Not signed in.", []
    table = []
    for lang in sess["languages"]:
        samples = samples_for_reviewer(sess["id"], lang["id"])
        total = len(samples)
        done = len([s for s in samples if s["rated"]])
        table.append([lang["name"], done, total, total - done])
    if table:
        return f"Progress for **{sess['username']}**:", table
    return "You have no languages assigned yet. Add some under **My languages**.", []

refresh_prog_btn.click(load_progress, [session], [progress_md, progress_tbl])
|
| 666 |
+
|
| 667 |
+
# ---- Profile ----
|
| 668 |
+
def save_profile(sess, lang_ids):
    """Store the reviewer's language selection and rebuild the session from it.

    Returns (status markdown, new session, update for the rating-language
    dropdown). The dropdown is repopulated from the refreshed session and its
    first entry is selected.
    """
    if not sess:
        return "❌ Not signed in.", gr.update(), gr.update()
    set_user_languages(sess["id"], lang_ids or [])
    new_sess = user_session(sess["id"])
    rl = reviewer_lang_choices(new_sess)
    return ("✅ Languages updated.", new_sess,
            gr.update(choices=rl, value=(rl[0][1] if rl else None)))

prof_save.click(save_profile, [session, prof_langs], [prof_msg, session, rate_lang])
|
| 677 |
+
|
| 678 |
+
# ---- Admin: languages ----
|
| 679 |
+
def admin_add_language(sess, code, name):
    """Add a language (admins only) and refresh every language-driven widget.

    Returns the status markdown, the languages table, and one choices update
    for each of up_lang, res_lang, su_langs and prof_langs.
    """
    if not sess or sess["role"] != "admin":
        return "❌ Admin only.", _languages_table(), *_lang_dropdown_updates()
    try:
        add_language(code, name)
    except ValueError as e:
        return f"❌ {e}", _languages_table(), *_lang_dropdown_updates()
    return f"✅ Added {name} ({code}).", _languages_table(), *_lang_dropdown_updates()


def _languages_table():
    """Return [id, code, name] rows for the languages table."""
    return [[l["id"], l["code"], l["name"]] for l in list_languages()]


def _lang_dropdown_updates():
    """Return four identical choices updates, one per language selector."""
    ch = lang_choices()
    return (gr.update(choices=ch), gr.update(choices=ch), gr.update(choices=ch), gr.update(choices=ch))


al_btn.click(
    admin_add_language, [session, al_code, al_name],
    [al_msg, langs_tbl, up_lang, res_lang, su_langs, prof_langs],
)
|
| 699 |
+
|
| 700 |
+
# ---- Admin: samples ----
|
| 701 |
+
def admin_upload(sess, language_id, files, model, is_ref, transcript):
    """Register uploaded audio files as samples of a language (admins only).

    Returns (status markdown, rows for the samples table). Each file is
    imported independently: a failing file is reported in the status message
    and the remaining files are still processed.
    """
    if not sess or sess["role"] != "admin":
        # The samples table carries model names, so callers who are not
        # admins get an empty table.
        return "❌ Admin only.", []
    if not language_id:
        return "❌ Choose a language first.", _samples_table()
    if not files:
        return "❌ No files selected.", _samples_table()
    count = 0
    failures = []
    for f in files:
        # An entry is either a path string or an object exposing the path as .name.
        path = f if isinstance(f, str) else getattr(f, "name", None)
        try:
            add_sample(language_id, path, model_name=model, is_reference=is_ref, transcript=transcript)
        except Exception as e:  # noqa: BLE001 - UI boundary: report, do not crash
            label = os.path.basename(path) if isinstance(path, str) else "unknown file"
            failures.append(f"{label}: {e}")
        else:
            count += 1
    if failures:
        return (f"❌ Uploaded {count} sample(s); {len(failures)} failed: " + "; ".join(failures),
                _samples_table())
    return f"✅ Uploaded {count} sample(s).", _samples_table()


def _samples_table():
    """Return [id, "Language (code)", sample name, model, "yes"/"no"] rows for all samples."""
    return [[s["id"], f"{s['language_name']} ({s['language_code']})", s["sample_name"],
             s["model_name"], "yes" if s["is_reference"] else "no"] for s in list_samples()]


up_btn.click(admin_upload, [session, up_lang, up_files, up_model, up_isref, up_transcript],
             [up_msg, samples_tbl])
|
| 724 |
+
|
| 725 |
+
def admin_delete_sample(sess, sid):
    """Delete a sample by id (admins only); returns (status, samples table rows)."""
    if not sess or sess["role"] != "admin":
        # The samples table carries model names, so callers who are not
        # admins get an empty table.
        return "❌ Admin only.", []
    if not sid:
        return "❌ Enter a sample id.", _samples_table()
    delete_sample(int(sid))
    return f"✅ Deleted sample {int(sid)}.", _samples_table()

del_btn.click(admin_delete_sample, [session, del_sample_id], [up_msg, samples_tbl])
|
| 733 |
+
|
| 734 |
+
# ---- Admin: users ----
|
| 735 |
+
def _users_table():
    """Return one row per user: id, username, role, active flag, languages, rating count."""
    lang_sql = ("SELECT l.name FROM user_languages ul JOIN languages l ON l.id=ul.language_id "
                "WHERE ul.user_id=?")
    count_sql = "SELECT COUNT(*) c FROM ratings WHERE user_id=?"
    table = []
    with get_conn() as conn:
        users = conn.execute("SELECT * FROM users ORDER BY id").fetchall()
        for user in users:
            key = (user["id"],)
            lang_rows = conn.execute(lang_sql, key).fetchall()
            rating_count = conn.execute(count_sql, key).fetchone()["c"]
            active = "yes" if user["is_active"] else "no"
            names = ", ".join(entry["name"] for entry in lang_rows)
            table.append([user["id"], user["username"], user["role"], active, names, rating_count])
    return table


def _refresh_users(sess):
    """Return the users table for an admin session, otherwise an empty table."""
    if sess and sess["role"] == "admin":
        return _users_table()
    return []

refresh_users_btn.click(_refresh_users, [session], [users_tbl])
|
| 747 |
+
|
| 748 |
+
def admin_update_user(sess, uid, role, active):
    """Set a user's role and active flag (admins only).

    Returns (status markdown, rows for the users table). ``active`` equal to
    "yes" stores 1; any other value stores 0.
    """
    if not sess or sess["role"] != "admin":
        # Never hand the user list to a caller who is not an admin.
        return "❌ Admin only.", []
    if not uid:
        return "❌ Enter a user id.", _users_table()
    if role not in ("reviewer", "admin"):
        # Only the two roles offered by the dropdown may be stored.
        return "❌ Unknown role.", _users_table()
    with get_conn() as conn:
        cursor = conn.execute("UPDATE users SET role=?, is_active=? WHERE id=?",
                              (role, 1 if active == "yes" else 0, int(uid)))
        matched = cursor.rowcount
    if not matched:
        # Do not report success when the id matched no row.
        return f"❌ No user with id {int(uid)}.", _users_table()
    return f"✅ Updated user {int(uid)}.", _users_table()

update_user_btn.click(admin_update_user, [session, promote_id, role_choice, active_choice],
                      [user_admin_msg, users_tbl])
|
| 759 |
+
|
| 760 |
+
# ---- Admin: results ----
|
| 761 |
+
def admin_results(sess, language_id):
    """Compute MOS tables for a language (admins only).

    Returns (summary markdown, per-model frame, per-sample frame); both frames
    are empty when the caller is not an admin or no language is chosen.
    """
    if not sess or sess["role"] != "admin":
        return "❌ Admin only.", pd.DataFrame(), pd.DataFrame()
    if not language_id:
        return "Choose a language.", pd.DataFrame(), pd.DataFrame()
    per_model, per_sample, summary = compute_results(language_id)
    return summary, per_model, per_sample

res_btn.click(admin_results, [session, res_lang], [res_summary, res_model_tbl, res_sample_tbl])
|
| 769 |
+
|
| 770 |
+
def admin_export(sess, language_id):
    """Export a language's results for an admin; returns the file path or None."""
    allowed = bool(sess) and sess["role"] == "admin" and bool(language_id)
    if allowed:
        return export_results(language_id)
    return None

export_btn.click(admin_export, [session, res_lang], [res_file])
|
| 775 |
+
|
| 776 |
+
# Populate the admin tables whenever the session changes (login, profile save,
# logout). A second listener on the login button collected its ``session``
# input at click time, before ``do_login`` had stored the new session, so it
# saw the old value and returned empty tables for an admin who had just
# signed in.
def _populate_admin_tables(sess):
    """Return (languages, samples, users) rows for admins, empty tables otherwise."""
    if sess and sess["role"] == "admin":
        return _languages_table(), _samples_table(), _users_table()
    return [], [], []

session.change(_populate_admin_tables, [session], [langs_tbl, samples_tbl, users_tbl])
|
| 780 |
+
|
| 781 |
+
|
| 782 |
+
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    queued_app = demo.queue()
    listen_port = int(os.environ.get("PORT", 7860))
    queued_app.launch(server_name="0.0.0.0", server_port=listen_port)
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=5.0,<6.0
|
| 2 |
+
pandas>=2.0
|
| 3 |
+
openpyxl>=3.1
|