Spaces:
Sleeping
Sleeping
OliverPerrin committed on
Commit ·
f1ab267
1
Parent(s): b93250a
Updated Gradio Demo to match new approach for book descriptions and shown metrics
Browse files- scripts/demo_gradio.py +129 -18
scripts/demo_gradio.py
CHANGED
|
@@ -10,6 +10,8 @@ Date: 2026-01-14
|
|
| 10 |
|
| 11 |
from __future__ import annotations
|
| 12 |
|
|
|
|
|
|
|
| 13 |
from typing import Any
|
| 14 |
|
| 15 |
import gradio as gr
|
|
@@ -36,6 +38,18 @@ print(f"Topics: {TOPICS}")
|
|
| 36 |
print(f"Emotions: {EMOTIONS}")
|
| 37 |
print(f"Books: {len(BOOKS)}, Papers: {len(PAPERS)}")
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# --------------- Filter Functions ---------------
|
| 41 |
|
|
@@ -78,10 +92,10 @@ def format_item_card(item: dict) -> str:
|
|
| 78 |
use_reference = item.get("use_reference_summary", False)
|
| 79 |
if use_reference or source_type == "literary":
|
| 80 |
summary = item.get("reference_summary", "")
|
| 81 |
-
summary_label = "π **
|
| 82 |
else:
|
| 83 |
summary = item.get("generated_summary", "")
|
| 84 |
-
summary_label = "🤖 **AI-Generated
|
| 85 |
|
| 86 |
if not summary:
|
| 87 |
summary = "No summary available."
|
|
@@ -215,13 +229,13 @@ with gr.Blocks(
|
|
| 215 |
|
| 216 |
| Source | Count | Description |
|
| 217 |
|--------|-------|-------------|
|
| 218 |
-
| π Literature | {lit_count} | Classic
|
| 219 |
| π Research | {paper_count} | Scientific papers from arXiv |
|
| 220 |
|
| 221 |
**Model Capabilities:**
|
| 222 |
- 🏷️ **Topic Classification**: Fiction, Science, History, Philosophy, Arts, Business, Technology
|
| 223 |
- π **Emotion Detection**: 28 emotions (joy, sadness, anger, fear, surprise, love, etc.)
|
| 224 |
-
- π **
|
| 225 |
|
| 226 |
---
|
| 227 |
""".format(
|
|
@@ -297,7 +311,101 @@ with gr.Blocks(
|
|
| 297 |
outputs=[search_results],
|
| 298 |
)
|
| 299 |
|
| 300 |
-
# ===================== TAB 4:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
with gr.Tab("ℹ️ About"):
|
| 302 |
gr.Markdown(
|
| 303 |
"""
|
|
@@ -307,7 +415,7 @@ with gr.Blocks(
|
|
| 307 |
|
| 308 |
| Task | Description |
|
| 309 |
|------|-------------|
|
| 310 |
-
| **
|
| 311 |
| **Topic Classification** | Categorize into Fiction, Science, Technology, Philosophy, History, Business, Arts |
|
| 312 |
| **Emotion Detection** | Identify emotional tones (28 emotions from GoEmotions) |
|
| 313 |
|
|
@@ -321,21 +429,24 @@ with gr.Blocks(
|
|
| 321 |
|
| 322 |
### Training Data
|
| 323 |
|
| 324 |
-
| Dataset | Task |
|
| 325 |
-
|---------|------|
|
| 326 |
-
|
|
| 327 |
-
|
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
-
|
|
|
|
|
|
|
| 331 |
|
| 332 |
-
|
| 333 |
-
- 100 Gutenberg books
|
| 334 |
-
- 80 arXiv academic papers
|
| 335 |
-
- 20 BookSum literary excerpts
|
| 336 |
|
| 337 |
-
|
| 338 |
-
|
|
|
|
| 339 |
|
| 340 |
### Links
|
| 341 |
|
|
|
|
| 10 |
|
| 11 |
from __future__ import annotations
|
| 12 |
|
| 13 |
+
import json
|
| 14 |
+
from pathlib import Path
|
| 15 |
from typing import Any
|
| 16 |
|
| 17 |
import gradio as gr
|
|
|
|
| 38 |
print(f"Emotions: {EMOTIONS}")
|
| 39 |
print(f"Books: {len(BOOKS)}, Papers: {len(PAPERS)}")
|
| 40 |
|
| 41 |
+
# --------------- Load Evaluation Metrics ---------------
|
| 42 |
+
|
| 43 |
+
METRICS: dict[str, Any] = {}
|
| 44 |
+
_metrics_path = Path(__file__).parent.parent / "outputs" / "evaluation_report.json"
|
| 45 |
+
if _metrics_path.exists():
|
| 46 |
+
try:
|
| 47 |
+
with open(_metrics_path) as f:
|
| 48 |
+
METRICS = json.load(f)
|
| 49 |
+
print(f"Loaded evaluation metrics from {_metrics_path}")
|
| 50 |
+
except Exception as e:
|
| 51 |
+
print(f"Warning: Could not load metrics: {e}")
|
| 52 |
+
|
| 53 |
|
| 54 |
# --------------- Filter Functions ---------------
|
| 55 |
|
|
|
|
| 92 |
use_reference = item.get("use_reference_summary", False)
|
| 93 |
if use_reference or source_type == "literary":
|
| 94 |
summary = item.get("reference_summary", "")
|
| 95 |
+
summary_label = "π **Book Description** (Goodreads-style):"
|
| 96 |
else:
|
| 97 |
summary = item.get("generated_summary", "")
|
| 98 |
+
summary_label = "🤖 **AI-Generated Description:**"
|
| 99 |
|
| 100 |
if not summary:
|
| 101 |
summary = "No summary available."
|
|
|
|
| 229 |
|
| 230 |
| Source | Count | Description |
|
| 231 |
|--------|-------|-------------|
|
| 232 |
+
| π Literature | {lit_count} | Classic books with Goodreads-style descriptions |
|
| 233 |
| π Research | {paper_count} | Scientific papers from arXiv |
|
| 234 |
|
| 235 |
**Model Capabilities:**
|
| 236 |
- 🏷️ **Topic Classification**: Fiction, Science, History, Philosophy, Arts, Business, Technology
|
| 237 |
- π **Emotion Detection**: 28 emotions (joy, sadness, anger, fear, surprise, love, etc.)
|
| 238 |
+
- π **Book Descriptions**: Back-cover style summaries of what texts are about
|
| 239 |
|
| 240 |
---
|
| 241 |
""".format(
|
|
|
|
| 311 |
outputs=[search_results],
|
| 312 |
)
|
| 313 |
|
| 314 |
+
# ===================== TAB 4: METRICS =====================
|
| 315 |
+
with gr.Tab("π Model Metrics"):
|
| 316 |
+
gr.Markdown(
|
| 317 |
+
"""
|
| 318 |
+
### Evaluation Metrics
|
| 319 |
+
|
| 320 |
+
LexiMind is evaluated using comprehensive metrics across all three tasks.
|
| 321 |
+
Metrics are computed on held-out validation data.
|
| 322 |
+
"""
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
# Summarization Metrics
|
| 326 |
+
gr.Markdown("#### π Summarization Metrics")
|
| 327 |
+
|
| 328 |
+
if METRICS.get("summarization"):
|
| 329 |
+
summ = METRICS["summarization"]
|
| 330 |
+
summ_md = """
|
| 331 |
+
| Metric | Score | Description |
|
| 332 |
+
|--------|-------|-------------|
|
| 333 |
+
| **ROUGE-1** | {rouge1:.4f} | Unigram overlap with reference |
|
| 334 |
+
| **ROUGE-2** | {rouge2:.4f} | Bigram overlap with reference |
|
| 335 |
+
| **ROUGE-L** | {rougeL:.4f} | Longest common subsequence |
|
| 336 |
+
| **BLEU-4** | {bleu4:.4f} | 4-gram precision score |
|
| 337 |
+
| **BERTScore F1** | {bertscore:.4f} | Semantic similarity (contextual) |
|
| 338 |
+
|
| 339 |
+
*Note: For back-cover style descriptions, BERTScore is more meaningful than ROUGE
|
| 340 |
+
since descriptions paraphrase rather than quote the source text.*
|
| 341 |
+
""".format(
|
| 342 |
+
rouge1=summ.get("rouge_rouge1", summ.get("rouge1", 0)),
|
| 343 |
+
rouge2=summ.get("rouge_rouge2", summ.get("rouge2", 0)),
|
| 344 |
+
rougeL=summ.get("rouge_rougeL", summ.get("rougeL", 0)),
|
| 345 |
+
bleu4=summ.get("bleu4", 0),
|
| 346 |
+
bertscore=summ.get("bertscore_f1", 0),
|
| 347 |
+
)
|
| 348 |
+
gr.Markdown(summ_md)
|
| 349 |
+
else:
|
| 350 |
+
gr.Markdown("*Summarization metrics not available. Run evaluation script.*")
|
| 351 |
+
|
| 352 |
+
# Topic Classification Metrics
|
| 353 |
+
gr.Markdown("#### 🏷️ Topic Classification Metrics")
|
| 354 |
+
|
| 355 |
+
if METRICS.get("topic"):
|
| 356 |
+
topic = METRICS["topic"]
|
| 357 |
+
topic_md = """
|
| 358 |
+
| Metric | Score |
|
| 359 |
+
|--------|-------|
|
| 360 |
+
| **Accuracy** | {accuracy:.2%} |
|
| 361 |
+
| **Macro F1** | {f1:.4f} |
|
| 362 |
+
| **Precision** | {precision:.4f} |
|
| 363 |
+
| **Recall** | {recall:.4f} |
|
| 364 |
+
""".format(
|
| 365 |
+
accuracy=topic.get("accuracy", 0),
|
| 366 |
+
f1=topic.get("f1", topic.get("macro_f1", 0)),
|
| 367 |
+
precision=topic.get("precision", 0),
|
| 368 |
+
recall=topic.get("recall", 0),
|
| 369 |
+
)
|
| 370 |
+
gr.Markdown(topic_md)
|
| 371 |
+
else:
|
| 372 |
+
gr.Markdown("*Topic classification metrics not available.*")
|
| 373 |
+
|
| 374 |
+
# Emotion Detection Metrics
|
| 375 |
+
gr.Markdown("#### π Emotion Detection Metrics")
|
| 376 |
+
|
| 377 |
+
if METRICS.get("emotion"):
|
| 378 |
+
emotion = METRICS["emotion"]
|
| 379 |
+
emotion_md = """
|
| 380 |
+
| Metric | Score |
|
| 381 |
+
|--------|-------|
|
| 382 |
+
| **Multi-label F1** | {f1:.4f} |
|
| 383 |
+
| **Precision** | {precision:.4f} |
|
| 384 |
+
| **Recall** | {recall:.4f} |
|
| 385 |
+
|
| 386 |
+
*Emotion detection uses 28 labels from GoEmotions. Multiple emotions can be assigned to each text.*
|
| 387 |
+
""".format(
|
| 388 |
+
f1=emotion.get("f1", emotion.get("multilabel_f1", 0)),
|
| 389 |
+
precision=emotion.get("precision", 0),
|
| 390 |
+
recall=emotion.get("recall", 0),
|
| 391 |
+
)
|
| 392 |
+
gr.Markdown(emotion_md)
|
| 393 |
+
else:
|
| 394 |
+
gr.Markdown("*Emotion detection metrics not available.*")
|
| 395 |
+
|
| 396 |
+
# Dataset Statistics
|
| 397 |
+
gr.Markdown("#### π Dataset Statistics")
|
| 398 |
+
gr.Markdown(f"""
|
| 399 |
+
| Statistic | Value |
|
| 400 |
+
|-----------|-------|
|
| 401 |
+
| Total Items | {len(ALL_ITEMS)} |
|
| 402 |
+
| Literary Works | {len(BOOKS)} |
|
| 403 |
+
| Academic Papers | {len(PAPERS)} |
|
| 404 |
+
| Unique Topics | {len(TOPICS)} |
|
| 405 |
+
| Unique Emotions | {len(EMOTIONS)} |
|
| 406 |
+
""")
|
| 407 |
+
|
| 408 |
+
# ===================== TAB 5: ABOUT =====================
|
| 409 |
with gr.Tab("ℹ️ About"):
|
| 410 |
gr.Markdown(
|
| 411 |
"""
|
|
|
|
| 415 |
|
| 416 |
| Task | Description |
|
| 417 |
|------|-------------|
|
| 418 |
+
| **Book Descriptions** | Generate back-cover style descriptions of what books are about |
|
| 419 |
| **Topic Classification** | Categorize into Fiction, Science, Technology, Philosophy, History, Business, Arts |
|
| 420 |
| **Emotion Detection** | Identify emotional tones (28 emotions from GoEmotions) |
|
| 421 |
|
|
|
|
| 429 |
|
| 430 |
### Training Data
|
| 431 |
|
| 432 |
+
| Dataset | Task | Description |
|
| 433 |
+
|---------|------|-------------|
|
| 434 |
+
| Goodreads (711k+ blurbs) | Book Descriptions | Back-cover style descriptions matched with Gutenberg texts |
|
| 435 |
+
| arXiv | Paper Abstracts | Scientific paper summarization |
|
| 436 |
+
| 20 Newsgroups + Gutenberg | Topic Classification | Multi-domain topic categorization |
|
| 437 |
+
| GoEmotions | Emotion Detection | 28-class multi-label emotion classification |
|
| 438 |
+
|
| 439 |
+
### Key Design Decision
|
| 440 |
|
| 441 |
+
LexiMind generates **back-cover style descriptions** (what a book is about) rather than
|
| 442 |
+
plot summaries (what happens in the book). This is achieved by training on Goodreads
|
| 443 |
+
descriptions paired with Project Gutenberg book texts.
|
| 444 |
|
| 445 |
+
### Evaluation Metrics
|
|
|
|
|
|
|
|
|
|
| 446 |
|
| 447 |
+
- **ROUGE-1/2/L**: Lexical overlap (expected range: 0.15-0.25 for descriptions)
|
| 448 |
+
- **BLEU-4**: N-gram precision
|
| 449 |
+
- **BERTScore**: Semantic similarity using contextual embeddings (key metric for paraphrasing)
|
| 450 |
|
| 451 |
### Links
|
| 452 |
|