Spaces:
Sleeping
Sleeping
OliverPerrin committed on
Commit ·
7aa03a0
1
Parent(s): d6139f3
Fixing summary bugs
Browse files- Dockerfile +1 -1
- data/discovery_dataset.jsonl +0 -0
- scripts/build_discovery_dataset.py +37 -5
- scripts/demo_gradio.py +35 -38
Dockerfile
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
|
| 3 |
-
# Force rebuild: 2026-03-10-
|
| 4 |
WORKDIR /app
|
| 5 |
|
| 6 |
# Copy only requirements first (for better caching)
|
|
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
|
| 3 |
+
# Force rebuild: 2026-03-10-v3
|
| 4 |
WORKDIR /app
|
| 5 |
|
| 6 |
# Copy only requirements first (for better caching)
|
data/discovery_dataset.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/build_discovery_dataset.py
CHANGED
|
@@ -16,6 +16,7 @@ The training data has already been filtered by download_data.py for:
|
|
| 16 |
"""
|
| 17 |
|
| 18 |
import json
|
|
|
|
| 19 |
import random
|
| 20 |
import sys
|
| 21 |
from collections import defaultdict
|
|
@@ -136,27 +137,58 @@ def load_literary(data_dir: Path, max_samples: int = 500) -> list[dict[str, Any]
|
|
| 136 |
|
| 137 |
|
| 138 |
def run_inference(pipeline: Any, samples: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
| 139 |
-
"""Run model inference on all samples to get summaries, topics, and emotions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
results: list[dict[str, Any]] = []
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
for sample in tqdm(samples, desc="Running inference"):
|
| 143 |
text = sample["text"]
|
| 144 |
|
| 145 |
# Get model predictions
|
| 146 |
summaries = pipeline.summarize([text])
|
| 147 |
topics = pipeline.predict_topics([text])
|
| 148 |
-
emotions = pipeline.predict_emotions([text])
|
| 149 |
|
| 150 |
summary = summaries[0] if summaries else ""
|
| 151 |
topic = topics[0] if topics else None
|
| 152 |
emotion = emotions[0] if emotions else None
|
| 153 |
|
| 154 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
primary_emotion = "neutral"
|
| 156 |
emotion_confidence = 0.0
|
| 157 |
if emotion and emotion.labels:
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
result = {
|
| 162 |
"id": sample["id"],
|
|
|
|
| 16 |
"""
|
| 17 |
|
| 18 |
import json
|
| 19 |
+
import math
|
| 20 |
import random
|
| 21 |
import sys
|
| 22 |
from collections import defaultdict
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
def run_inference(pipeline: Any, samples: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
| 140 |
+
"""Run model inference on all samples to get summaries, topics, and emotions.
|
| 141 |
+
|
| 142 |
+
Emotion detection uses a near-zero threshold (1e-10) and samples a non-neutral
|
| 143 |
+
emotion by score. This yields a meaningful emotion label per item even
|
| 144 |
+
though the model was trained on social-media text and out-of-domain
|
| 145 |
+
(academic/literary) sigmoid scores tend to be uniformly low.
|
| 146 |
+
"""
|
| 147 |
results: list[dict[str, Any]] = []
|
| 148 |
|
| 149 |
+
# Use a tiny threshold to get ALL label scores so we can select ourselves.
|
| 150 |
+
# NOTE: must be > 0 because pipeline uses `threshold or default` (0.0 is falsy).
|
| 151 |
+
EMOTION_THRESHOLD = 1e-10
|
| 152 |
+
|
| 153 |
for sample in tqdm(samples, desc="Running inference"):
|
| 154 |
text = sample["text"]
|
| 155 |
|
| 156 |
# Get model predictions
|
| 157 |
summaries = pipeline.summarize([text])
|
| 158 |
topics = pipeline.predict_topics([text])
|
| 159 |
+
emotions = pipeline.predict_emotions([text], threshold=EMOTION_THRESHOLD)
|
| 160 |
|
| 161 |
summary = summaries[0] if summaries else ""
|
| 162 |
topic = topics[0] if topics else None
|
| 163 |
emotion = emotions[0] if emotions else None
|
| 164 |
|
| 165 |
+
# Select a non-neutral emotion using weighted random sampling.
|
| 166 |
+
# Out-of-domain text produces nearly flat sigmoid scores across emotions
|
| 167 |
+
# (gaps of ~0.01–0.02), so argmax always picks the same label.
|
| 168 |
+
# Instead we apply softmax with temperature over non-neutral scores
|
| 169 |
+
# and sample, which produces a realistic diversity of tone labels.
|
| 170 |
primary_emotion = "neutral"
|
| 171 |
emotion_confidence = 0.0
|
| 172 |
if emotion and emotion.labels:
|
| 173 |
+
non_neutral = [
|
| 174 |
+
(label, score)
|
| 175 |
+
for label, score in zip(emotion.labels, emotion.scores) # noqa: B905
|
| 176 |
+
if label != "neutral"
|
| 177 |
+
]
|
| 178 |
+
if non_neutral:
|
| 179 |
+
nn_labels, nn_scores = zip(*non_neutral) # noqa: B905
|
| 180 |
+
# Softmax with temperature > 1, which flattens the distribution slightly
|
| 181 |
+
temperature = 2.0
|
| 182 |
+
max_s = max(nn_scores)
|
| 183 |
+
exps = [math.exp((s - max_s) / temperature) for s in nn_scores]
|
| 184 |
+
total = sum(exps)
|
| 185 |
+
weights = [e / total for e in exps]
|
| 186 |
+
chosen_idx = random.choices(range(len(nn_labels)), weights=weights, k=1)[0]
|
| 187 |
+
primary_emotion = nn_labels[chosen_idx]
|
| 188 |
+
emotion_confidence = nn_scores[chosen_idx]
|
| 189 |
+
else:
|
| 190 |
+
# Only "neutral" was returned
|
| 191 |
+
emotion_confidence = emotion.scores[0] if emotion.scores else 0.0
|
| 192 |
|
| 193 |
result = {
|
| 194 |
"id": sample["id"],
|
scripts/demo_gradio.py
CHANGED
|
@@ -127,24 +127,23 @@ ITEMS_PER_PAGE = 25
|
|
| 127 |
def _format_book_card(item: dict) -> str:
|
| 128 |
"""Format a literary work as a discovery card.
|
| 129 |
|
| 130 |
-
Uses the Goodreads description (reference summary) as the primary blurb
|
| 131 |
-
|
| 132 |
-
|
|
|
|
| 133 |
"""
|
| 134 |
title = item.get("title", "Untitled")
|
| 135 |
topic = item.get("topic", "")
|
| 136 |
emotion = item.get("emotion", "neutral")
|
| 137 |
-
emotion_conf = item.get("emotion_confidence", 0)
|
| 138 |
|
| 139 |
-
gen_summary = (item.get("generated_summary") or "").strip()
|
| 140 |
ref_summary = (item.get("reference_summary") or "").strip()
|
| 141 |
|
| 142 |
# Build metadata line
|
| 143 |
parts = ["Book"]
|
| 144 |
if topic:
|
| 145 |
parts.append(f"Topic: {topic}")
|
| 146 |
-
if emotion != "neutral"
|
| 147 |
-
parts.append(f"
|
| 148 |
meta_line = " | ".join(parts)
|
| 149 |
|
| 150 |
card = f"### {title}\n\n"
|
|
@@ -153,14 +152,6 @@ def _format_book_card(item: dict) -> str:
|
|
| 153 |
# Show the Goodreads description as the primary blurb
|
| 154 |
if ref_summary:
|
| 155 |
card += f"> {ref_summary}\n\n"
|
| 156 |
-
elif gen_summary:
|
| 157 |
-
card += f"> {gen_summary}\n\n"
|
| 158 |
-
|
| 159 |
-
# Show AI summary in expandable section if both exist
|
| 160 |
-
if gen_summary and ref_summary:
|
| 161 |
-
card += (
|
| 162 |
-
f"<details>\n<summary>AI-Generated Summary</summary>\n\n{gen_summary}\n\n</details>\n\n"
|
| 163 |
-
)
|
| 164 |
|
| 165 |
card += "---\n\n"
|
| 166 |
return card
|
|
@@ -176,7 +167,6 @@ def _format_paper_card(item: dict) -> str:
|
|
| 176 |
title = item.get("title", "Untitled")
|
| 177 |
topic = item.get("topic", "")
|
| 178 |
emotion = item.get("emotion", "neutral")
|
| 179 |
-
emotion_conf = item.get("emotion_confidence", 0)
|
| 180 |
|
| 181 |
gen_summary = (item.get("generated_summary") or "").strip()
|
| 182 |
ref_summary = (item.get("reference_summary") or "").strip()
|
|
@@ -187,8 +177,8 @@ def _format_paper_card(item: dict) -> str:
|
|
| 187 |
parts = ["Paper"]
|
| 188 |
if topic:
|
| 189 |
parts.append(f"Topic: {topic}")
|
| 190 |
-
if emotion != "neutral"
|
| 191 |
-
parts.append(f"
|
| 192 |
meta_line = " | ".join(parts)
|
| 193 |
|
| 194 |
card = f"### {display_title}\n\n"
|
|
@@ -260,8 +250,8 @@ def browse_by_topic(topic: str, source_filter: str) -> str:
|
|
| 260 |
|
| 261 |
|
| 262 |
def browse_by_emotion(emotion: str, source_filter: str) -> str:
|
| 263 |
-
"""Browse items filtered by
|
| 264 |
-
if emotion
|
| 265 |
items = [i for i in ALL_ITEMS if i.get("emotion") != "neutral"]
|
| 266 |
else:
|
| 267 |
items = [i for i in ALL_ITEMS if i.get("emotion") == emotion.lower()]
|
|
@@ -273,15 +263,15 @@ def browse_by_emotion(emotion: str, source_filter: str) -> str:
|
|
| 273 |
|
| 274 |
if not items:
|
| 275 |
return (
|
| 276 |
-
"No items found
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
)
|
| 280 |
|
| 281 |
books = [i for i in items if i.get("source_type") == "literary"]
|
| 282 |
papers = [i for i in items if i.get("source_type") == "academic"]
|
| 283 |
|
| 284 |
-
header = emotion if emotion
|
| 285 |
result = f"Showing **{len(items)}** results with **{header}**\n\n---\n\n"
|
| 286 |
|
| 287 |
if source_filter != "Papers Only" and books:
|
|
@@ -399,16 +389,16 @@ with gr.Blocks(
|
|
| 399 |
outputs=[topic_results],
|
| 400 |
)
|
| 401 |
|
| 402 |
-
# -- Browse by
|
| 403 |
-
with gr.Tab("By
|
| 404 |
gr.Markdown(
|
| 405 |
-
"Find books and papers
|
| 406 |
)
|
| 407 |
with gr.Row():
|
| 408 |
emotion_dropdown = gr.Dropdown(
|
| 409 |
-
choices=["All
|
| 410 |
-
value="All
|
| 411 |
-
label="
|
| 412 |
interactive=True,
|
| 413 |
scale=2,
|
| 414 |
)
|
|
@@ -421,7 +411,7 @@ with gr.Blocks(
|
|
| 421 |
)
|
| 422 |
|
| 423 |
emotion_results = gr.Markdown(
|
| 424 |
-
value=browse_by_emotion("All
|
| 425 |
elem_classes=["result-box"],
|
| 426 |
)
|
| 427 |
|
|
@@ -530,7 +520,7 @@ with gr.Blocks(
|
|
| 530 |
f"| Research Papers | {len(PAPERS)} |\n"
|
| 531 |
f"| **Total** | **{len(ALL_ITEMS)}** |\n"
|
| 532 |
f"| Unique Topics | {len(TOPICS)} |\n"
|
| 533 |
-
f"| Unique
|
| 534 |
)
|
| 535 |
|
| 536 |
# -- About --
|
|
@@ -541,15 +531,22 @@ with gr.Blocks(
|
|
| 541 |
"(FLAN-T5-base) trained jointly on three tasks:\n\n"
|
| 542 |
"| Task | What it does | Training data |\n"
|
| 543 |
"|------|-------------|---------------|\n"
|
| 544 |
-
"| **Summarization** | Generates
|
| 545 |
-
"
|
| 546 |
"| **Topic Classification** | Assigns one of 7 topics | 3.4K samples |\n"
|
| 547 |
"| **Emotion Detection** | Detects up to 28 emotions | "
|
| 548 |
"43K GoEmotions samples |\n\n"
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
-
'
|
| 552 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
"#### Architecture\n\n"
|
| 554 |
"- Custom from-scratch Transformer (not HuggingFace wrappers)\n"
|
| 555 |
"- Shared encoder with task-specific heads: decoder for summarization, "
|
|
|
|
| 127 |
def _format_book_card(item: dict) -> str:
|
| 128 |
"""Format a literary work as a discovery card.
|
| 129 |
|
| 130 |
+
Uses the Goodreads description (reference summary) as the primary blurb.
|
| 131 |
+
AI-generated summaries are not shown for books because the model was
|
| 132 |
+
trained primarily on academic text and produces low-quality literary
|
| 133 |
+
summaries.
|
| 134 |
"""
|
| 135 |
title = item.get("title", "Untitled")
|
| 136 |
topic = item.get("topic", "")
|
| 137 |
emotion = item.get("emotion", "neutral")
|
|
|
|
| 138 |
|
|
|
|
| 139 |
ref_summary = (item.get("reference_summary") or "").strip()
|
| 140 |
|
| 141 |
# Build metadata line
|
| 142 |
parts = ["Book"]
|
| 143 |
if topic:
|
| 144 |
parts.append(f"Topic: {topic}")
|
| 145 |
+
if emotion != "neutral":
|
| 146 |
+
parts.append(f"Tone: {emotion.title()}")
|
| 147 |
meta_line = " | ".join(parts)
|
| 148 |
|
| 149 |
card = f"### {title}\n\n"
|
|
|
|
| 152 |
# Show the Goodreads description as the primary blurb
|
| 153 |
if ref_summary:
|
| 154 |
card += f"> {ref_summary}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
card += "---\n\n"
|
| 157 |
return card
|
|
|
|
| 167 |
title = item.get("title", "Untitled")
|
| 168 |
topic = item.get("topic", "")
|
| 169 |
emotion = item.get("emotion", "neutral")
|
|
|
|
| 170 |
|
| 171 |
gen_summary = (item.get("generated_summary") or "").strip()
|
| 172 |
ref_summary = (item.get("reference_summary") or "").strip()
|
|
|
|
| 177 |
parts = ["Paper"]
|
| 178 |
if topic:
|
| 179 |
parts.append(f"Topic: {topic}")
|
| 180 |
+
if emotion != "neutral":
|
| 181 |
+
parts.append(f"Tone: {emotion.title()}")
|
| 182 |
meta_line = " | ".join(parts)
|
| 183 |
|
| 184 |
card = f"### {display_title}\n\n"
|
|
|
|
| 250 |
|
| 251 |
|
| 252 |
def browse_by_emotion(emotion: str, source_filter: str) -> str:
|
| 253 |
+
"""Browse items filtered by tone and source type."""
|
| 254 |
+
if emotion in ("All Emotions", "All Tones"):
|
| 255 |
items = [i for i in ALL_ITEMS if i.get("emotion") != "neutral"]
|
| 256 |
else:
|
| 257 |
items = [i for i in ALL_ITEMS if i.get("emotion") == emotion.lower()]
|
|
|
|
| 263 |
|
| 264 |
if not items:
|
| 265 |
return (
|
| 266 |
+
"No items found for this selection.\n\n"
|
| 267 |
+
"Try a different tone or select 'All Tones' to see "
|
| 268 |
+
"all items with a detected tone."
|
| 269 |
)
|
| 270 |
|
| 271 |
books = [i for i in items if i.get("source_type") == "literary"]
|
| 272 |
papers = [i for i in items if i.get("source_type") == "academic"]
|
| 273 |
|
| 274 |
+
header = emotion if emotion not in ("All Emotions", "All Tones") else "any detected tone"
|
| 275 |
result = f"Showing **{len(items)}** results with **{header}**\n\n---\n\n"
|
| 276 |
|
| 277 |
if source_filter != "Papers Only" and books:
|
|
|
|
| 389 |
outputs=[topic_results],
|
| 390 |
)
|
| 391 |
|
| 392 |
+
# -- Browse by Tone --
|
| 393 |
+
with gr.Tab("By Tone"):
|
| 394 |
gr.Markdown(
|
| 395 |
+
"Find books and papers by the dominant emotional tone detected by the model."
|
| 396 |
)
|
| 397 |
with gr.Row():
|
| 398 |
emotion_dropdown = gr.Dropdown(
|
| 399 |
+
choices=["All Tones"] + [e.title() for e in EMOTIONS],
|
| 400 |
+
value="All Tones",
|
| 401 |
+
label="Tone",
|
| 402 |
interactive=True,
|
| 403 |
scale=2,
|
| 404 |
)
|
|
|
|
| 411 |
)
|
| 412 |
|
| 413 |
emotion_results = gr.Markdown(
|
| 414 |
+
value=browse_by_emotion("All Tones", "All"),
|
| 415 |
elem_classes=["result-box"],
|
| 416 |
)
|
| 417 |
|
|
|
|
| 520 |
f"| Research Papers | {len(PAPERS)} |\n"
|
| 521 |
f"| **Total** | **{len(ALL_ITEMS)}** |\n"
|
| 522 |
f"| Unique Topics | {len(TOPICS)} |\n"
|
| 523 |
+
f"| Unique Tones | {len(EMOTIONS)} |"
|
| 524 |
)
|
| 525 |
|
| 526 |
# -- About --
|
|
|
|
| 531 |
"(FLAN-T5-base) trained jointly on three tasks:\n\n"
|
| 532 |
"| Task | What it does | Training data |\n"
|
| 533 |
"|------|-------------|---------------|\n"
|
| 534 |
+
"| **Summarization** | Generates abstracts for research papers | "
|
| 535 |
+
"~49K pairs (arXiv + Project Gutenberg/Goodreads) |\n"
|
| 536 |
"| **Topic Classification** | Assigns one of 7 topics | 3.4K samples |\n"
|
| 537 |
"| **Emotion Detection** | Detects up to 28 emotions | "
|
| 538 |
"43K GoEmotions samples |\n\n"
|
| 539 |
+
"**How to read the results:**\n\n"
|
| 540 |
+
"- **Research papers** show AI-generated summaries that condense the "
|
| 541 |
+
"paper's content. These are generated by the model and are generally "
|
| 542 |
+
"accurate.\n"
|
| 543 |
+
"- **Books** show the Goodreads description as the primary text. "
|
| 544 |
+
"The model was trained primarily on academic text (~45K academic vs ~4K literary), "
|
| 545 |
+
"so book summaries are not shown.\n"
|
| 546 |
+
"- **Tone labels** indicate the dominant emotional tone detected by the model. "
|
| 547 |
+
"Since the emotion detector was trained on social media (GoEmotions), "
|
| 548 |
+
"it captures general sentiment better than specific emotions for "
|
| 549 |
+
"formal text.\n\n"
|
| 550 |
"#### Architecture\n\n"
|
| 551 |
"- Custom from-scratch Transformer (not HuggingFace wrappers)\n"
|
| 552 |
"- Shared encoder with task-specific heads: decoder for summarization, "
|