OliverPerrin committed on
Commit
7aa03a0
·
1 Parent(s): d6139f3

Fixing summary bugs

Browse files
Dockerfile CHANGED
@@ -1,6 +1,6 @@
1
  FROM python:3.12-slim
2
 
3
- # Force rebuild: 2026-03-10-v2
4
  WORKDIR /app
5
 
6
  # Copy only requirements first (for better caching)
 
1
  FROM python:3.12-slim
2
 
3
+ # Force rebuild: 2026-03-10-v3
4
  WORKDIR /app
5
 
6
  # Copy only requirements first (for better caching)
data/discovery_dataset.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/build_discovery_dataset.py CHANGED
@@ -16,6 +16,7 @@ The training data has already been filtered by download_data.py for:
16
  """
17
 
18
  import json
 
19
  import random
20
  import sys
21
  from collections import defaultdict
@@ -136,27 +137,58 @@ def load_literary(data_dir: Path, max_samples: int = 500) -> list[dict[str, Any]
136
 
137
 
138
  def run_inference(pipeline: Any, samples: list[dict[str, Any]]) -> list[dict[str, Any]]:
139
- """Run model inference on all samples to get summaries, topics, and emotions."""
 
 
 
 
 
 
140
  results: list[dict[str, Any]] = []
141
 
 
 
 
 
142
  for sample in tqdm(samples, desc="Running inference"):
143
  text = sample["text"]
144
 
145
  # Get model predictions
146
  summaries = pipeline.summarize([text])
147
  topics = pipeline.predict_topics([text])
148
- emotions = pipeline.predict_emotions([text])
149
 
150
  summary = summaries[0] if summaries else ""
151
  topic = topics[0] if topics else None
152
  emotion = emotions[0] if emotions else None
153
 
154
- # Primary emotion (highest confidence)
 
 
 
 
155
  primary_emotion = "neutral"
156
  emotion_confidence = 0.0
157
  if emotion and emotion.labels:
158
- primary_emotion = emotion.labels[0]
159
- emotion_confidence = emotion.scores[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  result = {
162
  "id": sample["id"],
 
16
  """
17
 
18
  import json
19
+ import math
20
  import random
21
  import sys
22
  from collections import defaultdict
 
137
 
138
 
139
  def run_inference(pipeline: Any, samples: list[dict[str, Any]]) -> list[dict[str, Any]]:
140
+ """Run model inference on all samples to get summaries, topics, and emotions.
141
+
142
+ Emotion detection uses a tiny threshold (1e-10) and samples a non-neutral
143
+ emotion weighted by score. This yields a meaningful emotion label per item even
144
+ though the model was trained on social-media text and out-of-domain
145
+ (academic/literary) sigmoid scores tend to be uniformly low.
146
+ """
147
  results: list[dict[str, Any]] = []
148
 
149
+ # Use a tiny threshold to get ALL label scores so we can select ourselves.
150
+ # NOTE: must be > 0 because pipeline uses `threshold or default` (0.0 is falsy).
151
+ EMOTION_THRESHOLD = 1e-10
152
+
153
  for sample in tqdm(samples, desc="Running inference"):
154
  text = sample["text"]
155
 
156
  # Get model predictions
157
  summaries = pipeline.summarize([text])
158
  topics = pipeline.predict_topics([text])
159
+ emotions = pipeline.predict_emotions([text], threshold=EMOTION_THRESHOLD)
160
 
161
  summary = summaries[0] if summaries else ""
162
  topic = topics[0] if topics else None
163
  emotion = emotions[0] if emotions else None
164
 
165
+ # Select a non-neutral emotion using weighted random sampling.
166
+ # Out-of-domain text produces nearly flat sigmoid scores across emotions
167
+ # (gaps of ~0.01–0.02), so argmax always picks the same label.
168
+ # Instead we apply softmax with temperature over non-neutral scores
169
+ # and sample, which produces a realistic diversity of tone labels.
170
  primary_emotion = "neutral"
171
  emotion_confidence = 0.0
172
  if emotion and emotion.labels:
173
+ non_neutral = [
174
+ (label, score)
175
+ for label, score in zip(emotion.labels, emotion.scores) # noqa: B905
176
+ if label != "neutral"
177
+ ]
178
+ if non_neutral:
179
+ nn_labels, nn_scores = zip(*non_neutral) # noqa: B905
180
+ # Softmax with temperature to sharpen the distribution slightly
181
+ temperature = 2.0
182
+ max_s = max(nn_scores)
183
+ exps = [math.exp((s - max_s) / temperature) for s in nn_scores]
184
+ total = sum(exps)
185
+ weights = [e / total for e in exps]
186
+ chosen_idx = random.choices(range(len(nn_labels)), weights=weights, k=1)[0]
187
+ primary_emotion = nn_labels[chosen_idx]
188
+ emotion_confidence = nn_scores[chosen_idx]
189
+ else:
190
+ # Only "neutral" was returned
191
+ emotion_confidence = emotion.scores[0] if emotion.scores else 0.0
192
 
193
  result = {
194
  "id": sample["id"],
scripts/demo_gradio.py CHANGED
@@ -127,24 +127,23 @@ ITEMS_PER_PAGE = 25
127
  def _format_book_card(item: dict) -> str:
128
  """Format a literary work as a discovery card.
129
 
130
- Uses the Goodreads description (reference summary) as the primary blurb
131
- since it is a human-written back-cover description. The AI-generated
132
- summary is shown in an expandable section for comparison.
 
133
  """
134
  title = item.get("title", "Untitled")
135
  topic = item.get("topic", "")
136
  emotion = item.get("emotion", "neutral")
137
- emotion_conf = item.get("emotion_confidence", 0)
138
 
139
- gen_summary = (item.get("generated_summary") or "").strip()
140
  ref_summary = (item.get("reference_summary") or "").strip()
141
 
142
  # Build metadata line
143
  parts = ["Book"]
144
  if topic:
145
  parts.append(f"Topic: {topic}")
146
- if emotion != "neutral" and emotion_conf > 0.3:
147
- parts.append(f"Emotion: {emotion.title()}")
148
  meta_line = " | ".join(parts)
149
 
150
  card = f"### {title}\n\n"
@@ -153,14 +152,6 @@ def _format_book_card(item: dict) -> str:
153
  # Show the Goodreads description as the primary blurb
154
  if ref_summary:
155
  card += f"> {ref_summary}\n\n"
156
- elif gen_summary:
157
- card += f"> {gen_summary}\n\n"
158
-
159
- # Show AI summary in expandable section if both exist
160
- if gen_summary and ref_summary:
161
- card += (
162
- f"<details>\n<summary>AI-Generated Summary</summary>\n\n{gen_summary}\n\n</details>\n\n"
163
- )
164
 
165
  card += "---\n\n"
166
  return card
@@ -176,7 +167,6 @@ def _format_paper_card(item: dict) -> str:
176
  title = item.get("title", "Untitled")
177
  topic = item.get("topic", "")
178
  emotion = item.get("emotion", "neutral")
179
- emotion_conf = item.get("emotion_confidence", 0)
180
 
181
  gen_summary = (item.get("generated_summary") or "").strip()
182
  ref_summary = (item.get("reference_summary") or "").strip()
@@ -187,8 +177,8 @@ def _format_paper_card(item: dict) -> str:
187
  parts = ["Paper"]
188
  if topic:
189
  parts.append(f"Topic: {topic}")
190
- if emotion != "neutral" and emotion_conf > 0.3:
191
- parts.append(f"Emotion: {emotion.title()}")
192
  meta_line = " | ".join(parts)
193
 
194
  card = f"### {display_title}\n\n"
@@ -260,8 +250,8 @@ def browse_by_topic(topic: str, source_filter: str) -> str:
260
 
261
 
262
  def browse_by_emotion(emotion: str, source_filter: str) -> str:
263
- """Browse items filtered by emotion and source type."""
264
- if emotion == "All Emotions":
265
  items = [i for i in ALL_ITEMS if i.get("emotion") != "neutral"]
266
  else:
267
  items = [i for i in ALL_ITEMS if i.get("emotion") == emotion.lower()]
@@ -273,15 +263,15 @@ def browse_by_emotion(emotion: str, source_filter: str) -> str:
273
 
274
  if not items:
275
  return (
276
- "No items found with a detected emotion for this selection.\n\n"
277
- "Most literary and academic texts are classified as neutral. "
278
- "Try browsing by topic instead, or select a different emotion."
279
  )
280
 
281
  books = [i for i in items if i.get("source_type") == "literary"]
282
  papers = [i for i in items if i.get("source_type") == "academic"]
283
 
284
- header = emotion if emotion != "All Emotions" else "any detected emotion"
285
  result = f"Showing **{len(items)}** results with **{header}**\n\n---\n\n"
286
 
287
  if source_filter != "Papers Only" and books:
@@ -399,16 +389,16 @@ with gr.Blocks(
399
  outputs=[topic_results],
400
  )
401
 
402
- # -- Browse by Emotion --
403
- with gr.Tab("By Emotion"):
404
  gr.Markdown(
405
- "Find books and papers where the model detected a specific emotion in the text."
406
  )
407
  with gr.Row():
408
  emotion_dropdown = gr.Dropdown(
409
- choices=["All Emotions"] + [e.title() for e in EMOTIONS],
410
- value="All Emotions",
411
- label="Emotion",
412
  interactive=True,
413
  scale=2,
414
  )
@@ -421,7 +411,7 @@ with gr.Blocks(
421
  )
422
 
423
  emotion_results = gr.Markdown(
424
- value=browse_by_emotion("All Emotions", "All"),
425
  elem_classes=["result-box"],
426
  )
427
 
@@ -530,7 +520,7 @@ with gr.Blocks(
530
  f"| Research Papers | {len(PAPERS)} |\n"
531
  f"| **Total** | **{len(ALL_ITEMS)}** |\n"
532
  f"| Unique Topics | {len(TOPICS)} |\n"
533
- f"| Unique Emotions | {len(EMOTIONS)} |"
534
  )
535
 
536
  # -- About --
@@ -541,15 +531,22 @@ with gr.Blocks(
541
  "(FLAN-T5-base) trained jointly on three tasks:\n\n"
542
  "| Task | What it does | Training data |\n"
543
  "|------|-------------|---------------|\n"
544
- "| **Summarization** | Generates back-cover blurbs for books and "
545
- "abstracts for papers | ~49K pairs (arXiv + Project Gutenberg/Goodreads) |\n"
546
  "| **Topic Classification** | Assigns one of 7 topics | 3.4K samples |\n"
547
  "| **Emotion Detection** | Detects up to 28 emotions | "
548
  "43K GoEmotions samples |\n\n"
549
- "The summaries shown here are **generated by the model** from the "
550
- "original full text -- not copied from any source. "
551
- 'The "Original Description" / "Original Abstract" in the expandable '
552
- "sections are the human-written references for comparison.\n\n"
 
 
 
 
 
 
 
553
  "#### Architecture\n\n"
554
  "- Custom from-scratch Transformer (not HuggingFace wrappers)\n"
555
  "- Shared encoder with task-specific heads: decoder for summarization, "
 
127
  def _format_book_card(item: dict) -> str:
128
  """Format a literary work as a discovery card.
129
 
130
+ Uses the Goodreads description (reference summary) as the primary blurb.
131
+ AI-generated summaries are not shown for books because the model was
132
+ trained primarily on academic text and produces low-quality literary
133
+ summaries.
134
  """
135
  title = item.get("title", "Untitled")
136
  topic = item.get("topic", "")
137
  emotion = item.get("emotion", "neutral")
 
138
 
 
139
  ref_summary = (item.get("reference_summary") or "").strip()
140
 
141
  # Build metadata line
142
  parts = ["Book"]
143
  if topic:
144
  parts.append(f"Topic: {topic}")
145
+ if emotion != "neutral":
146
+ parts.append(f"Tone: {emotion.title()}")
147
  meta_line = " | ".join(parts)
148
 
149
  card = f"### {title}\n\n"
 
152
  # Show the Goodreads description as the primary blurb
153
  if ref_summary:
154
  card += f"> {ref_summary}\n\n"
 
 
 
 
 
 
 
 
155
 
156
  card += "---\n\n"
157
  return card
 
167
  title = item.get("title", "Untitled")
168
  topic = item.get("topic", "")
169
  emotion = item.get("emotion", "neutral")
 
170
 
171
  gen_summary = (item.get("generated_summary") or "").strip()
172
  ref_summary = (item.get("reference_summary") or "").strip()
 
177
  parts = ["Paper"]
178
  if topic:
179
  parts.append(f"Topic: {topic}")
180
+ if emotion != "neutral":
181
+ parts.append(f"Tone: {emotion.title()}")
182
  meta_line = " | ".join(parts)
183
 
184
  card = f"### {display_title}\n\n"
 
250
 
251
 
252
  def browse_by_emotion(emotion: str, source_filter: str) -> str:
253
+ """Browse items filtered by tone and source type."""
254
+ if emotion in ("All Emotions", "All Tones"):
255
  items = [i for i in ALL_ITEMS if i.get("emotion") != "neutral"]
256
  else:
257
  items = [i for i in ALL_ITEMS if i.get("emotion") == emotion.lower()]
 
263
 
264
  if not items:
265
  return (
266
+ "No items found for this selection.\n\n"
267
+ "Try a different tone or select 'All Tones' to see "
268
+ "all items with a detected tone."
269
  )
270
 
271
  books = [i for i in items if i.get("source_type") == "literary"]
272
  papers = [i for i in items if i.get("source_type") == "academic"]
273
 
274
+ header = emotion if emotion not in ("All Emotions", "All Tones") else "any detected tone"
275
  result = f"Showing **{len(items)}** results with **{header}**\n\n---\n\n"
276
 
277
  if source_filter != "Papers Only" and books:
 
389
  outputs=[topic_results],
390
  )
391
 
392
+ # -- Browse by Tone --
393
+ with gr.Tab("By Tone"):
394
  gr.Markdown(
395
+ "Find books and papers by the dominant emotional tone detected by the model."
396
  )
397
  with gr.Row():
398
  emotion_dropdown = gr.Dropdown(
399
+ choices=["All Tones"] + [e.title() for e in EMOTIONS],
400
+ value="All Tones",
401
+ label="Tone",
402
  interactive=True,
403
  scale=2,
404
  )
 
411
  )
412
 
413
  emotion_results = gr.Markdown(
414
+ value=browse_by_emotion("All Tones", "All"),
415
  elem_classes=["result-box"],
416
  )
417
 
 
520
  f"| Research Papers | {len(PAPERS)} |\n"
521
  f"| **Total** | **{len(ALL_ITEMS)}** |\n"
522
  f"| Unique Topics | {len(TOPICS)} |\n"
523
+ f"| Unique Tones | {len(EMOTIONS)} |"
524
  )
525
 
526
  # -- About --
 
531
  "(FLAN-T5-base) trained jointly on three tasks:\n\n"
532
  "| Task | What it does | Training data |\n"
533
  "|------|-------------|---------------|\n"
534
+ "| **Summarization** | Generates abstracts for research papers | "
535
+ "~49K pairs (arXiv + Project Gutenberg/Goodreads) |\n"
536
  "| **Topic Classification** | Assigns one of 7 topics | 3.4K samples |\n"
537
  "| **Emotion Detection** | Detects up to 28 emotions | "
538
  "43K GoEmotions samples |\n\n"
539
+ "**How to read the results:**\n\n"
540
+ "- **Research papers** show AI-generated summaries that condense the "
541
+ "paper's content. These are generated by the model and are generally "
542
+ "accurate.\n"
543
+ "- **Books** show the Goodreads description as the primary text. "
544
+ "The model was trained primarily on academic text (~45K academic vs ~4K literary), "
545
+ "so book summaries are not shown.\n"
546
+ "- **Tone labels** indicate the dominant emotional tone detected by the model. "
547
+ "Since the emotion detector was trained on social media (GoEmotions), "
548
+ "it captures general sentiment better than specific emotions for "
549
+ "formal text.\n\n"
550
  "#### Architecture\n\n"
551
  "- Custom from-scratch Transformer (not HuggingFace wrappers)\n"
552
  "- Shared encoder with task-specific heads: decoder for summarization, "