OliverPerrin committed on
Commit
18e0afe
·
1 Parent(s): 32b685d

Add metrics tab with evaluation report, show all labels

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -1
  2. scripts/demo_gradio.py +49 -10
Dockerfile CHANGED
@@ -1,6 +1,6 @@
1
  FROM python:3.10-slim
2
 
3
- # Force rebuild: 2026-01-16-v2
4
  WORKDIR /app
5
 
6
  # Install system dependencies
@@ -19,6 +19,7 @@ COPY scripts/demo_gradio.py scripts/
19
  COPY configs/ configs/
20
  COPY artifacts/ artifacts/
21
  COPY checkpoints/ checkpoints/
 
22
 
23
  # Set environment variables for Gradio
24
  ENV GRADIO_SERVER_NAME="0.0.0.0"
 
1
  FROM python:3.10-slim
2
 
3
+ # Force rebuild: 2026-01-16-v3
4
  WORKDIR /app
5
 
6
  # Install system dependencies
 
19
  COPY configs/ configs/
20
  COPY artifacts/ artifacts/
21
  COPY checkpoints/ checkpoints/
22
+ COPY outputs/evaluation_report.json outputs/
23
 
24
  # Set environment variables for Gradio
25
  ENV GRADIO_SERVER_NAME="0.0.0.0"
scripts/demo_gradio.py CHANGED
@@ -26,16 +26,33 @@ print(f"Loaded {len(_dataset)} items")
26
  # Convert to list of dicts for easier filtering
27
  ALL_ITEMS: list[dict[str, Any]] = [dict(row) for row in _dataset]
28
 
29
- # Extract unique topics and emotions
30
- TOPICS: list[str] = sorted(set(str(item["topic"]) for item in ALL_ITEMS if item.get("topic")))
31
- EMOTIONS: list[str] = sorted(set(str(item["emotion"]) for item in ALL_ITEMS if item.get("emotion")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Group by source type
34
  BOOKS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "literary"]
35
  PAPERS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "academic"]
36
 
37
- print(f"Topics: {TOPICS}")
38
- print(f"Emotions: {EMOTIONS}")
 
 
39
  print(f"Books: {len(BOOKS)}, Papers: {len(PAPERS)}")
40
 
41
  # --------------- Load Evaluation Metrics ---------------
@@ -394,15 +411,37 @@ since descriptions paraphrase rather than quote the source text.*
394
  gr.Markdown("*Emotion detection metrics not available.*")
395
 
396
  # Dataset Statistics
397
- gr.Markdown("#### 📈 Dataset Statistics")
 
 
 
 
 
 
 
 
 
398
  gr.Markdown(f"""
399
  | Statistic | Value |
400
  |-----------|-------|
401
- | Total Items | {len(ALL_ITEMS)} |
402
  | Literary Works | {len(BOOKS)} |
403
- | Academic Papers | {len(PAPERS)} |
404
- | Unique Topics | {len(TOPICS)} |
405
- | Unique Emotions | {len(EMOTIONS)} |
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  """)
407
 
408
  # ===================== TAB 5: ABOUT =====================
 
26
  # Convert to list of dicts for easier filtering
27
  ALL_ITEMS: list[dict[str, Any]] = [dict(row) for row in _dataset]
28
 
29
+ # Extract unique topics and emotions FROM THE DATASET (what model predicted)
30
+ DATASET_TOPICS: list[str] = sorted(set(str(item["topic"]) for item in ALL_ITEMS if item.get("topic")))
31
+ DATASET_EMOTIONS: list[str] = sorted(set(str(item["emotion"]) for item in ALL_ITEMS if item.get("emotion")))
32
+
33
+ # Load ALL possible labels from labels.json (what the model CAN predict)
34
+ _labels_path = Path(__file__).parent.parent / "artifacts" / "labels.json"
35
+ if _labels_path.exists():
36
+ with open(_labels_path) as f:
37
+ _labels = json.load(f)
38
+ ALL_TOPICS: list[str] = _labels.get("topic", DATASET_TOPICS)
39
+ ALL_EMOTIONS: list[str] = _labels.get("emotion", DATASET_EMOTIONS)
40
+ else:
41
+ ALL_TOPICS = DATASET_TOPICS
42
+ ALL_EMOTIONS = DATASET_EMOTIONS
43
+
44
+ # Use dataset-observed values for dropdown filtering
45
+ TOPICS = DATASET_TOPICS
46
+ EMOTIONS = DATASET_EMOTIONS
47
 
48
  # Group by source type
49
  BOOKS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "literary"]
50
  PAPERS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "academic"]
51
 
52
+ print(f"Dataset Topics ({len(TOPICS)}): {TOPICS}")
53
+ print(f"Dataset Emotions ({len(EMOTIONS)}): {EMOTIONS}")
54
+ print(f"All Model Topics ({len(ALL_TOPICS)}): {ALL_TOPICS}")
55
+ print(f"All Model Emotions ({len(ALL_EMOTIONS)}): {ALL_EMOTIONS}")
56
  print(f"Books: {len(BOOKS)}, Papers: {len(PAPERS)}")
57
 
58
  # --------------- Load Evaluation Metrics ---------------
 
411
  gr.Markdown("*Emotion detection metrics not available.*")
412
 
413
  # Dataset Statistics
414
+ gr.Markdown("#### 📈 Dataset & Model Statistics")
415
+
416
+ # Build topic list with indicators for observed vs possible
417
+ topic_list = ", ".join([
418
+ f"**{t}**" if t in TOPICS else t for t in ALL_TOPICS
419
+ ])
420
+ emotion_list = ", ".join([
421
+ f"**{e}**" if e in EMOTIONS else e for e in ALL_EMOTIONS
422
+ ])
423
+
424
  gr.Markdown(f"""
425
  | Statistic | Value |
426
  |-----------|-------|
427
+ | Total Discovery Items | {len(ALL_ITEMS)} |
428
  | Literary Works | {len(BOOKS)} |
429
+ | Academic Papers (arXiv) | {len(PAPERS)} |
430
+ | Topics in Dataset | {len(TOPICS)} of {len(ALL_TOPICS)} possible |
431
+ | Emotions in Dataset | {len(EMOTIONS)} of {len(ALL_EMOTIONS)} possible |
432
+
433
+ **All Model Topics ({len(ALL_TOPICS)}):** {topic_list}
434
+
435
+ **All Model Emotions ({len(ALL_EMOTIONS)}):** {emotion_list}
436
+
437
+ *Bold items appear in the discovery dataset. The model can predict all listed labels.*
438
+
439
+ ---
440
+
441
+ **Note on Content Types:**
442
+ - 📄 **Academic Papers** include CS/AI papers (Technology), Physics/Math (Science), Economics (Business)
443
+ - 📖 **Literary Works** include novels (Fiction), biographies (History), philosophical texts (Philosophy)
444
+ - Technical blogs and tutorials would be classified under **Technology**
445
  """)
446
 
447
  # ===================== TAB 5: ABOUT =====================