# NOTE(review): the following lines were a scraped Hugging Face Spaces status
# banner ("Spaces: Sleeping") — kept as a comment so the module stays valid Python.
| """ | |
| LexiMind - Book & Paper Discovery | |
| Browse books and research papers by topic or emotion. | |
| Pre-analyzed summaries help you find what to read next. | |
| Author: Oliver Perrin | |
| Date: 2026-01-14 | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
| import gradio as gr | |
| from datasets import Dataset, load_dataset | |
# --------------- Load Dataset from HuggingFace Hub ---------------
# Runs once at import time: the Space serves pre-analyzed data, so everything
# is fetched up front and held in memory for the lifetime of the process.
print("Loading discovery dataset from HuggingFace Hub...")
_dataset: Dataset = load_dataset("OliverPerrin/LexiMind-Discovery", split="train")  # type: ignore[assignment]
print(f"Loaded {len(_dataset)} items")
# Convert to list of dicts for easier filtering
ALL_ITEMS: list[dict[str, Any]] = [dict(row) for row in _dataset]
# Extract unique topics and emotions FROM THE DATASET (what model predicted)
DATASET_TOPICS: list[str] = sorted(set(str(item["topic"]) for item in ALL_ITEMS if item.get("topic")))
DATASET_EMOTIONS: list[str] = sorted(set(str(item["emotion"]) for item in ALL_ITEMS if item.get("emotion")))
# Load ALL possible labels from labels.json (what the model CAN predict)
_labels_path = Path(__file__).parent.parent / "artifacts" / "labels.json"
if _labels_path.exists():
    with open(_labels_path) as f:
        _labels = json.load(f)
    ALL_TOPICS: list[str] = _labels.get("topic", DATASET_TOPICS)
    ALL_EMOTIONS: list[str] = _labels.get("emotion", DATASET_EMOTIONS)
else:
    # No artifacts directory on this deployment: fall back to observed labels.
    ALL_TOPICS = DATASET_TOPICS
    ALL_EMOTIONS = DATASET_EMOTIONS
# Use dataset-observed values for dropdown filtering
TOPICS = DATASET_TOPICS
EMOTIONS = DATASET_EMOTIONS
# Group by source type
BOOKS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "literary"]
PAPERS: list[dict[str, Any]] = [item for item in ALL_ITEMS if item.get("source_type") == "academic"]
# Startup diagnostics (visible in the Space's container logs).
print(f"Dataset Topics ({len(TOPICS)}): {TOPICS}")
print(f"Dataset Emotions ({len(EMOTIONS)}): {EMOTIONS}")
print(f"All Model Topics ({len(ALL_TOPICS)}): {ALL_TOPICS}")
print(f"All Model Emotions ({len(ALL_EMOTIONS)}): {ALL_EMOTIONS}")
print(f"Books: {len(BOOKS)}, Papers: {len(PAPERS)}")
# --------------- Load Evaluation Metrics ---------------
# Optional: the metrics tab degrades gracefully when the report is absent.
METRICS: dict[str, Any] = {}
_metrics_path = Path(__file__).parent.parent / "outputs" / "evaluation_report.json"
if _metrics_path.exists():
    try:
        with open(_metrics_path) as f:
            METRICS = json.load(f)
        print(f"Loaded evaluation metrics from {_metrics_path}")
    except Exception as e:
        # Best-effort load: a malformed report must not prevent app startup.
        print(f"Warning: Could not load metrics: {e}")
| # --------------- Filter Functions --------------- | |
def get_items_by_topic(topic: str) -> list[dict]:
    """Return every discovery item whose predicted topic equals *topic*.

    The sentinel value "All" disables filtering and returns the full catalog.
    """
    if topic != "All":
        return [entry for entry in ALL_ITEMS if entry.get("topic") == topic]
    return ALL_ITEMS
def get_items_by_emotion(emotion: str) -> list[dict]:
    """Return every discovery item whose predicted emotion equals *emotion*.

    The sentinel value "All" disables filtering and returns the full catalog.
    """
    if emotion != "All":
        return [entry for entry in ALL_ITEMS if entry.get("emotion") == emotion]
    return ALL_ITEMS
def format_item_card(item: dict) -> str:
    """Render one discovery item as a self-contained markdown card.

    The card shows the title, source type, topic/emotion predictions with
    traffic-light confidence badges, a summary (truncated at a word boundary
    past 400 chars), and a collapsible preview of the original text.
    """
    title = item.get("title", "Unknown")
    source_type = item.get("source_type", "unknown")
    dataset_name = item.get("dataset", "").title()

    # Academic papers and literary works get distinct icons and labels.
    if source_type == "academic":
        icon, type_label = "π", "Research Paper"
    else:
        icon, type_label = "π", "Literature"

    topic = item.get("topic", "Unknown")
    topic_conf = item.get("topic_confidence", 0)
    emotion = item.get("emotion", "Unknown")
    emotion_conf = item.get("emotion_confidence", 0)

    # Literary items always show the curated reference summary; other items
    # may opt in via the use_reference_summary flag.
    if item.get("use_reference_summary", False) or source_type == "literary":
        summary = item.get("reference_summary", "")
        summary_label = "π **Book Description** (Goodreads-style):"
    else:
        summary = item.get("generated_summary", "")
        summary_label = "π€ **AI-Generated Description:**"
    if not summary:
        summary = "No summary available."
    # Trim overly long summaries at the last whole word before 400 chars.
    if len(summary) > 400:
        summary = summary[:400].rsplit(' ', 1)[0] + "..."

    # Preview of the original text, hard-truncated at 400 chars.
    full_text = item.get("text", "")
    text_preview = full_text[:400] + "..." if len(full_text) > 400 else full_text

    def _badge(conf: float) -> str:
        # Traffic light: green above 0.6, yellow above 0.3, red otherwise.
        if conf > 0.6:
            return "π’"
        if conf > 0.3:
            return "π‘"
        return "π΄"

    topic_badge = _badge(topic_conf)
    emotion_badge = _badge(emotion_conf)

    return f"""### {icon} **{title}**
<small>*{type_label}* from {dataset_name}</small>
| Topic | Emotion |
|-------|---------|
| {topic_badge} {topic} ({topic_conf:.0%}) | {emotion_badge} {emotion.title()} ({emotion_conf:.0%}) |
{summary_label}
> {summary}
<details>
<summary>π View Original Text</summary>
{text_preview}
</details>
---
"""
def browse_by_topic(topic: str) -> str:
    """Build the markdown listing for a topic, split by source type.

    Each section (literary / academic) is capped at 25 cards to keep the
    rendered page a manageable size.
    """
    items = get_items_by_topic(topic)
    if not items:
        return "No items found for this topic."
    literary = [entry for entry in items if entry.get("source_type") == "literary"]
    academic = [entry for entry in items if entry.get("source_type") == "academic"]
    heading = topic if topic != "All" else "All Topics"
    parts = [
        f"## {heading}\n\n",
        f"*Found {len(items)} items ({len(literary)} literary, {len(academic)} academic)*\n\n",
    ]
    if literary:
        parts.append("### π Literary Works\n\n")
        parts.extend(format_item_card(entry) for entry in literary[:25])
    if academic:
        parts.append("### π Academic Papers\n\n")
        parts.extend(format_item_card(entry) for entry in academic[:25])
    return "".join(parts)
def browse_by_emotion(emotion: str) -> str:
    """Build the markdown listing for an emotion, split by source type.

    Each section (literary / academic) is capped at 25 cards to keep the
    rendered page a manageable size.
    """
    items = get_items_by_emotion(emotion)
    if not items:
        return "No items found for this emotion."
    literary = [entry for entry in items if entry.get("source_type") == "literary"]
    academic = [entry for entry in items if entry.get("source_type") == "academic"]
    mood = emotion.title() if emotion != "All" else "All Emotions"
    parts = [
        f"## Feeling {mood}?\n\n",
        f"*Found {len(items)} items ({len(literary)} literary, {len(academic)} academic)*\n\n",
    ]
    if literary:
        parts.append("### π Literary Works\n\n")
        parts.extend(format_item_card(entry) for entry in literary[:25])
    if academic:
        parts.append("### π Academic Papers\n\n")
        parts.extend(format_item_card(entry) for entry in academic[:25])
    return "".join(parts)
def search_items(query: str) -> str:
    """Case-insensitive substring search over text, summary, and title.

    Queries shorter than three characters are rejected; results are capped
    at 30 cards.
    """
    if not query or len(query) < 3:
        return "Enter at least 3 characters to search."
    needle = query.lower()

    def _matches(entry: dict) -> bool:
        # Probe each searchable field in turn; any hit qualifies the item.
        for field in ("text", "generated_summary", "title"):
            if needle in entry.get(field, "").lower():
                return True
        return False

    matches = [entry for entry in ALL_ITEMS if _matches(entry)]
    if not matches:
        return f"No results found for '{query}'."
    parts = [
        f"## Search Results for '{query}'\n\n",
        f"*Found {len(matches)} matching items*\n\n",
    ]
    parts.extend(format_item_card(entry) for entry in matches[:30])
    return "".join(parts)
# --------------- Gradio Interface ---------------
# Top-level UI definition: entering the Blocks context builds all tabs and
# wires event handlers at import time; `demo` is launched in the entry point.
with gr.Blocks(
    title="LexiMind",
    theme=gr.themes.Soft(),
    # Constrain result panes to a scrollable box and tighten card headings.
    css="""
.result-box { max-height: 700px; overflow-y: auto; }
h3 { margin-top: 0.5em !important; }
"""
) as demo:
    # Header: catalog overview with live counts substituted via str.format.
    gr.Markdown(
        """
# π LexiMind - Literary Discovery
### Find Books & Research Papers by Topic or Emotional Tone
Explore **{total_count}** items analyzed by the LexiMind multi-task transformer:
| Source | Count | Description |
|--------|-------|-------------|
| π Literature | {lit_count} | Classic books with Goodreads-style descriptions |
| π Research | {paper_count} | Scientific papers from arXiv |
**Model Capabilities:**
- π·οΈ **Topic Classification**: Fiction, Science, History, Philosophy, Arts, Business, Technology
- π **Emotion Detection**: 28 emotions (joy, sadness, anger, fear, surprise, love, etc.)
- π **Book Descriptions**: Back-cover style summaries of what texts are about
---
""".format(
            total_count=len(ALL_ITEMS),
            lit_count=len(BOOKS),
            paper_count=len(PAPERS)
        )
    )
    with gr.Tabs():
        # ===================== TAB 1: BROWSE BY TOPIC =====================
        with gr.Tab("π·οΈ Browse by Topic"):
            gr.Markdown("*Select a topic to explore related books and papers*")
            # Only topics actually observed in the dataset are offered.
            topic_dropdown = gr.Dropdown(
                choices=["All"] + TOPICS,
                value="All",
                label="Select Topic",
                interactive=True,
            )
            # Pre-populate with the unfiltered listing.
            topic_results = gr.Markdown(
                value=browse_by_topic("All"),
                elem_classes=["result-box"],
            )
            topic_dropdown.change(
                fn=browse_by_topic,
                inputs=[topic_dropdown],
                outputs=[topic_results],
            )
        # ===================== TAB 2: BROWSE BY EMOTION =====================
        with gr.Tab("π Browse by Emotion"):
            gr.Markdown("*Find books and papers that evoke specific emotions*")
            emotion_dropdown = gr.Dropdown(
                choices=["All"] + [e.title() for e in EMOTIONS],
                value="All",
                label="Select Emotion",
                interactive=True,
            )
            emotion_results = gr.Markdown(
                value=browse_by_emotion("All"),
                elem_classes=["result-box"],
            )
            # The dropdown displays Title-case labels while the dataset stores
            # lowercase values, so map back before filtering.
            emotion_dropdown.change(
                fn=lambda e: browse_by_emotion(e.lower() if e != "All" else "All"),
                inputs=[emotion_dropdown],
                outputs=[emotion_results],
            )
        # ===================== TAB 3: SEARCH =====================
        with gr.Tab("π Search"):
            gr.Markdown("*Search through all books and papers by keyword*")
            search_input = gr.Textbox(
                placeholder="Enter keywords to search...",
                label="Search",
                interactive=True,
            )
            search_results = gr.Markdown(
                value="Enter at least 3 characters to search.",
                elem_classes=["result-box"],
            )
            # Live search: re-query on every input change.
            search_input.change(
                fn=search_items,
                inputs=[search_input],
                outputs=[search_results],
            )
        # ===================== TAB 4: METRICS =====================
        with gr.Tab("π Model Metrics"):
            gr.Markdown(
                """
### Evaluation Metrics
LexiMind is evaluated using comprehensive metrics across all three tasks.
Metrics are computed on held-out validation data.
"""
            )
            # Summarization Metrics
            gr.Markdown("#### π Summarization Metrics")
            if METRICS.get("summarization"):
                summ = METRICS["summarization"]
                # .get fallbacks tolerate both key spellings produced by the
                # evaluation report (e.g. "rouge_rouge1" vs "rouge1").
                summ_md = """
| Metric | Score | Description |
|--------|-------|-------------|
| **ROUGE-1** | {rouge1:.4f} | Unigram overlap with reference |
| **ROUGE-2** | {rouge2:.4f} | Bigram overlap with reference |
| **ROUGE-L** | {rougeL:.4f} | Longest common subsequence |
| **BLEU-4** | {bleu4:.4f} | 4-gram precision score |
| **BERTScore F1** | {bertscore:.4f} | Semantic similarity (contextual) |
*Note: For back-cover style descriptions, BERTScore is more meaningful than ROUGE
since descriptions paraphrase rather than quote the source text.*
""".format(
                    rouge1=summ.get("rouge_rouge1", summ.get("rouge1", 0)),
                    rouge2=summ.get("rouge_rouge2", summ.get("rouge2", 0)),
                    rougeL=summ.get("rouge_rougeL", summ.get("rougeL", 0)),
                    bleu4=summ.get("bleu4", 0),
                    bertscore=summ.get("bertscore_f1", 0),
                )
                gr.Markdown(summ_md)
            else:
                gr.Markdown("*Summarization metrics not available. Run evaluation script.*")
            # Topic Classification Metrics
            gr.Markdown("#### π·οΈ Topic Classification Metrics")
            if METRICS.get("topic"):
                topic = METRICS["topic"]
                topic_md = """
| Metric | Score |
|--------|-------|
| **Accuracy** | {accuracy:.2%} |
| **Macro F1** | {f1:.4f} |
| **Precision** | {precision:.4f} |
| **Recall** | {recall:.4f} |
""".format(
                    accuracy=topic.get("accuracy", 0),
                    f1=topic.get("f1", topic.get("macro_f1", 0)),
                    precision=topic.get("precision", 0),
                    recall=topic.get("recall", 0),
                )
                gr.Markdown(topic_md)
            else:
                gr.Markdown("*Topic classification metrics not available.*")
            # Emotion Detection Metrics
            gr.Markdown("#### π Emotion Detection Metrics")
            if METRICS.get("emotion"):
                emotion = METRICS["emotion"]
                emotion_md = """
| Metric | Score |
|--------|-------|
| **Multi-label F1** | {f1:.4f} |
| **Precision** | {precision:.4f} |
| **Recall** | {recall:.4f} |
*Emotion detection uses 28 labels from GoEmotions. Multiple emotions can be assigned to each text.*
""".format(
                    f1=emotion.get("f1", emotion.get("multilabel_f1", 0)),
                    precision=emotion.get("precision", 0),
                    recall=emotion.get("recall", 0),
                )
                gr.Markdown(emotion_md)
            else:
                gr.Markdown("*Emotion detection metrics not available.*")
            # Dataset Statistics
            gr.Markdown("#### π Dataset & Model Statistics")
            # Build topic list with indicators for observed vs possible:
            # bold marks labels that actually occur in the discovery dataset.
            topic_list = ", ".join([
                f"**{t}**" if t in TOPICS else t for t in ALL_TOPICS
            ])
            emotion_list = ", ".join([
                f"**{e}**" if e in EMOTIONS else e for e in ALL_EMOTIONS
            ])
            gr.Markdown(f"""
| Statistic | Value |
|-----------|-------|
| Total Discovery Items | {len(ALL_ITEMS)} |
| Literary Works | {len(BOOKS)} |
| Academic Papers (arXiv) | {len(PAPERS)} |
| Topics in Dataset | {len(TOPICS)} of {len(ALL_TOPICS)} possible |
| Emotions in Dataset | {len(EMOTIONS)} of {len(ALL_EMOTIONS)} possible |
**All Model Topics ({len(ALL_TOPICS)}):** {topic_list}
**All Model Emotions ({len(ALL_EMOTIONS)}):** {emotion_list}
*Bold items appear in the discovery dataset. The model can predict all listed labels.*
---
**Note on Content Types:**
- π **Academic Papers** include CS/AI papers (Technology), Physics/Math (Science), Economics (Business)
- π **Literary Works** include novels (Fiction), biographies (History), philosophical texts (Philosophy)
- Technical blogs and tutorials would be classified under **Technology**
""")
        # ===================== TAB 5: ABOUT =====================
        with gr.Tab("βΉοΈ About"):
            # Static project description; no event handlers on this tab.
            gr.Markdown(
                """
### About LexiMind
LexiMind is a **272M parameter encoder-decoder transformer** trained on three tasks:
| Task | Description |
|------|-------------|
| **Book Descriptions** | Generate back-cover style descriptions of what books are about |
| **Topic Classification** | Categorize into Fiction, Science, Technology, Philosophy, History, Business, Arts |
| **Emotion Detection** | Identify emotional tones (28 emotions from GoEmotions) |
### Architecture
- **Base:** FLAN-T5-base (Google)
- **Encoder:** 12 layers, 768 dim, 12 attention heads
- **Decoder:** 12 layers with causal attention
- **Position:** T5 relative position bias
- **Training:** Multi-task learning with task-specific heads
### Training Data
| Dataset | Task | Description |
|---------|------|-------------|
| Goodreads (711k+ blurbs) | Book Descriptions | Back-cover style descriptions matched with Gutenberg texts |
| arXiv | Paper Abstracts | Scientific paper summarization |
| 20 Newsgroups + Gutenberg | Topic Classification | Multi-domain topic categorization |
| GoEmotions | Emotion Detection | 28-class multi-label emotion classification |
### Key Design Decision
LexiMind generates **back-cover style descriptions** (what a book is about) rather than
plot summaries (what happens in the book). This is achieved by training on Goodreads
descriptions paired with Project Gutenberg book texts.
### Evaluation Metrics
- **ROUGE-1/2/L**: Lexical overlap (expected range: 0.15-0.25 for descriptions)
- **BLEU-4**: N-gram precision
- **BERTScore**: Semantic similarity using contextual embeddings (key metric for paraphrasing)
### Links
- π [GitHub](https://github.com/OliverPerrin/LexiMind)
- π€ [Model](https://huggingface.co/OliverPerrin/LexiMind-Model)
- π [Discovery Dataset](https://huggingface.co/datasets/OliverPerrin/LexiMind-Discovery)
---
*Built by Oliver Perrin β’ Appalachian State University β’ 2025-2026*
"""
            )
# --------------- Entry Point ---------------
if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)