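"""Gradio viewer for consolidated dataset-mention records.

Loads records from merged_consolidated_data.json and exposes three tabs:
a usage guide, a filterable record viewer with highlighted dataset mentions,
and a side-by-side comparison of borderline cases grouped by type or term.
"""
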
import os
import json
from typing import List, Dict, Tuple, Optional, Any
from collections import Counter, defaultdict
import gradio as gr
# ── Local CONFIG ──────────────────────────────────────────────────────────────
DATA_FILE = "merged_consolidated_data.json"
def load_initial_data() -> List[Dict]:
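    """Load and sort the consolidated records from DATA_FILE.

    Each record is expected to be a dict using the fields consumed elsewhere
    in this app: "text", "ner_text" (a list of [start, end, label] spans),
    "type", "validated", "tags", "corpus", "filename", "page",
    "empirical_context", "explanation", and the contextual_* fields.
    """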
if not os.path.exists(DATA_FILE):
raise FileNotFoundError(f"{DATA_FILE} not found in current directory.")
with open(DATA_FILE, "r", encoding="utf-8") as f:
data = json.load(f)
    # Sort so records with the most entity spans come first (most informative)
    data.sort(key=lambda x: len(x.get('ner_text', [])), reverse=True)
return data
class DynamicDataset:
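    """Thin wrapper around the record list that tracks the currently shown index."""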
def __init__(self, data: List[Dict]):
self.data = data
self.len = len(data)
self.current = 0
def example(self, idx: int) -> Dict:
self.current = max(0, min(self.len - 1, idx))
return self.data[self.current]
class ComparisonManager:
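    """Group borderline records into validated / not-validated buckets.

    Records are grouped two ways: by data type ("mixed types" with both
    outcomes) and by the surface term extracted from the first NER span
    ("confusing terms"). An optional corpus filter restricts the pool first.
    """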
def __init__(self, data: List[Dict], corpus_filter: str = "All"):
# Filter data by corpus first
if corpus_filter != "All":
self.data = [rec for rec in data if rec.get("corpus") == corpus_filter]
else:
self.data = data
# Group by type
self.type_groups = defaultdict(lambda: {'validated': [], 'not_validated': []})
# Group by term (extract from ner_text)
self.term_groups = defaultdict(lambda: {'validated': [], 'not_validated': []})
for rec in self.data:
dtype = rec.get("type")
is_validated = rec.get("validated", False)
tags = rec.get("tags", [])
# Only include borderline cases
if "borderline" not in tags:
continue
# Group by type
if dtype:
key = 'validated' if is_validated else 'not_validated'
self.type_groups[dtype][key].append(rec)
# Extract term from ner_text
if rec.get('ner_text') and len(rec['ner_text']) > 0:
start, end, label = rec['ner_text'][0]
if label == 'named' and rec.get('text'):
term = rec['text'][start:end]
if term and "confusing_term" in tags:
key = 'validated' if is_validated else 'not_validated'
self.term_groups[term][key].append(rec)
# Get mixed types (sorted by total count)
self.mixed_types = []
for dtype, groups in self.type_groups.items():
if groups['validated'] and groups['not_validated']:
total = len(groups['validated']) + len(groups['not_validated'])
self.mixed_types.append((dtype, total))
self.mixed_types.sort(key=lambda x: x[1], reverse=True)
self.mixed_types = [t[0] for t in self.mixed_types]
# Get confusing terms (sorted by total count)
self.confusing_terms = []
for term, groups in self.term_groups.items():
if groups['validated'] and groups['not_validated']:
total = len(groups['validated']) + len(groups['not_validated'])
self.confusing_terms.append((term, total))
self.confusing_terms.sort(key=lambda x: x[1], reverse=True)
self.confusing_terms = [t[0] for t in self.confusing_terms]
def get_example_by_type(self, dtype: str, is_validated: bool, idx: int) -> Dict:
if dtype not in self.type_groups:
return {}
group = self.type_groups[dtype]['validated' if is_validated else 'not_validated']
if not group:
return {}
safe_idx = idx % len(group)
return group[safe_idx]
def get_count_by_type(self, dtype: str, is_validated: bool) -> int:
if dtype not in self.type_groups:
return 0
return len(self.type_groups[dtype]['validated' if is_validated else 'not_validated'])
def get_example_by_term(self, term: str, is_validated: bool, idx: int) -> Dict:
if term not in self.term_groups:
return {}
group = self.term_groups[term]['validated' if is_validated else 'not_validated']
if not group:
return {}
safe_idx = idx % len(group)
return group[safe_idx]
def get_count_by_term(self, term: str, is_validated: bool) -> int:
if term not in self.term_groups:
return 0
return len(self.term_groups[term]['validated' if is_validated else 'not_validated'])
def filter_terms(self, search_query: str) -> List[str]:
"""Filter confusing terms by search query (case-insensitive substring match)."""
if not search_query or not search_query.strip():
return self.confusing_terms
query = search_query.strip().lower()
return [term for term in self.confusing_terms if query in term.lower()]
# ── Highlight utils ──────────────────────────────────────────────────────────
def prepare_for_highlight(rec: Dict) -> List[Tuple[str, Optional[str]]]:
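    """Convert a record into (segment, label) tuples for gr.HighlightedText.

    Spans are processed in order of start offset; spans with non-numeric or
    out-of-range offsets are skipped. Overlapping spans are not merged, so
    "ner_text" is assumed to contain non-overlapping spans.
    """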
text = rec.get("text", "") or ""
ner_spans = rec.get("ner_text", []) or []
segments = []
last_idx = 0
for start, end, label in sorted(ner_spans, key=lambda x: x[0]):
try:
start = int(start)
end = int(end)
        except (TypeError, ValueError):
continue
if start < 0 or end <= start or start > len(text):
continue
end = min(end, len(text))
if start > last_idx:
segments.append((text[last_idx:start], None))
segments.append((text[start:end], str(label)))
last_idx = end
if last_idx < len(text):
segments.append((text[last_idx:], None))
return segments
# ── Filtering helpers ─────────────────────────────────────────────────────────
def record_matches_filters(rec: Dict, dataset_filter: str, type_filter: str, term_search: str = "", corpus_filter: str = "All"):
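    """Return True if the record passes the corpus, validation status, type, and term filters."""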
# Corpus filter (check first for efficiency)
if corpus_filter != "All":
if rec.get("corpus") != corpus_filter:
return False
is_validated = rec.get("validated", False)
tags = rec.get("tags", [])
if dataset_filter == "Datasets only" and not is_validated:
return False
if dataset_filter == "Non-datasets only" and is_validated:
return False
if dataset_filter == "Borderline Cases Only":
return "borderline" in tags
if type_filter != "All types":
if rec.get("type") != type_filter:
return False
# Term search filter
if term_search and term_search.strip():
query = term_search.strip().lower()
# Extract term from ner_text
if rec.get('ner_text') and len(rec['ner_text']) > 0 and rec.get('text'):
start, end, label = rec['ner_text'][0]
term = rec['text'][start:end]
if term and query in term.lower():
return True
else:
return False
else:
return False
return True
# ── Documentation ─────────────────────────────────────────────────────────────
DOCUMENTATION = """
# 📊 Monitoring of Data Use - User Guide
## What is this tool?
This application helps you **review and explore dataset mentions** extracted from documents.
It displays text excerpts where potential datasets have been identified, along with metadata about each mention.
## What you'll see
Each record shows:
- **📄 Source Document**: The filename and page number where the text was found
- **🔍 Highlighted Text**: The original text with dataset mentions highlighted
- **📋 Data Type**: The category of the dataset (e.g., census, survey, database)
- **✅ Dataset Status**: Whether this mention actually refers to a dataset
- **💡 Context**: The surrounding text that provides context
- **📝 Explanation**: Why this was classified as a dataset (or not)
- **🏷️ Tags**: Borderline, mixed type, or confusing term indicators
## How to use this tool
### 🎯 Navigation
- **Browse Records**: Use the slider to jump to any record by number
- **Previous/Next Buttons**: Navigate through records one at a time
- **Filters**: The Previous/Next buttons respect your active filters
### 🧪 Try It Yourself
You can try an interactive version of this tool here:
👉 **Hugging Face Space:** https://huggingface.co/spaces/ai4data/datause-extraction
This hosted demo lets you experiment with dataset extraction, highlighting, and validation using real examples.
### 🔍 Filtering Options
1. **Dataset Status Filter**
- **All**: Show all records
- **Datasets only**: Show only records that contain actual dataset references
- **Non-datasets only**: Show records that were identified but don't actually refer to datasets
- **🔥 Borderline Cases Only**: Show only confusing/mixed cases
2. **Data Type Filter**
- Filter by specific data types (census, survey, database, etc.)
- Types are sorted by frequency (most common first)
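3. **Corpus Filter**
   - Restrict records to a single corpus (ReliefWeb or World Bank project documents)
4. **Term Search**
   - Type part of a term (e.g., 'MIS') to show only records whose highlighted term contains it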
### ⚖️ Comparison Tab
The Comparison tab helps you understand **why the same type or term** can be validated differently:
1. **By Type**: Compare examples of the same data type (e.g., "system") with different validation outcomes
2. **By Term**: Compare the exact same term (e.g., "Project MIS") appearing in different contexts
This helps identify:
- What contextual signals distinguish valid from invalid datasets
- Why borderline cases are confusing
- Patterns in validation decisions
### 💡 Tips
- Use filters to focus on specific types of data mentions
- The "Validated" field tells you if the mention is a true dataset reference
- Review the "Explanation" to understand the classification reasoning
- Highlighted text shows exactly where the dataset mention appears in context
- Check tags to identify borderline/confusing cases
## Data Source
This viewer uses data from World Bank project documents and ReliefWeb reports, with revalidation analysis.
"""
# ── Gradio App ───────────────────────────────────────────────────────────────
def create_demo() -> gr.Blocks:
data = load_initial_data()
dynamic_dataset = DynamicDataset(data)
comparison_manager = ComparisonManager(data)
# Count types and sort by frequency (most common first)
type_counter = Counter(rec.get("type") for rec in data if rec.get("type"))
type_values = [t for t, _ in type_counter.most_common()]
type_choices = ["All types"] + type_values
def make_info(rec):
"""Format record metadata for display."""
fn = rec.get("filename", "β€”")
pg = rec.get("page", "β€”")
v_type = rec.get("type", "β€”")
empirical_context = rec.get("empirical_context", "β€”")
explanation = rec.get("explanation", "β€”")
tags = rec.get("tags", [])
is_validated = rec.get("validated", False)
contextual_signal = rec.get("contextual_signal", "β€”")
contextual_reason_model = rec.get("contextual_reason_model", "β€”")
contextual_reason_agent = rec.get("contextual_reason_agent", "β€”")
# Apply conditional highlighting based on validation
if rec.get("ner_text") and rec.get("text") and is_validated is not None:
try:
start, end = rec["ner_text"][0][0], rec["ner_text"][0][1]
term = rec["text"][start:end]
if is_validated:
highlight_style = 'background-color: #90ee90; color: black; padding: 2px 4px; border-radius: 4px; font-weight: bold; border: 1px solid #5cb85c;'
else:
highlight_style = 'background-color: #ff7f7f; color: black; padding: 2px 4px; border-radius: 4px; font-weight: bold; border: 1px solid #d9534f;'
if term and term in empirical_context:
empirical_context = empirical_context.replace(term, f'<span style="{highlight_style}">{term}</span>')
except Exception:
pass
# Build HTML
type_html = f"<code>{v_type}</code>"
# Add type stats if available
type_stats = rec.get("type_stats")
if type_stats:
type_html += f" <small>(Type: {type_stats['validated']} βœ… / {type_stats['not_validated']} ❌)</small>"
tags_html = ""
# Add tags
if tags:
tag_badges = []
if "borderline" in tags:
tag_badges.append("⚠️ <b>Borderline</b>")
if "mixed_type" in tags:
tag_badges.append("πŸ” <b>Mixed Type</b>")
if "confusing_term" in tags:
tag_badges.append("πŸ€” <b>Confusing Term</b>")
if tag_badges:
tags_html = " ".join(tag_badges)
html = f"""
<h3>📄 Document Information</h3>
<p><b>File:</b> <code>{fn}</code><br>
<b>Page:</b> <code>{pg}</code></p>
<h3>🏷️ Type</h3>
<p>{type_html}</p>
"""
if tags_html:
html += f"""
<h3>🚩 Tags</h3>
<p>{tags_html}</p>
"""
html += f"""
<h3>📝 Surrounding Text</h3>
<p>{empirical_context}</p>
"""
# Add validation analysis
        status_icon = '✅' if is_validated else '❌'
status_text = 'Is a dataset' if is_validated else 'Not a dataset'
html += f"""
<h3>🤖 Validation Analysis</h3>
<p><b>Assessment:</b> {status_icon} {status_text}</p>
<p><b>Contextual Signal:</b> <code>{contextual_signal}</code></p>
"""
if contextual_reason_agent:
html += f"""
<p><b>Agent Reasoning:</b></p>
<blockquote style="border-left: 3px solid #ccc; padding-left: 10px; color: #666;">
{contextual_reason_agent}
</blockquote>
"""
if contextual_reason_model:
html += f"""
<p><b>Model Reasoning:</b></p>
<blockquote style="border-left: 3px solid #999; padding-left: 10px; color: #888;">
{contextual_reason_model}
</blockquote>
"""
return html
# Basic load by slider index (ignores filters)
def load_example(idx: int):
rec = dynamic_dataset.example(idx)
segs = prepare_for_highlight(rec)
return segs, idx, make_info(rec)
    # When filters change → jump to first matching record
def jump_on_filters(dataset_filter, type_filter, term_search, corpus_filter):
n = dynamic_dataset.len
for i in range(n):
if record_matches_filters(data[i], dataset_filter, type_filter, term_search, corpus_filter):
dynamic_dataset.current = i
rec = data[i]
segs = prepare_for_highlight(rec)
return segs, i, make_info(rec)
        # No match → return blank
return [], 0, "⚠️ No matching records found with the selected filters."
# Navigation respecting filters
def nav_next(dataset_filter, type_filter, term_search, corpus_filter):
i = dynamic_dataset.current + 1
n = dynamic_dataset.len
while i < n:
if record_matches_filters(data[i], dataset_filter, type_filter, term_search, corpus_filter):
break
i += 1
if i >= n:
i = dynamic_dataset.current
dynamic_dataset.current = i
rec = data[i]
return prepare_for_highlight(rec), i, make_info(rec)
def nav_prev(dataset_filter, type_filter, term_search, corpus_filter):
i = dynamic_dataset.current - 1
while i >= 0:
if record_matches_filters(data[i], dataset_filter, type_filter, term_search, corpus_filter):
break
i -= 1
if i < 0:
i = dynamic_dataset.current
dynamic_dataset.current = i
rec = data[i]
return prepare_for_highlight(rec), i, make_info(rec)
# Comparison Logic - By Type
def load_type_comparison(manager, dtype, pos_idx, neg_idx):
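        """Build both comparison columns for the selected data type.

        Returns highlighted segments, info HTML, and counter headers for one
        validated and one not-validated example (indices wrap via modulo).
        """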
if not dtype:
return [], "Select a type", [], "Select a type", "### βœ… IS Dataset", "### ❌ NOT Dataset"
pos_rec = manager.get_example_by_type(dtype, True, pos_idx)
neg_rec = manager.get_example_by_type(dtype, False, neg_idx)
pos_hl = prepare_for_highlight(pos_rec) if pos_rec else []
neg_hl = prepare_for_highlight(neg_rec) if neg_rec else []
pos_info = make_info(pos_rec) if pos_rec else "No examples"
neg_info = make_info(neg_rec) if neg_rec else "No examples"
# Add count info
pos_total = manager.get_count_by_type(dtype, True)
neg_total = manager.get_count_by_type(dtype, False)
pos_header = f"### βœ… IS Dataset ({(pos_idx % pos_total) + 1 if pos_total > 0 else 0}/{pos_total})"
neg_header = f"### ❌ NOT Dataset ({(neg_idx % neg_total) + 1 if neg_total > 0 else 0}/{neg_total})"
return pos_hl, pos_info, neg_hl, neg_info, pos_header, neg_header
# Comparison Logic - By Term
def load_term_comparison(manager, term, pos_idx, neg_idx):
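        """Same as load_type_comparison, but keyed on the exact surface term."""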
if not term:
return [], "Select a term", [], "Select a term", "### βœ… IS Dataset", "### ❌ NOT Dataset"
pos_rec = manager.get_example_by_term(term, True, pos_idx)
neg_rec = manager.get_example_by_term(term, False, neg_idx)
pos_hl = prepare_for_highlight(pos_rec) if pos_rec else []
neg_hl = prepare_for_highlight(neg_rec) if neg_rec else []
pos_info = make_info(pos_rec) if pos_rec else "No examples"
neg_info = make_info(neg_rec) if neg_rec else "No examples"
# Add count info
pos_total = manager.get_count_by_term(term, True)
neg_total = manager.get_count_by_term(term, False)
pos_header = f"### βœ… IS Dataset ({(pos_idx % pos_total) + 1 if pos_total > 0 else 0}/{pos_total})"
neg_header = f"### ❌ NOT Dataset ({(neg_idx % neg_total) + 1 if neg_total > 0 else 0}/{neg_total})"
return pos_hl, pos_info, neg_hl, neg_info, pos_header, neg_header
# Rebuild comparison manager when corpus filter changes
def rebuild_comparison(corpus_filter):
"""Rebuild ComparisonManager with new corpus filter."""
new_manager = ComparisonManager(data, corpus_filter)
# Return updated choices for dropdowns and new manager state
return (
new_manager,
gr.update(choices=new_manager.mixed_types,
value=new_manager.mixed_types[0] if new_manager.mixed_types else None),
gr.update(choices=new_manager.confusing_terms,
value=new_manager.confusing_terms[0] if new_manager.confusing_terms else None)
)
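    # The per-column example indices only ever increase; get_example_by_* wraps
    # them with modulo, so the "Next Example" buttons cycle through the records.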
def next_pos(current_idx):
return current_idx + 1
def next_neg(current_idx):
return current_idx + 1
# ---- UI ----
with gr.Blocks(title="Monitoring of Data Use") as demo:
gr.Markdown("# πŸ“Š Monitoring of Data Use")
with gr.Tabs():
with gr.Tab("πŸ“– How to Use"):
gr.Markdown(DOCUMENTATION)
with gr.Tab("πŸ” Viewer"):
with gr.Row():
prog = gr.Slider(
minimum=0,
maximum=dynamic_dataset.len - 1,
value=0,
step=1,
label=f"πŸ“‘ Browse Records (1 to {dynamic_dataset.len:,})",
interactive=True,
)
with gr.Row():
dataset_filter = gr.Dropdown(
choices=["All", "Datasets only", "Non-datasets only", "Borderline Cases Only"],
value="Datasets only",
label="🎯 Filter by Validation Status",
)
type_filter = gr.Dropdown(
choices=type_choices,
value="All types",
label="πŸ“‚ Filter by Data Type",
)
corpus_filter = gr.Dropdown(
choices=["All", "reliefweb", "WB project documents"],
value="All",
label="πŸ“š Filter by Corpus",
)
term_search = gr.Textbox(
label="πŸ” Search by Term",
placeholder="Type to filter by dataset term (e.g., 'MIS' to find EMIS, MIS, MIS database...)",
value="",
)
inp_box = gr.HighlightedText(
label="πŸ“„ Document Text (with highlighted dataset mentions)",
interactive=False,
show_legend=False,
value=""
)
info_md = gr.HTML(label="ℹ️ Record Details")
with gr.Row():
prev_btn = gr.Button("⬅️ Previous", variant="secondary", size="lg")
next_btn = gr.Button("Next ➑️", variant="primary", size="lg")
# Initial load
demo.load(
fn=load_example,
inputs=prog,
outputs=[inp_box, prog, info_md],
)
# Slider navigation
prog.release(
fn=load_example,
inputs=prog,
outputs=[inp_box, prog, info_md],
)
# Filters
dataset_filter.change(
fn=jump_on_filters,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
type_filter.change(
fn=jump_on_filters,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
corpus_filter.change(
fn=jump_on_filters,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
term_search.change(
fn=jump_on_filters,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
# Prev / Next navigation respecting filters
prev_btn.click(
fn=nav_prev,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
next_btn.click(
fn=nav_next,
inputs=[dataset_filter, type_filter, term_search, corpus_filter],
outputs=[inp_box, prog, info_md],
)
with gr.Tab("βš–οΈ Comparison"):
gr.Markdown("### Side-by-Side Comparison of Borderline Cases")
gr.Markdown("Compare examples to understand **why the same type or term** is validated differently based on context.")
corpus_filter_comp = gr.Dropdown(
choices=["All", "reliefweb", "WB project documents"],
value="All",
label="πŸ“š Filter by Corpus",
)
# State to hold current comparison manager
comparison_manager_state = gr.State(comparison_manager)
comparison_mode = gr.Radio(
choices=["By Type", "By Term"],
value="By Type",
label="Comparison Mode"
)
# Type comparison
with gr.Group(visible=True) as type_comparison_group:
gr.Markdown("**Compare by Data Type**: See how the same type (e.g., 'system') can be valid or invalid")
comp_type_selector = gr.Dropdown(
choices=comparison_manager.mixed_types,
value=comparison_manager.mixed_types[0] if comparison_manager.mixed_types else None,
label="Select Mixed Type to Compare",
)
type_pos_idx_state = gr.State(0)
type_neg_idx_state = gr.State(0)
with gr.Row():
with gr.Column():
type_pos_header = gr.Markdown("### βœ… IS Dataset")
type_pos_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
type_pos_info_box = gr.HTML()
type_pos_next_btn = gr.Button("Next Example ➑️")
with gr.Column():
type_neg_header = gr.Markdown("### ❌ NOT Dataset")
type_neg_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
type_neg_info_box = gr.HTML()
type_neg_next_btn = gr.Button("Next Example ➑️")
# Term comparison
with gr.Group(visible=False) as term_comparison_group:
gr.Markdown("**Compare by Term**: See how the exact same term appears in different validation contexts")
term_search_box = gr.Textbox(
label="πŸ” Search Terms",
placeholder="Type to filter terms (e.g., 'MIS' to find EMIS, MIS, MIS database...)",
value="",
)
comp_term_selector = gr.Dropdown(
choices=comparison_manager.confusing_terms,
value=comparison_manager.confusing_terms[0] if comparison_manager.confusing_terms else None,
label="Select Confusing Term to Compare",
)
term_pos_idx_state = gr.State(0)
term_neg_idx_state = gr.State(0)
with gr.Row():
with gr.Column():
term_pos_header = gr.Markdown("### βœ… IS Dataset")
term_pos_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
term_pos_info_box = gr.HTML()
term_pos_next_btn = gr.Button("Next Example ➑️")
with gr.Column():
term_neg_header = gr.Markdown("### ❌ NOT Dataset")
term_neg_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
term_neg_info_box = gr.HTML()
term_neg_next_btn = gr.Button("Next Example ➑️")
# Toggle visibility based on mode
def toggle_comparison_mode(mode):
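                    """Show only the comparison group that matches the selected mode."""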
return gr.update(visible=mode == "By Type"), gr.update(visible=mode == "By Term")
comparison_mode.change(
fn=toggle_comparison_mode,
inputs=[comparison_mode],
outputs=[type_comparison_group, term_comparison_group]
)
# Corpus filter change - rebuild comparison manager
corpus_filter_comp.change(
fn=rebuild_comparison,
inputs=[corpus_filter_comp],
outputs=[comparison_manager_state, comp_type_selector, comp_term_selector]
)
# Type comparison events
comp_type_selector.change(
fn=lambda: (0, 0),
outputs=[type_pos_idx_state, type_neg_idx_state]
).then(
fn=load_type_comparison,
inputs=[comparison_manager_state, comp_type_selector, type_pos_idx_state, type_neg_idx_state],
outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
)
type_pos_next_btn.click(
fn=next_pos,
inputs=[type_pos_idx_state],
outputs=[type_pos_idx_state]
).then(
fn=load_type_comparison,
inputs=[comparison_manager_state, comp_type_selector, type_pos_idx_state, type_neg_idx_state],
outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
)
type_neg_next_btn.click(
fn=next_neg,
inputs=[type_neg_idx_state],
outputs=[type_neg_idx_state]
).then(
fn=load_type_comparison,
inputs=[comparison_manager_state, comp_type_selector, type_pos_idx_state, type_neg_idx_state],
outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
)
# Term comparison events
def update_term_dropdown(manager, search_query):
"""Update dropdown choices based on search query."""
filtered_terms = manager.filter_terms(search_query)
if filtered_terms:
return gr.update(choices=filtered_terms, value=filtered_terms[0])
else:
return gr.update(choices=[], value=None)
term_search_box.change(
fn=update_term_dropdown,
inputs=[comparison_manager_state, term_search_box],
outputs=[comp_term_selector]
).then(
fn=lambda: (0, 0),
outputs=[term_pos_idx_state, term_neg_idx_state]
).then(
fn=load_term_comparison,
inputs=[comparison_manager_state, comp_term_selector, term_pos_idx_state, term_neg_idx_state],
outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
)
comp_term_selector.change(
fn=lambda: (0, 0),
outputs=[term_pos_idx_state, term_neg_idx_state]
).then(
fn=load_term_comparison,
inputs=[comparison_manager_state, comp_term_selector, term_pos_idx_state, term_neg_idx_state],
outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
)
term_pos_next_btn.click(
fn=next_pos,
inputs=[term_pos_idx_state],
outputs=[term_pos_idx_state]
).then(
fn=load_term_comparison,
inputs=[comparison_manager_state, comp_term_selector, term_pos_idx_state, term_neg_idx_state],
outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
)
term_neg_next_btn.click(
fn=next_neg,
inputs=[term_neg_idx_state],
outputs=[term_neg_idx_state]
).then(
fn=load_term_comparison,
inputs=[comparison_manager_state, comp_term_selector, term_pos_idx_state, term_neg_idx_state],
outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
)
return demo
if __name__ == "__main__":
create_demo().launch(share=False, debug=False)