ParseComparisons / app.py

Upload app.py with huggingface_hub

80e58bc verified 6 months ago

16.1 kB

	import difflib
	import html
	import os
	import random
	import traceback
	from typing import Dict, List, Tuple

	import gradio as gr
	import gspread
	from google.oauth2.service_account import Credentials

	# --- Configuration ---
	# OAuth scopes passed to the service-account credentials.
	SCOPES = [
	    'https://www.googleapis.com/auth/spreadsheets',
	    'https://www.googleapis.com/auth/drive',
	]
	# Key file handed to Credentials.from_service_account_file().
	SERVICE_ACCOUNT_FILE = 'ext-collab-human-data-annotate-5fb589a03d60.json'
	# Key of the spreadsheet opened with open_by_key().
	ORIGINAL_SHEET_ID = '1WbHKeZ0VKWWD8JdbH5KhsIpkMECRzwXjFwHue2lknPw'
	# Title of the worksheet whose rows are grouped for annotation.
	SHEET_NAME = 'Gradio TEST - Tables v2.0 vs v2.1 (n=21)'
	# Directory that screenshot file names from the sheet are joined onto.
	SCREENSHOTS_DIR = 'screenshots'

	# --- Custom CSS for better table rendering ---
	# Raw <style> markup that the UI definition injects through gr.HTML().
	# The rules make the table wrapper scroll horizontally, let the table size
	# its columns automatically at full width, and keep whitespace inside
	# cells as written (white-space: pre).
	CUSTOM_CSS = """
	<style>
	.gradio-container .table-wrapper { overflow-x: auto !important; }
	.gradio-container table { table-layout: auto !important; width: 100% !important; }
	.gradio-container th, .gradio-container td { white-space: pre !important; padding: 4px !important; }
	</style>
	"""

	# --- Helper Functions ---
	def _split_table_row(row: str) -> List[str]:
	    """Split one pipe-delimited row into a list of stripped cell texts."""
	    row = row.strip()
	    # Drop a single framing pipe on each side so "| a | b |" yields two
	    # cells instead of four (with empty first and last cells).
	    if row.startswith('|'):
	        row = row[1:]
	    if row.endswith('|'):
	        row = row[:-1]
	    return [cell.strip() for cell in row.split('|')]


	def _is_separator_row(cells: List[str]) -> bool:
	    """Return True when every cell looks like a header separator (``---``, ``:--:``)."""
	    return bool(cells) and all(
	        cell and '-' in cell and set(cell) <= set('-:') for cell in cells
	    )


	def format_as_markdown_table(raw_text: str) -> str:
	    """Convert pipe-delimited text into Markdown tables and block quotes.

	    Lines containing ``|`` become table rows; the first row of each table is
	    used as the header and is followed by a generated ``|---|`` separator.
	    Any other non-blank line is emitted as a block quote. Blank lines are
	    ignored.

	    Args:
	        raw_text: Text to convert; may be empty or None.

	    Returns:
	        The Markdown string, or a "no data" placeholder quote when
	        ``raw_text`` is empty or whitespace-only.
	    """
	    if not raw_text or not raw_text.strip():
	        return "> _No data to display._"

	    pieces = []
	    in_table = False      # True while consecutive table rows are being emitted
	    after_header = False  # True only for the line right after a generated header

	    for line in raw_text.strip().split('\n'):
	        if '|' in line:
	            cells = _split_table_row(line)
	            # The input may already carry its own separator row; a second one
	            # would render as a data row full of dashes, so skip it.
	            if after_header and _is_separator_row(cells):
	                after_header = False
	                continue
	            row_md = "| " + " | ".join(cells) + " |\n"
	            if in_table:
	                pieces.append(row_md)
	                after_header = False
	            else:
	                # A table that follows a block quote needs a blank line in
	                # between, otherwise the row is absorbed into the quote.
	                lead = "\n" if pieces else ""
	                pieces.append(lead + row_md + "|" + "---|" * len(cells) + "\n")
	                in_table = True
	                after_header = True
	        elif line.strip():
	            pieces.append(f"\n> {line.strip()}\n")
	            # Prose ends the current table; the next row starts a new one.
	            in_table = False
	            after_header = False

	    return "".join(pieces)

	def create_diff_html(base_text: str, new_text: str) -> str:
	    """Render a line-by-line diff of two texts as an HTML fragment.

	    Lines are compared with ``difflib.ndiff``. Added lines get a green
	    background, removed lines a red one, unchanged lines are emitted as-is,
	    and ndiff's ``? `` hint lines are dropped.

	    Args:
	        base_text: Reference text.
	        new_text: Text compared against the reference.

	    Returns:
	        A ``<div>`` containing the diff, or a short placeholder paragraph
	        when either input is empty or None.
	    """
	    if not base_text or not new_text:
	        return "<p><i>Not enough data to create a diff.</i></p>"

	    added_open = "<span style='background-color: #e6ffed;'>"
	    removed_open = "<span style='background-color: #ffeef0;'>"
	    parts = [
	        "<div style='font-family: monospace; white-space: pre-wrap; "
	        "line-height: 1.4; font-size: 0.9em;'>"
	    ]

	    for line in difflib.ndiff(base_text.splitlines(), new_text.splitlines()):
	        if line.startswith('? '):
	            continue
	        # The compared text is arbitrary content; escape it so characters
	        # such as "<" and "&" are displayed instead of interpreted as markup.
	        text = html.escape(line)
	        if line.startswith('+ '):
	            parts.append(f"{added_open}{text}</span>\n")
	        elif line.startswith('- '):
	            parts.append(f"{removed_open}{text}</span>\n")
	        else:
	            parts.append(f"{text}\n")

	    parts.append("</div>")
	    return "".join(parts)

	class AnnotationApp:
	    """Holds the Google Sheets handles and per-annotator state for the app.

	    Rows of the source worksheet are grouped by their 'eval ID' value; groups
	    with at least three rows are kept as "triplets". Each annotator writes to
	    a worksheet of their own, named "<source title> - <annotator name>".
	    """

	    def __init__(self):
	        """Create empty state and try to connect to Google Sheets."""
	        self.spreadsheet = None
	        self.worksheet = None
	        self.user_sheets = {}          # annotator name -> that annotator's worksheet
	        self.all_triplets = {}         # eval ID -> list of row dicts (3 or more)
	        self.claimed_triplets = set()  # eval IDs already handed to an annotator
	        self.user_sessions = {}        # annotator name -> session dict
	        # Header -> 0-based column index. Initialised here so code that writes
	        # to the sheet never meets a missing attribute when grouping stopped early.
	        self.column_indices = {}
	        self.demo_mode = True
	        self.init_google_sheets()

	    def init_google_sheets(self):
	        """Open the spreadsheet and worksheet and group the rows.

	        Any failure is printed and leaves the app in demo mode.
	        """
	        try:
	            creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
	            gc = gspread.authorize(creds)
	            self.spreadsheet = gc.open_by_key(ORIGINAL_SHEET_ID)
	            self.worksheet = self.spreadsheet.worksheet(SHEET_NAME)
	            self.group_data_into_triplets()
	            self.demo_mode = False
	            print("🎉 Google Sheets initialized successfully!")
	        except Exception as e:
	            # Start-up boundary: report the problem and keep the UI usable.
	            print(f"❌ Error initializing Google Sheets: {e}")
	            self.demo_mode = True

	    def group_data_into_triplets(self):
	        """Read every row of the worksheet and group the rows by 'eval ID'.

	        Fills ``column_indices`` from the header row and ``all_triplets`` with
	        the groups that contain at least three rows. Each stored row dict
	        carries a 'row_num' key holding its 1-based sheet row number.
	        """
	        print("Grouping data...")
	        all_values = self.worksheet.get_all_values()
	        if not all_values:
	            print("Sheet is empty.")
	            return

	        headers = [h.strip() for h in all_values[0]]
	        self.column_indices = {h: i for i, h in enumerate(headers)}
	        if 'eval ID' not in self.column_indices:
	            print("ERROR: 'eval ID' column not found.")
	            return

	        grouped = {}
	        # Sheet rows are 1-based and row 1 is the header, so data starts at 2.
	        for row_num, values in enumerate(all_values[1:], start=2):
	            record = dict(zip(headers, values))
	            eval_id = record.get('eval ID')
	            if eval_id:
	                grouped.setdefault(eval_id, []).append({'row_num': row_num, **record})

	        self.all_triplets = {doc_id: rows for doc_id, rows in grouped.items() if len(rows) >= 3}
	        print(f"Found {len(self.all_triplets)} complete triplets.")

	    def get_available_triplets(self) -> List[str]:
	        """Return the eval IDs of triplets that have not been claimed yet."""
	        return [doc_id for doc_id in self.all_triplets if doc_id not in self.claimed_triplets]

	    def create_user_sheet(self, annotator_name: str) -> Tuple[bool, str]:
	        """Find or create the annotator's own worksheet.

	        An existing worksheet with the annotator's sheet name is reused;
	        otherwise the source worksheet is duplicated under that name.

	        Returns:
	            ``(True, message)`` on success (always in demo mode), or
	            ``(False, error text)`` when the sheet could not be opened or
	            created.
	        """
	        if self.demo_mode:
	            return True, "Demo mode"
	        try:
	            sheet_name = f"{self.worksheet.title} - {annotator_name}"
	            try:
	                worksheet = self.spreadsheet.worksheet(sheet_name)
	                message = f"Resuming work: {sheet_name}"
	            except gspread.exceptions.WorksheetNotFound:
	                # The duplication runs inside the outer try so that a failure
	                # here is reported as (False, ...) rather than raised.
	                worksheet = self.spreadsheet.duplicate_sheet(self.worksheet.id, new_sheet_name=sheet_name)
	                message = f"Created new sheet: {sheet_name}"
	        except Exception as e:
	            return False, str(e)
	        self.user_sheets[annotator_name] = worksheet
	        return True, message

	    def write_annotations_to_sheet(self, annotator_name: str, session: Dict):
	        """Write the session's annotations into the annotator's worksheet.

	        Per-parse evaluations go to the row of the parse shown under each UI
	        letter; the overall comparison goes to every row of the triplet.
	        Does nothing in demo mode. Failures are printed, not raised.

	        Args:
	            annotator_name: Key into ``user_sheets``.
	            session: Session dict holding 'annotations',
	                'randomized_parse_map' and 'triplet_rows'.
	        """
	        if self.demo_mode:
	            return
	        try:
	            user_sheet = self.user_sheets[annotator_name]
	            annotations = session.get('annotations', {})

	            # Check if randomized_parse_map exists
	            if 'randomized_parse_map' not in session:
	                print(f"❌ No randomized_parse_map found in session for {annotator_name}")
	                return

	            cols = self.column_indices
	            cells_to_update = []

	            for ui_char, original_row in session['randomized_parse_map'].items():
	                eval_data = annotations.get(ui_char)
	                # Placeholder parses added as padding have no sheet row; skip
	                # them instead of letting one KeyError discard the whole write.
	                row_num = original_row.get('row_num')
	                if not eval_data or row_num is None:
	                    continue
	                # gspread cells are 1-based, column_indices is 0-based.
	                cells_to_update.extend([
	                    gspread.Cell(row_num, cols['annotator evaluation'] + 1, eval_data['evaluation']),
	                    gspread.Cell(row_num, cols['annotator explanation'] + 1, eval_data['explanation']),
	                ])

	            if 'comparison' in annotations:
	                comp_data = annotations['comparison']
	                for original_row in session['triplet_rows']:
	                    row_num = original_row['row_num']
	                    cells_to_update.extend([
	                        gspread.Cell(row_num, cols['which /parse version produced the best output?'] + 1, comp_data['best']),
	                        gspread.Cell(row_num, cols['explanation of v2.0 vs v2.1'] + 1, comp_data['explanation']),
	                    ])

	            if cells_to_update:
	                user_sheet.update_cells(cells_to_update, value_input_option='USER_ENTERED')
	                print(f"✅ Wrote {len(cells_to_update)} cells to the sheet.")
	        except Exception as e:
	            print(f"❌ FAILED TO WRITE TO SHEET: {e}")

	# --- Backend Logic ---
	# Single module-level application object. Constructing it runs
	# init_google_sheets(); the handler functions below read and mutate it.
	app = AnnotationApp()

	def on_setup(name):
	    """Handle the "Start Session" button.

	    Prepares the annotator's worksheet and reveals the triplet selector and
	    the start button.

	    Args:
	        name: Text from the name box.

	    Returns:
	        A 3-tuple for (status message, triplet dropdown, start button). On
	        failure, or when the name is blank, both controls are hidden.
	    """
	    hidden = gr.update(visible=False)

	    # A blank name would open or create a worksheet titled "<title> - ",
	    # so reject it before touching the spreadsheet.
	    if not name or not name.strip():
	        return "Please enter your name to start a session.", hidden, hidden

	    success, message = app.create_user_sheet(name)
	    if not success:
	        return message, hidden, hidden

	    triplets = app.get_available_triplets()
	    status = f"{message}. Found {len(triplets)} triplets."
	    return status, gr.update(visible=True, choices=triplets), gr.update(visible=True)

	def start_annotation(annotator_name: str, selected_doc_id: str):
	    """Record the chosen document in the annotator's session and load it.

	    A session dict is created for the annotator when none exists yet.

	    Returns:
	        Whatever ``load_next_triplet`` returns for this annotator.
	    """
	    session = app.user_sessions.setdefault(annotator_name, {})
	    session['current_doc_id'] = selected_doc_id
	    return load_next_triplet(annotator_name)

	def load_next_triplet(annotator_name: str):
	    """Claim a triplet for the annotator and build the values that display it.

	    The session's 'current_doc_id' is used when it is still available;
	    otherwise the first available triplet is taken. The triplet's parses are
	    shuffled and assigned to the UI letters A, B and C.

	    Args:
	        annotator_name: Key of the annotator's session in ``app.user_sessions``.

	    Returns:
	        A 15-item tuple in the order of ``outputs_on_start``: progress text,
	        UI data dict, screenshot path (or None), four display texts, then
	        reset values for the three evaluation/explanation pairs and the
	        comparison pair. When nothing is left to annotate, the last 13 items
	        hide their components.
	    """
	    # Ensure session exists
	    session = app.user_sessions.setdefault(annotator_name, {})
	    doc_id = session.get('current_doc_id')
	    available = app.get_available_triplets()

	    if not doc_id or doc_id not in available:
	        if not available:
	            # One value per output component: 2 + 13 = 15.
	            return ("🎉 All triplets completed!", {}, *([gr.update(visible=False)] * 13))
	        doc_id = available[0]

	    app.claimed_triplets.add(doc_id)
	    session['current_doc_id'] = doc_id
	    triplet_rows = app.all_triplets[doc_id]
	    session['triplet_rows'] = triplet_rows

	    # Take the first usable screenshot and the first non-empty gold render.
	    screenshot, gold_render = "", ""
	    for r in triplet_rows:
	        if not screenshot:
	            fname = (r.get('PDF: screenshot (link)') or '').strip()
	            if fname:
	                fpath = os.path.join(SCREENSHOTS_DIR, fname)
	                # isfile rather than exists: a directory is not an image.
	                if os.path.isfile(fpath):
	                    screenshot = fpath
	        if not gold_render and r.get('gold render'):
	            gold_render = format_as_markdown_table(r.get('gold render'))

	    parses = {r.get('tool name').strip(): r for r in triplet_rows if r.get('tool name')}
	    parse_keys = list(parses.keys())
	    random.shuffle(parse_keys)

	    # Ensure we have exactly 3 parses
	    if len(parse_keys) < 3:
	        print(f"❌ Warning: Only {len(parse_keys)} parses found for {doc_id}, expected 3")
	        # Pad with empty entries if needed
	        while len(parse_keys) < 3:
	            placeholder = f'missing_{len(parse_keys)}'
	            parse_keys.append(placeholder)
	            parses[placeholder] = {'tool name': 'Missing', 'tool output': ''}

	    parse_map = {
	        'A': parses[parse_keys[0]],
	        'B': parses[parse_keys[1]],
	        'C': parses[parse_keys[2]],
	    }
	    session['randomized_parse_map'] = parse_map

	    ui_data = {
	        'screenshot': screenshot,
	        'gold_raw': gold_render,
	        'parse_a_raw': format_as_markdown_table(parse_map['A'].get('tool output', '')),
	        'parse_b_raw': format_as_markdown_table(parse_map['B'].get('tool output', '')),
	        'parse_c_raw': format_as_markdown_table(parse_map['C'].get('tool output', '')),
	    }

	    progress = f"Annotating Document ID: {doc_id}"
	    initial_views = update_view(ui_data, 'rendered')

	    return (
	        progress,
	        ui_data,
	        # None clears the image when no screenshot file was found.
	        ui_data.get('screenshot') or None,
	        *initial_views,
	        gr.update(value=None), "",  # evaluation / explanation A
	        gr.update(value=None), "",  # evaluation / explanation B
	        gr.update(value=None), "",  # evaluation / explanation C
	        gr.update(value=None), "",  # best-parse choice / explanation
	    )

	def update_view(current_data, view_type):
	    """Build the four display values (gold, A, B, C) for a view type.

	    Args:
	        current_data: Dict with 'gold_raw', 'parse_a_raw', 'parse_b_raw'
	            and 'parse_c_raw' entries.
	        view_type: 'rendered', 'raw' or 'diff'.

	    Returns:
	        A 4-item list: the stored texts for 'rendered'; the same texts
	        wrapped in a ``markdown`` code fence for 'raw'; for 'diff' an empty
	        string followed by each parse's diff against the gold text.
	        None for any other view type.
	    """
	    field_order = ('gold_raw', 'parse_a_raw', 'parse_b_raw', 'parse_c_raw')

	    if view_type == 'rendered':
	        rendered = []
	        for name in field_order:
	            rendered.append(current_data.get(name, ''))
	        return rendered

	    if view_type == 'raw':
	        fenced = []
	        for name in field_order:
	            fenced.append(f"```markdown\n{current_data.get(name, '')}\n```")
	        return fenced

	    if view_type == 'diff':
	        reference = current_data.get('gold_raw', '')
	        # The gold column has nothing to be compared against, so it stays empty.
	        result = [""]
	        for name in field_order[1:]:
	            result.append(create_diff_html(reference, current_data.get(name)))
	        return result

	    return None

	def _submit_error(message):
	    """Build a 15-item handler result that shows ``message`` and changes nothing else."""
	    # gr.update() with no arguments leaves a component as it is. It is used
	    # for the data state as well, so that a validation error does not replace
	    # the data behind the view buttons with an empty dict.
	    return (message, gr.update(), *([gr.update()] * 13))


	def submit_annotations(annotator_name, eval_a, exp_a, eval_b, exp_b, eval_c, exp_c, best_choice, best_exp):
	    """Store the submitted annotations, write them out and load the next triplet.

	    Args:
	        annotator_name: Key of the annotator's session.
	        eval_a, eval_b, eval_c: Evaluation chosen for each displayed parse.
	        exp_a, exp_b, exp_c: Free-text explanation for each displayed parse.
	        best_choice: Radio label naming the best parse.
	        best_exp: Free-text explanation of the best-parse choice.

	    Returns:
	        The result of ``load_next_triplet`` on success, otherwise a 15-item
	        tuple whose first item is an error message.
	    """
	    # Validate inputs
	    if not annotator_name or annotator_name not in app.user_sessions:
	        print(f"❌ Invalid annotator name or session not found: {annotator_name}")
	        return _submit_error("Error: Session not found")

	    if not best_choice:
	        print("❌ No best choice selected")
	        return _submit_error("Error: Please select which parse is best")

	    session = app.user_sessions[annotator_name]

	    # Verify session has required data
	    if 'randomized_parse_map' not in session:
	        print(f"❌ No randomized_parse_map in session for {annotator_name}")
	        return _submit_error("Error: Session data corrupted, please reload")

	    # Extract the UI choice (A, B, or C) from the radio button text
	    choice_map = {"Parse A is best": "A", "Parse B is best": "B", "Parse C is best": "C"}
	    ui_char = choice_map.get(best_choice)

	    if not ui_char or ui_char not in session['randomized_parse_map']:
	        print(f"❌ Invalid choice: {best_choice} -> {ui_char}")
	        return _submit_error("Error: Invalid selection")

	    # The stored value is the tool name, not the UI letter, because the
	    # letters are assigned by a shuffle for each triplet.
	    best_tool_name = session['randomized_parse_map'][ui_char].get('tool name', 'Unknown')

	    session['annotations'] = {
	        'A': {'evaluation': eval_a or 'Not specified', 'explanation': exp_a or ''},
	        'B': {'evaluation': eval_b or 'Not specified', 'explanation': exp_b or ''},
	        'C': {'evaluation': eval_c or 'Not specified', 'explanation': exp_c or ''},
	        'comparison': {'best': best_tool_name, 'explanation': best_exp or ''},
	    }

	    app.write_annotations_to_sheet(annotator_name, session)
	    return load_next_triplet(annotator_name)

	# --- Gradio UI Definition ---
	with gr.Blocks(title="3-Way Parse Comparison", theme=gr.themes.Soft()) as demo:
	    # NOTE(review): the nesting of the layout below was reconstructed from a
	    # source whose indentation was lost; confirm, in particular, the placement
	    # of the submit button.
	    gr.HTML(CUSTOM_CSS)  # Inject the CSS for better table rendering
	    gr.Markdown("# 📊 3-Way Document Parse Comparison Tool")
	    annotator_name_state = gr.State("")
	    current_data_state = gr.State({})

	    # Step 1: name entry and document selection (visible at start).
	    with gr.Column(visible=True) as setup_section:
	        name_input = gr.Textbox(label="Enter your name")
	        setup_btn = gr.Button("Start Session", variant="primary")
	        setup_message = gr.Textbox(label="Status", interactive=False)
	        triplet_selector = gr.Dropdown(
	            label="Available Document Triplets (Optional, will auto-load if empty)",
	            visible=False,
	        )
	        start_btn = gr.Button("Start Annotation", variant="primary", visible=False)

	    # Step 2: the annotation screen (hidden until annotation starts).
	    with gr.Column(visible=False) as annotation_section:
	        progress_display = gr.Textbox(label="Progress", interactive=False)
	        gr.Markdown("### 📄 Original Document")
	        screenshot_display = gr.Image()
	        gr.Markdown("---")

	        with gr.Row():
	            gr.Markdown("### Select View Type:")
	            show_rendered_btn = gr.Button("🖼️ Rendered")
	            show_raw_btn = gr.Button("📄 Raw Markdown")
	            show_diff_btn = gr.Button("✨ Diffs vs. Gold")

	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("### 🏆 Gold Standard")
	                gold_display = gr.Markdown()
	            with gr.Column():
	                gr.Markdown("### Parse A")
	                parse_a_display = gr.Markdown()
	            with gr.Column():
	                gr.Markdown("### Parse B")
	                parse_b_display = gr.Markdown()
	            with gr.Column():
	                gr.Markdown("### Parse C")
	                parse_c_display = gr.Markdown()

	        gr.Markdown("---")

	        with gr.Row():
	            with gr.Column(scale=2):
	                with gr.Row():
	                    with gr.Column():
	                        eval_a = gr.Dropdown(label="Evaluate A", choices=["No issues", "Minor", "Severe"])
	                        exp_a = gr.Textbox(label="Explanation A", lines=3)
	                    with gr.Column():
	                        eval_b = gr.Dropdown(label="Evaluate B", choices=["No issues", "Minor", "Severe"])
	                        exp_b = gr.Textbox(label="Explanation B", lines=3)
	                    with gr.Column():
	                        eval_c = gr.Dropdown(label="Evaluate C", choices=["No issues", "Minor", "Severe"])
	                        exp_c = gr.Textbox(label="Explanation C", lines=3)
	            with gr.Column(scale=1):
	                comparison_choice = gr.Radio(
	                    ["Parse A is best", "Parse B is best", "Parse C is best"],
	                    label="Which parse is best overall?",
	                )
	                comparison_explanation = gr.Textbox(label="Explain final choice", lines=4)

	        submit_btn = gr.Button("✅ Submit & Load Next", variant="primary")

	    # --- Event Handlers ---
	    # Prepare the annotator's sheet, then remember the entered name in state.
	    setup_btn.click(
	        on_setup,
	        [name_input],
	        [setup_message, triplet_selector, start_btn],
	    ).then(
	        lambda name: name,
	        [name_input],
	        [annotator_name_state],
	    )

	    # The handlers that load a triplet return one value per entry, in this order.
	    outputs_on_start = [
	        progress_display, current_data_state, screenshot_display,
	        gold_display, parse_a_display, parse_b_display, parse_c_display,
	        eval_a, exp_a, eval_b, exp_b, eval_c, exp_c,
	        comparison_choice, comparison_explanation,
	    ]

	    # Load the first triplet, then swap the setup screen for the annotation screen.
	    start_btn.click(
	        start_annotation,
	        [annotator_name_state, triplet_selector],
	        outputs_on_start,
	    ).then(
	        lambda: gr.update(visible=False), None, [setup_section]
	    ).then(
	        lambda: gr.update(visible=True), None, [annotation_section]
	    )

	    # Each view button re-renders the four display panes from the stored data.
	    view_displays = [gold_display, parse_a_display, parse_b_display, parse_c_display]
	    show_rendered_btn.click(lambda data: update_view(data, 'rendered'), [current_data_state], view_displays)
	    show_raw_btn.click(lambda data: update_view(data, 'raw'), [current_data_state], view_displays)
	    show_diff_btn.click(lambda data: update_view(data, 'diff'), [current_data_state], view_displays)

	    submit_inputs = [
	        annotator_name_state,
	        eval_a, exp_a, eval_b, exp_b, eval_c, exp_c,
	        comparison_choice, comparison_explanation,
	    ]
	    submit_btn.click(submit_annotations, submit_inputs, outputs_on_start)

	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)