Spaces:

jedick
/

noteworthy-differences

Running

jedick

Get earliest available revision for a specified revision number

3c02ce2 1 day ago

15.3 kB

	import gradio as gr
	from wiki_data_fetcher import (
	get_previous_revisions,
	get_revision_from_age,
	get_wikipedia_introduction,
	extract_revision_info,
	get_revisions_behind,
	get_random_wikipedia_title,
	)
	from models import classifier, judge
	import logfire
	from dotenv import load_dotenv

	# Load API keys
	# (called once at import time, before any model or logging setup below)
	load_dotenv()
	# Setup logging with Logfire
	# The step functions in this file are decorated with @logfire.instrument;
	# presumably they report through this configuration -- confirm against Logfire docs.
	logfire.configure()

	# If running a standalone Gradio app via `demo.launch()` within a script,
	# Logfire's auto-instrumentation for FastAPI is often automatically handled
	# if installed. If mounting within a separate FastAPI app, use:
	# logfire.instrument_fastapi(app)


	@logfire.instrument("Step 1: Fetch current revision")
	def fetch_current_revision(title: str):
	    """
	    Fetch current revision of a Wikipedia article and return its introduction.

	    Args:
	        title: Wikipedia article title

	    Returns:
	        Tuple of (introduction, timestamp). The timestamp is formatted for
	        display as "Timestamp: ..." or is an empty string if unavailable.
	        If the introduction cannot be retrieved, an error message is
	        returned in its place.

	    Raises:
	        gr.Error: If the title is empty, the page cannot be found, or any
	            other exception occurs while fetching the revision.
	    """
	    if not title or not title.strip():
	        error_msg = "Please enter a Wikipedia page title."
	        raise gr.Error(error_msg, print_exception=False)

	    try:
	        # Get current revision (revision 0)
	        json_data = get_previous_revisions(title, revisions=0)
	        revision_info = extract_revision_info(json_data, revnum=0)

	        if not revision_info.get("revid"):
	            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
	            raise gr.Error(error_msg, print_exception=False)

	        revid = revision_info["revid"]
	        timestamp = revision_info["timestamp"]

	        # Get introduction
	        introduction = get_wikipedia_introduction(revid)

	        if introduction is None:
	            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"

	        # Format timestamp for display
	        timestamp = f"Timestamp: {timestamp}" if timestamp else ""

	        # Return introduction text and timestamp
	        return introduction, timestamp

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # instead of re-wrapping it in a second "Error occurred" message
	        raise
	    except Exception as e:
	        error_msg = f"Error occurred: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e


	@logfire.instrument("Step 2: Fetch previous revision")
	def fetch_previous_revision(title: str, unit: str, number: int, new_revision: str):
	    """
	    Fetch previous revision of a Wikipedia article and return its introduction.

	    Args:
	        title: Wikipedia article title
	        unit: "revisions" or "days"
	        number: Number of revisions or days behind
	        new_revision: Introduction text of the current revision; when empty,
	            nothing is fetched and (None, None) is returned

	    Returns:
	        Tuple of (introduction, timestamp). The timestamp is formatted for
	        display as "Timestamp: ..., N revisions behind" or is an empty string
	        if unavailable.

	    Raises:
	        gr.Error: If the requested revision cannot be found or any other
	            exception occurs while fetching it.
	    """

	    # If we get here with an empty new revision, then an error should have been raised
	    # in fetch_current_revision, so just return empty values without raising another error
	    if not new_revision:
	        return None, None

	    try:
	        # Get previous revision based on unit
	        if unit == "revisions":
	            json_data = get_previous_revisions(title, revisions=number)
	            revision_info = extract_revision_info(json_data, revnum=number)
	        else:  # unit == "days"
	            revision_info = get_revision_from_age(title, age_days=number)

	        if not revision_info.get("revid"):
	            error_msg = f"Error: Could not find revision {number} {'revisions' if unit == 'revisions' else 'days'} behind for '{title}'."
	            raise gr.Error(error_msg, print_exception=False)

	        revid = revision_info["revid"]
	        timestamp = revision_info["timestamp"]

	        # Get introduction
	        introduction = get_wikipedia_introduction(revid)

	        if introduction is None:
	            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"

	        # Get revisions_behind
	        if unit == "revisions":
	            revisions_behind = revision_info["revnum"]
	        else:
	            revisions_behind = get_revisions_behind(title, revid)
	            # For a negative number, replace the negative sign with ">"
	            # (presumably a negative count marks a lower bound -- confirm
	            # against get_revisions_behind)
	            if revisions_behind < 0:
	                revisions_behind = str(revisions_behind).replace("-", ">")

	        # Format timestamp for display
	        timestamp = (
	            f"Timestamp: {timestamp}, {revisions_behind} revisions behind"
	            if timestamp
	            else ""
	        )

	        # Return introduction text and timestamp
	        return introduction, timestamp

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # instead of re-wrapping it in a second "Error occurred" message
	        raise
	    except Exception as e:
	        error_msg = f"Error occurred: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e


	def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
	    """
	    Run a classification model on the revisions.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text
	        prompt_style: heuristic or few-shot

	    Returns:
	        Tuple of (noteworthy, rationale) (bool, str), or (None, None) if
	        either revision text is empty

	    Raises:
	        gr.Error: If the model returns no result or raises an exception
	    """

	    # Values to return if there is an error
	    noteworthy, rationale = None, None
	    if not old_revision or not new_revision:
	        return noteworthy, rationale

	    try:
	        # Run classifier model
	        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
	        if result:
	            noteworthy = result.get("noteworthy", None)
	            rationale = result.get("rationale", "")
	        else:
	            error_msg = f"Error: Could not get {prompt_style} model result"
	            raise gr.Error(error_msg, print_exception=False)

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # instead of re-wrapping it in a second "Error running model" message
	        raise
	    except Exception as e:
	        error_msg = f"Error running model: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e

	    return noteworthy, rationale


	@logfire.instrument("Step 3a: Run heuristic classifier")
	def run_heuristic_classifier(old_revision: str, new_revision: str):
	    """
	    Run the classifier on the two revisions with the "heuristic" prompt style.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text

	    Returns:
	        Whatever run_classifier returns for prompt_style="heuristic"
	    """
	    outcome = run_classifier(old_revision, new_revision, prompt_style="heuristic")
	    return outcome


	@logfire.instrument("Step 3b: Run few-shot classifier")
	def run_fewshot_classifier(old_revision: str, new_revision: str):
	    """
	    Run the classifier on the two revisions with the "few-shot" prompt style.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text

	    Returns:
	        Whatever run_classifier returns for prompt_style="few-shot"
	    """
	    outcome = run_classifier(old_revision, new_revision, prompt_style="few-shot")
	    return outcome


	def compute_confidence(
	    heuristic_noteworthy,
	    fewshot_noteworthy,
	    judge_noteworthy,
	    heuristic_rationale,
	    fewshot_rationale,
	    judge_reasoning,
	):
	    """
	    Derive a confidence label from the three noteworthy labels.

	    Args:
	        heuristic_noteworthy: Label from the heuristic classifier
	        fewshot_noteworthy: Label from the few-shot classifier
	        judge_noteworthy: Label from the judge
	        heuristic_rationale: Heuristic model's rationale
	        fewshot_rationale: Few-shot model's rationale
	        judge_reasoning: Judge's reasoning

	    Returns:
	        "High", "Moderate" or "Questionable", or None when any rationale
	        or the judge's reasoning is missing
	    """
	    # No label unless all three texts are present
	    all_texts_present = heuristic_rationale and fewshot_rationale and judge_reasoning
	    if not all_texts_present:
	        return None

	    # Classifiers and judge all agree
	    if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
	        return "High"

	    # Classifiers disagree, judge decides
	    if heuristic_noteworthy != fewshot_noteworthy:
	        return "Moderate"

	    # Classifiers agree, judge vetoes
	    return "Questionable"


	@logfire.instrument("Step 4: Run judge")
	def run_judge(
	    old_revision: str,
	    new_revision: str,
	    heuristic_noteworthy: bool,
	    fewshot_noteworthy: bool,
	    heuristic_rationale: str,
	    fewshot_rationale: str,
	    judge_mode: str,
	):
	    """
	    Run judge on the revisions and classifiers' rationales.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text
	        heuristic_noteworthy: Heuristic model's noteworthy label (used for confidence only)
	        fewshot_noteworthy: Few-shot model's noteworthy label (used for confidence only)
	        heuristic_rationale: Heuristic model's rationale
	        fewshot_rationale: Few-shot model's rationale
	        judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")

	    Returns:
	        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str).
	        All four values are None if any revision text or rationale is empty.

	    Raises:
	        gr.Error: If the judge returns no result or raises an exception
	    """

	    # Values to return if there is an error
	    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
	    if (
	        not old_revision
	        or not new_revision
	        or not heuristic_rationale
	        or not fewshot_rationale
	    ):
	        return noteworthy, noteworthy_text, reasoning, confidence

	    try:
	        # Run judge
	        result = judge(
	            old_revision,
	            new_revision,
	            heuristic_rationale,
	            fewshot_rationale,
	            mode=judge_mode,
	        )
	        if result:
	            noteworthy = result.get("noteworthy", "")
	            reasoning = result.get("reasoning", "")
	        else:
	            error_msg = "Error: Could not get judge's result"
	            raise gr.Error(error_msg, print_exception=False)

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # instead of re-wrapping it in a second "Error running judge" message
	        raise
	    except Exception as e:
	        error_msg = f"Error running judge: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e

	    # Format noteworthy label (boolean) as text; leave it empty when the
	    # judge gave no reasoning
	    if not reasoning:
	        noteworthy_text = None
	    else:
	        noteworthy_text = str(noteworthy)

	    # Get confidence score
	    confidence = compute_confidence(
	        heuristic_noteworthy,
	        fewshot_noteworthy,
	        noteworthy,
	        heuristic_rationale,
	        fewshot_rationale,
	        reasoning,
	    )

	    return noteworthy, noteworthy_text, reasoning, confidence


	# Create Gradio interface
	# Defines the page layout (intro text, query controls, old/current revision
	# panes, model output) and wires the event chains that run the
	# fetch -> classify -> judge pipeline defined by the functions above.
	with gr.Blocks(title="Noteworthy Differences") as demo:
	# Introductory description shown at the top of the page
	with gr.Row():
	gr.Markdown(
	"""
	Compare current and old revisions of a Wikipedia article - you choose the number of revisions or days behind.<br>
	Two classifier models (with heuristic and few-shot prompts) and a judge predict the noteworthiness of the differences.<br>
	The judge was aligned with human preferences as described in the
	[GitHub repository](https://github.com/jedick/noteworthy-differences).
	"""
	)

	# Query controls: page title, how far back to look (number + unit),
	# judge mode, and the action buttons
	with gr.Row():
	title_input = gr.Textbox(
	label="Wikipedia Page Title", placeholder="e.g., Albert Einstein", value=""
	)
	# precision=0 restricts the value to whole numbers; minimum=0 disallows negatives
	number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
	unit_dropdown = gr.Dropdown(
	choices=["revisions", "days"], value="revisions", label="Unit"
	)
	judge_mode_dropdown = gr.Dropdown(
	choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
	value="aligned-heuristic",
	label="Judge Mode",
	)
	with gr.Column():
	random_btn = gr.Button("Get Random Page Title")
	submit_btn = gr.Button("Fetch Revisions and Run Model", variant="primary")

	# Result panes: old revision, current revision, and model output
	with gr.Row():
	# Old revision text with its timestamp, plus usage instructions
	with gr.Column():
	gr.Markdown("### Old Revision")
	old_timestamp = gr.Markdown("")
	old_revision = gr.Textbox(label="", lines=15, max_lines=30, container=False)
	gr.Markdown(
	"""#### Query Instructions
	- Page title is case sensitive; use underscores or spaces
	- Specify any number of days or up to 499 revisions behind
	- The closest available revision is retrieved
	- Only article introductions are downloaded
	"""
	)

	# Current revision text with its timestamp, plus the key to the
	# confidence labels produced by compute_confidence
	with gr.Column():
	gr.Markdown("### Current Revision")
	new_timestamp = gr.Markdown("")
	new_revision = gr.Textbox(label="", lines=15, max_lines=30, container=False)
	gr.Markdown(
	"""#### Confidence Key
	- High: heuristic = few-shot, judge agrees
	- Moderate: heuristic ≠ few-shot, judge decides
	- Questionable: heuristic = few-shot, judge vetoes
	"""
	)

	# Rationales from both classifiers, the judge's reasoning, the final
	# noteworthy label with its confidence, and a button to rerun the models
	with gr.Column():
	gr.Markdown("### Model Output")
	heuristic_rationale = gr.Textbox(
	label="Heuristic Model's Rationale",
	lines=2,
	max_lines=7,
	)
	fewshot_rationale = gr.Textbox(
	label="Few-shot Model's Rationale",
	lines=2,
	max_lines=7,
	)
	judge_reasoning = gr.Textbox(
	label="Judge's Reasoning",
	lines=2,
	max_lines=7,
	)
	with gr.Row(variant="default"):
	noteworthy_text = gr.Textbox(
	label="Noteworthy Differences",
	lines=1,
	interactive=False,
	)
	confidence = gr.Textbox(
	label="Confidence",
	lines=1,
	interactive=False,
	)
	rerun_btn = gr.Button("Rerun Model")

	# States to store boolean values
	# (the noteworthy labels from each model, passed between pipeline steps)
	heuristic_noteworthy = gr.State()
	fewshot_noteworthy = gr.State()
	judge_noteworthy = gr.State()

	# Fill the title textbox with a random page title
	random_btn.click(
	fn=get_random_wikipedia_title,
	inputs=None,
	outputs=[title_input],
	)

	# Main pipeline, chained step by step with .then():
	# clear current revision -> fetch current revision -> fetch previous revision
	# -> heuristic classifier -> few-shot classifier -> judge
	gr.on(
	# Press Enter in textbox or use button to submit
	triggers=[title_input.submit, submit_btn.click],
	# Clear the new_revision and new_timestamp values before proceeding.
	# The empty values will propagate to the other components (through function return values) if there is an error.
	fn=lambda: (gr.update(value=""), gr.update(value="")),
	inputs=None,
	outputs=[new_revision, new_timestamp],
	api_name=False,
	).then(
	fn=fetch_current_revision,
	inputs=[title_input],
	outputs=[new_revision, new_timestamp],
	api_name=False,
	).then(
	# new_revision is passed so this step can return early when it is empty
	fn=fetch_previous_revision,
	inputs=[title_input, unit_dropdown, number_input, new_revision],
	outputs=[old_revision, old_timestamp],
	api_name=False,
	).then(
	fn=run_heuristic_classifier,
	inputs=[old_revision, new_revision],
	outputs=[heuristic_noteworthy, heuristic_rationale],
	api_name=False,
	).then(
	fn=run_fewshot_classifier,
	inputs=[old_revision, new_revision],
	outputs=[fewshot_noteworthy, fewshot_rationale],
	api_name=False,
	).then(
	# Input order must match the parameter order of run_judge
	fn=run_judge,
	inputs=[
	old_revision,
	new_revision,
	heuristic_noteworthy,
	fewshot_noteworthy,
	heuristic_rationale,
	fewshot_rationale,
	judge_mode_dropdown,
	],
	outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
	api_name=False,
	)

	# Rerun model when rerun button is clicked
	# Same classifier -> classifier -> judge steps as the main pipeline, but
	# using the revision texts already shown instead of fetching them again
	gr.on(
	triggers=[rerun_btn.click],
	fn=run_heuristic_classifier,
	inputs=[old_revision, new_revision],
	outputs=[heuristic_noteworthy, heuristic_rationale],
	api_name=False,
	).then(
	fn=run_fewshot_classifier,
	inputs=[old_revision, new_revision],
	outputs=[fewshot_noteworthy, fewshot_rationale],
	api_name=False,
	).then(
	fn=run_judge,
	inputs=[
	old_revision,
	new_revision,
	heuristic_noteworthy,
	fewshot_noteworthy,
	heuristic_rationale,
	fewshot_rationale,
	judge_mode_dropdown,
	],
	outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
	api_name=False,
	)

	if __name__ == "__main__":
	    # Start from the hub theme, then override the body background fill
	    # (light and dark) so the theme is used without a background image
	    app_theme = gr.Theme.from_hub("NoCrypt/miku")
	    app_theme.set(
	        body_background_fill="#FFFFFF",
	        body_background_fill_dark="#000000",
	    )

	    # Launch the app with the customized theme
	    demo.launch(theme=app_theme)