Spaces:

jedick
/

noteworthy-differences

Sleeping

noteworthy-differences / app_functions.py

jedick

Use train/test split for feedback

103ea6f about 1 month ago

12 kB

	from wiki_data_fetcher import (
	get_previous_revisions,
	get_revision_from_age,
	get_wikipedia_introduction,
	extract_revision_info,
	get_revisions_behind,
	get_random_wikipedia_title,
	)
	from models import classifier, judge
	import gradio as gr
	import logfire


	@logfire.instrument("Fetch current revision")
	def _fetch_current_revision(title: str):
	    """
	    Fetch current revision of a Wikipedia article and return its introduction.

	    Args:
	        title: Wikipedia article title

	    Returns:
	        Tuple of (introduction, timestamp). The timestamp is formatted for
	        display as "Timestamp: <value>", or is an empty string when the
	        revision info carries no timestamp. If the introduction cannot be
	        retrieved, an error message string is returned in its place.

	    Raises:
	        gr.Error: If the title is blank, no revision id is found for the
	            title, or any other exception occurs while fetching.
	    """
	    if not title or not title.strip():
	        error_msg = "Please enter a Wikipedia page title."
	        raise gr.Error(error_msg, print_exception=False)

	    try:
	        # Get current revision (revision 0)
	        json_data = get_previous_revisions(title, revisions=0)
	        revision_info = extract_revision_info(json_data, revnum=0)

	        if not revision_info.get("revid"):
	            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
	            raise gr.Error(error_msg, print_exception=False)

	        revid = revision_info["revid"]
	        timestamp = revision_info["timestamp"]

	        # Get introduction
	        introduction = get_wikipedia_introduction(revid)

	        if introduction is None:
	            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"

	        # Format timestamp for display
	        timestamp = f"Timestamp: {timestamp}" if timestamp else ""

	        # Return introduction text and timestamp
	        return introduction, timestamp

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # so its message is not re-wrapped as "Error occurred: ..."
	        raise
	    except Exception as e:
	        error_msg = f"Error occurred: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e


	@logfire.instrument("Fetch previous revision")
	def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
	    """
	    Fetch previous revision of a Wikipedia article and return its introduction.

	    Args:
	        title: Wikipedia article title
	        number: Number of revisions or days behind
	        units: "revisions" or "days"
	        new_revision: Text of the current revision; when empty, nothing is
	            fetched and (None, None) is returned

	    Returns:
	        Tuple of (introduction, timestamp). The timestamp is formatted for
	        display as "Timestamp: <value>, <n> revisions behind", or is an empty
	        string when the revision info carries no timestamp. If the
	        introduction cannot be retrieved, an error message string is returned
	        in its place.

	    Raises:
	        gr.Error: If no revision id is found for the requested offset, or any
	            other exception occurs while fetching.
	    """

	    # If we get here with an empty new revision, then an error should have been raised
	    # in fetch_current_revision, so just return empty values without raising another error
	    if not new_revision:
	        return None, None

	    try:
	        # Get previous revision based on units
	        if units == "revisions":
	            json_data = get_previous_revisions(title, revisions=number)
	            revision_info = extract_revision_info(json_data, revnum=number)
	        else:  # units == "days"
	            revision_info = get_revision_from_age(title, age_days=number)

	        if not revision_info.get("revid"):
	            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
	            raise gr.Error(error_msg, print_exception=False)

	        revid = revision_info["revid"]
	        timestamp = revision_info["timestamp"]

	        # Get introduction
	        introduction = get_wikipedia_introduction(revid)

	        if introduction is None:
	            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"

	        # Get revisions_behind
	        if units == "revisions":
	            revisions_behind = revision_info["revnum"]
	        else:
	            revisions_behind = get_revisions_behind(title, revid)
	            # For a negative number, replace the negative sign with ">"
	            # NOTE(review): nesting reconstructed from flattened source; this
	            # assumes only get_revisions_behind can return a negative count - confirm
	            if revisions_behind < 0:
	                revisions_behind = str(revisions_behind).replace("-", ">")

	        # Format timestamp for display
	        timestamp = (
	            f"Timestamp: {timestamp}, {revisions_behind} revisions behind"
	            if timestamp
	            else ""
	        )

	        # Return introduction text and timestamp
	        return introduction, timestamp

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # so its message is not re-wrapped as "Error occurred: ..."
	        raise
	    except Exception as e:
	        error_msg = f"Error occurred: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e


	def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
	    """
	    Run a classification model on the revisions.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text
	        prompt_style: heuristic or few-shot

	    Returns:
	        Tuple of (noteworthy, rationale) (bool, str). Both are None when
	        either revision text is empty, in which case the model is not called.

	    Raises:
	        gr.Error: If the model returns an empty result or raises an exception.
	    """

	    # Values to return if there is nothing to classify
	    noteworthy, rationale = None, None
	    if not old_revision or not new_revision:
	        return noteworthy, rationale

	    try:
	        # Run classifier model
	        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
	        if not result:
	            error_msg = f"Error: Could not get {prompt_style} model result"
	            raise gr.Error(error_msg, print_exception=False)
	        noteworthy = result.get("noteworthy", None)
	        rationale = result.get("rationale", "")

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # so its message is not re-wrapped as "Error running model: ..."
	        raise
	    except Exception as e:
	        error_msg = f"Error running model: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e

	    return noteworthy, rationale


	@logfire.instrument("Run heuristic classifier")
	def _run_heuristic_classifier(old_revision: str, new_revision: str):
	    """
	    Run the classifier with the "heuristic" prompt style.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text

	    Returns:
	        Whatever run_classifier returns for these revisions
	    """
	    verdict = run_classifier(old_revision, new_revision, prompt_style="heuristic")
	    return verdict


	@logfire.instrument("Run few-shot classifier")
	def _run_fewshot_classifier(old_revision: str, new_revision: str):
	    """
	    Run the classifier with the "few-shot" prompt style.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text

	    Returns:
	        Whatever run_classifier returns for these revisions
	    """
	    verdict = run_classifier(old_revision, new_revision, prompt_style="few-shot")
	    return verdict


	def compute_confidence(
	    heuristic_noteworthy,
	    fewshot_noteworthy,
	    judge_noteworthy,
	):
	    """
	    Derive a confidence label from the three noteworthy verdicts.

	    Args:
	        heuristic_noteworthy: Heuristic model's noteworthiness prediction
	        fewshot_noteworthy: Few-shot model's noteworthiness prediction
	        judge_noteworthy: Judge's noteworthiness prediction

	    Returns:
	        "High" when all three verdicts are equal, "Moderate" when the two
	        classifiers differ, and "Questionable" when the classifiers match
	        each other but not the judge.
	    """
	    # Classifiers disagree, so the judge acts as the tie-breaker
	    if heuristic_noteworthy != fewshot_noteworthy:
	        return "Moderate"

	    # Classifiers agree from here on; the label depends on whether the judge concurs
	    if fewshot_noteworthy == judge_noteworthy:
	        return "High"

	    # Judge overrides the classifiers' shared verdict
	    return "Questionable"


	@logfire.instrument("Run judge")
	def _run_judge(
	    old_revision: str,
	    new_revision: str,
	    heuristic_noteworthy: bool,
	    fewshot_noteworthy: bool,
	    heuristic_rationale: str,
	    fewshot_rationale: str,
	):
	    """
	    Run judge on the revisions and classifiers' rationales.

	    Args:
	        old_revision: Old revision text
	        new_revision: New revision text
	        heuristic_noteworthy: Heuristic model's noteworthiness prediction
	        fewshot_noteworthy: Few-shot model's noteworthiness prediction
	        heuristic_rationale: Heuristic model's rationale
	        fewshot_rationale: Few-shot model's rationale

	    Returns:
	        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str).
	        All four are None when either revision or either rationale is empty
	        (the judge is not called). noteworthy_text and confidence are None
	        when the judge's result has no reasoning.

	    Raises:
	        gr.Error: If the judge returns an empty result or raises an exception.
	    """

	    # Nothing to judge without both revisions and both rationales
	    if (
	        not old_revision
	        or not new_revision
	        or not heuristic_rationale
	        or not fewshot_rationale
	    ):
	        return None, None, None, None

	    try:
	        # Run judge
	        result = judge(
	            old_revision,
	            new_revision,
	            heuristic_rationale,
	            fewshot_rationale,
	            mode="aligned-heuristic",
	        )
	        if not result:
	            error_msg = "Error: Could not get judge's result"
	            raise gr.Error(error_msg, print_exception=False)
	        noteworthy = result.get("noteworthy", "")
	        reasoning = result.get("reasoning", "")

	    except gr.Error:
	        # Already a user-facing error raised above; pass it through unchanged
	        # so its message is not re-wrapped as "Error running judge: ..."
	        raise
	    except Exception as e:
	        error_msg = f"Error running judge: {str(e)}"
	        raise gr.Error(error_msg, print_exception=False) from e

	    # Without reasoning there is no text label and no confidence score.
	    # Both rationales are known to be non-empty here (checked above), so
	    # reasoning is the only remaining condition.
	    if not reasoning:
	        return noteworthy, None, reasoning, None

	    # Format noteworthy label (boolean) as text
	    noteworthy_text = str(noteworthy)

	    # Get confidence score
	    confidence = compute_confidence(
	        heuristic_noteworthy,
	        fewshot_noteworthy,
	        noteworthy,
	    )

	    return noteworthy, noteworthy_text, reasoning, confidence


	@logfire.instrument("🎲 Special Random")
	def find_interesting_example(number_behind: int, units_behind: str):
	    """
	    Search random Wikipedia pages for one whose confidence label is not "High".

	    Each attempt picks a random page title, fetches its current and previous
	    revisions, runs both classifiers and the judge, and stops at the first
	    page whose confidence label is set and differs from "High". At most 20
	    attempts are made; an attempt that yields no title, an empty revision, a
	    missing rationale, or an exception is skipped.

	    Args:
	        number_behind: Number of revisions or days behind
	        units_behind: "revisions" or "days"

	    Returns:
	        13-tuple of (page_title, new_revision, new_timestamp, old_revision,
	        old_timestamp, heuristic_noteworthy, fewshot_noteworthy,
	        judge_noteworthy, heuristic_rationale, fewshot_rationale,
	        judge_reasoning, noteworthy_text, confidence_score). When no example
	        is found, the three noteworthy values are None and every other
	        element is an empty string.
	    """
	    max_tries = 20

	    # Count pages from 1 so the counter can be shown to the user directly
	    for page_number in range(1, max_tries + 1):
	        page_title = get_random_wikipedia_title()
	        if not page_title:
	            continue

	        gr.Info(f"Page {page_number}: {page_title}", duration=20)

	        try:
	            # One Logfire span per page, named after the page and the offset
	            with logfire.span(f"{page_title} - {number_behind} {units_behind}"):

	                # Current revision
	                new_revision, new_timestamp = _fetch_current_revision(page_title)
	                if not new_revision:
	                    continue

	                # Previous revision
	                old_revision, old_timestamp = _fetch_previous_revision(
	                    page_title, number_behind, units_behind, new_revision
	                )
	                if not old_revision:
	                    continue

	                # Heuristic classifier
	                heuristic_noteworthy, heuristic_rationale = _run_heuristic_classifier(
	                    old_revision, new_revision
	                )
	                if heuristic_rationale is None:
	                    continue

	                # Few-shot classifier
	                fewshot_noteworthy, fewshot_rationale = _run_fewshot_classifier(
	                    old_revision, new_revision
	                )
	                if fewshot_rationale is None:
	                    continue

	                # Judge
	                judge_output = _run_judge(
	                    old_revision,
	                    new_revision,
	                    heuristic_noteworthy,
	                    fewshot_noteworthy,
	                    heuristic_rationale,
	                    fewshot_rationale,
	                )
	                judge_noteworthy, noteworthy_text, judge_reasoning, confidence_score = (
	                    judge_output
	                )

	                # A missing or "High" confidence label is not interesting
	                if not confidence_score or confidence_score == "High":
	                    continue

	                # Found an interesting example
	                gr.Success(
	                    f"Interesting example (page {page_number}) - ready for your feedback",
	                    duration=None,
	                )
	                return (
	                    page_title,
	                    new_revision,
	                    new_timestamp,
	                    old_revision,
	                    old_timestamp,
	                    heuristic_noteworthy,
	                    fewshot_noteworthy,
	                    judge_noteworthy,
	                    heuristic_rationale,
	                    fewshot_rationale,
	                    judge_reasoning,
	                    noteworthy_text,
	                    confidence_score,
	                )

	        except Exception:
	            # Best effort: any failure on this page just moves on to the next one
	            continue

	    # Every attempt was skipped or came back without a non-"High" confidence label
	    gr.Warning("No interesting examples found - try again", duration=None)

	    # Empty values: five text fields, three noteworthy flags, five text fields
	    blank_text = ("",) * 5
	    return blank_text + (None,) * 3 + blank_text