Spaces:
Sleeping
Sleeping
| from wiki_data_fetcher import ( | |
| get_previous_revisions, | |
| get_revision_from_age, | |
| get_wikipedia_introduction, | |
| extract_revision_info, | |
| get_revisions_behind, | |
| get_random_wikipedia_title, | |
| ) | |
| from models import classifier, judge | |
| import gradio as gr | |
| import logfire | |
def _fetch_current_revision(title: str):
    """
    Fetch the current revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title

    Returns:
        Tuple of (introduction, timestamp), where timestamp is a Markdown string
        (empty string if the revision has no timestamp).

    Raises:
        gr.Error: If the title is empty, the page cannot be found, or fetching fails.
    """
    if not title or not title.strip():
        raise gr.Error("Please enter a Wikipedia page title.", print_exception=False)
    try:
        # Get current revision (revision 0)
        json_data = get_previous_revisions(title, revisions=0)
        revision_info = extract_revision_info(json_data, revnum=0)
        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
            raise gr.Error(error_msg, print_exception=False)
        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]
        # Get introduction; fall back to an inline error string so the UI still shows something
        introduction = get_wikipedia_introduction(revid)
        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"
        # Format timestamp for display
        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""
        return introduction, timestamp
    except gr.Error:
        # Re-raise our own user-facing errors without re-wrapping them
        raise
    except Exception as e:
        raise gr.Error(f"Error occurred: {str(e)}", print_exception=False)
def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
    """
    Fetch a previous revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title
        number: Number of revisions or days behind
        units: "revisions" or "days"
        new_revision: The current revision's introduction; falsy means the
            current-revision fetch already failed, so this step is skipped.

    Returns:
        Tuple of (introduction, timestamp), where timestamp is a Markdown string.

    Raises:
        gr.Error: If the revision cannot be found or fetching fails.
    """
    # If we get here with an empty new revision, then an error should have been raised
    # in _fetch_current_revision, so just return empty values without raising another error
    if not new_revision:
        return None, None
    try:
        # Get previous revision based on units
        if units == "revisions":
            json_data = get_previous_revisions(title, revisions=number)
            revision_info = extract_revision_info(json_data, revnum=number)
        else:  # units == "days"
            revision_info = get_revision_from_age(title, age_days=number)
        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
            raise gr.Error(error_msg, print_exception=False)
        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]
        # Get introduction; fall back to an inline error string so the UI still shows something
        introduction = get_wikipedia_introduction(revid)
        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"
        # Work out how many revisions behind the current one this is
        if units == "revisions":
            revisions_behind = revision_info["revnum"]
        else:
            revisions_behind = get_revisions_behind(title, revid)
        # A negative count is a sentinel for "more than N"; show it as ">N"
        if revisions_behind < 0:
            revisions_behind = str(revisions_behind).replace("-", ">")
        # Format timestamp for display
        timestamp = (
            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
            if timestamp
            else ""
        )
        return introduction, timestamp
    except gr.Error:
        # Re-raise our own user-facing errors without re-wrapping them
        raise
    except Exception as e:
        raise gr.Error(f"Error occurred: {str(e)}", print_exception=False)
def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
    """
    Run a classification model on the revisions.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        prompt_style: heuristic or few-shot

    Returns:
        Tuple of (noteworthy, rationale) (bool, str); (None, None) if either
        revision is missing.

    Raises:
        gr.Error: If the classifier fails or returns no result.
    """
    # Values to return if input is missing
    noteworthy, rationale = None, None
    if not old_revision or not new_revision:
        return noteworthy, rationale
    try:
        # Run classifier model
        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
        if result:
            noteworthy = result.get("noteworthy", None)
            rationale = result.get("rationale", "")
        else:
            error_msg = f"Error: Could not get {prompt_style} model result"
            raise gr.Error(error_msg, print_exception=False)
    except gr.Error:
        # Re-raise our own user-facing errors without re-wrapping them
        raise
    except Exception as e:
        raise gr.Error(f"Error running model: {str(e)}", print_exception=False)
    return noteworthy, rationale
def _run_heuristic_classifier(old_revision: str, new_revision: str):
    """Classify the revision pair using the heuristic prompt style."""
    return run_classifier(old_revision, new_revision, prompt_style="heuristic")
def _run_fewshot_classifier(old_revision: str, new_revision: str):
    """Classify the revision pair using the few-shot prompt style."""
    return run_classifier(old_revision, new_revision, prompt_style="few-shot")
def compute_confidence(
    heuristic_noteworthy,
    fewshot_noteworthy,
    judge_noteworthy,
):
    """
    Derive a confidence label from the three noteworthiness booleans.

    "High" when both classifiers and the judge agree, "Moderate" when the
    classifiers disagree (the judge breaks the tie), and "Questionable"
    when the classifiers agree but the judge overrules them.
    """
    classifiers_agree = heuristic_noteworthy == fewshot_noteworthy
    if classifiers_agree and fewshot_noteworthy == judge_noteworthy:
        # Unanimous verdict
        return "High"
    if not classifiers_agree:
        # Split classifiers, judge decides
        return "Moderate"
    # Classifiers agree but the judge vetoes
    return "Questionable"
def _run_judge(
    old_revision: str,
    new_revision: str,
    heuristic_noteworthy: bool,
    fewshot_noteworthy: bool,
    heuristic_rationale: str,
    fewshot_rationale: str,
):
    """
    Run judge on the revisions and classifiers' rationales.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        heuristic_noteworthy: Heuristic model's noteworthiness prediction
        fewshot_noteworthy: Few-shot model's noteworthiness prediction
        heuristic_rationale: Heuristic model's rationale
        fewshot_rationale: Few-shot model's rationale

    Returns:
        Tuple of (noteworthy, noteworthy_text, reasoning, confidence)
        (bool, str, str, str); all None if any input is missing.

    Raises:
        gr.Error: If the judge fails or returns no result.
    """
    # Values to return if there is an error or missing input
    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
    if (
        not old_revision
        or not new_revision
        or not heuristic_rationale
        or not fewshot_rationale
    ):
        return noteworthy, noteworthy_text, reasoning, confidence
    try:
        # Run judge
        result = judge(
            old_revision,
            new_revision,
            heuristic_rationale,
            fewshot_rationale,
            mode="aligned-heuristic",
        )
        if result:
            noteworthy = result.get("noteworthy", "")
            reasoning = result.get("reasoning", "")
        else:
            # Plain string: no placeholders, so no f-prefix needed
            raise gr.Error("Error: Could not get judge's result", print_exception=False)
    except gr.Error:
        # Re-raise our own user-facing errors without re-wrapping them
        raise
    except Exception as e:
        raise gr.Error(f"Error running judge: {str(e)}", print_exception=False)
    # Format noteworthy label (boolean) as text, only when the judge produced reasoning
    noteworthy_text = str(noteworthy) if reasoning else None
    # Only compute a confidence score when all rationales and reasoning are present
    if heuristic_rationale and fewshot_rationale and reasoning:
        confidence = compute_confidence(
            heuristic_noteworthy,
            fewshot_noteworthy,
            noteworthy,
        )
    return noteworthy, noteworthy_text, reasoning, confidence
def find_interesting_example(number_behind: int, units_behind: str):
    """
    Search random Wikipedia pages for an "interesting" example.

    Repeatedly picks a random page, fetches its current and previous
    revisions, runs both classifiers and the judge, and stops as soon as
    the resulting confidence score is anything other than "High".
    Gives up after 20 pages and returns empty values.
    """
    max_tries = 20
    for try_index in range(max_tries):
        # Pick a random article; skip this round if none could be fetched
        title = get_random_wikipedia_title()
        if not title:
            continue
        gr.Info(f"Page {try_index + 1}: {title}", duration=20)
        try:
            # Group all work for this page under one Logfire span
            with logfire.span(f"{title} - {number_behind} {units_behind}"):
                # Current revision
                new_text, new_ts = _fetch_current_revision(title)
                if not new_text:
                    continue
                # Previous revision
                old_text, old_ts = _fetch_previous_revision(
                    title, number_behind, units_behind, new_text
                )
                if not old_text:
                    continue
                # Both classifiers must produce a rationale before judging
                heur_label, heur_rationale = _run_heuristic_classifier(
                    old_text, new_text
                )
                if heur_rationale is None:
                    continue
                few_label, few_rationale = _run_fewshot_classifier(
                    old_text, new_text
                )
                if few_rationale is None:
                    continue
                # Judge the two classifier verdicts
                judge_label, judge_label_text, judge_reasoning, confidence = _run_judge(
                    old_text,
                    new_text,
                    heur_label,
                    few_label,
                    heur_rationale,
                    few_rationale,
                )
                # Anything other than "High" confidence counts as interesting
                if confidence and confidence != "High":
                    gr.Success(
                        f"Interesting example (page {try_index + 1}) - ready for your feedback",
                        duration=None,
                    )
                    return (
                        title,
                        new_text,
                        new_ts,
                        old_text,
                        old_ts,
                        heur_label,
                        few_label,
                        judge_label,
                        heur_rationale,
                        few_rationale,
                        judge_reasoning,
                        judge_label_text,
                        confidence,
                    )
        except Exception:
            # Best-effort search: any failure just moves on to the next page
            continue
    # Exhausted all 20 tries without finding a non-High-confidence example
    gr.Warning("No interesting examples found - try again", duration=None)
    # Empty placeholders matching the 13-tuple success shape
    return ("",) * 5 + (None,) * 3 + ("",) * 5