Spaces:

jedick
/

noteworthy-differences

Sleeping

File size: 12,007 Bytes

from wiki_data_fetcher import (
    get_previous_revisions,
    get_revision_from_age,
    get_wikipedia_introduction,
    extract_revision_info,
    get_revisions_behind,
    get_random_wikipedia_title,
)
from models import classifier, judge
import gradio as gr
import logfire


@logfire.instrument("Fetch current revision")
def _fetch_current_revision(title: str):
    """
    Fetch current revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title

    Returns:
        Tuple of (introduction, timestamp). The timestamp is formatted as
        Markdown for display, or is an empty string when the revision's
        timestamp is empty. If the introduction cannot be retrieved, an
        error message is returned in its place.

    Raises:
        gr.Error: If the title is blank, the page cannot be found, or
            anything else goes wrong while fetching the revision.
    """
    if not title or not title.strip():
        error_msg = "Please enter a Wikipedia page title."
        raise gr.Error(error_msg, print_exception=False)

    try:
        # Get current revision (revision 0)
        json_data = get_previous_revisions(title, revisions=0)
        revision_info = extract_revision_info(json_data, revnum=0)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"

        # Format timestamp for display
        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # Already a user-facing error: re-raise untouched so the generic
        # handler below does not prefix the message a second time
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e


@logfire.instrument("Fetch previous revision")
def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
    """
    Fetch previous revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title
        number: Number of revisions or days behind
        units: "revisions" or "days"
        new_revision: Text of the current revision; when it is empty nothing
            is fetched

    Returns:
        Tuple of (introduction, timestamp), or (None, None) when new_revision
        is empty. The timestamp is formatted as Markdown for display and
        includes how many revisions behind the fetched revision is.

    Raises:
        gr.Error: If the requested revision cannot be found or anything else
            goes wrong while fetching it.
    """

    # If we get here with an empty new revision, then an error should have been raised
    # in fetch_current_revision, so just return empty values without raising another error
    if not new_revision:
        return None, None

    try:
        # Get previous revision based on units
        if units == "revisions":
            json_data = get_previous_revisions(title, revisions=number)
            revision_info = extract_revision_info(json_data, revnum=number)
        else:  # units == "days"
            revision_info = get_revision_from_age(title, age_days=number)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"

        # Get revisions_behind
        if units == "revisions":
            revisions_behind = revision_info["revnum"]
        else:
            revisions_behind = get_revisions_behind(title, revid)
            # For a negative number, replace the negative sign with ">"
            if revisions_behind < 0:
                revisions_behind = str(revisions_behind).replace("-", ">")

        # Format timestamp for display
        timestamp = (
            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
            if timestamp
            else ""
        )

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # Already a user-facing error: re-raise untouched so the generic
        # handler below does not prefix the message a second time
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e


def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
    """
    Run a classification model on the revisions.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        prompt_style: heuristic or few-shot

    Returns:
        Tuple of (noteworthy, rationale) (bool, str), or (None, None) when
        either revision is empty

    Raises:
        gr.Error: If the model returns no result or fails while running.
    """

    # Values to return if a revision is missing (the model is not run)
    noteworthy, rationale = None, None
    if not old_revision or not new_revision:
        return noteworthy, rationale

    try:
        # Run classifier model
        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
        if result:
            noteworthy = result.get("noteworthy", None)
            rationale = result.get("rationale", "")
        else:
            error_msg = f"Error: Could not get {prompt_style} model result"
            raise gr.Error(error_msg, print_exception=False)

    except gr.Error:
        # Already a user-facing error: re-raise untouched so the generic
        # handler below does not prefix the message a second time
        raise
    except Exception as e:
        error_msg = f"Error running model: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e

    return noteworthy, rationale


@logfire.instrument("Run heuristic classifier")
def _run_heuristic_classifier(old_revision: str, new_revision: str):
    """Classify the pair of revisions using the "heuristic" prompt style."""
    outcome = run_classifier(old_revision, new_revision, prompt_style="heuristic")
    return outcome


@logfire.instrument("Run few-shot classifier")
def _run_fewshot_classifier(old_revision: str, new_revision: str):
    """Classify the pair of revisions using the "few-shot" prompt style."""
    outcome = run_classifier(old_revision, new_revision, prompt_style="few-shot")
    return outcome


def compute_confidence(
    heuristic_noteworthy,
    fewshot_noteworthy,
    judge_noteworthy,
):
    """
    Derive a confidence label from the three noteworthy verdicts.

    Returns "High" when both classifiers and the judge agree, "Moderate"
    when the classifiers disagree with each other, and "Questionable" when
    the classifiers agree but the judge does not.
    """
    classifiers_agree = heuristic_noteworthy == fewshot_noteworthy

    # Unanimous verdict
    if classifiers_agree and fewshot_noteworthy == judge_noteworthy:
        return "High"

    # Split classifiers: the judge acts as the tie-breaker
    if heuristic_noteworthy != fewshot_noteworthy:
        return "Moderate"

    # Classifiers agree with each other but the judge overrules them
    return "Questionable"


@logfire.instrument("Run judge")
def _run_judge(
    old_revision: str,
    new_revision: str,
    heuristic_noteworthy: bool,
    fewshot_noteworthy: bool,
    heuristic_rationale: str,
    fewshot_rationale: str,
):
    """
    Run judge on the revisions and classifiers' rationales.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        heuristic_noteworthy: Heuristic model's noteworthiness prediction
        fewshot_noteworthy: Few-shot model's noteworthiness prediction
        heuristic_rationale: Heuristic model's rationale
        fewshot_rationale: Few-shot model's rationale

    Returns:
        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str).
        All four are None when a revision or rationale is empty (the judge is
        not run); noteworthy_text and confidence are None when the judge
        gives no reasoning.

    Raises:
        gr.Error: If the judge returns no result or fails while running.
    """

    # Values to return if an input is missing (the judge is not run)
    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
    if (
        not old_revision
        or not new_revision
        or not heuristic_rationale
        or not fewshot_rationale
    ):
        return noteworthy, noteworthy_text, reasoning, confidence

    try:
        # Run judge
        result = judge(
            old_revision,
            new_revision,
            heuristic_rationale,
            fewshot_rationale,
            mode="aligned-heuristic",
        )
        if result:
            noteworthy = result.get("noteworthy", "")
            reasoning = result.get("reasoning", "")
        else:
            error_msg = "Error: Could not get judge's result"
            raise gr.Error(error_msg, print_exception=False)

    except gr.Error:
        # Already a user-facing error: re-raise untouched so the generic
        # handler below does not prefix the message a second time
        raise
    except Exception as e:
        error_msg = f"Error running judge: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e

    # Both rationales are known to be non-empty here (checked above), so the
    # label text and confidence depend only on whether the judge gave reasoning
    if reasoning:
        # Format noteworthy label (boolean) as text
        noteworthy_text = str(noteworthy)
        # Get confidence score
        confidence = compute_confidence(
            heuristic_noteworthy,
            fewshot_noteworthy,
            noteworthy,
        )

    return noteworthy, noteworthy_text, reasoning, confidence


@logfire.instrument("🎲 Special Random")
def find_interesting_example(
    number_behind: int, units_behind: str, max_tries: int = 20
):
    """
    Find an interesting example by repeatedly getting random pages and running the model
    until we find one with a confidence score that is not High.

    Args:
        number_behind: Number of revisions or days behind for the old revision
        units_behind: "revisions" or "days"
        max_tries: Maximum number of random pages to try (default 20)

    Returns:
        Tuple of (page_title, new_revision, new_timestamp, old_revision,
        old_timestamp, heuristic_noteworthy, fewshot_noteworthy,
        judge_noteworthy, heuristic_rationale, fewshot_rationale,
        judge_reasoning, noteworthy_text, confidence_score). If no interesting
        example is found, the text fields are empty strings and the three
        noteworthy fields are None.
    """
    for attempt in range(max_tries):
        # Get random page title
        page_title = get_random_wikipedia_title()
        if not page_title:
            continue

        gr.Info(f"Page {attempt + 1}: {page_title}", duration=20)

        try:
            # Initialize Logfire span
            span_name = f"{page_title} - {number_behind} {units_behind}"
            with logfire.span(span_name):

                # Fetch current revision
                new_revision, new_timestamp = _fetch_current_revision(page_title)
                if not new_revision:
                    continue

                # Fetch previous revision
                old_revision, old_timestamp = _fetch_previous_revision(
                    page_title, number_behind, units_behind, new_revision
                )
                if not old_revision:
                    continue

                # Run heuristic classifier
                heuristic_noteworthy, heuristic_rationale = _run_heuristic_classifier(
                    old_revision, new_revision
                )
                if heuristic_rationale is None:
                    continue

                # Run few-shot classifier
                fewshot_noteworthy, fewshot_rationale = _run_fewshot_classifier(
                    old_revision, new_revision
                )
                if fewshot_rationale is None:
                    continue

                # Run judge
                judge_noteworthy, noteworthy_text, judge_reasoning, confidence_score = (
                    _run_judge(
                        old_revision,
                        new_revision,
                        heuristic_noteworthy,
                        fewshot_noteworthy,
                        heuristic_rationale,
                        fewshot_rationale,
                    )
                )

            # Check if confidence score is not High
            if confidence_score and confidence_score != "High":
                # Found an interesting example
                gr.Success(
                    f"Interesting example (page {attempt + 1}) - ready for your feedback",
                    duration=None,
                )
                return (
                    page_title,
                    new_revision,
                    new_timestamp,
                    old_revision,
                    old_timestamp,
                    heuristic_noteworthy,
                    fewshot_noteworthy,
                    judge_noteworthy,
                    heuristic_rationale,
                    fewshot_rationale,
                    judge_reasoning,
                    noteworthy_text,
                    confidence_score,
                )

        except Exception:
            # Best effort: a failure on one random page should not stop the
            # search, so move on to the next attempt
            continue

    # If we get here, no try produced a confidence score other than High
    # (tries that were skipped or raised an error also end up here)
    gr.Warning("No interesting examples found - try again", duration=None)
    # Return empty values
    return (
        "",
        "",
        "",
        "",
        "",
        None,
        None,
        None,
        "",
        "",
        "",
        "",
        "",
    )