Spaces:

jedick
/

noteworthy-differences

Sleeping

App Files Files Community

jedick commited on Dec 18, 2025

Commit

956820f

1 Parent(s): ff43104

Create app_functions.py to hold main app functions

Browse files

Files changed (3) hide show

app.py +55 -292
app_functions.py +272 -0
feedback.py +5 -1

app.py CHANGED Viewed

@@ -1,12 +1,5 @@
 import gradio as gr
-from wiki_data_fetcher import (
-    get_previous_revisions,
-    get_revision_from_age,
-    get_wikipedia_introduction,
-    extract_revision_info,
-    get_revisions_behind,
-    get_random_wikipedia_title,
-)
 from feedback import save_feedback_agree, save_feedback_disagree
 from contextlib import nullcontext
 from dotenv import load_dotenv
@@ -18,9 +11,15 @@ load_dotenv()
 # Setup logging with Logfire
 logfire.configure()
-# Import this after logfire.configure() so we don't get
 # LogfireNotConfiguredWarning: Instrumentation will have no effect
-from models import classifier, judge
 def start_parent_span(title: str, number: int, units: str):
@@ -38,59 +37,12 @@ def start_parent_span(title: str, number: int, units: str):
 def fetch_current_revision(title: str, context=None):
     """
     Wrapper to run _fetch_current_revision in provided Logfire context.
-    We use this to minimize indentation in the wrapped function.
     """
     with logfire.attach_context(context) if context else nullcontext():
         return _fetch_current_revision(title)
-@logfire.instrument("Fetch current revision")
-def _fetch_current_revision(title: str):
-    """
-    Fetch current revision of a Wikipedia article and return its introduction.
-    Args:
-        title: Wikipedia article title
-    Returns:
-        Tuple of (introduction, timestamp)
-    """
-    if not title or not title.strip():
-        error_msg = "Please enter a Wikipedia page title."
-        raise gr.Error(error_msg, print_exception=False)
-        return None, None
-    try:
-        # Get current revision (revision 0)
-        json_data = get_previous_revisions(title, revisions=0)
-        revision_info = extract_revision_info(json_data, revnum=0)
-        if not revision_info.get("revid"):
-            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
-            raise gr.Error(error_msg, print_exception=False)
-            return None, None
-        revid = revision_info["revid"]
-        timestamp = revision_info["timestamp"]
-        # Get introduction
-        introduction = get_wikipedia_introduction(revid)
-        if introduction is None:
-            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"
-        # Format timestamp for display
-        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""
-        # Return introduction text and timestamp
-        return introduction, timestamp
-    except Exception as e:
-        error_msg = f"Error occurred: {str(e)}"
-        raise gr.Error(error_msg, print_exception=False)
-        return None, None
 def fetch_previous_revision(
     title: str, number: int, units: str, new_revision: str, context=None
 ):
@@ -98,152 +50,16 @@ def fetch_previous_revision(
         return _fetch_previous_revision(title, number, units, new_revision)
-@logfire.instrument("Fetch previous revision")
-def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
-    """
-    Fetch previous revision of a Wikipedia article and return its introduction.
-    Args:
-        title: Wikipedia article title
-        number: Number of revisions or days behind
-        units: "revisions" or "days"
-    Returns:
-        Tuple of (introduction, timestamp)
-    """
-    # If we get here with an empty new revision, then an error should have been raised
-    # in fetch_current_revision, so just return empty values without raising another error
-    if not new_revision:
-        return None, None
-    try:
-        # Get previous revision based on units
-        if units == "revisions":
-            json_data = get_previous_revisions(title, revisions=number)
-            revision_info = extract_revision_info(json_data, revnum=number)
-        else:  # units == "days"
-            revision_info = get_revision_from_age(title, age_days=number)
-        if not revision_info.get("revid"):
-            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
-            raise gr.Error(error_msg, print_exception=False)
-            return None, None
-        revid = revision_info["revid"]
-        timestamp = revision_info["timestamp"]
-        # Get introduction
-        introduction = get_wikipedia_introduction(revid)
-        if introduction is None:
-            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"
-        # Get revisions_behind
-        if units == "revisions":
-            revisions_behind = revision_info["revnum"]
-        else:
-            revisions_behind = get_revisions_behind(title, revid)
-            # For a negative number, replace the negative sign with ">"
-            if revisions_behind < 0:
-                revisions_behind = str(revisions_behind).replace("-", ">")
-        # Format timestamp for display
-        timestamp = (
-            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
-            if timestamp
-            else ""
-        )
-        # Return introduction text and timestamp
-        return introduction, timestamp
-    except Exception as e:
-        error_msg = f"Error occurred: {str(e)}"
-        raise gr.Error(error_msg, print_exception=False)
-        return None, None
-def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
-    """
-    Run a classification model on the revisions.
-    Args:
-        old_revision: Old revision text
-        new_revision: New revision text
-        prompt_style: heuristic or few-shot
-    Returns:
-        Tuple of (noteworthy, rationale) (bool, str)
-    """
-    # Values to return if there is an error
-    noteworthy, rationale = None, None
-    if not old_revision or not new_revision:
-        return noteworthy, rationale
-    try:
-        # Run classifier model
-        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
-        if result:
-            noteworthy = result.get("noteworthy", None)
-            rationale = result.get("rationale", "")
-        else:
-            error_msg = f"Error: Could not get {prompt_style} model result"
-            raise gr.Error(error_msg, print_exception=False)
-    except Exception as e:
-        error_msg = f"Error running model: {str(e)}"
-        raise gr.Error(error_msg, print_exception=False)
-    return noteworthy, rationale
 def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
     with logfire.attach_context(context) if context else nullcontext():
         return _run_heuristic_classifier(old_revision, new_revision)
-@logfire.instrument("Run heuristic classifier")
-def _run_heuristic_classifier(old_revision: str, new_revision: str):
-    return run_classifier(old_revision, new_revision, prompt_style="heuristic")
 def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
     with logfire.attach_context(context) if context else nullcontext():
         return _run_fewshot_classifier(old_revision, new_revision)
-@logfire.instrument("Run few-shot classifier")
-def _run_fewshot_classifier(old_revision: str, new_revision: str):
-    return run_classifier(old_revision, new_revision, prompt_style="few-shot")
-def compute_confidence(
-    heuristic_noteworthy,
-    fewshot_noteworthy,
-    judge_noteworthy,
-    heuristic_rationale,
-    fewshot_rationale,
-    judge_reasoning,
-):
-    """
-    Compute a confidence label using the noteworthy booleans.
-    """
-    # Return None if any of the rationales or reasoning is missing.
-    if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
-        return None
-    if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
-        # Classifiers and judge all agree
-        return "High"
-    elif heuristic_noteworthy != fewshot_noteworthy:
-        # Classifiers disagree, judge decides
-        return "Moderate"
-    else:
-        # Classifiers agree, judge vetoes
-        return "Questionable"
 def run_judge(
     old_revision: str,
     new_revision: str,
@@ -266,89 +82,34 @@ def run_judge(
         )
-@logfire.instrument("Run judge")
-def _run_judge(
-    old_revision: str,
-    new_revision: str,
-    heuristic_noteworthy: bool,
-    fewshot_noteworthy: bool,
-    heuristic_rationale: str,
-    fewshot_rationale: str,
-    judge_mode: str,
-):
-    """
-    Run judge on the revisions and classifiers' rationales.
-    Args:
-        old_revision: Old revision text
-        new_revision: New revision text
-        heuristic_rationale: Heuristic model's rationale
-        fewshot_rationale: Few-shot model's rationale
-        judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
-    Returns:
-        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str)
-    """
-    # Values to return if there is an error
-    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
-    if (
-        not old_revision
-        or not new_revision
-        or not heuristic_rationale
-        or not fewshot_rationale
-    ):
-        return noteworthy, noteworthy_text, reasoning, confidence
-    try:
-        # Run judge
-        result = judge(
-            old_revision,
-            new_revision,
-            heuristic_rationale,
-            fewshot_rationale,
-            mode=judge_mode,
-        )
-        if result:
-            noteworthy = result.get("noteworthy", "")
-            reasoning = result.get("reasoning", "")
-        else:
-            error_msg = f"Error: Could not get judge's result"
-            raise gr.Error(error_msg, print_exception=False)
-    except Exception as e:
-        error_msg = f"Error running judge: {str(e)}"
-        raise gr.Error(error_msg, print_exception=False)
-    # Format noteworthy label (boolean) as text
-    if not reasoning:
-        noteworthy_text = None
-    else:
-        noteworthy_text = str(noteworthy)
-    # Get confidence score
-    confidence = compute_confidence(
-        heuristic_noteworthy,
-        fewshot_noteworthy,
-        noteworthy,
-        heuristic_rationale,
-        fewshot_rationale,
-        reasoning,
-    )
-    return noteworthy, noteworthy_text, reasoning, confidence
 # Create Gradio interface
 with gr.Blocks(title="Noteworthy Differences") as demo:
     with gr.Row():
         gr.Markdown(
             """
-        Compare current and old revisions of a Wikipedia article - you choose the number of revisions or days behind.<br>
-        Two classifier models (with heuristic and few-shot prompts) and a judge predict the noteworthiness of the differences.<br>
-        The judge was aligned with human preferences as described in the
-        [GitHub repository](https://github.com/jedick/noteworthy-differences).
-        """
         )
     with gr.Row():
@@ -357,9 +118,9 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
         )
         number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
         units_dropdown = gr.Dropdown(
-            choices=["revisions", "days"], value="revisions", label="Unit"
         )
-        judge_mode_dropdown = gr.Dropdown(
             choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
             value="aligned-heuristic",
             label="Judge Mode",
@@ -397,17 +158,17 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
         with gr.Column():
             gr.Markdown("### Model Output")
             heuristic_rationale = gr.Textbox(
-                label="Heuristic Model's Rationale",
                 lines=2,
                 max_lines=7,
             )
             fewshot_rationale = gr.Textbox(
-                label="Few-shot Model's Rationale",
                 lines=2,
                 max_lines=7,
             )
             judge_reasoning = gr.Textbox(
-                label="Judge's Reasoning",
                 lines=2,
                 max_lines=7,
             )
@@ -424,16 +185,10 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
                 )
             rerun_btn = gr.Button("Rerun Model")
-            gr.Markdown("### Your feedback")
-            feedback_status = gr.Textbox(
-                label="",
-                lines=1,
-                interactive=False,
-                visible=True,
-            )
             with gr.Row():
                 thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
-                thumbs_down_btn = gr.Button("👎 Disagree", variant="secondary")
     # States to store boolean values
     heuristic_noteworthy = gr.State()
@@ -491,7 +246,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             fewshot_noteworthy,
             heuristic_rationale,
             fewshot_rationale,
-            judge_mode_dropdown,
             context,
         ],
         outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
@@ -519,7 +274,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             fewshot_noteworthy,
             heuristic_rationale,
             fewshot_rationale,
-            judge_mode_dropdown,
             context,
         ],
         outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
@@ -533,7 +288,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             title_input,
             number_input,
             units_dropdown,
-            judge_mode_dropdown,
             old_revision,
             new_revision,
             old_timestamp,
@@ -547,7 +302,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             fewshot_noteworthy,
             judge_noteworthy,
         ],
-        outputs=[feedback_status],
         api_name=False,
     )
@@ -557,7 +311,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             title_input,
             number_input,
             units_dropdown,
-            judge_mode_dropdown,
             old_revision,
             new_revision,
             old_timestamp,
@@ -571,7 +325,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
             fewshot_noteworthy,
             judge_noteworthy,
         ],
-        outputs=[feedback_status],
         api_name=False,
     )
@@ -580,5 +333,15 @@ if __name__ == "__main__":
     # Setup theme without background image
     theme = gr.Theme.from_hub("NoCrypt/miku")
     theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
-    demo.launch(theme=theme)

 import gradio as gr
+from wiki_data_fetcher import get_random_wikipedia_title
 from feedback import save_feedback_agree, save_feedback_disagree
 from contextlib import nullcontext
 from dotenv import load_dotenv
 # Setup logging with Logfire
 logfire.configure()
+# This goes after logfire.configure() to avoid
 # LogfireNotConfiguredWarning: Instrumentation will have no effect
+from app_functions import (
+    _fetch_current_revision,
+    _fetch_previous_revision,
+    _run_heuristic_classifier,
+    _run_fewshot_classifier,
+    _run_judge,
+)
 def start_parent_span(title: str, number: int, units: str):
 def fetch_current_revision(title: str, context=None):
     """
     Wrapper to run _fetch_current_revision in provided Logfire context.
+    We use a wrapper to minimize indentation in the called function.
     """
     with logfire.attach_context(context) if context else nullcontext():
         return _fetch_current_revision(title)
 def fetch_previous_revision(
     title: str, number: int, units: str, new_revision: str, context=None
 ):
         return _fetch_previous_revision(title, number, units, new_revision)
 def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
     with logfire.attach_context(context) if context else nullcontext():
         return _run_heuristic_classifier(old_revision, new_revision)
 def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
     with logfire.attach_context(context) if context else nullcontext():
         return _run_fewshot_classifier(old_revision, new_revision)
 def run_judge(
     old_revision: str,
     new_revision: str,
         )
 # Create Gradio interface
 with gr.Blocks(title="Noteworthy Differences") as demo:
     with gr.Row():
         gr.Markdown(
             """
+        <table>
+          <colgroup>
+            <col span="1" style="width: 30%;">
+            <col span="1" style="width: 25%;">
+            <col span="1" style="width: 45%;">
+          </colgroup>
+          <tr>
+            <td>
+              <i class="fa-brands fa-wikipedia-w"></i> Compare current and old revisions of a Wikipedia article.<br>
+              📅 You choose the number of revisions or days behind.
+            </td>
+            <td>
+              ◇ ∴ ⚖ Two classifier models and a judge predict the noteworthiness of the differences.
+            </td>
+            <td>
+              <i class="fa-brands fa-github"></i> The <a href="https://github.com/jedick/noteworthy-differences">GitHub repository</a> describes how the judge was aligned with human preferences.<br>
+              👥 The <a href="https://huggingface.co/datasets/jedick/noteworthy-differences-feedback">feedback dataset</a> holds all user feedback collected to date.
+              </td>
+          </tr>
+        </table>
+        """,
+            elem_id="intro_table",
         )
     with gr.Row():
         )
         number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
         units_dropdown = gr.Dropdown(
+            choices=["revisions", "days"], value="revisions", label="Units"
         )
+        judge_mode = gr.Dropdown(
             choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
             value="aligned-heuristic",
             label="Judge Mode",
         with gr.Column():
             gr.Markdown("### Model Output")
             heuristic_rationale = gr.Textbox(
+                label="◇ Heuristic Model's Rationale",
                 lines=2,
                 max_lines=7,
             )
             fewshot_rationale = gr.Textbox(
+                label="∴ Few-shot Model's Rationale",
                 lines=2,
                 max_lines=7,
             )
             judge_reasoning = gr.Textbox(
+                label="⚖ Judge's Reasoning",
                 lines=2,
                 max_lines=7,
             )
                 )
             rerun_btn = gr.Button("Rerun Model")
+            gr.Markdown("### 👥 Your feedback")
             with gr.Row():
                 thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
+                thumbs_down_btn = gr.Button("👎 Disagree", variant="primary")
     # States to store boolean values
     heuristic_noteworthy = gr.State()
             fewshot_noteworthy,
             heuristic_rationale,
             fewshot_rationale,
+            judge_mode,
             context,
         ],
         outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
             fewshot_noteworthy,
             heuristic_rationale,
             fewshot_rationale,
+            judge_mode,
             context,
         ],
         outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
             title_input,
             number_input,
             units_dropdown,
+            judge_mode,
             old_revision,
             new_revision,
             old_timestamp,
             fewshot_noteworthy,
             judge_noteworthy,
         ],
         api_name=False,
     )
             title_input,
             number_input,
             units_dropdown,
+            judge_mode,
             old_revision,
             new_revision,
             old_timestamp,
             fewshot_noteworthy,
             judge_noteworthy,
         ],
         api_name=False,
     )
     # Setup theme without background image
     theme = gr.Theme.from_hub("NoCrypt/miku")
     theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
+    # Define the HTML for Font Awesome
+    head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css" rel="stylesheet">'
+    # Use CSS to style table
+    css = """
+    #intro_table {background-color: #ecfdf5}
+    table, tr, td {
+        border: none; /* Removes all borders */
+        border-collapse: collapse; /* Ensures no gaps between cells */
+    }
+    """
+    demo.launch(theme=theme, head=head, css=css)

app_functions.py ADDED Viewed

	@@ -0,0 +1,272 @@

+from wiki_data_fetcher import (
+    get_previous_revisions,
+    get_revision_from_age,
+    get_wikipedia_introduction,
+    extract_revision_info,
+    get_revisions_behind,
+)
+from models import classifier, judge
+import gradio as gr
+import logfire
+@logfire.instrument("Fetch current revision")
+def _fetch_current_revision(title: str):
+    """
+    Fetch current revision of a Wikipedia article and return its introduction.
+    Args:
+        title: Wikipedia article title
+    Returns:
+        Tuple of (introduction, timestamp)
+    """
+    if not title or not title.strip():
+        error_msg = "Please enter a Wikipedia page title."
+        raise gr.Error(error_msg, print_exception=False)
+        return None, None
+    try:
+        # Get current revision (revision 0)
+        json_data = get_previous_revisions(title, revisions=0)
+        revision_info = extract_revision_info(json_data, revnum=0)
+        if not revision_info.get("revid"):
+            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
+            raise gr.Error(error_msg, print_exception=False)
+            return None, None
+        revid = revision_info["revid"]
+        timestamp = revision_info["timestamp"]
+        # Get introduction
+        introduction = get_wikipedia_introduction(revid)
+        if introduction is None:
+            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"
+        # Format timestamp for display
+        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""
+        # Return introduction text and timestamp
+        return introduction, timestamp
+    except Exception as e:
+        error_msg = f"Error occurred: {str(e)}"
+        raise gr.Error(error_msg, print_exception=False)
+        return None, None
+@logfire.instrument("Fetch previous revision")
+def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
+    """
+    Fetch previous revision of a Wikipedia article and return its introduction.
+    Args:
+        title: Wikipedia article title
+        number: Number of revisions or days behind
+        units: "revisions" or "days"
+    Returns:
+        Tuple of (introduction, timestamp)
+    """
+    # If we get here with an empty new revision, then an error should have been raised
+    # in fetch_current_revision, so just return empty values without raising another error
+    if not new_revision:
+        return None, None
+    try:
+        # Get previous revision based on units
+        if units == "revisions":
+            json_data = get_previous_revisions(title, revisions=number)
+            revision_info = extract_revision_info(json_data, revnum=number)
+        else:  # units == "days"
+            revision_info = get_revision_from_age(title, age_days=number)
+        if not revision_info.get("revid"):
+            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
+            raise gr.Error(error_msg, print_exception=False)
+            return None, None
+        revid = revision_info["revid"]
+        timestamp = revision_info["timestamp"]
+        # Get introduction
+        introduction = get_wikipedia_introduction(revid)
+        if introduction is None:
+            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"
+        # Get revisions_behind
+        if units == "revisions":
+            revisions_behind = revision_info["revnum"]
+        else:
+            revisions_behind = get_revisions_behind(title, revid)
+            # For a negative number, replace the negative sign with ">"
+            if revisions_behind < 0:
+                revisions_behind = str(revisions_behind).replace("-", ">")
+        # Format timestamp for display
+        timestamp = (
+            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
+            if timestamp
+            else ""
+        )
+        # Return introduction text and timestamp
+        return introduction, timestamp
+    except Exception as e:
+        error_msg = f"Error occurred: {str(e)}"
+        raise gr.Error(error_msg, print_exception=False)
+        return None, None
+def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
+    """
+    Run a classification model on the revisions.
+    Args:
+        old_revision: Old revision text
+        new_revision: New revision text
+        prompt_style: heuristic or few-shot
+    Returns:
+        Tuple of (noteworthy, rationale) (bool, str)
+    """
+    # Values to return if there is an error
+    noteworthy, rationale = None, None
+    if not old_revision or not new_revision:
+        return noteworthy, rationale
+    try:
+        # Run classifier model
+        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
+        if result:
+            noteworthy = result.get("noteworthy", None)
+            rationale = result.get("rationale", "")
+        else:
+            error_msg = f"Error: Could not get {prompt_style} model result"
+            raise gr.Error(error_msg, print_exception=False)
+    except Exception as e:
+        error_msg = f"Error running model: {str(e)}"
+        raise gr.Error(error_msg, print_exception=False)
+    return noteworthy, rationale
+@logfire.instrument("Run heuristic classifier")
+def _run_heuristic_classifier(old_revision: str, new_revision: str):
+    return run_classifier(old_revision, new_revision, prompt_style="heuristic")
+@logfire.instrument("Run few-shot classifier")
+def _run_fewshot_classifier(old_revision: str, new_revision: str):
+    return run_classifier(old_revision, new_revision, prompt_style="few-shot")
+def compute_confidence(
+    heuristic_noteworthy,
+    fewshot_noteworthy,
+    judge_noteworthy,
+    heuristic_rationale,
+    fewshot_rationale,
+    judge_reasoning,
+):
+    """
+    Compute a confidence label using the noteworthy booleans.
+    """
+    # Return None if any of the rationales or reasoning is missing.
+    if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
+        return None
+    if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
+        # Classifiers and judge all agree
+        return "High"
+    elif heuristic_noteworthy != fewshot_noteworthy:
+        # Classifiers disagree, judge decides
+        return "Moderate"
+    else:
+        # Classifiers agree, judge vetoes
+        return "Questionable"
+@logfire.instrument("Run judge")
+def _run_judge(
+    old_revision: str,
+    new_revision: str,
+    heuristic_noteworthy: bool,
+    fewshot_noteworthy: bool,
+    heuristic_rationale: str,
+    fewshot_rationale: str,
+    judge_mode: str,
+):
+    """
+    Run judge on the revisions and classifiers' rationales.
+    Args:
+        old_revision: Old revision text
+        new_revision: New revision text
+        heuristic_noteworthy: Heuristic model's noteworthiness prediction
+        fewshot_noteworthy: Few-shot model's noteworthiness prediction
+        heuristic_rationale: Heuristic model's rationale
+        fewshot_rationale: Few-shot model's rationale
+        judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
+    Returns:
+        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str)
+    """
+    print(f"old_revision: {old_revision}")
+    print(f"new_revision: {new_revision}")
+    print(f"judge_mode: {judge_mode}")
+    # Values to return if there is an error
+    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
+    if (
+        not old_revision
+        or not new_revision
+        or not heuristic_rationale
+        or not fewshot_rationale
+    ):
+        return noteworthy, noteworthy_text, reasoning, confidence
+    try:
+        # Run judge
+        result = judge(
+            old_revision,
+            new_revision,
+            heuristic_rationale,
+            fewshot_rationale,
+            mode=judge_mode,
+        )
+        if result:
+            noteworthy = result.get("noteworthy", "")
+            reasoning = result.get("reasoning", "")
+        else:
+            error_msg = f"Error: Could not get judge's result"
+            raise gr.Error(error_msg, print_exception=False)
+    except Exception as e:
+        error_msg = f"Error running judge: {str(e)}"
+        raise gr.Error(error_msg, print_exception=False)
+    # Format noteworthy label (boolean) as text
+    if not reasoning:
+        noteworthy_text = None
+    else:
+        noteworthy_text = str(noteworthy)
+    # Get confidence score
+    confidence = compute_confidence(
+        heuristic_noteworthy,
+        fewshot_noteworthy,
+        noteworthy,
+        heuristic_rationale,
+        fewshot_rationale,
+        reasoning,
+    )
+    return noteworthy, noteworthy_text, reasoning, confidence

feedback.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from huggingface_hub import HfApi, CommitScheduler
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
@@ -6,6 +7,9 @@ import logfire
 import json
 import os
 # Set repo ID for Hugging Face dataset
 REPO_ID = "jedick/noteworthy-differences-feedback"
 # Setup user feedback file for uploading to HF dataset
@@ -64,7 +68,7 @@ def save_feedback(*args, feedback_value: str) -> None:
     with feedback_path.open("a") as f:
         f.write(json.dumps(feedback_dict))
         f.write("\n")
-    gr.Success(f"Saved your feedback: {feedback_value}", duration=2, title="Thank you!")
 @logfire.instrument("Save feedback: agree")

 from huggingface_hub import HfApi, CommitScheduler
+from dotenv import load_dotenv
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
 import json
 import os
+# Load API keys
+load_dotenv()
 # Set repo ID for Hugging Face dataset
 REPO_ID = "jedick/noteworthy-differences-feedback"
 # Setup user feedback file for uploading to HF dataset
     with feedback_path.open("a") as f:
         f.write(json.dumps(feedback_dict))
         f.write("\n")
+    gr.Success(f"Saved feedback: <strong>{feedback_value}</strong>")
 @logfire.instrument("Save feedback: agree")