Spaces:
Sleeping
Sleeping
jedick
committed on
Commit
·
956820f
1
Parent(s):
ff43104
Create app_functions.py to hold main app functions
Browse files- app.py +55 -292
- app_functions.py +272 -0
- feedback.py +5 -1
app.py
CHANGED
|
@@ -1,12 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from wiki_data_fetcher import
|
| 3 |
-
get_previous_revisions,
|
| 4 |
-
get_revision_from_age,
|
| 5 |
-
get_wikipedia_introduction,
|
| 6 |
-
extract_revision_info,
|
| 7 |
-
get_revisions_behind,
|
| 8 |
-
get_random_wikipedia_title,
|
| 9 |
-
)
|
| 10 |
from feedback import save_feedback_agree, save_feedback_disagree
|
| 11 |
from contextlib import nullcontext
|
| 12 |
from dotenv import load_dotenv
|
|
@@ -18,9 +11,15 @@ load_dotenv()
|
|
| 18 |
# Setup logging with Logfire
|
| 19 |
logfire.configure()
|
| 20 |
|
| 21 |
-
#
|
| 22 |
# LogfireNotConfiguredWarning: Instrumentation will have no effect
|
| 23 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def start_parent_span(title: str, number: int, units: str):
|
|
@@ -38,59 +37,12 @@ def start_parent_span(title: str, number: int, units: str):
|
|
| 38 |
def fetch_current_revision(title: str, context=None):
|
| 39 |
"""
|
| 40 |
Wrapper to run _fetch_current_revision in provided Logfire context.
|
| 41 |
-
We use
|
| 42 |
"""
|
| 43 |
with logfire.attach_context(context) if context else nullcontext():
|
| 44 |
return _fetch_current_revision(title)
|
| 45 |
|
| 46 |
|
| 47 |
-
@logfire.instrument("Fetch current revision")
|
| 48 |
-
def _fetch_current_revision(title: str):
|
| 49 |
-
"""
|
| 50 |
-
Fetch current revision of a Wikipedia article and return its introduction.
|
| 51 |
-
|
| 52 |
-
Args:
|
| 53 |
-
title: Wikipedia article title
|
| 54 |
-
|
| 55 |
-
Returns:
|
| 56 |
-
Tuple of (introduction, timestamp)
|
| 57 |
-
"""
|
| 58 |
-
if not title or not title.strip():
|
| 59 |
-
error_msg = "Please enter a Wikipedia page title."
|
| 60 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 61 |
-
return None, None
|
| 62 |
-
|
| 63 |
-
try:
|
| 64 |
-
# Get current revision (revision 0)
|
| 65 |
-
json_data = get_previous_revisions(title, revisions=0)
|
| 66 |
-
revision_info = extract_revision_info(json_data, revnum=0)
|
| 67 |
-
|
| 68 |
-
if not revision_info.get("revid"):
|
| 69 |
-
error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
|
| 70 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 71 |
-
return None, None
|
| 72 |
-
|
| 73 |
-
revid = revision_info["revid"]
|
| 74 |
-
timestamp = revision_info["timestamp"]
|
| 75 |
-
|
| 76 |
-
# Get introduction
|
| 77 |
-
introduction = get_wikipedia_introduction(revid)
|
| 78 |
-
|
| 79 |
-
if introduction is None:
|
| 80 |
-
introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"
|
| 81 |
-
|
| 82 |
-
# Format timestamp for display
|
| 83 |
-
timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""
|
| 84 |
-
|
| 85 |
-
# Return introduction text and timestamp
|
| 86 |
-
return introduction, timestamp
|
| 87 |
-
|
| 88 |
-
except Exception as e:
|
| 89 |
-
error_msg = f"Error occurred: {str(e)}"
|
| 90 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 91 |
-
return None, None
|
| 92 |
-
|
| 93 |
-
|
| 94 |
def fetch_previous_revision(
|
| 95 |
title: str, number: int, units: str, new_revision: str, context=None
|
| 96 |
):
|
|
@@ -98,152 +50,16 @@ def fetch_previous_revision(
|
|
| 98 |
return _fetch_previous_revision(title, number, units, new_revision)
|
| 99 |
|
| 100 |
|
| 101 |
-
@logfire.instrument("Fetch previous revision")
|
| 102 |
-
def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
|
| 103 |
-
"""
|
| 104 |
-
Fetch previous revision of a Wikipedia article and return its introduction.
|
| 105 |
-
|
| 106 |
-
Args:
|
| 107 |
-
title: Wikipedia article title
|
| 108 |
-
number: Number of revisions or days behind
|
| 109 |
-
units: "revisions" or "days"
|
| 110 |
-
|
| 111 |
-
Returns:
|
| 112 |
-
Tuple of (introduction, timestamp)
|
| 113 |
-
"""
|
| 114 |
-
|
| 115 |
-
# If we get here with an empty new revision, then an error should have been raised
|
| 116 |
-
# in fetch_current_revision, so just return empty values without raising another error
|
| 117 |
-
if not new_revision:
|
| 118 |
-
return None, None
|
| 119 |
-
|
| 120 |
-
try:
|
| 121 |
-
# Get previous revision based on units
|
| 122 |
-
if units == "revisions":
|
| 123 |
-
json_data = get_previous_revisions(title, revisions=number)
|
| 124 |
-
revision_info = extract_revision_info(json_data, revnum=number)
|
| 125 |
-
else: # units == "days"
|
| 126 |
-
revision_info = get_revision_from_age(title, age_days=number)
|
| 127 |
-
|
| 128 |
-
if not revision_info.get("revid"):
|
| 129 |
-
error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
|
| 130 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 131 |
-
return None, None
|
| 132 |
-
|
| 133 |
-
revid = revision_info["revid"]
|
| 134 |
-
timestamp = revision_info["timestamp"]
|
| 135 |
-
|
| 136 |
-
# Get introduction
|
| 137 |
-
introduction = get_wikipedia_introduction(revid)
|
| 138 |
-
|
| 139 |
-
if introduction is None:
|
| 140 |
-
introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"
|
| 141 |
-
|
| 142 |
-
# Get revisions_behind
|
| 143 |
-
if units == "revisions":
|
| 144 |
-
revisions_behind = revision_info["revnum"]
|
| 145 |
-
else:
|
| 146 |
-
revisions_behind = get_revisions_behind(title, revid)
|
| 147 |
-
# For a negative number, replace the negative sign with ">"
|
| 148 |
-
if revisions_behind < 0:
|
| 149 |
-
revisions_behind = str(revisions_behind).replace("-", ">")
|
| 150 |
-
|
| 151 |
-
# Format timestamp for display
|
| 152 |
-
timestamp = (
|
| 153 |
-
f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
|
| 154 |
-
if timestamp
|
| 155 |
-
else ""
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
# Return introduction text and timestamp
|
| 159 |
-
return introduction, timestamp
|
| 160 |
-
|
| 161 |
-
except Exception as e:
|
| 162 |
-
error_msg = f"Error occurred: {str(e)}"
|
| 163 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 164 |
-
return None, None
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
|
| 168 |
-
"""
|
| 169 |
-
Run a classification model on the revisions.
|
| 170 |
-
|
| 171 |
-
Args:
|
| 172 |
-
old_revision: Old revision text
|
| 173 |
-
new_revision: New revision text
|
| 174 |
-
prompt_style: heuristic or few-shot
|
| 175 |
-
|
| 176 |
-
Returns:
|
| 177 |
-
Tuple of (noteworthy, rationale) (bool, str)
|
| 178 |
-
"""
|
| 179 |
-
|
| 180 |
-
# Values to return if there is an error
|
| 181 |
-
noteworthy, rationale = None, None
|
| 182 |
-
if not old_revision or not new_revision:
|
| 183 |
-
return noteworthy, rationale
|
| 184 |
-
|
| 185 |
-
try:
|
| 186 |
-
# Run classifier model
|
| 187 |
-
result = classifier(old_revision, new_revision, prompt_style=prompt_style)
|
| 188 |
-
if result:
|
| 189 |
-
noteworthy = result.get("noteworthy", None)
|
| 190 |
-
rationale = result.get("rationale", "")
|
| 191 |
-
else:
|
| 192 |
-
error_msg = f"Error: Could not get {prompt_style} model result"
|
| 193 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 194 |
-
|
| 195 |
-
except Exception as e:
|
| 196 |
-
error_msg = f"Error running model: {str(e)}"
|
| 197 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 198 |
-
|
| 199 |
-
return noteworthy, rationale
|
| 200 |
-
|
| 201 |
-
|
| 202 |
def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
|
| 203 |
with logfire.attach_context(context) if context else nullcontext():
|
| 204 |
return _run_heuristic_classifier(old_revision, new_revision)
|
| 205 |
|
| 206 |
|
| 207 |
-
@logfire.instrument("Run heuristic classifier")
|
| 208 |
-
def _run_heuristic_classifier(old_revision: str, new_revision: str):
|
| 209 |
-
return run_classifier(old_revision, new_revision, prompt_style="heuristic")
|
| 210 |
-
|
| 211 |
-
|
| 212 |
def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
|
| 213 |
with logfire.attach_context(context) if context else nullcontext():
|
| 214 |
return _run_fewshot_classifier(old_revision, new_revision)
|
| 215 |
|
| 216 |
|
| 217 |
-
@logfire.instrument("Run few-shot classifier")
|
| 218 |
-
def _run_fewshot_classifier(old_revision: str, new_revision: str):
|
| 219 |
-
return run_classifier(old_revision, new_revision, prompt_style="few-shot")
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
def compute_confidence(
|
| 223 |
-
heuristic_noteworthy,
|
| 224 |
-
fewshot_noteworthy,
|
| 225 |
-
judge_noteworthy,
|
| 226 |
-
heuristic_rationale,
|
| 227 |
-
fewshot_rationale,
|
| 228 |
-
judge_reasoning,
|
| 229 |
-
):
|
| 230 |
-
"""
|
| 231 |
-
Compute a confidence label using the noteworthy booleans.
|
| 232 |
-
"""
|
| 233 |
-
# Return None if any of the rationales or reasoning is missing.
|
| 234 |
-
if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
|
| 235 |
-
return None
|
| 236 |
-
if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
|
| 237 |
-
# Classifiers and judge all agree
|
| 238 |
-
return "High"
|
| 239 |
-
elif heuristic_noteworthy != fewshot_noteworthy:
|
| 240 |
-
# Classifiers disagree, judge decides
|
| 241 |
-
return "Moderate"
|
| 242 |
-
else:
|
| 243 |
-
# Classifiers agree, judge vetoes
|
| 244 |
-
return "Questionable"
|
| 245 |
-
|
| 246 |
-
|
| 247 |
def run_judge(
|
| 248 |
old_revision: str,
|
| 249 |
new_revision: str,
|
|
@@ -266,89 +82,34 @@ def run_judge(
|
|
| 266 |
)
|
| 267 |
|
| 268 |
|
| 269 |
-
@logfire.instrument("Run judge")
|
| 270 |
-
def _run_judge(
|
| 271 |
-
old_revision: str,
|
| 272 |
-
new_revision: str,
|
| 273 |
-
heuristic_noteworthy: bool,
|
| 274 |
-
fewshot_noteworthy: bool,
|
| 275 |
-
heuristic_rationale: str,
|
| 276 |
-
fewshot_rationale: str,
|
| 277 |
-
judge_mode: str,
|
| 278 |
-
):
|
| 279 |
-
"""
|
| 280 |
-
Run judge on the revisions and classifiers' rationales.
|
| 281 |
-
|
| 282 |
-
Args:
|
| 283 |
-
old_revision: Old revision text
|
| 284 |
-
new_revision: New revision text
|
| 285 |
-
heuristic_rationale: Heuristic model's rationale
|
| 286 |
-
fewshot_rationale: Few-shot model's rationale
|
| 287 |
-
judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
|
| 288 |
-
|
| 289 |
-
Returns:
|
| 290 |
-
Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str)
|
| 291 |
-
"""
|
| 292 |
-
|
| 293 |
-
# Values to return if there is an error
|
| 294 |
-
noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
|
| 295 |
-
if (
|
| 296 |
-
not old_revision
|
| 297 |
-
or not new_revision
|
| 298 |
-
or not heuristic_rationale
|
| 299 |
-
or not fewshot_rationale
|
| 300 |
-
):
|
| 301 |
-
return noteworthy, noteworthy_text, reasoning, confidence
|
| 302 |
-
|
| 303 |
-
try:
|
| 304 |
-
# Run judge
|
| 305 |
-
result = judge(
|
| 306 |
-
old_revision,
|
| 307 |
-
new_revision,
|
| 308 |
-
heuristic_rationale,
|
| 309 |
-
fewshot_rationale,
|
| 310 |
-
mode=judge_mode,
|
| 311 |
-
)
|
| 312 |
-
if result:
|
| 313 |
-
noteworthy = result.get("noteworthy", "")
|
| 314 |
-
reasoning = result.get("reasoning", "")
|
| 315 |
-
else:
|
| 316 |
-
error_msg = f"Error: Could not get judge's result"
|
| 317 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 318 |
-
|
| 319 |
-
except Exception as e:
|
| 320 |
-
error_msg = f"Error running judge: {str(e)}"
|
| 321 |
-
raise gr.Error(error_msg, print_exception=False)
|
| 322 |
-
|
| 323 |
-
# Format noteworthy label (boolean) as text
|
| 324 |
-
if not reasoning:
|
| 325 |
-
noteworthy_text = None
|
| 326 |
-
else:
|
| 327 |
-
noteworthy_text = str(noteworthy)
|
| 328 |
-
|
| 329 |
-
# Get confidence score
|
| 330 |
-
confidence = compute_confidence(
|
| 331 |
-
heuristic_noteworthy,
|
| 332 |
-
fewshot_noteworthy,
|
| 333 |
-
noteworthy,
|
| 334 |
-
heuristic_rationale,
|
| 335 |
-
fewshot_rationale,
|
| 336 |
-
reasoning,
|
| 337 |
-
)
|
| 338 |
-
|
| 339 |
-
return noteworthy, noteworthy_text, reasoning, confidence
|
| 340 |
-
|
| 341 |
-
|
| 342 |
# Create Gradio interface
|
| 343 |
with gr.Blocks(title="Noteworthy Differences") as demo:
|
| 344 |
with gr.Row():
|
| 345 |
gr.Markdown(
|
| 346 |
"""
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
)
|
| 353 |
|
| 354 |
with gr.Row():
|
|
@@ -357,9 +118,9 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 357 |
)
|
| 358 |
number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
|
| 359 |
units_dropdown = gr.Dropdown(
|
| 360 |
-
choices=["revisions", "days"], value="revisions", label="
|
| 361 |
)
|
| 362 |
-
|
| 363 |
choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
|
| 364 |
value="aligned-heuristic",
|
| 365 |
label="Judge Mode",
|
|
@@ -397,17 +158,17 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 397 |
with gr.Column():
|
| 398 |
gr.Markdown("### Model Output")
|
| 399 |
heuristic_rationale = gr.Textbox(
|
| 400 |
-
label="Heuristic Model's Rationale",
|
| 401 |
lines=2,
|
| 402 |
max_lines=7,
|
| 403 |
)
|
| 404 |
fewshot_rationale = gr.Textbox(
|
| 405 |
-
label="Few-shot Model's Rationale",
|
| 406 |
lines=2,
|
| 407 |
max_lines=7,
|
| 408 |
)
|
| 409 |
judge_reasoning = gr.Textbox(
|
| 410 |
-
label="Judge's Reasoning",
|
| 411 |
lines=2,
|
| 412 |
max_lines=7,
|
| 413 |
)
|
|
@@ -424,16 +185,10 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 424 |
)
|
| 425 |
rerun_btn = gr.Button("Rerun Model")
|
| 426 |
|
| 427 |
-
gr.Markdown("### Your feedback")
|
| 428 |
-
feedback_status = gr.Textbox(
|
| 429 |
-
label="",
|
| 430 |
-
lines=1,
|
| 431 |
-
interactive=False,
|
| 432 |
-
visible=True,
|
| 433 |
-
)
|
| 434 |
with gr.Row():
|
| 435 |
thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
|
| 436 |
-
thumbs_down_btn = gr.Button("👎 Disagree", variant="
|
| 437 |
|
| 438 |
# States to store boolean values
|
| 439 |
heuristic_noteworthy = gr.State()
|
|
@@ -491,7 +246,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 491 |
fewshot_noteworthy,
|
| 492 |
heuristic_rationale,
|
| 493 |
fewshot_rationale,
|
| 494 |
-
|
| 495 |
context,
|
| 496 |
],
|
| 497 |
outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
|
|
@@ -519,7 +274,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 519 |
fewshot_noteworthy,
|
| 520 |
heuristic_rationale,
|
| 521 |
fewshot_rationale,
|
| 522 |
-
|
| 523 |
context,
|
| 524 |
],
|
| 525 |
outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
|
|
@@ -533,7 +288,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 533 |
title_input,
|
| 534 |
number_input,
|
| 535 |
units_dropdown,
|
| 536 |
-
|
| 537 |
old_revision,
|
| 538 |
new_revision,
|
| 539 |
old_timestamp,
|
|
@@ -547,7 +302,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 547 |
fewshot_noteworthy,
|
| 548 |
judge_noteworthy,
|
| 549 |
],
|
| 550 |
-
outputs=[feedback_status],
|
| 551 |
api_name=False,
|
| 552 |
)
|
| 553 |
|
|
@@ -557,7 +311,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 557 |
title_input,
|
| 558 |
number_input,
|
| 559 |
units_dropdown,
|
| 560 |
-
|
| 561 |
old_revision,
|
| 562 |
new_revision,
|
| 563 |
old_timestamp,
|
|
@@ -571,7 +325,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
|
|
| 571 |
fewshot_noteworthy,
|
| 572 |
judge_noteworthy,
|
| 573 |
],
|
| 574 |
-
outputs=[feedback_status],
|
| 575 |
api_name=False,
|
| 576 |
)
|
| 577 |
|
|
@@ -580,5 +333,15 @@ if __name__ == "__main__":
|
|
| 580 |
# Setup theme without background image
|
| 581 |
theme = gr.Theme.from_hub("NoCrypt/miku")
|
| 582 |
theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 583 |
|
| 584 |
-
demo.launch(theme=theme)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from wiki_data_fetcher import get_random_wikipedia_title
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from feedback import save_feedback_agree, save_feedback_disagree
|
| 4 |
from contextlib import nullcontext
|
| 5 |
from dotenv import load_dotenv
|
|
|
|
| 11 |
# Setup logging with Logfire
|
| 12 |
logfire.configure()
|
| 13 |
|
| 14 |
+
# This goes after logfire.configure() to avoid
|
| 15 |
# LogfireNotConfiguredWarning: Instrumentation will have no effect
|
| 16 |
+
from app_functions import (
|
| 17 |
+
_fetch_current_revision,
|
| 18 |
+
_fetch_previous_revision,
|
| 19 |
+
_run_heuristic_classifier,
|
| 20 |
+
_run_fewshot_classifier,
|
| 21 |
+
_run_judge,
|
| 22 |
+
)
|
| 23 |
|
| 24 |
|
| 25 |
def start_parent_span(title: str, number: int, units: str):
|
|
|
|
| 37 |
def fetch_current_revision(title: str, context=None):
    """
    Run _fetch_current_revision inside the supplied Logfire context, if any.

    The context handling lives in this wrapper so the wrapped function does
    not need an extra level of indentation.
    """
    # Fall back to a no-op context manager when no Logfire context is given
    span_context = logfire.attach_context(context) if context else nullcontext()
    with span_context:
        return _fetch_current_revision(title)
|
| 44 |
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def fetch_previous_revision(
|
| 47 |
title: str, number: int, units: str, new_revision: str, context=None
|
| 48 |
):
|
|
|
|
| 50 |
return _fetch_previous_revision(title, number, units, new_revision)
|
| 51 |
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
    """
    Run _run_heuristic_classifier inside the supplied Logfire context, if any.
    """
    # Fall back to a no-op context manager when no Logfire context is given
    span_context = logfire.attach_context(context) if context else nullcontext()
    with span_context:
        return _run_heuristic_classifier(old_revision, new_revision)
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
    """
    Run _run_fewshot_classifier inside the supplied Logfire context, if any.
    """
    # Fall back to a no-op context manager when no Logfire context is given
    span_context = logfire.attach_context(context) if context else nullcontext()
    with span_context:
        return _run_fewshot_classifier(old_revision, new_revision)
|
| 61 |
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
def run_judge(
|
| 64 |
old_revision: str,
|
| 65 |
new_revision: str,
|
|
|
|
| 82 |
)
|
| 83 |
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
# Create Gradio interface
|
| 86 |
with gr.Blocks(title="Noteworthy Differences") as demo:
|
| 87 |
with gr.Row():
|
| 88 |
gr.Markdown(
|
| 89 |
"""
|
| 90 |
+
<table>
|
| 91 |
+
<colgroup>
|
| 92 |
+
<col span="1" style="width: 30%;">
|
| 93 |
+
<col span="1" style="width: 25%;">
|
| 94 |
+
<col span="1" style="width: 45%;">
|
| 95 |
+
</colgroup>
|
| 96 |
+
<tr>
|
| 97 |
+
<td>
|
| 98 |
+
<i class="fa-brands fa-wikipedia-w"></i> Compare current and old revisions of a Wikipedia article.<br>
|
| 99 |
+
📅 You choose the number of revisions or days behind.
|
| 100 |
+
</td>
|
| 101 |
+
<td>
|
| 102 |
+
◇ ∴ ⚖ Two classifier models and a judge predict the noteworthiness of the differences.
|
| 103 |
+
</td>
|
| 104 |
+
<td>
|
| 105 |
+
<i class="fa-brands fa-github"></i> The <a href="https://github.com/jedick/noteworthy-differences">GitHub repository</a> describes how the judge was aligned with human preferences.<br>
|
| 106 |
+
👥 The <a href="https://huggingface.co/datasets/jedick/noteworthy-differences-feedback">feedback dataset</a> holds all user feedback collected to date.
|
| 107 |
+
</td>
|
| 108 |
+
</tr>
|
| 109 |
+
</table>
|
| 110 |
+
|
| 111 |
+
""",
|
| 112 |
+
elem_id="intro_table",
|
| 113 |
)
|
| 114 |
|
| 115 |
with gr.Row():
|
|
|
|
| 118 |
)
|
| 119 |
number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
|
| 120 |
units_dropdown = gr.Dropdown(
|
| 121 |
+
choices=["revisions", "days"], value="revisions", label="Units"
|
| 122 |
)
|
| 123 |
+
judge_mode = gr.Dropdown(
|
| 124 |
choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
|
| 125 |
value="aligned-heuristic",
|
| 126 |
label="Judge Mode",
|
|
|
|
| 158 |
with gr.Column():
|
| 159 |
gr.Markdown("### Model Output")
|
| 160 |
heuristic_rationale = gr.Textbox(
|
| 161 |
+
label="◇ Heuristic Model's Rationale",
|
| 162 |
lines=2,
|
| 163 |
max_lines=7,
|
| 164 |
)
|
| 165 |
fewshot_rationale = gr.Textbox(
|
| 166 |
+
label="∴ Few-shot Model's Rationale",
|
| 167 |
lines=2,
|
| 168 |
max_lines=7,
|
| 169 |
)
|
| 170 |
judge_reasoning = gr.Textbox(
|
| 171 |
+
label="⚖ Judge's Reasoning",
|
| 172 |
lines=2,
|
| 173 |
max_lines=7,
|
| 174 |
)
|
|
|
|
| 185 |
)
|
| 186 |
rerun_btn = gr.Button("Rerun Model")
|
| 187 |
|
| 188 |
+
gr.Markdown("### 👥 Your feedback")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
with gr.Row():
|
| 190 |
thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
|
| 191 |
+
thumbs_down_btn = gr.Button("👎 Disagree", variant="primary")
|
| 192 |
|
| 193 |
# States to store boolean values
|
| 194 |
heuristic_noteworthy = gr.State()
|
|
|
|
| 246 |
fewshot_noteworthy,
|
| 247 |
heuristic_rationale,
|
| 248 |
fewshot_rationale,
|
| 249 |
+
judge_mode,
|
| 250 |
context,
|
| 251 |
],
|
| 252 |
outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
|
|
|
|
| 274 |
fewshot_noteworthy,
|
| 275 |
heuristic_rationale,
|
| 276 |
fewshot_rationale,
|
| 277 |
+
judge_mode,
|
| 278 |
context,
|
| 279 |
],
|
| 280 |
outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
|
|
|
|
| 288 |
title_input,
|
| 289 |
number_input,
|
| 290 |
units_dropdown,
|
| 291 |
+
judge_mode,
|
| 292 |
old_revision,
|
| 293 |
new_revision,
|
| 294 |
old_timestamp,
|
|
|
|
| 302 |
fewshot_noteworthy,
|
| 303 |
judge_noteworthy,
|
| 304 |
],
|
|
|
|
| 305 |
api_name=False,
|
| 306 |
)
|
| 307 |
|
|
|
|
| 311 |
title_input,
|
| 312 |
number_input,
|
| 313 |
units_dropdown,
|
| 314 |
+
judge_mode,
|
| 315 |
old_revision,
|
| 316 |
new_revision,
|
| 317 |
old_timestamp,
|
|
|
|
| 325 |
fewshot_noteworthy,
|
| 326 |
judge_noteworthy,
|
| 327 |
],
|
|
|
|
| 328 |
api_name=False,
|
| 329 |
)
|
| 330 |
|
|
|
|
| 333 |
# Setup theme without background image
|
| 334 |
theme = gr.Theme.from_hub("NoCrypt/miku")
|
| 335 |
theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
|
| 336 |
+
# Define the HTML for Font Awesome
|
| 337 |
+
head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css" rel="stylesheet">'
|
| 338 |
+
# Use CSS to style table
|
| 339 |
+
css = """
|
| 340 |
+
#intro_table {background-color: #ecfdf5}
|
| 341 |
+
table, tr, td {
|
| 342 |
+
border: none; /* Removes all borders */
|
| 343 |
+
border-collapse: collapse; /* Ensures no gaps between cells */
|
| 344 |
+
}
|
| 345 |
+
"""
|
| 346 |
|
| 347 |
+
demo.launch(theme=theme, head=head, css=css)
|
app_functions.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from wiki_data_fetcher import (
|
| 2 |
+
get_previous_revisions,
|
| 3 |
+
get_revision_from_age,
|
| 4 |
+
get_wikipedia_introduction,
|
| 5 |
+
extract_revision_info,
|
| 6 |
+
get_revisions_behind,
|
| 7 |
+
)
|
| 8 |
+
from models import classifier, judge
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import logfire
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@logfire.instrument("Fetch current revision")
def _fetch_current_revision(title: str):
    """
    Fetch current revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title

    Returns:
        Tuple of (introduction, timestamp). The timestamp is formatted as a
        Markdown string for display, or "" if the revision has no timestamp.

    Raises:
        gr.Error: If the title is blank, the page cannot be found, or any
            other exception occurs while fetching the revision.
    """
    if not title or not title.strip():
        error_msg = "Please enter a Wikipedia page title."
        raise gr.Error(error_msg, print_exception=False)

    try:
        # Get current revision (revision 0)
        json_data = get_previous_revisions(title, revisions=0)
        revision_info = extract_revision_info(json_data, revnum=0)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"

        # Format timestamp for display
        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # Already a user-facing error; re-raise as is so the message is not
        # wrapped a second time with the generic "Error occurred:" prefix
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@logfire.instrument("Fetch previous revision")
def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
    """
    Fetch previous revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title
        number: Number of revisions or days behind
        units: "revisions" or "days"
        new_revision: Text of the current revision; if empty, nothing is
            fetched and (None, None) is returned

    Returns:
        Tuple of (introduction, timestamp). The timestamp is formatted as a
        Markdown string that also reports the number of revisions behind,
        or "" if the revision has no timestamp.

    Raises:
        gr.Error: If the requested revision cannot be found or any other
            exception occurs while fetching it.
    """

    # If we get here with an empty new revision, then an error should have been raised
    # in fetch_current_revision, so just return empty values without raising another error
    if not new_revision:
        return None, None

    try:
        # Get previous revision based on units
        if units == "revisions":
            json_data = get_previous_revisions(title, revisions=number)
            revision_info = extract_revision_info(json_data, revnum=number)
        else:  # units == "days"
            revision_info = get_revision_from_age(title, age_days=number)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"

        # Get revisions_behind
        if units == "revisions":
            revisions_behind = revision_info["revnum"]
        else:
            revisions_behind = get_revisions_behind(title, revid)
            # For a negative number, replace the negative sign with ">"
            # NOTE(review): assumed to apply only to the "days" branch — confirm
            if revisions_behind < 0:
                revisions_behind = str(revisions_behind).replace("-", ">")

        # Format timestamp for display
        timestamp = (
            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
            if timestamp
            else ""
        )

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # Already a user-facing error; re-raise as is so the message is not
        # wrapped a second time with the generic "Error occurred:" prefix
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
    """
    Run a classification model on the revisions.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        prompt_style: heuristic or few-shot

    Returns:
        Tuple of (noteworthy, rationale) (bool, str), or (None, None) when
        either revision is empty

    Raises:
        gr.Error: If the classifier returns no result or raises an exception
    """

    # Values to return if an input is missing
    noteworthy, rationale = None, None
    if not old_revision or not new_revision:
        return noteworthy, rationale

    try:
        # Run classifier model
        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
        if not result:
            error_msg = f"Error: Could not get {prompt_style} model result"
            raise gr.Error(error_msg, print_exception=False)
        noteworthy = result.get("noteworthy", None)
        rationale = result.get("rationale", "")

    except gr.Error:
        # Already a user-facing error; re-raise as is so the message is not
        # wrapped a second time with the "Error running model:" prefix
        raise
    except Exception as e:
        error_msg = f"Error running model: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e

    return noteworthy, rationale
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
@logfire.instrument("Run heuristic classifier")
def _run_heuristic_classifier(old_revision: str, new_revision: str):
    """
    Classify a pair of revisions using the heuristic prompt style.

    Delegates to run_classifier and returns its result unchanged.
    """
    style = "heuristic"
    return run_classifier(old_revision, new_revision, prompt_style=style)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@logfire.instrument("Run few-shot classifier")
def _run_fewshot_classifier(old_revision: str, new_revision: str):
    """
    Classify a pair of revisions using the few-shot prompt style.

    Delegates to run_classifier and returns its result unchanged.
    """
    style = "few-shot"
    return run_classifier(old_revision, new_revision, prompt_style=style)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def compute_confidence(
    heuristic_noteworthy,
    fewshot_noteworthy,
    judge_noteworthy,
    heuristic_rationale,
    fewshot_rationale,
    judge_reasoning,
):
    """
    Derive a confidence label from the three noteworthy predictions.

    Args:
        heuristic_noteworthy: Heuristic classifier's prediction
        fewshot_noteworthy: Few-shot classifier's prediction
        judge_noteworthy: Judge's prediction
        heuristic_rationale: Heuristic classifier's rationale
        fewshot_rationale: Few-shot classifier's rationale
        judge_reasoning: Judge's reasoning

    Returns:
        "High" when all three predictions agree, "Moderate" when the two
        classifiers disagree, "Questionable" when the classifiers agree but
        the judge does not, or None when any rationale/reasoning is missing.
    """
    # Without all three explanations there is nothing to score
    explanations_present = (
        heuristic_rationale and fewshot_rationale and judge_reasoning
    )
    if not explanations_present:
        return None

    # Classifiers and judge all agree
    unanimous = heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy
    if unanimous:
        return "High"

    # Classifiers disagree, judge decides
    if heuristic_noteworthy != fewshot_noteworthy:
        return "Moderate"

    # Classifiers agree, judge vetoes
    return "Questionable"
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
@logfire.instrument("Run judge")
def _run_judge(
    old_revision: str,
    new_revision: str,
    heuristic_noteworthy: bool,
    fewshot_noteworthy: bool,
    heuristic_rationale: str,
    fewshot_rationale: str,
    judge_mode: str,
):
    """
    Run judge on the revisions and classifiers' rationales.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        heuristic_noteworthy: Heuristic model's noteworthiness prediction
        fewshot_noteworthy: Few-shot model's noteworthiness prediction
        heuristic_rationale: Heuristic model's rationale
        fewshot_rationale: Few-shot model's rationale
        judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")

    Returns:
        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str).
        All four values are None when a revision or a classifier rationale
        is missing, in which case the judge is not called.

    Raises:
        gr.Error: If the judge raises an exception or returns an empty result.
    """

    print(f"old_revision: {old_revision}")
    print(f"new_revision: {new_revision}")
    print(f"judge_mode: {judge_mode}")

    # Values to return if required inputs are missing
    noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
    if (
        not old_revision
        or not new_revision
        or not heuristic_rationale
        or not fewshot_rationale
    ):
        return noteworthy, noteworthy_text, reasoning, confidence

    try:
        # Run judge
        result = judge(
            old_revision,
            new_revision,
            heuristic_rationale,
            fewshot_rationale,
            mode=judge_mode,
        )
        if result:
            noteworthy = result.get("noteworthy", "")
            reasoning = result.get("reasoning", "")
        else:
            error_msg = "Error: Could not get judge's result"
            raise gr.Error(error_msg, print_exception=False)

    except gr.Error:
        # Already a user-facing error (e.g. the empty-result case above);
        # re-raise unchanged so the message is not wrapped a second time
        raise
    except Exception as e:
        error_msg = f"Error running judge: {str(e)}"
        # Chain the original exception so the root cause stays available
        raise gr.Error(error_msg, print_exception=False) from e

    # Format noteworthy label (boolean) as text; no label without reasoning
    noteworthy_text = str(noteworthy) if reasoning else None

    # Get confidence score
    confidence = compute_confidence(
        heuristic_noteworthy,
        fewshot_noteworthy,
        noteworthy,
        heuristic_rationale,
        fewshot_rationale,
        reasoning,
    )

    return noteworthy, noteworthy_text, reasoning, confidence
|
feedback.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from huggingface_hub import HfApi, CommitScheduler
|
|
|
|
| 2 |
from datetime import datetime
|
| 3 |
from pathlib import Path
|
| 4 |
import gradio as gr
|
|
@@ -6,6 +7,9 @@ import logfire
|
|
| 6 |
import json
|
| 7 |
import os
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
# Set repo ID for Hugging Face dataset
|
| 10 |
REPO_ID = "jedick/noteworthy-differences-feedback"
|
| 11 |
# Setup user feedback file for uploading to HF dataset
|
|
@@ -64,7 +68,7 @@ def save_feedback(*args, feedback_value: str) -> None:
|
|
| 64 |
with feedback_path.open("a") as f:
|
| 65 |
f.write(json.dumps(feedback_dict))
|
| 66 |
f.write("\n")
|
| 67 |
-
gr.Success(f"Saved
|
| 68 |
|
| 69 |
|
| 70 |
@logfire.instrument("Save feedback: agree")
|
|
|
|
| 1 |
from huggingface_hub import HfApi, CommitScheduler
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
from datetime import datetime
|
| 4 |
from pathlib import Path
|
| 5 |
import gradio as gr
|
|
|
|
| 7 |
import json
|
| 8 |
import os
|
| 9 |
|
| 10 |
+
# Load API keys
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
# Set repo ID for Hugging Face dataset
|
| 14 |
REPO_ID = "jedick/noteworthy-differences-feedback"
|
| 15 |
# Setup user feedback file for uploading to HF dataset
|
|
|
|
| 68 |
with feedback_path.open("a") as f:
|
| 69 |
f.write(json.dumps(feedback_dict))
|
| 70 |
f.write("\n")
|
| 71 |
+
gr.Success(f"Saved feedback: <strong>{feedback_value}</strong>")
|
| 72 |
|
| 73 |
|
| 74 |
@logfire.instrument("Save feedback: agree")
|