jedick committed on
Commit
956820f
·
1 Parent(s): ff43104

Create app_functions.py to hold main app functions

Browse files
Files changed (3) hide show
  1. app.py +55 -292
  2. app_functions.py +272 -0
  3. feedback.py +5 -1
app.py CHANGED
@@ -1,12 +1,5 @@
1
  import gradio as gr
2
- from wiki_data_fetcher import (
3
- get_previous_revisions,
4
- get_revision_from_age,
5
- get_wikipedia_introduction,
6
- extract_revision_info,
7
- get_revisions_behind,
8
- get_random_wikipedia_title,
9
- )
10
  from feedback import save_feedback_agree, save_feedback_disagree
11
  from contextlib import nullcontext
12
  from dotenv import load_dotenv
@@ -18,9 +11,15 @@ load_dotenv()
18
  # Setup logging with Logfire
19
  logfire.configure()
20
 
21
- # Import this after logfire.configure() so we don't get
22
  # LogfireNotConfiguredWarning: Instrumentation will have no effect
23
- from models import classifier, judge
 
 
 
 
 
 
24
 
25
 
26
  def start_parent_span(title: str, number: int, units: str):
@@ -38,59 +37,12 @@ def start_parent_span(title: str, number: int, units: str):
38
  def fetch_current_revision(title: str, context=None):
39
  """
40
  Wrapper to run _fetch_current_revision in provided Logfire context.
41
- We use this to minimize indentation in the wrapped function.
42
  """
43
  with logfire.attach_context(context) if context else nullcontext():
44
  return _fetch_current_revision(title)
45
 
46
 
47
- @logfire.instrument("Fetch current revision")
48
- def _fetch_current_revision(title: str):
49
- """
50
- Fetch current revision of a Wikipedia article and return its introduction.
51
-
52
- Args:
53
- title: Wikipedia article title
54
-
55
- Returns:
56
- Tuple of (introduction, timestamp)
57
- """
58
- if not title or not title.strip():
59
- error_msg = "Please enter a Wikipedia page title."
60
- raise gr.Error(error_msg, print_exception=False)
61
- return None, None
62
-
63
- try:
64
- # Get current revision (revision 0)
65
- json_data = get_previous_revisions(title, revisions=0)
66
- revision_info = extract_revision_info(json_data, revnum=0)
67
-
68
- if not revision_info.get("revid"):
69
- error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
70
- raise gr.Error(error_msg, print_exception=False)
71
- return None, None
72
-
73
- revid = revision_info["revid"]
74
- timestamp = revision_info["timestamp"]
75
-
76
- # Get introduction
77
- introduction = get_wikipedia_introduction(revid)
78
-
79
- if introduction is None:
80
- introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"
81
-
82
- # Format timestamp for display
83
- timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""
84
-
85
- # Return introduction text and timestamp
86
- return introduction, timestamp
87
-
88
- except Exception as e:
89
- error_msg = f"Error occurred: {str(e)}"
90
- raise gr.Error(error_msg, print_exception=False)
91
- return None, None
92
-
93
-
94
  def fetch_previous_revision(
95
  title: str, number: int, units: str, new_revision: str, context=None
96
  ):
@@ -98,152 +50,16 @@ def fetch_previous_revision(
98
  return _fetch_previous_revision(title, number, units, new_revision)
99
 
100
 
101
- @logfire.instrument("Fetch previous revision")
102
- def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
103
- """
104
- Fetch previous revision of a Wikipedia article and return its introduction.
105
-
106
- Args:
107
- title: Wikipedia article title
108
- number: Number of revisions or days behind
109
- units: "revisions" or "days"
110
-
111
- Returns:
112
- Tuple of (introduction, timestamp)
113
- """
114
-
115
- # If we get here with an empty new revision, then an error should have been raised
116
- # in fetch_current_revision, so just return empty values without raising another error
117
- if not new_revision:
118
- return None, None
119
-
120
- try:
121
- # Get previous revision based on units
122
- if units == "revisions":
123
- json_data = get_previous_revisions(title, revisions=number)
124
- revision_info = extract_revision_info(json_data, revnum=number)
125
- else: # units == "days"
126
- revision_info = get_revision_from_age(title, age_days=number)
127
-
128
- if not revision_info.get("revid"):
129
- error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
130
- raise gr.Error(error_msg, print_exception=False)
131
- return None, None
132
-
133
- revid = revision_info["revid"]
134
- timestamp = revision_info["timestamp"]
135
-
136
- # Get introduction
137
- introduction = get_wikipedia_introduction(revid)
138
-
139
- if introduction is None:
140
- introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"
141
-
142
- # Get revisions_behind
143
- if units == "revisions":
144
- revisions_behind = revision_info["revnum"]
145
- else:
146
- revisions_behind = get_revisions_behind(title, revid)
147
- # For a negative number, replace the negative sign with ">"
148
- if revisions_behind < 0:
149
- revisions_behind = str(revisions_behind).replace("-", ">")
150
-
151
- # Format timestamp for display
152
- timestamp = (
153
- f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
154
- if timestamp
155
- else ""
156
- )
157
-
158
- # Return introduction text and timestamp
159
- return introduction, timestamp
160
-
161
- except Exception as e:
162
- error_msg = f"Error occurred: {str(e)}"
163
- raise gr.Error(error_msg, print_exception=False)
164
- return None, None
165
-
166
-
167
- def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
168
- """
169
- Run a classification model on the revisions.
170
-
171
- Args:
172
- old_revision: Old revision text
173
- new_revision: New revision text
174
- prompt_style: heuristic or few-shot
175
-
176
- Returns:
177
- Tuple of (noteworthy, rationale) (bool, str)
178
- """
179
-
180
- # Values to return if there is an error
181
- noteworthy, rationale = None, None
182
- if not old_revision or not new_revision:
183
- return noteworthy, rationale
184
-
185
- try:
186
- # Run classifier model
187
- result = classifier(old_revision, new_revision, prompt_style=prompt_style)
188
- if result:
189
- noteworthy = result.get("noteworthy", None)
190
- rationale = result.get("rationale", "")
191
- else:
192
- error_msg = f"Error: Could not get {prompt_style} model result"
193
- raise gr.Error(error_msg, print_exception=False)
194
-
195
- except Exception as e:
196
- error_msg = f"Error running model: {str(e)}"
197
- raise gr.Error(error_msg, print_exception=False)
198
-
199
- return noteworthy, rationale
200
-
201
-
202
  def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
203
  with logfire.attach_context(context) if context else nullcontext():
204
  return _run_heuristic_classifier(old_revision, new_revision)
205
 
206
 
207
- @logfire.instrument("Run heuristic classifier")
208
- def _run_heuristic_classifier(old_revision: str, new_revision: str):
209
- return run_classifier(old_revision, new_revision, prompt_style="heuristic")
210
-
211
-
212
  def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
213
  with logfire.attach_context(context) if context else nullcontext():
214
  return _run_fewshot_classifier(old_revision, new_revision)
215
 
216
 
217
- @logfire.instrument("Run few-shot classifier")
218
- def _run_fewshot_classifier(old_revision: str, new_revision: str):
219
- return run_classifier(old_revision, new_revision, prompt_style="few-shot")
220
-
221
-
222
- def compute_confidence(
223
- heuristic_noteworthy,
224
- fewshot_noteworthy,
225
- judge_noteworthy,
226
- heuristic_rationale,
227
- fewshot_rationale,
228
- judge_reasoning,
229
- ):
230
- """
231
- Compute a confidence label using the noteworthy booleans.
232
- """
233
- # Return None if any of the rationales or reasoning is missing.
234
- if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
235
- return None
236
- if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
237
- # Classifiers and judge all agree
238
- return "High"
239
- elif heuristic_noteworthy != fewshot_noteworthy:
240
- # Classifiers disagree, judge decides
241
- return "Moderate"
242
- else:
243
- # Classifiers agree, judge vetoes
244
- return "Questionable"
245
-
246
-
247
  def run_judge(
248
  old_revision: str,
249
  new_revision: str,
@@ -266,89 +82,34 @@ def run_judge(
266
  )
267
 
268
 
269
- @logfire.instrument("Run judge")
270
- def _run_judge(
271
- old_revision: str,
272
- new_revision: str,
273
- heuristic_noteworthy: bool,
274
- fewshot_noteworthy: bool,
275
- heuristic_rationale: str,
276
- fewshot_rationale: str,
277
- judge_mode: str,
278
- ):
279
- """
280
- Run judge on the revisions and classifiers' rationales.
281
-
282
- Args:
283
- old_revision: Old revision text
284
- new_revision: New revision text
285
- heuristic_rationale: Heuristic model's rationale
286
- fewshot_rationale: Few-shot model's rationale
287
- judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
288
-
289
- Returns:
290
- Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str)
291
- """
292
-
293
- # Values to return if there is an error
294
- noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
295
- if (
296
- not old_revision
297
- or not new_revision
298
- or not heuristic_rationale
299
- or not fewshot_rationale
300
- ):
301
- return noteworthy, noteworthy_text, reasoning, confidence
302
-
303
- try:
304
- # Run judge
305
- result = judge(
306
- old_revision,
307
- new_revision,
308
- heuristic_rationale,
309
- fewshot_rationale,
310
- mode=judge_mode,
311
- )
312
- if result:
313
- noteworthy = result.get("noteworthy", "")
314
- reasoning = result.get("reasoning", "")
315
- else:
316
- error_msg = f"Error: Could not get judge's result"
317
- raise gr.Error(error_msg, print_exception=False)
318
-
319
- except Exception as e:
320
- error_msg = f"Error running judge: {str(e)}"
321
- raise gr.Error(error_msg, print_exception=False)
322
-
323
- # Format noteworthy label (boolean) as text
324
- if not reasoning:
325
- noteworthy_text = None
326
- else:
327
- noteworthy_text = str(noteworthy)
328
-
329
- # Get confidence score
330
- confidence = compute_confidence(
331
- heuristic_noteworthy,
332
- fewshot_noteworthy,
333
- noteworthy,
334
- heuristic_rationale,
335
- fewshot_rationale,
336
- reasoning,
337
- )
338
-
339
- return noteworthy, noteworthy_text, reasoning, confidence
340
-
341
-
342
  # Create Gradio interface
343
  with gr.Blocks(title="Noteworthy Differences") as demo:
344
  with gr.Row():
345
  gr.Markdown(
346
  """
347
- Compare current and old revisions of a Wikipedia article - you choose the number of revisions or days behind.<br>
348
- Two classifier models (with heuristic and few-shot prompts) and a judge predict the noteworthiness of the differences.<br>
349
- The judge was aligned with human preferences as described in the
350
- [GitHub repository](https://github.com/jedick/noteworthy-differences).
351
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  )
353
 
354
  with gr.Row():
@@ -357,9 +118,9 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
357
  )
358
  number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
359
  units_dropdown = gr.Dropdown(
360
- choices=["revisions", "days"], value="revisions", label="Unit"
361
  )
362
- judge_mode_dropdown = gr.Dropdown(
363
  choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
364
  value="aligned-heuristic",
365
  label="Judge Mode",
@@ -397,17 +158,17 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
397
  with gr.Column():
398
  gr.Markdown("### Model Output")
399
  heuristic_rationale = gr.Textbox(
400
- label="Heuristic Model's Rationale",
401
  lines=2,
402
  max_lines=7,
403
  )
404
  fewshot_rationale = gr.Textbox(
405
- label="Few-shot Model's Rationale",
406
  lines=2,
407
  max_lines=7,
408
  )
409
  judge_reasoning = gr.Textbox(
410
- label="Judge's Reasoning",
411
  lines=2,
412
  max_lines=7,
413
  )
@@ -424,16 +185,10 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
424
  )
425
  rerun_btn = gr.Button("Rerun Model")
426
 
427
- gr.Markdown("### Your feedback")
428
- feedback_status = gr.Textbox(
429
- label="",
430
- lines=1,
431
- interactive=False,
432
- visible=True,
433
- )
434
  with gr.Row():
435
  thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
436
- thumbs_down_btn = gr.Button("👎 Disagree", variant="secondary")
437
 
438
  # States to store boolean values
439
  heuristic_noteworthy = gr.State()
@@ -491,7 +246,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
491
  fewshot_noteworthy,
492
  heuristic_rationale,
493
  fewshot_rationale,
494
- judge_mode_dropdown,
495
  context,
496
  ],
497
  outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
@@ -519,7 +274,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
519
  fewshot_noteworthy,
520
  heuristic_rationale,
521
  fewshot_rationale,
522
- judge_mode_dropdown,
523
  context,
524
  ],
525
  outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
@@ -533,7 +288,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
533
  title_input,
534
  number_input,
535
  units_dropdown,
536
- judge_mode_dropdown,
537
  old_revision,
538
  new_revision,
539
  old_timestamp,
@@ -547,7 +302,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
547
  fewshot_noteworthy,
548
  judge_noteworthy,
549
  ],
550
- outputs=[feedback_status],
551
  api_name=False,
552
  )
553
 
@@ -557,7 +311,7 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
557
  title_input,
558
  number_input,
559
  units_dropdown,
560
- judge_mode_dropdown,
561
  old_revision,
562
  new_revision,
563
  old_timestamp,
@@ -571,7 +325,6 @@ with gr.Blocks(title="Noteworthy Differences") as demo:
571
  fewshot_noteworthy,
572
  judge_noteworthy,
573
  ],
574
- outputs=[feedback_status],
575
  api_name=False,
576
  )
577
 
@@ -580,5 +333,15 @@ if __name__ == "__main__":
580
  # Setup theme without background image
581
  theme = gr.Theme.from_hub("NoCrypt/miku")
582
  theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
 
 
 
 
 
 
 
 
 
 
583
 
584
- demo.launch(theme=theme)
 
1
  import gradio as gr
2
+ from wiki_data_fetcher import get_random_wikipedia_title
 
 
 
 
 
 
 
3
  from feedback import save_feedback_agree, save_feedback_disagree
4
  from contextlib import nullcontext
5
  from dotenv import load_dotenv
 
11
  # Setup logging with Logfire
12
  logfire.configure()
13
 
14
+ # This goes after logfire.configure() to avoid
15
  # LogfireNotConfiguredWarning: Instrumentation will have no effect
16
+ from app_functions import (
17
+ _fetch_current_revision,
18
+ _fetch_previous_revision,
19
+ _run_heuristic_classifier,
20
+ _run_fewshot_classifier,
21
+ _run_judge,
22
+ )
23
 
24
 
25
  def start_parent_span(title: str, number: int, units: str):
 
37
  def fetch_current_revision(title: str, context=None):
38
  """
39
  Wrapper to run _fetch_current_revision in provided Logfire context.
40
+ We use a wrapper to minimize indentation in the called function.
41
  """
42
  with logfire.attach_context(context) if context else nullcontext():
43
  return _fetch_current_revision(title)
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def fetch_previous_revision(
47
  title: str, number: int, units: str, new_revision: str, context=None
48
  ):
 
50
  return _fetch_previous_revision(title, number, units, new_revision)
51
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
54
  with logfire.attach_context(context) if context else nullcontext():
55
  return _run_heuristic_classifier(old_revision, new_revision)
56
 
57
 
 
 
 
 
 
58
  def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
59
  with logfire.attach_context(context) if context else nullcontext():
60
  return _run_fewshot_classifier(old_revision, new_revision)
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def run_judge(
64
  old_revision: str,
65
  new_revision: str,
 
82
  )
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # Create Gradio interface
86
  with gr.Blocks(title="Noteworthy Differences") as demo:
87
  with gr.Row():
88
  gr.Markdown(
89
  """
90
+ <table>
91
+ <colgroup>
92
+ <col span="1" style="width: 30%;">
93
+ <col span="1" style="width: 25%;">
94
+ <col span="1" style="width: 45%;">
95
+ </colgroup>
96
+ <tr>
97
+ <td>
98
+ <i class="fa-brands fa-wikipedia-w"></i> Compare current and old revisions of a Wikipedia article.<br>
99
+ 📅 You choose the number of revisions or days behind.
100
+ </td>
101
+ <td>
102
+ ◇ ∴ ⚖ Two classifier models and a judge predict the noteworthiness of the differences.
103
+ </td>
104
+ <td>
105
+ <i class="fa-brands fa-github"></i> The <a href="https://github.com/jedick/noteworthy-differences">GitHub repository</a> describes how the judge was aligned with human preferences.<br>
106
+ 👥 The <a href="https://huggingface.co/datasets/jedick/noteworthy-differences-feedback">feedback dataset</a> holds all user feedback collected to date.
107
+ </td>
108
+ </tr>
109
+ </table>
110
+
111
+ """,
112
+ elem_id="intro_table",
113
  )
114
 
115
  with gr.Row():
 
118
  )
119
  number_input = gr.Number(label="Number", value=50, minimum=0, precision=0)
120
  units_dropdown = gr.Dropdown(
121
+ choices=["revisions", "days"], value="revisions", label="Units"
122
  )
123
+ judge_mode = gr.Dropdown(
124
  choices=["unaligned", "aligned-fewshot", "aligned-heuristic"],
125
  value="aligned-heuristic",
126
  label="Judge Mode",
 
158
  with gr.Column():
159
  gr.Markdown("### Model Output")
160
  heuristic_rationale = gr.Textbox(
161
+ label="Heuristic Model's Rationale",
162
  lines=2,
163
  max_lines=7,
164
  )
165
  fewshot_rationale = gr.Textbox(
166
+ label="Few-shot Model's Rationale",
167
  lines=2,
168
  max_lines=7,
169
  )
170
  judge_reasoning = gr.Textbox(
171
+ label="Judge's Reasoning",
172
  lines=2,
173
  max_lines=7,
174
  )
 
185
  )
186
  rerun_btn = gr.Button("Rerun Model")
187
 
188
+ gr.Markdown("### 👥 Your feedback")
 
 
 
 
 
 
189
  with gr.Row():
190
  thumbs_up_btn = gr.Button("👍 Agree", variant="primary")
191
+ thumbs_down_btn = gr.Button("👎 Disagree", variant="primary")
192
 
193
  # States to store boolean values
194
  heuristic_noteworthy = gr.State()
 
246
  fewshot_noteworthy,
247
  heuristic_rationale,
248
  fewshot_rationale,
249
+ judge_mode,
250
  context,
251
  ],
252
  outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
 
274
  fewshot_noteworthy,
275
  heuristic_rationale,
276
  fewshot_rationale,
277
+ judge_mode,
278
  context,
279
  ],
280
  outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
 
288
  title_input,
289
  number_input,
290
  units_dropdown,
291
+ judge_mode,
292
  old_revision,
293
  new_revision,
294
  old_timestamp,
 
302
  fewshot_noteworthy,
303
  judge_noteworthy,
304
  ],
 
305
  api_name=False,
306
  )
307
 
 
311
  title_input,
312
  number_input,
313
  units_dropdown,
314
+ judge_mode,
315
  old_revision,
316
  new_revision,
317
  old_timestamp,
 
325
  fewshot_noteworthy,
326
  judge_noteworthy,
327
  ],
 
328
  api_name=False,
329
  )
330
 
 
333
  # Setup theme without background image
334
  theme = gr.Theme.from_hub("NoCrypt/miku")
335
  theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
336
+ # Define the HTML for Font Awesome
337
+ head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css" rel="stylesheet">'
338
+ # Use CSS to style table
339
+ css = """
340
+ #intro_table {background-color: #ecfdf5}
341
+ table, tr, td {
342
+ border: none; /* Removes all borders */
343
+ border-collapse: collapse; /* Ensures no gaps between cells */
344
+ }
345
+ """
346
 
347
+ demo.launch(theme=theme, head=head, css=css)
app_functions.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wiki_data_fetcher import (
2
+ get_previous_revisions,
3
+ get_revision_from_age,
4
+ get_wikipedia_introduction,
5
+ extract_revision_info,
6
+ get_revisions_behind,
7
+ )
8
+ from models import classifier, judge
9
+ import gradio as gr
10
+ import logfire
11
+
12
+
13
@logfire.instrument("Fetch current revision")
def _fetch_current_revision(title: str):
    """
    Fetch current revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title

    Returns:
        Tuple of (introduction, timestamp). The timestamp is formatted as
        Markdown for display, or is an empty string if none is available.

    Raises:
        gr.Error: If the title is blank, the page cannot be found,
            or fetching the revision fails.
    """
    if not title or not title.strip():
        error_msg = "Please enter a Wikipedia page title."
        raise gr.Error(error_msg, print_exception=False)

    try:
        # Get current revision (revision 0)
        json_data = get_previous_revisions(title, revisions=0)
        revision_info = extract_revision_info(json_data, revnum=0)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find Wikipedia page '{title}'. Please check the title."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for current revision (revid: {revid})"

        # Format timestamp for display
        timestamp = f"**Timestamp:** {timestamp}" if timestamp else ""

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # This is already a user-facing error raised above;
        # re-raise it unchanged so the message isn't wrapped a second time
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e
58
+
59
+
60
@logfire.instrument("Fetch previous revision")
def _fetch_previous_revision(title: str, number: int, units: str, new_revision: str):
    """
    Fetch previous revision of a Wikipedia article and return its introduction.

    Args:
        title: Wikipedia article title
        number: Number of revisions or days behind
        units: "revisions" or "days"
        new_revision: Text of the current revision; if this is empty,
            (None, None) is returned without raising an error

    Returns:
        Tuple of (introduction, timestamp). The timestamp is formatted as
        Markdown for display and includes the number of revisions behind,
        or is an empty string if no timestamp is available.

    Raises:
        gr.Error: If the requested revision cannot be found
            or fetching the revision fails.
    """

    # If we get here with an empty new revision, then an error should have been raised
    # in fetch_current_revision, so just return empty values without raising another error
    if not new_revision:
        return None, None

    try:
        # Get previous revision based on units
        if units == "revisions":
            json_data = get_previous_revisions(title, revisions=number)
            revision_info = extract_revision_info(json_data, revnum=number)
        else:  # units == "days"
            revision_info = get_revision_from_age(title, age_days=number)

        if not revision_info.get("revid"):
            error_msg = f"Error: Could not find revision {number} {'revisions' if units == 'revisions' else 'days'} behind for '{title}'."
            raise gr.Error(error_msg, print_exception=False)

        revid = revision_info["revid"]
        timestamp = revision_info["timestamp"]

        # Get introduction
        introduction = get_wikipedia_introduction(revid)

        if introduction is None:
            introduction = f"Error: Could not retrieve introduction for previous revision (revid: {revid})"

        # Get revisions_behind
        if units == "revisions":
            revisions_behind = revision_info["revnum"]
        else:
            revisions_behind = get_revisions_behind(title, revid)
            # For a negative number, replace the negative sign with ">"
            if revisions_behind < 0:
                revisions_behind = str(revisions_behind).replace("-", ">")

        # Format timestamp for display
        timestamp = (
            f"**Timestamp:** {timestamp}, {revisions_behind} revisions behind"
            if timestamp
            else ""
        )

        # Return introduction text and timestamp
        return introduction, timestamp

    except gr.Error:
        # This is already a user-facing error raised above;
        # re-raise it unchanged so the message isn't wrapped a second time
        raise
    except Exception as e:
        error_msg = f"Error occurred: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e
124
+
125
+
126
def run_classifier(old_revision: str, new_revision: str, prompt_style: str):
    """
    Run a classification model on the revisions.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        prompt_style: heuristic or few-shot

    Returns:
        Tuple of (noteworthy, rationale) (bool, str).
        Both are None if either revision is empty.

    Raises:
        gr.Error: If the model returns no result or raises an exception.
    """

    # Nothing to classify if either revision is missing
    if not old_revision or not new_revision:
        return None, None

    try:
        # Run classifier model
        result = classifier(old_revision, new_revision, prompt_style=prompt_style)
    except Exception as e:
        error_msg = f"Error running model: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e

    # Check the result outside of the try block so that this error
    # isn't caught and wrapped again as "Error running model: ..."
    if not result:
        error_msg = f"Error: Could not get {prompt_style} model result"
        raise gr.Error(error_msg, print_exception=False)

    noteworthy = result.get("noteworthy", None)
    rationale = result.get("rationale", "")

    return noteworthy, rationale
159
+
160
+
161
@logfire.instrument("Run heuristic classifier")
def _run_heuristic_classifier(old_revision: str, new_revision: str):
    """
    Run the classifier with the heuristic prompt style.

    Returns:
        Tuple of (noteworthy, rationale) as returned by run_classifier
    """
    style = "heuristic"
    return run_classifier(old_revision, new_revision, style)
164
+
165
+
166
@logfire.instrument("Run few-shot classifier")
def _run_fewshot_classifier(old_revision: str, new_revision: str):
    """
    Run the classifier with the few-shot prompt style.

    Returns:
        Tuple of (noteworthy, rationale) as returned by run_classifier
    """
    style = "few-shot"
    return run_classifier(old_revision, new_revision, style)
169
+
170
+
171
def compute_confidence(
    heuristic_noteworthy,
    fewshot_noteworthy,
    judge_noteworthy,
    heuristic_rationale,
    fewshot_rationale,
    judge_reasoning,
):
    """
    Derive a confidence label from how the two classifiers and the judge voted.

    Returns:
        None if any rationale or the judge's reasoning is missing, otherwise
        "High" (all three agree), "Moderate" (classifiers disagree, judge decides),
        or "Questionable" (classifiers agree, judge vetoes)
    """
    # A label is only meaningful when all three explanations are present
    explanations = (heuristic_rationale, fewshot_rationale, judge_reasoning)
    if not all(explanations):
        return None

    classifiers_agree = heuristic_noteworthy == fewshot_noteworthy
    if not classifiers_agree:
        # Classifiers disagree, judge decides
        return "Moderate"

    if fewshot_noteworthy == judge_noteworthy:
        # Classifiers and judge all agree
        return "High"

    # Classifiers agree, judge vetoes
    return "Questionable"
194
+
195
+
196
@logfire.instrument("Run judge")
def _run_judge(
    old_revision: str,
    new_revision: str,
    heuristic_noteworthy: bool,
    fewshot_noteworthy: bool,
    heuristic_rationale: str,
    fewshot_rationale: str,
    judge_mode: str,
):
    """
    Run judge on the revisions and classifiers' rationales.

    Args:
        old_revision: Old revision text
        new_revision: New revision text
        heuristic_noteworthy: Heuristic model's noteworthiness prediction
        fewshot_noteworthy: Few-shot model's noteworthiness prediction
        heuristic_rationale: Heuristic model's rationale
        fewshot_rationale: Few-shot model's rationale
        judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")

    Returns:
        Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str).
        All four are None if either revision or either rationale is empty.

    Raises:
        gr.Error: If the judge returns no result or raises an exception.
    """

    # Nothing to judge if any of the inputs is missing
    if (
        not old_revision
        or not new_revision
        or not heuristic_rationale
        or not fewshot_rationale
    ):
        return None, None, None, None

    try:
        # Run judge
        result = judge(
            old_revision,
            new_revision,
            heuristic_rationale,
            fewshot_rationale,
            mode=judge_mode,
        )
    except Exception as e:
        error_msg = f"Error running judge: {str(e)}"
        raise gr.Error(error_msg, print_exception=False) from e

    # Check the result outside of the try block so that this error
    # isn't caught and wrapped again as "Error running judge: ..."
    if not result:
        error_msg = "Error: Could not get judge's result"
        raise gr.Error(error_msg, print_exception=False)

    noteworthy = result.get("noteworthy", "")
    reasoning = result.get("reasoning", "")

    # Format noteworthy label (boolean) as text;
    # leave it empty when the judge gave no reasoning
    noteworthy_text = str(noteworthy) if reasoning else None

    # Get confidence score
    confidence = compute_confidence(
        heuristic_noteworthy,
        fewshot_noteworthy,
        noteworthy,
        heuristic_rationale,
        fewshot_rationale,
        reasoning,
    )

    return noteworthy, noteworthy_text, reasoning, confidence
feedback.py CHANGED
@@ -1,4 +1,5 @@
1
  from huggingface_hub import HfApi, CommitScheduler
 
2
  from datetime import datetime
3
  from pathlib import Path
4
  import gradio as gr
@@ -6,6 +7,9 @@ import logfire
6
  import json
7
  import os
8
 
 
 
 
9
  # Set repo ID for Hugging Face dataset
10
  REPO_ID = "jedick/noteworthy-differences-feedback"
11
  # Setup user feedback file for uploading to HF dataset
@@ -64,7 +68,7 @@ def save_feedback(*args, feedback_value: str) -> None:
64
  with feedback_path.open("a") as f:
65
  f.write(json.dumps(feedback_dict))
66
  f.write("\n")
67
- gr.Success(f"Saved your feedback: {feedback_value}", duration=2, title="Thank you!")
68
 
69
 
70
  @logfire.instrument("Save feedback: agree")
 
1
  from huggingface_hub import HfApi, CommitScheduler
2
+ from dotenv import load_dotenv
3
  from datetime import datetime
4
  from pathlib import Path
5
  import gradio as gr
 
7
  import json
8
  import os
9
 
10
+ # Load API keys
11
+ load_dotenv()
12
+
13
  # Set repo ID for Hugging Face dataset
14
  REPO_ID = "jedick/noteworthy-differences-feedback"
15
  # Setup user feedback file for uploading to HF dataset
 
68
  with feedback_path.open("a") as f:
69
  f.write(json.dumps(feedback_dict))
70
  f.write("\n")
71
+ gr.Success(f"Saved feedback: <strong>{feedback_value}</strong>")
72
 
73
 
74
  @logfire.instrument("Save feedback: agree")