jedick committed on
Commit
d790ca4
·
1 Parent(s): 48c27bb

Update for Gradio 6

Browse files
Files changed (2) hide show
  1. app.py +63 -86
  2. requirements.txt +1 -1
app.py CHANGED
@@ -180,16 +180,43 @@ def run_fewshot_classifier(old_revision: str, new_revision: str):
180
  return run_classifier(old_revision, new_revision, prompt_style="few-shot")
181
 
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  @logfire.instrument("Step 4: Run judge")
184
  def run_judge(
185
  old_revision: str,
186
  new_revision: str,
187
- heuristic_rationale,
188
- fewshot_rationale,
 
 
189
  judge_mode: str,
190
  ):
191
  """
192
- Run classification models and judge on the revisions.
193
 
194
  Args:
195
  old_revision: Old revision text
@@ -199,18 +226,18 @@ def run_judge(
199
  judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
200
 
201
  Returns:
202
- Tuple of (noteworthy, reasoning) (bool, str)
203
  """
204
 
205
  # Values to return if there is an error
206
- noteworthy, reasoning = None, None
207
  if (
208
  not old_revision
209
  or not new_revision
210
  or not heuristic_rationale
211
  or not fewshot_rationale
212
  ):
213
- return noteworthy, reasoning
214
 
215
  try:
216
  # Run judge
@@ -232,52 +259,27 @@ def run_judge(
232
  error_msg = f"Error running judge: {str(e)}"
233
  raise gr.Error(error_msg, print_exception=False)
234
 
235
- return noteworthy, reasoning
236
-
237
-
238
- def format_noteworthy(noteworthy, reasoning):
239
- """
240
- Format judge's noteworthy label as text.
241
- """
242
  if not reasoning:
243
- # If the reasoning is empty, return nothing
244
- return None
245
- else:
246
- # Format noteworthy boolean as text
247
- return str(noteworthy)
248
-
249
-
250
- def compute_confidence(
251
- heuristic_noteworthy,
252
- fewshot_noteworthy,
253
- judge_noteworthy,
254
- heuristic_rationale,
255
- fewshot_rationale,
256
- judge_reasoning,
257
- ):
258
- """
259
- Compute a confidence label using the noteworthy booleans.
260
- """
261
- # Return None if any of the rationales or reasoning is missing.
262
- if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
263
- return None
264
- if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
265
- # Classifiers and judge all agree
266
- return "High"
267
- elif heuristic_noteworthy != fewshot_noteworthy:
268
- # Classifiers disagree, judge decides
269
- return "Moderate"
270
  else:
271
- # Classifiers agree, judge vetoes
272
- return "Questionable"
 
 
 
 
 
 
 
 
 
273
 
 
274
 
275
- # Setup theme without background image
276
- theme = gr.Theme.from_hub("NoCrypt/miku")
277
- theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
278
 
279
  # Create Gradio interface
280
- with gr.Blocks(theme=theme, title="Noteworthy Differences") as demo:
281
  with gr.Row():
282
  with gr.Column(scale=2):
283
  gr.Markdown(
@@ -360,12 +362,12 @@ with gr.Blocks(theme=theme, title="Noteworthy Differences") as demo:
360
  lines=1,
361
  interactive=False,
362
  )
363
- rerun_btn = gr.Button("Rerun Model", variant="primary")
364
 
365
- # Hidden checkboxes to store boolean values
366
- heuristic_noteworthy = gr.Checkbox(visible=False)
367
- fewshot_noteworthy = gr.Checkbox(visible=False)
368
- judge_noteworthy = gr.Checkbox(visible=False)
369
 
370
  random_btn.click(
371
  fn=get_random_wikipedia_title,
@@ -407,28 +409,13 @@ with gr.Blocks(theme=theme, title="Noteworthy Differences") as demo:
407
  inputs=[
408
  old_revision,
409
  new_revision,
410
- heuristic_rationale,
411
- fewshot_rationale,
412
- judge_mode_dropdown,
413
- ],
414
- outputs=[judge_noteworthy, judge_reasoning],
415
- api_name=False,
416
- ).then(
417
- fn=format_noteworthy,
418
- inputs=[judge_noteworthy, judge_reasoning],
419
- outputs=[noteworthy_text],
420
- api_name=False,
421
- ).then(
422
- fn=compute_confidence,
423
- inputs=[
424
  heuristic_noteworthy,
425
  fewshot_noteworthy,
426
- judge_noteworthy,
427
  heuristic_rationale,
428
  fewshot_rationale,
429
- judge_reasoning,
430
  ],
431
- outputs=[confidence],
432
  api_name=False,
433
  )
434
 
@@ -449,30 +436,20 @@ with gr.Blocks(theme=theme, title="Noteworthy Differences") as demo:
449
  inputs=[
450
  old_revision,
451
  new_revision,
452
- heuristic_rationale,
453
- fewshot_rationale,
454
- judge_mode_dropdown,
455
- ],
456
- outputs=[judge_noteworthy, judge_reasoning],
457
- api_name=False,
458
- ).then(
459
- fn=format_noteworthy,
460
- inputs=[judge_noteworthy, judge_reasoning],
461
- outputs=[noteworthy_text],
462
- api_name=False,
463
- ).then(
464
- fn=compute_confidence,
465
- inputs=[
466
  heuristic_noteworthy,
467
  fewshot_noteworthy,
468
- judge_noteworthy,
469
  heuristic_rationale,
470
  fewshot_rationale,
471
- judge_reasoning,
472
  ],
473
- outputs=[confidence],
474
  api_name=False,
475
  )
476
 
477
  if __name__ == "__main__":
478
- demo.launch()
 
 
 
 
 
 
180
  return run_classifier(old_revision, new_revision, prompt_style="few-shot")
181
 
182
 
183
+ def compute_confidence(
184
+ heuristic_noteworthy,
185
+ fewshot_noteworthy,
186
+ judge_noteworthy,
187
+ heuristic_rationale,
188
+ fewshot_rationale,
189
+ judge_reasoning,
190
+ ):
191
+ """
192
+ Compute a confidence label using the noteworthy booleans.
193
+ """
194
+ # Return None if any of the rationales or reasoning is missing.
195
+ if not heuristic_rationale or not fewshot_rationale or not judge_reasoning:
196
+ return None
197
+ if heuristic_noteworthy == fewshot_noteworthy == judge_noteworthy:
198
+ # Classifiers and judge all agree
199
+ return "High"
200
+ elif heuristic_noteworthy != fewshot_noteworthy:
201
+ # Classifiers disagree, judge decides
202
+ return "Moderate"
203
+ else:
204
+ # Classifiers agree, judge vetoes
205
+ return "Questionable"
206
+
207
+
208
  @logfire.instrument("Step 4: Run judge")
209
  def run_judge(
210
  old_revision: str,
211
  new_revision: str,
212
+ heuristic_noteworthy: bool,
213
+ fewshot_noteworthy: bool,
214
+ heuristic_rationale: str,
215
+ fewshot_rationale: str,
216
  judge_mode: str,
217
  ):
218
  """
219
+ Run judge on the revisions and classifiers' rationales.
220
 
221
  Args:
222
  old_revision: Old revision text
 
226
  judge_mode: Mode for judge function ("unaligned", "aligned-fewshot", "aligned-heuristic")
227
 
228
  Returns:
229
+ Tuple of (noteworthy, noteworthy_text, reasoning, confidence) (bool, str, str, str)
230
  """
231
 
232
  # Values to return if there is an error
233
+ noteworthy, noteworthy_text, reasoning, confidence = None, None, None, None
234
  if (
235
  not old_revision
236
  or not new_revision
237
  or not heuristic_rationale
238
  or not fewshot_rationale
239
  ):
240
+ return noteworthy, noteworthy_text, reasoning, confidence
241
 
242
  try:
243
  # Run judge
 
259
  error_msg = f"Error running judge: {str(e)}"
260
  raise gr.Error(error_msg, print_exception=False)
261
 
262
+ # Format noteworthy label (boolean) as text
 
 
 
 
 
 
263
  if not reasoning:
264
+ noteworthy_text = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  else:
266
+ noteworthy_text = str(noteworthy)
267
+
268
+ # Get confidence score
269
+ confidence = compute_confidence(
270
+ heuristic_noteworthy,
271
+ fewshot_noteworthy,
272
+ noteworthy,
273
+ heuristic_rationale,
274
+ fewshot_rationale,
275
+ reasoning,
276
+ )
277
 
278
+ return noteworthy, noteworthy_text, reasoning, confidence
279
 
 
 
 
280
 
281
  # Create Gradio interface
282
+ with gr.Blocks(title="Noteworthy Differences") as demo:
283
  with gr.Row():
284
  with gr.Column(scale=2):
285
  gr.Markdown(
 
362
  lines=1,
363
  interactive=False,
364
  )
365
+ rerun_btn = gr.Button("Rerun Model")
366
 
367
+ # States to store boolean values
368
+ heuristic_noteworthy = gr.State()
369
+ fewshot_noteworthy = gr.State()
370
+ judge_noteworthy = gr.State()
371
 
372
  random_btn.click(
373
  fn=get_random_wikipedia_title,
 
409
  inputs=[
410
  old_revision,
411
  new_revision,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  heuristic_noteworthy,
413
  fewshot_noteworthy,
 
414
  heuristic_rationale,
415
  fewshot_rationale,
416
+ judge_mode_dropdown,
417
  ],
418
+ outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
419
  api_name=False,
420
  )
421
 
 
436
  inputs=[
437
  old_revision,
438
  new_revision,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  heuristic_noteworthy,
440
  fewshot_noteworthy,
 
441
  heuristic_rationale,
442
  fewshot_rationale,
443
+ judge_mode_dropdown,
444
  ],
445
+ outputs=[judge_noteworthy, noteworthy_text, judge_reasoning, confidence],
446
  api_name=False,
447
  )
448
 
449
  if __name__ == "__main__":
450
+
451
+ # Setup theme without background image
452
+ theme = gr.Theme.from_hub("NoCrypt/miku")
453
+ theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")
454
+
455
+ demo.launch(theme=theme)
requirements.txt CHANGED
@@ -2,7 +2,7 @@ google-genai
2
  pydantic
3
  pandas
4
  dotenv
5
- gradio
6
  requests
7
  logfire
8
  opentelemetry-instrumentation-google-genai
 
2
  pydantic
3
  pandas
4
  dotenv
5
+ gradio>=6.0.1
6
  requests
7
  logfire
8
  opentelemetry-instrumentation-google-genai