Sina1138 committed on
Commit
8bcd5eb
·
1 Parent(s): 4cd1bc5

Enhance score normalization in interactive review processor: implement robust median-centered, IQR-scaled clipping to improve color scale handling

Browse files
interface/Demo.py CHANGED
@@ -2,17 +2,23 @@ import sys, os.path
2
  from pathlib import Path
3
  from typing import Tuple, Dict
4
  import json
5
-
6
  import torch
7
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
8
-
9
- BASE_DIR = Path(__file__).resolve().parent.parent
10
-
11
  import gradio as gr
12
  import pandas as pd
13
  import ast
14
  from tqdm import tqdm
15
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Auto-detect the preprocessed dataset CSV
17
  def _find_preprocessed_csv() -> Path:
18
  """Find the most recent preprocessed_scored_reviews_*.csv in the data dir."""
@@ -449,16 +455,24 @@ def compute_rsa_in_background(rsa_state: Dict, current_focus: str, progress=gr.P
449
  progress(0.50, desc="Running RSA reranking...")
450
  consensuality_map = processor.predict_consensuality(*active_texts)
451
 
452
- # Calculate most common and unique
453
  if consensuality_map:
454
  import pandas as _pd
455
  scores_series = _pd.Series(consensuality_map)
456
- most_common_text = "\n".join(scores_series.nlargest(3).index.tolist())
457
- most_unique_text = "\n".join(scores_series.nsmallest(3).index.tolist())
458
  else:
459
  most_common_text = ""
460
  most_unique_text = ""
461
 
 
 
 
 
 
 
 
 
462
  progress(0.90, desc="Formatting agreement results...")
463
 
464
  fmt = processor.format_highlighted_output
@@ -466,7 +480,7 @@ def compute_rsa_in_background(rsa_state: Dict, current_focus: str, progress=gr.P
466
  agree_out = []
467
  for i in range(MAX_INTERACTIVE_REVIEWS):
468
  if i < len(sentence_lists):
469
- agree_out.append(gr.update(visible=show_agreement, value=fmt(sentence_lists[i], consensuality_map, "consensuality")))
470
  else:
471
  agree_out.append(gr.update(visible=False, value=None))
472
 
@@ -575,6 +589,24 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
575
  rebuttal_updates = []
576
  consensuality_dict = {}
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  for i in range(10):
579
  if i < number_of_displayed_reviews:
580
  # Handle new structure: current_review[i] can be dict with "sentences" and "rebuttal"
@@ -612,13 +644,17 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
612
  elif show_consensuality:
613
  highlighted = []
614
  for sentence, metadata in review_item:
615
- score = metadata.get("consensuality", 0.0)
616
- score = score * 2 - 1 # Normalize to [-1, 1]
617
- score = score/2.5 if score > 0 else score # Amplify unique scores for better visibility
618
- score *= -1 # Invert the score for highlighting
619
-
 
620
  consensuality_dict[sentence] = score
621
- highlighted.append((sentence, score))
 
 
 
622
 
623
  elif show_topic:
624
  highlighted = []
@@ -662,8 +698,8 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
662
  # Set most consensual / unique sentences
663
  if show_consensuality and consensuality_dict:
664
  scores = pd.Series(consensuality_dict)
665
- most_unique = scores.sort_values(ascending=True).head(3).index.tolist()
666
- most_common = scores.sort_values(ascending=False).head(3).index.tolist()
667
  most_common_text = "\n".join(most_common)
668
  most_unique_text = "\n".join(most_unique)
669
 
 
2
  from pathlib import Path
3
  from typing import Tuple, Dict
4
  import json
 
5
  import torch
 
 
 
 
6
  import gradio as gr
7
  import pandas as pd
8
  import ast
9
  from tqdm import tqdm
10
 
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
12
+
13
+ BASE_DIR = Path(__file__).resolve().parent.parent
14
+
15
+ # Controls how aggressively agreement colors are amplified.
16
+ # Lower = more vivid colors (0.2 = very strong, 1.0 = no amplification).
17
+ # Asymmetric: unique/red (positive) is amplified less than common/blue (negative)
18
+ # to avoid overwhelming red when most sentences are unique.
19
+ AGREEMENT_AMP_UNIQUE = 0.9 # exponent for positive scores (red = unique)
20
+ AGREEMENT_AMP_COMMON = 0.5 # exponent for negative scores (blue = common)
21
+
22
  # Auto-detect the preprocessed dataset CSV
23
  def _find_preprocessed_csv() -> Path:
24
  """Find the most recent preprocessed_scored_reviews_*.csv in the data dir."""
 
455
  progress(0.50, desc="Running RSA reranking...")
456
  consensuality_map = processor.predict_consensuality(*active_texts)
457
 
458
+ # Calculate most common and unique (before amplification, so ranking is on true scores)
459
  if consensuality_map:
460
  import pandas as _pd
461
  scores_series = _pd.Series(consensuality_map)
462
+ most_common_text = "\n".join(scores_series.nsmallest(3).index.tolist())
463
+ most_unique_text = "\n".join(scores_series.nlargest(3).index.tolist())
464
  else:
465
  most_common_text = ""
466
  most_unique_text = ""
467
 
468
+ # Amplify scores for visible highlighting: sign-preserving power transform
469
+ # Maps [-1,1] → [-1,1] but pushes values away from 0 for better color contrast
470
+ import math
471
+ amplified_map = {
472
+ s: math.copysign(abs(v) ** (AGREEMENT_AMP_UNIQUE if v > 0 else AGREEMENT_AMP_COMMON), v) if v != 0 else 0.0
473
+ for s, v in consensuality_map.items()
474
+ }
475
+
476
  progress(0.90, desc="Formatting agreement results...")
477
 
478
  fmt = processor.format_highlighted_output
 
480
  agree_out = []
481
  for i in range(MAX_INTERACTIVE_REVIEWS):
482
  if i < len(sentence_lists):
483
+ agree_out.append(gr.update(visible=show_agreement, value=fmt(sentence_lists[i], amplified_map, "consensuality")))
484
  else:
485
  agree_out.append(gr.update(visible=False, value=None))
486
 
 
589
  rebuttal_updates = []
590
  consensuality_dict = {}
591
 
592
+ # Pre-compute robust normalization stats (median + IQR) for raw KL scores
593
+ import numpy as _np
594
+ _kl_median, _kl_iqr = 0.0, 0.0
595
+ if show_consensuality:
596
+ all_raw_scores = []
597
+ for review_data in current_review:
598
+ if isinstance(review_data, dict) and "sentences" in review_data:
599
+ items = review_data["sentences"].items()
600
+ else:
601
+ items = review_data.items() if isinstance(review_data, dict) else []
602
+ for _, metadata in items:
603
+ all_raw_scores.append(metadata.get("consensuality", 0.0))
604
+ if all_raw_scores:
605
+ arr = _np.array(all_raw_scores)
606
+ _kl_median = float(_np.median(arr))
607
+ q25, q75 = float(_np.percentile(arr, 25)), float(_np.percentile(arr, 75))
608
+ _kl_iqr = q75 - q25
609
+
610
  for i in range(10):
611
  if i < number_of_displayed_reviews:
612
  # Handle new structure: current_review[i] can be dict with "sentences" and "rebuttal"
 
644
  elif show_consensuality:
645
  highlighted = []
646
  for sentence, metadata in review_item:
647
+ raw = metadata.get("consensuality", 0.0)
648
+ # Robust normalization: median-centered, IQR-scaled, clipped to [-1, 1]
649
+ if _kl_iqr > 0:
650
+ score = max(-1.0, min(1.0, (raw - _kl_median) / (_kl_iqr * 2)))
651
+ else:
652
+ score = 0.0
653
  consensuality_dict[sentence] = score
654
+ # Asymmetric amplification for display
655
+ import math
656
+ display_score = math.copysign(abs(score) ** (AGREEMENT_AMP_UNIQUE if score > 0 else AGREEMENT_AMP_COMMON), score) if score != 0 else 0.0
657
+ highlighted.append((sentence, display_score))
658
 
659
  elif show_topic:
660
  highlighted = []
 
698
  # Set most consensual / unique sentences
699
  if show_consensuality and consensuality_dict:
700
  scores = pd.Series(consensuality_dict)
701
+ most_unique = scores.sort_values(ascending=False).head(3).index.tolist()
702
+ most_common = scores.sort_values(ascending=True).head(3).index.tolist()
703
  most_common_text = "\n".join(most_common)
704
  most_unique_text = "\n".join(most_unique)
705
 
interface/interactive_processor.py CHANGED
@@ -188,12 +188,20 @@ class InteractiveReviewProcessor:
188
 
189
  _, _, _, _, _, _, _, consensuality_scores = rsa_reranker.rerank(t=iterations)
190
 
191
- # Normalize to [-1, 1]
 
 
192
  scores = consensuality_scores.copy()
193
- scores_min = scores.min()
194
- scores_max = scores.max()
195
- scores = (scores - scores_min) / (scores_max - scores_min) if scores_max > scores_min else scores
196
- scores = scores * 2 - 1 # Scale to [-1, 1]
 
 
 
 
 
 
197
 
198
  return dict(scores)
199
 
 
188
 
189
  _, _, _, _, _, _, _, consensuality_scores = rsa_reranker.rerank(t=iterations)
190
 
191
+ # Robust normalization: median-centered, IQR-scaled, clipped to [-1, 1]
192
+ # This avoids outliers dominating the color scale
193
+ import numpy as np
194
  scores = consensuality_scores.copy()
195
+ vals = scores.values
196
+ median = np.median(vals)
197
+ q25, q75 = np.percentile(vals, 25), np.percentile(vals, 75)
198
+ iqr = q75 - q25
199
+ if iqr > 0:
200
+ # Center on median, scale so the IQR spans a width of ~0.5 (about [-0.25, 0.25]); values beyond 2*IQR from the median clip to [-1, 1]
201
+ scores = ((scores - median) / (iqr * 2)).clip(-1, 1)
202
+ else:
203
+ # Zero IQR: the 25th and 75th percentiles coincide (the middle half of the scores is identical), so no usable scale exists
204
+ scores = scores * 0
205
 
206
  return dict(scores)
207