Sina1138 committed on
Commit ·
8bcd5eb
1
Parent(s): 4cd1bc5
Enhance score normalization in interactive review processor: implement robust median-centered, IQR-scaled clipping to improve color scale handling
Browse files- interface/Demo.py +53 -17
- interface/interactive_processor.py +13 -5
interface/Demo.py
CHANGED
|
@@ -2,17 +2,23 @@ import sys, os.path
|
|
| 2 |
from pathlib import Path
|
| 3 |
from typing import Tuple, Dict
|
| 4 |
import json
|
| 5 |
-
|
| 6 |
import torch
|
| 7 |
-
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
| 8 |
-
|
| 9 |
-
BASE_DIR = Path(__file__).resolve().parent.parent
|
| 10 |
-
|
| 11 |
import gradio as gr
|
| 12 |
import pandas as pd
|
| 13 |
import ast
|
| 14 |
from tqdm import tqdm
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Auto-detect the preprocessed dataset CSV
|
| 17 |
def _find_preprocessed_csv() -> Path:
|
| 18 |
"""Find the most recent preprocessed_scored_reviews_*.csv in the data dir."""
|
|
@@ -449,16 +455,24 @@ def compute_rsa_in_background(rsa_state: Dict, current_focus: str, progress=gr.P
|
|
| 449 |
progress(0.50, desc="Running RSA reranking...")
|
| 450 |
consensuality_map = processor.predict_consensuality(*active_texts)
|
| 451 |
|
| 452 |
-
# Calculate most common and unique
|
| 453 |
if consensuality_map:
|
| 454 |
import pandas as _pd
|
| 455 |
scores_series = _pd.Series(consensuality_map)
|
| 456 |
-
most_common_text = "\n".join(scores_series.
|
| 457 |
-
most_unique_text = "\n".join(scores_series.
|
| 458 |
else:
|
| 459 |
most_common_text = ""
|
| 460 |
most_unique_text = ""
|
| 461 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
progress(0.90, desc="Formatting agreement results...")
|
| 463 |
|
| 464 |
fmt = processor.format_highlighted_output
|
|
@@ -466,7 +480,7 @@ def compute_rsa_in_background(rsa_state: Dict, current_focus: str, progress=gr.P
|
|
| 466 |
agree_out = []
|
| 467 |
for i in range(MAX_INTERACTIVE_REVIEWS):
|
| 468 |
if i < len(sentence_lists):
|
| 469 |
-
agree_out.append(gr.update(visible=show_agreement, value=fmt(sentence_lists[i],
|
| 470 |
else:
|
| 471 |
agree_out.append(gr.update(visible=False, value=None))
|
| 472 |
|
|
@@ -575,6 +589,24 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
|
|
| 575 |
rebuttal_updates = []
|
| 576 |
consensuality_dict = {}
|
| 577 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
for i in range(10):
|
| 579 |
if i < number_of_displayed_reviews:
|
| 580 |
# Handle new structure: current_review[i] can be dict with "sentences" and "rebuttal"
|
|
@@ -612,13 +644,17 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
|
|
| 612 |
elif show_consensuality:
|
| 613 |
highlighted = []
|
| 614 |
for sentence, metadata in review_item:
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
|
|
|
| 620 |
consensuality_dict[sentence] = score
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
elif show_topic:
|
| 624 |
highlighted = []
|
|
@@ -662,8 +698,8 @@ with gr.Blocks(title="ReView", css=CUSTOM_CSS) as demo:
|
|
| 662 |
# Set most consensual / unique sentences
|
| 663 |
if show_consensuality and consensuality_dict:
|
| 664 |
scores = pd.Series(consensuality_dict)
|
| 665 |
-
most_unique = scores.sort_values(ascending=
|
| 666 |
-
most_common = scores.sort_values(ascending=
|
| 667 |
most_common_text = "\n".join(most_common)
|
| 668 |
most_unique_text = "\n".join(most_unique)
|
| 669 |
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
from typing import Tuple, Dict
|
| 4 |
import json
|
|
|
|
| 5 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import pandas as pd
|
| 8 |
import ast
|
| 9 |
from tqdm import tqdm
|
| 10 |
|
| 11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
| 12 |
+
|
| 13 |
+
BASE_DIR = Path(__file__).resolve().parent.parent
|
| 14 |
+
|
| 15 |
+
# Controls how aggressively agreement colors are amplified.
|
| 16 |
+
# Lower = more vivid colors (0.2 = very strong, 1.0 = no amplification).
|
| 17 |
+
# Asymmetric: unique/red (positive) is amplified less than common/blue (negative)
|
| 18 |
+
# to avoid overwhelming red when most sentences are unique.
|
| 19 |
+
AGREEMENT_AMP_UNIQUE = 0.9 # exponent for positive scores (red = unique)
|
| 20 |
+
AGREEMENT_AMP_COMMON = 0.5 # exponent for negative scores (blue = common)
|
| 21 |
+
|
| 22 |
# Auto-detect the preprocessed dataset CSV
|
| 23 |
def _find_preprocessed_csv() -> Path:
|
| 24 |
"""Find the most recent preprocessed_scored_reviews_*.csv in the data dir."""
|
|
|
|
| 455 |
progress(0.50, desc="Running RSA reranking...")
|
| 456 |
consensuality_map = processor.predict_consensuality(*active_texts)
|
| 457 |
|
| 458 |
+
# Calculate most common and unique (before amplification, so ranking is on true scores)
|
| 459 |
if consensuality_map:
|
| 460 |
import pandas as _pd
|
| 461 |
scores_series = _pd.Series(consensuality_map)
|
| 462 |
+
most_common_text = "\n".join(scores_series.nsmallest(3).index.tolist())
|
| 463 |
+
most_unique_text = "\n".join(scores_series.nlargest(3).index.tolist())
|
| 464 |
else:
|
| 465 |
most_common_text = ""
|
| 466 |
most_unique_text = ""
|
| 467 |
|
| 468 |
+
# Amplify scores for visible highlighting: sign-preserving power transform
|
| 469 |
+
# Maps [-1,1] → [-1,1] but pushes values away from 0 for better color contrast
|
| 470 |
+
import math
|
| 471 |
+
amplified_map = {
|
| 472 |
+
s: math.copysign(abs(v) ** (AGREEMENT_AMP_UNIQUE if v > 0 else AGREEMENT_AMP_COMMON), v) if v != 0 else 0.0
|
| 473 |
+
for s, v in consensuality_map.items()
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
progress(0.90, desc="Formatting agreement results...")
|
| 477 |
|
| 478 |
fmt = processor.format_highlighted_output
|
|
|
|
| 480 |
agree_out = []
|
| 481 |
for i in range(MAX_INTERACTIVE_REVIEWS):
|
| 482 |
if i < len(sentence_lists):
|
| 483 |
+
agree_out.append(gr.update(visible=show_agreement, value=fmt(sentence_lists[i], amplified_map, "consensuality")))
|
| 484 |
else:
|
| 485 |
agree_out.append(gr.update(visible=False, value=None))
|
| 486 |
|
|
|
|
| 589 |
rebuttal_updates = []
|
| 590 |
consensuality_dict = {}
|
| 591 |
|
| 592 |
+
# Pre-compute robust normalization stats (median + IQR) for raw KL scores
|
| 593 |
+
import numpy as _np
|
| 594 |
+
_kl_median, _kl_iqr = 0.0, 0.0
|
| 595 |
+
if show_consensuality:
|
| 596 |
+
all_raw_scores = []
|
| 597 |
+
for review_data in current_review:
|
| 598 |
+
if isinstance(review_data, dict) and "sentences" in review_data:
|
| 599 |
+
items = review_data["sentences"].items()
|
| 600 |
+
else:
|
| 601 |
+
items = review_data.items() if isinstance(review_data, dict) else []
|
| 602 |
+
for _, metadata in items:
|
| 603 |
+
all_raw_scores.append(metadata.get("consensuality", 0.0))
|
| 604 |
+
if all_raw_scores:
|
| 605 |
+
arr = _np.array(all_raw_scores)
|
| 606 |
+
_kl_median = float(_np.median(arr))
|
| 607 |
+
q25, q75 = float(_np.percentile(arr, 25)), float(_np.percentile(arr, 75))
|
| 608 |
+
_kl_iqr = q75 - q25
|
| 609 |
+
|
| 610 |
for i in range(10):
|
| 611 |
if i < number_of_displayed_reviews:
|
| 612 |
# Handle new structure: current_review[i] can be dict with "sentences" and "rebuttal"
|
|
|
|
| 644 |
elif show_consensuality:
|
| 645 |
highlighted = []
|
| 646 |
for sentence, metadata in review_item:
|
| 647 |
+
raw = metadata.get("consensuality", 0.0)
|
| 648 |
+
# Robust normalization: median-centered, IQR-scaled, clipped to [-1, 1]
|
| 649 |
+
if _kl_iqr > 0:
|
| 650 |
+
score = max(-1.0, min(1.0, (raw - _kl_median) / (_kl_iqr * 2)))
|
| 651 |
+
else:
|
| 652 |
+
score = 0.0
|
| 653 |
consensuality_dict[sentence] = score
|
| 654 |
+
# Asymmetric amplification for display
|
| 655 |
+
import math
|
| 656 |
+
display_score = math.copysign(abs(score) ** (AGREEMENT_AMP_UNIQUE if score > 0 else AGREEMENT_AMP_COMMON), score) if score != 0 else 0.0
|
| 657 |
+
highlighted.append((sentence, display_score))
|
| 658 |
|
| 659 |
elif show_topic:
|
| 660 |
highlighted = []
|
|
|
|
| 698 |
# Set most consensual / unique sentences
|
| 699 |
if show_consensuality and consensuality_dict:
|
| 700 |
scores = pd.Series(consensuality_dict)
|
| 701 |
+
most_unique = scores.sort_values(ascending=False).head(3).index.tolist()
|
| 702 |
+
most_common = scores.sort_values(ascending=True).head(3).index.tolist()
|
| 703 |
most_common_text = "\n".join(most_common)
|
| 704 |
most_unique_text = "\n".join(most_unique)
|
| 705 |
|
interface/interactive_processor.py
CHANGED
|
@@ -188,12 +188,20 @@ class InteractiveReviewProcessor:
|
|
| 188 |
|
| 189 |
_, _, _, _, _, _, _, consensuality_scores = rsa_reranker.rerank(t=iterations)
|
| 190 |
|
| 191 |
-
#
|
|
|
|
|
|
|
| 192 |
scores = consensuality_scores.copy()
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
return dict(scores)
|
| 199 |
|
|
|
|
| 188 |
|
| 189 |
_, _, _, _, _, _, _, consensuality_scores = rsa_reranker.rerank(t=iterations)
|
| 190 |
|
| 191 |
+
# Robust normalization: median-centered, IQR-scaled, clipped to [-1, 1]
|
| 192 |
+
# This avoids outliers dominating the color scale
|
| 193 |
+
import numpy as np
|
| 194 |
scores = consensuality_scores.copy()
|
| 195 |
+
vals = scores.values
|
| 196 |
+
median = np.median(vals)
|
| 197 |
+
q25, q75 = np.percentile(vals, 25), np.percentile(vals, 75)
|
| 198 |
+
iqr = q75 - q25
|
| 199 |
+
if iqr > 0:
|
| 200 |
+
# Center on median, divide by 2*IQR so the interquartile range spans a total width of 0.5 (about [-0.25, 0.25] for symmetric data), clip to [-1, 1]
|
| 201 |
+
scores = ((scores - median) / (iqr * 2)).clip(-1, 1)
|
| 202 |
+
else:
|
| 203 |
+
# All scores identical or near-identical
|
| 204 |
+
scores = scores * 0
|
| 205 |
|
| 206 |
return dict(scores)
|
| 207 |
|