Sina1138 committed on
Commit ·
fdaabfa
1
Parent(s): acf8e5e
Refactor model references and enhance ZeroGPU support; update README and requirements for HF
Browse files- .gitignore +1 -0
- dependencies/scoring_utils.py +2 -2
- interface/Demo.py +8 -0
- interface/interactive_processor.py +31 -6
- pipeline/config.py +2 -2
- readme.md +11 -1
- requirements → requirements.txt +1 -0
.gitignore
CHANGED
|
@@ -8,6 +8,7 @@ logs/
|
|
| 8 |
# validation, training, and benchmark files -- to be added back after
|
| 9 |
validation/
|
| 10 |
training/
|
|
|
|
| 11 |
benchmark_cpu_optimizations/
|
| 12 |
|
| 13 |
# ========================================================================
|
|
|
|
| 8 |
# validation, training, and benchmark files -- to be added back after
|
| 9 |
validation/
|
| 10 |
training/
|
| 11 |
+
benchmark/
|
| 12 |
benchmark_cpu_optimizations/
|
| 13 |
|
| 14 |
# ========================================================================
|
dependencies/scoring_utils.py
CHANGED
|
@@ -220,7 +220,7 @@ def load_polarity_model(model_variant: str, base_dir: Path, device: str = "cuda"
|
|
| 220 |
"scibert": "Sina1138/Scibert_polarity_Review",
|
| 221 |
"scideberta": "KISTI-AI/Scideberta-full", # Needs fine-tuning
|
| 222 |
"modernbert": "answerdotai/ModernBERT-base", # Needs fine-tuning
|
| 223 |
-
"deberta": "
|
| 224 |
"deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
|
| 225 |
}
|
| 226 |
|
|
@@ -270,7 +270,7 @@ def load_topic_model(model_variant: str, base_dir: Path, device: str = "cuda"):
|
|
| 270 |
"scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
|
| 271 |
}
|
| 272 |
hub_fallback_map = {
|
| 273 |
-
"scideberta": "Sina1138/
|
| 274 |
"scibert": "allenai/scibert_scivocab_uncased", # Needs fine-tuning
|
| 275 |
"deberta": "microsoft/deberta-v3-base", # Needs fine-tuning
|
| 276 |
"deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
|
|
|
|
| 220 |
"scibert": "Sina1138/Scibert_polarity_Review",
|
| 221 |
"scideberta": "KISTI-AI/Scideberta-full", # Needs fine-tuning
|
| 222 |
"modernbert": "answerdotai/ModernBERT-base", # Needs fine-tuning
|
| 223 |
+
"deberta": "Sina1138/deberta_polarity_Review", # DeBERTa-v3-base (F1=0.764)
|
| 224 |
"deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
|
| 225 |
}
|
| 226 |
|
|
|
|
| 270 |
"scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
|
| 271 |
}
|
| 272 |
hub_fallback_map = {
|
| 273 |
+
"scideberta": "Sina1138/scideberta_topic_Review", # SciDeBERTa (F1=0.478)
|
| 274 |
"scibert": "allenai/scibert_scivocab_uncased", # Needs fine-tuning
|
| 275 |
"deberta": "microsoft/deberta-v3-base", # Needs fine-tuning
|
| 276 |
"deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
|
interface/Demo.py
CHANGED
|
@@ -16,6 +16,13 @@ import pandas as pd
|
|
| 16 |
import ast
|
| 17 |
from tqdm import tqdm
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
| 20 |
|
| 21 |
BASE_DIR = Path(__file__).resolve().parent.parent
|
|
@@ -1278,6 +1285,7 @@ def format_general_rebuttals(rebuttal: str) -> str:
|
|
| 1278 |
)
|
| 1279 |
|
| 1280 |
|
|
|
|
| 1281 |
def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
|
| 1282 |
"""
|
| 1283 |
Fast processing: Polarity + Topic only (~3-5 sec on CPU).
|
|
|
|
| 16 |
import ast
|
| 17 |
from tqdm import tqdm
|
| 18 |
|
| 19 |
+
# ZeroGPU support for HuggingFace Spaces
|
| 20 |
+
try:
|
| 21 |
+
import spaces
|
| 22 |
+
_gpu = spaces.GPU
|
| 23 |
+
except ImportError:
|
| 24 |
+
_gpu = lambda f: f # no-op when not on HF Spaces
|
| 25 |
+
|
| 26 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
| 27 |
|
| 28 |
BASE_DIR = Path(__file__).resolve().parent.parent
|
|
|
|
| 1285 |
)
|
| 1286 |
|
| 1287 |
|
| 1288 |
+
@_gpu
|
| 1289 |
def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
|
| 1290 |
"""
|
| 1291 |
Fast processing: Polarity + Topic only (~3-5 sec on CPU).
|
interface/interactive_processor.py
CHANGED
|
@@ -63,7 +63,13 @@ class InteractiveReviewProcessor:
|
|
| 63 |
"""Process reviews through the same pipeline as preprocessed data."""
|
| 64 |
|
| 65 |
def __init__(self, device: str = "cuda"):
|
| 66 |
-
"""Initialize processor with all required models.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
self.device = torch.device(device if torch.cuda.is_available() else "cpu")
|
| 68 |
t_total = time.time()
|
| 69 |
|
|
@@ -94,9 +100,8 @@ class InteractiveReviewProcessor:
|
|
| 94 |
polarity_model_name = str(polarity_model_local)
|
| 95 |
print(f"Loading polarity model from local trained model: {polarity_model_name}")
|
| 96 |
else:
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
print(f"Local model not found, using legacy SciBERT: {polarity_model_name}")
|
| 100 |
|
| 101 |
self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
|
| 102 |
self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
|
|
@@ -114,8 +119,8 @@ class InteractiveReviewProcessor:
|
|
| 114 |
topic_model_name = str(topic_model_local)
|
| 115 |
print(f"Loading topic model from local trained model: {topic_model_name}")
|
| 116 |
else:
|
| 117 |
-
topic_model_name = "Sina1138/
|
| 118 |
-
print(f"
|
| 119 |
|
| 120 |
self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
|
| 121 |
self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
|
|
@@ -139,6 +144,20 @@ class InteractiveReviewProcessor:
|
|
| 139 |
7: None # Unclassified
|
| 140 |
}
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
@staticmethod
|
| 143 |
def _normalize_uniqueness_scores(consensuality_scores):
|
| 144 |
"""IQR-based normalization: median-centered, clipped to [-1, 1]."""
|
|
@@ -161,6 +180,7 @@ class InteractiveReviewProcessor:
|
|
| 161 |
if not sentences:
|
| 162 |
return {}
|
| 163 |
|
|
|
|
| 164 |
t0 = time.time()
|
| 165 |
n_batches = (len(sentences) + batch_size - 1) // batch_size
|
| 166 |
print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
|
|
@@ -191,6 +211,7 @@ class InteractiveReviewProcessor:
|
|
| 191 |
if not sentences:
|
| 192 |
return {}
|
| 193 |
|
|
|
|
| 194 |
t0 = time.time()
|
| 195 |
n_batches = (len(sentences) + batch_size - 1) // batch_size
|
| 196 |
print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
|
|
@@ -227,6 +248,8 @@ class InteractiveReviewProcessor:
|
|
| 227 |
if len(texts) < 2:
|
| 228 |
return {}
|
| 229 |
|
|
|
|
|
|
|
| 230 |
# Tokenize all reviews
|
| 231 |
all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
|
| 232 |
|
|
@@ -274,6 +297,8 @@ class InteractiveReviewProcessor:
|
|
| 274 |
if len(texts) < 2:
|
| 275 |
return {}
|
| 276 |
|
|
|
|
|
|
|
| 277 |
all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
|
| 278 |
unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
|
| 279 |
sentences = filter_and_clean_sentences(unique_sentences)
|
|
|
|
| 63 |
"""Process reviews through the same pipeline as preprocessed data."""
|
| 64 |
|
| 65 |
def __init__(self, device: str = "cuda"):
|
| 66 |
+
"""Initialize processor with all required models.
|
| 67 |
+
|
| 68 |
+
Models always load on CPU at startup. On ZeroGPU (HF Spaces),
|
| 69 |
+
GPU is only available inside @spaces.GPU-decorated functions,
|
| 70 |
+
so use ensure_device() to move models to GPU dynamically.
|
| 71 |
+
"""
|
| 72 |
+
# Always load on CPU — GPU may not be available yet (ZeroGPU)
|
| 73 |
self.device = torch.device(device if torch.cuda.is_available() else "cpu")
|
| 74 |
t_total = time.time()
|
| 75 |
|
|
|
|
| 100 |
polarity_model_name = str(polarity_model_local)
|
| 101 |
print(f"Loading polarity model from local trained model: {polarity_model_name}")
|
| 102 |
else:
|
| 103 |
+
polarity_model_name = "Sina1138/deberta_polarity_Review"
|
| 104 |
+
print(f"Local model not found, using HuggingFace: {polarity_model_name}")
|
|
|
|
| 105 |
|
| 106 |
self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
|
| 107 |
self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
|
|
|
|
| 119 |
topic_model_name = str(topic_model_local)
|
| 120 |
print(f"Loading topic model from local trained model: {topic_model_name}")
|
| 121 |
else:
|
| 122 |
+
topic_model_name = "Sina1138/scideberta_topic_Review"
|
| 123 |
+
print(f"Local model not found, using HuggingFace: {topic_model_name}")
|
| 124 |
|
| 125 |
self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
|
| 126 |
self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
|
|
|
|
| 144 |
7: None # Unclassified
|
| 145 |
}
|
| 146 |
|
| 147 |
+
def ensure_device(self):
|
| 148 |
+
"""Move all models to the best available device.
|
| 149 |
+
|
| 150 |
+
On ZeroGPU, GPU only becomes available inside @spaces.GPU functions.
|
| 151 |
+
Call this at the start of inference to move models to GPU when available.
|
| 152 |
+
"""
|
| 153 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 154 |
+
if device != self.device:
|
| 155 |
+
print(f"[DEVICE] Switching models from {self.device} to {device}")
|
| 156 |
+
self.rsa_model.to(device)
|
| 157 |
+
self.polarity_model.to(device)
|
| 158 |
+
self.topic_model.to(device)
|
| 159 |
+
self.device = device
|
| 160 |
+
|
| 161 |
@staticmethod
|
| 162 |
def _normalize_uniqueness_scores(consensuality_scores):
|
| 163 |
"""IQR-based normalization: median-centered, clipped to [-1, 1]."""
|
|
|
|
| 180 |
if not sentences:
|
| 181 |
return {}
|
| 182 |
|
| 183 |
+
self.ensure_device()
|
| 184 |
t0 = time.time()
|
| 185 |
n_batches = (len(sentences) + batch_size - 1) // batch_size
|
| 186 |
print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
|
|
|
|
| 211 |
if not sentences:
|
| 212 |
return {}
|
| 213 |
|
| 214 |
+
self.ensure_device()
|
| 215 |
t0 = time.time()
|
| 216 |
n_batches = (len(sentences) + batch_size - 1) // batch_size
|
| 217 |
print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
|
|
|
|
| 248 |
if len(texts) < 2:
|
| 249 |
return {}
|
| 250 |
|
| 251 |
+
self.ensure_device()
|
| 252 |
+
|
| 253 |
# Tokenize all reviews
|
| 254 |
all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
|
| 255 |
|
|
|
|
| 297 |
if len(texts) < 2:
|
| 298 |
return {}
|
| 299 |
|
| 300 |
+
self.ensure_device()
|
| 301 |
+
|
| 302 |
all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
|
| 303 |
unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
|
| 304 |
sentences = filter_and_clean_sentences(unique_sentences)
|
pipeline/config.py
CHANGED
|
@@ -38,8 +38,8 @@ class Config:
|
|
| 38 |
TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
|
| 39 |
|
| 40 |
# HuggingFace fallbacks (if local models not available)
|
| 41 |
-
POLARITY_MODEL_HUB = "Sina1138/
|
| 42 |
-
TOPIC_MODEL_HUB = "Sina1138/
|
| 43 |
|
| 44 |
# Legacy models (SciBERT baseline, kept for reference)
|
| 45 |
POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review" # F1=0.724
|
|
|
|
| 38 |
TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
|
| 39 |
|
| 40 |
# HuggingFace fallbacks (if local models not available)
|
| 41 |
+
POLARITY_MODEL_HUB = "Sina1138/deberta_polarity_Review" # DeBERTa-v3-base (F1=0.764)
|
| 42 |
+
TOPIC_MODEL_HUB = "Sina1138/scideberta_topic_Review" # SciDeBERTa (F1=0.478)
|
| 43 |
|
| 44 |
# Legacy models (SciBERT baseline, kept for reference)
|
| 45 |
POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review" # F1=0.724
|
readme.md
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
|
| 3 |
<!-- [Paper]() | -->
|
|
@@ -32,7 +42,7 @@ git lfs install
|
|
| 32 |
- Finally, all remaining required packages could be installed with the requirements file:
|
| 33 |
|
| 34 |
``` bash
|
| 35 |
-
pip install -r requirements
|
| 36 |
```
|
| 37 |
|
| 38 |
- (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ReView
|
| 3 |
+
emoji: ๐
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: interface/Demo.py
|
| 8 |
+
pinned: false
|
| 9 |
+
hardware: zero-a10g
|
| 10 |
+
---
|
| 11 |
|
| 12 |
This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
|
| 13 |
<!-- [Paper]() | -->
|
|
|
|
| 42 |
- Finally, all remaining required packages could be installed with the requirements file:
|
| 43 |
|
| 44 |
``` bash
|
| 45 |
+
pip install -r requirements.txt
|
| 46 |
```
|
| 47 |
|
| 48 |
- (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:
|
requirements → requirements.txt
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
transformers
|
| 2 |
numpy==1.25.2
|
| 3 |
seaborn
|
|
|
|
| 1 |
+
spaces
|
| 2 |
transformers
|
| 3 |
numpy==1.25.2
|
| 4 |
seaborn
|