Sina1138 committed on
Commit
fdaabfa
·
1 Parent(s): acf8e5e

Refactor model references and enhance ZeroGPU support; update README and requirements for HF

Browse files
.gitignore CHANGED
@@ -8,6 +8,7 @@ logs/
8
  # validation, training, and benchmark files -- to be added back after
9
  validation/
10
  training/
 
11
  benchmark_cpu_optimizations/
12
 
13
  # ========================================================================
 
8
  # validation, training, and benchmark files -- to be added back after
9
  validation/
10
  training/
11
+ benchmark/
12
  benchmark_cpu_optimizations/
13
 
14
  # ========================================================================
dependencies/scoring_utils.py CHANGED
@@ -220,7 +220,7 @@ def load_polarity_model(model_variant: str, base_dir: Path, device: str = "cuda"
220
  "scibert": "Sina1138/Scibert_polarity_Review",
221
  "scideberta": "KISTI-AI/Scideberta-full", # Needs fine-tuning
222
  "modernbert": "answerdotai/ModernBERT-base", # Needs fine-tuning
223
- "deberta": "microsoft/deberta-v3-base", # Needs fine-tuning
224
  "deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
225
  }
226
 
@@ -270,7 +270,7 @@ def load_topic_model(model_variant: str, base_dir: Path, device: str = "cuda"):
270
  "scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
271
  }
272
  hub_fallback_map = {
273
- "scideberta": "Sina1138/SciDeberta_Review", # Production HuggingFace model
274
  "scibert": "allenai/scibert_scivocab_uncased", # Needs fine-tuning
275
  "deberta": "microsoft/deberta-v3-base", # Needs fine-tuning
276
  "deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
 
220
  "scibert": "Sina1138/Scibert_polarity_Review",
221
  "scideberta": "KISTI-AI/Scideberta-full", # Needs fine-tuning
222
  "modernbert": "answerdotai/ModernBERT-base", # Needs fine-tuning
223
+ "deberta": "Sina1138/deberta_polarity_Review", # DeBERTa-v3-base (F1=0.764)
224
  "deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
225
  }
226
 
 
270
  "scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
271
  }
272
  hub_fallback_map = {
273
+ "scideberta": "Sina1138/scideberta_topic_Review", # SciDeBERTa (F1=0.478)
274
  "scibert": "allenai/scibert_scivocab_uncased", # Needs fine-tuning
275
  "deberta": "microsoft/deberta-v3-base", # Needs fine-tuning
276
  "deberta_v3_small": "microsoft/deberta-v3-small", # Needs fine-tuning
interface/Demo.py CHANGED
@@ -16,6 +16,13 @@ import pandas as pd
16
  import ast
17
  from tqdm import tqdm
18
 
 
 
 
 
 
 
 
19
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
20
 
21
  BASE_DIR = Path(__file__).resolve().parent.parent
@@ -1278,6 +1285,7 @@ def format_general_rebuttals(rebuttal: str) -> str:
1278
  )
1279
 
1280
 
 
1281
  def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
1282
  """
1283
  Fast processing: Polarity + Topic only (~3-5 sec on CPU).
 
16
  import ast
17
  from tqdm import tqdm
18
 
19
+ # ZeroGPU support for HuggingFace Spaces
20
+ try:
21
+ import spaces
22
+ _gpu = spaces.GPU
23
+ except ImportError:
24
+ _gpu = lambda f: f # no-op when not on HF Spaces
25
+
26
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
27
 
28
  BASE_DIR = Path(__file__).resolve().parent.parent
 
1285
  )
1286
 
1287
 
1288
+ @_gpu
1289
  def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
1290
  """
1291
  Fast processing: Polarity + Topic only (~3-5 sec on CPU).
interface/interactive_processor.py CHANGED
@@ -63,7 +63,13 @@ class InteractiveReviewProcessor:
63
  """Process reviews through the same pipeline as preprocessed data."""
64
 
65
  def __init__(self, device: str = "cuda"):
66
- """Initialize processor with all required models."""
 
 
 
 
 
 
67
  self.device = torch.device(device if torch.cuda.is_available() else "cpu")
68
  t_total = time.time()
69
 
@@ -94,9 +100,8 @@ class InteractiveReviewProcessor:
94
  polarity_model_name = str(polarity_model_local)
95
  print(f"Loading polarity model from local trained model: {polarity_model_name}")
96
  else:
97
- # Fallback: will need to upload fine-tuned model or use legacy SciBERT
98
- polarity_model_name = "Sina1138/Scibert_polarity_Review" # Legacy SciBERT
99
- print(f"Local model not found, using legacy SciBERT: {polarity_model_name}")
100
 
101
  self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
102
  self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
@@ -114,8 +119,8 @@ class InteractiveReviewProcessor:
114
  topic_model_name = str(topic_model_local)
115
  print(f"Loading topic model from local trained model: {topic_model_name}")
116
  else:
117
- topic_model_name = "Sina1138/SciDeberta_Review" # Production HuggingFace model
118
- print(f"Using HuggingFace topic model: {topic_model_name}")
119
 
120
  self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
121
  self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
@@ -139,6 +144,20 @@ class InteractiveReviewProcessor:
139
  7: None # Unclassified
140
  }
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  @staticmethod
143
  def _normalize_uniqueness_scores(consensuality_scores):
144
  """IQR-based normalization: median-centered, clipped to [-1, 1]."""
@@ -161,6 +180,7 @@ class InteractiveReviewProcessor:
161
  if not sentences:
162
  return {}
163
 
 
164
  t0 = time.time()
165
  n_batches = (len(sentences) + batch_size - 1) // batch_size
166
  print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
@@ -191,6 +211,7 @@ class InteractiveReviewProcessor:
191
  if not sentences:
192
  return {}
193
 
 
194
  t0 = time.time()
195
  n_batches = (len(sentences) + batch_size - 1) // batch_size
196
  print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
@@ -227,6 +248,8 @@ class InteractiveReviewProcessor:
227
  if len(texts) < 2:
228
  return {}
229
 
 
 
230
  # Tokenize all reviews
231
  all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
232
 
@@ -274,6 +297,8 @@ class InteractiveReviewProcessor:
274
  if len(texts) < 2:
275
  return {}
276
 
 
 
277
  all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
278
  unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
279
  sentences = filter_and_clean_sentences(unique_sentences)
 
63
  """Process reviews through the same pipeline as preprocessed data."""
64
 
65
  def __init__(self, device: str = "cuda"):
66
+ """Initialize processor with all required models.
67
+
68
+ Models always load on CPU at startup. On ZeroGPU (HF Spaces),
69
+ GPU is only available inside @spaces.GPU-decorated functions,
70
+ so use ensure_device() to move models to GPU dynamically.
71
+ """
72
+ # Always load on CPU — GPU may not be available yet (ZeroGPU)
73
  self.device = torch.device(device if torch.cuda.is_available() else "cpu")
74
  t_total = time.time()
75
 
 
100
  polarity_model_name = str(polarity_model_local)
101
  print(f"Loading polarity model from local trained model: {polarity_model_name}")
102
  else:
103
+ polarity_model_name = "Sina1138/deberta_polarity_Review"
104
+ print(f"Local model not found, using HuggingFace: {polarity_model_name}")
 
105
 
106
  self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
107
  self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
 
119
  topic_model_name = str(topic_model_local)
120
  print(f"Loading topic model from local trained model: {topic_model_name}")
121
  else:
122
+ topic_model_name = "Sina1138/scideberta_topic_Review"
123
+ print(f"Local model not found, using HuggingFace: {topic_model_name}")
124
 
125
  self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
126
  self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
 
144
  7: None # Unclassified
145
  }
146
 
147
+ def ensure_device(self):
148
+ """Move all models to the best available device.
149
+
150
+ On ZeroGPU, GPU only becomes available inside @spaces.GPU functions.
151
+ Call this at the start of inference to move models to GPU when available.
152
+ """
153
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
154
+ if device != self.device:
155
+ print(f"[DEVICE] Switching models from {self.device} to {device}")
156
+ self.rsa_model.to(device)
157
+ self.polarity_model.to(device)
158
+ self.topic_model.to(device)
159
+ self.device = device
160
+
161
  @staticmethod
162
  def _normalize_uniqueness_scores(consensuality_scores):
163
  """IQR-based normalization: median-centered, clipped to [-1, 1]."""
 
180
  if not sentences:
181
  return {}
182
 
183
+ self.ensure_device()
184
  t0 = time.time()
185
  n_batches = (len(sentences) + batch_size - 1) // batch_size
186
  print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
 
211
  if not sentences:
212
  return {}
213
 
214
+ self.ensure_device()
215
  t0 = time.time()
216
  n_batches = (len(sentences) + batch_size - 1) // batch_size
217
  print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
 
248
  if len(texts) < 2:
249
  return {}
250
 
251
+ self.ensure_device()
252
+
253
  # Tokenize all reviews
254
  all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
255
 
 
297
  if len(texts) < 2:
298
  return {}
299
 
300
+ self.ensure_device()
301
+
302
  all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
303
  unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
304
  sentences = filter_and_clean_sentences(unique_sentences)
pipeline/config.py CHANGED
@@ -38,8 +38,8 @@ class Config:
38
  TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
39
 
40
  # HuggingFace fallbacks (if local models not available)
41
- POLARITY_MODEL_HUB = "Sina1138/Scibert_polarity_Review" # Legacy SciBERT (until fine-tuned DeBERTa is uploaded to Hub)
42
- TOPIC_MODEL_HUB = "Sina1138/SciDeberta_Review" # Current production model
43
 
44
  # Legacy models (SciBERT baseline, kept for reference)
45
  POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review" # F1=0.724
 
38
  TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
39
 
40
  # HuggingFace fallbacks (if local models not available)
41
+ POLARITY_MODEL_HUB = "Sina1138/deberta_polarity_Review" # DeBERTa-v3-base (F1=0.764)
42
+ TOPIC_MODEL_HUB = "Sina1138/scideberta_topic_Review" # SciDeBERTa (F1=0.478)
43
 
44
  # Legacy models (SciBERT baseline, kept for reference)
45
  POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review" # F1=0.724
readme.md CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
 
2
  This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
3
  <!-- [Paper]() | -->
@@ -32,7 +42,7 @@ git lfs install
32
  - Finally, all remaining required packages could be installed with the requirements file:
33
 
34
  ``` bash
35
- pip install -r requirements
36
  ```
37
 
38
  - (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:
 
1
+ ---
2
+ title: ReView
3
+ emoji: 📝
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ app_file: interface/Demo.py
8
+ pinned: false
9
+ hardware: zero-a10g
10
+ ---
11
 
12
  This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
13
  <!-- [Paper]() | -->
 
42
  - Finally, all remaining required packages could be installed with the requirements file:
43
 
44
  ``` bash
45
+ pip install -r requirements.txt
46
  ```
47
 
48
  - (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:
requirements → requirements.txt RENAMED
@@ -1,3 +1,4 @@
 
1
  transformers
2
  numpy==1.25.2
3
  seaborn
 
1
+ spaces
2
  transformers
3
  numpy==1.25.2
4
  seaborn