Spaces:

Sina1138
/

ReView

Paused

App Files Files Community

Sina1138 committed on Mar 20

Commit

fdaabfa

1 Parent(s): acf8e5e

Refactor model references and enhance ZeroGPU support; update README and requirements for HF

Browse files

Files changed (7) hide show

.gitignore +1 -0
dependencies/scoring_utils.py +2 -2
interface/Demo.py +8 -0
interface/interactive_processor.py +31 -6
pipeline/config.py +2 -2
readme.md +11 -1
requirements → requirements.txt +1 -0

.gitignore CHANGED Viewed

@@ -8,6 +8,7 @@ logs/
 # validation, training, and benchmark files -- to be added back after
 validation/
 training/
 benchmark_cpu_optimizations/
 # ========================================================================

 # validation, training, and benchmark files -- to be added back after
 validation/
 training/
+benchmark/
 benchmark_cpu_optimizations/
 # ========================================================================

dependencies/scoring_utils.py CHANGED Viewed

@@ -220,7 +220,7 @@ def load_polarity_model(model_variant: str, base_dir: Path, device: str = "cuda"
         "scibert": "Sina1138/Scibert_polarity_Review",
         "scideberta": "KISTI-AI/Scideberta-full",  # Needs fine-tuning
         "modernbert": "answerdotai/ModernBERT-base",  # Needs fine-tuning
-        "deberta": "microsoft/deberta-v3-base",  # Needs fine-tuning
         "deberta_v3_small": "microsoft/deberta-v3-small",  # Needs fine-tuning
     }
@@ -270,7 +270,7 @@ def load_topic_model(model_variant: str, base_dir: Path, device: str = "cuda"):
         "scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
     }
     hub_fallback_map = {
-        "scideberta": "Sina1138/SciDeberta_Review",  # Production HuggingFace model
         "scibert": "allenai/scibert_scivocab_uncased",  # Needs fine-tuning
         "deberta": "microsoft/deberta-v3-base",  # Needs fine-tuning
         "deberta_v3_small": "microsoft/deberta-v3-small",  # Needs fine-tuning

         "scibert": "Sina1138/Scibert_polarity_Review",
         "scideberta": "KISTI-AI/Scideberta-full",  # Needs fine-tuning
         "modernbert": "answerdotai/ModernBERT-base",  # Needs fine-tuning
+        "deberta": "Sina1138/deberta_polarity_Review",  # DeBERTa-v3-base (F1=0.764)
         "deberta_v3_small": "microsoft/deberta-v3-small",  # Needs fine-tuning
     }
         "scideberta_legacy": base_dir / "alternative_topic" / "scideberta" / "final_model",
     }
     hub_fallback_map = {
+        "scideberta": "Sina1138/scideberta_topic_Review",  # SciDeBERTa (F1=0.478)
         "scibert": "allenai/scibert_scivocab_uncased",  # Needs fine-tuning
         "deberta": "microsoft/deberta-v3-base",  # Needs fine-tuning
         "deberta_v3_small": "microsoft/deberta-v3-small",  # Needs fine-tuning

interface/Demo.py CHANGED Viewed

@@ -16,6 +16,13 @@ import pandas as pd
 import ast
 from tqdm import tqdm
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
 BASE_DIR = Path(__file__).resolve().parent.parent
@@ -1278,6 +1285,7 @@ def format_general_rebuttals(rebuttal: str) -> str:
     )
 def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
     """
     Fast processing: Polarity + Topic only (~3-5 sec on CPU).

 import ast
 from tqdm import tqdm
+# ZeroGPU support for HuggingFace Spaces
+# `_gpu` is applied as a bare decorator (`@_gpu`) further down in this file.
+try:
+    import spaces
+    _gpu = spaces.GPU
+except ImportError:
+    # The fallback takes exactly one argument (the decorated function) and returns
+    # it unchanged, so it only supports the bare `@_gpu` form, not a call with
+    # decorator arguments.
+    _gpu = lambda f: f  # no-op when not on HF Spaces
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
 BASE_DIR = Path(__file__).resolve().parent.parent
     )
+@_gpu
 def process_interactive_reviews_fast(text1: str, text2: str, text3: str, text4: str, text5: str, text6: str, focus: str, rebuttal_str: str = "", thread_state=None, progress=gr.Progress()) -> Tuple:
     """
     Fast processing: Polarity + Topic only (~3-5 sec on CPU).

interface/interactive_processor.py CHANGED Viewed

@@ -63,7 +63,13 @@ class InteractiveReviewProcessor:
     """Process reviews through the same pipeline as preprocessed data."""
     def __init__(self, device: str = "cuda"):
-        """Initialize processor with all required models."""
         self.device = torch.device(device if torch.cuda.is_available() else "cpu")
         t_total = time.time()
@@ -94,9 +100,8 @@ class InteractiveReviewProcessor:
             polarity_model_name = str(polarity_model_local)
             print(f"Loading polarity model from local trained model: {polarity_model_name}")
         else:
-            # Fallback: will need to upload fine-tuned model or use legacy SciBERT
-            polarity_model_name = "Sina1138/Scibert_polarity_Review"  # Legacy SciBERT
-            print(f"Local model not found, using legacy SciBERT: {polarity_model_name}")
         self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
         self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
@@ -114,8 +119,8 @@ class InteractiveReviewProcessor:
             topic_model_name = str(topic_model_local)
             print(f"Loading topic model from local trained model: {topic_model_name}")
         else:
-            topic_model_name = "Sina1138/SciDeberta_Review"  # Production HuggingFace model
-            print(f"Using HuggingFace topic model: {topic_model_name}")
         self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
         self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
@@ -139,6 +144,20 @@ class InteractiveReviewProcessor:
             7: None  # Unclassified
         }
     @staticmethod
     def _normalize_uniqueness_scores(consensuality_scores):
         """IQR-based normalization: median-centered, clipped to [-1, 1]."""
@@ -161,6 +180,7 @@ class InteractiveReviewProcessor:
         if not sentences:
             return {}
         t0 = time.time()
         n_batches = (len(sentences) + batch_size - 1) // batch_size
         print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
@@ -191,6 +211,7 @@ class InteractiveReviewProcessor:
         if not sentences:
             return {}
         t0 = time.time()
         n_batches = (len(sentences) + batch_size - 1) // batch_size
         print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
@@ -227,6 +248,8 @@ class InteractiveReviewProcessor:
         if len(texts) < 2:
             return {}
         # Tokenize all reviews
         all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
@@ -274,6 +297,8 @@ class InteractiveReviewProcessor:
         if len(texts) < 2:
             return {}
         all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
         unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
         sentences = filter_and_clean_sentences(unique_sentences)

     """Process reviews through the same pipeline as preprocessed data."""
     def __init__(self, device: str = "cuda"):
+        """Initialize processor with all required models.
+        Models always load on CPU at startup. On ZeroGPU (HF Spaces),
+        GPU is only available inside @spaces.GPU-decorated functions,
+        so use ensure_device() to move models to GPU dynamically.
+        """
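+        # Intent: load on CPU — GPU may not be available yet (ZeroGPU).
+        # NOTE(review): the next line does not force CPU; self.device becomes CUDA
+        # whenever torch.cuda.is_available() is already true at startup. Confirm this
+        # matches where the models are actually placed, because ensure_device() only
+        # moves them when the detected device differs from self.device.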
         self.device = torch.device(device if torch.cuda.is_available() else "cpu")
         t_total = time.time()
             polarity_model_name = str(polarity_model_local)
             print(f"Loading polarity model from local trained model: {polarity_model_name}")
         else:
+            polarity_model_name = "Sina1138/deberta_polarity_Review"
+            print(f"Local model not found, using HuggingFace: {polarity_model_name}")
         self.polarity_tokenizer = AutoTokenizer.from_pretrained(polarity_model_name)
         self.polarity_model = AutoModelForSequenceClassification.from_pretrained(polarity_model_name)
             topic_model_name = str(topic_model_local)
             print(f"Loading topic model from local trained model: {topic_model_name}")
         else:
+            topic_model_name = "Sina1138/scideberta_topic_Review"
+            print(f"Local model not found, using HuggingFace: {topic_model_name}")
         self.topic_tokenizer = AutoTokenizer.from_pretrained(topic_model_name)
         self.topic_model = AutoModelForSequenceClassification.from_pretrained(topic_model_name)
             7: None  # Unclassified
         }
+    def ensure_device(self):
+        """Move all models to the best available device.
+        On ZeroGPU, GPU only becomes available inside @spaces.GPU functions.
+        Call this at the start of inference to move models to GPU when available.
+
+        The target is "cuda" when torch.cuda.is_available() is true, else "cpu".
+        If the target equals self.device nothing is moved; otherwise rsa_model,
+        polarity_model and topic_model are moved to the target and self.device
+        is updated to it. Returns None.
+        """
+        # Availability is re-checked on every call rather than cached.
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Only the recorded self.device is compared here, not the models' actual
+        # placement, so self.device must reflect where the models really are.
+        if device != self.device:
+            print(f"[DEVICE] Switching models from {self.device} to {device}")
+            self.rsa_model.to(device)
+            self.polarity_model.to(device)
+            self.topic_model.to(device)
+            # Record the new device so later calls with the same target are no-ops.
+            self.device = device
     @staticmethod
     def _normalize_uniqueness_scores(consensuality_scores):
         """IQR-based normalization: median-centered, clipped to [-1, 1]."""
         if not sentences:
             return {}
+        self.ensure_device()
         t0 = time.time()
         n_batches = (len(sentences) + batch_size - 1) // batch_size
         print(f"[TIMING] Polarity: {len(sentences)} sentences, {n_batches} batches")
         if not sentences:
             return {}
+        self.ensure_device()
         t0 = time.time()
         n_batches = (len(sentences) + batch_size - 1) // batch_size
         print(f"[TIMING] Topic: {len(sentences)} sentences, {n_batches} batches")
         if len(texts) < 2:
             return {}
+        self.ensure_device()
         # Tokenize all reviews
         all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
         if len(texts) < 2:
             return {}
+        self.ensure_device()
         all_sentence_lists = [[s for s in glimpse_tokenizer(t) if s.strip()] for t in texts]
         unique_sentences = list(set(s for lst in all_sentence_lists for s in lst))
         sentences = filter_and_clean_sentences(unique_sentences)

pipeline/config.py CHANGED Viewed

@@ -38,8 +38,8 @@ class Config:
     TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
     # HuggingFace fallbacks (if local models not available)
-    POLARITY_MODEL_HUB = "Sina1138/Scibert_polarity_Review"  # Legacy SciBERT (until fine-tuned DeBERTa is uploaded to Hub)
-    TOPIC_MODEL_HUB = "Sina1138/SciDeberta_Review"  # Current production model
     # Legacy models (SciBERT baseline, kept for reference)
     POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review"  # F1=0.724

     TOPIC_MODEL_LOCAL = BASE_DIR / "training" / "outputs" / "scideberta_topic" / "final_model"
     # HuggingFace fallbacks (if local models not available)
+    POLARITY_MODEL_HUB = "Sina1138/deberta_polarity_Review"  # DeBERTa-v3-base (F1=0.764)
+    TOPIC_MODEL_HUB = "Sina1138/scideberta_topic_Review"  # SciDeBERTa (F1=0.478)
     # Legacy models (SciBERT baseline, kept for reference)
     POLARITY_MODEL_LEGACY = "Sina1138/Scibert_polarity_Review"  # F1=0.724

readme.md CHANGED Viewed

@@ -1,3 +1,13 @@
 This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
 <!-- [Paper]() | -->
@@ -32,7 +42,7 @@ git lfs install
 - Finally, all remaining required packages could be installed with the requirements file:
 ``` bash
-pip install -r requirements
 ```
 - (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:

+---
+title: ReView
+emoji: 📝
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+app_file: interface/Demo.py
+pinned: false
+hardware: zero-a10g
+---
 This is the repository of ReView: A Tool for Visualizing and Analyzing Scientific Reviews. [Code](https://github.com/sina1138/glimpse-ui) | [Hugging Face Spaces](https://huggingface.co/spaces/Sina1138/ReView)
 <!-- [Paper]() | -->
 - Finally, all remaining required packages could be installed with the requirements file:
 ``` bash
+pip install -r requirements.txt
 ```
 - (Optional) To enable fetching reviews directly from OpenReview links in the Interactive tab:

requirements → requirements.txt RENAMED Viewed

@@ -1,3 +1,4 @@
 transformers
 numpy==1.25.2
 seaborn

+spaces
 transformers
 numpy==1.25.2
 seaborn