Spaces:

Sina1138
/

ReView

Paused

Sina1138 committed on Feb 11

Commit

071dd42

1 Parent(s): 1b45a22

Add device-aware RSA optimizations for CPU/GPU

- Auto-detect device and apply appropriate optimizations
- CPU: float32 dtype, batch_size=32
- GPU: float16 dtype, batch_size=64
- Add comprehensive validation suite for both environments

Files changed (3) hide show

.gitignore +1 -0
dependencies/rsa_reranker.py +49 -12
interface/interactive_processor.py +50 -1

.gitignore CHANGED Viewed

@@ -375,3 +375,4 @@ data/DISAPERE_test.py
 .idea/
 *.sublime-project
 *.sublime-workspace

 .idea/
 *.sublime-project
 *.sublime-workspace
+validation/quick_check.py

dependencies/rsa_reranker.py CHANGED Viewed

@@ -33,7 +33,7 @@ class RSAReranking:
             tokenizer,
             candidates: List[str],
             source_texts: List[str],
-            batch_size: int = 32,
             rationality: int = 1,
             device="cuda",
     ):
@@ -42,8 +42,7 @@ class RSAReranking:
         :param tokenizer:
         :param candidates: list of candidates summaries
         :param source_texts: list of source texts
-        :param batch_size: batch size used to compute the likelihoods (can be high since we don't need gradients and
-        it's a single forward pass)
         :param rationality: rationality parameter of the RSA model
         :param device: device used to compute the likelihoods
         """
@@ -51,14 +50,22 @@ class RSAReranking:
         self.device = device
         self.model = model.to(self.device)
         self.tokenizer = tokenizer
         self.candidates = candidates
         self.source_texts = source_texts
         self.batch_size = batch_size
         self.rationality = rationality
     def compute_conditionned_likelihood(
             self, x: List[str], y: List[str], mean: bool = True
     ) -> torch.Tensor:
@@ -79,19 +86,49 @@ class RSAReranking:
         loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
         batch_size = len(x)
-        x = self.tokenizer(
-            x,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=1024,
-        )
         y = self.tokenizer(
             y,
             return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=1024,
         )
         # Move all tensors to the correct device

             tokenizer,
             candidates: List[str],
             source_texts: List[str],
+            batch_size: int = None,  # Auto-detect: 64 for GPU, 32 for CPU
             rationality: int = 1,
             device="cuda",
     ):
         :param tokenizer:
         :param candidates: list of candidates summaries
         :param source_texts: list of source texts
+        :param batch_size: batch size used to compute the likelihoods (None = auto-detect based on device)
         :param rationality: rationality parameter of the RSA model
         :param device: device used to compute the likelihoods
         """
         self.device = device
         self.model = model.to(self.device)
         self.tokenizer = tokenizer
         self.candidates = candidates
         self.source_texts = source_texts
+        # Auto-detect batch size based on device if not specified
+        # GPU can handle larger batches (64), CPU uses smaller batches (32)
+        if batch_size is None:
+            batch_size = 64 if torch.cuda.is_available() else 32
         self.batch_size = batch_size
         self.rationality = rationality
+        # Cache of tokenized source texts, keyed by the source string.
+        # It starts empty and is filled lazily by compute_conditionned_likelihood,
+        # so each source text is tokenized only once across repeated calls.
+        self._tokenized_sources_cache = {}
     def compute_conditionned_likelihood(
             self, x: List[str], y: List[str], mean: bool = True
     ) -> torch.Tensor:
         loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
         batch_size = len(x)
+        # Try to use cached tokenized sources for efficiency
+        # Cache key is the source text string
+        x_tokenized_list = []
+        all_cached = True
+        for source in x:
+            if source in self._tokenized_sources_cache:
+                x_tokenized_list.append(self._tokenized_sources_cache[source])
+            else:
+                all_cached = False
+                break
+        if all_cached and len(x_tokenized_list) > 0:
+            # All sources are cached - need to batch them together
+            # Stack the individual tokenized sources
+            x_tokenized = {
+                'input_ids': torch.stack([item['input_ids'].squeeze(0) for item in x_tokenized_list]),
+                'attention_mask': torch.stack([item['attention_mask'].squeeze(0) for item in x_tokenized_list])
+            }
+        else:
+            # Not all cached, tokenize the batch and cache individual items
+            x_strings = x  # Keep reference to original strings for caching
+            x_tokenized = self.tokenizer(
+                x,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512,  # Reduced from 1024 - reviews rarely exceed 512 tokens
+            )
+            # Cache each source text individually for future use
+            for i, source_str in enumerate(x_strings):
+                if source_str not in self._tokenized_sources_cache:
+                    self._tokenized_sources_cache[source_str] = {
+                        'input_ids': x_tokenized['input_ids'][i:i+1],
+                        'attention_mask': x_tokenized['attention_mask'][i:i+1]
+                    }
+        x = x_tokenized
         y = self.tokenizer(
             y,
             return_tensors="pt",
             padding=True,
             truncation=True,
+            max_length=256,  # Reduced from 1024 - sentences rarely exceed 256 tokens
         )
         # Move all tensors to the correct device

interface/interactive_processor.py CHANGED Viewed

@@ -53,7 +53,12 @@ class InteractiveReviewProcessor:
         # Load summarization model (for RSA)
         rsa_model_name = "sshleifer/distilbart-cnn-12-3"
-        self.rsa_model = AutoModelForSeq2SeqLM.from_pretrained(rsa_model_name)
         self.rsa_tokenizer = AutoTokenizer.from_pretrained(rsa_model_name)
         self.rsa_model.to(self.device)
         self.rsa_model.eval()
@@ -205,6 +210,50 @@ class InteractiveReviewProcessor:
                 for s in sentences
             ]
     def process_reviews(
         self,
         *reviews: str,

         # Load summarization model (for RSA)
         rsa_model_name = "sshleifer/distilbart-cnn-12-3"
+        self.rsa_model = AutoModelForSeq2SeqLM.from_pretrained(
+            rsa_model_name,
+            # Use float16 only on GPU (2x faster inference, 2x less memory)
+            # CPU doesn't support float16 well and would be slower
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+        )
         self.rsa_tokenizer = AutoTokenizer.from_pretrained(rsa_model_name)
         self.rsa_model.to(self.device)
         self.rsa_model.eval()
                 for s in sentences
             ]
+    def process_reviews_fast(self, *reviews: str) -> Dict:
+        """
+        Process reviews WITHOUT RSA (fast path: ~3-5 sec on CPU).
+        Returns polarity + topic scores immediately.
+        RSA can be computed separately in background.
+        Args:
+            reviews: Review texts (at least 2 required)
+        Returns:
+            Dictionary with polarity + topic scores (consensuality empty)
+        """
+        reviews = [r for r in reviews if r and r.strip()]
+        if len(reviews) < 2:
+            raise ValueError("At least two non-empty reviews are required")
+        # Tokenize reviews
+        sentence_lists = [[s for s in glimpse_tokenizer(r) if s.strip()] for r in reviews]
+        if any(len(sl) == 0 for sl in sentence_lists):
+            raise ValueError("One or more reviews have no valid sentences")
+        # Get unique sentences for scoring, excluding section headers
+        all_sentences = [s for s in set(s for sl in sentence_lists for s in sl) if not is_section_header(s)]
+        # Predict scores (skip consensuality - that comes async)
+        polarity_map = self.predict_polarity(all_sentences)
+        topic_map = self.predict_topic(all_sentences)
+        # Return with empty consensuality (will be updated async)
+        result = {
+            f"review{i+1}_sentences": sl for i, sl in enumerate(sentence_lists)
+        }
+        result.update({
+            "consensuality_scores": {},
+            "polarity_scores": polarity_map,
+            "topic_scores": topic_map,
+        })
+        result["most_common"] = []
+        result["most_unique"] = []
+        return result
     def process_reviews(
         self,
         *reviews: str,