Spaces:

visualisable-ai
/

api

Paused

gary-boon Claude committed on Sep 16, 2025

Commit

c0d95bf

1 Parent(s): 22c69fa

Remove all mock data from SWE-bench - real data only

- Remove _load_mock_tasks function completely
- Raise exceptions when the dataset is unavailable instead of falling back to mock data
- Ensure research integrity by rejecting mock data
- Real SWE-bench dataset required for PhD research

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

backend/swe_bench_service.py +13 -12

backend/swe_bench_service.py CHANGED Viewed

@@ -94,8 +94,10 @@ class SWEBenchService:
         self.dataset_loaded = False
         self.metrics_cache: Dict[str, Any] = {}
     def _load_mock_tasks(self):
-        """Load mock tasks when dataset isn't available"""
         repos = [
             "astropy/astropy", "django/django", "matplotlib/matplotlib",
             "pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
@@ -159,6 +161,7 @@ Expected behavior: Should return an empty list []"""
             self.tasks[task.instance_id] = task
         logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
     async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
         """Load SWE-bench dataset from Hugging Face"""
@@ -191,21 +194,19 @@ Expected behavior: Should return an empty list []"""
                 self.dataset_loaded = True
                 logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
             except Exception as dataset_error:
-                logger.warning(f"Could not load full dataset, using mock data: {dataset_error}")
-                self._load_mock_tasks()
-                self.dataset_loaded = True
             # Initialize metrics cache
             self._update_metrics_cache()
         except ImportError:
-            logger.error("datasets library not installed. Using mock data instead")
-            self._load_mock_tasks()
-            self.dataset_loaded = True
         except Exception as e:
-            logger.error(f"Failed to load SWE-bench dataset, using mock: {e}")
-            self._load_mock_tasks()
-            self.dataset_loaded = True
     def get_tasks(
         self,
@@ -246,8 +247,8 @@ Expected behavior: Should return an empty list []"""
                     if '/' in t.repo and t.instance_id else None,
                 'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
                     if '/' in t.repo and t.instance_id else None,
-                # Mark mock data
-                '_is_mock': not hasattr(t, 'issue_url')  # Real tasks would have issue_url attribute
             }
             for t in tasks
         ]

         self.dataset_loaded = False
         self.metrics_cache: Dict[str, Any] = {}
+    # Removed _load_mock_tasks - real data only for research
+    """
     def _load_mock_tasks(self):
+        # Load mock tasks when dataset isn't available
         repos = [
             "astropy/astropy", "django/django", "matplotlib/matplotlib",
             "pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
             self.tasks[task.instance_id] = task
         logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
+    """
     async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
         """Load SWE-bench dataset from Hugging Face"""
                 self.dataset_loaded = True
                 logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
             except Exception as dataset_error:
+                logger.error(f"Could not load full dataset: {dataset_error}")
+                # No mock data - research requires real dataset
+                raise Exception("SWE-bench dataset unavailable - real data required for research")
             # Initialize metrics cache
             self._update_metrics_cache()
         except ImportError:
+            logger.error("datasets library not installed - real data required")
+            raise ImportError("datasets library required for SWE-bench - pip install datasets")
         except Exception as e:
+            logger.error(f"Failed to load SWE-bench dataset: {e}")
+            raise Exception(f"SWE-bench dataset loading failed: {e}")
     def get_tasks(
         self,
                     if '/' in t.repo and t.instance_id else None,
                 'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
                     if '/' in t.repo and t.instance_id else None,
+                # Mark if data source is real
+                '_is_real': hasattr(t, 'pr_url') if hasattr(t, 'pr_url') else False
             }
             for t in tasks
         ]