gary-boon Claude committed on
Commit
c0d95bf
·
1 Parent(s): 22c69fa

Remove all mock data from SWE-bench - real data only

Browse files

- Disable _load_mock_tasks by wrapping the method in a triple-quoted string (its body stays in the file as dead text)
- Raise exceptions when dataset unavailable instead of falling back
- Ensure research integrity by rejecting mock data
- Real SWE-bench dataset required for PhD research

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. backend/swe_bench_service.py +13 -12
backend/swe_bench_service.py CHANGED
@@ -94,8 +94,10 @@ class SWEBenchService:
94
  self.dataset_loaded = False
95
  self.metrics_cache: Dict[str, Any] = {}
96
 
 
 
97
  def _load_mock_tasks(self):
98
- """Load mock tasks when dataset isn't available"""
99
  repos = [
100
  "astropy/astropy", "django/django", "matplotlib/matplotlib",
101
  "pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
@@ -159,6 +161,7 @@ Expected behavior: Should return an empty list []"""
159
  self.tasks[task.instance_id] = task
160
 
161
  logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
 
162
 
163
  async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
164
  """Load SWE-bench dataset from Hugging Face"""
@@ -191,21 +194,19 @@ Expected behavior: Should return an empty list []"""
191
  self.dataset_loaded = True
192
  logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
193
  except Exception as dataset_error:
194
- logger.warning(f"Could not load full dataset, using mock data: {dataset_error}")
195
- self._load_mock_tasks()
196
- self.dataset_loaded = True
197
 
198
  # Initialize metrics cache
199
  self._update_metrics_cache()
200
 
201
  except ImportError:
202
- logger.error("datasets library not installed. Using mock data instead")
203
- self._load_mock_tasks()
204
- self.dataset_loaded = True
205
  except Exception as e:
206
- logger.error(f"Failed to load SWE-bench dataset, using mock: {e}")
207
- self._load_mock_tasks()
208
- self.dataset_loaded = True
209
 
210
  def get_tasks(
211
  self,
@@ -246,8 +247,8 @@ Expected behavior: Should return an empty list []"""
246
  if '/' in t.repo and t.instance_id else None,
247
  'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
248
  if '/' in t.repo and t.instance_id else None,
249
- # Mark mock data
250
- '_is_mock': not hasattr(t, 'issue_url') # Real tasks would have issue_url attribute
251
  }
252
  for t in tasks
253
  ]
 
94
  self.dataset_loaded = False
95
  self.metrics_cache: Dict[str, Any] = {}
96
 
97
+ # Removed _load_mock_tasks - real data only for research
98
+ """
99
  def _load_mock_tasks(self):
100
+ # Load mock tasks when dataset isn't available
101
  repos = [
102
  "astropy/astropy", "django/django", "matplotlib/matplotlib",
103
  "pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
 
161
  self.tasks[task.instance_id] = task
162
 
163
  logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
164
+ """
165
 
166
  async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
167
  """Load SWE-bench dataset from Hugging Face"""
 
194
  self.dataset_loaded = True
195
  logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
196
  except Exception as dataset_error:
197
+ logger.error(f"Could not load full dataset: {dataset_error}")
198
+ # No mock data - research requires real dataset
199
+ raise Exception("SWE-bench dataset unavailable - real data required for research")
200
 
201
  # Initialize metrics cache
202
  self._update_metrics_cache()
203
 
204
  except ImportError:
205
+ logger.error("datasets library not installed - real data required")
206
+ raise ImportError("datasets library required for SWE-bench - pip install datasets")
 
207
  except Exception as e:
208
+ logger.error(f"Failed to load SWE-bench dataset: {e}")
209
+ raise Exception(f"SWE-bench dataset loading failed: {e}")
 
210
 
211
  def get_tasks(
212
  self,
 
247
  if '/' in t.repo and t.instance_id else None,
248
  'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
249
  if '/' in t.repo and t.instance_id else None,
250
+ # Mark if data source is real
251
+ '_is_real': hasattr(t, 'pr_url') if hasattr(t, 'pr_url') else False
252
  }
253
  for t in tasks
254
  ]