Spaces:
Sleeping
Sleeping
gary-boon
Claude
committed on
Commit
·
c0d95bf
1
Parent(s):
22c69fa
Remove all mock data from SWE-bench - real data only
Browse files
- Remove _load_mock_tasks function completely
- Raise exceptions when dataset unavailable instead of falling back
- Ensure research integrity by rejecting mock data
- Real SWE-bench dataset required for PhD research
🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
- backend/swe_bench_service.py +13 -12
backend/swe_bench_service.py
CHANGED
|
@@ -94,8 +94,10 @@ class SWEBenchService:
|
|
| 94 |
self.dataset_loaded = False
|
| 95 |
self.metrics_cache: Dict[str, Any] = {}
|
| 96 |
|
|
|
|
|
|
|
| 97 |
def _load_mock_tasks(self):
|
| 98 |
-
|
| 99 |
repos = [
|
| 100 |
"astropy/astropy", "django/django", "matplotlib/matplotlib",
|
| 101 |
"pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
|
|
@@ -159,6 +161,7 @@ Expected behavior: Should return an empty list []"""
|
|
| 159 |
self.tasks[task.instance_id] = task
|
| 160 |
|
| 161 |
logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
|
|
|
|
| 162 |
|
| 163 |
async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
|
| 164 |
"""Load SWE-bench dataset from Hugging Face"""
|
|
@@ -191,21 +194,19 @@ Expected behavior: Should return an empty list []"""
|
|
| 191 |
self.dataset_loaded = True
|
| 192 |
logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
|
| 193 |
except Exception as dataset_error:
|
| 194 |
-
logger.
|
| 195 |
-
|
| 196 |
-
|
| 197 |
|
| 198 |
# Initialize metrics cache
|
| 199 |
self._update_metrics_cache()
|
| 200 |
|
| 201 |
except ImportError:
|
| 202 |
-
logger.error("datasets library not installed
|
| 203 |
-
|
| 204 |
-
self.dataset_loaded = True
|
| 205 |
except Exception as e:
|
| 206 |
-
logger.error(f"Failed to load SWE-bench dataset
|
| 207 |
-
|
| 208 |
-
self.dataset_loaded = True
|
| 209 |
|
| 210 |
def get_tasks(
|
| 211 |
self,
|
|
@@ -246,8 +247,8 @@ Expected behavior: Should return an empty list []"""
|
|
| 246 |
if '/' in t.repo and t.instance_id else None,
|
| 247 |
'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
|
| 248 |
if '/' in t.repo and t.instance_id else None,
|
| 249 |
-
# Mark
|
| 250 |
-
'
|
| 251 |
}
|
| 252 |
for t in tasks
|
| 253 |
]
|
|
|
|
| 94 |
self.dataset_loaded = False
|
| 95 |
self.metrics_cache: Dict[str, Any] = {}
|
| 96 |
|
| 97 |
+
# Removed _load_mock_tasks - real data only for research
|
| 98 |
+
"""
|
| 99 |
def _load_mock_tasks(self):
|
| 100 |
+
# Load mock tasks when dataset isn't available
|
| 101 |
repos = [
|
| 102 |
"astropy/astropy", "django/django", "matplotlib/matplotlib",
|
| 103 |
"pandas-dev/pandas", "pytest-dev/pytest", "scikit-learn/scikit-learn"
|
|
|
|
| 161 |
self.tasks[task.instance_id] = task
|
| 162 |
|
| 163 |
logger.info(f"Loaded {len(self.tasks)} mock SWE-bench tasks")
|
| 164 |
+
"""
|
| 165 |
|
| 166 |
async def load_dataset(self, dataset_name: str = "princeton-nlp/SWE-bench_Lite"):
|
| 167 |
"""Load SWE-bench dataset from Hugging Face"""
|
|
|
|
| 194 |
self.dataset_loaded = True
|
| 195 |
logger.info(f"Loaded {len(self.tasks)} SWE-bench tasks")
|
| 196 |
except Exception as dataset_error:
|
| 197 |
+
logger.error(f"Could not load full dataset: {dataset_error}")
|
| 198 |
+
# No mock data - research requires real dataset
|
| 199 |
+
raise Exception("SWE-bench dataset unavailable - real data required for research")
|
| 200 |
|
| 201 |
# Initialize metrics cache
|
| 202 |
self._update_metrics_cache()
|
| 203 |
|
| 204 |
except ImportError:
|
| 205 |
+
logger.error("datasets library not installed - real data required")
|
| 206 |
+
raise ImportError("datasets library required for SWE-bench - pip install datasets")
|
|
|
|
| 207 |
except Exception as e:
|
| 208 |
+
logger.error(f"Failed to load SWE-bench dataset: {e}")
|
| 209 |
+
raise Exception(f"SWE-bench dataset loading failed: {e}")
|
|
|
|
| 210 |
|
| 211 |
def get_tasks(
|
| 212 |
self,
|
|
|
|
| 247 |
if '/' in t.repo and t.instance_id else None,
|
| 248 |
'pr_url': f"https://github.com/{t.repo}/pull/{t.instance_id.split('-')[-1]}"
|
| 249 |
if '/' in t.repo and t.instance_id else None,
|
| 250 |
+
# Mark if data source is real
|
| 251 |
+
'_is_real': hasattr(t, 'pr_url') if hasattr(t, 'pr_url') else False
|
| 252 |
}
|
| 253 |
for t in tasks
|
| 254 |
]
|