openhands committed on
Commit
2176460
·
1 Parent(s): e18eb21

Add enhanced debugging output for data setup

Browse files

- Add verbose logging to track data loading process
- Try both relative and absolute paths for mock data
- Display current working directory and file listings
- Show which data source is being used (GitHub or mock)
- Verify copied files after setup

Files changed (1) hide show
  1. setup_data.py +27 -6
setup_data.py CHANGED
@@ -82,11 +82,20 @@ def fetch_data_from_github():
82
 
83
  def copy_mock_data():
84
  """Copy mock data to the expected extraction directory."""
 
85
  mock_source = Path("mock_results") / CONFIG_NAME
 
 
 
 
86
  target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
87
 
 
 
 
88
  if not mock_source.exists():
89
- print(f"Warning: Mock data directory {mock_source} not found")
 
90
  return False
91
 
92
  # Create target directory
@@ -97,7 +106,11 @@ def copy_mock_data():
97
  if target_dir.exists():
98
  shutil.rmtree(target_dir)
99
  shutil.copytree(mock_source, target_dir)
100
- print(f"Mock data copied successfully. Files: {list(target_dir.glob('*'))}")
 
 
 
 
101
  return True
102
 
103
  def setup_mock_data():
@@ -105,26 +118,34 @@ def setup_mock_data():
105
  Setup data for the leaderboard.
106
  First tries to fetch from GitHub, falls back to mock data if unavailable.
107
  """
 
 
 
 
108
  target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
109
 
110
  # Check if data already exists
111
  if target_dir.exists() and any(target_dir.glob("*.jsonl")):
 
112
  print(f"Data already exists at {target_dir}")
 
113
  return
114
 
115
  # Try to fetch from GitHub first
116
- print("Checking for data from GitHub repository...")
117
  if fetch_data_from_github():
118
- print("Using data from GitHub repository")
119
  return
120
 
121
  # Fall back to mock data
122
- print("GitHub data not available, falling back to mock data...")
123
  if copy_mock_data():
124
- print("Using mock data")
125
  return
126
 
 
127
  print("ERROR: No data available! Neither GitHub nor mock data could be loaded.")
 
128
 
129
  if __name__ == "__main__":
130
  setup_mock_data()
 
82
 
83
  def copy_mock_data():
84
  """Copy mock data to the expected extraction directory."""
85
+ # Try both relative and absolute paths
86
  mock_source = Path("mock_results") / CONFIG_NAME
87
+ if not mock_source.exists():
88
+ # Try absolute path in case we're in a different working directory
89
+ mock_source = Path("/app/mock_results") / CONFIG_NAME
90
+
91
  target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
92
 
93
+ print(f"Current working directory: {os.getcwd()}")
94
+ print(f"Looking for mock data at: {mock_source.absolute()}")
95
+
96
  if not mock_source.exists():
97
+ print(f"ERROR: Mock data directory {mock_source} not found!")
98
+ print(f"Directory contents: {list(Path('.').glob('*'))}")
99
  return False
100
 
101
  # Create target directory
 
106
  if target_dir.exists():
107
  shutil.rmtree(target_dir)
108
  shutil.copytree(mock_source, target_dir)
109
+
110
+ # Verify the copy
111
+ copied_files = list(target_dir.glob('*'))
112
+ print(f"Mock data copied successfully. Files: {copied_files}")
113
+ print(f"Target directory: {target_dir.absolute()}")
114
  return True
115
 
116
  def setup_mock_data():
 
118
  Setup data for the leaderboard.
119
  First tries to fetch from GitHub, falls back to mock data if unavailable.
120
  """
121
+ print("=" * 60)
122
+ print("STARTING DATA SETUP")
123
+ print("=" * 60)
124
+
125
  target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
126
 
127
  # Check if data already exists
128
  if target_dir.exists() and any(target_dir.glob("*.jsonl")):
129
+ jsonl_files = list(target_dir.glob("*.jsonl"))
130
  print(f"Data already exists at {target_dir}")
131
+ print(f"Found {len(jsonl_files)} JSONL files: {[f.name for f in jsonl_files]}")
132
  return
133
 
134
  # Try to fetch from GitHub first
135
+ print("\n--- Attempting to fetch from GitHub ---")
136
  if fetch_data_from_github():
137
+ print(" Successfully using data from GitHub repository")
138
  return
139
 
140
  # Fall back to mock data
141
+ print("\n--- GitHub data not available, falling back to mock data ---")
142
  if copy_mock_data():
143
+ print(" Successfully using mock data")
144
  return
145
 
146
+ print("\n" + "!" * 60)
147
  print("ERROR: No data available! Neither GitHub nor mock data could be loaded.")
148
+ print("!" * 60)
149
 
150
  if __name__ == "__main__":
151
  setup_mock_data()