dolev31 committed on
Commit
bd7e5b6
·
1 Parent(s): 73a5f3a

Harden submission upload: file size limits, consistent paths, Gradio 6 compat

Browse files

- Add file size validation (50MB max) and empty file check before JSON parsing
- Use __file__-relative paths for all data files (submissions, key_requests,
persistence dir) for consistent behavior regardless of CWD
- Fix Gradio 6 dropdown updates: use gr.update(choices=...) instead of
creating new gr.Dropdown instances (preserves multiselect/label properties)
- Update canonical_hashes.json to match current source files

Files changed (2) hide show
  1. app.py +13 -6
  2. data/canonical_hashes.json +8 -0
app.py CHANGED
@@ -735,8 +735,8 @@ _EMAIL_RE = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
735
  # ---------------------------------------------------------------------------
736
 
737
  _APP_DIR = Path(__file__).resolve().parent
738
- SUBMISSIONS_FILE = Path("data/submissions.jsonl")
739
- KEY_REQUESTS_FILE = Path("data/key_requests.jsonl")
740
  TASKS_FILE = _APP_DIR / "data" / "test.raw.json"
741
  CANONICAL_HASHES_FILE = _APP_DIR / "data" / "canonical_hashes.json"
742
 
@@ -746,7 +746,7 @@ CANONICAL_HASHES_FILE = _APP_DIR / "data" / "canonical_hashes.json"
746
  # ---------------------------------------------------------------------------
747
 
748
  _DATA_REPO_ID = "dolev31/st-webagentbench-data"
749
- _DATA_DIR = Path("data")
750
  _scheduler: CommitScheduler | None = None
751
  _PERSISTENCE_ENABLED = False
752
 
@@ -1684,10 +1684,17 @@ def validate_upload_full(file) -> tuple[str, Optional[dict], str]:
1684
  # Handle both Gradio 4.x (object with .name) and 5.x (filepath string)
1685
  try:
1686
  file_path = file.name if hasattr(file, "name") else str(file)
 
 
 
 
 
1687
  with open(file_path, "r") as f:
1688
  data = json.load(f)
1689
- except (json.JSONDecodeError, Exception) as e:
1690
  return "rejected", None, f"REJECTED: Invalid JSON — {e}"
 
 
1691
 
1692
  report_lines = []
1693
 
@@ -1811,7 +1818,7 @@ def process_upload(file):
1811
  return (
1812
  report,
1813
  build_main_table(subs),
1814
- gr.Dropdown(choices=agent_choices),
1815
  )
1816
 
1817
  # Add status and save
@@ -1834,7 +1841,7 @@ def process_upload(file):
1834
  return (
1835
  summary,
1836
  build_main_table(subs),
1837
- gr.Dropdown(choices=agent_choices),
1838
  )
1839
 
1840
 
 
735
  # ---------------------------------------------------------------------------
736
 
737
  _APP_DIR = Path(__file__).resolve().parent
738
+ SUBMISSIONS_FILE = _APP_DIR / "data" / "submissions.jsonl"
739
+ KEY_REQUESTS_FILE = _APP_DIR / "data" / "key_requests.jsonl"
740
  TASKS_FILE = _APP_DIR / "data" / "test.raw.json"
741
  CANONICAL_HASHES_FILE = _APP_DIR / "data" / "canonical_hashes.json"
742
 
 
746
  # ---------------------------------------------------------------------------
747
 
748
  _DATA_REPO_ID = "dolev31/st-webagentbench-data"
749
+ _DATA_DIR = _APP_DIR / "data"
750
  _scheduler: CommitScheduler | None = None
751
  _PERSISTENCE_ENABLED = False
752
 
 
1684
  # Handle both Gradio 4.x (object with .name) and 5.x (filepath string)
1685
  try:
1686
  file_path = file.name if hasattr(file, "name") else str(file)
1687
+ file_size = os.path.getsize(file_path)
1688
+ if file_size > 50_000_000:
1689
+ return "rejected", None, f"REJECTED: File too large ({file_size / 1_000_000:.1f}MB). Maximum is 50MB."
1690
+ if file_size == 0:
1691
+ return "rejected", None, "REJECTED: Empty file uploaded."
1692
  with open(file_path, "r") as f:
1693
  data = json.load(f)
1694
+ except json.JSONDecodeError as e:
1695
  return "rejected", None, f"REJECTED: Invalid JSON — {e}"
1696
+ except Exception as e:
1697
+ return "rejected", None, f"REJECTED: Could not read file — {e}"
1698
 
1699
  report_lines = []
1700
 
 
1818
  return (
1819
  report,
1820
  build_main_table(subs),
1821
+ gr.update(choices=agent_choices),
1822
  )
1823
 
1824
  # Add status and save
 
1841
  return (
1842
  summary,
1843
  build_main_table(subs),
1844
+ gr.update(choices=agent_choices),
1845
  )
1846
 
1847
 
data/canonical_hashes.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "1.0.0": {
3
+ "evaluators_sha256": "1ecb7e511d25fe0dc4aaf6fd887eb108d12e293d9b90629630745300f9733cf5",
4
+ "task_config_sha256": "5119d99c758a46100cc678d8193659b43c3174e7e295a7887e0b07f877f131b5",
5
+ "custom_env_sha256": "7e6ef6e3fb8e75cd46c8c00a038524e73ff37829584b1f47d34b237eb2181ca8",
6
+ "helper_functions_sha256": "3ed7169b7c5bb734b13c669c06b5f977a448a66c7d9eb41cbb32d7f7d16cb845"
7
+ }
8
+ }