black-yt committed on
Commit
771e544
·
1 Parent(s): 4017d58

Update trajectory dataset organization

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +1 -1
  3. check_space_runtime.py +1 -1
  4. frontend/local_server.py +1 -1
README.md CHANGED
@@ -28,7 +28,7 @@ new in the Space.
28
  | --- | --- |
29
  | `black-yt/ResearchHarness` | Main open-source runtime, CLI, API server, frontend, docs, tests, and benchmark adapters. |
30
  | `InternScience/ResearchHarness` | Hugging Face Space app that hosts the browser frontend with managed temporary workspaces. |
31
- | `CoCoOne/ResearchHarness-Data` | Hugging Face dataset receiving collected hosted-run trajectory PRs. |
32
 
33
  Maintenance rule:
34
 
@@ -108,7 +108,7 @@ Configure these as Hugging Face Space secrets before starting the app:
108
  | `SERPER_KEY` | WebSearch / ScholarSearch key from <https://serper.dev/>. |
109
  | `JINA_KEY` | WebFetch key from <https://jina.ai/>. |
110
  | `MINERU_TOKEN` | ReadPDF key from <https://mineru.net/>. |
111
- | `HF_TOKEN` | Hugging Face token with write access to `CoCoOne/ResearchHarness-Data`. |
112
 
113
  ## Optional Runtime Variables
114
 
@@ -121,7 +121,7 @@ Configure these as Hugging Face Space secrets before starting the app:
121
  | `WEBFETCH_TIMEOUT_SECONDS` | `180` | Overall timeout for one WebFetch tool call. |
122
  | `WEBFETCH_MAX_CHARS` | `30000` | Hard maximum characters returned by one URL-only WebFetch call. |
123
  | `RH_COLLECTION_ENABLED` | `true` | Automatically collect completed hosted runs. |
124
- | `RH_COLLECTION_DATASET_REPO` | `CoCoOne/ResearchHarness-Data` | Dataset repo that receives trajectory PRs. |
125
  | `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
126
  | `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
127
  | `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
 
28
  | --- | --- |
29
  | `black-yt/ResearchHarness` | Main open-source runtime, CLI, API server, frontend, docs, tests, and benchmark adapters. |
30
  | `InternScience/ResearchHarness` | Hugging Face Space app that hosts the browser frontend with managed temporary workspaces. |
31
+ | `InternScience/ResearchHarness-Data` | Hugging Face dataset receiving collected hosted-run trajectory PRs. |
32
 
33
  Maintenance rule:
34
 
 
108
  | `SERPER_KEY` | WebSearch / ScholarSearch key from <https://serper.dev/>. |
109
  | `JINA_KEY` | WebFetch key from <https://jina.ai/>. |
110
  | `MINERU_TOKEN` | ReadPDF key from <https://mineru.net/>. |
111
+ | `HF_TOKEN` | Hugging Face token with write access to `InternScience/ResearchHarness-Data`. |
112
 
113
  ## Optional Runtime Variables
114
 
 
121
  | `WEBFETCH_TIMEOUT_SECONDS` | `180` | Overall timeout for one WebFetch tool call. |
122
  | `WEBFETCH_MAX_CHARS` | `30000` | Hard maximum characters returned by one URL-only WebFetch call. |
123
  | `RH_COLLECTION_ENABLED` | `true` | Automatically collect completed hosted runs. |
124
+ | `RH_COLLECTION_DATASET_REPO` | `InternScience/ResearchHarness-Data` | Dataset repo that receives trajectory PRs. |
125
  | `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
126
  | `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
127
  | `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
app.py CHANGED
@@ -39,7 +39,7 @@ def configure_space() -> None:
39
  cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),
40
  cleanup_interval_seconds=_int_env("RH_SPACE_CLEANUP_INTERVAL_SECONDS", 15 * 60),
41
  collection_enabled=_bool_env("RH_COLLECTION_ENABLED", True),
42
- collection_dataset_repo=os.getenv("RH_COLLECTION_DATASET_REPO", "CoCoOne/ResearchHarness-Data"),
43
  collection_batch_size=_int_env("RH_COLLECTION_BATCH_SIZE", 5),
44
  collection_max_bundle_bytes=_int_env("RH_COLLECTION_MAX_BUNDLE_BYTES", 20 * 1024 * 1024),
45
  )
 
39
  cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),
40
  cleanup_interval_seconds=_int_env("RH_SPACE_CLEANUP_INTERVAL_SECONDS", 15 * 60),
41
  collection_enabled=_bool_env("RH_COLLECTION_ENABLED", True),
42
+ collection_dataset_repo=os.getenv("RH_COLLECTION_DATASET_REPO", "InternScience/ResearchHarness-Data"),
43
  collection_batch_size=_int_env("RH_COLLECTION_BATCH_SIZE", 5),
44
  collection_max_bundle_bytes=_int_env("RH_COLLECTION_MAX_BUNDLE_BYTES", 20 * 1024 * 1024),
45
  )
check_space_runtime.py CHANGED
@@ -19,7 +19,7 @@ def main() -> int:
19
  cleanup_max_runs=2,
20
  cleanup_interval_seconds=60,
21
  collection_enabled=True,
22
- collection_dataset_repo="CoCoOne/ResearchHarness-Data",
23
  collection_batch_size=5,
24
  collection_max_bundle_bytes=1024 * 1024,
25
  )
 
19
  cleanup_max_runs=2,
20
  cleanup_interval_seconds=60,
21
  collection_enabled=True,
22
+ collection_dataset_repo="InternScience/ResearchHarness-Data",
23
  collection_batch_size=5,
24
  collection_max_bundle_bytes=1024 * 1024,
25
  )
frontend/local_server.py CHANGED
@@ -44,7 +44,7 @@ FRONTEND_CLEANUP_RETENTION_SECONDS = 6 * 60 * 60
44
  FRONTEND_CLEANUP_MAX_RUNS = 40
45
  FRONTEND_CLEANUP_INTERVAL_SECONDS = 15 * 60
46
  FRONTEND_COLLECTION_ENABLED = True
47
- FRONTEND_COLLECTION_DATASET_REPO = "CoCoOne/ResearchHarness-Data"
48
  FRONTEND_COLLECTION_BATCH_SIZE = 5
49
  FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = 20 * 1024 * 1024
50
  _CLEANUP_THREAD_STARTED = False
 
44
  FRONTEND_CLEANUP_MAX_RUNS = 40
45
  FRONTEND_CLEANUP_INTERVAL_SECONDS = 15 * 60
46
  FRONTEND_COLLECTION_ENABLED = True
47
+ FRONTEND_COLLECTION_DATASET_REPO = "InternScience/ResearchHarness-Data"
48
  FRONTEND_COLLECTION_BATCH_SIZE = 5
49
  FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = 20 * 1024 * 1024
50
  _CLEANUP_THREAD_STARTED = False