Spaces:
Running
Running
Update trajectory dataset organization
Browse files
- README.md +3 -3
- app.py +1 -1
- check_space_runtime.py +1 -1
- frontend/local_server.py +1 -1
README.md
CHANGED
|
@@ -28,7 +28,7 @@ new in the Space.
|
|
| 28 |
| --- | --- |
|
| 29 |
| `black-yt/ResearchHarness` | Main open-source runtime, CLI, API server, frontend, docs, tests, and benchmark adapters. |
|
| 30 |
| `InternScience/ResearchHarness` | Hugging Face Space app that hosts the browser frontend with managed temporary workspaces. |
|
| 31 |
-
| `
|
| 32 |
|
| 33 |
Maintenance rule:
|
| 34 |
|
|
@@ -108,7 +108,7 @@ Configure these as Hugging Face Space secrets before starting the app:
|
|
| 108 |
| `SERPER_KEY` | WebSearch / ScholarSearch key from <https://serper.dev/>. |
|
| 109 |
| `JINA_KEY` | WebFetch key from <https://jina.ai/>. |
|
| 110 |
| `MINERU_TOKEN` | ReadPDF key from <https://mineru.net/>. |
|
| 111 |
-
| `HF_TOKEN` | Hugging Face token with write access to `
|
| 112 |
|
| 113 |
## Optional Runtime Variables
|
| 114 |
|
|
@@ -121,7 +121,7 @@ Configure these as Hugging Face Space secrets before starting the app:
|
|
| 121 |
| `WEBFETCH_TIMEOUT_SECONDS` | `180` | Overall timeout for one WebFetch tool call. |
|
| 122 |
| `WEBFETCH_MAX_CHARS` | `30000` | Hard maximum characters returned by one URL-only WebFetch call. |
|
| 123 |
| `RH_COLLECTION_ENABLED` | `true` | Automatically collect completed hosted runs. |
|
| 124 |
-
| `RH_COLLECTION_DATASET_REPO` | `
|
| 125 |
| `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
|
| 126 |
| `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
|
| 127 |
| `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
|
|
|
|
| 28 |
| --- | --- |
|
| 29 |
| `black-yt/ResearchHarness` | Main open-source runtime, CLI, API server, frontend, docs, tests, and benchmark adapters. |
|
| 30 |
| `InternScience/ResearchHarness` | Hugging Face Space app that hosts the browser frontend with managed temporary workspaces. |
|
| 31 |
+
| `InternScience/ResearchHarness-Data` | Hugging Face dataset receiving collected hosted-run trajectory PRs. |
|
| 32 |
|
| 33 |
Maintenance rule:
|
| 34 |
|
|
|
|
| 108 |
| `SERPER_KEY` | WebSearch / ScholarSearch key from <https://serper.dev/>. |
|
| 109 |
| `JINA_KEY` | WebFetch key from <https://jina.ai/>. |
|
| 110 |
| `MINERU_TOKEN` | ReadPDF key from <https://mineru.net/>. |
|
| 111 |
+
| `HF_TOKEN` | Hugging Face token with write access to `InternScience/ResearchHarness-Data`. |
|
| 112 |
|
| 113 |
## Optional Runtime Variables
|
| 114 |
|
|
|
|
| 121 |
| `WEBFETCH_TIMEOUT_SECONDS` | `180` | Overall timeout for one WebFetch tool call. |
|
| 122 |
| `WEBFETCH_MAX_CHARS` | `30000` | Hard maximum characters returned by one URL-only WebFetch call. |
|
| 123 |
| `RH_COLLECTION_ENABLED` | `true` | Automatically collect completed hosted runs. |
|
| 124 |
+
| `RH_COLLECTION_DATASET_REPO` | `InternScience/ResearchHarness-Data` | Dataset repo that receives trajectory PRs. |
|
| 125 |
| `RH_COLLECTION_BATCH_SIZE` | `5` | Create one dataset PR after this many collected runs. |
|
| 126 |
| `RH_COLLECTION_MAX_BUNDLE_BYTES` | `20971520` | Drop a single run bundle if it exceeds this byte limit. |
|
| 127 |
| `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
|
app.py
CHANGED
|
@@ -39,7 +39,7 @@ def configure_space() -> None:
|
|
| 39 |
cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),
|
| 40 |
cleanup_interval_seconds=_int_env("RH_SPACE_CLEANUP_INTERVAL_SECONDS", 15 * 60),
|
| 41 |
collection_enabled=_bool_env("RH_COLLECTION_ENABLED", True),
|
| 42 |
-
collection_dataset_repo=os.getenv("RH_COLLECTION_DATASET_REPO", "
|
| 43 |
collection_batch_size=_int_env("RH_COLLECTION_BATCH_SIZE", 5),
|
| 44 |
collection_max_bundle_bytes=_int_env("RH_COLLECTION_MAX_BUNDLE_BYTES", 20 * 1024 * 1024),
|
| 45 |
)
|
|
|
|
| 39 |
cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),
|
| 40 |
cleanup_interval_seconds=_int_env("RH_SPACE_CLEANUP_INTERVAL_SECONDS", 15 * 60),
|
| 41 |
collection_enabled=_bool_env("RH_COLLECTION_ENABLED", True),
|
| 42 |
+
collection_dataset_repo=os.getenv("RH_COLLECTION_DATASET_REPO", "InternScience/ResearchHarness-Data"),
|
| 43 |
collection_batch_size=_int_env("RH_COLLECTION_BATCH_SIZE", 5),
|
| 44 |
collection_max_bundle_bytes=_int_env("RH_COLLECTION_MAX_BUNDLE_BYTES", 20 * 1024 * 1024),
|
| 45 |
)
|
check_space_runtime.py
CHANGED
|
@@ -19,7 +19,7 @@ def main() -> int:
|
|
| 19 |
cleanup_max_runs=2,
|
| 20 |
cleanup_interval_seconds=60,
|
| 21 |
collection_enabled=True,
|
| 22 |
-
collection_dataset_repo="
|
| 23 |
collection_batch_size=5,
|
| 24 |
collection_max_bundle_bytes=1024 * 1024,
|
| 25 |
)
|
|
|
|
| 19 |
cleanup_max_runs=2,
|
| 20 |
cleanup_interval_seconds=60,
|
| 21 |
collection_enabled=True,
|
| 22 |
+
collection_dataset_repo="InternScience/ResearchHarness-Data",
|
| 23 |
collection_batch_size=5,
|
| 24 |
collection_max_bundle_bytes=1024 * 1024,
|
| 25 |
)
|
frontend/local_server.py
CHANGED
|
@@ -44,7 +44,7 @@ FRONTEND_CLEANUP_RETENTION_SECONDS = 6 * 60 * 60
|
|
| 44 |
FRONTEND_CLEANUP_MAX_RUNS = 40
|
| 45 |
FRONTEND_CLEANUP_INTERVAL_SECONDS = 15 * 60
|
| 46 |
FRONTEND_COLLECTION_ENABLED = True
|
| 47 |
-
FRONTEND_COLLECTION_DATASET_REPO = "
|
| 48 |
FRONTEND_COLLECTION_BATCH_SIZE = 5
|
| 49 |
FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = 20 * 1024 * 1024
|
| 50 |
_CLEANUP_THREAD_STARTED = False
|
|
|
|
| 44 |
FRONTEND_CLEANUP_MAX_RUNS = 40
|
| 45 |
FRONTEND_CLEANUP_INTERVAL_SECONDS = 15 * 60
|
| 46 |
FRONTEND_COLLECTION_ENABLED = True
|
| 47 |
+
FRONTEND_COLLECTION_DATASET_REPO = "InternScience/ResearchHarness-Data"
|
| 48 |
FRONTEND_COLLECTION_BATCH_SIZE = 5
|
| 49 |
FRONTEND_COLLECTION_MAX_BUNDLE_BYTES = 20 * 1024 * 1024
|
| 50 |
_CLEANUP_THREAD_STARTED = False
|