# maris-ai-master / core-python / tests / test_huggingface_sync.py
# MarisUK's picture
# Maris AI model sync
# f440f03 verified
from __future__ import annotations
import json
import os
import stat
import subprocess
import sys
from pathlib import Path
import pytest
# Location of the real sync script under test: two directory levels above the
# directory containing this file, then ``huggingface/sync.sh``. The tests read
# its text and write a copy into a throwaway repo layout under ``tmp_path``.
SYNC_SCRIPT_PATH = Path(__file__).resolve().parents[2] / "huggingface" / "sync.sh"
@pytest.mark.parametrize(
    (
        "command",
        "repo_env",
        "repo_id",
        "space_dir_name",
        "local_space_dir_env",
        "commit_message",
        "sdk_env",
        "sdk_value",
    ),
    [
        (
            "upload-space",
            "MARIS_AGENT_SPACE_REPO",
            "MarisUK/maris.ai.agent",
            "huggingface_space",
            "MARIS_LOCAL_AGENT_DIR",
            "Maris AI Space sync",
            None,
            "docker",
        ),
        (
            "upload-chat-space",
            "MARIS_CHAT_SPACE_REPO",
            "MarisUK/maris.ai.chat",
            "huggingface_chat_space",
            "MARIS_LOCAL_CHAT_AGENT_DIR",
            "Maris AI chat Space sync",
            None,
            "docker",
        ),
        (
            "upload-human-training-space",
            "MARIS_HUMAN_TRAINING_SPACE_REPO",
            "MarisUK/maris.ai.human.training",
            "huggingface_human_training_space",
            "MARIS_LOCAL_HUMAN_TRAINING_SPACE_DIR",
            "Maris AI human training Space sync",
            None,
            "docker",
        ),
        (
            "upload-space",
            "MARIS_AGENT_SPACE_REPO",
            "MarisUK/custom.space",
            "huggingface_space",
            "MARIS_LOCAL_AGENT_DIR",
            "Maris AI Space sync",
            "HF_SPACE_SDK",
            "gradio",
        ),
    ],
)
def test_space_uploads_pass_space_sdk(
    tmp_path: Path,
    command: str,
    repo_env: str,
    repo_id: str,
    space_dir_name: str,
    local_space_dir_env: str,
    commit_message: str,
    sdk_env: str | None,
    sdk_value: str,
) -> None:
    """Run a Space upload command and check the arguments given to ``python3 -``.

    A throwaway repo is built under ``tmp_path`` containing a copy of
    ``sync.sh`` and a Space directory with a ``Dockerfile`` and ``README.md``.
    A stub ``python3`` placed first on ``PATH`` records the arguments and
    stdin of the ``python3 -`` invocation into a JSON file, which is then
    compared with the expected repo id, repo type, commit message, token and
    SDK value.

    Args:
        tmp_path: pytest-provided scratch directory.
        command: ``sync.sh`` sub-command to run.
        repo_env: Environment variable that carries the Space repo id.
        repo_id: Repo id exported through ``repo_env``.
        space_dir_name: Name of the local Space directory inside the repo.
        local_space_dir_env: Environment variable pointing at that directory.
        commit_message: Commit message expected in the recorded arguments.
        sdk_env: Environment variable used to override the SDK, or ``None``
            to exercise the default.
        sdk_value: SDK value expected as the last recorded argument.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python").mkdir()
    (repo_root / space_dir_name).mkdir()
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / space_dir_name / "Dockerfile").write_text(
        "FROM python:3.11-slim\n", encoding="utf-8"
    )
    (repo_root / space_dir_name / "README.md").write_text(
        "---\nsdk: docker\n---\n",
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.json"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: ``-c`` probes succeed silently, ``-`` invocations are
    # recorded (overwriting the log, so the last one wins), anything else
    # fails loudly. Nested bodies must be indented deeper than their headers
    # or the stub itself dies with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    payload = {'argv': sys.argv[2:], 'stdin': sys.stdin.read()}",
                "    with open(log_path, 'w', encoding='utf-8') as handle:",
                "        json.dump(payload, handle)",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            repo_env: repo_id,
            local_space_dir_env: str(repo_root / space_dir_name),
        }
    )
    if sdk_env is None:
        # The default-SDK cases must not pick up an override exported in the
        # shell that launched pytest.
        env.pop("HF_SPACE_SDK", None)
    else:
        env[sdk_env] = sdk_value

    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # Do not inherit the runner's stdin: anything in the script that reads
        # stdin without its own redirection would otherwise wait on the
        # terminal when pytest runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payload = json.loads(log_path.read_text(encoding="utf-8"))
    assert payload["argv"][0] == repo_id
    assert payload["argv"][1] == "space"
    # NOTE(review): presumably the script stages the upload in a temp dir
    # under /tmp; confirm against sync.sh if TMPDIR is customised.
    assert payload["argv"][2].startswith("/tmp/")
    assert payload["argv"][3] == commit_message
    assert payload["argv"][4:] == ["token", "", sdk_value]
def test_upload_model_publishes_branch_suite_text_and_codex_repos(tmp_path: Path) -> None:
    """Check ``upload-model`` publishes the main, text and codex repos from a manifest.

    The throwaway repo contains a model directory with ``master`` and
    ``coder`` sub-directories and a ``branch-suite.json`` manifest naming
    them. A stub ``python3`` logs every invocation as one JSON line and, for
    the manifest-resolution call, prints ``branch<TAB>dir<TAB>repo`` lines for
    the ``master`` and ``coder`` branches. The test then asserts the order,
    repo ids and commit messages of the recorded workspace and model-card
    uploads.

    Args:
        tmp_path: pytest-provided scratch directory.
    """
    repo_root = tmp_path / "repo"
    model_root = repo_root / "core-python" / "output" / "model"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "output").mkdir(parents=True)
    (model_root / "master").mkdir(parents=True)
    (model_root / "coder").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "model-card.md").write_text(
        "# Model card\n",
        encoding="utf-8",
    )
    (model_root / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "master" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "branch-suite.json").write_text(
        json.dumps(
            {
                "branches": {
                    "master": {"output_dir": "core-python/output/model/master"},
                    "coder": {"output_dir": "core-python/output/model/coder"},
                }
            }
        ),
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter. Every call is appended to the log first; ``-c`` probes
    # and ``-`` calls exit 0, and the ``-`` call whose first argument ends in
    # ``branch-suite.json`` additionally emits the branch/dir/repo table.
    # Nested bodies must be indented deeper than their headers or the stub
    # fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    if len(sys.argv) >= 7 and sys.argv[2].endswith('branch-suite.json'):",
                "        manifest_path = sys.argv[2]",
                "        repo_root = sys.argv[4]",
                "        text_repo = sys.argv[5]",
                "        codex_repo = sys.argv[6]",
                "        with open(manifest_path, encoding='utf-8') as manifest:",
                "            payload = json.load(manifest)",
                "        branches = payload.get('branches', {})",
                "        for branch_name, repo_id in (('master', text_repo), ('coder', codex_repo)):",
                "            output_dir = branches.get(branch_name, {}).get('output_dir', '')",
                "            if output_dir and repo_id:",
                "                print(f'{branch_name}\\t{repo_root}/{output_dir}\\t{repo_id}')",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_MODEL_REPO": "MarisUK/maris-ai-master",
            "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text",
            "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex",
            "HF_LOCAL_MODEL_DIR": str(model_root),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-model"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is the "-" marker.
    workspace_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) >= 5
        and payload["argv"][2] == "model"
    ]
    card_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) == 5
        and payload["argv"][2] == str(repo_root / "huggingface" / "model-card.md")
    ]
    assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"),
    ]
    # NOTE(review): presumably the script stages uploads in a temp dir under
    # /tmp; confirm against sync.sh if TMPDIR is customised.
    assert all(payload["argv"][3].startswith("/tmp/") for payload in workspace_uploads)
    assert [
        (payload["argv"][1], payload["argv"][3], payload["argv"][4])
        for payload in card_uploads
    ] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync", "token"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)", "token"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)", "token"),
    ]
def test_upload_model_publishes_fallback_master_and_coder_dirs_without_manifest(tmp_path: Path) -> None:
    """Check ``upload-model`` still publishes text/codex repos with no manifest file.

    The throwaway repo has ``master`` and ``coder`` model sub-directories but
    no ``branch-suite.json``. A stub ``python3`` logs every invocation as one
    JSON line and, for the manifest-resolution call, prints
    ``branch<TAB>dir<TAB>repo`` lines pointing at ``<model_root>/master`` and
    ``<model_root>/coder``. The test asserts the order, repo ids and commit
    messages of the recorded workspace uploads.

    Args:
        tmp_path: pytest-provided scratch directory.
    """
    repo_root = tmp_path / "repo"
    model_root = repo_root / "core-python" / "output" / "model"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "output").mkdir(parents=True)
    (model_root / "master").mkdir(parents=True)
    (model_root / "coder").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "model-card.md").write_text(
        "# Model card\n",
        encoding="utf-8",
    )
    (model_root / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "master" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8")

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter. Every call is appended to the log first; ``-c`` probes
    # and ordinary ``-`` calls exit 0, while the ``-`` call whose first
    # argument ends in ``branch-suite.json`` prints the fallback table.
    # Nested bodies must be indented deeper than their headers or the stub
    # fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    if len(sys.argv) < 7 or not sys.argv[2].endswith('branch-suite.json'):",
                "        raise SystemExit(0)",
                "    model_root = sys.argv[3]",
                "    text_repo = sys.argv[5]",
                "    codex_repo = sys.argv[6]",
                "    print(f'master\\t{model_root}/master\\t{text_repo}')",
                "    print(f'coder\\t{model_root}/coder\\t{codex_repo}')",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_MODEL_REPO": "MarisUK/maris-ai-master",
            "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text",
            "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex",
            "HF_LOCAL_MODEL_DIR": str(model_root),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-model"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is the "-" marker.
    workspace_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) >= 5
        and payload["argv"][2] == "model"
    ]
    assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"),
    ]
def test_upload_dataset_also_publishes_optional_eval_repo(tmp_path: Path) -> None:
    """Check ``upload-dataset`` publishes the main dataset and the eval dataset.

    The throwaway repo holds one sample record in ``data`` and one in
    ``eval-data``, plus dataset cards and a placeholder
    ``validate_datasets.py``. A stub ``python3`` logs every invocation as one
    JSON line. The test asserts the validator was called once per dataset
    with the expected profile, and that exactly four uploads were recorded:
    data and card for the main repo, then data and card for the eval repo.

    Args:
        tmp_path: pytest-provided scratch directory.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (repo_root / "data" / "conversation").mkdir(parents=True)
    (repo_root / "eval-data" / "conversation").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "dataset-card.md").write_text(
        "# Dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "eval-dataset-card.md").write_text(
        "# Eval dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (repo_root / "data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: log every call, accept ``-c`` probes, validator runs
    # and ``-`` uploads, reject anything else. Nested bodies must be indented
    # deeper than their headers or the stub fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    # Drop optional repo settings that may leak in from the caller's shell;
    # this test expects only the main and eval repos to be published.
    for optional_repo_env in ("HF_GLOBAL_MEMORY_REPO", "HF_BENCHMARK_DATASET_REPO"):
        env.pop(optional_repo_env, None)
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_LOCAL_DATASET_DIR": str(repo_root / "data"),
            "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is "-" or the script path.
    upload_payloads = [
        payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-"
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]
    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "bootstrap", str(repo_root / "data")],
        ["--profile", "eval", str(repo_root / "eval-data")],
    ]
    assert len(upload_payloads) == 4
    # The staging directory (index 3) is compared with itself here and
    # checked separately by prefix below.
    assert upload_payloads[0]["argv"][1:5] == [
        "MarisUK/maris-ai-memory",
        "dataset",
        upload_payloads[0]["argv"][3],
        "Maris AI dataset sync",
    ]
    assert upload_payloads[0]["argv"][3].startswith("/tmp/")
    assert upload_payloads[1]["argv"][1:5] == [
        "MarisUK/maris-ai-memory",
        str(repo_root / "huggingface" / "dataset-card.md"),
        "Maris AI dataset sync",
        "token",
    ]
    assert upload_payloads[2]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        "dataset",
        upload_payloads[2]["argv"][3],
        "Maris AI eval dataset sync",
    ]
    assert upload_payloads[2]["argv"][3].startswith("/tmp/")
    assert upload_payloads[3]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        str(repo_root / "huggingface" / "eval-dataset-card.md"),
        "Maris AI eval dataset sync",
        "token",
    ]
def test_upload_dataset_can_publish_global_memory_eval_and_benchmark_repos(
    tmp_path: Path,
) -> None:
    """Check ``upload-dataset`` with global-memory, eval and benchmark repos set.

    The throwaway repo holds one sample record in each of ``data``,
    ``eval-data`` and ``benchmark-data``, the four dataset cards, and a
    placeholder ``validate_datasets.py``. The global-memory repo and local
    directory are set to the same values as the main dataset. A stub
    ``python3`` logs every invocation as one JSON line. The test asserts the
    three validator calls, a total of six uploads, the "Izlaižu global memory
    upload" message on stdout, and the arguments of the two benchmark uploads.

    Args:
        tmp_path: pytest-provided scratch directory.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    for dataset_dir in ("data", "eval-data", "benchmark-data"):
        (repo_root / dataset_dir / "conversation").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "dataset-card.md").write_text(
        "# Dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "global-memory-dataset-card.md").write_text(
        "# Global memory dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "eval-dataset-card.md").write_text(
        "# Eval dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "benchmark-dataset-card.md").write_text(
        "# Benchmark dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (repo_root / "data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    eval_record = (
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1",'
        '"user":"u","assistant":"a","language":"lv","source":"test","task_id":"eval-1",'
        '"benchmark_version":"maris-evals-v1","suite":"sanity","difficulty":"easy",'
        '"evaluation_mode":"reference-review","risk_level":"low","expected_behavior":["ok"],'
        '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n'
    )
    (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text(
        eval_record,
        encoding="utf-8",
    )
    benchmark_record = (
        '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1",'
        '"user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1",'
        '"benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium",'
        '"evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],'
        '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n'
    )
    (repo_root / "benchmark-data" / "conversation" / "sample.jsonl").write_text(
        benchmark_record,
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: log every call, accept ``-c`` probes, validator runs
    # and ``-`` uploads, reject anything else. Nested bodies must be indented
    # deeper than their headers or the stub fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_GLOBAL_MEMORY_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark",
            "HF_LOCAL_DATASET_DIR": str(repo_root / "data"),
            "HF_LOCAL_GLOBAL_DATASET_DIR": str(repo_root / "data"),
            "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"),
            "HF_LOCAL_BENCHMARK_DATASET_DIR": str(repo_root / "benchmark-data"),
        }
    )
    result = subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is "-" or the script path.
    upload_payloads = [
        payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-"
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]
    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "bootstrap", str(repo_root / "data")],
        ["--profile", "eval", str(repo_root / "eval-data")],
        ["--profile", "eval", str(repo_root / "benchmark-data")],
    ]
    assert len(upload_payloads) == 6
    assert "Izlaižu global memory upload" in result.stdout
    # Only the last two uploads (benchmark data and card) are pinned here;
    # the staging directory at index 3 is compared with itself.
    assert upload_payloads[4]["argv"][1:5] == [
        "MarisUK/maris-ai-benchmark",
        "dataset",
        upload_payloads[4]["argv"][3],
        "Maris AI benchmark dataset sync",
    ]
    assert upload_payloads[5]["argv"][1:5] == [
        "MarisUK/maris-ai-benchmark",
        str(repo_root / "huggingface" / "benchmark-dataset-card.md"),
        "Maris AI benchmark dataset sync",
        "token",
    ]
def test_upload_dataset_uses_bundled_eval_data_by_default(tmp_path: Path) -> None:
    """Check ``upload-dataset`` validates ``<repo>/eval-data`` when no eval dir is given.

    The throwaway repo holds one sample record in ``data`` and one in
    ``eval-data``. The eval repo is configured but the local eval directory
    variable is deliberately left unset. A stub ``python3`` logs every
    invocation as one JSON line, and the test asserts the validator was run
    against ``data`` and then against the repo's own ``eval-data`` directory.

    Args:
        tmp_path: pytest-provided scratch directory.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (repo_root / "data" / "conversation").mkdir(parents=True)
    (repo_root / "eval-data" / "conversation").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "dataset-card.md").write_text(
        "# Dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "eval-dataset-card.md").write_text(
        "# Eval dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (repo_root / "data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: log every call, accept ``-c`` probes, validator runs
    # and ``-`` uploads, reject anything else. Nested bodies must be indented
    # deeper than their headers or the stub fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    # The point of this test is the *unset* eval directory, so make sure a
    # value exported in the caller's shell cannot leak in. The optional repo
    # settings are dropped as well so only two validator runs are expected.
    for ambient_env in (
        "HF_LOCAL_EVAL_DATASET_DIR",
        "HF_GLOBAL_MEMORY_REPO",
        "HF_BENCHMARK_DATASET_REPO",
    ):
        env.pop(ambient_env, None)
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_LOCAL_DATASET_DIR": str(repo_root / "data"),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is the validator script path.
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]
    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "bootstrap", str(repo_root / "data")],
        ["--profile", "eval", str(repo_root / "eval-data")],
    ]
@pytest.mark.parametrize("command", ["upload-benchmark-dataset", "upload-benchmark-data"])
def test_upload_benchmark_dataset_only_publishes_benchmark_repo(
    tmp_path: Path, command: str
) -> None:
    """Check the benchmark-only commands publish just the benchmark dataset.

    The throwaway repo holds one sample record in ``benchmark-data``, the
    benchmark dataset card and a placeholder ``validate_datasets.py``. A stub
    ``python3`` logs every invocation as one JSON line. The test asserts a
    single validator run with the ``eval`` profile and exactly two uploads
    (data, then card) to the benchmark repo.

    Args:
        tmp_path: pytest-provided scratch directory.
        command: ``sync.sh`` sub-command to run; both spellings are exercised.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (repo_root / "benchmark-data" / "conversation").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "benchmark-dataset-card.md").write_text(
        "# Benchmark dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (repo_root / "benchmark-data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1","user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1","benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium","evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: log every call, accept ``-c`` probes, validator runs
    # and ``-`` uploads, reject anything else. Nested bodies must be indented
    # deeper than their headers or the stub fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark",
            "HF_LOCAL_BENCHMARK_DATASET_DIR": str(repo_root / "benchmark-data"),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is "-" or the script path.
    upload_payloads = [
        payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-"
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]
    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "eval", str(repo_root / "benchmark-data")]
    ]
    assert len(upload_payloads) == 2
    # The staging directory at index 3 is compared with itself.
    assert upload_payloads[0]["argv"][1:5] == [
        "MarisUK/maris-ai-benchmark",
        "dataset",
        upload_payloads[0]["argv"][3],
        "Maris AI benchmark dataset sync",
    ]
    assert upload_payloads[1]["argv"][1:5] == [
        "MarisUK/maris-ai-benchmark",
        str(repo_root / "huggingface" / "benchmark-dataset-card.md"),
        "Maris AI benchmark dataset sync",
        "token",
    ]
@pytest.mark.parametrize("command", ["upload-eval-dataset", "upload-evals-data"])
def test_upload_eval_dataset_only_publishes_eval_repo(tmp_path: Path, command: str) -> None:
    """Check the eval-only commands publish just the eval dataset.

    The throwaway repo holds one sample record in ``data`` and one in
    ``eval-data``, both dataset cards and a placeholder
    ``validate_datasets.py``. A stub ``python3`` logs every invocation as one
    JSON line. The test asserts a single validator run with the ``eval``
    profile and exactly two uploads (data, then card) to the eval repo.

    Args:
        tmp_path: pytest-provided scratch directory.
        command: ``sync.sh`` sub-command to run; both spellings are exercised.
    """
    repo_root = tmp_path / "repo"
    (repo_root / "huggingface").mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (repo_root / "data" / "conversation").mkdir(parents=True)
    (repo_root / "eval-data" / "conversation").mkdir(parents=True)
    (repo_root / "huggingface" / "sync.sh").write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "dataset-card.md").write_text(
        "# Dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "huggingface" / "eval-dataset-card.md").write_text(
        "# Eval dataset card\n",
        encoding="utf-8",
    )
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (repo_root / "data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # Stub interpreter: log every call, accept ``-c`` probes, validator runs
    # and ``-`` uploads, reject anything else. Nested bodies must be indented
    # deeper than their headers or the stub fails with an IndentationError.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_LOCAL_DATASET_DIR": str(repo_root / "data"),
            "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"),
        }
    )
    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "sync.sh"), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
        # The stub reads stdin on every invocation; calls the script does not
        # redirect would otherwise wait on the inherited terminal when pytest
        # runs with capture disabled.
        stdin=subprocess.DEVNULL,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Logged argv is sys.argv[1:], so index 0 is "-" or the script path.
    upload_payloads = [
        payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-"
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]
    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "eval", str(repo_root / "eval-data")]
    ]
    assert len(upload_payloads) == 2
    # The staging directory at index 3 is compared with itself here and
    # checked separately by prefix below.
    assert upload_payloads[0]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        "dataset",
        upload_payloads[0]["argv"][3],
        "Maris AI eval dataset sync",
    ]
    assert upload_payloads[0]["argv"][3].startswith("/tmp/")
    assert upload_payloads[1]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        str(repo_root / "huggingface" / "eval-dataset-card.md"),
        "Maris AI eval dataset sync",
        "token",
    ]