| from __future__ import annotations |
|
|
| import json |
| import os |
| import stat |
| import subprocess |
| import sys |
| from pathlib import Path |
|
|
| import pytest |
|
|
# Location of the real sync script under test: two directories above this
# module's directory, inside "huggingface/". Each test copies its text into a
# throwaway repository layout instead of running it in place.
SYNC_SCRIPT_PATH = Path(__file__).resolve().parents[2].joinpath("huggingface", "sync.sh")
|
|
|
|
@pytest.mark.parametrize(
    (
        "command",
        "repo_env",
        "repo_id",
        "space_dir_name",
        "local_space_dir_env",
        "commit_message",
        "sdk_env",
        "sdk_value",
    ),
    [
        (
            "upload-space",
            "MARIS_AGENT_SPACE_REPO",
            "MarisUK/maris.ai.agent",
            "huggingface_space",
            "MARIS_LOCAL_AGENT_DIR",
            "Maris AI Space sync",
            None,
            "docker",
        ),
        (
            "upload-chat-space",
            "MARIS_CHAT_SPACE_REPO",
            "MarisUK/maris.ai.chat",
            "huggingface_chat_space",
            "MARIS_LOCAL_CHAT_AGENT_DIR",
            "Maris AI chat Space sync",
            None,
            "docker",
        ),
        (
            "upload-human-training-space",
            "MARIS_HUMAN_TRAINING_SPACE_REPO",
            "MarisUK/maris.ai.human.training",
            "huggingface_human_training_space",
            "MARIS_LOCAL_HUMAN_TRAINING_SPACE_DIR",
            "Maris AI human training Space sync",
            None,
            "docker",
        ),
        (
            "upload-space",
            "MARIS_AGENT_SPACE_REPO",
            "MarisUK/custom.space",
            "huggingface_space",
            "MARIS_LOCAL_AGENT_DIR",
            "Maris AI Space sync",
            "HF_SPACE_SDK",
            "gradio",
        ),
    ],
)
def test_space_uploads_pass_space_sdk(
    tmp_path: Path,
    command: str,
    repo_env: str,
    repo_id: str,
    space_dir_name: str,
    local_space_dir_env: str,
    commit_message: str,
    sdk_env: str | None,
    sdk_value: str,
) -> None:
    """Check that each Space upload command forwards the Space SDK to python3.

    A copy of the real sync script is run with ``bash`` inside a throwaway
    repository layout. A stub ``python3`` placed first on ``PATH`` records the
    arguments and stdin of the ``python3 - ...`` invocation as JSON in the
    file named by ``HF_TEST_LOG``. The test then asserts the recorded
    arguments: repository id, ``"space"``, a staging path under ``/tmp/``, the
    commit message, the token, an empty string and the SDK value.

    When ``sdk_env`` is ``None`` the case expects ``sdk_value`` ("docker")
    without any override being set; otherwise ``sdk_env`` is exported with
    ``sdk_value`` before the script runs.
    """
    repo_root = tmp_path / "repo"
    sync_script = repo_root / "huggingface" / "sync.sh"
    space_dir = repo_root / space_dir_name

    sync_script.parent.mkdir(parents=True)
    (repo_root / "core-python").mkdir()
    space_dir.mkdir()
    sync_script.write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (space_dir / "Dockerfile").write_text("FROM python:3.11-slim\n", encoding="utf-8")
    (space_dir / "README.md").write_text("---\nsdk: docker\n---\n", encoding="utf-8")

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.json"
    python_wrapper = fake_bin / "python3"
    # The stub is real Python source, so its nested bodies need real nesting:
    # the ``json.dump`` call must sit inside the ``with`` block.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    payload = {'argv': sys.argv[2:], 'stdin': sys.stdin.read()}",
                "    with open(log_path, 'w', encoding='utf-8') as handle:",
                "        json.dump(payload, handle)",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    # The default cases assert "docker" with no override exported, so an
    # HF_SPACE_SDK inherited from the developer's shell or CI must not leak
    # into the subprocess and change the outcome.
    env.pop("HF_SPACE_SDK", None)
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            repo_env: repo_id,
            local_space_dir_env: str(space_dir),
        }
    )
    if sdk_env is not None:
        env[sdk_env] = sdk_value

    subprocess.run(
        ["bash", str(sync_script), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    payload = json.loads(log_path.read_text(encoding="utf-8"))
    assert payload["argv"][0] == repo_id
    assert payload["argv"][1] == "space"
    assert payload["argv"][2].startswith("/tmp/")
    assert payload["argv"][3] == commit_message
    assert payload["argv"][4:] == ["token", "", sdk_value]
|
|
|
|
def test_upload_model_publishes_branch_suite_text_and_codex_repos(tmp_path: Path) -> None:
    """Check that ``upload-model`` publishes the root model and both manifest branches.

    The temporary repository holds a model directory with ``master`` and
    ``coder`` sub-directories and a ``branch-suite.json`` manifest that maps
    each branch to its output directory. A stub ``python3`` on ``PATH``
    appends every invocation (arguments and stdin) as one JSON line to the
    file named by ``HF_TEST_LOG``. For the manifest-resolution call it prints
    ``branch<TAB>directory<TAB>repo`` lines for the ``master`` and ``coder``
    branches.

    The test asserts that three workspace uploads and three model-card
    uploads were recorded, in order, for the master, text and codex
    repositories, each with its expected commit message.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    sync_script = hf_dir / "sync.sh"
    model_card = hf_dir / "model-card.md"
    model_root = repo_root / "core-python" / "output" / "model"

    hf_dir.mkdir(parents=True)
    (repo_root / "core-python" / "output").mkdir(parents=True)
    (model_root / "master").mkdir(parents=True)
    (model_root / "coder").mkdir(parents=True)
    sync_script.write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    model_card.write_text("# Model card\n", encoding="utf-8")
    (model_root / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "master" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "branch-suite.json").write_text(
        json.dumps(
            {
                "branches": {
                    "master": {"output_dir": "core-python/output/model/master"},
                    "coder": {"output_dir": "core-python/output/model/coder"},
                }
            }
        ),
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # The stub is real Python source. The manifest branch is nested three
    # levels deep, and the trailing ``raise SystemExit(0)`` belongs to the
    # outer ``== '-'`` branch so that ordinary upload calls also exit cleanly.
    # The manifest is read through ``with`` so the handle is closed.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    if len(sys.argv) >= 7 and sys.argv[2].endswith('branch-suite.json'):",
                "        manifest_path = sys.argv[2]",
                "        repo_root = sys.argv[4]",
                "        text_repo = sys.argv[5]",
                "        codex_repo = sys.argv[6]",
                "        with open(manifest_path, encoding='utf-8') as manifest:",
                "            payload = json.load(manifest)",
                "        branches = payload.get('branches', {})",
                "        for branch_name, repo_id in (('master', text_repo), ('coder', codex_repo)):",
                "            output_dir = branches.get(branch_name, {}).get('output_dir', '')",
                "            if output_dir and repo_id:",
                "                print(f'{branch_name}\\t{repo_root}/{output_dir}\\t{repo_id}')",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_MODEL_REPO": "MarisUK/maris-ai-master",
            "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text",
            "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex",
            "HF_LOCAL_MODEL_DIR": str(model_root),
        }
    )

    subprocess.run(
        ["bash", str(sync_script), "upload-model"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Workspace uploads carry the literal repo type "model" as their third
    # recorded argument.
    workspace_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) >= 5
        and payload["argv"][2] == "model"
    ]
    # Card uploads carry the model-card path there and have exactly five
    # recorded arguments.
    card_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) == 5
        and payload["argv"][2] == str(model_card)
    ]

    assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"),
    ]
    assert all(payload["argv"][3].startswith("/tmp/") for payload in workspace_uploads)
    assert [
        (payload["argv"][1], payload["argv"][3], payload["argv"][4])
        for payload in card_uploads
    ] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync", "token"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)", "token"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)", "token"),
    ]
|
|
|
|
def test_upload_model_publishes_fallback_master_and_coder_dirs_without_manifest(tmp_path: Path) -> None:
    """Check that ``upload-model`` publishes master/coder dirs when no manifest exists.

    The layout has ``master`` and ``coder`` model sub-directories but no
    ``branch-suite.json`` file is written. A stub ``python3`` on ``PATH``
    appends every invocation as one JSON line to the file named by
    ``HF_TEST_LOG``. For the manifest-resolution call it prints the two
    fallback ``branch<TAB>directory<TAB>repo`` lines built from the model
    root it was given.

    The test asserts three workspace uploads, in order, for the master, text
    and codex repositories with their expected commit messages.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    sync_script = hf_dir / "sync.sh"
    model_root = repo_root / "core-python" / "output" / "model"

    hf_dir.mkdir(parents=True)
    (repo_root / "core-python" / "output").mkdir(parents=True)
    (model_root / "master").mkdir(parents=True)
    (model_root / "coder").mkdir(parents=True)
    sync_script.write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (hf_dir / "model-card.md").write_text("# Model card\n", encoding="utf-8")
    (model_root / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "master" / "config.json").write_text("{}", encoding="utf-8")
    (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8")

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    # The stub is real Python source. The early exit for non-manifest calls
    # is nested one level deeper than the fallback lines that follow it.
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                "    handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                "    raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                "    if len(sys.argv) < 7 or not sys.argv[2].endswith('branch-suite.json'):",
                "        raise SystemExit(0)",
                "    model_root = sys.argv[3]",
                "    text_repo = sys.argv[5]",
                "    codex_repo = sys.argv[6]",
                "    print(f'master\\t{model_root}/master\\t{text_repo}')",
                "    print(f'coder\\t{model_root}/coder\\t{codex_repo}')",
                "    raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_MODEL_REPO": "MarisUK/maris-ai-master",
            "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text",
            "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex",
            "HF_LOCAL_MODEL_DIR": str(model_root),
        }
    )

    subprocess.run(
        ["bash", str(sync_script), "upload-model"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    # Workspace uploads carry the literal repo type "model" as their third
    # recorded argument.
    workspace_uploads = [
        payload
        for payload in payloads
        if payload["argv"]
        and payload["argv"][0] == "-"
        and len(payload["argv"]) >= 5
        and payload["argv"][2] == "model"
    ]

    assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [
        ("MarisUK/maris-ai-master", "Maris AI model sync"),
        ("MarisUK/maris-ai-text", "Maris AI model sync (master)"),
        ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"),
    ]
|
|
|
|
def test_upload_dataset_also_publishes_optional_eval_repo(tmp_path: Path) -> None:
    """Check that ``upload-dataset`` publishes the main dataset and the eval dataset.

    The temporary repository holds a ``data`` and an ``eval-data`` directory,
    the two dataset cards and a placeholder ``validate_datasets.py``. A stub
    ``python3`` on ``PATH`` appends every invocation as one JSON line to the
    file named by ``HF_TEST_LOG`` and exits successfully for ``-c``,
    validation and ``-`` calls.

    The test asserts two validation runs (``bootstrap`` profile for ``data``,
    ``eval`` profile for ``eval-data``) and exactly four uploads: dataset
    contents then card for the memory repository, followed by the same pair
    for the eval repository.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    sync_script = hf_dir / "sync.sh"
    data_dir = repo_root / "data"
    eval_dir = repo_root / "eval-data"

    hf_dir.mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (data_dir / "conversation").mkdir(parents=True)
    (eval_dir / "conversation").mkdir(parents=True)
    sync_script.write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (hf_dir / "dataset-card.md").write_text("# Dataset card\n", encoding="utf-8")
    (hf_dir / "eval-dataset-card.md").write_text("# Eval dataset card\n", encoding="utf-8")
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (data_dir / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    (eval_dir / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                " handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                " raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                " raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                " raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    # This scenario asserts an exact number of uploads and validations, so
    # optional publish targets inherited from the caller's shell or CI must
    # not reach the script. Removing them reproduces the clean-environment
    # state the assertions were written for.
    for inherited in (
        "HF_GLOBAL_MEMORY_REPO",
        "HF_LOCAL_GLOBAL_DATASET_DIR",
        "HF_BENCHMARK_DATASET_REPO",
        "HF_LOCAL_BENCHMARK_DATASET_DIR",
    ):
        env.pop(inherited, None)
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_LOCAL_DATASET_DIR": str(data_dir),
            "HF_LOCAL_EVAL_DATASET_DIR": str(eval_dir),
        }
    )

    subprocess.run(
        ["bash", str(sync_script), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    upload_payloads = [
        payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-"
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]

    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "bootstrap", str(data_dir)],
        ["--profile", "eval", str(eval_dir)],
    ]
    assert len(upload_payloads) == 4
    # The staging directory (slot 3) is generated at run time, so it is only
    # checked for its /tmp/ prefix rather than an exact value.
    assert upload_payloads[0]["argv"][1:5] == [
        "MarisUK/maris-ai-memory",
        "dataset",
        upload_payloads[0]["argv"][3],
        "Maris AI dataset sync",
    ]
    assert upload_payloads[0]["argv"][3].startswith("/tmp/")
    assert upload_payloads[1]["argv"][1:5] == [
        "MarisUK/maris-ai-memory",
        str(hf_dir / "dataset-card.md"),
        "Maris AI dataset sync",
        "token",
    ]
    assert upload_payloads[2]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        "dataset",
        upload_payloads[2]["argv"][3],
        "Maris AI eval dataset sync",
    ]
    assert upload_payloads[2]["argv"][3].startswith("/tmp/")
    assert upload_payloads[3]["argv"][1:5] == [
        "MarisUK/maris-ai-evals",
        str(hf_dir / "eval-dataset-card.md"),
        "Maris AI eval dataset sync",
        "token",
    ]
|
|
|
|
def test_upload_dataset_can_publish_global_memory_eval_and_benchmark_repos(
    tmp_path: Path,
) -> None:
    """Run ``upload-dataset`` with memory, global-memory, eval and benchmark targets.

    The global-memory repository is configured with the same id as the main
    dataset repository. The test asserts three validation runs, six recorded
    uploads, the "Izlaižu global memory upload" message on stdout, and that
    the last two uploads are the benchmark dataset contents and its card.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    scripts_dir = repo_root / "core-python" / "scripts"
    sync_script = hf_dir / "sync.sh"

    hf_dir.mkdir(parents=True)
    scripts_dir.mkdir(parents=True)
    for dataset_name in ("data", "eval-data", "benchmark-data"):
        (repo_root / dataset_name / "conversation").mkdir(parents=True)

    sync_script.write_text(SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8")
    (hf_dir / "dataset-card.md").write_text("# Dataset card\n", encoding="utf-8")
    (hf_dir / "global-memory-dataset-card.md").write_text(
        "# Global memory dataset card\n", encoding="utf-8"
    )
    (hf_dir / "eval-dataset-card.md").write_text("# Eval dataset card\n", encoding="utf-8")
    (hf_dir / "benchmark-dataset-card.md").write_text(
        "# Benchmark dataset card\n", encoding="utf-8"
    )
    (scripts_dir / "validate_datasets.py").write_text("print('ok')\n", encoding="utf-8")

    main_record = '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n'
    eval_record = (
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1",'
        '"user":"u","assistant":"a","language":"lv","source":"test","task_id":"eval-1",'
        '"benchmark_version":"maris-evals-v1","suite":"sanity","difficulty":"easy",'
        '"evaluation_mode":"reference-review","risk_level":"low","expected_behavior":["ok"],'
        '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n'
    )
    benchmark_record = (
        '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1",'
        '"user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1",'
        '"benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium",'
        '"evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],'
        '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n'
    )
    (repo_root / "data" / "conversation" / "sample.jsonl").write_text(
        main_record, encoding="utf-8"
    )
    (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text(
        eval_record, encoding="utf-8"
    )
    (repo_root / "benchmark-data" / "conversation" / "sample.jsonl").write_text(
        benchmark_record, encoding="utf-8"
    )

    # Stub interpreter: logs each call as a JSON line, then exits 0 for the
    # "-c", validation-script and "-" forms and fails loudly for anything else.
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_stub = stub_dir / "python3"
    stub_source = (
        f"#!{sys.executable}\n"
        "import json, os, sys\n"
        "log_path = os.environ['HF_TEST_LOG']\n"
        "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}\n"
        "with open(log_path, 'a', encoding='utf-8') as handle:\n"
        " handle.write(json.dumps(entry) + '\\n')\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-c':\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-':\n"
        " raise SystemExit(0)\n"
        "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')\n"
    )
    python_stub.write_text(stub_source, encoding="utf-8")
    current_mode = python_stub.stat().st_mode
    python_stub.chmod(current_mode | stat.S_IEXEC)

    env = {
        **os.environ,
        "PATH": f"{stub_dir}:{os.environ['PATH']}",
        "HF_TEST_LOG": str(log_path),
        "MARIS_REPO_TOKEN": "token",
        "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
        "HF_GLOBAL_MEMORY_REPO": "MarisUK/maris-ai-memory",
        "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
        "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark",
        "HF_LOCAL_DATASET_DIR": str(repo_root / "data"),
        "HF_LOCAL_GLOBAL_DATASET_DIR": str(repo_root / "data"),
        "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"),
        "HF_LOCAL_BENCHMARK_DATASET_DIR": str(repo_root / "benchmark-data"),
    }

    result = subprocess.run(
        ["bash", str(sync_script), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    # Split the recorded invocations into uploads ("-") and validation runs.
    upload_argvs = []
    validate_argvs = []
    for raw_line in log_path.read_text(encoding="utf-8").splitlines():
        if not raw_line.strip():
            continue
        argv = json.loads(raw_line)["argv"]
        if not argv:
            continue
        if argv[0] == "-":
            upload_argvs.append(argv)
        if argv[0].endswith("validate_datasets.py"):
            validate_argvs.append(argv)

    assert [argv[1:] for argv in validate_argvs] == [
        ["--profile", "bootstrap", str(repo_root / "data")],
        ["--profile", "eval", str(repo_root / "eval-data")],
        ["--profile", "eval", str(repo_root / "benchmark-data")],
    ]
    assert len(upload_argvs) == 6
    assert "Izlaižu global memory upload" in result.stdout

    benchmark_data_argv = upload_argvs[4]
    benchmark_card_argv = upload_argvs[5]
    assert benchmark_data_argv[1:5] == [
        "MarisUK/maris-ai-benchmark",
        "dataset",
        benchmark_data_argv[3],
        "Maris AI benchmark dataset sync",
    ]
    assert benchmark_card_argv[1:5] == [
        "MarisUK/maris-ai-benchmark",
        str(hf_dir / "benchmark-dataset-card.md"),
        "Maris AI benchmark dataset sync",
        "token",
    ]
|
|
|
|
def test_upload_dataset_uses_bundled_eval_data_by_default(tmp_path: Path) -> None:
    """Check that ``upload-dataset`` validates ``<repo>/eval-data`` when no eval dir is set.

    The eval repository is configured but ``HF_LOCAL_EVAL_DATASET_DIR`` is
    deliberately left unset. A stub ``python3`` on ``PATH`` appends every
    invocation as one JSON line to the file named by ``HF_TEST_LOG``. The
    test asserts that validation ran with the ``bootstrap`` profile on
    ``data`` and with the ``eval`` profile on the repository's own
    ``eval-data`` directory.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    sync_script = hf_dir / "sync.sh"
    data_dir = repo_root / "data"
    eval_dir = repo_root / "eval-data"

    hf_dir.mkdir(parents=True)
    (repo_root / "core-python" / "scripts").mkdir(parents=True)
    (data_dir / "conversation").mkdir(parents=True)
    (eval_dir / "conversation").mkdir(parents=True)
    sync_script.write_text(
        SYNC_SCRIPT_PATH.read_text(encoding="utf-8"),
        encoding="utf-8",
    )
    (hf_dir / "dataset-card.md").write_text("# Dataset card\n", encoding="utf-8")
    (hf_dir / "eval-dataset-card.md").write_text("# Eval dataset card\n", encoding="utf-8")
    (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text(
        "print('ok')\n",
        encoding="utf-8",
    )
    (data_dir / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )
    (eval_dir / "conversation" / "sample.jsonl").write_text(
        '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n',
        encoding="utf-8",
    )

    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_wrapper = fake_bin / "python3"
    python_wrapper.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json, os, sys",
                "log_path = os.environ['HF_TEST_LOG']",
                "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}",
                "with open(log_path, 'a', encoding='utf-8') as handle:",
                " handle.write(json.dumps(entry) + '\\n')",
                "if len(sys.argv) > 1 and sys.argv[1] == '-c':",
                " raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):",
                " raise SystemExit(0)",
                "if len(sys.argv) > 1 and sys.argv[1] == '-':",
                " raise SystemExit(0)",
                "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')",
                "",
            ]
        ),
        encoding="utf-8",
    )
    python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC)

    env = os.environ.copy()
    # The point of this test is the *default* eval directory, so an inherited
    # HF_LOCAL_EVAL_DATASET_DIR would defeat it. The other optional dataset
    # targets are removed as well, because the exact validation list below
    # assumes they are not configured.
    for inherited in (
        "HF_LOCAL_EVAL_DATASET_DIR",
        "HF_GLOBAL_MEMORY_REPO",
        "HF_LOCAL_GLOBAL_DATASET_DIR",
        "HF_BENCHMARK_DATASET_REPO",
        "HF_LOCAL_BENCHMARK_DATASET_DIR",
    ):
        env.pop(inherited, None)
    env.update(
        {
            "PATH": f"{fake_bin}:{env['PATH']}",
            "HF_TEST_LOG": str(log_path),
            "MARIS_REPO_TOKEN": "token",
            "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
            "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
            "HF_LOCAL_DATASET_DIR": str(data_dir),
        }
    )

    subprocess.run(
        ["bash", str(sync_script), "upload-dataset"],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    payloads = [
        json.loads(line)
        for line in log_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    validate_payloads = [
        payload
        for payload in payloads
        if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py")
    ]

    assert [payload["argv"][1:] for payload in validate_payloads] == [
        ["--profile", "bootstrap", str(data_dir)],
        ["--profile", "eval", str(eval_dir)],
    ]
|
|
|
|
@pytest.mark.parametrize("command", ["upload-benchmark-dataset", "upload-benchmark-data"])
def test_upload_benchmark_dataset_only_publishes_benchmark_repo(
    tmp_path: Path, command: str
) -> None:
    """Run either benchmark upload command and expect only benchmark activity.

    The test asserts one validation run (``eval`` profile on
    ``benchmark-data``) and exactly two uploads: the benchmark dataset
    contents followed by the benchmark dataset card.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    scripts_dir = repo_root / "core-python" / "scripts"
    benchmark_dir = repo_root / "benchmark-data"
    sync_script = hf_dir / "sync.sh"
    benchmark_card = hf_dir / "benchmark-dataset-card.md"

    hf_dir.mkdir(parents=True)
    scripts_dir.mkdir(parents=True)
    (benchmark_dir / "conversation").mkdir(parents=True)

    sync_script.write_text(SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8")
    benchmark_card.write_text("# Benchmark dataset card\n", encoding="utf-8")
    (scripts_dir / "validate_datasets.py").write_text("print('ok')\n", encoding="utf-8")
    benchmark_record = (
        '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1",'
        '"user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1",'
        '"benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium",'
        '"evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],'
        '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n'
    )
    (benchmark_dir / "conversation" / "sample.jsonl").write_text(
        benchmark_record, encoding="utf-8"
    )

    # Stub interpreter: logs each call as a JSON line, then exits 0 for the
    # "-c", validation-script and "-" forms and fails loudly for anything else.
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_stub = stub_dir / "python3"
    stub_source = (
        f"#!{sys.executable}\n"
        "import json, os, sys\n"
        "log_path = os.environ['HF_TEST_LOG']\n"
        "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}\n"
        "with open(log_path, 'a', encoding='utf-8') as handle:\n"
        " handle.write(json.dumps(entry) + '\\n')\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-c':\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-':\n"
        " raise SystemExit(0)\n"
        "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')\n"
    )
    python_stub.write_text(stub_source, encoding="utf-8")
    current_mode = python_stub.stat().st_mode
    python_stub.chmod(current_mode | stat.S_IEXEC)

    env = {
        **os.environ,
        "PATH": f"{stub_dir}:{os.environ['PATH']}",
        "HF_TEST_LOG": str(log_path),
        "MARIS_REPO_TOKEN": "token",
        "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
        "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark",
        "HF_LOCAL_BENCHMARK_DATASET_DIR": str(benchmark_dir),
    }

    subprocess.run(
        ["bash", str(sync_script), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    # Split the recorded invocations into uploads ("-") and validation runs.
    upload_argvs = []
    validate_argvs = []
    for raw_line in log_path.read_text(encoding="utf-8").splitlines():
        if not raw_line.strip():
            continue
        argv = json.loads(raw_line)["argv"]
        if not argv:
            continue
        if argv[0] == "-":
            upload_argvs.append(argv)
        if argv[0].endswith("validate_datasets.py"):
            validate_argvs.append(argv)

    assert [argv[1:] for argv in validate_argvs] == [
        ["--profile", "eval", str(benchmark_dir)]
    ]
    assert len(upload_argvs) == 2

    data_argv, card_argv = upload_argvs
    assert data_argv[1:5] == [
        "MarisUK/maris-ai-benchmark",
        "dataset",
        data_argv[3],
        "Maris AI benchmark dataset sync",
    ]
    assert card_argv[1:5] == [
        "MarisUK/maris-ai-benchmark",
        str(benchmark_card),
        "Maris AI benchmark dataset sync",
        "token",
    ]
|
|
|
|
@pytest.mark.parametrize("command", ["upload-eval-dataset", "upload-evals-data"])
def test_upload_eval_dataset_only_publishes_eval_repo(tmp_path: Path, command: str) -> None:
    """Run either eval upload command and expect only eval-repository activity.

    Both the main and the eval dataset are configured. The test asserts one
    validation run (``eval`` profile on ``eval-data``) and exactly two
    uploads: the eval dataset contents, staged under ``/tmp/``, followed by
    the eval dataset card.
    """
    repo_root = tmp_path / "repo"
    hf_dir = repo_root / "huggingface"
    scripts_dir = repo_root / "core-python" / "scripts"
    data_dir = repo_root / "data"
    eval_dir = repo_root / "eval-data"
    sync_script = hf_dir / "sync.sh"
    eval_card = hf_dir / "eval-dataset-card.md"

    hf_dir.mkdir(parents=True)
    scripts_dir.mkdir(parents=True)
    (data_dir / "conversation").mkdir(parents=True)
    (eval_dir / "conversation").mkdir(parents=True)

    sync_script.write_text(SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8")
    (hf_dir / "dataset-card.md").write_text("# Dataset card\n", encoding="utf-8")
    eval_card.write_text("# Eval dataset card\n", encoding="utf-8")
    (scripts_dir / "validate_datasets.py").write_text("print('ok')\n", encoding="utf-8")

    main_record = '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n'
    eval_record = '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n'
    (data_dir / "conversation" / "sample.jsonl").write_text(main_record, encoding="utf-8")
    (eval_dir / "conversation" / "sample.jsonl").write_text(eval_record, encoding="utf-8")

    # Stub interpreter: logs each call as a JSON line, then exits 0 for the
    # "-c", validation-script and "-" forms and fails loudly for anything else.
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    log_path = tmp_path / "python-log.jsonl"
    python_stub = stub_dir / "python3"
    stub_source = (
        f"#!{sys.executable}\n"
        "import json, os, sys\n"
        "log_path = os.environ['HF_TEST_LOG']\n"
        "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}\n"
        "with open(log_path, 'a', encoding='utf-8') as handle:\n"
        " handle.write(json.dumps(entry) + '\\n')\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-c':\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):\n"
        " raise SystemExit(0)\n"
        "if len(sys.argv) > 1 and sys.argv[1] == '-':\n"
        " raise SystemExit(0)\n"
        "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')\n"
    )
    python_stub.write_text(stub_source, encoding="utf-8")
    current_mode = python_stub.stat().st_mode
    python_stub.chmod(current_mode | stat.S_IEXEC)

    env = {
        **os.environ,
        "PATH": f"{stub_dir}:{os.environ['PATH']}",
        "HF_TEST_LOG": str(log_path),
        "MARIS_REPO_TOKEN": "token",
        "HF_DATASET_REPO": "MarisUK/maris-ai-memory",
        "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals",
        "HF_LOCAL_DATASET_DIR": str(data_dir),
        "HF_LOCAL_EVAL_DATASET_DIR": str(eval_dir),
    }

    subprocess.run(
        ["bash", str(sync_script), command],
        cwd=repo_root,
        env=env,
        check=True,
        capture_output=True,
        text=True,
    )

    # Split the recorded invocations into uploads ("-") and validation runs.
    upload_argvs = []
    validate_argvs = []
    for raw_line in log_path.read_text(encoding="utf-8").splitlines():
        if not raw_line.strip():
            continue
        argv = json.loads(raw_line)["argv"]
        if not argv:
            continue
        if argv[0] == "-":
            upload_argvs.append(argv)
        if argv[0].endswith("validate_datasets.py"):
            validate_argvs.append(argv)

    assert [argv[1:] for argv in validate_argvs] == [
        ["--profile", "eval", str(eval_dir)]
    ]
    assert len(upload_argvs) == 2

    data_argv, card_argv = upload_argvs
    assert data_argv[1:5] == [
        "MarisUK/maris-ai-evals",
        "dataset",
        data_argv[3],
        "Maris AI eval dataset sync",
    ]
    assert data_argv[3].startswith("/tmp/")
    assert card_argv[1:5] == [
        "MarisUK/maris-ai-evals",
        str(eval_card),
        "Maris AI eval dataset sync",
        "token",
    ]
|
|