from __future__ import annotations import json import os import stat import subprocess import sys from pathlib import Path import pytest SYNC_SCRIPT_PATH = Path(__file__).resolve().parents[2] / "huggingface" / "sync.sh" @pytest.mark.parametrize( ( "command", "repo_env", "repo_id", "space_dir_name", "local_space_dir_env", "commit_message", "sdk_env", "sdk_value", ), [ ( "upload-space", "MARIS_AGENT_SPACE_REPO", "MarisUK/maris.ai.agent", "huggingface_space", "MARIS_LOCAL_AGENT_DIR", "Maris AI Space sync", None, "docker", ), ( "upload-chat-space", "MARIS_CHAT_SPACE_REPO", "MarisUK/maris.ai.chat", "huggingface_chat_space", "MARIS_LOCAL_CHAT_AGENT_DIR", "Maris AI chat Space sync", None, "docker", ), ( "upload-human-training-space", "MARIS_HUMAN_TRAINING_SPACE_REPO", "MarisUK/maris.ai.human.training", "huggingface_human_training_space", "MARIS_LOCAL_HUMAN_TRAINING_SPACE_DIR", "Maris AI human training Space sync", None, "docker", ), ( "upload-space", "MARIS_AGENT_SPACE_REPO", "MarisUK/custom.space", "huggingface_space", "MARIS_LOCAL_AGENT_DIR", "Maris AI Space sync", "HF_SPACE_SDK", "gradio", ), ], ) def test_space_uploads_pass_space_sdk( tmp_path: Path, command: str, repo_env: str, repo_id: str, space_dir_name: str, local_space_dir_env: str, commit_message: str, sdk_env: str | None, sdk_value: str, ) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python").mkdir() (repo_root / space_dir_name).mkdir() (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / space_dir_name / "Dockerfile").write_text( "FROM python:3.11-slim\n", encoding="utf-8" ) (repo_root / space_dir_name / "README.md").write_text( "---\nsdk: docker\n---\n", encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.json" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " payload = {'argv': sys.argv[2:], 'stdin': sys.stdin.read()}", " with open(log_path, 'w', encoding='utf-8') as handle:", " json.dump(payload, handle)", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", repo_env: repo_id, local_space_dir_env: str(repo_root / space_dir_name), } ) if sdk_env is not None: env[sdk_env] = sdk_value subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), command], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payload = json.loads(log_path.read_text(encoding="utf-8")) assert payload["argv"][0] == repo_id assert payload["argv"][1] == "space" assert payload["argv"][2].startswith("/tmp/") assert payload["argv"][3] == commit_message assert payload["argv"][4:] == ["token", "", sdk_value] def test_upload_model_publishes_branch_suite_text_and_codex_repos(tmp_path: Path) -> None: repo_root = tmp_path / "repo" model_root = repo_root / "core-python" / "output" / "model" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "output").mkdir(parents=True) (model_root / "master").mkdir(parents=True) (model_root / "coder").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "model-card.md").write_text( "# Model card\n", encoding="utf-8", ) (model_root / "config.json").write_text("{}", encoding="utf-8") (model_root / "master" / "config.json").write_text("{}", encoding="utf-8") (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8") (model_root / "branch-suite.json").write_text( json.dumps( { "branches": { "master": {"output_dir": "core-python/output/model/master"}, "coder": {"output_dir": "core-python/output/model/coder"}, } } ), encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " if len(sys.argv) >= 7 and sys.argv[2].endswith('branch-suite.json'):", " manifest_path = sys.argv[2]", " repo_root = sys.argv[4]", " text_repo = sys.argv[5]", " codex_repo = sys.argv[6]", " payload = json.load(open(manifest_path, encoding='utf-8'))", " branches = payload.get('branches', {})", " for branch_name, repo_id in (('master', text_repo), ('coder', codex_repo)):", " output_dir = branches.get(branch_name, {}).get('output_dir', '')", " if output_dir and repo_id:", " print(f'{branch_name}\\t{repo_root}/{output_dir}\\t{repo_id}')", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_MODEL_REPO": "MarisUK/maris-ai-master", "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text", "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex", "HF_LOCAL_MODEL_DIR": str(model_root), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-model"], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] workspace_uploads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" and len(payload["argv"]) >= 5 and payload["argv"][2] == "model" ] card_uploads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" and len(payload["argv"]) == 5 and payload["argv"][2] == str(repo_root / "huggingface" / "model-card.md") ] assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [ ("MarisUK/maris-ai-master", "Maris AI model sync"), ("MarisUK/maris-ai-text", "Maris AI model sync (master)"), ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"), ] assert all(payload["argv"][3].startswith("/tmp/") for payload in workspace_uploads) assert [(payload["argv"][1], payload["argv"][3], payload["argv"][4]) for payload in card_uploads] == [ ("MarisUK/maris-ai-master", "Maris AI model sync", "token"), ("MarisUK/maris-ai-text", "Maris AI model sync (master)", "token"), ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)", "token"), ] def test_upload_model_publishes_fallback_master_and_coder_dirs_without_manifest(tmp_path: Path) -> None: repo_root = tmp_path / "repo" model_root = repo_root / "core-python" / "output" / "model" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "output").mkdir(parents=True) (model_root / "master").mkdir(parents=True) (model_root / "coder").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "model-card.md").write_text( "# Model card\n", encoding="utf-8", ) (model_root / "config.json").write_text("{}", encoding="utf-8") (model_root / "master" / "config.json").write_text("{}", encoding="utf-8") (model_root / "coder" / "config.json").write_text("{}", encoding="utf-8") fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " if len(sys.argv) < 7 or not sys.argv[2].endswith('branch-suite.json'):", " raise SystemExit(0)", " model_root = sys.argv[3]", " text_repo = sys.argv[5]", " codex_repo = sys.argv[6]", " print(f'master\\t{model_root}/master\\t{text_repo}')", " print(f'coder\\t{model_root}/coder\\t{codex_repo}')", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_MODEL_REPO": "MarisUK/maris-ai-master", "HF_TEXT_MODEL_REPO": "MarisUK/maris-ai-text", "HF_CODEX_MODEL_REPO": "MarisUK/maris-ai-codex", "HF_LOCAL_MODEL_DIR": str(model_root), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-model"], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] workspace_uploads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" and len(payload["argv"]) >= 5 and payload["argv"][2] == "model" ] assert [(payload["argv"][1], payload["argv"][4]) for payload in workspace_uploads] == [ ("MarisUK/maris-ai-master", "Maris AI model sync"), ("MarisUK/maris-ai-text", "Maris AI model sync (master)"), ("MarisUK/maris-ai-codex", "Maris AI model sync (coder)"), ] def test_upload_dataset_also_publishes_optional_eval_repo(tmp_path: Path) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "scripts").mkdir(parents=True) (repo_root / "data" / "conversation").mkdir(parents=True) (repo_root / "eval-data" / "conversation").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "dataset-card.md").write_text( "# Dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "eval-dataset-card.md").write_text( "# Eval dataset card\n", encoding="utf-8", ) (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text( "print('ok')\n", encoding="utf-8", ) (repo_root / "data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_DATASET_REPO": "MarisUK/maris-ai-memory", "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals", "HF_LOCAL_DATASET_DIR": str(repo_root / "data"), "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] upload_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" ] validate_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py") ] assert [payload["argv"][1:] for payload in validate_payloads] == [ ["--profile", "bootstrap", str(repo_root / "data")], ["--profile", "eval", str(repo_root / "eval-data")], ] assert len(upload_payloads) == 4 assert upload_payloads[0]["argv"][1:5] == [ "MarisUK/maris-ai-memory", "dataset", upload_payloads[0]["argv"][3], "Maris AI dataset sync", ] assert upload_payloads[0]["argv"][3].startswith("/tmp/") assert upload_payloads[1]["argv"][1:5] == [ "MarisUK/maris-ai-memory", str(repo_root / "huggingface" / "dataset-card.md"), "Maris AI dataset sync", "token", ] assert upload_payloads[2]["argv"][1:5] == [ "MarisUK/maris-ai-evals", "dataset", upload_payloads[2]["argv"][3], "Maris AI eval dataset sync", ] assert upload_payloads[2]["argv"][3].startswith("/tmp/") assert upload_payloads[3]["argv"][1:5] == [ "MarisUK/maris-ai-evals", str(repo_root / "huggingface" / "eval-dataset-card.md"), "Maris AI eval dataset sync", "token", ] def test_upload_dataset_can_publish_global_memory_eval_and_benchmark_repos( tmp_path: Path, ) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "scripts").mkdir(parents=True) for dataset_dir in ("data", "eval-data", "benchmark-data"): (repo_root / dataset_dir / "conversation").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "dataset-card.md").write_text( "# Dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "global-memory-dataset-card.md").write_text( "# Global memory dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "eval-dataset-card.md").write_text( "# Eval dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "benchmark-dataset-card.md").write_text( "# Benchmark dataset card\n", encoding="utf-8", ) (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text( "print('ok')\n", encoding="utf-8", ) (repo_root / "data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) eval_record = ( '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1",' '"user":"u","assistant":"a","language":"lv","source":"test","task_id":"eval-1",' '"benchmark_version":"maris-evals-v1","suite":"sanity","difficulty":"easy",' '"evaluation_mode":"reference-review","risk_level":"low","expected_behavior":["ok"],' '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n' ) (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text( eval_record, encoding="utf-8", ) benchmark_record = ( '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1",' '"user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1",' '"benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium",' '"evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],' '"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n' ) (repo_root / "benchmark-data" / "conversation" / "sample.jsonl").write_text( benchmark_record, encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_DATASET_REPO": "MarisUK/maris-ai-memory", "HF_GLOBAL_MEMORY_REPO": "MarisUK/maris-ai-memory", "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals", "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark", "HF_LOCAL_DATASET_DIR": str(repo_root / "data"), "HF_LOCAL_GLOBAL_DATASET_DIR": str(repo_root / "data"), "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"), "HF_LOCAL_BENCHMARK_DATASET_DIR": str(repo_root / "benchmark-data"), } ) result = subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] upload_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" ] validate_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py") ] assert [payload["argv"][1:] for payload in validate_payloads] == [ ["--profile", "bootstrap", str(repo_root / "data")], ["--profile", "eval", str(repo_root / "eval-data")], ["--profile", "eval", str(repo_root / "benchmark-data")], ] assert len(upload_payloads) == 6 assert "IzlaiÅžu global memory upload" in result.stdout assert upload_payloads[4]["argv"][1:5] == [ "MarisUK/maris-ai-benchmark", "dataset", upload_payloads[4]["argv"][3], "Maris AI benchmark dataset sync", ] assert upload_payloads[5]["argv"][1:5] == [ "MarisUK/maris-ai-benchmark", str(repo_root / "huggingface" / "benchmark-dataset-card.md"), "Maris AI benchmark dataset sync", "token", ] def test_upload_dataset_uses_bundled_eval_data_by_default(tmp_path: Path) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "scripts").mkdir(parents=True) (repo_root / "data" / "conversation").mkdir(parents=True) (repo_root / "eval-data" / "conversation").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "dataset-card.md").write_text( "# Dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "eval-dataset-card.md").write_text( "# Eval dataset card\n", encoding="utf-8", ) (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text( "print('ok')\n", encoding="utf-8", ) (repo_root / "data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_DATASET_REPO": "MarisUK/maris-ai-memory", "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals", "HF_LOCAL_DATASET_DIR": str(repo_root / "data"), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), "upload-dataset"], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] validate_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py") ] assert [payload["argv"][1:] for payload in validate_payloads] == [ ["--profile", "bootstrap", str(repo_root / "data")], ["--profile", "eval", str(repo_root / "eval-data")], ] @pytest.mark.parametrize("command", ["upload-benchmark-dataset", "upload-benchmark-data"]) def test_upload_benchmark_dataset_only_publishes_benchmark_repo( tmp_path: Path, command: str ) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "scripts").mkdir(parents=True) (repo_root / "benchmark-data" / "conversation").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "benchmark-dataset-card.md").write_text( "# Benchmark dataset card\n", encoding="utf-8", ) (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text( "print('ok')\n", encoding="utf-8", ) (repo_root / "benchmark-data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:04:00Z","type":"conversation","session_id":"bench-1","user":"u","assistant":"a","language":"en","source":"test","task_id":"bench-1","benchmark_version":"maris-benchmark-v1","suite":"release","difficulty":"medium","evaluation_mode":"reference-review","risk_level":"high","expected_behavior":["ok"],"scoring_hints":["ok"],"reference_answer":"a","acceptance_criteria":["ok"]}\n', encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_DATASET_REPO": "MarisUK/maris-ai-memory", "HF_BENCHMARK_DATASET_REPO": "MarisUK/maris-ai-benchmark", "HF_LOCAL_BENCHMARK_DATASET_DIR": str(repo_root / "benchmark-data"), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), command], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] upload_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" ] validate_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py") ] assert [payload["argv"][1:] for payload in validate_payloads] == [ ["--profile", "eval", str(repo_root / "benchmark-data")] ] assert len(upload_payloads) == 2 assert upload_payloads[0]["argv"][1:5] == [ "MarisUK/maris-ai-benchmark", "dataset", upload_payloads[0]["argv"][3], "Maris AI benchmark dataset sync", ] assert upload_payloads[1]["argv"][1:5] == [ "MarisUK/maris-ai-benchmark", str(repo_root / "huggingface" / "benchmark-dataset-card.md"), "Maris AI benchmark dataset sync", "token", ] @pytest.mark.parametrize("command", ["upload-eval-dataset", "upload-evals-data"]) def test_upload_eval_dataset_only_publishes_eval_repo(tmp_path: Path, command: str) -> None: repo_root = tmp_path / "repo" (repo_root / "huggingface").mkdir(parents=True) (repo_root / "core-python" / "scripts").mkdir(parents=True) (repo_root / "data" / "conversation").mkdir(parents=True) (repo_root / "eval-data" / "conversation").mkdir(parents=True) (repo_root / "huggingface" / "sync.sh").write_text( SYNC_SCRIPT_PATH.read_text(encoding="utf-8"), encoding="utf-8", ) (repo_root / "huggingface" / "dataset-card.md").write_text( "# Dataset card\n", encoding="utf-8", ) (repo_root / "huggingface" / "eval-dataset-card.md").write_text( "# Eval dataset card\n", encoding="utf-8", ) (repo_root / "core-python" / "scripts" / "validate_datasets.py").write_text( "print('ok')\n", encoding="utf-8", ) (repo_root / "data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:02:00Z","type":"conversation","session_id":"main-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) (repo_root / "eval-data" / "conversation" / "sample.jsonl").write_text( '{"timestamp":"2026-04-06T00:03:00Z","type":"conversation","session_id":"eval-1","user":"u","assistant":"a","language":"lv","source":"test"}\n', encoding="utf-8", ) fake_bin = tmp_path / "bin" fake_bin.mkdir() log_path = tmp_path / "python-log.jsonl" python_wrapper = fake_bin / "python3" python_wrapper.write_text( "\n".join( [ f"#!{sys.executable}", "import json, os, sys", "log_path = os.environ['HF_TEST_LOG']", "entry = {'argv': sys.argv[1:], 'stdin': sys.stdin.read()}", "with open(log_path, 'a', encoding='utf-8') as handle:", " handle.write(json.dumps(entry) + '\\n')", "if len(sys.argv) > 1 and sys.argv[1] == '-c':", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1].endswith('validate_datasets.py'):", " raise SystemExit(0)", "if len(sys.argv) > 1 and sys.argv[1] == '-':", " raise SystemExit(0)", "raise SystemExit(f'unexpected python3 invocation: {sys.argv!r}')", "", ] ), encoding="utf-8", ) python_wrapper.chmod(python_wrapper.stat().st_mode | stat.S_IEXEC) env = os.environ.copy() env.update( { "PATH": f"{fake_bin}:{env['PATH']}", "HF_TEST_LOG": str(log_path), "MARIS_REPO_TOKEN": "token", "HF_DATASET_REPO": "MarisUK/maris-ai-memory", "HF_EVAL_DATASET_REPO": "MarisUK/maris-ai-evals", "HF_LOCAL_DATASET_DIR": str(repo_root / "data"), "HF_LOCAL_EVAL_DATASET_DIR": str(repo_root / "eval-data"), } ) subprocess.run( ["bash", str(repo_root / "huggingface" / "sync.sh"), command], cwd=repo_root, env=env, check=True, capture_output=True, text=True, ) payloads = [ json.loads(line) for line in log_path.read_text(encoding="utf-8").splitlines() if line.strip() ] upload_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0] == "-" ] validate_payloads = [ payload for payload in payloads if payload["argv"] and payload["argv"][0].endswith("validate_datasets.py") ] assert [payload["argv"][1:] for payload in validate_payloads] == [ ["--profile", "eval", str(repo_root / "eval-data")] ] assert len(upload_payloads) == 2 assert upload_payloads[0]["argv"][1:5] == [ "MarisUK/maris-ai-evals", "dataset", upload_payloads[0]["argv"][3], "Maris AI eval dataset sync", ] assert upload_payloads[0]["argv"][3].startswith("/tmp/") assert upload_payloads[1]["argv"][1:5] == [ "MarisUK/maris-ai-evals", str(repo_root / "huggingface" / "eval-dataset-card.md"), "Maris AI eval dataset sync", "token", ]