Spaces:
Sleeping
Sleeping
File size: 7,383 Bytes
b82e5c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
#!/usr/bin/env python3
"""
Tests for scripts/push_to_huggingface.py focusing on model card creation/upload.
We mock Hugging Face Hub interactions and create dummy model folders to verify:
- Repo id resolution via whoami
- Repository creation call
- README.md upload with expected content (fallback simple card path)
- Uploading of model files from the directory
"""
import sys
import types
from pathlib import Path
def _repo_root() -> Path:
    """Return the directory two levels up from this file's resolved path.

    The parent of the folder containing this file is treated as the
    repository root by the helpers in this module.
    """
    this_file = Path(__file__).resolve()
    # parents[0] is the containing folder; parents[1] is that folder's parent.
    return this_file.parents[1]
def _add_scripts_to_path() -> None:
    """Put ``<repo root>/scripts`` at the front of ``sys.path`` if it is absent.

    Calling this more than once is harmless: the entry is only inserted when
    the exact string is not already present in ``sys.path``.
    """
    scripts_entry = str(_repo_root() / "scripts")
    if scripts_entry in sys.path:
        return
    # Index 0 so modules in scripts/ win over same-named modules elsewhere.
    sys.path.insert(0, scripts_entry)
def _make_full_model_dir(base: Path) -> Path:
    """Create ``base/full_model`` holding a stub config and an empty weight file.

    Args:
        base: Directory under which the ``full_model`` folder is created
            (missing parents are created; an existing folder is reused).

    Returns:
        Path to the ``full_model`` directory.
    """
    target_dir = base / "full_model"
    target_dir.mkdir(parents=True, exist_ok=True)

    config_file = target_dir / "config.json"
    config_file.write_text("{}", encoding="utf-8")

    # Zero-byte weights: only the file's presence is needed to satisfy validation.
    weights_file = target_dir / "model.safetensors"
    weights_file.write_bytes(b"")

    return target_dir
def _make_lora_model_dir(base: Path) -> Path:
    """Create ``base/lora_model`` holding stub LoRA adapter files.

    Args:
        base: Directory under which the ``lora_model`` folder is created
            (missing parents are created; an existing folder is reused).

    Returns:
        Path to the ``lora_model`` directory, which contains
        ``adapter_config.json`` (``{}``) and ``adapter_model.bin`` (one NUL byte).
    """
    lora_dir = base.joinpath("lora_model")
    lora_dir.mkdir(parents=True, exist_ok=True)
    lora_dir.joinpath("adapter_config.json").write_text("{}", encoding="utf-8")
    lora_dir.joinpath("adapter_model.bin").write_bytes(b"\x00")
    return lora_dir
def test_push_model_card_full_model(monkeypatch, tmp_path):
    """Push a dummy full-model directory with all Hub calls replaced by fakes.

    Verifies that:
    - ``push_model`` returns ``True``;
    - ``create_repo`` is called with the repo id prefixed by the ``whoami`` name;
    - a ``README.md`` is uploaded whose text contains recognisable card markers;
    - ``config.json`` and ``model.safetensors`` are uploaded.
    """
    _add_scripts_to_path()
    import push_to_huggingface as mod

    repo_requests = []
    uploads = []

    class DummyHfApi:
        """Minimal HfApi stand-in: keeps the token and reports a fixed account."""

        def __init__(self, token=None):
            self.token = token

        def whoami(self):
            return {"name": "testuser"}

    def fake_create_repo(*, repo_id, token=None, private=False, exist_ok=False, repo_type=None):
        # Record every keyword so assertions can inspect the request later.
        repo_requests.append(
            {
                "repo_id": repo_id,
                "token": token,
                "private": private,
                "exist_ok": exist_ok,
                "repo_type": repo_type,
            }
        )

    def fake_upload_file(*, path_or_fileobj, path_in_repo, repo_id, token, repo_type=None):
        source = Path(path_or_fileobj)
        text = None
        # is_file() is already False for a missing path, so no separate exists() check.
        if source.is_file():
            try:
                text = source.read_text(encoding="utf-8")
            except Exception:
                # Best effort: unreadable content is recorded as None.
                text = None
        uploads.append(
            {
                "path_in_repo": path_in_repo,
                "repo_id": repo_id,
                "token": token,
                "repo_type": repo_type,
                "content": text,
                "local_path": str(source),
            }
        )

    # Make the module believe the Hub client is available and swap in the fakes.
    stand_ins = {
        "HF_AVAILABLE": True,
        "HfApi": DummyHfApi,
        "create_repo": fake_create_repo,
        "upload_file": fake_upload_file,
    }
    for attr_name, replacement in stand_ins.items():
        monkeypatch.setattr(mod, attr_name, replacement, raising=False)

    full_model_dir = _make_full_model_dir(tmp_path)

    pusher = mod.HuggingFacePusher(
        model_path=str(full_model_dir),
        repo_name="my-repo",
        token="fake-token",
        private=True,
        author_name="Tester",
        model_description="Desc",
        model_name="BaseModel",
        dataset_name="DatasetX",
    )

    outcome = pusher.push_model(
        training_config={"param": 1},
        results={"train_loss": 0.1, "eval_loss": 0.2, "perplexity": 9.9},
    )
    assert outcome is True

    # The bare repo name must have been prefixed with the account from whoami().
    requested_ids = [request["repo_id"] for request in repo_requests]
    assert "testuser/my-repo" in requested_ids

    # The README may come from the card generator or a fallback card, so any
    # one of several marker strings is accepted.
    readme_uploads = [entry for entry in uploads if entry["path_in_repo"] == "README.md"]
    assert readme_uploads, "README.md was not uploaded"
    readme_text = readme_uploads[-1]["content"] or ""
    card_markers = ("fine-tuned Voxtral ASR model", "SmolLM3", "Model Details")
    assert any(marker in readme_text for marker in card_markers)
    assert "DatasetX" in readme_text or "Training Configuration" in readme_text

    # Both the config and the weight file must have been pushed.
    repo_paths = {entry["path_in_repo"] for entry in uploads}
    for expected_name in ("config.json", "model.safetensors"):
        assert expected_name in repo_paths
def test_push_model_card_lora_model_fallback(monkeypatch, tmp_path):
    """Push a dummy LoRA adapter directory while the card generator is forced to fail.

    A stub ``generate_model_card`` module whose generator always raises is
    registered in ``sys.modules`` for the duration of the test. The test then
    verifies that:
    - ``push_model`` still returns ``True``;
    - a ``README.md`` is uploaded whose text contains recognisable card markers;
    - at least one uploaded local file has a name starting with ``adapter_``.
    """
    _add_scripts_to_path()
    import push_to_huggingface as mod

    # Ensure the module thinks HF is available before patching API + functions.
    monkeypatch.setattr(mod, "HF_AVAILABLE", True, raising=False)

    upload_file_calls = []

    class DummyHfApi:
        """Minimal HfApi stand-in: keeps the token and reports a fixed account."""

        def __init__(self, token=None):
            self.token = token

        def whoami(self):
            # Uses the "username" key, unlike the full-model test's "name" key.
            # NOTE(review): presumably exercises an alternate lookup in the
            # pusher; confirm against push_to_huggingface.
            return {"username": "anotheruser"}

    def fake_create_repo(*, repo_id, token=None, private=False, exist_ok=False, repo_type=None):
        return None

    def fake_upload_file(*, path_or_fileobj, path_in_repo, repo_id, token, repo_type=None):
        path = Path(path_or_fileobj)
        content = None
        if path.is_file():
            try:
                content = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Best effort: binary or unreadable files are recorded without text.
                content = None
        upload_file_calls.append({
            "path_in_repo": path_in_repo,
            "repo_id": repo_id,
            "content": content,
            # Needed by the adapter-file assertion at the end of this test;
            # without it that assertion only ever sees an empty set.
            "local_path": str(path),
        })

    monkeypatch.setattr(mod, "HfApi", DummyHfApi, raising=False)
    monkeypatch.setattr(mod, "create_repo", fake_create_repo, raising=False)
    monkeypatch.setattr(mod, "upload_file", fake_upload_file, raising=False)

    # Stub generate_model_card module whose generator raises, to force the
    # pusher onto its fallback card path.
    dummy_mod = types.ModuleType("generate_model_card")

    class RaisingGen:
        def __init__(self, *args, **kwargs):
            pass

        def generate_model_card(self, variables):
            raise RuntimeError("force fallback")

    def default_vars():
        return {}

    dummy_mod.ModelCardGenerator = RaisingGen
    dummy_mod.create_default_variables = default_vars
    # setitem (not plain assignment) so the stub is removed at teardown and
    # cannot leak into other tests that import generate_model_card.
    monkeypatch.setitem(sys.modules, "generate_model_card", dummy_mod)

    # Prepare dummy LoRA model directory.
    model_dir = _make_lora_model_dir(tmp_path)

    pusher = mod.HuggingFacePusher(
        model_path=str(model_dir),
        repo_name="my-lora-repo",
        token="fake-token",
        private=False,
        author_name="Tester",
        model_description="Desc",
        model_name="BaseModel",
        dataset_name="DatasetY",
    )

    ok = pusher.push_model(training_config={}, results={})
    assert ok is True

    # README upload occurred and contains recognisable card content (LoRA).
    readme_calls = [c for c in upload_file_calls if c["path_in_repo"] == "README.md"]
    assert readme_calls, "README.md was not uploaded"
    readme_content = readme_calls[-1]["content"] or ""
    assert (
        "LoRA adapter for Voxtral ASR" in readme_content
        or "SmolLM3" in readme_content
        or "Model Details" in readme_content
    )
    assert "DatasetY" in readme_content or "Training Configuration" in readme_content

    # LoRA files uploaded: compare on the local file name so the check does not
    # depend on how the pusher lays files out inside the repo.
    uploaded_paths = {
        Path(c["local_path"]).name for c in upload_file_calls if c.get("local_path")
    }
    assert any(name.startswith("adapter_") for name in uploaded_paths)
|