File size: 1,390 Bytes
ca67673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Fetch the Laguna-XS.2 *source* (modeling code + config + tokenizer) — NOT the weights.

This pulls only the small Python/JSON files that define the architecture, so we can
instantiate a tiny random-weight Laguna on CPU for local development of the loop
wrapper. The 33B / ~66GB of `.safetensors` weights are deliberately excluded; those
are downloaded once, on the GPU box, for the real evaluation.

Run from the repo root:  uv run python scripts/fetch_laguna_src.py
"""

from pathlib import Path

from huggingface_hub import snapshot_download

# Hugging Face Hub repository id handed to `snapshot_download`.
REPO_ID = "poolside/Laguna-XS.2"
# Download target: `laguna_src/` next to the directory that contains this script
# (i.e. in the repo root when the script lives in `scripts/`).
DEST = Path(__file__).resolve().parent.parent / "laguna_src"


def main() -> None:
    """Download the Laguna source files into ``DEST`` and list what arrived.

    Only files matching an explicit allow-list are requested from ``REPO_ID``.
    Afterwards an ``__init__.py`` is ensured in ``DEST`` and the size and name
    of every top-level file there is printed.
    """
    # Allow-list, not a deny-list: anything that matches none of these patterns
    # (notably the `*.safetensors` weights) is simply never requested.
    wanted_patterns = [
        "*.py",
        "config.json",
        "generation_config.json",
        "tokenizer*",
        "special_tokens_map.json",
    ]
    snapshot_path = snapshot_download(
        REPO_ID,
        allow_patterns=wanted_patterns,
        local_dir=str(DEST),
    )

    # The modeling file uses a relative import
    # (`from .configuration_laguna import LagunaConfig`), so the directory has to
    # be a package. `touch` creates the marker if missing and leaves an existing
    # file's contents alone.
    package_marker = DEST / "__init__.py"
    package_marker.touch()

    print(f"Fetched Laguna source to {snapshot_path}")

    # Report top-level regular files only, in sorted path order.
    top_level_files = (entry for entry in sorted(DEST.iterdir()) if entry.is_file())
    for entry in top_level_files:
        print(f"  {entry.stat().st_size:>10,}  {entry.name}")


# Only fetch when run as a script; importing this module must not trigger a download.
if __name__ == "__main__":
    main()