Spaces:
Running on Zero
Running on Zero
Bellok
feat: Add support for remote pack loading with environment configuration and enhanced metadata handling
1c68bde | from unittest.mock import patch | |
def test_build_allow_patterns_limits_chunked_remote_pack_downloads():
    """Verify which repo files ``build_allow_patterns`` selects for download.

    The loader's repository listing and per-pack metadata lookup are both
    replaced with stubs, so only the pattern-building logic runs. The arxiv
    pack is reported as chunked; every other pack is reported as not chunked.
    Both core-pack files, the arxiv ``package.json`` and the first arxiv chunk
    must be selected, while the second arxiv chunk must not be.
    """
    from warbler_cda.remote_pack_loader import RemotePackLoader

    chunked_pack = "warbler-pack-hf-arxiv"

    def stub_pack_metadata(pack_name):
        # A fresh dict is returned on every call, as the original lambda did.
        if pack_name == chunked_pack:
            return {
                "chunked": True,
                "docs_per_chunk": 50000,
            }
        return {"chunked": False}

    repo_files = [
        "packs/warbler-pack-core/package.json",
        "packs/warbler-pack-core/pack/templates.json",
        "packs/warbler-pack-hf-arxiv/package.json",
        "packs/warbler-pack-hf-arxiv/warbler-pack-hf-arxiv-chunk-001.jsonl",
        "packs/warbler-pack-hf-arxiv/warbler-pack-hf-arxiv-chunk-002.jsonl",
        "packs/warbler-pack-hf-arxiv/warbler-pack-hf-arxiv-chunk-003.jsonl",
    ]
    # Everything up to and including the first arxiv chunk must be selected.
    must_be_selected = (
        "packs/warbler-pack-core/package.json",
        "packs/warbler-pack-core/pack/templates.json",
        "packs/warbler-pack-hf-arxiv/package.json",
        "packs/warbler-pack-hf-arxiv/warbler-pack-hf-arxiv-chunk-001.jsonl",
    )
    must_be_skipped = "packs/warbler-pack-hf-arxiv/warbler-pack-hf-arxiv-chunk-002.jsonl"

    loader = RemotePackLoader(
        repo_id="Bellok/warbler-cda-corpus",
        max_documents_per_pack=5000,
    )

    listing_patch = patch.object(loader, "_list_repo_files", return_value=repo_files)
    metadata_patch = patch.object(
        loader,
        "_load_pack_metadata",
        side_effect=stub_pack_metadata,
    )
    with listing_patch, metadata_patch:
        allow_patterns = loader.build_allow_patterns()

        for selected_path in must_be_selected:
            assert selected_path in allow_patterns
        # Presumably the 5000-document cap fits inside one 50000-document
        # chunk, so later chunks are not needed -- confirm against the loader.
        assert must_be_skipped not in allow_patterns
def test_from_environment_applies_hosted_defaults_for_remote_loader():
    """Verify the loader defaults produced when ``SPACE_ID`` is set.

    With ``SPACE_ID`` added to the process environment,
    ``RemotePackLoader.from_environment`` must return a loader whose
    ``max_documents_per_pack`` is 5000 and whose ``exclude_packs`` contains
    the tinystories pack.
    """
    from warbler_cda.remote_pack_loader import RemotePackLoader

    hosted_environment = {"SPACE_ID": "Bellok/warbler-cda"}

    # clear=False layers SPACE_ID over the existing environment instead of
    # replacing it.
    with patch.dict("os.environ", hosted_environment, clear=False):
        remote_loader = RemotePackLoader.from_environment("Bellok/warbler-cda-corpus")

        assert remote_loader.max_documents_per_pack == 5000
        assert "warbler-pack-hf-tinystories" in remote_loader.exclude_packs