"""Tests for ``MLStorageService.load_dataframe_by_asset_reference``."""

import pandas as pd
import pytest

from backend.ml_module.services.storage_service import MLStorageService


@pytest.fixture
def storage_service(monkeypatch):
    """Return an ``MLStorageService`` built with client initialisation stubbed.

    ``_initialize_client`` is replaced by a no-op before the constructor runs,
    so creating the service does not execute the real initialisation code.
    """
    monkeypatch.setattr(MLStorageService, "_initialize_client", lambda self: None)
    return MLStorageService()


def test_load_dataframe_by_asset_reference_uses_canonical_asset_path(
    storage_service, monkeypatch
):
    """The asset reference is resolved to ``tenant/files/asset/version/filename``.

    ``load_dataframe`` is swapped for a recording fake so the test can inspect
    the arguments it receives and confirm its return value is passed back.
    """
    expected_df = pd.DataFrame({"a": [1], "b": [2]})
    calls = {}

    def _fake_load_dataframe(path: str, chunksize=None):
        # Record the arguments so they can be asserted after the call.
        calls["path"] = path
        calls["chunksize"] = chunksize
        return expected_df

    monkeypatch.setattr(storage_service, "load_dataframe", _fake_load_dataframe)

    result = storage_service.load_dataframe_by_asset_reference(
        tenant_id="tenant_abc",
        asset_id="asset_xyz",
        version=2,
        filename="train.parquet",
        chunksize=None,
    )

    assert calls["path"] == "tenant_abc/files/asset_xyz/2/train.parquet"
    # The fake captures chunksize; verify it instead of leaving it unchecked.
    assert calls["chunksize"] is None
    # assert_frame_equal reports which values differ, unlike a bare ``.equals``.
    pd.testing.assert_frame_equal(result, expected_df)


def test_load_dataframe_by_asset_reference_rejects_invalid_filename(storage_service):
    """A filename containing a parent-directory segment raises ``ValueError``."""
    with pytest.raises(ValueError):
        storage_service.load_dataframe_by_asset_reference(
            tenant_id="tenant_abc",
            asset_id="asset_xyz",
            version=1,
            filename="../escape.csv",
        )