HuaminChen committed on
Commit
75eae3f
·
verified ·
1 Parent(s): b4136e7

Fix loading example to match checkpoint format

Browse files
Files changed (1) hide show
  1. README.md +20 -10
README.md CHANGED
@@ -88,25 +88,35 @@ pip install torch transformers pillow safetensors
88
  import torch
89
  from huggingface_hub import hf_hub_download
90
 
91
- # Download checkpoint
92
  checkpoint_path = hf_hub_download(
93
  repo_id="llm-semantic-router/multi-modal-embed-small",
94
  filename="model.pt"
95
  )
 
 
 
 
96
 
97
- # Load model
 
98
  import sys
99
  sys.path.append("path/to/2DMSE-Multimodal-Embedder")
100
- from src.models import create_multimodal_model
101
-
102
- model = create_multimodal_model(
103
- text_encoder_name="sentence-transformers/all-MiniLM-L6-v2",
104
- image_encoder_name="google/siglip-base-patch16-512",
105
- audio_encoder_name="openai/whisper-tiny",
106
- output_dim=384,
 
 
 
 
 
107
  )
108
  state_dict = torch.load(checkpoint_path, map_location="cpu")
109
- model.load_state_dict(state_dict["model_state_dict"])
110
  model.eval()
111
  ```
112
 
 
88
  import torch
89
  from huggingface_hub import hf_hub_download
90
 
91
+ # Download checkpoint and config
92
  checkpoint_path = hf_hub_download(
93
  repo_id="llm-semantic-router/multi-modal-embed-small",
94
  filename="model.pt"
95
  )
96
+ config_path = hf_hub_download(
97
+ repo_id="llm-semantic-router/multi-modal-embed-small",
98
+ filename="config.json"
99
+ )
100
 
101
+ # Load model with matching architecture
102
+ import json
103
  import sys
104
  sys.path.append("path/to/2DMSE-Multimodal-Embedder")
105
+ from src.models import MultimodalEmbedder
106
+
107
+ with open(config_path) as f:
108
+ config = json.load(f)
109
+
110
+ model = MultimodalEmbedder(
111
+ text_encoder_name=config["text_encoder_name"],
112
+ image_encoder_name=config["image_encoder_name"],
113
+ audio_encoder_name=config["audio_encoder_name"],
114
+ output_dim=config["output_dim"],
115
+ fusion_type=config["fusion_type"],
116
+ num_fusion_layers=config["num_fusion_layers"],
117
  )
118
  state_dict = torch.load(checkpoint_path, map_location="cpu")
119
+ model.load_state_dict(state_dict)
120
  model.eval()
121
  ```
122