Model Card for Mimir-1.6B

How to Get Started with the Model

Below is an example of how to use the model. We recommend following these setup steps first:

# Download the model files into ./mimir-lcm/Mimir-1.6B (the Python example below
# reads its checkpoint from this directory).
huggingface-cli download mimir-lcm/Mimir-1.6B --local-dir mimir-lcm/Mimir-1.6B 
# Fetch the Large Concept Model reference code and move its `lcm` package into
# the current directory so that `import lcm` resolves from here.
git clone https://github.com/facebookresearch/large_concept_model.git
mv large_concept_model/lcm .
# Install pinned dependencies; the torch and fairseq2 wheels come from the
# CUDA 12.1 (cu121) indexes, with fairseq2 taken from the index built for torch 2.5.1.
pip install torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121 --upgrade
pip install fairseq2==v0.3.0rc1 --pre --extra-index-url  https://fair.pkg.atmeta.com/fairseq2/whl/rc/pt2.5.1/cu121 --upgrade
pip install omegaconf==2.3.0
pip install sonar-space==0.3.2
pip install wtpsplit==2.1.2

Now you should be able to run the following:

import lcm
import torch
from pathlib import Path

from lcm.models.two_tower_diffusion_lcm.builder import (
    create_two_tower_diffusion_lcm_model,
)
from lcm.models.two_tower_diffusion_lcm.archs import two_tower_diffusion_lcm_1_6B
from lcm.inference.two_tower_diffusion_lcm.generator import (
    TwoTowerDiffusionLCMGenerator,
    DiffusionLCMGeneratorOptions,
)
from lcm.datasets.batch import EmbeddingsBatch
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline, EmbeddingToTextModelPipeline

from wtpsplit import SaT

# NOTE(review): presumably registers the LCM components with fairseq2 before
# any model is built -- confirm against the `lcm` package.
lcm.setup_fairseq2()

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

from lcm.models.two_tower_diffusion_lcm.builder import TwoTowerDiffusionLCModel

# Keep a handle on the stock implementation so the wrapper can delegate to it.
_original_sample_fn = TwoTowerDiffusionLCModel.sample_initial_noise_vectors


def _patched_sample_fn(self, batch_size: int):
    """Sample the initial noise vectors and cast them to the model's dtype.

    Delegates to the original ``sample_initial_noise_vectors`` and converts
    its result to ``self.dtype``.

    NOTE(review): presumably needed because the stock method returns noise in
    a dtype that differs from the one the model runs in -- confirm.
    """
    return _original_sample_fn(self, batch_size).to(dtype=self.dtype)


# Install the wrapper on the class so every model instance picks it up.
TwoTowerDiffusionLCModel.sample_initial_noise_vectors = _patched_sample_fn

# Checkpoint file inside the directory used as `--local-dir` in the
# `huggingface-cli download` setup step.
CHECKPOINT_PATH = "mimir-lcm/Mimir-1.6B/model.pt"
# dtype the LCM is built in and that prompt / EOS embeddings are cast to.
INFERENCE_DTYPE = torch.float16

# SONAR pipelines: embeddings -> text (decoder) and text -> embeddings (encoder).
TEXT_DECODER = EmbeddingToTextModelPipeline(
    decoder="text_sonar_basic_decoder",
    tokenizer="text_sonar_basic_decoder",
    device=torch.device(DEVICE),
)
TEXT_EMBEDDER = TextToEmbeddingModelPipeline(
    encoder="text_sonar_basic_encoder",
    tokenizer="text_sonar_basic_encoder",
    device=torch.device(DEVICE),
)

def decode_embeddings(embeddings):
    """Decode embeddings into English text with the SONAR text decoder.

    The input is moved to ``DEVICE`` and cast to float32 before being passed
    to ``TEXT_DECODER.predict`` with ``target_lang="eng_Latn"``.

    Returns:
        Whatever ``TEXT_DECODER.predict`` returns (presumably one decoded
        string per embedding row -- confirm against the sonar API).
    """
    decoder_input = embeddings.to(device=DEVICE, dtype=torch.float32)
    print("Decoding...")
    return TEXT_DECODER.predict(decoder_input, target_lang="eng_Latn")

def get_eos_vector():
    """Return the embedding of the sentence "End of text." as the EOS vector.

    The sentence is embedded with ``TEXT_EMBEDDER``, singleton dimensions are
    squeezed away, and the result is moved to ``DEVICE`` in ``INFERENCE_DTYPE``.
    """
    eos_embedding = TEXT_EMBEDDER.predict(["End of text."], source_lang="eng_Latn")
    eos_embedding = eos_embedding.squeeze()
    return eos_embedding.to(device=DEVICE, dtype=INFERENCE_DTYPE)

def load_two_tower_model(checkpoint_path, device="cuda"):
    """Build the 1.6B two-tower diffusion LCM and load its weights.

    Args:
        checkpoint_path: Path to the checkpoint file read by ``torch.load``.
            The file may hold the state dict directly or nest it under a
            ``"model"`` key.
        device: Device (string or ``torch.device``) on which the model is
            created, to which the checkpoint is mapped, and on which the
            returned model lives.

    Returns:
        The model in eval mode, on ``device``, in ``INFERENCE_DTYPE``.

    Raises:
        RuntimeError: From ``load_state_dict(strict=True)`` when the
            checkpoint keys do not match the model exactly.
    """
    target_device = torch.device(device)
    config = two_tower_diffusion_lcm_1_6B()

    print("Building model structure...")
    model = create_two_tower_diffusion_lcm_model(
        config,
        device=target_device,
        dtype=INFERENCE_DTYPE,
    )

    print(f"Loading weights from {checkpoint_path}...")
    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source; consider passing
    # weights_only=True if the checkpoint contains nothing but tensors.
    state_dict = torch.load(checkpoint_path, map_location=device)

    # Some checkpoints wrap the weights in a {"model": ...} container.
    if "model" in state_dict:
        state_dict = state_dict["model"]

    model.load_state_dict(state_dict, strict=True)

    model.eval()
    # Honour the `device` argument here; the global DEVICE may differ from it.
    model.to(device=target_device, dtype=INFERENCE_DTYPE)
    print("Model loaded successfully.")
    return model

def run_inference(model, prompt_embeddings, device="cuda"):
    """Generate a continuation for the prompt embeddings with the diffusion LCM.

    Args:
        model: The loaded two-tower diffusion LCM.
        prompt_embeddings: Tensor of prompt sentence embeddings; it is moved
            to ``device`` and wrapped in an ``EmbeddingsBatch`` without a
            padding mask.
        device: Device on which generation inputs are placed.

    Returns:
        The output of ``TwoTowerDiffusionLCMGenerator`` for the batch.
    """
    options = DiffusionLCMGeneratorOptions(
        eos_threshold=0.9,
        inference_timesteps=40,
        initial_noise_scale=0.6,
        guidance_scale=1.5,
        guidance_rescale=0.7,
        epsilon_scaling=1.00045,
        stop_on_repetition_cosine_threshold=0.9,
        seed=42,  # fixed seed, as in the original example
    )

    # get_eos_vector() places the vector on the global DEVICE; move it to the
    # requested device so it matches the prompt embeddings below.
    eos_vec = get_eos_vector().to(device)
    generator = TwoTowerDiffusionLCMGenerator(model, options, eos_vec=eos_vec)

    seqs = prompt_embeddings.to(device)
    batch_input = EmbeddingsBatch(seqs=seqs, padding_mask=None)

    print("Running generation...")
    output = generator(batch_input)

    return output

if __name__ == "__main__":

    raw_prompt_text = "A large language model (LLM) is a neural network trained on a vast amount of text for natural language processing tasks, especially language generation. LLMs can generate, summarize, translate and parse text in many contexts, and are a foundational technology behind modern chatbots. Biased or inaccurate training data can make an LLM's output less reliable."

    model = load_two_tower_model(CHECKPOINT_PATH, DEVICE)

    with torch.no_grad():

        # Split the raw prompt into sentences; each sentence is embedded separately below.
        sat_model = SaT("segment-any-text/sat-3l")
        if torch.cuda.is_available():
            sat_model.half().to(DEVICE)

        split_outputs = list(sat_model.split([raw_prompt_text], threshold=0.02))
        # Only one text was passed in, so take its segments and drop empty ones.
        sentences = [s.strip() for s in split_outputs[0] if s.strip()]

        print(sentences)

        prompt = TEXT_EMBEDDER.predict(sentences, source_lang="eng_Latn", batch_size=1024)
        prompt = prompt.to(device=DEVICE, dtype=INFERENCE_DTYPE)
        # Add a leading dimension (presumably the batch axis expected by the generator).
        prompt = prompt.unsqueeze(0)

    results = run_inference(model, prompt, DEVICE)

    for hyp in results.hypotheses[0]:
        # Skip the first prompt.shape[1] decoded items, i.e. as many as there
        # are prompt sentences, so only the generated continuation is printed.
        print(decode_embeddings(hyp.seq)[prompt.shape[1]:])

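Make sure to change the source language (`source_lang="eng_Latn"`) to the language you want to run inference in. If you also want the output decoded in that language, change `target_lang` in `decode_embeddings` accordingly.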

Citation

If you use this model in your research, please cite the following:

@misc{musacchio2026mimirlargescalemultilingualconcept,
      title={Mimir: Large-scale Multilingual Concept Modeling}, 
      author={Elio Musacchio and Lucia Siciliani and Pierpaolo Basile},
      year={2026},
      eprint={2605.25263},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2605.25263}, 
}
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Datasets used to train mimir-lcm/Mimir-1.6B

Paper for mimir-lcm/Mimir-1.6B