Mimir: Large-scale Multilingual Concept Modeling
Paper • 2605.25263 • Published
Below is an example of how to use the model. To get started, we recommend following these steps:
# Download the model repository into a local directory of the same name.
huggingface-cli download mimir-lcm/Mimir-1.6B-Instruct --local-dir mimir-lcm/Mimir-1.6B-Instruct
# Fetch the reference LCM code and move its `lcm` package into the working directory
# so that `import lcm` resolves from here.
git clone https://github.com/facebookresearch/large_concept_model.git
mv large_concept_model/lcm .
# Install pinned dependencies; the torch and fairseq2 lines pull from the
# CUDA 12.1 (cu121) / PyTorch 2.5.1 wheel indexes.
pip install torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121 --upgrade
pip install fairseq2==v0.3.0rc1 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/rc/pt2.5.1/cu121 --upgrade
pip install omegaconf==2.3.0
pip install sonar-space==0.3.2
pip install wtpsplit==2.1.2
Now you should be able to run the following:
import lcm
import torch
from pathlib import Path
from lcm.models.two_tower_diffusion_lcm.builder import (
create_two_tower_diffusion_lcm_model,
)
from lcm.models.two_tower_diffusion_lcm.archs import two_tower_diffusion_lcm_1_6B
from lcm.inference.two_tower_diffusion_lcm.generator import (
TwoTowerDiffusionLCMGenerator,
DiffusionLCMGeneratorOptions,
)
from lcm.datasets.batch import EmbeddingsBatch
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline, EmbeddingToTextModelPipeline
from wtpsplit import SaT
# Run lcm's fairseq2 setup hook before anything else touches the models.
# NOTE(review): presumably this registers the LCM assets/architectures with fairseq2 — confirm.
lcm.setup_fairseq2()
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
from lcm.models.two_tower_diffusion_lcm.builder import TwoTowerDiffusionLCModel
# Keep a handle on the stock method so the wrapper below can delegate to it.
_original_sample_fn = TwoTowerDiffusionLCModel.sample_initial_noise_vectors


def _patched_sample_fn(self, batch_size: int):
    """Sample the initial noise with the stock method, then cast it to the model's dtype."""
    return _original_sample_fn(self, batch_size).to(dtype=self.dtype)


# Install the wrapper on the class so every model instance uses it.
TwoTowerDiffusionLCModel.sample_initial_noise_vectors = _patched_sample_fn
# Location of the downloaded checkpoint and the precision used for generation.
CHECKPOINT_PATH = "mimir-lcm/Mimir-1.6B-Instruct/model.pt"
INFERENCE_DTYPE = torch.float16

# SONAR pipeline that turns sentence embeddings back into text.
TEXT_DECODER = EmbeddingToTextModelPipeline(
    decoder="text_sonar_basic_decoder",
    tokenizer="text_sonar_basic_decoder",
    device=torch.device(DEVICE),
)

# SONAR pipeline that turns sentences into embeddings.
TEXT_EMBEDDER = TextToEmbeddingModelPipeline(
    encoder="text_sonar_basic_encoder",
    tokenizer="text_sonar_basic_encoder",
    device=torch.device(DEVICE),
)
def decode_embeddings(embeddings):
    """Decode embeddings to English text with the module-level TEXT_DECODER.

    The input is moved to DEVICE and cast to float32 before being handed to
    the decoder; whatever ``TEXT_DECODER.predict`` returns is passed through.
    """
    fp32_embeddings = embeddings.to(device=DEVICE, dtype=torch.float32)
    print("Decoding...")
    return TEXT_DECODER.predict(fp32_embeddings, target_lang="eng_Latn")
def get_eos_vector():
    """Embed the sentence "End of text." and return it on DEVICE in INFERENCE_DTYPE.

    The embedder output is squeezed so that singleton dimensions are dropped.
    """
    eos_embedding = TEXT_EMBEDDER.predict(["End of text."], source_lang="eng_Latn")
    eos_vector = eos_embedding.squeeze()
    return eos_vector.to(device=DEVICE, dtype=INFERENCE_DTYPE)
def load_two_tower_model(checkpoint_path, device="cuda"):
    """Build the 1.6B two-tower diffusion LCM and load its weights.

    Args:
        checkpoint_path: Path to the checkpoint file. It may hold the state
            dict directly or nest it under a ``"model"`` key.
        device: Device on which the model is built, the checkpoint is mapped,
            and the final model is placed.

    Returns:
        The model in eval mode, on ``device``, in ``INFERENCE_DTYPE``.

    Raises:
        RuntimeError: Propagated from ``load_state_dict(strict=True)`` when the
            checkpoint keys do not match the model.
    """
    config = two_tower_diffusion_lcm_1_6B()

    print("Building model structure...")
    model = create_two_tower_diffusion_lcm_model(
        config,
        device=torch.device(device),
        dtype=INFERENCE_DTYPE,
    )

    print(f"Loading weights from {checkpoint_path}...")
    # NOTE(security): torch.load unpickles the file; only load checkpoints from
    # a source you trust.
    state_dict = torch.load(checkpoint_path, map_location=device)
    if "model" in state_dict:
        state_dict = state_dict["model"]
    model.load_state_dict(state_dict, strict=True)

    model.eval()
    # Honour the caller's ``device`` here as well; the model was built and the
    # checkpoint mapped on it, so moving to the global DEVICE instead would
    # silently ignore the argument.
    model.to(device=device, dtype=INFERENCE_DTYPE)
    print("Model loaded successfully.")
    return model
def run_inference(model, prompt_embeddings, device="cuda"):
    """Generate a continuation for ``prompt_embeddings`` with the diffusion LCM generator.

    Args:
        model: The loaded two-tower diffusion LCM.
        prompt_embeddings: Prompt embeddings; moved to ``device`` before generation.
        device: Device the prompt embeddings are moved to.

    Returns:
        The generator's output object.
    """
    decoding_options = DiffusionLCMGeneratorOptions(
        eos_threshold=0.9,
        inference_timesteps=40,
        initial_noise_scale=0.6,
        guidance_scale=1.5,
        guidance_rescale=0.7,
        epsilon_scaling=1.00045,
        stop_on_repetition_cosine_threshold=0.9,
        seed=42,
    )
    generator = TwoTowerDiffusionLCMGenerator(
        model,
        decoding_options,
        eos_vec=get_eos_vector(),
    )

    # No padding mask is supplied with the prompt.
    batch_input = EmbeddingsBatch(seqs=prompt_embeddings.to(device), padding_mask=None)

    print("Running generation...")
    return generator(batch_input)
if __name__ == "__main__":
    # Instruct-style prompt: user marker, input sentence, instruction, assistant marker.
    raw_prompt_text = "User turn.\n\nJohn lives in his house and loves to play soccer.\n\nGive a brief definition of the word \"house\" in the sentence given as input. Generate only the definition.\n\nAssistant turn."

    model = load_two_tower_model(CHECKPOINT_PATH, DEVICE)

    with torch.no_grad():
        # Sentence segmenter; switched to half precision on the GPU when one is available.
        sat_model = SaT("segment-any-text/sat-3l")
        if torch.cuda.is_available():
            sat_model.half().to(DEVICE)

        # Split the prompt into sentences and drop whitespace-only pieces.
        segmented = list(sat_model.split([raw_prompt_text], threshold=0.02))
        sentences = []
        for piece in segmented[0]:
            cleaned = piece.strip()
            if cleaned:
                sentences.append(cleaned)
        print(sentences)

        # Embed each sentence, then add a leading dimension of size 1.
        prompt = TEXT_EMBEDDER.predict(sentences, source_lang="eng_Latn", batch_size=1024)
        prompt = prompt.to(device=DEVICE, dtype=INFERENCE_DTYPE).unsqueeze(0)

        results = run_inference(model, prompt, DEVICE)

        # Skip the first prompt.shape[1] decoded items so only what follows the prompt is printed.
        prompt_len = prompt.shape[1]
        for hyp in results.hypotheses[0]:
            print(decode_embeddings(hyp.seq)[prompt_len:])
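Make sure to change the language code from "eng_Latn" to the language you want to run inference in (the example passes it as `source_lang` to the text embedder and as `target_lang` to the text decoder).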
Furthermore, for the instruct model, we used the following per-language mappings for the "User turn." and "Assistant turn." strings:
# Per-language text of the "User turn." marker, keyed by language code in the
# same format as "eng_Latn".
# NOTE(review): some entries read like literal machine translations (e.g.
# "slk_Latn", "mkd_Cyrl"). Presumably they must stay verbatim to match what the
# instruct model was given — confirm with the authors before changing any of them.
USER_TRANSLATION = {
"arb_Arab": "دور المستخدم.",
"bel_Cyrl": "Ход карыстальніка.",
"ben_Beng": "ব্যবহারকারীর পালা।",
"bos_Latn": "Red korisnika.",
"bul_Cyrl": "Ред е на потребителя.",
"cat_Latn": "Torn de l'usuari.",
"ces_Latn": "Je řada na uživateli.",
"cym_Latn": "Tro'r defnyddiwr.",
"dan_Latn": "Brugerens tur.",
"deu_Latn": "Der Benutzer ist am Zug.",
"eng_Latn": "User turn.",
"fra_Latn": "C'est au tour de l'utilisateur.",
"heb_Hebr": "תור המשתמש.",
"hin_Deva": "उपयोगकर्ता की बारी।",
"hrv_Latn": "Poteg korisnika.",
"ind_Latn": "Giliran pengguna.",
"jpn_Jpan": "ユーザーの番です。",
"ita_Latn": "È il turno dell'utente.",
"kan_Knda": "ಬಳಕೆದಾರರ ತಿರುವು.",
"kor_Hang": "사용자 차례입니다.",
"lvs_Latn": "Lietotāja kārta.",
"mal_Mlym": "ഉപയോക്താവിന്റെ ഊഴം",
"mar_Deva": "वापरकर्त्याची पाळी.",
"mkd_Cyrl": "Потребен е корисник.",
"npi_Deva": "प्रयोगकर्ताको पालो।",
"nld_Latn": "De gebruiker is aan de beurt.",
"ory_Orya": "ୟୁଜର୍ ଟର୍ନ୍ |",
"pol_Latn": "Ruch użytkownika.",
"por_Latn": "É a vez do utilizador.",
"ron_Latn": "E rândul utilizatorului.",
"rus_Cyrl": "Ход пользователя.",
"slk_Latn": "Je na ťa.",
"slv_Latn": "Na vrsti je uporabnik.",
"srp_Cyrl": "Кориснички потез.",
"spa_Latn": "Turno del usuario.",
"swe_Latn": "Användarens tur.",
"swh_Latn": "Mzunguko wa mtumiaji.",
"tam_Taml": "பயனர் முறை.",
"tel_Telu": "వినియోగదారుని వంతు",
"tha_Thai": "ผู้ใช้เทิร์น",
"tur_Latn": "Kullanıcı sırası.",
"ukr_Cyrl": "Хід користувача.",
"urd_Arab": "صارف کی باری۔",
"vie_Latn": "Đến lượt người dùng.",
"zho_Hans": "轮到用户了。"
}
# Per-language text of the "Assistant turn." marker, keyed by language code in
# the same format as "eng_Latn".
# NOTE(review): some entries read like literal machine translations (e.g.
# "pol_Latn", "tha_Thai"). Presumably they must stay verbatim to match what the
# instruct model was given — confirm with the authors before changing any of them.
ASSISTANT_TRANSLATION = {
"arb_Arab": "دور المساعد.",
"bel_Cyrl": "Памочнік, свой ход.",
"ben_Beng": "সহকারী পালা।",
"bos_Latn": "Pomoćni potez.",
"bul_Cyrl": "Ред е на помощника.",
"cat_Latn": "Torn de l'ajudant.",
"ces_Latn": "Na řadě je asistent.",
"cym_Latn": "Tro'r cynorthwyydd.",
"dan_Latn": "Assistenten er på tur.",
"deu_Latn": "Der Assistent ist an der Reihe.",
"eng_Latn": "Assistant turn.",
"fra_Latn": "C'est au tour de l'assistant.",
"heb_Hebr": "תורו של העוזר.",
"hin_Deva": "सहायक की बारी।",
"hrv_Latn": "Pomoćni potez.",
"ind_Latn": "Giliran asisten.",
"jpn_Jpan": "アシスタントの番。",
"ita_Latn": "È il turno dell'assistente.",
"kan_Knda": "ಸಹಾಯಕ ತಿರುವು.",
"kor_Hang": "조수 차례.",
"lvs_Latn": "Palīga kārta.",
"mal_Mlym": "സഹായിയുടെ ഊഴം",
"mar_Deva": "सहाय्यक पालवी.",
"mkd_Cyrl": "Помошник-ред.",
"npi_Deva": "सहायक पालो।",
"nld_Latn": "De assistent is aan de beurt.",
"ory_Orya": "ଆସିଷ୍ଟାଣ୍ଟ ଟର୍ନ୍ |",
"pol_Latn": "Rzuty pomocników.",
"por_Latn": "É a vez do assistente.",
"ron_Latn": "E rândul asistentului.",
"rus_Cyrl": "Очередь помощника.",
"slk_Latn": "Na rade je asistent.",
"slv_Latn": "Na vrsti je pomočnik.",
"srp_Cyrl": "Помоћни круг.",
"spa_Latn": "Turno del asistente.",
"swe_Latn": "Assistenten är på tur.",
"swh_Latn": "Mzunguko wa msaidizi.",
"tam_Taml": "துணைவரின் முறை.",
"tel_Telu": "సహాయకుడి వంతు",
"tha_Thai": "ผู้ช่วยเลี้ยว",
"tur_Latn": "Sıra asistanında.",
"ukr_Cyrl": "Черга помічника.",
"urd_Arab": "معاون کی باری۔",
"vie_Latn": "Đến lượt trợ lý.",
"zho_Hans": "助手的回合。"
}
If you use this model in your research, please cite the following:
@misc{musacchio2026mimirlargescalemultilingualconcept,
title={Mimir: Large-scale Multilingual Concept Modeling},
author={Elio Musacchio and Lucia Siciliani and Pierpaolo Basile},
year={2026},
eprint={2605.25263},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2605.25263},
}