mimir-lcm/fineweb-2-sentence-split
Viewer • Updated • 479M • 106
Below you can find an example of how to use the model. To make this easier, we recommend following these steps:
# Download the model repository into ./mimir-lcm/Mimir-1.6B (the script below reads model.pt from this directory).
huggingface-cli download mimir-lcm/Mimir-1.6B --local-dir mimir-lcm/Mimir-1.6B
# Fetch the Large Concept Model code and move its `lcm` package into the current directory,
# so that `import lcm` resolves when the script is run from here.
git clone https://github.com/facebookresearch/large_concept_model.git
mv large_concept_model/lcm .
# Install PyTorch 2.5.1 with the CUDA 12.1 wheel index as an extra package source.
pip install torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121 --upgrade
# Install the fairseq2 release candidate from the index matching PyTorch 2.5.1 / CUDA 12.1 (--pre allows pre-releases).
pip install fairseq2==v0.3.0rc1 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/rc/pt2.5.1/cu121 --upgrade
# Pinned versions of the remaining dependencies.
pip install omegaconf==2.3.0
# Provides the `sonar` package imported by the script.
pip install sonar-space==0.3.2
# Provides the `SaT` sentence segmenter imported by the script.
pip install wtpsplit==2.1.2
Now you should be able to run the following:
import lcm
import torch
from pathlib import Path
from lcm.models.two_tower_diffusion_lcm.builder import (
create_two_tower_diffusion_lcm_model,
)
from lcm.models.two_tower_diffusion_lcm.archs import two_tower_diffusion_lcm_1_6B
from lcm.inference.two_tower_diffusion_lcm.generator import (
TwoTowerDiffusionLCMGenerator,
DiffusionLCMGeneratorOptions,
)
from lcm.datasets.batch import EmbeddingsBatch
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline, EmbeddingToTextModelPipeline
from wtpsplit import SaT
# Run lcm's fairseq2 setup hook before any model is built
# (presumably registers the LCM model families with fairseq2 — confirm against the lcm package).
lcm.setup_fairseq2()
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
from lcm.models.two_tower_diffusion_lcm.builder import TwoTowerDiffusionLCModel

# Keep a handle on the stock sampler so the wrapper below can delegate to it.
_original_sample_fn = TwoTowerDiffusionLCModel.sample_initial_noise_vectors


def _patched_sample_fn(self, batch_size: int):
    """Sample the initial noise vectors, then cast them to the model's dtype.

    Delegates to the original ``sample_initial_noise_vectors`` and converts
    the result to ``self.dtype``.
    NOTE(review): presumably needed because the stock sampler returns float32
    noise while the model runs in half precision — confirm against lcm.
    """
    return _original_sample_fn(self, batch_size).to(dtype=self.dtype)


# Install the wrapper on the class so every model instance uses it.
TwoTowerDiffusionLCModel.sample_initial_noise_vectors = _patched_sample_fn
# Path to the checkpoint file, relative to the working directory.
CHECKPOINT_PATH = "mimir-lcm/Mimir-1.6B/model.pt"
# Precision used for the concept model and for the embeddings fed to it.
INFERENCE_DTYPE = torch.float16
# SONAR pipeline that maps sentence embeddings back to text.
TEXT_DECODER = EmbeddingToTextModelPipeline(
    decoder="text_sonar_basic_decoder",
    tokenizer="text_sonar_basic_decoder",
    device=torch.device(DEVICE),
)
# SONAR pipeline that maps sentences to embeddings.
TEXT_EMBEDDER = TextToEmbeddingModelPipeline(
    encoder="text_sonar_basic_encoder",
    tokenizer="text_sonar_basic_encoder",
    device=torch.device(DEVICE),
)
def decode_embeddings(embeddings):
    """Decode embeddings into English text with the module-level SONAR decoder.

    Args:
        embeddings: Tensor of embeddings to decode. It is moved to ``DEVICE``
            and cast to float32 before decoding.

    Returns:
        The output of ``TEXT_DECODER.predict`` with ``target_lang="eng_Latn"``.
    """
    decoder_input = embeddings.to(device=DEVICE, dtype=torch.float32)
    print("Decoding...")
    return TEXT_DECODER.predict(decoder_input, target_lang="eng_Latn")
def get_eos_vector():
    """Return the embedding of the sentence "End of text.".

    The sentence is embedded as English with the module-level
    ``TEXT_EMBEDDER``; singleton dimensions are squeezed away and the result
    is placed on ``DEVICE`` in ``INFERENCE_DTYPE``.
    """
    eos_embedding = TEXT_EMBEDDER.predict(["End of text."], source_lang="eng_Latn")
    return eos_embedding.squeeze().to(device=DEVICE, dtype=INFERENCE_DTYPE)
def load_two_tower_model(checkpoint_path, device="cuda"):
    """Build the 1.6B two-tower diffusion LCM and load its weights.

    Args:
        checkpoint_path: Path to the checkpoint file. The file may hold the
            state dict directly or nest it under a ``"model"`` key.
        device: Device on which the model is created, the checkpoint is
            mapped, and the final model lives.

    Returns:
        The model in eval mode, on ``device``, in ``INFERENCE_DTYPE``.

    Raises:
        RuntimeError: From ``load_state_dict(strict=True)`` when the
            checkpoint keys do not match the model exactly.
    """
    config = two_tower_diffusion_lcm_1_6B()

    print("Building model structure...")
    model = create_two_tower_diffusion_lcm_model(
        config,
        device=torch.device(device),
        dtype=INFERENCE_DTYPE,
    )

    print(f"Loading weights from {checkpoint_path}...")
    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source (or pass weights_only=True if the checkpoint allows it).
    state_dict = torch.load(checkpoint_path, map_location=device)
    if "model" in state_dict:
        # Some checkpoints wrap the weights under a "model" key.
        state_dict = state_dict["model"]
    model.load_state_dict(state_dict, strict=True)

    model.eval()
    # Honour the caller's ``device`` (the original used the global DEVICE
    # here, silently overriding the argument when the two differed).
    model.to(device=device, dtype=INFERENCE_DTYPE)
    print("Model loaded successfully.")
    return model
def run_inference(model, prompt_embeddings, device="cuda"):
    """Generate a continuation for a batch of prompt embeddings.

    Args:
        model: The two-tower diffusion LCM to generate with.
        prompt_embeddings: Tensor of prompt embeddings; it is moved to
            ``device`` and wrapped in an ``EmbeddingsBatch`` without a
            padding mask.
        device: Device the prompt embeddings are moved to.

    Returns:
        Whatever the ``TwoTowerDiffusionLCMGenerator`` call returns.
    """
    generation_options = DiffusionLCMGeneratorOptions(
        eos_threshold=0.9,
        inference_timesteps=40,
        initial_noise_scale=0.6,
        guidance_scale=1.5,
        guidance_rescale=0.7,
        epsilon_scaling=1.00045,
        stop_on_repetition_cosine_threshold=0.9,
        seed=42,
    )
    eos_vec = get_eos_vector()
    generator = TwoTowerDiffusionLCMGenerator(model, generation_options, eos_vec=eos_vec)

    batch_input = EmbeddingsBatch(seqs=prompt_embeddings.to(device), padding_mask=None)
    print("Running generation...")
    return generator(batch_input)
if __name__ == "__main__":
    # Example prompt; it is split into sentences and each sentence is embedded.
    raw_prompt_text = "A large language model (LLM) is a neural network trained on a vast amount of text for natural language processing tasks, especially language generation. LLMs can generate, summarize, translate and parse text in many contexts, and are a foundational technology behind modern chatbots. Biased or inaccurate training data can make an LLM's output less reliable."
    model = load_two_tower_model(CHECKPOINT_PATH, DEVICE)

    # Inference only: no gradients are needed anywhere below.
    with torch.no_grad():
        # Sentence segmentation; run the segmenter in half precision on the GPU when available.
        sat_model = SaT("segment-any-text/sat-3l")
        if torch.cuda.is_available():
            sat_model.half().to(DEVICE)
        split_outputs = list(sat_model.split([raw_prompt_text], threshold=0.02))
        # One input text was passed, so its segments are the first (only) result.
        stripped_segments = (segment.strip() for segment in split_outputs[0])
        sentences = [segment for segment in stripped_segments if segment]
        print(sentences)

        # Embed the sentences and add a leading batch dimension of size 1.
        prompt = TEXT_EMBEDDER.predict(sentences, source_lang="eng_Latn", batch_size=1024)
        prompt = prompt.to(device=DEVICE, dtype=INFERENCE_DTYPE).unsqueeze(0)
        prompt_length = prompt.shape[1]

        results = run_inference(model, prompt, DEVICE)
        for hyp in results.hypotheses[0]:
            # Skip the first prompt_length decoded items so only the generated part is printed.
            print(decode_embeddings(hyp.seq)[prompt_length:])
Make sure to change the source language from "eng_Latn" to the one you want to perform inference with.
If you use this model in your research, please cite the following:
@misc{musacchio2026mimirlargescalemultilingualconcept,
title={Mimir: Large-scale Multilingual Concept Modeling},
author={Elio Musacchio and Lucia Siciliani and Pierpaolo Basile},
year={2026},
eprint={2605.25263},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2605.25263},
}