Update README.md
Browse files
README.md
CHANGED
|
@@ -360,6 +360,82 @@ with torch.inference_mode():
|
|
| 360 |
print("Prediction:", pred_class)
|
| 361 |
# 0 = Entailment, 1 = Neutral, 2 = Contradiction
|
| 362 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
---
|
| 365 |
|
|
|
|
| 360 |
print("Prediction:", pred_class)
|
| 361 |
# 0 = Entailment, 1 = Neutral, 2 = Contradiction
|
| 362 |
```
|
| 363 |
+
### Example 4: Using BLASER Semantic Score with MMNLI
|
| 364 |
+
|
| 365 |
+
You can combine the BLASER semantic score with the MMNLI NLI class to get a **better understanding of the relationship** between a source sentence and its candidate translations: the NLI class provides the entailment/neutral/contradiction label, while the BLASER score adds a fine-grained measure of semantic similarity.
|
| 366 |
+
|
| 367 |
+
```python
|
| 368 |
+
|
| 369 |
+
import torch
|
| 370 |
+
from transformers import AutoTokenizer, AutoModel
|
| 371 |
+
from transformers.models.m2m_100.modeling_m2m_100 import M2M100Encoder
|
| 372 |
+
|
| 373 |
+
# -------------------------
|
| 374 |
+
# 1️⃣ Load ported SONAR text encoder
|
| 375 |
+
# -------------------------
|
| 376 |
+
sonar_model_name = "cointegrated/SONAR_200_text_encoder"
|
| 377 |
+
encoder = M2M100Encoder.from_pretrained(sonar_model_name)
|
| 378 |
+
tokenizer = AutoTokenizer.from_pretrained(sonar_model_name)
|
| 379 |
+
|
| 380 |
+
def encode_mean_pool(texts, tokenizer, encoder, lang='eng_Latn', norm=False):
|
| 381 |
+
tokenizer.src_lang = lang
|
| 382 |
+
with torch.inference_mode():
|
| 383 |
+
batch = tokenizer(texts, return_tensors='pt', padding=True)
|
| 384 |
+
seq_embs = encoder(**batch).last_hidden_state
|
| 385 |
+
mask = batch.attention_mask
|
| 386 |
+
mean_emb = (seq_embs * mask.unsqueeze(-1)).sum(1) / mask.unsqueeze(-1).sum(1)
|
| 387 |
+
if norm:
|
| 388 |
+
mean_emb = torch.nn.functional.normalize(mean_emb)
|
| 389 |
+
return mean_emb
|
| 390 |
+
|
| 391 |
+
# -------------------------
|
| 392 |
+
# 2️⃣ Example sentences
|
| 393 |
+
# -------------------------
|
| 394 |
+
src_sentence = ["He is happy."]
|
| 395 |
+
mt_sentences = [
|
| 396 |
+
"Il est content.",       # entailment — blaser: 4.515
|
| 397 |
+
"Il est malheureux."     # contradiction — blaser: 4.41
|
| 398 |
+
]
|
| 399 |
+
|
| 400 |
+
# Encode source and MT sentences
|
| 401 |
+
src_embs = encode_mean_pool(src_sentence, tokenizer, encoder, lang="eng_Latn")
|
| 402 |
+
mt_embs = encode_mean_pool(mt_sentences, tokenizer, encoder, lang="fra_Latn")
|
| 403 |
+
|
| 404 |
+
# -------------------------
|
| 405 |
+
# 3️⃣ Load MMNLI model
|
| 406 |
+
# -------------------------
|
| 407 |
+
mmnli_model_name = "oist/multimodal_nli_model"
|
| 408 |
+
mmnli_model = AutoModel.from_pretrained(mmnli_model_name, trust_remote_code=True)
|
| 409 |
+
mmnli_model.eval()
|
| 410 |
+
|
| 411 |
+
# -------------------------
|
| 412 |
+
# 4️⃣ Load BLASER QE model
|
| 413 |
+
# -------------------------
|
| 414 |
+
qe_model_name = "oist/blaser_2_0_qe_ported"
|
| 415 |
+
qe_model = AutoModel.from_pretrained(qe_model_name, trust_remote_code=True)
|
| 416 |
+
qe_model.eval()
|
| 417 |
+
|
| 418 |
+
# -------------------------
|
| 419 |
+
# 5️⃣ Run inference
|
| 420 |
+
# -------------------------
|
| 421 |
+
for i, mt_sentence in enumerate(mt_sentences):
|
| 422 |
+
mt_emb = mt_embs[i].unsqueeze(0) # keep batch dimension
|
| 423 |
+
|
| 424 |
+
# NLI prediction
|
| 425 |
+
with torch.inference_mode():
|
| 426 |
+
logits = mmnli_model(src_embs, mt_emb)
|
| 427 |
+
pred_class = torch.argmax(logits, dim=-1).item()
|
| 428 |
+
|
| 429 |
+
# BLASER semantic score
|
| 430 |
+
with torch.inference_mode():
|
| 431 |
+
qe_score = qe_model(src_embs, mt_emb) # shape [1, 1]
|
| 432 |
+
|
| 433 |
+
print(f"\nMT sentence: '{mt_sentence}'")
|
| 434 |
+
print("NLI prediction:", ["Entailment", "Neutral", "Contradiction"][pred_class])
|
| 435 |
+
print("BLASER semantic score:", qe_score.item())
|
| 436 |
+
|
| 437 |
+
```
|
| 438 |
+
|
| 439 |
|
| 440 |
---
|
| 441 |
|