Commit b966c37
Parent(s): 2a3ee0b
feat: update model config

lionguard2.py  (+4 -4)
@@ -6,7 +6,7 @@ import torch
 import torch.nn as nn
 from transformers import PretrainedConfig, PreTrainedModel
 
-INPUT_DIMENSION = 3072  # length of
+INPUT_DIMENSION = 3072  # length of Gemini embeddings
 
 CATEGORIES = {
     "binary": ["binary"],
@@ -56,7 +56,7 @@ class LionGuard2Model(PreTrainedModel):
 
     def __init__(self, config: LionGuard2Config):
         """
-        LionGuard2 is a localised content moderation model that flags whether text violates the following categories:
+        LionGuard2.1 is a localised content moderation model that flags whether text violates the following categories:
 
        1. `hateful`: Text that discriminates, criticizes, insults, denounces, or dehumanizes a person or group on the basis of a protected identity.
 
@@ -94,14 +94,14 @@ class LionGuard2Model(PreTrainedModel):
 
        Lastly, there is an additional `binary` category (#7) which flags whether the text is unsafe in general.
 
-        The model takes in
+        The model takes in an input text that has been encoded with Gemini's `gemini-embedding-001` model.
 
        The model outputs the probabilities of each category being true.
 
        ================================
 
        Args:
-            input_dim: The dimension of the input embeddings. This defaults to 3072, which is the dimension of the embeddings from
+            input_dim: The dimension of the input embeddings. This defaults to 3072, which is the dimension of the embeddings from Gemini's `gemini-embedding-001` model. This should not be changed.
             label_names: The names of the labels. This defaults to the keys of the CATEGORIES dictionary. This should not be changed.
             categories: The categories of the labels. This defaults to the CATEGORIES dictionary. This should not be changed.
 
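For context, the docstring changed in this commit describes an interface where the model consumes a 3072-dimensional `gemini-embedding-001` vector and emits a probability per category. A minimal sketch of that interface in plain Python, assuming a toy linear head with a sigmoid per label (the weights and the two-label set here are illustrative placeholders, not the model's actual parameters or full category list):

```python
import math
import random

INPUT_DIMENSION = 3072  # length of Gemini `gemini-embedding-001` embeddings

# Illustrative subset of labels; the diff names only `hateful` and `binary`.
LABELS = ["hateful", "binary"]


def sigmoid(x: float) -> float:
    return 1.0 / (1.0 + math.exp(-x))


def score(embedding, weights, biases):
    """Toy multi-label head: one logit per label, squashed to a probability."""
    return {
        label: sigmoid(sum(w * e for w, e in zip(weights[label], embedding)) + biases[label])
        for label in LABELS
    }


# Stand-in for a real embedding and for trained parameters.
random.seed(0)
embedding = [random.gauss(0.0, 1.0) for _ in range(INPUT_DIMENSION)]
weights = {label: [random.gauss(0.0, 0.01) for _ in range(INPUT_DIMENSION)] for label in LABELS}
biases = {label: 0.0 for label in LABELS}

probs = score(embedding, weights, biases)
print({label: round(p, 3) for label, p in probs.items()})
```

Each output is an independent probability rather than a softmax over categories, which matches the docstring's description of per-category "probabilities of each category being true" plus a separate `binary` flag.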