Spaces:

Jet-12138
/

CommentResponse

Runtime error

App Files Files Community

Jet-12138 committed on Apr 25, 2025

Commit

018d244

verified ·

1 Parent(s): 5a7af5f

Upload 7 files

Browse files

Files changed (7) hide show

README.md +103 -10
__init__.py +1 -0
app.py +59 -15
config.json +15 -0
model.py +66 -0
requirements.txt +2 -2
tokenizer_config.json +6 -0

README.md CHANGED Viewed

@@ -1,13 +1,106 @@
 ---
-title: CommentResponse
-emoji: 🔥
-colorFrom: gray
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.26.0
-app_file: app.py
-pinned: false
-license: other
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+language: en
+license: mit
+datasets:
+  - toxic_comment_classification
+tags:
+  - text-classification
+  - toxicity-detection
+  - sentiment-analysis
+  - multi-task-learning
+pipeline_tag: text-classification
 ---
+# Comment MTL BERT Model
+This is a BERT-based multi-task learning model capable of performing sentiment analysis and toxicity detection simultaneously.
+## Model Architecture
+The model is based on the `bert-base-uncased` pre-trained model with two separate classification heads:
+- **Sentiment Analysis Head**: 3-class classification (Negative, Neutral, Positive)
+- **Toxicity Detection Head**: multi-label classification over 6 labels (toxic, severe_toxic, obscene, threat, insult, identity_hate)
+### Technical Parameters
+- Hidden size: 768
+- Number of attention heads: 12
+- Number of hidden layers: 12
+- Vocabulary size: 30522
+- Maximum position embeddings: 512
+- Hidden activation function: gelu
+- Dropout probability: 0.1
+## Usage
+### Loading the Model
+```python
+from transformers import AutoTokenizer
+from src.model import CommentMTLModel
+import torch
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+# Load model
+model = CommentMTLModel(
+    model_name="bert-base-uncased",
+    num_sentiment_labels=3,
+    num_toxicity_labels=6
+)
+# Load pre-trained weights
+state_dict = torch.load("model.bin", map_location=torch.device('cpu'))
+model.load_state_dict(state_dict)
+model.eval()
+```
+### Model Inference
+```python
+# Prepare input
+text = "This is a test comment."
+inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
+# Model inference
+with torch.no_grad():
+    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+# Get results
+sentiment_logits = outputs["sentiment_logits"]
+toxicity_logits = outputs["toxicity_logits"]
+# Process sentiment analysis results
+sentiment_probs = torch.softmax(sentiment_logits, dim=1)
+sentiment_labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
+sentiment_prediction = sentiment_labels[sentiment_probs.argmax().item()]
+# Process toxicity detection results
+toxicity_probs = torch.sigmoid(toxicity_logits)
+toxicity_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
+toxicity_results = {label: prob.item() for label, prob in zip(toxicity_cols, toxicity_probs[0])}
+print(f"Sentiment: {sentiment_prediction}")
+print(f"Toxicity probabilities: {toxicity_results}")
+```
+## Limitations
+- This model was trained on English data only and is not suitable for other languages.
+- The toxicity detection may produce false positives or negatives in edge cases.
+- The model may lose information when processing long texts as the maximum input length is limited to 128 tokens.
+## Citation
+If you use this model, please cite our repository:
+```
+@misc{comment-mtl-bert,
+  author = {Aseem},
+  title = {Comment MTL BERT: Multi-Task Learning for Comment Analysis},
+  year = {2023},
+  publisher = {Hugging Face},
+  url = {https://huggingface.co/Aseemks07/comment_mtl_bert_best}
+}
+```

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # src package

app.py CHANGED Viewed

@@ -1,20 +1,64 @@
 import gradio as gr
-from transformers import pipeline
-# 加载你的模型
-classifier = pipeline("text-classification", model="你的用户名/你的模型名")
-# 定义推理函数
-def classify(text):
-    outputs = classifier(text)
-    # 可以只返回预测标签
-    return outputs[0]["label"]
-# 创建Gradio界面
-iface = gr.Interface(fn=classify,
-                     inputs=gr.Textbox(lines=2, placeholder="输入文本..."),
-                     outputs="text",
-                     title="文本分类模型",
-                     description="请输入一段文本，我来帮你分类！")
-iface.launch()

+import torch
+import torch.nn.functional as F
+from transformers import BertTokenizer
 import gradio as gr
+from model import CommentClassificationModel  # 导入你自定义的模型
+# Set device, including MPS support for Mac
+if torch.backends.mps.is_available():
+    device = torch.device("mps")
+elif torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+# Load tokenizer
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+# Load model
+model = CommentClassificationModel(config_path="config.json")
+model.load_state_dict(torch.load("pytorch_model.bin", map_location=device))
+model.to(device)
+model.eval()
+# Define labels
+sentiment_labels = ["Negative", "Neutral", "Positive"]
+toxicity_labels = ["Toxic", "Severe Toxic", "Obscene", "Threat", "Insult", "Identity Hate"]
+# Define the prediction function
+def analyse_comment(comment):
+    inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    with torch.no_grad():
+        sentiment_logits, toxicity_logits = model(**inputs)
+    # Process sentiment
+    sentiment_probs = F.softmax(sentiment_logits, dim=1)
+    sentiment_idx = torch.argmax(sentiment_probs, dim=1).item()
+    sentiment_prediction = sentiment_labels[sentiment_idx]
+    # Process toxicity
+    toxicity_probs = F.softmax(toxicity_logits, dim=1)
+    toxicity_idx = torch.argmax(toxicity_probs, dim=1).item()
+    toxicity_prediction = toxicity_labels[toxicity_idx]
+    return {
+        "Sentiment": sentiment_prediction,
+        "Toxicity": toxicity_prediction
+    }
+# Create Gradio interface
+iface = gr.Interface(
+    fn=analyse_comment,
+    inputs=gr.Textbox(lines=3, placeholder="Please enter a comment for analysis..."),
+    outputs=[
+        gr.Label(num_top_classes=1, label="Predicted Sentiment"),
+        gr.Label(num_top_classes=1, label="Predicted Toxicity")
+    ],
+    title="Comment Sentiment and Toxicity Classifier",
+    description="This tool classifies the sentiment and the most probable type of toxicity in a given comment. It utilises a custom fine-tuned BERT model. Developed for academic demonstration purposes in Australia."
+)
+iface.launch()

config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "model_type": "comment_mtl_bert",
+    "pretrained_model_name": "bert-base-uncased",
+    "hidden_size": 768,
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "vocab_size": 30522,
+    "max_position_embeddings": 512,
+    "hidden_act": "gelu",
+    "initializer_range": 0.02,
+    "layer_norm_eps": 1e-12,
+    "dropout_prob": 0.1,
+    "num_sentiment_labels": 3,
+    "num_toxicity_labels": 6
+}

model.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import torch
+import torch.nn as nn
+from transformers import BertModel, AutoModel
+class CommentMTLModel(nn.Module):
+    """
+    Multi-Task Learning model using a BERT base and separate heads for
+    sentiment classification and toxicity multi-label classification.
+    """
+    def __init__(self, model_name, num_sentiment_labels, num_toxicity_labels, dropout_prob=0.1):
+        """
+        Args:
+            model_name (str): Name of the pre-trained BERT model from Hugging Face.
+            num_sentiment_labels (int): Number of classes for sentiment analysis.
+            num_toxicity_labels (int): Number of classes for toxicity detection.
+            dropout_prob (float): Dropout probability for the classification heads.
+        """
+        super(CommentMTLModel, self).__init__()
+        # Load the pre-trained BERT model
+        self.bert = AutoModel.from_pretrained(model_name)
+        # Dropout layer for regularization - applied after BERT output, before heads
+        self.dropout = nn.Dropout(dropout_prob)
+        # --- Sentiment Head ---
+        # Takes BERT's pooled output (for [CLS] token) and maps it to sentiment logits
+        self.sentiment_classifier = nn.Linear(self.bert.config.hidden_size, num_sentiment_labels)
+        # --- Toxicity Head ---
+        # Takes BERT's pooled output and maps it to toxicity logits (multi-label)
+        self.toxicity_classifier = nn.Linear(self.bert.config.hidden_size, num_toxicity_labels)
+    def forward(self, input_ids, attention_mask):
+        """
+        Forward pass of the model.
+        Args:
+            input_ids (torch.Tensor): Tensor of input token IDs (batch_size, seq_length).
+            attention_mask (torch.Tensor): Tensor of attention masks (batch_size, seq_length).
+        Returns:
+            dict: A dictionary containing the raw output logits for each task:
+                'sentiment_logits': Logits for sentiment classification (batch_size, num_sentiment_labels).
+                'toxicity_logits': Logits for toxicity multi-label classification (batch_size, num_toxicity_labels).
+        """
+        # Pass input through BERT model
+        outputs = self.bert(
+            input_ids=input_ids,
+            attention_mask=attention_mask
+        )
+        # Get the pooled output
+        pooled_output = outputs.pooler_output
+        # Apply dropout for regularization
+        pooled_output = self.dropout(pooled_output)
+        # Pass the pooled output through the task-specific heads
+        sentiment_logits = self.sentiment_classifier(pooled_output)
+        toxicity_logits = self.toxicity_classifier(pooled_output)
+        return {
+            'sentiment_logits': sentiment_logits,
+            'toxicity_logits': toxicity_logits
+        }

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
-transformers
-torch
 gradio

 gradio
+torch>=1.10.0
+transformers>=4.18.0

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "model_type": "bert",
+  "do_lower_case": true,
+  "tokenizer_class": "BertTokenizer",
+  "name_or_path": "bert-base-uncased"
+}