---
language:
- en
---
### Model Description

This model is a multiclass classification model trained on the LIAR dataset using the BERT (bert-base-uncased) architecture. Its primary task is to classify short statements by truthfulness, making it suitable for fake news detection.

BERT (Bidirectional Encoder Representations from Transformers) is a transformer-based model known for its effectiveness in natural language processing tasks.

The model classifies the input text into one of six target classes: true, mostly-true, half-true, barely-true, false, and pants-fire.

Bias: The model may inherit biases present in the training data, so be aware of potential biases in its predictions.
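
For a quick smoke test, the same checkpoint can also be loaded through the high-level `pipeline` API. A minimal sketch, assuming the checkpoint's config maps label ids to names (if it does not, the pipeline reports generic `LABEL_0` … `LABEL_5` names, and the `class_mapping` in the implementation below applies):

```python
from transformers import pipeline

# Load the checkpoint as a text-classification pipeline
classifier = pipeline("text-classification",
                      model="Arjun24420/BERT-FakeNews-Classification")

# Returns the top label and its score for the input text
print(classifier("Some claim to fact-check"))
```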
### Code Implementation

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(
    "Arjun24420/BERT-FakeNews-Classification")
model = AutoModelForSequenceClassification.from_pretrained(
    "Arjun24420/BERT-FakeNews-Classification")

# Move the model to the GPU if one is available and switch to inference mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Define class labels mapping
class_mapping = {
    0: 'half-true',
    1: 'mostly-true',
    2: 'false',
    3: 'true',
    4: 'barely-true',
    5: 'pants-fire'
}


def predict(text):
    # Tokenize the input text and move the tensors to the model's device
    inputs = tokenizer(text, padding=True, truncation=True,
                       max_length=512, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Get model output (logits) without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities over the six classes
    probs = outputs.logits.softmax(dim=1)

    # Map each class label to its probability
    class_probabilities = {class_mapping[i]: probs[0, i].item()
                           for i in range(probs.shape[1])}

    return class_probabilities
```
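
A quick usage sketch; the input claim below is an illustrative placeholder, not a statement from the LIAR dataset:

```python
# Any short claim-like statement works as input
scores = predict("The unemployment rate fell to its lowest level in fifty years.")

# Print the classes from most to least probable
for label, prob in sorted(scores.items(), key=lambda item: item[1], reverse=True):
    print(f"{label}: {prob:.3f}")
```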