|
|
import torch |
|
|
from transformers import RobertaForMaskedLM, RobertaTokenizer |
|
|
import gradio as gr |
|
|
|
|
|
# Pretrained checkpoint shared by both the model and its tokenizer.
MODEL_NAME = 'roberta-base'

# from_pretrained returns the model already in eval mode (dropout disabled).
model = RobertaForMaskedLM.from_pretrained(MODEL_NAME)
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
|
|
|
|
|
|
|
|
def sentence_perplexity(sentence):
    """Return the pseudo-perplexity of *sentence* under RoBERTa.

    RoBERTa is a masked language model, so autoregressive perplexity is
    undefined for it. Instead we compute pseudo-perplexity (Salazar et al.,
    "Masked Language Model Scoring"): mask each content token in turn, take
    the model's log-probability of recovering the true token at the masked
    position, and exponentiate the mean negative log-probability.

    NOTE(review): the previous implementation fed the UNMASKED sentence to
    the model, which lets an MLM trivially copy each token from its own
    input — every sentence then looks near-perfect. Masking per position
    fixes that.

    Args:
        sentence: plain-text sentence to score.

    Returns:
        float pseudo-perplexity; 1.0 for sentences with no content tokens.
    """
    encoded = tokenizer(sentence, return_tensors='pt')
    input_ids = encoded['input_ids']

    # Positions 0 and -1 hold the <s> / </s> special tokens; score only
    # the content tokens between them.
    n_tokens = input_ids.size(1)
    if n_tokens <= 2:
        # Nothing to score (empty string): avoid mean() over an empty
        # tensor, which would yield NaN.
        return 1.0

    # One masked copy of the sentence per content position.
    positions = torch.arange(1, n_tokens - 1)
    rows = torch.arange(len(positions))
    batch = input_ids.repeat(len(positions), 1)
    batch[rows, positions] = tokenizer.mask_token_id

    with torch.no_grad():
        logits = model(batch).logits

    # log_softmax is numerically more stable than softmax followed by log.
    log_probs = torch.log_softmax(logits, dim=-1)

    # Log-probability of each true token at its own masked slot.
    true_ids = input_ids[0, positions]
    token_log_probs = log_probs[rows, positions, true_ids]

    return torch.exp(-token_log_probs.mean()).item()
|
|
|
|
|
def weird_score(sentence):
    """Format the sentence's perplexity as a "weird score" percentage.

    Maps perplexity p in [1, inf) onto [0, 100) via (p - 1) / (p + 1) * 100,
    so a maximally predictable sentence scores 0% and increasingly
    surprising sentences approach (but never reach) 100%.

    Args:
        sentence: plain-text sentence to score.

    Returns:
        Display string such as "Weird Score: 42.00%".
    """
    perplexity = sentence_perplexity(sentence)
    # Previously this local was named `weird_score`, shadowing the function
    # itself; renamed to keep the function callable from within its scope.
    score = (perplexity - 1) / (perplexity + 1) * 100
    return f"Weird Score: {score:.2f}%"
|
|
|
|
|
|
|
|
# Quick sanity check: an ordinary sentence versus an absurd one.
for sentence in (
    "This is a normal sentence.",
    "Giraffes are known to be fluent in six languages.",
):
    print(weird_score(sentence))
|
|
|
|
|
|
|
|
# Gradio UI: one textbox in, the formatted weird-score string out.
iface = gr.Interface(
    fn=weird_score,
    # gr.inputs.* was deprecated in Gradio 2.x and removed in 3.0;
    # components are now used directly (gr.Textbox).
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs="text",
    title="RoBERTa Weird Score Calculator",
    description="This app calculates the weird score percentage of a sentence using RoBERTa."
)

iface.launch()
|
|
|