| | import gradio as gr |
| | import numpy as np |
| | import torch |
| | from transformers import ( |
| | AutoTokenizer, |
| | AutoModel, |
| | AutoModelForSequenceClassification |
| | ) |
| | from scipy.special import softmax |
| |
|
| | |
| | |
| | |
| |
|
# --- Model setup (runs once at import time; downloads weights on first run) ---

# General-purpose BERT encoder used only to produce sentence embeddings.
bert_model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
bert_model = AutoModel.from_pretrained(bert_model_name)
# eval() disables dropout etc. so repeated encodings are deterministic.
bert_model.eval()

# RoBERTa fine-tuned for 3-way tweet sentiment (labels: 0=negative,
# 1=neutral, 2=positive — per the cardiffnlp model card; used by the
# probs[2] - probs[0] score below).
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)
sentiment_model.eval()
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
def nlp_encode_sentence(values):
    """Encode each input row's sentence into a 6-value numeric feature row.

    Parameters
    ----------
    values : list
        Rows from a ``gr.Dataframe(type="array")``; each row is a sequence
        whose first element is the sentence text. Cells may arrive as
        ``None`` (empty cell) or as non-string values typed into the grid.

    Returns
    -------
    list[list[float]]
        One row per input containing summary statistics of the BERT [CLS]
        embedding (mean, median, std, min, max) plus a sentiment score in
        [-1, 1] computed as P(positive) - P(negative).
    """
    feature_rows = []

    for row in values:
        # Robustness fix: HF tokenizers raise on non-string input, so an
        # empty Dataframe cell (None) or a number typed into the grid would
        # crash the whole batch. Coerce to a safe string first.
        raw = row[0]
        sentence = "" if raw is None else str(raw)

        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True,
        )

        with torch.no_grad():
            outputs = bert_model(**inputs)

        # [CLS] token embedding: position 0 of the final hidden layer,
        # squeezed from (1, hidden_dim) down to (hidden_dim,).
        cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy()

        # Scalar summary statistics of the embedding vector.
        embedding_mean = float(np.mean(cls_embedding))
        embedding_median = float(np.median(cls_embedding))
        embedding_std = float(np.std(cls_embedding))
        embedding_min = float(np.min(cls_embedding))
        embedding_max = float(np.max(cls_embedding))

        sentiment_inputs = sentiment_tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True,
        )

        with torch.no_grad():
            sentiment_outputs = sentiment_model(**sentiment_inputs)

        # Labels for this model: 0=negative, 1=neutral, 2=positive, so
        # P(pos) - P(neg) yields a signed score in [-1, 1].
        probs = softmax(sentiment_outputs.logits.numpy()[0])
        sentiment_score = float(probs[2] - probs[0])

        feature_rows.append([
            embedding_mean,
            embedding_median,
            embedding_std,
            embedding_min,
            embedding_max,
            sentiment_score,
        ])

    return feature_rows
| |
|
| |
|
| | |
| | |
| | |
| |
|
# --- UI: a sentence table in, a feature table out, one button between. ---
with gr.Blocks() as demo:
    gr.Markdown("### NLP Encoder")

    # Input grid: a single string column of sentences, delivered to the
    # handler as a list of rows (type="array").
    sentence_table = gr.Dataframe(
        headers=["value"], datatype=["str"], type="array"
    )

    # Output grid: one numeric feature row per input sentence.
    feature_table = gr.Dataframe(
        headers=[
            "embedding_mean",
            "embedding_median",
            "embedding_std",
            "embedding_min",
            "embedding_max",
            "sentiment_score",
        ],
        type="array",
    )

    run_button = gr.Button("Run")
    run_button.click(nlp_encode_sentence, sentence_table, feature_table)


if __name__ == "__main__":
    demo.launch()
| |
|