"""Gradio app that turns sentences into a small numeric feature vector.

For every input row the first cell is encoded with BERT (summary statistics
of the [CLS] embedding) and scored with a RoBERTa sentiment classifier.
"""

import gradio as gr
import numpy as np
import torch
from scipy.special import softmax
from transformers import (
    AutoModel,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# ==============================
# LOAD MODELS ONCE (GLOBAL)
# ==============================
bert_model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
bert_model = AutoModel.from_pretrained(bert_model_name)
bert_model.eval()

sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    sentiment_model_name
)
sentiment_model.eval()

# Explicit token cap so truncation does not rely on each tokenizer's config
# declaring a model maximum length.
# NOTE(review): 512 is assumed to fit both checkpoints - confirm if either
# model name above is changed.
_MAX_TOKENS = 512


# ==============================
# CORE FUNCTION (NO PANDAS)
# Input:  list of lists
# Output: list of lists
# ==============================
def _cell_text(row):
    """Return the first cell of *row* as a string, or "" if it is missing."""
    if row is None or len(row) == 0:
        return ""
    cell = row[0]
    return "" if cell is None else str(cell)


def _embedding_stats(sentence):
    """Return [mean, median, std, min, max] of the BERT [CLS] embedding."""
    inputs = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
        padding=True,
    )
    with torch.no_grad():
        outputs = bert_model(**inputs)

    # Batch size is always 1 here; take the first token of the only sequence.
    cls_embedding = outputs.last_hidden_state[0, 0, :].numpy()
    return [
        float(np.mean(cls_embedding)),
        float(np.median(cls_embedding)),
        float(np.std(cls_embedding)),
        float(np.min(cls_embedding)),
        float(np.max(cls_embedding)),
    ]


def _sentiment_score(sentence):
    """Return softmax probability of class 2 minus that of class 0."""
    sentiment_inputs = sentiment_tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
        padding=True,
    )
    with torch.no_grad():
        sentiment_outputs = sentiment_model(**sentiment_inputs)

    probs = softmax(sentiment_outputs.logits.numpy()[0])
    # NOTE(review): presumably index 0 = negative and index 2 = positive for
    # this checkpoint - verify against the model card.
    return float(probs[2] - probs[0])


def nlp_encode_sentence(values):
    """Encode the first column of each row into six numeric features.

    Args:
        values: List of rows (list of lists). Only the first cell of each
            row is used. Missing or ``None`` cells are encoded as the empty
            string so that output rows stay aligned with input rows.

    Returns:
        A list with one entry per input row:
        ``[embedding_mean, embedding_median, embedding_std, embedding_min,
        embedding_max, sentiment_score]``. An empty or ``None`` input
        yields an empty list.
    """
    if not values:
        return []

    feature_rows = []
    for row in values:
        sentence = _cell_text(row)
        feature_rows.append(
            _embedding_stats(sentence) + [_sentiment_score(sentence)]
        )
    return feature_rows


# ==============================
# GRADIO APP
# ==============================
with gr.Blocks() as demo:
    gr.Markdown("### NLP Encoder")

    input_data = gr.Dataframe(
        headers=["value"],
        datatype=["str"],
        type="array",
    )

    output_data = gr.Dataframe(
        headers=[
            "embedding_mean",
            "embedding_median",
            "embedding_std",
            "embedding_min",
            "embedding_max",
            "sentiment_score",
        ],
        type="array",
    )

    btn = gr.Button("Run")
    btn.click(
        fn=nlp_encode_sentence,
        inputs=input_data,
        outputs=output_data,
    )

if __name__ == "__main__":
    demo.launch()