# Source: VasudevAdhikari, commit 83afbd5 — "Modify app.py without typehints to avoid errors"
import gradio as gr
import numpy as np
import torch
from transformers import (
AutoTokenizer,
AutoModel,
AutoModelForSequenceClassification
)
from scipy.special import softmax
# ==============================
# LOAD MODELS ONCE (GLOBAL)
# ==============================
# Both pipelines are loaded at module import so every request served by the
# Gradio app reuses the same in-memory weights instead of reloading per call.
# Encoder: general-purpose BERT; nlp_encode_sentence summarizes its CLS vector.
bert_model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
bert_model = AutoModel.from_pretrained(bert_model_name)
# eval() switches to inference behavior (e.g. disables dropout).
bert_model.eval()
# Sentiment head: 3-class Twitter RoBERTa.
# NOTE(review): downstream code assumes logit order [negative, neutral,
# positive] — confirm against the model card.
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)
sentiment_model.eval()
# ==============================
# CORE FUNCTION (NO PANDAS)
# Input: list of lists
# Output: list of lists
# ==============================
def nlp_encode_sentence(values):
    """Turn a table of sentences into numeric feature rows.

    Parameters
    ----------
    values : list of lists
        Rows from the Gradio ``Dataframe`` (``type="array"``); the first
        column of each row is the sentence text. Cells may arrive as
        ``None`` or non-string values, so they are coerced defensively.

    Returns
    -------
    list of lists
        One row per input row: ``[embedding_mean, embedding_median,
        embedding_std, embedding_min, embedding_max, sentiment_score]``.
        Blank/empty sentences yield an all-zero row rather than crashing
        the whole batch.
    """
    feature_rows = []
    # Empty or missing table from the UI: nothing to encode.
    if not values:
        return feature_rows
    for row in values:
        # Guard against empty rows and None/numeric cells from the UI.
        sentence = "" if not row or row[0] is None else str(row[0])
        if not sentence.strip():
            # A blank cell would still tokenize, but the resulting stats
            # are meaningless; emit a deterministic zero row instead.
            feature_rows.append([0.0] * 6)
            continue
        # --- BERT CLS-embedding summary statistics -----------------------
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True
        )
        with torch.no_grad():
            outputs = bert_model(**inputs)
        # Take the [CLS] token vector (position 0) as the sentence embedding.
        cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
        embedding_mean = float(np.mean(cls_embedding))
        embedding_median = float(np.median(cls_embedding))
        embedding_std = float(np.std(cls_embedding))
        embedding_min = float(np.min(cls_embedding))
        embedding_max = float(np.max(cls_embedding))
        # --- Sentiment score ---------------------------------------------
        sentiment_inputs = sentiment_tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True
        )
        with torch.no_grad():
            sentiment_outputs = sentiment_model(**sentiment_inputs)
        probs = softmax(sentiment_outputs.logits.numpy()[0])
        # P(positive) - P(negative) in [-1, 1]; assumes logit order
        # [neg, neu, pos] — NOTE(review): confirm against the model card.
        sentiment_score = float(probs[2] - probs[0])
        feature_rows.append([
            embedding_mean,
            embedding_median,
            embedding_std,
            embedding_min,
            embedding_max,
            sentiment_score
        ])
    return feature_rows
# ==============================
# GRADIO APP
# ==============================
with gr.Blocks() as demo:
    gr.Markdown("### NLP Encoder")
    # One sentence per row, single "value" column, delivered as a list of lists.
    sentences_table = gr.Dataframe(
        headers=["value"],
        datatype=["str"],
        type="array",
    )
    # Output mirrors the feature order produced by nlp_encode_sentence.
    features_table = gr.Dataframe(
        headers=[
            "embedding_mean",
            "embedding_median",
            "embedding_std",
            "embedding_min",
            "embedding_max",
            "sentiment_score",
        ],
        type="array",
    )
    run_button = gr.Button("Run")
    # Clicking runs the encoder over every row of the input table.
    run_button.click(
        fn=nlp_encode_sentence,
        inputs=sentences_table,
        outputs=features_table,
    )

if __name__ == "__main__":
    demo.launch()