# app.py — Gradio sentiment-analysis demo (Hugging Face Space by slickdata, rev 610c96a)
# Import libraries
import os
import uuid
import pandas as pd
import numpy as np
from scipy.special import softmax
import gradio as gr
from google.colab import drive
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import torch
from transformers import AutoTokenizer
from transformers import AutoConfig
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import IntervalStrategy
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
from transformers import pipeline
from transformers import TrainingArguments
from transformers import Trainer
from torch import nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification
# Define the model path where the pre-trained model is saved on the Hugging Face model hub
model_path = "slickdata/finetuned-Sentiment-classfication-ROBERTA-model"
# Initialize the tokenizer for the pre-trained model.
# NOTE(review): tokenizer is loaded from the base 'roberta-base' checkpoint while the
# config/model come from the fine-tuned repo — verify the fine-tune did not change the vocab.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# Load the configuration for the pre-trained model (architecture, label count, etc.)
config = AutoConfig.from_pretrained(model_path)
# Load the fine-tuned sequence-classification model weights from the Hub
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Tweet normalization used before tokenization
def preprocess(text):
    """Normalize a tweet: mask @-mentions as '@user' and URLs as 'http'.

    Tokens are produced by splitting on single spaces; each token is
    rewritten independently and the result re-joined with spaces.
    """
    cleaned = []
    for token in text.split(" "):
        # A lone '@' is kept as-is; only real mentions ('@' + name) are masked.
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        # Collapse any URL-looking token to the literal 'http'.
        if token.startswith('http'):
            token = 'http'
        cleaned.append(token)
    return " ".join(cleaned)
# Define a function to perform sentiment analysis on the input text
def sentiment_analysis(text):
    """Classify *text* and return a {label: probability} dict.

    Labels are 'Negative', 'Neutral', 'Positive'; probabilities come from
    a softmax over the model's logits and sum to 1.
    """
    # Preprocess the input text (mask mentions and URLs)
    text = preprocess(text)
    # Tokenize the input text using the pre-trained tokenizer
    encoded_input = tokenizer(text, return_tensors='pt')
    # Inference only: disable autograd to avoid building a graph and wasting memory
    with torch.no_grad():
        output = model(**encoded_input)
    # logits has shape (1, num_labels); take the single row for this input
    scores_ = output.logits[0].numpy()
    # Softmax turns raw logits into a probability distribution over the labels
    scores_ = softmax(scores_)
    # Map label names onto their probabilities (order must match the model's id2label)
    labels = ['Negative', 'Neutral', 'Positive']
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}
    # Return the scores
    return scores
# Define a Gradio interface to interact with the model
demo = gr.Interface(
    fn=sentiment_analysis, # Function to perform sentiment analysis
    inputs=gr.Textbox(placeholder="Write your tweet here..."), # Text input field
    outputs="label", # gr.Label component: renders the returned {label: prob} dict as a bar chart
    interpretation="default", # NOTE(review): removed in Gradio 4.x — confirm the Space pins Gradio 3.x
    examples=[["This is wonderful!"]]) # Example input(s) to display on the interface
# Launch the Gradio interface; share=True creates a public link, debug=True surfaces errors
demo.launch(share=True, debug=True)