# app.py — Gradio sentiment-analysis demo (Hugging Face Space by slickdata, rev 610c96a)
# Import libraries
import os
import uuid
import pandas as pd
import numpy as np
from scipy.special import softmax
import gradio as gr
from google.colab import drive
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import torch
from transformers import AutoTokenizer
from transformers import AutoConfig
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import IntervalStrategy
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
from transformers import pipeline
from transformers import TrainingArguments
from transformers import Trainer
from torch import nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification
# Define the model path where the pre-trained model is saved on the Hugging Face model hub
model_path = "slickdata/finetuned-Sentiment-classfication-ROBERTA-model"
# Initialize the tokenizer for the pre-trained model.
# NOTE(review): tokenizer is loaded from the base 'roberta-base' checkpoint while the
# config/model come from the fine-tuned repo — verify the fine-tune did not change the vocab.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# Load the configuration for the pre-trained model (architecture, label count, etc.)
config = AutoConfig.from_pretrained(model_path)
# Load the fine-tuned sequence-classification model weights from the Hub
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Tweet normalization used before tokenization
def preprocess(text):
    """Normalize a tweet: mask @-mentions as '@user' and URLs as 'http'.

    Tokens are produced by splitting on single spaces; each token is
    rewritten independently and the result re-joined with spaces.
    """
    cleaned = []
    for token in text.split(" "):
        # A lone '@' is kept as-is; only real mentions ('@' + name) are masked.
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        # Collapse any URL-looking token to the literal 'http'.
        if token.startswith('http'):
            token = 'http'
        cleaned.append(token)
    return " ".join(cleaned)
# Define a function to perform sentiment analysis on the input text
def sentiment_analysis(text):
    """Classify *text* and return a {label: probability} dict.

    Labels are 'Negative', 'Neutral', 'Positive'; probabilities come from
    a softmax over the model's logits and sum to 1.
    """
    # Preprocess the input text (mask mentions and URLs)
    text = preprocess(text)
    # Tokenize the input text using the pre-trained tokenizer
    encoded_input = tokenizer(text, return_tensors='pt')
    # Inference only: disable autograd to avoid building a graph and wasting memory
    with torch.no_grad():
        output = model(**encoded_input)
    # logits has shape (1, num_labels); take the single row for this input
    scores_ = output.logits[0].numpy()
    # Softmax turns raw logits into a probability distribution over the labels
    scores_ = softmax(scores_)
    # Map label names onto their probabilities (order must match the model's id2label)
    labels = ['Negative', 'Neutral', 'Positive']
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}
    # Return the scores
    return scores
# Define a Gradio interface to interact with the model
demo = gr.Interface(
    fn=sentiment_analysis, # Function to perform sentiment analysis
    inputs=gr.Textbox(placeholder="Write your tweet here..."), # Text input field
    outputs="label", # gr.Label component: renders the returned {label: prob} dict as a bar chart
    interpretation="default", # NOTE(review): removed in Gradio 4.x — confirm the Space pins Gradio 3.x
    examples=[["This is wonderful!"]]) # Example input(s) to display on the interface
# Launch the Gradio interface; share=True creates a public link, debug=True surfaces errors
demo.launch(share=True, debug=True)