|
|
import json

from datasets import load_dataset
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
|
|
|
|
|
|
|
|
# allow_output_mutation=True: st.cache cannot hash / safely copy a torch
# nn.Module, so we opt out of mutation checking for the cached model object.
@st.cache(allow_output_mutation=True)
def get_model():
    """Load the base sentiment classifier and apply the fine-tuned weights.

    Returns:
        A ``RobertaForSequenceClassification`` (2 labels) with the weights
        from the local ``model`` checkpoint file, set to eval mode.
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        "siebert/sentiment-roberta-large-english", num_labels=2
    )
    # map_location="cpu" so a checkpoint saved on GPU still loads on
    # CPU-only hosts (typical for Streamlit deployments).
    model.load_state_dict(torch.load('model', map_location="cpu"))
    # Inference only: disable dropout etc.
    model.eval()
    return model
|
|
|
|
|
# allow_output_mutation=True: tokenizer objects mutate internal state on use,
# which would otherwise trigger Streamlit's cached-object-mutated warning.
@st.cache(allow_output_mutation=True)
def get_tokenizer():
    """Return the tokenizer matching the sentiment model.

    Returns:
        The pretrained tokenizer for
        ``siebert/sentiment-roberta-large-english``.
    """
    tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
    return tokenizer
|
|
|
|
|
def make_prediction(to_analyze):
    """Classify *to_analyze* with the fine-tuned model and render the result.

    Args:
        to_analyze: The user-provided text to classify.

    Side effects:
        Writes the predicted label and confidence to the Streamlit page.
    """
    model = get_model()
    # Original called the undefined name ``tokenizer()``; the loader is
    # ``get_tokenizer()``.
    tokenizer = get_tokenizer()
    inputs = tokenizer(to_analyze, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1).squeeze()
    # NOTE(review): assumes index 1 == positive, the convention of the
    # siebert/sentiment-roberta-large-english base model — confirm against
    # the fine-tuning label mapping.
    label = "Positive" if int(probs.argmax()) == 1 else "Negative"
    st.markdown(f"**{label}** (confidence: {probs.max().item():.2%})")
|
|
|
|
|
|
|
|
|
|
|
# --- Static page chrome: title, blurb, banner image, demo text area. ---
st.header("Sentiment analysis on twitter datasets")


st.markdown("Here is a sentiment model further trained on a slice of a twitter dataset")


# unsafe_allow_html is required for the raw <img> tag below.
st.markdown("""


<img width=700px src='https://imagez.tmz.com/image/73/4by3/2020/10/05/735aaee2f6b9464ca220e62ef797dab0_md.jpg'>


""", unsafe_allow_html=True)




# NOTE(review): this text area's value is never read — presumably a leftover
# demo widget; the real input is collected in the form below.
text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model! No kidding")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Input form: collect text and classify it on submit.
with st.form(key='input_form'):


    to_analyze = st.text_input(label='Input text to be analyzed')


    button = st.form_submit_button(label='Classify')


    # Runs on every rerun; True only on the rerun triggered by a submit.
    if button:


        # Guard against an empty submission before invoking the model.
        if to_analyze:


            make_prediction(to_analyze)


        else:


            st.markdown("Empty request. Please resubmit")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|