gefedya
/

transformer_nlp_ops

Model card Files Files and versions

xet

Community

gefedya committed on Apr 17, 2023

Commit

13c67de

1 Parent(s): 4fbc42a

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -38

app.py CHANGED Viewed

@@ -1,53 +1,53 @@
 import streamlit as st
 from datasets import load_dataset
-import pandas as pd
-import numpy as np
-from transformers import pipeline
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
 import json
-st.markdown("### Here is a sentiment model trained on a slice of a twitter dataset")
-st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
-# ^-- you can show the user text, images, and a limited subset of HTML - just like in Jupyter
-text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model. No kidding")
-# ^-- show a text box. The variable `text` holds the string that is currently in it
-### Loading and tokenizing data
-data = load_dataset("carblacac/twitter-sentiment-analysis")
-tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
-dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
-dataset = dataset.rename_column("feeling", "labels")
-### Importing existing model
-model = AutoModelForSequenceClassification.from_pretrained("siebert/sentiment-roberta-large-english", num_labels=2)
-# model.to('cpu');
-### Training model
-trainer = Trainer(
-    model=model, train_dataset=dataset["train"].shuffle().select(range(10000)),
-    eval_dataset = dataset['test'].select(range(5000)),
-    args=TrainingArguments(
-        output_dir="./my_saved_model", overwrite_output_dir=True,
-        num_train_epochs=1, per_device_train_batch_size=4,
-        save_steps=10_000, save_total_limit=2),
-)
-trainer.train()
-### Using our new BEAST model to predict the sentiment of users' entries
-# TODO: add predictions
-model()
-#classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
-#raw_predictions = classifier(text)
 # this is the huggingface.transformers code you already know -- it can be replaced with anything from fairseq to catboost
-st.markdown(f"{raw_predictions}")
 # display the model's results in a text field, for the user's amusement

 import streamlit as st
 from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
 import json
+@st.cache()
+def get_model():
+    model = AutoModelForSequenceClassification.from_pretrained("siebert/sentiment-roberta-large-english", num_labels=2)
+    model.load_state_dict(torch.load('model'))
+    return model
+@st.cache()
+def get_tokenizer():
+    tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
+    return tokenizer
+def make_prediction():
+    model = get_model()
+    tokenizer = tokenizer()
+st.header("Sentiment analysis on twitter datasets")  # page title
+st.markdown("Here is a sentiment model further trained on a slice of a twitter dataset")  # one-line description under the title
+st.markdown("""
+<img width=700px src='https://imagez.tmz.com/image/73/4by3/2020/10/05/735aaee2f6b9464ca220e62ef797dab0_md.jpg'>
+""", unsafe_allow_html=True)  # banner image embedded as a raw HTML <img> tag, hence unsafe_allow_html=True
+text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model! No kidding")
+# ^-- show a text box. The variable `text` holds the string that is currently in it. NOTE(review): no visible live code reads `text` - confirm it is still needed
+### Loading and tokenizing data
+# data = load_dataset("carblacac/twitter-sentiment-analysis")
+# tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
+# dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
+# dataset = dataset.rename_column("feeling", "labels")
+with st.form(key='input_form'):
+    to_analyze = st.text_input(label='Input text to be analyzed')
+    button = st.form_submit_button(label='Classify')
+if button:
+    if to_analyze:
+        make_prediction(to_analyze)
+    else:
+        st.markdown("Empty request. Please resubmit")
+# classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
+# raw_predictions = classifier(text)
 # this is the huggingface.transformers code you already know -- it can be replaced with anything from fairseq to catboost
+# st.markdown(f"{raw_predictions}")
 # display the model's results in a text field, for the user's amusement