Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from datasets import load_dataset | |
| from transformers import pipeline, DistilBertForSequenceClassification, DistilBertTokenizerFast, AutoModelForSequenceClassification, AutoTokenizer, TFAutoModelForSequenceClassification | |
# Names of the sentiment-analysis models this app knows how to offer.
MODEL_OPTS = [
    'finetuned',
    'default',
    'bertweet-base-sentiment-analysis',
    'twitter-roberta-base',
    'distilRoberta-financial-sentiment',
]
# The first two entries are, in order, the fine-tuned model and the
# transformers pipeline default.
FINETUNED_OPT, DEFAULT_OPT = MODEL_OPTS[:2]
def map_decision_to_string(example):
    """Return a one-key dict with the mapped 'decision' value of ``example``.

    The value stored under ``example['decision']`` is looked up in the
    module-level ``decision_to_str`` mapping, and the result is returned
    under the same ``'decision'`` key.

    NOTE(review): ``decision_to_str`` is not defined in the visible part of
    this file -- confirm it exists before this helper is called.
    """
    mapped_decision = decision_to_str[example['decision']]
    return {'decision': mapped_decision}
def load_abstracts():
    """Load the HUPD 'sample' configuration and return its training abstracts.

    Returns:
        The 'abstract' column of the 'train' split produced by
        ``load_dataset`` for the options below.
    """
    hupd_options = {
        'name': 'sample',
        'data_files': "https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        'icpr_label': None,
        'train_filing_start_date': '2016-01-01',
        'train_filing_end_date': '2016-01-31',
        'val_filing_start_date': '2016-01-01',
        'val_filing_end_date': '2016-01-01',
    }
    splits = load_dataset('HUPD/hupd', **hupd_options)
    train_abstracts = splits['train']['abstract']
    # Drop the reference to the loaded splits; only the abstracts are needed.
    splits = []
    return train_abstracts
def load_model(opt):
    """Load the model and tokenizer that belong to a model option.

    Args:
        opt: One of the names in ``MODEL_OPTS``. The longer spelling
            ``'twitter-roberta-base-sentiment'`` is accepted as well, so
            callers using either name get the Twitter RoBERTa checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple. Both are ``None`` for ``DEFAULT_OPT``
        (the caller then relies on the pipeline's own default) and for an
        unrecognised option, in which case a message is printed.
    """
    # Hub checkpoints for every option loaded through the Auto* classes.
    # 'twitter-roberta-base' is the name listed in MODEL_OPTS; previously only
    # the '-sentiment' spelling was matched, so the listed option never loaded.
    auto_checkpoints = {
        'bertweet-base-sentiment-analysis': "finiteautomata/bertweet-base-sentiment-analysis",
        'twitter-roberta-base': "cardiffnlp/twitter-roberta-base-sentiment",
        'twitter-roberta-base-sentiment': "cardiffnlp/twitter-roberta-base-sentiment",
        'distilRoberta-financial-sentiment': "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
    }

    if opt == DEFAULT_OPT:
        # Nothing to load: the pipeline default is used downstream.
        return None, None

    if opt == FINETUNED_OPT:
        # The fine-tuned weights are paired with the base DistilBERT tokenizer.
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        model = DistilBertForSequenceClassification.from_pretrained('saccharinedreams/finetuned-distilbert-base-uncased-for-hupd')
        return model, tokenizer

    checkpoint = auto_checkpoints.get(opt)
    if checkpoint is None:
        print("Incorrect model selection. Try again!")
        return None, None

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return model, tokenizer
def sentiment_analysis(model, tokenizer):
    """Build a 'text-classification' pipeline.

    When both ``model`` and ``tokenizer`` are truthy they are handed to the
    pipeline; otherwise the pipeline is created without them.
    """
    if not (model and tokenizer):
        return pipeline('text-classification')
    return pipeline('text-classification', model=model, tokenizer=tokenizer)
# Page header: the app title, links to the hosted app and to the fine-tuned
# model, and a one-line description of the model.
st.title('Finetuned Sentiment Analysis for US Patents')
st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
# The closing parenthesis is required for Markdown to render this as a link.
st.markdown('Link to the model - [model repo](https://huggingface.co/saccharinedreams/finetuned-distilbert-base-uncased-for-hupd)')
st.markdown('This model was finetuned on the Harvard USPTO Patent Dataset and uses Distilbert-Base-Uncased.')
abstracts = load_abstracts()
# With no abstracts there is nothing to select or classify, and abstracts[0]
# below would raise, so report the problem and end this script run.
if not abstracts:
    st.error('No abstracts could be loaded from the HUPD dataset.')
    st.stop()
print(len(abstracts))
print(abstracts[0])
# Preselect the first abstract; index 0 is what searching for abstracts[0]
# always produced, without scanning the whole list.
dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=0)
model, tokenizer = load_model(FINETUNED_OPT)
# Take in user input
#user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
# The user can interact with a dropdown menu to choose a sentiment analysis model.
#dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
#model, tokenizer = load_model(dropdown_value)
# Run the classifier on the selected abstract
result = sentiment_analysis(model, tokenizer)(dropdown_abstracts)
# Display the sentiment analysis results
st.markdown('Labels 0, 1: Not accepted, Accepted')
st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])