Spaces:
Runtime error
Runtime error
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from PIL import Image
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import model_from_json
#####################################################################################################################################
st.set_page_config(layout='wide')

# Sidebar: Facebook logo + optional tables of the most frequent words
# of each vocabulary (loaded lazily, only when the checkbox is ticked).
with st.sidebar:
    col1, col2, col3 = st.columns(3)
    with col2:
        logo_facebook = Image.open('static/logo_facebook.png')
        st.image(logo_facebook)

    # Top words of the hateful vocabulary
    hateful = st.checkbox('Check to see top hateful words used')
    if hateful:
        # CSV of word/count pairs, assumed sorted by count descending — TODO confirm
        df_hate = pd.read_csv('static/data_hate.csv')
        number_chosen_hate = st.number_input('How many top hateful words do you want to see?', value=5)
        df_chosen_hate = df_hate.iloc[:number_chosen_hate, :]
        st.write(f'{number_chosen_hate} most used words in the hateful vocabulary:')
        st.dataframe(df_chosen_hate)

    # Top words of the non-hateful vocabulary
    non_hateful = st.checkbox('Check to see top non-hateful words used')
    if non_hateful:
        df_no_hate = pd.read_csv('static/data_no_hate.csv')
        number_chosen = st.number_input('How many top non-hateful words do you want to see?', value=5)
        df_chosen = df_no_hate.iloc[:number_chosen, :]
        # FIX: the original message said "hateful vocabulary" for the non-hateful table
        st.write(f'{number_chosen} most used words in the non-hateful vocabulary:')
        st.dataframe(df_chosen)
#####################################################################################################################################
st.title('Facebook: Hateful Memes recognition')
st.write("---")

# Meme browser: slider over the first 10 images of the examples folder,
# selected image rendered in the centre column.
img_filepath = 'static/images_streamlit'
# FIX: sorted() takes the iterable directly — no need for a pass-through comprehension
list_images = sorted(os.listdir(img_filepath))
st.subheader('Some examples of hateful and non-hateful memes:')
with st.expander('Want to see some memes?'):
    # FIX: slice instead of index loop — idiomatic, and safe if fewer than 10 images exist
    selected_image = st.select_slider('Select a meme to show it',
                                      options=list_images[:10],
                                      value=list_images[0])
    col1, col2, col3 = st.columns(3)
    with col2:
        st.image(f'{img_filepath}/{selected_image}')
st.write("---")
#####################################################################################################################################
# Interactive check: is a user-supplied word in the hateful and/or
# non-hateful vocabulary, and with what frequency ratio?
st.subheader('Is a word in our hateful vocabulary or not?')
with st.expander('Hateful? Non-hateful?'):
    word = st.text_input('Write a word to test it', 'like')
    word_lower = word.lower()
    # Reload here in case the sidebar checkboxes were never ticked
    df_hate = pd.read_csv('static/data_hate.csv')
    df_no_hate = pd.read_csv('static/data_no_hate.csv')

    # FIX: the original used a bare `except:` around this whole section to
    # paper over a NameError when the word was in only one vocabulary, which
    # also printed a wrong extra "not in our hateful and non-hateful
    # vocabulary" message. Track the counts explicitly instead.
    appeared_hate = None
    appeared_no_hate = None

    if word_lower not in df_hate['word'].values:
        st.write(f'"{word}" is not in our hateful vocabulary.')
    else:
        appeared_hate = df_hate[df_hate['word'] == word_lower]['count'].values[0]
        st.write(f'"{word}" is in our hateful vocabulary, it appears {appeared_hate} times.')

    if word_lower not in df_no_hate['word'].values:
        # FIX: original f-string was missing the space after the closing quote
        st.write(f'"{word}" is not in our non-hateful vocabulary.')
    else:
        appeared_no_hate = df_no_hate[df_no_hate['word'] == word_lower]['count'].values[0]
        st.write(f'"{word}" is in our non-hateful vocabulary, it appears {appeared_no_hate} times.')

    # The ratio is only meaningful when the word appears in both vocabularies
    if appeared_hate is not None and appeared_no_hate is not None:
        st.write(f'Ratio hateful vs non-hateful: {round(appeared_hate/appeared_no_hate, 2)}.')
st.write("---")
#####################################################################################################################################
def _barh_top_words(df_words, start_word, end_word, subheader):
    """Render a horizontal bar chart of word counts ranked start_word..end_word (1-based, inclusive)."""
    df_slider = df_words.iloc[start_word - 1:end_word, :]
    fig, ax = plt.subplots()
    bars = ax.barh(y=df_slider['word'], width=df_slider['count'],
                   color=['darkmagenta', 'darkblue', 'darkgreen', 'darkred', 'darkgrey', 'darkorange'])
    ax.bar_label(bars)
    # FIX: original did `ax = plt.gca().invert_yaxis()`, binding None to ax;
    # call invert_yaxis on the axes directly (most frequent word on top)
    ax.invert_yaxis()
    st.subheader(subheader)
    st.pyplot(fig)


# Barplot of the top words of the selected vocabulary. The three branches of
# the original duplicated the slider and the plotting code; they now only
# choose the data frame and the subheader, and share one plotting helper.
st.subheader('Barplot of top selected words:')
with st.expander('Select to choose how many top words you want to see and their count'):
    option = st.selectbox('Which vocabulary to select?',
                          ('Hateful vocabulary', 'Non-hateful vocabulary', 'Both vocabularies'))
    st.write('You selected', option)
    if option == 'Hateful vocabulary':
        # Keep only words seen at least 20 times (column 1 is the count)
        df_selected = df_hate[df_hate.iloc[:, 1] >= 20]
        subheader = 'Selected words hateful vocabulary:'
    elif option == 'Non-hateful vocabulary':
        # Keep only words seen at least 30 times
        df_selected = df_no_hate[df_no_hate.iloc[:, 1] >= 30]
        subheader = 'Selected words non-hateful vocabulary:'
    else:
        df_selected = pd.read_csv('./static/data_top.csv')
        subheader = 'Selected words (hateful & non-hateful vocabularies):'

    start_word, end_word = st.select_slider(
        'Select a range of top words',
        options=list(range(1, df_selected.shape[0] + 1)),
        value=(1, 10))
    _barh_top_words(df_selected, start_word, end_word, subheader)
st.write("---")
#####################################################################################################################################
# Classify a user-supplied sentence: embed it with a sentence-transformer,
# feed the embedding to a saved Keras classifier, and show the predicted
# probability of the sentence being hateful on a gauge chart.
st.subheader('Testing some sentences if you dare:')
with st.expander('Input a sentence and check the probability of it being hateful:'):
    # Sentence-embedding model (downloaded/cached on first use)
    model_nlp = SentenceTransformer('all-mpnet-base-v2')
    sentence = st.text_input('Write a sentence to test it.', "Hopefully I don't write some hateful content.")
    # Single embedding reshaped to a (1, embedding_dim) batch for predict()
    preprocessed_sentence = model_nlp.encode(sentence).reshape(1, -1)

    # Rebuild the Keras classifier from its JSON architecture + saved weights.
    # FIX: context manager instead of manual open()/close()
    with open('static/model_nlp/model_nlp.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("static/model_nlp/model_nlp.h5")

    # Predicted probability of the sentence being hateful
    # (model output assumed to be shape (1, 1) sigmoid — TODO confirm)
    y_pred = loaded_model.predict(preprocessed_sentence)
    proba_hateful = float(y_pred[0][0])

    # Gauge chart of the probability, with the decision threshold at 0.5
    fig1 = go.Figure(go.Indicator(
        mode="gauge+number+delta",
        value=proba_hateful,
        delta={'reference': 0.50},
        gauge={'axis': {'range': [None, 1]},
               'steps': [
                   {'range': [0, 0.5], 'color': "lightgray"},
                   {'range': [0.5, 0.75], 'color': "gray"}],
               'threshold': {'line': {'color': "red", 'width': 4}, 'thickness': 0.9, 'value': 0.5}},
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': 'Probability of your sentence being hateful'}))
    st.plotly_chart(fig1)

    if proba_hateful < 0.5:
        st.write("Congrats, it's not hateful!!!")
    else:
        st.write("Shame on you, it's hateful!!!")
st.write("---")
#####################################################################################################################################
# Footer: Artefact logo pinned to the rightmost of five equal columns
footer_columns = st.columns(5)
with footer_columns[4]:
    st.image(Image.open('static/logo_artefact.png'))