Spaces:

Palplatine
/

Project_artefact_memes

Runtime error

File size: 8,476 Bytes

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import model_from_json
import plotly.graph_objects as go
from PIL import Image
import streamlit as st


#####################################################################################################################################
st.set_page_config(layout='wide')

# Sidebar: Facebook logo + optional tables of the most frequent words of each vocabulary
with st.sidebar:
    # Three equal columns; the logo is placed in the middle one
    col1, col2, col3 = st.columns(3)
    with col2:
        logo_facebook = Image.open('static/logo_facebook.png')
        st.image(logo_facebook)

    # Checkboxes to see some info on our vocabularies
    hateful = st.checkbox('Check to see top hateful words used')

    if hateful:
        # Loading some hateful text data
        df_hate = pd.read_csv('static/data_hate.csv')

        # min_value/step keep the input a positive integer: with 0 or a negative
        # number the slice below would no longer mean "the first N rows"
        number_chosen_hate = st.number_input(
            'How many top hateful words do you want to see?',
            min_value=1, value=5, step=1)
        df_chosen_hate = df_hate.iloc[:number_chosen_hate, :]

        st.write(f'{number_chosen_hate} most used words in the hateful vocabulary:')
        st.dataframe(df_chosen_hate)

    non_hateful = st.checkbox('Check to see top non-hateful words used')

    if non_hateful:
        # Loading some non-hateful text data
        df_no_hate = pd.read_csv('static/data_no_hate.csv')

        number_chosen = st.number_input(
            'How many top non-hateful words do you want to see?',
            min_value=1, value=5, step=1)
        df_chosen = df_no_hate.iloc[:number_chosen, :]

        # The caption names the non-hateful vocabulary, matching the data shown
        st.write(f'{number_chosen} most used words in the non-hateful vocabulary:')
        st.dataframe(df_chosen)


#####################################################################################################################################
st.title('Facebook: Hateful Memes recognition')
st.write("---")

# Image selection: every entry of the image folder, sorted by name
img_filepath = 'static/images_streamlit'
list_images = sorted(os.listdir(img_filepath))

st.subheader('Some examples of hateful and non-hateful memes:')
with st.expander('Want to see some memes?'):

    # The slider offers at most the first 10 files. Slicing (instead of indexing
    # positions 0-9) keeps this working when the folder holds fewer than 10 files.
    slider_images = list_images[:10]

    if len(slider_images) > 1:
        selected_image = st.select_slider('Select a meme to show it', options=slider_images, value=slider_images[0])
    elif slider_images:
        # A single file leaves nothing to choose from: show it directly
        selected_image = slider_images[0]
    else:
        # Empty folder: report it instead of failing on list_images[0]
        selected_image = None
        st.write('No meme available to show.')

    if selected_image is not None:
        # Three equal columns; the meme is shown in the middle one
        col1, col2, col3 = st.columns(3)

        with col2:
            st.image(f'{img_filepath}/{selected_image}')

st.write("---")
#####################################################################################################################################

# Hateful test: look a single word up in both vocabularies and compare its counts
st.subheader('Is a word in our hateful vocabulary or not?')
with st.expander('Hateful? Non-hateful?'):

    word = st.text_input('Write a word to test it', 'like')
    # The lookup below is done on the lower-cased input
    word_lower = word.lower()

    # Need to reload them in case it was not done in the sidebar
    df_hate = pd.read_csv('static/data_hate.csv')
    df_no_hate = pd.read_csv('static/data_no_hate.csv')

    # Count of the word in each vocabulary; None means the word is absent.
    # Explicit None values replace the previous reliance on a NameError caught
    # by a bare `except`, which reported "not in either vocabulary" even when
    # the word was present in one of them.
    hate_matches = df_hate.loc[df_hate['word'] == word_lower, 'count']
    appeared_hate = hate_matches.iloc[0] if not hate_matches.empty else None

    no_hate_matches = df_no_hate.loc[df_no_hate['word'] == word_lower, 'count']
    appeared_no_hate = no_hate_matches.iloc[0] if not no_hate_matches.empty else None

    if appeared_hate is None:
        st.write(f'"{word}" is not in our hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our hateful vocabulary, it appears {appeared_hate} times.')

    if appeared_no_hate is None:
        st.write(f'"{word}" is not in our non-hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our non-hateful vocabulary, it appears {appeared_no_hate} times.')

    if appeared_hate is None and appeared_no_hate is None:
        st.write(f'"{word}" is not in our hateful and non-hateful vocabulary.')
    elif appeared_hate is not None and appeared_no_hate is not None and appeared_no_hate != 0:
        # The ratio only makes sense when both counts exist and the denominator is non-zero
        st.write(f'Ratio hateful vs non-hateful: {round(appeared_hate/appeared_no_hate, 2)}.')

st.write("---")
#####################################################################################################################################

# Slider to choose how many words we want to see and plot the countplot

# Bar colours, reused for every chart of this section
_BAR_COLORS = ['darkmagenta', 'darkblue', 'darkgreen', 'darkred', 'darkgrey', 'darkorange']


def _plot_top_words(df_words, heading):
    """Let the user pick a range of rows of `df_words` and draw them as a horizontal bar chart.

    Args:
        df_words: DataFrame with a 'word' and a 'count' column; rows are plotted
            in their existing order (presumably sorted by decreasing count —
            confirm against the CSV files).
        heading: Sub-header text displayed above the chart.
    """
    n_words = df_words.shape[0]

    if n_words == 0:
        st.write('No word to display for this vocabulary.')
        return

    if n_words == 1:
        # A single row leaves no range to choose from
        start_word, end_word = 1, 1
    else:
        # The default upper bound is capped by the number of rows: a default
        # value that is not one of the options is rejected by the slider
        start_word, end_word = st.select_slider(
            'Select a range of top words',
            options=list(range(1, n_words + 1)),
            value=(1, min(10, n_words)))

    # Slider positions are 1-based and inclusive; iloc is 0-based and end-exclusive
    df_selected = df_words.iloc[start_word - 1:end_word, :]

    fig, ax = plt.subplots()
    bars = ax.barh(y=df_selected['word'], width=df_selected['count'], color=_BAR_COLORS)

    ax.bar_label(bars)
    # First selected word at the top of the chart
    ax.invert_yaxis()

    st.subheader(heading)
    st.pyplot(fig)
    # The script is re-run on every interaction: close the figure so that
    # open matplotlib figures do not pile up
    plt.close(fig)


st.subheader('Barplot of top selected words:')
with st.expander('Select to choose how many top words you want to see and their count'):

    option = st.selectbox('Which vocabulary to select?', ('Hateful vocabulary', 'Non-hateful vocabulary', 'Both vocabularies'))
    st.write('You selected', option)

    if option == 'Hateful vocabulary':
        # Keep rows whose second column is at least 20
        # (presumably the 'count' column — confirm against the CSV layout)
        df_hate_subset = df_hate[df_hate.iloc[:, 1] >= 20]
        _plot_top_words(df_hate_subset, 'Selected words hateful vocabulary:')

    elif option == 'Non-hateful vocabulary':
        # Same filter with a threshold of 30 for the non-hateful vocabulary
        df_no_hate_subset = df_no_hate[df_no_hate.iloc[:, 1] >= 30]
        _plot_top_words(df_no_hate_subset, 'Selected words non-hateful vocabulary:')

    else:
        df_top = pd.read_csv('./static/data_top.csv')
        _plot_top_words(df_top, 'Selected words (hateful & non-hateful vocabularies):')


st.write("---")
#####################################################################################################################################

# Testing some sentences

# Streamlit re-executes the whole script on every widget interaction, so both
# models are wrapped in a cache and built once instead of once per rerun.
# `st.cache_resource` is used when this Streamlit version provides it;
# otherwise the older `st.cache(allow_output_mutation=True)` is used.
if hasattr(st, 'cache_resource'):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _load_sentence_encoder():
    """Return the SentenceTransformer used to embed the input sentence."""
    return SentenceTransformer('all-mpnet-base-v2')


@_cache_model
def _load_classifier():
    """Rebuild the Keras model from its JSON architecture file and load its weights."""
    # The context manager closes the file even if reading fails
    with open('static/model_nlp/model_nlp.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)

    # load weights into new model
    model.load_weights("static/model_nlp/model_nlp.h5")
    return model


st.subheader('Testing some sentences if you dare:')
with st.expander('Input a sentence and check the probability of it being hateful:'):

    # Some user input
    model_nlp = _load_sentence_encoder()
    sentence = st.text_input('Write a sentence to test it.', "Hopefully I don't write some hateful content.")

    # Encoding: the embedding is reshaped to a single-row 2D array before prediction
    preprocessed_sentence = model_nlp.encode(sentence)
    preprocessed_sentence = preprocessed_sentence.reshape(1, -1)

    loaded_model = _load_classifier()

    # Our predictions: the first value of the first (and only) row is used as
    # the probability of the sentence being hateful
    y_pred = loaded_model.predict(preprocessed_sentence)
    hateful_probability = float(y_pred[0][0])

    # Gauge showing the predicted probability; the red threshold line and the
    # delta reference sit at 0.5, the cut-off used for the verdict below
    fig1 = go.Figure(go.Indicator(
        mode = "gauge+number+delta",
        value = hateful_probability,
        delta = {'reference': 0.50},
        gauge = {'axis': {'range': [None, 1]},
                'steps' : [
                    {'range': [0, 0.5], 'color': "lightgray"},
                    {'range': [0.5, 0.75], 'color': "gray"}],
                'threshold' : {'line': {'color': "red", 'width': 4}, 'thickness': 0.9, 'value': 0.5}},
        domain = {'x': [0, 1], 'y': [0, 1]},
        title = {'text': 'Probability of your sentence being hateful'}))

    st.plotly_chart(fig1)

    if hateful_probability < 0.5:
        st.write("Congrats, it's not hateful!!!")
    else:
        st.write("Shame on you, it's hateful!!!")

st.write("---")
#####################################################################################################################################
# Footer: Artefact logo displayed in the last of five equal-width columns
footer_columns = st.columns(5)
with footer_columns[-1]:
    st.image(Image.open('static/logo_artefact.png'))