File size: 8,476 Bytes
429485e
 
aa4bd4b
429485e
 
 
e0a7610
429485e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0a7610
faca89f
 
 
 
 
 
 
 
 
 
 
 
 
e0a7610
429485e
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import model_from_json
import plotly.graph_objects as go
from PIL import Image
import streamlit as st


#####################################################################################################################################
st.set_page_config(layout='wide')

# Sidebar: Facebook logo + optional tables of the top words of each vocabulary
with st.sidebar:
    # Three columns; the logo is rendered in the middle one.
    col1, col2, col3 = st.columns(3)
    with col2:
        logo_facebook = Image.open('static/logo_facebook.png')
        st.image(logo_facebook)

    # Checkboxes to see some info on our vocabularies
    hateful = st.checkbox('Check to see top hateful words used')

    if hateful:
        # Loading some hateful text data
        df_hate = pd.read_csv('static/data_hate.csv')

        # min_value=1 prevents a negative bound, which would make iloc[:n]
        # silently drop rows from the end instead of taking the first n.
        number_chosen_hate = st.number_input(
            'How many top hateful words do you want to see?', min_value=1, value=5)
        # NOTE(review): "top" relies on the CSV rows already being ordered by
        # frequency -- confirm against how the file is generated.
        df_chosen_hate = df_hate.iloc[:number_chosen_hate, :]

        st.write(f'{number_chosen_hate} most used words in the hateful vocabulary:')
        st.dataframe(df_chosen_hate)

    non_hateful = st.checkbox('Check to see top non-hateful words used')

    if non_hateful:
        # Loading some non-hateful text data
        df_no_hate = pd.read_csv('static/data_no_hate.csv')

        number_chosen = st.number_input(
            'How many top non-hateful words do you want to see?', min_value=1, value=5)
        df_chosen = df_no_hate.iloc[:number_chosen, :]

        # This table shows the non-hateful vocabulary, so the caption says so.
        st.write(f'{number_chosen} most used words in the non-hateful vocabulary:')
        st.dataframe(df_chosen)


#####################################################################################################################################
st.title('Facebook: Hateful Memes recognition')
st.write("---")

# Image selection: list the example memes shipped with the app, in name order
img_filepath = 'static/images_streamlit'
list_images = sorted(os.listdir(img_filepath))

st.subheader('Some examples of hateful and non-hateful memes:')
with st.expander('Want to see some memes?'):

    # Offer at most the first ten files. Slicing, unlike indexing 0..9,
    # does not raise when the folder holds fewer than ten entries.
    meme_options = list_images[:10]

    if meme_options:
        selected_image = st.select_slider(
            'Select a meme to show it', options=meme_options, value=meme_options[0])

        # Three columns; the meme is rendered in the middle one.
        col1, col2, col3 = st.columns(3)

        with col2:
            st.image(f'{img_filepath}/{selected_image}')
    else:
        # Empty folder: nothing to select, so say so instead of crashing.
        st.write('No memes available.')

st.write("---")
#####################################################################################################################################

# Hateful test: look a user-supplied word up in both vocabularies and report
# how often it appears in each.
st.subheader('Is a word in our hateful vocabulary or not?')
with st.expander('Hateful? Non-hateful?'):

    word = st.text_input('Write a word to test it', 'like')
    # The lookup is done on the lower-cased word; messages echo the raw input.
    word_lower = word.lower()

    # Need to reload them in case it was not done in the sidebar
    df_hate = pd.read_csv('static/data_hate.csv')
    df_no_hate = pd.read_csv('static/data_no_hate.csv')

    # Counts of the rows matching the word; empty when the word is absent.
    hate_matches = df_hate.loc[df_hate['word'] == word_lower, 'count']
    no_hate_matches = df_no_hate.loc[df_no_hate['word'] == word_lower, 'count']

    # None marks "word absent", so no later line touches an undefined name.
    appeared_hate = None if hate_matches.empty else hate_matches.values[0]
    appeared_no_hate = None if no_hate_matches.empty else no_hate_matches.values[0]

    if appeared_hate is None:
        st.write(f'"{word}" is not in our hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our hateful vocabulary, it appears {appeared_hate} times.')

    if appeared_no_hate is None:
        st.write(f'"{word}" is not in our non-hateful vocabulary.')
    else:
        st.write(f'"{word}" is in our non-hateful vocabulary, it appears {appeared_no_hate} times.')

    # The ratio only makes sense when the word is present in both vocabularies.
    if appeared_hate is not None and appeared_no_hate is not None:
        st.write(f'Ratio hateful vs non-hateful: {round(appeared_hate/appeared_no_hate, 2)}.')
    elif appeared_hate is None and appeared_no_hate is None:
        st.write(f'"{word}" is not in our hateful and non-hateful vocabulary.')

st.write("---")
#####################################################################################################################################

# Slider to choose how many words we want to see and plot the countplot
st.subheader('Barplot of top selected words:')
with st.expander('Select to choose how many top words you want to see and their count'):

    option = st.selectbox('Which vocabulary to select?', ('Hateful vocabulary', 'Non-hateful vocabulary', 'Both vocabularies'))
    st.write('You selected', option)

    # Pick the table to plot and its heading; the plotting itself is shared.
    # NOTE(review): the filters use the second column positionally
    # (iloc[:, 1]); presumably that is 'count' -- confirm against the CSVs.
    if option == 'Hateful vocabulary':
        df_plot = df_hate[df_hate.iloc[:, 1] >= 20]
        plot_heading = 'Selected words hateful vocabulary:'
    elif option == 'Non-hateful vocabulary':
        df_plot = df_no_hate[df_no_hate.iloc[:, 1] >= 30]
        plot_heading = 'Selected words non-hateful vocabulary:'
    else:
        df_plot = pd.read_csv('./static/data_top.csv')
        plot_heading = 'Selected words (hateful & non-hateful vocabularies):'

    n_words = df_plot.shape[0]

    if n_words == 0:
        # A slider needs at least one option; report the empty table instead.
        st.write('No words to display for this vocabulary.')
    else:
        # Ranks are 1-based for the user. The default upper bound is capped
        # so it is always one of the offered options, even for short tables.
        start_word, end_word = st.select_slider(
            'Select a range of top words',
            options=list(range(1, n_words + 1)),
            value=(1, min(10, n_words)))

        # Convert the inclusive 1-based rank range to a 0-based row slice.
        df_slider = df_plot.iloc[start_word-1:end_word, :]

        fig, ax = plt.subplots()
        bars = ax.barh(y=df_slider['word'], width=df_slider['count'], color=['darkmagenta', 'darkblue', 'darkgreen', 'darkred', 'darkgrey', 'darkorange'])

        ax.bar_label(bars)
        # Put the first selected word at the top of the chart.
        ax.invert_yaxis()

        st.subheader(plot_heading)
        st.pyplot(fig)
        # Release the figure once rendered so figures do not pile up in
        # pyplot's global registry across script reruns.
        plt.close(fig)


st.write("---")
#####################################################################################################################################

# Testing some sentences: embed the user's sentence, feed the embedding to the
# saved Keras model and display the predicted score.
st.subheader('Testing some sentences if you dare:')
with st.expander('Input a sentence and check the probability of it being hateful:'):

    # Some user input
    model_nlp = SentenceTransformer('all-mpnet-base-v2')
    sentence = st.text_input('Write a sentence to test it.', "Hopefully I don't write some hateful content.")

    # Encoding: one embedding vector, reshaped to a single-row 2-D array
    # so it can be passed to predict() as a batch of one.
    preprocessed_sentence = model_nlp.encode(sentence)
    preprocessed_sentence = preprocessed_sentence.reshape(1, -1)

    # Load the model architecture from JSON; the context manager closes the
    # file even if reading fails.
    with open('static/model_nlp/model_nlp.json', 'r', encoding='utf-8') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights("static/model_nlp/model_nlp.h5")

    # Our predictions: the first output of the first (only) row
    y_pred = loaded_model.predict(preprocessed_sentence)
    hateful_probability = y_pred[0][0]

    # Gauge of the predicted score, with the 0.5 decision threshold in red
    fig1 = go.Figure(go.Indicator(
        mode = "gauge+number+delta",
        value = hateful_probability,
        delta = {'reference': 0.50},
        gauge = {'axis': {'range': [None, 1]},
                'steps' : [
                    {'range': [0, 0.5], 'color': "lightgray"},
                    {'range': [0.5, 0.75], 'color': "gray"}],
                'threshold' : {'line': {'color': "red", 'width': 4}, 'thickness': 0.9, 'value': 0.5}},
        domain = {'x': [0, 1], 'y': [0, 1]},
        title = {'text': 'Probability of your sentence being hateful'}))

    st.plotly_chart(fig1)

    # Same 0.5 threshold as the gauge: below is non-hateful, otherwise hateful.
    if hateful_probability < 0.5:
        st.write("Congrats, it's not hateful!!!")
    else:
        st.write("Shame on you, it's hateful!!!")

st.write("---")
#####################################################################################################################################
# Footer: five columns, with the Artefact logo rendered in the last one.
footer_columns = st.columns(5)
with footer_columns[-1]:
    st.image(Image.open('static/logo_artefact.png'))