stanlys96 committed on
Commit
682fd81
·
verified ·
1 Parent(s): d6f02ad

Upload 6 files

Browse files
Files changed (2) hide show
  1. eda.py +27 -20
  2. prediction.py +39 -0
eda.py CHANGED
@@ -2,10 +2,8 @@ import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
- import nltk
6
- nltk.download('stopwords')
7
- from nltk.corpus import stopwords
8
- from wordcloud import WordCloud
9
 
10
  def app():
11
  df_original = pd.read_csv("data.csv", delimiter=";")
@@ -15,17 +13,17 @@ def app():
15
  temp_a = df.copy()
16
  temp_a['text_length'] = temp_a['text'].apply(len)
17
  st.header('Exploratory Data Analysis', divider='rainbow')
18
- eda_list = ["Text Length Distribution", "Sentiment Distribution", "Word Clouds", "Boxplot Distributions"]
19
  val = st.sidebar.radio("Choose plot to show", eda_list)
20
- stop_words = set(stopwords.words('english'))
21
- def plot_wordcloud(sentiment):
22
- text = ' '.join(df[df['feeling'] == sentiment]['text'])
23
- wordcloud = WordCloud(stopwords=stop_words, background_color='white').generate(text)
24
- plt.figure(figsize=(10, 6))
25
- plt.imshow(wordcloud, interpolation='bilinear')
26
- plt.axis('off')
27
- plt.title(f"Word Cloud for {sentiment} Sentiment")
28
- st.pyplot(plt)
29
  if val == "Text Length Distribution":
30
  # Plot distribution
31
  st.header('Text Length Distribution')
@@ -55,9 +53,18 @@ def app():
55
  st.pyplot(plt)
56
  st.write("Insight: surprise sentiment has the lowest value of around 900 data")
57
  elif val == "Word Clouds":
58
- plot_wordcloud('joy')
59
- plot_wordcloud('sadness')
60
- plot_wordcloud('anger')
61
- plot_wordcloud('love')
62
- plot_wordcloud('surprise')
63
- plot_wordcloud('fear')
 
 
 
 
 
 
 
 
 
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
+ # from nltk.corpus import stopwords
6
+ # from wordcloud import WordCloud
 
 
7
 
8
  def app():
9
  df_original = pd.read_csv("data.csv", delimiter=";")
 
13
  temp_a = df.copy()
14
  temp_a['text_length'] = temp_a['text'].apply(len)
15
  st.header('Exploratory Data Analysis', divider='rainbow')
16
+ eda_list = ["Text Length Distribution", "Sentiment Distribution", "Word Clouds", "Stopwords Boxplot Distributions"]
17
  val = st.sidebar.radio("Choose plot to show", eda_list)
18
+ # stop_words = set(stopwords.words('english'))
19
+ # def plot_wordcloud(sentiment):
20
+ # text = ' '.join(df[df['feeling'] == sentiment]['text'])
21
+ # wordcloud = WordCloud(stopwords=stop_words, background_color='white').generate(text)
22
+ # plt.figure(figsize=(10, 6))
23
+ # plt.imshow(wordcloud, interpolation='bilinear')
24
+ # plt.axis('off')
25
+ # plt.title(f"Word Cloud for {sentiment} Sentiment")
26
+ # st.pyplot(plt)
27
  if val == "Text Length Distribution":
28
  # Plot distribution
29
  st.header('Text Length Distribution')
 
53
  st.pyplot(plt)
54
  st.write("Insight: surprise sentiment has the lowest value of around 900 data")
55
  elif val == "Word Clouds":
56
+ # plot_wordcloud('joy')
57
+ # plot_wordcloud('sadness')
58
+ # plot_wordcloud('anger')
59
+ # plot_wordcloud('love')
60
+ # plot_wordcloud('surprise')
61
+ # plot_wordcloud('fear')
62
+ pass
63
+ elif val == "Stopwords Boxplot Distributions":
64
+ temp_b = df.copy()
65
+ temp_b['stopword_count'] = temp_b['text'].apply(lambda x: sum(1 for word in x.split() if word.lower() in stop_words))
66
+
67
+ # Plot stopword count distribution by sentiment
68
+ sns.boxplot(data=temp_b, x='feeling', y='stopword_count')
69
+ plt.title('Stopword Count by Sentiment')
70
+ st.pyplot(plt)
prediction.py CHANGED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ import tensorflow_hub as tf_hub
6
+
7
# Class index (stored as a string key) -> emotion label shown to the user.
number_to_feeling = {
    str(index): label
    for index, label in enumerate(
        ("sadness", "anger", "love", "surprise", "fear", "joy")
    )
}


def get_feeling(number):
    """Return the emotion label for a class index.

    The index is stringified before the lookup because the keys of
    ``number_to_feeling`` are strings. Any value without a matching key
    yields ``"Unknown feeling"``.
    """
    key = str(number)
    if key in number_to_feeling:
        return number_to_feeling[key]
    return "Unknown feeling"
20
+
21
@st.cache_resource
def _load_model():
    """Load the Keras model from ``model.keras`` once and reuse it.

    Streamlit re-executes the page script on every widget interaction;
    caching the loaded model as a resource avoids re-reading it from disk
    on each rerun.
    """
    return tf.keras.models.load_model(
        'model.keras', custom_objects={'KerasLayer': tf_hub.KerasLayer}
    )


def app():
    """Render the prediction page.

    Shows a text box and a "Predict" button. When the button is pressed
    with non-empty text, the text is wrapped in a one-row DataFrame under
    the ``text_processed`` column, passed to the model, and the label of
    the highest-scoring class is written to the page.
    """
    st.header('Prediction', divider='rainbow')

    user_input = st.text_input("Enter your text here:")

    if not st.button('Predict', type="secondary"):
        st.write("Click the button to predict!")
        return

    # Do not run inference on empty / whitespace-only input.
    if not user_input.strip():
        st.warning("Please enter some text before predicting.")
        return

    # Loaded lazily (and cached) so the page renders without touching the
    # model until a prediction is actually requested.
    the_model = _load_model()

    df = pd.DataFrame({"text_processed": [user_input]})
    predictions = the_model.predict(df)

    # argmax over axis 1 picks the best class per row; there is one row.
    # Convert the NumPy scalar to a plain int before the label lookup.
    the_sentiment = int(np.argmax(predictions, axis=1)[0])

    st.write(f"We have predicted that the sentiment of this text is {get_feeling(the_sentiment)}")