Andrii Demydenko committed on
Commit
9716ffb
·
1 Parent(s): 6e4b95f
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +12 -1
  3. pages/data_analysis.py +14 -9
  4. pages/run_models.py +2 -2
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py CHANGED
@@ -1,5 +1,15 @@
1
  import streamlit as st
2
  from st_pages import Page, show_pages, add_page_title
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def main():
@@ -8,7 +18,7 @@ def main():
8
  page_title="NLP - Project",
9
  page_icon="πŸ€–",
10
  )
11
- add_page_title()
12
  show_pages(
13
  [
14
  Page("pages/introduction.py", "Intro", "😊"),
@@ -19,3 +29,4 @@ def main():
19
 
20
  if __name__ == "__main__":
21
  main()
 
 
1
  import streamlit as st
2
  from st_pages import Page, show_pages, add_page_title
3
+ from transformers import pipeline
4
+ from datasets import load_dataset
5
+
6
+
7
class TextClassifier:
    """Holds the text-classification pipeline shared by the app's pages.

    NOTE(review): the pipeline is built at module import time — presumably so
    every Streamlit page reuses a single model instance; confirm this is the
    intended loading strategy.
    """

    # Hugging Face inference pipeline backed by the project's fine-tuned model.
    bertPipe = pipeline(model="nlp-pw/test-model-3")
9
+
10
+
11
class AnalysisData:
    """Holds the dataset shared by the data-analysis page.

    NOTE(review): loaded eagerly at import time — confirm this is intended,
    since every page that imports app.py pays the download/load cost.
    """

    # Train split of the normalized disaster-tweets dataset, pinned to the
    # 'main' revision for reproducibility.
    ds = load_dataset(
        "nlp-pw/Disaster-Tweets-Normalized", revision="main", split='train'
    )
13
 
14
 
15
  def main():
 
18
  page_title="NLP - Project",
19
  page_icon="πŸ€–",
20
  )
21
+ add_page_title("NLP - Project")
22
  show_pages(
23
  [
24
  Page("pages/introduction.py", "Intro", "😊"),
 
29
 
30
  if __name__ == "__main__":
31
  main()
32
+
pages/data_analysis.py CHANGED
@@ -2,22 +2,27 @@ import streamlit as st
2
 
3
  from matplotlib import pyplot as plt
4
  from wordcloud import WordCloud, STOPWORDS
5
- from datasets import load_dataset
6
  import numpy as np
7
 
 
8
 
9
- ds = load_dataset("rajteer/Natural_disaster_tweets", revision="main", split='train')
10
 
11
- text_data = ' '.join([row['tweet_text'] for row in ds])
 
12
 
 
 
 
 
13
 
14
- wordcloud = WordCloud(width=800, height=400).generate(text_data)
 
 
 
 
 
15
 
16
- fig, ax = plt.subplots(figsize=(10, 5))
17
- ax.imshow(wordcloud, interpolation='bilinear')
18
- ax.axis('off')
19
-
20
- st.pyplot(fig)
21
 
22
 
23
  # DataSet links
 
2
 
3
  from matplotlib import pyplot as plt
4
  from wordcloud import WordCloud, STOPWORDS
 
5
  import numpy as np
6
 
7
+ from app import AnalysisData
8
 
 
9
 
10
# Render one word cloud per disaster type from the shared dataset.
df = AnalysisData.ds.to_pandas(batched=False)
disaster_types = df['disaster_type'].unique()

for disaster in disaster_types:
    # Join this type's tweets into a single corpus. The original pre-built a
    # dict of every corpus up front (double memory, no reuse); building it
    # per-iteration is equivalent and lighter. fillna('') guards against NaN
    # tweet_text entries, which would make str.join raise TypeError.
    corpus = ' '.join(
        df.loc[df['disaster_type'] == disaster, 'tweet_text'].fillna('')
    )

    st.subheader(f'{disaster} Word Cloud')
    wordcloud = WordCloud(width=800, height=400).generate(corpus)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)
 
 
 
 
26
 
27
 
28
  # DataSet links
pages/run_models.py CHANGED
@@ -1,9 +1,9 @@
1
  import streamlit as st
2
  from transformers import pipeline
 
3
 
4
- pipe = pipeline( model="nlp-pw/test-model-2")
5
  text = st.text_area("Enter your text here πŸ˜ƒ")
6
 
7
  if text:
8
- out = pipe(text)
9
  st.json(out)
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ from app import TextClassifier
4
 
 
5
# Classify user-provided text with the app's shared BERT pipeline and show
# the raw prediction as JSON. Nothing is rendered until the box is non-empty.
text = st.text_area("Enter your text here 😃")

if text:
    st.json(TextClassifier.bertPipe(text))