README / pages /data_analysis.py
Andrii Demydenko
update
9716ffb
raw
history blame contribute delete
862 Bytes
import streamlit as st
from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import numpy as np
from app import AnalysisData
# Load the complete dataset as a single pandas DataFrame.
# NOTE(review): AnalysisData.ds is defined in app.py; presumably a Hugging Face
# `datasets.Dataset`, where batched=False yields one frame instead of an
# iterator of batches -- confirm against app.py.
df = AnalysisData.ds.to_pandas(batched=False)

# Distinct disaster labels. Rows with a missing label are dropped so that no
# NaN key reaches the code that builds titles from these labels.
disaster_types = df['disaster_type'].dropna().unique()

# Map every disaster type to one space-joined string holding all of its tweets.
# Missing tweets are skipped and the remaining values coerced to str, because
# str.join raises TypeError on the first non-string item (e.g. a float NaN).
text_data = {
    disaster: ' '.join(
        df.loc[df['disaster_type'] == disaster, 'tweet_text']
        .dropna()
        .astype(str)
    )
    for disaster in disaster_types
}
# Render one word cloud per disaster type, each under its own subheader.
for disaster in disaster_types:
    st.subheader(f'{disaster} Word Cloud')

    try:
        wordcloud = WordCloud(width=800, height=400).generate(text_data[disaster])
    except ValueError:
        # WordCloud.generate raises ValueError when the text contains no
        # usable words; report it for this disaster instead of crashing the
        # whole page, then carry on with the remaining disaster types.
        st.info(f'No words available to build a word cloud for {disaster}.')
        continue

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')  # A word cloud has no meaningful axes or ticks.
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until explicitly
        # closed. Streamlit re-executes this script on every interaction, so
        # without this the open figures (and their memory) grow unbounded.
        plt.close(fig)
# Closing section of the page: link to the dataset used for the word clouds.
# NOTE(review): the `fbclid` query parameter looks like a click-tracking id
# left over from a copied link; presumably it can be dropped -- confirm.
st.subheader("DataSet links")
st.markdown(
    "- [Humaid Dataset]"
    "(https://crisisnlp.qcri.org/humaid_dataset"
    "?fbclid=IwAR2rpSdcVhcXvQagxAG5VA2dvwAUOJOCVwTKxqtDiz7soIhVMUtp_N0BfSo)"
)