Spaces:
Runtime error
Runtime error
import datetime as dt
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import praw
import seaborn as sns
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
# Reddit API client.
# The credentials are read from environment variables so that no secret is
# stored in the source file. The values that used to be hard-coded here were
# published together with this file and should be rotated.
# A missing variable raises KeyError naming the variable that has to be set.
reddit = praw.Reddit(
    client_id=os.environ['REDDIT_CLIENT_ID'],
    client_secret=os.environ['REDDIT_CLIENT_SECRET'],
    user_agent='Profile extractor',
    username=os.environ['REDDIT_USERNAME'],
    password=os.environ['REDDIT_PASSWORD'],
)
# CSV file that load_data() reads.
DATA_URL = 'subreddit_data_v1.csv'

# Page header.
# NOTE(review): the last character of the title looks like a mis-decoded
# emoji; it is kept unchanged here -- confirm the intended glyph.
st.title('Just Reddit as it is ๐')
st.write(
    'This is a simple web app to extract data from Reddit and analyze it.'
)
def load_data(path=None):
    """Read the subreddit CSV and return it with lower-cased column names.

    Args:
        path: Source handed straight to ``pandas.read_csv`` (file path, URL
            or file-like object). When omitted, the module-level
            ``DATA_URL`` is used, which is the previous zero-argument
            behaviour.

    Returns:
        pandas.DataFrame whose column labels are ``str(label).lower()`` of
        the labels found in the CSV.
    """
    source = DATA_URL if path is None else path
    frame = pd.read_csv(source)
    # str() first so that a non-string label cannot break the lower-casing.
    frame.columns = [str(label).lower() for label in frame.columns]
    return frame
# Placeholder text that is overwritten in place once loading has finished.
data_load_state = st.text('Loading data...')
# The status message below advertises caching, so actually apply it; without
# this the CSV is re-read on every script rerun (i.e. every widget change).
# Newer Streamlit releases provide st.cache_data, older ones only st.cache.
_cache_loader = getattr(st, 'cache_data', None) or st.cache
data = _cache_loader(load_data)()
data_load_state.text("Done! (using st.cache)")
# Optional dump of the full table, shown only while the box is ticked.
show_raw = st.checkbox('Show raw data')
if show_raw:
    st.subheader('Raw data')
    st.write(data)

# Drop-down holding every distinct value of the 'subreddit' column; the
# choice drives all sections that follow.
subreddit_choices = data['subreddit'].unique()
subreddit = st.selectbox('Select a subreddit', subreddit_choices)
st.subheader('Wordcloud of the most common words in the subreddit')
stopwords = set(STOPWORDS)

# Titles of the selected subreddit. Missing titles are dropped so they do not
# end up in the cloud as the literal word "nan".
titles = data.loc[data['subreddit'] == subreddit, 'title'].dropna()

# Lower-case every title and normalise its whitespace, then build the text
# with a single join instead of repeated string concatenation.
comment_words = " ".join(
    " ".join(str(title).lower().split()) for title in titles
)

try:
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        stopwords=stopwords,
        min_font_size=10,
    ).generate(comment_words)
except ValueError:
    # WordCloud raises ValueError when no word is left to draw (no titles,
    # or nothing but stopwords); tell the user instead of crashing the page.
    st.info('No words available to build a wordcloud for this subreddit.')
else:
    # Draw on an explicit figure and hand that figure to Streamlit. Calling
    # st.pyplot() without a figure relies on Matplotlib's global state, which
    # Streamlit has deprecated; passing the figure also makes the former
    # 'deprecation.showPyplotGlobalUse' option unnecessary.
    fig, ax = plt.subplots(figsize=(8, 8), facecolor=None)
    ax.imshow(wordcloud)
    ax.axis("off")
    fig.tight_layout(pad=0)
    st.pyplot(fig)
    # Release the figure so figures do not pile up across script reruns.
    plt.close(fig)
# Rows belonging to the selected subreddit, filtered once and reused by every
# section below.
selected_posts = data[data['subreddit'] == subreddit]

# Summary statistics as produced by DataFrame.describe().
st.subheader('Statistics of the subreddit')
st.write(selected_posts.describe())

# NOTE(review): the two "per day" tables group on the raw 'created' values;
# they are per-day only if that column already holds dates -- confirm.
posts_by_created = selected_posts.groupby('created')

# Count of titles for each 'created' value.
st.subheader('Number of posts per day')
st.write(posts_by_created['title'].count())

# Sum of 'num_comments' for each 'created' value.
st.subheader('Number of comments per day')
st.write(posts_by_created['num_comments'].sum())

# Bar chart of the 'score' column, one bar per post.
st.subheader('Score of the posts')
st.bar_chart(selected_posts['score'])
| # st.subheader('Number of pickups by hour') | |
| # hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0] | |
| # st.bar_chart(hist_values) | |
| # # Some number in the range 0-23 | |
| # hour_to_filter = st.slider('hour', 0, 23, 17) | |
| # filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter] | |
| # st.subheader('Map of all pickups at %s:00' % hour_to_filter) | |
| # st.map(filtered_data) | |