Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import snscrape.modules.twitter as sntwitter | |
| import pandas as pd | |
| import plotly.express as px | |
| import os | |
# Page chrome: the browser-tab title and the on-page heading share one text.
st.set_page_config(page_title="Scraping Twitter")
st.title("Scraping Twitter")

# Search inputs. `query` is pre-filled with a sample search; `num_tweets`
# is limited to whole numbers between 1 and 1,000,000.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
def scrape_tweets(query, num_tweets, lang='id'):
    """Collect up to ``num_tweets`` search results for ``query`` in a DataFrame.

    Args:
        query: Twitter search string handed to
            ``sntwitter.TwitterSearchScraper``.
        num_tweets: Maximum number of tweets to collect. Coerced to ``int``;
            values below 1 produce an empty frame.
        lang: Language code appended to the query as a ``lang:`` search
            filter. Defaults to ``'id'``, the filter this function has always
            applied; pass ``None`` or ``''`` to search without one.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Tweet Id'``,
        ``'Datetime'``, ``'Text'``, ``'Username'``, ``'Followers'``, ``'URL'``
        and ``'User Id'``, one row per tweet in the order the scraper yields
        them. The columns are present even when no tweet was collected.
    """
    # Local import so this block does not depend on the file's import header.
    from itertools import islice

    # islice() needs a non-negative int; UI widgets may hand over a float.
    limit = max(int(num_tweets), 0)
    search = f'{query} lang:{lang}' if lang else query
    scraper = sntwitter.TwitterSearchScraper(search)

    rows = []
    # islice() stops after exactly `limit` items. An enumerate()/break loop
    # only notices the limit after it has already pulled one extra tweet from
    # the scraper, which costs an unnecessary fetch.
    for tweet in islice(scraper.get_items(), limit):
        # NOTE(review): newer snscrape releases expose the tweet text as
        # `rawContent` and deprecate `content`; older ones only have
        # `content`. Prefer the new name and fall back to the old one.
        text = getattr(tweet, 'rawContent', None)
        if text is None:
            text = tweet.content
        rows.append([
            tweet.id,
            tweet.date,
            text,
            tweet.user.username,
            tweet.user.followersCount,
            tweet.url,
            tweet.user.id,
        ])

    return pd.DataFrame(rows, columns=[
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL',
        'User Id',
    ])
# Axis label for every chart whose y column holds a *count* of 'Tweet Id'
# values; without it Plotly titles the axis "Tweet Id", which reads like an
# identifier rather than a count.
COUNT_LABELS = {'Tweet Id': 'Tweet count'}

# --- Scrape step -----------------------------------------------------------
# st.button() is True only for the single rerun triggered by the click. The
# result is therefore kept in st.session_state so that later reruns (for
# example when the sidebar selectbox changes) can still render the dashboard.
if st.button('Scrape Tweets'):
    scraped_df = scrape_tweets(query, num_tweets)
    saved_path = None

    if not scraped_df.empty:
        # Guarded by the emptiness check: on an empty frame 'Datetime' is an
        # object column and the .dt accessor raises AttributeError.
        scraped_df['Date'] = scraped_df['Datetime'].dt.date

        # Save tweets as CSV file in data folder, once per scrape rather
        # than on every rerun. makedirs(exist_ok=True) avoids the
        # check-then-create race of os.path.exists() + os.mkdir().
        os.makedirs('data', exist_ok=True)
        # The query is free-form user input: keep only characters that are
        # safe in a file name so it cannot introduce path separators.
        safe_query = ''.join(
            ch if ch.isalnum() or ch in '-_' else '_' for ch in query)
        saved_path = f"data/tweets_{safe_query}.csv"
        scraped_df.to_csv(saved_path, index=False)

    st.session_state['tweets_df'] = scraped_df
    st.session_state['tweets_file'] = saved_path
    st.success('Scraping done!')

# --- Dashboard -------------------------------------------------------------
if 'tweets_df' in st.session_state:
    tweets_df = st.session_state['tweets_df']
    file_name = st.session_state['tweets_file']

    if tweets_df.empty:
        st.warning('No tweets were found for this query.')
    else:
        # Display data
        st.write(tweets_df)

        # Line plot of tweet count over time
        tweets_by_date = tweets_df.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(tweets_by_date, x='Date', y='Tweet Id',
                      labels=COUNT_LABELS)
        st.plotly_chart(fig)

        # Scatter plot of followers vs tweet count: one point per user.
        # Plotting the raw frame would put tweet IDs on the y axis, so the
        # tweets are first aggregated per username.
        per_user = tweets_df.groupby('Username').agg(
            {'Followers': 'max', 'Tweet Id': 'count'}).reset_index()
        fig = px.scatter(per_user, x='Followers', y='Tweet Id',
                         hover_name='Username', labels=COUNT_LABELS)
        st.plotly_chart(fig)

        # Username selection and interaction count
        st.sidebar.title('Username and Interaction Count')
        selected_username = st.sidebar.selectbox(
            'Select a username:', options=tweets_df['Username'].unique())
        user_tweets = tweets_df[tweets_df['Username'] == selected_username]
        st.sidebar.write(
            f'Interactions with @{selected_username}: {user_tweets.shape[0]}')

        # Interaction count by username, most active users first
        interactions_by_user = (
            tweets_df.groupby(['Username'])['Tweet Id']
            .count()
            .reset_index()
            .sort_values(by=['Tweet Id'], ascending=False)
        )
        fig = px.bar(interactions_by_user, x='Username', y='Tweet Id',
                     labels=COUNT_LABELS)
        st.plotly_chart(fig)

        # Interaction count with selected username over time
        user_by_date = user_tweets.groupby(
            ['Date'])['Tweet Id'].count().reset_index()
        fig = px.line(user_by_date, x='Date', y='Tweet Id',
                      labels=COUNT_LABELS)
        st.plotly_chart(fig)

        # Tweets involving selected username
        st.write(f'Tweets involving @{selected_username}:')
        st.write(user_tweets)

        st.write(f"Tweets saved as {file_name}")