iqra785 committed
Commit 05e4494 · verified · 1 parent: 01a4b31

Update app.py

Files changed (1): app.py (+55 −23)
app.py CHANGED
@@ -1,31 +1,63 @@
+import streamlit as st
 import snscrape.modules.twitter as sntwitter
 import pandas as pd
-import gradio as gr
-from tqdm import tqdm
 
-def scrape_tweets(query, max_tweets):
+# Function to scrape tweets
+def scrape_tweets(keyword, num_tweets):
     tweets = []
-    for i, tweet in enumerate(tqdm(sntwitter.TwitterSearchScraper(query).get_items(), total=max_tweets)):
-        if i >= max_tweets:
-            break
-        tweets.append([tweet.date, tweet.content, tweet.user.username, tweet.likeCount, tweet.retweetCount])
 
-    df = pd.DataFrame(tweets, columns=["Date", "Tweet", "User", "Likes", "Retweets"])
-    filename = "twitter_dataset.csv"
-    df.to_csv(filename, index=False)
-
-    return filename
+    try:
+        scraper = sntwitter.TwitterSearchScraper(keyword).get_items()
 
-# Gradio UI for running the scraper
-iface = gr.Interface(
-    fn=scrape_tweets,
-    inputs=[
-        gr.Textbox(label="Search Query", placeholder="e.g., AI OR Machine Learning"),
-        gr.Number(label="Number of Tweets", value=1)
-    ],
-    outputs=gr.File(label="Download CSV"),
-    title="Twitter Scraper",
-    description="Enter a search query and scrape up to 100,000 tweets, then download them as a CSV.",
-)
-
-iface.launch()
+        for i, tweet in enumerate(scraper):
+            if i >= num_tweets:
+                break
+            tweets.append([tweet.date, tweet.content, tweet.user.username, tweet.likeCount, tweet.retweetCount])
+            time.sleep(random.uniform(0.5, 1.5))  # Random delay to avoid rate limits
+
+        if tweets:
+            df = pd.DataFrame(tweets, columns=["Date", "Tweet", "User", "Likes", "Retweets"])
+            return df
+        else:
+            return None
+
+    except Exception as e:
+        st.error(f"Error while scraping: {str(e)}")
+        return None
+
+# Streamlit UI
+st.title("Twitter Scraper")
+st.markdown("🔹 **Scrape & Download Tweets** (Political/Funny/Custom)")
+
+# Input fields
+keyword = st.text_input("Enter keyword(s) for tweets (e.g., 'funny OR meme OR politics'):")
+num_tweets = st.number_input("Number of tweets to scrape:", min_value=10, max_value=100000, value=1000, step=100)
+
+# Scrape button
+if st.button("Scrape Tweets"):
+    if keyword:
+        with st.spinner("Scraping tweets..."):
+            df = scrape_tweets(keyword, num_tweets)
+
+        if df is not None:
+            st.success(f"Scraped {len(df)} tweets!")
+            st.dataframe(df)  # Display table
+
+            # Download CSV
+            csv = df.to_csv(index=False).encode('utf-8')
+            st.download_button(
+                label="Download CSV 📥",
+                data=csv,
+                file_name="tweets.csv",
+                mime="text/csv",
+            )
+        else:
+            st.warning("⚠ No tweets found. Try different keywords.")
+    else:
+        st.warning("⚠ Please enter a keyword.")
+
+# Footer
+st.markdown("---")
+st.markdown("📌 **Note**: This scraper is for educational purposes. Respect Twitter's terms of service.")
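
For quick local testing of the updated script, the core loop can be exercised outside the Streamlit UI (the app itself is launched with "streamlit run app.py"). Below is a minimal standalone sketch mirroring the new scrape_tweets logic; the query string "python" and the 10-tweet cap are illustrative placeholders, and it assumes snscrape's Twitter search can still return items:

# Standalone sketch of the scraping loop, Streamlit UI stripped out.
import snscrape.modules.twitter as sntwitter
import pandas as pd

rows = []
# "python" is a placeholder query; cap at 10 tweets for a quick smoke test
for i, tweet in enumerate(sntwitter.TwitterSearchScraper("python").get_items()):
    if i >= 10:
        break
    rows.append([tweet.date, tweet.content, tweet.user.username,
                 tweet.likeCount, tweet.retweetCount])

df = pd.DataFrame(rows, columns=["Date", "Tweet", "User", "Likes", "Retweets"])
print(df.head())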