6ee5ali committed on
Commit
18083dc
·
verified ·
1 Parent(s): 4c5188b

Add 3 files

Browse files
Files changed (3) hide show
  1. README.md +7 -5
  2. index.html +202 -19
  3. prompts.txt +2 -0
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Twitter
3
- emoji: 🏃
4
- colorFrom: indigo
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: twitter
3
+ emoji: ⚛️
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - QwenSite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,202 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" class="bg-gray-900 text-white">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Social Media Scraper - Terra</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <script src="https://unpkg.com/feather-icons"></script>
9
+ </head>
10
+ <body class="flex h-screen overflow-hidden">
11
+
12
+ <!-- Sidebar -->
13
+ <div class="w-64 bg-gray-800 p-4 flex flex-col space-y-4">
14
+ <h2 class="text-2xl font-bold mb-4 text-blue-400">Terra OSINT</h2>
15
+ <button onclick="showSection('dashboard')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
16
+ <i data-feather="home"></i><span>Dashboard</span>
17
+ </button>
18
+ <button onclick="showSection('config')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
19
+ <i data-feather="settings"></i><span>Settings</span>
20
+ </button>
21
+ <button onclick="showSection('results')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
22
+ <i data-feather="database"></i><span>Results</span>
23
+ </button>
24
+ <button onclick="showSection('logs')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
25
+ <i data-feather="file-text"></i><span>Logs</span>
26
+ </button>
27
+ </div>
28
+
29
+ <!-- Main Content -->
30
+ <div id="dashboard" class="flex-1 p-6 overflow-auto">
31
+ <div class="max-w-3xl mx-auto">
32
+ <h1 class="text-3xl font-bold mb-6">Instagram & Twitter Scraper</h1>
33
+
34
+ <!-- Target Input -->
35
+ <div class="bg-gray-800 p-6 rounded-lg mb-6">
36
+ <label class="block text-lg font-semibold mb-2">Enter Target Username</label>
37
+ <input id="targetUsername" type="text" placeholder="e.g. realDonaldTrump" class="w-full p-2 rounded bg-gray-700 text-white" />
38
+ <div class="mt-4 flex space-x-4">
39
+ <button onclick="startScraping()" class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded">Scrape Instagram</button>
40
+ <button onclick="startScraping()" class="bg-indigo-600 hover:bg-indigo-700 px-4 py-2 rounded">Scrape Twitter</button>
41
+ <button onclick="startScraping()" class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded">Scrape Both</button>
42
+ </div>
43
+ </div>
44
+
45
+ <!-- Status -->
46
+ <div id="status" class="bg-gray-800 p-4 rounded-lg text-center text-gray-400">Ready to scrape</div>
47
+ </div>
48
+ </div>
49
+
50
+ <!-- Config Section -->
51
+ <div id="config" class="hidden flex-1 p-6 overflow-auto">
52
+ <div class="max-w-3xl mx-auto">
53
+ <h1 class="text-3xl font-bold mb-6">Configuration</h1>
54
+ <div class="bg-gray-800 p-6 rounded-lg">
55
+ <label class="block text-lg font-semibold mb-2">Instagram Credentials</label>
56
+ <input id="igUser" type="text" placeholder="Instagram Username" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
57
+ <input id="igPass" type="password" placeholder="Instagram Password" class="w-full mb-4 p-2 rounded bg-gray-700 text-white" />
58
+
59
+ <label class="block text-lg font-semibold mb-2">Twitter API Keys</label>
60
+ <input id="twApiKey" type="text" placeholder="API Key" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
61
+ <input id="twApiKeySecret" type="text" placeholder="API Key Secret" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
62
+ <input id="twAccessToken" type="text" placeholder="Access Token" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
63
+ <input id="twAccessTokenSecret" type="text" placeholder="Access Token Secret" class="w-full mb-4 p-2 rounded bg-gray-700 text-white" />
64
+
65
+ <div class="flex space-x-4">
66
+ <button onclick="saveConfig()" class="bg-green-600 hover:bg-green-700 px-4 py-2 rounded">Save</button>
67
+ <button onclick="loadConfig()" class="bg-gray-600 hover:bg-gray-700 px-4 py-2 rounded">Load</button>
68
+ </div>
69
+ </div>
70
+ </div>
71
+ </div>
72
+
73
+ <!-- Results Section -->
74
+ <div id="results" class="hidden flex-1 p-6 overflow-auto">
75
+ <div class="max-w-5xl mx-auto">
76
+ <h1 class="text-3xl font-bold mb-6">Scraping Results</h1>
77
+ <div class="bg-gray-800 p-6 rounded-lg">
78
+ <div class="flex space-x-4 mb-4">
79
+ <button onclick="showResultsTab('instagram')" class="bg-pink-600 hover:bg-pink-700 px-4 py-2 rounded">Instagram</button>
80
+ <button onclick="showResultsTab('twitter')" class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded">Twitter</button>
81
+ </div>
82
+
83
+ <div id="instagramResults" class="hidden">
84
+ <h2 class="text-xl font-semibold mb-4">Instagram Mentions & Tags</h2>
85
+ <div id="instagramOutput" class="bg-gray-900 p-4 rounded max-h-96 overflow-y-auto text-sm text-gray-300"></div>
86
+ </div>
87
+
88
+ <div id="twitterResults" class="hidden">
89
+ <h2 class="text-xl font-semibold mb-4">Twitter Mentions & Hashtags</h2>
90
+ <div id="twitterOutput" class="bg-gray-900 p-4 rounded max-h-96 overflow-y-auto text-sm text-gray-300"></div>
91
+ </div>
92
+ </div>
93
+ </div>
94
+ </div>
95
+
96
+ <!-- Logs Section -->
97
+ <div id="logs" class="hidden flex-1 p-6 overflow-auto">
98
+ <div class="max-w-5xl mx-auto">
99
+ <h1 class="text-3xl font-bold mb-6">Scraping Logs</h1>
100
+ <div class="bg-gray-800 p-6 rounded">
101
+ <div id="logOutput" class="bg-black text-green-400 p-4 rounded h-96 overflow-y-auto font-mono text-sm"></div>
102
+ </div>
103
+ </div>
104
+ </div>
105
+
106
+ <script>
107
/**
 * Reveal one top-level panel and hide the other three.
 *
 * Every panel (#dashboard, #config, #results, #logs) first receives the
 * "hidden" class; the class is then removed from the requested panel.
 *
 * @param {string} sectionId - Element id of the panel to reveal.
 */
function showSection(sectionId) {
  const panels = document.querySelectorAll("#dashboard, #config, #results, #logs");
  for (const panel of panels) {
    panel.classList.add("hidden");
  }

  const target = document.getElementById(sectionId);
  target.classList.remove("hidden");
}
111
+
112
/**
 * Switch the visible tab inside the results panel.
 *
 * Both result panes are hidden first, then the pane whose id is
 * `<tab>Results` is revealed.
 *
 * @param {string} tab - Tab prefix, "instagram" or "twitter".
 */
function showResultsTab(tab) {
  ["instagramResults", "twitterResults"].forEach((paneId) => {
    document.getElementById(paneId).classList.add("hidden");
  });

  const activePane = document.getElementById(`${tab}Results`);
  activePane.classList.remove("hidden");
}
117
+
118
/**
 * Run a (simulated) scrape for the username typed into #targetUsername.
 *
 * Validates the input, updates the #status banner, writes log entries and,
 * after a 2 second timer standing in for the real work, renders a canned
 * result set into the results panel and stores it in localStorage under
 * "lastResults" / "lastUsername".
 */
function startScraping() {
  const username = document.getElementById("targetUsername").value.trim();
  if (!username) {
    alert("Please enter a username.");
    return;
  }

  const status = document.getElementById("status");
  status.innerText = `Scraping started for ${username}...`;

  log(`[${new Date().toLocaleTimeString()}] Starting scrape for ${username}`);

  // Simulate scraping
  setTimeout(() => {
    const result = {
      instagram: {
        mentions: [],
        tags: [{ username: "user123", caption: "Check out this post!", url: "#" }],
        comments: []
      },
      twitter: {
        mentions: [],
        // Must be a template literal: inside a plain quoted string the
        // ${username} placeholder is emitted verbatim instead of interpolated.
        tags: [{ username: "john_doe", text: `Just mentioned @${username}!`, url: "#" }],
        tweets: []
      },
      timestamp: new Date().toISOString()
    };

    log(`[${new Date().toLocaleTimeString()}] Scraping completed.`);
    status.innerText = `Scraping completed for ${username}.`;

    // Show results
    showResultsTab("instagram");
    document.getElementById("instagramOutput").innerText = JSON.stringify(result.instagram, null, 2);
    document.getElementById("twitterOutput").innerText = JSON.stringify(result.twitter, null, 2);
    showSection("results");

    // Save to localStorage. Storage can be full or disabled; the results are
    // already on screen, so a failed write is logged rather than thrown.
    try {
      localStorage.setItem("lastResults", JSON.stringify(result));
      localStorage.setItem("lastUsername", username);
    } catch (err) {
      log(`[${new Date().toLocaleTimeString()}] Could not persist results: ${err.message}`);
    }
  }, 2000);
}
160
+
161
/**
 * Append one entry to the #logOutput console and scroll it into view.
 *
 * The message is inserted as text, never as markup: log messages embed the
 * user-typed target username, so assigning them through innerHTML would let
 * that input inject HTML. Each entry gets its own block element because a
 * "\n" inside a normal <div> collapses and would run all entries together.
 *
 * @param {string} message - Text of the log entry.
 */
function log(message) {
  const logOutput = document.getElementById("logOutput");

  const entry = document.createElement("div");
  entry.textContent = message;
  logOutput.appendChild(entry);

  logOutput.scrollTop = logOutput.scrollHeight;
}
166
+
167
/**
 * Persist the credential form to localStorage under "scraperConfig".
 *
 * SECURITY NOTE(review): this stores the Instagram password and the Twitter
 * API secrets in plaintext in localStorage, where any script running on this
 * origin can read them. Behaviour is kept as is; confirm this is acceptable
 * before using real credentials.
 */
function saveConfig() {
  const valueOf = (id) => document.getElementById(id).value;

  const config = {
    instagram: {
      username: valueOf("igUser"),
      password: valueOf("igPass")
    },
    twitter: {
      api_key: valueOf("twApiKey"),
      api_key_secret: valueOf("twApiKeySecret"),
      access_token: valueOf("twAccessToken"),
      access_token_secret: valueOf("twAccessTokenSecret")
    }
  };

  // setItem throws when storage is full or disabled; report the failure
  // instead of letting the click handler die silently.
  try {
    localStorage.setItem("scraperConfig", JSON.stringify(config));
  } catch (err) {
    alert("Could not save configuration: " + err.message);
    return;
  }
  alert("Configuration saved.");
}
183
+
184
/**
 * Fill the credential form from the "scraperConfig" localStorage entry.
 *
 * Alerts "No saved configuration found." when nothing usable is stored.
 * A corrupt (non-JSON) entry and a missing `instagram` / `twitter`
 * sub-object are tolerated instead of raising.
 */
function loadConfig() {
  let config = null;
  try {
    config = JSON.parse(localStorage.getItem("scraperConfig"));
  } catch (err) {
    // Unparseable entry: treat it the same as no saved configuration.
    config = null;
  }

  if (!config) {
    alert("No saved configuration found.");
    return;
  }

  const instagram = config.instagram || {};
  const twitter = config.twitter || {};
  const fieldValues = {
    igUser: instagram.username,
    igPass: instagram.password,
    twApiKey: twitter.api_key,
    twApiKeySecret: twitter.api_key_secret,
    twAccessToken: twitter.access_token,
    twAccessTokenSecret: twitter.access_token_secret
  };

  Object.keys(fieldValues).forEach((id) => {
    document.getElementById(id).value = fieldValues[id] || "";
  });
  alert("Configuration loaded.");
}
198
+
199
+ feather.replace();
200
+ </script>
201
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-qwensite.hf.space/logo.svg" alt="qwensite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-qwensite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >QwenSite</a> - 🧬 <a href="https://enzostvs-qwensite.hf.space?remix=6ee5ali/twitter" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
202
+ </html>
prompts.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# instagram_x_scraper.py
import os
import time
import json
import re
from datetime import datetime
import logging
from urllib.parse import quote

# API libraries
import tweepy
from instagrapi import Client as InstagrapiClient

# Browser automation
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("scraper.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("social_scraper")


class SocialMediaScraper:
    """Collect Instagram and Twitter/X posts that reference a target username.

    Each platform is queried through its API client first; if that raises,
    a Selenium-driven browser session is used as a fallback. Findings are
    accumulated in ``self.results`` and written to a JSON file by ``run``.
    """

    def __init__(self, target_username, config_file="config.json"):
        """Load the configuration and initialise both API clients.

        Args:
            target_username: Username to search for on both platforms.
            config_file: Path to a JSON file with "instagram" and "twitter"
                credential sections.

        Raises:
            FileNotFoundError: If ``config_file`` does not exist.
        """
        self.target_username = target_username
        self.browser = None
        self.results = {
            "instagram": {
                "mentions": [],
                "tags": [],
                "comments": []
            },
            "twitter": {
                "mentions": [],
                "tags": [],
                "tweets": []
            },
            "timestamp": datetime.now().isoformat()
        }

        # Load configuration
        try:
            with open(config_file, 'r') as f:
                self.config = json.load(f)
        except FileNotFoundError:
            logger.error(f"Configuration file {config_file} not found")
            raise

        # Initialize clients
        self.init_instagram_client()
        self.init_twitter_client()

    def init_instagram_client(self):
        """Initialize the Instagram API client; sets ``ig_client`` to None on failure."""
        try:
            self.ig_client = InstagrapiClient()
            self.ig_client.login(
                self.config["instagram"]["username"],
                self.config["instagram"]["password"]
            )
            logger.info("Instagram client initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Instagram client: {e}")
            self.ig_client = None

    def init_twitter_client(self):
        """Initialize the Twitter/X API client; sets ``twitter_client`` to None on failure."""
        try:
            auth = tweepy.OAuth1UserHandler(
                consumer_key=self.config["twitter"]["api_key"],
                consumer_secret=self.config["twitter"]["api_key_secret"],
                access_token=self.config["twitter"]["access_token"],
                access_token_secret=self.config["twitter"]["access_token_secret"]
            )
            self.twitter_client = tweepy.API(auth)
            logger.info("Twitter/X client initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Twitter/X client: {e}")
            self.twitter_client = None

    def init_browser(self):
        """Initialize a browser for web scraping when API fails"""
        options = Options()
        if self.config.get("headless", True):
            options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-notifications")
        options.add_argument("--disable-infobars")
        options.add_argument("--disable-extensions")

        # Set up user-agent to avoid detection
        options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

        # Initialize webdriver
        service = Service(ChromeDriverManager().install())
        self.browser = webdriver.Chrome(service=service, options=options)
        self.browser.maximize_window()
        logger.info("Browser initialized successfully")

    def close_browser(self):
        """Close the browser if it's been initialized.

        The handle is reset afterwards so that a later call (for example
        from another fallback's ``finally``) does not quit a dead driver.
        """
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        try:
            browser.quit()
        finally:
            self.browser = None
        logger.info("Browser closed")

    def scrape_instagram(self):
        """Scrape Instagram for mentions, tags, and comments about target user"""
        if not self.ig_client:
            logger.error("Instagram client not available. Skipping...")
            return

        logger.info(f"Searching Instagram for mentions of {self.target_username}")

        # Try API approach first
        try:
            self._scrape_instagram_api()
        except Exception as e:
            logger.error(f"Error using Instagram API: {e}")
            logger.info("Falling back to browser automation for Instagram")

            # Browser fallback approach - this is less reliable and more likely to be detected
            try:
                self._scrape_instagram_browser()
            except Exception as browser_err:
                logger.error(f"Browser automation for Instagram failed: {browser_err}")
            finally:
                self.close_browser()

    def _scrape_instagram_api(self):
        """Collect tagged posts and post comments through the Instagram API client."""
        user_id = self.ig_client.user_id_from_username(self.target_username)
        logger.info(f"Found target user ID: {user_id}")

        # Recent posts in which the target user is tagged
        tagged_posts = self.ig_client.usertag_medias(user_id, 20)
        for post in tagged_posts:
            self.results["instagram"]["tags"].append({
                "post_id": post.id,
                "username": post.user.username,
                "caption": post.caption_text if post.caption_text else "No caption",
                "timestamp": post.taken_at.isoformat(),
                "url": f"https://www.instagram.com/p/{post.code}/",
                "type": "post_tag"
            })
            logger.info(f"Found Instagram tag in post by {post.user.username}")

        # Comments on the target user's own recent posts.
        # NOTE(review): every non-empty comment is recorded; no filter on the
        # target username is applied - confirm that this is intended.
        user_posts = self.ig_client.user_medias(user_id, 20)
        for post in user_posts:
            try:
                comments = self.ig_client.media_comments(post.id, 50)
                for comment in comments:
                    if not comment.text:
                        continue
                    self.results["instagram"]["comments"].append({
                        "post_id": post.id,
                        "comment_id": comment.pk,
                        "username": comment.user.username,
                        "text": comment.text,
                        "timestamp": comment.created_at_utc.isoformat(),
                        "url": f"https://www.instagram.com/p/{post.code}/",
                        "type": "comment"
                    })
                    logger.info(f"Found Instagram comment by {comment.user.username}")
            except Exception as e:
                logger.error(f"Error fetching comments for post {post.id}: {e}")

    def _scrape_instagram_browser(self):
        """Collect tagged posts by driving a logged-in browser session."""
        self.init_browser()

        # Login to Instagram
        self.browser.get("https://www.instagram.com/accounts/login/")
        time.sleep(2)

        # Accept cookies if prompted
        try:
            cookie_button = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Accept') or contains(text(), 'Allow')]"))
            )
            cookie_button.click()
            time.sleep(1)
        except Exception:
            logger.info("No cookie prompt or already accepted")

        # Login form
        try:
            username_field = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='username']"))
            )
            password_field = self.browser.find_element(By.CSS_SELECTOR, "input[name='password']")

            username_field.send_keys(self.config["instagram"]["username"])
            password_field.send_keys(self.config["instagram"]["password"])

            login_button = self.browser.find_element(By.CSS_SELECTOR, "button[type='submit']")
            login_button.click()

            # Wait for login to complete
            time.sleep(5)
        except Exception as e:
            logger.error(f"Failed to login to Instagram: {e}")
            return

        # Search for the target user
        self.browser.get(f"https://www.instagram.com/{self.target_username}/")
        time.sleep(3)

        # Visit the tagged posts section
        self.browser.get(f"https://www.instagram.com/{self.target_username}/tagged/")
        time.sleep(3)

        # Collect posts where the user is tagged
        try:
            post_links = WebDriverWait(self.browser, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, "//a[contains(@href, '/p/')]"))
            )
            # Read every href BEFORE navigating: once the browser leaves this
            # page the link elements go stale and can no longer be queried.
            post_urls = [link.get_attribute('href') for link in post_links[:10]]
        except Exception as e:
            logger.error(f"Failed to find tagged posts: {e}")
            return

        for i, post_url in enumerate(post_urls):
            try:
                # Visit post to get details
                self.browser.get(post_url)
                time.sleep(2)

                # Get username of poster
                username_elem = WebDriverWait(self.browser, 5).until(
                    EC.presence_of_element_located((By.XPATH, "//a[@class and contains(@href, '/')]"))
                )
                username = username_elem.text

                # Get post caption
                try:
                    caption_elem = self.browser.find_element(By.XPATH, "//div[contains(@class, 'caption')]//span")
                    caption = caption_elem.text
                except Exception:
                    caption = "No caption found"

                # Add to results
                self.results["instagram"]["tags"].append({
                    "post_id": post_url.split("/p/")[1].replace("/", ""),
                    "username": username,
                    "caption": caption,
                    "timestamp": datetime.now().isoformat(),  # Accurate timestamp not available in this context
                    "url": post_url,
                    "type": "post_tag (browser)"
                })
                logger.info(f"Found Instagram tag in post by {username} (via browser)")
            except Exception as e:
                logger.error(f"Error processing tagged post {i}: {e}")

    def scrape_twitter(self):
        """Scrape Twitter/X for mentions, tags, and tweets about target user"""
        if not self.twitter_client:
            logger.error("Twitter/X client not available. Skipping...")
            return

        logger.info(f"Searching Twitter/X for mentions of {self.target_username}")

        try:
            self._scrape_twitter_api()
        except Exception as e:
            logger.error(f"Error using Twitter API: {e}")
            logger.info("Falling back to browser automation for Twitter")

            # Browser fallback approach
            try:
                self._scrape_twitter_browser()
            except Exception as browser_err:
                logger.error(f"Browser automation for Twitter failed: {browser_err}")
            finally:
                self.close_browser()

    @staticmethod
    def _api_tweet_record(tweet, record_type):
        """Build the result dict for one tweet returned by the API client."""
        return {
            "tweet_id": tweet.id_str,
            "username": tweet.user.screen_name,
            "text": tweet.full_text,
            "timestamp": tweet.created_at.isoformat(),
            "url": f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id_str}",
            "type": record_type
        }

    def _scrape_twitter_api(self):
        """Collect @mentions, #hashtags and plain-text mentions through the API client."""
        twitter_results = self.results["twitter"]

        # 1. Search for mentions (@username)
        mentions = self.twitter_client.search_tweets(
            q=f"@{self.target_username}", count=100, tweet_mode="extended"
        )
        for tweet in mentions:
            twitter_results["mentions"].append(self._api_tweet_record(tweet, "mention"))
            logger.info(f"Found Twitter mention by @{tweet.user.screen_name}")

        # 2. Search for hashtags (#username)
        hashtag_tweets = self.twitter_client.search_tweets(
            q=f"#{self.target_username}", count=100, tweet_mode="extended"
        )
        for tweet in hashtag_tweets:
            twitter_results["tags"].append(self._api_tweet_record(tweet, "hashtag"))
            logger.info(f"Found Twitter hashtag by @{tweet.user.screen_name}")

        # 3. Search for regular tweets mentioning the username (without @)
        name_tweets = self.twitter_client.search_tweets(
            q=self.target_username, count=100, tweet_mode="extended"
        )
        # Build the lookup once instead of re-scanning the mentions per tweet.
        mention_ids = {record["tweet_id"] for record in twitter_results["mentions"]}
        for tweet in name_tweets:
            # Skip if this is already counted as a mention
            if tweet.id_str in mention_ids:
                continue
            twitter_results["tweets"].append(self._api_tweet_record(tweet, "text_mention"))
            logger.info(f"Found Twitter text mention by @{tweet.user.screen_name}")

    @staticmethod
    def _result_bucket(search_term):
        """Map a search term to the results key it feeds ('mentions', 'tags' or 'tweets')."""
        if search_term.startswith("@"):
            return "mentions"
        if search_term.startswith("#"):
            return "tags"
        return "tweets"

    def _scrape_twitter_browser(self):
        """Collect tweets by driving a logged-in browser session through Twitter search."""
        # The browser login needs account credentials, which are separate from
        # the API keys; bail out before launching a browser if they are absent.
        twitter_config = self.config["twitter"]
        login_name = twitter_config.get("username")
        login_password = twitter_config.get("password")
        if not login_name or not login_password:
            logger.error("Failed to login to Twitter: 'username'/'password' missing from the twitter config section")
            return

        self.init_browser()

        # Login to Twitter
        self.browser.get("https://twitter.com/login")
        time.sleep(3)

        # Handle login - Twitter's login page can be complex and change frequently
        try:
            # Username step
            username_field = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='username']"))
            )
            username_field.send_keys(login_name)

            # Find and click the next button
            next_button = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located((By.XPATH, "//div[@role='button'][contains(., 'Next') or contains(., 'next')]"))
            )
            next_button.click()
            time.sleep(2)

            # Password step
            password_field = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='current-password']"))
            )
            password_field.send_keys(login_password)

            # Find and click the login button
            login_button = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located((By.XPATH, "//div[@role='button'][contains(., 'Log in') or contains(., 'login')]"))
            )
            login_button.click()

            # Wait for login to complete
            time.sleep(5)
        except Exception as e:
            logger.error(f"Failed to login to Twitter: {e}")
            return

        search_terms = [
            f"@{self.target_username}",  # Mentions
            f"#{self.target_username}",  # Hashtags
            self.target_username  # Name in text
        ]

        for search_term in search_terms:
            try:
                self._collect_browser_tweets(search_term)
            except Exception as e:
                # One empty or failing search must not abort the remaining ones.
                logger.error(f"Search for '{search_term}' failed: {e}")

    def _collect_browser_tweets(self, search_term):
        """Run one Twitter search in the browser and record up to 20 tweets from it."""
        # Percent-encode the term: a raw '#' would start the URL fragment and
        # leave the q= parameter empty.
        encoded_term = quote(search_term, safe='')
        self.browser.get(f"https://twitter.com/search?q={encoded_term}&src=typed_query&f=live")
        time.sleep(5)

        # Scroll a bit to load more tweets
        for _ in range(3):
            self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        # Collect tweets
        tweets = WebDriverWait(self.browser, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//article[@data-testid='tweet']"))
        )

        result_type = self._result_bucket(search_term)

        for i, tweet in enumerate(tweets[:20]):
            try:
                # The status link carries both the author and the tweet id
                status_link = tweet.find_element(By.XPATH, ".//div[@data-testid='User-Name']//a[contains(@href, '/status/')]")
                tweet_url = status_link.get_attribute('href')
                url_parts = tweet_url.split('/')
                username = url_parts[3] if len(url_parts) >= 4 else "unknown"
                tweet_id = tweet_url.split('/status/')[1] if '/status/' in tweet_url else "unknown"

                # Extract tweet text
                text = tweet.find_element(By.XPATH, ".//div[@data-testid='tweetText']").text

                self.results["twitter"][result_type].append({
                    "tweet_id": tweet_id,
                    "username": username,
                    "text": text,
                    "timestamp": datetime.now().isoformat(),  # Accurate timestamp not available in this context
                    "url": tweet_url,
                    "type": f"{result_type} (browser)"
                })
                logger.info(f"Found Twitter {result_type} by @{username} (via browser)")
            except Exception as e:
                logger.error(f"Error processing tweet {i} for search '{search_term}': {e}")

    def run(self):
        """Run the complete scraping process.

        Scrapes both platforms, writes the results to a timestamped JSON
        file in the working directory, prints a summary and returns the
        results dict.
        """
        logger.info(f"Starting social media scraping for user: {self.target_username}")

        self.scrape_instagram()
        self.scrape_twitter()

        # Save results to file
        output_file = f"results_{self.target_username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, indent=2)

        logger.info(f"Scraping completed. Results saved to {output_file}")

        # Summary of results
        instagram_total = sum(
            len(self.results["instagram"][key]) for key in ("mentions", "tags", "comments")
        )
        twitter_total = sum(
            len(self.results["twitter"][key]) for key in ("mentions", "tags", "tweets")
        )

        print(f"\n=== RESULTS SUMMARY FOR {self.target_username} ===")
        print(f"Instagram: {instagram_total} total mentions/tags/comments")
        print(f"Twitter/X: {twitter_total} total mentions/tags/tweets")
        print(f"Detailed results saved to: {output_file}")

        return self.results


# Example usage
if __name__ == "__main__":
    # Create a config.json file with your credentials before running
    # or replace configuration loading with direct values
    import argparse

    parser = argparse.ArgumentParser(description='Scrape social media platforms for mentions of a user')
    parser.add_argument('username', help='Target username to search for')
    parser.add_argument('--config', default='config.json', help='Path to configuration file')
    args = parser.parse_args()

    scraper = SocialMediaScraper(args.username, config_file=args.config)
    results = scraper.run()
2
import argparse
import sys
import os

from rich.console import Console
from pyfiglet import Figlet
from rich.style import Style

from src.instagram import Instagram
from src.twittr import Twitter

pc = Console()

# Help tables: (label, rich style for the label, plain description).
_TWITTER_HELP = (
    ("FILE=y/n : ", "yellow", " Enable/disable output in a '<target username>_<command>.txt' file"),
    ("JSON=y/n : \t", "white", "Enable/disable export in a '<target username>_<command>.json' file'"),
    ("tweets\t : ", "bright_cyan", "Get recent tweets of target"),
    ("favtweets : \t", "yellow", "Get recent tweets which are liked by target"),
    ("followers : \t", "bright_green", " Get total followers of Target"),
    ("following : \t", "cyan", "Get total followings of target"),
    ("reset target : \t", "green", " Select new target"),
    ("timeline : \t", "dark_cyan", "Full information of target's account"),
    ("profile pic : ", "bright_magenta", "Download Target's Profile Picture"),
    ("banner : ", "steel_blue1", "Download Target's Profile Banner "),
    ("htags : ", "bright_red", "Get hashtags used by target"),
    ("mentions : ", "pink1", "Get users who got mentioned by target in recent tweets"),
)

_INSTAGRAM_HELP = (
    ("FILE=y/n : ", "yellow", " Enable/disable output in a '<target username>_<command>.txt' file"),
    ("JSON=y/n : \t", "white", "Enable/disable export in a '<target username>_<command>.json' file'"),
    ("locations : \t", "green", "Get all registered addressed by target photos"),
    ("captions : \t", "cyan", "Get target's photos captions"),
    ("comments : \t", "red", "Get total comments of target's posts"),
    ("followers : \t", "yellow", "get target's followers"),
    ("followings : \t", "red", "Get users followed by target"),
    ("followers emails : \t", "green", "Get email of target followers"),
    ("following emails : \t", "yellow", "Get email of users followed by target"),
    ("followers phone : \t", "red", "Get phone number of target followers"),
    ("followings phone : \t", "cyan", "Get phone number of users followed by target"),
    ("tags : \t", "yellow", "Get hashtags used by target"),
    ("info : \t", "white", "Target timeline and information"),
    ("likes : \t", "red", "Get total likes of target's posts"),
    ("mediatype : \t", "green", "Get target's posts type (photo or video)"),
    ("photodes : \t", "cyan", "Get description of target's photos"),
    ("photos : \t", "yellow", "Download target's photos in output folder"),
    ("profile pic : \t", "white", "Download target's profile picture"),
    ("stories : \t", "cyan", "Download target's stories"),
    ("tagged : \t", "red", "Get list of users tagged by target"),
    ("reset target : \t\t", "white", "Set new target"),
    ("commenter : \t", "red", "Get a list of user who commented target's photos"),
    ("ttag : \t", "green", "Get a list of user who tagged target"),
)


def _print_help(entries):
    """Print a help table followed by the commands shared by both menus."""
    for label, style, description in entries:
        # Label in colour, description on the same line in plain text.
        pc.print(label, style=style, end='')
        print(description)
    print(" ")
    pc.print(" Also supports basic terminal commands : ", style='cyan')
    pc.print("ls : ", style='pink1', end='')
    pc.print("Displaying all Commands ", ":search:", style='bright_white')
    pc.print("exit : ", style='orange_red1', end='')
    pc.print("For Exit from Terra", style='bright_white')
    pc.print("clear : ", style='orchid2', end='')
    pc.print("Clear your Screen", style='bright_white')
    pc.print('back : ', style='purple', end='')
    pc.print('Back to Main Menu', style='yellow')


def twitter_all_commands():
    """Print every command available in the Twitter menu."""
    _print_help(_TWITTER_HELP)


def insta_all_commands():
    """Print every command available in the Instagram menu."""
    _print_help(_INSTAGRAM_HELP)


def clear():
    """Clear the terminal screen ('cls' on Windows, 'clear' elsewhere)."""
    if sys.platform.startswith('win'):
        os.system('cls')
    else:
        os.system('clear')


def banner():
    """Clear the screen and print the Terra banner."""
    clear()
    figlet = Figlet(font='isometric3', justify='right')
    pc.print(figlet.renderText("Terra"), style="bold red")
    pc.print(" OSINT TOOL ON SOCIAL MEDIA NETWORKS ", style="cyan1")
    print(" ")
    pc.print(" @xadhrit (github.com/xadhrit/) ", style='bold red')


def _out():
    """Print the goodbye message and exit with status 0."""
    pc.print("Thank you for using Terra!", style="yellow")
    sys.exit(0)


parser = argparse.ArgumentParser(description="Recon with Terra")
parser.add_argument('target', type=str, help='username of target')
parser.add_argument('-j', '--json', help='save results in a JSON file', action='store_true')
parser.add_argument('-f', '--file', help='save results in a Text File', action='store_true')
args = parser.parse_args()


def _command_loop(api, commands, *, blank_line_before_prompt=False):
    """Read commands forever and dispatch them.

    Args:
        api: Platform client exposing ``write_file(bool)`` and ``json_dump(bool)``.
        commands: Mapping of command text to a zero-argument callable.
        blank_line_before_prompt: Print a blank line before each prompt
            (the Twitter menu does, the Instagram menu does not).
    """
    while True:
        if blank_line_before_prompt:
            print(" ")
        pc.print("~/Terra Command >$ ", style='purple', end='')
        user_input = input()

        cmd = commands.get(user_input)
        if cmd:
            cmd()
        elif user_input == 'FILE=y':
            api.write_file(True)
        elif user_input == 'FILE=n':
            api.write_file(False)
        elif user_input == 'JSON=y':
            api.json_dump(True)
        elif user_input == 'JSON=n':
            api.json_dump(False)
        elif user_input == '':
            print("")
        else:
            pc.print("ILLEGAL COMMAND", style="red")


def _twitter_menu():
    """Build the Twitter client for the parsed target and run its command loop."""
    api = Twitter(args.target, args.file, args.json)
    commands = {
        'ls': twitter_all_commands,
        'help': twitter_all_commands,
        'quit': quit,
        'clear': clear,
        'exit': _out,
        'back': main,
        'reset target': api.reset_target,
        'tweets': api.recent_tweets,
        'favtweets': api.recent_fav,
        'followers': api.get_followers,
        'following': api.get_frnds,
        'timeline': api.user_info,
        'profile pic': api.profile_pic,
        'banner': api.banner_pic,
        'htags': api.get_hashtags,
        'mentions': api.get_mentions,
    }
    _command_loop(api, commands, blank_line_before_prompt=True)


def _instagram_menu():
    """Build the Instagram client for the parsed target and run its command loop."""
    api = Instagram(args.target, args.file, args.json)
    commands = {
        'ls': insta_all_commands,
        'help': insta_all_commands,
        'clear': clear,
        'quit': quit,
        'exit': _out,
        'back': main,
        'locations': api.target_locations,
        'captions': api.__getCaptions__,
        'reset target': api.change_target,
        'comments': api._all_comments,
        'followers': api._followers,
        'followings': api._followings,
        'followers emails': api.followers_email,
        'following emails': api.followings_email,
        'followers phone': api.followers_phoneNumber,
        'followings phone': api.followings_phoneNumber,
        'tags': api._hashtags,
        'timeline': api._user_timeline,
        # The help text advertises 'info'; register it next to 'timeline'
        # so the documented command is not rejected as illegal.
        'info': api._user_timeline,
        'likes': api._total_likes,
        'mediatype': api._media_type,
        'photodes': api._photo_description,
        'photos': api._user_photo,
        'profile pic': api._user_profilepic,
        'stories': api._user_stories,
        'tagged': api._people_who_tagged_by_target,
        'commenter': api._people_who_commented,
        'ttag': api._users_who_tagged,
    }
    _command_loop(api, commands)


def main():
    """Show the main menu and dispatch to the chosen platform.

    Options '1' and '2' enter a command loop that only ends through an exit
    command; '3' or 'exit' quits. Any other input is reported as invalid and
    the user is asked whether to choose again.
    """
    banner()
    pc.print(" \n> 1 for Twitter ", style="bright_yellow")
    pc.print(" \n> 2 for Instagram ", style="green3")
    pc.print(" \n> 3 for Exit ")
    print(" ")
    pc.print("> Choose one option : ", style='purple', end='')
    u_input = str(input())

    try:
        if u_input == '1':
            _twitter_menu()
        if u_input == '2':
            _instagram_menu()
        if u_input in ('3', 'exit'):
            sys.exit(0)

        pc.print('Invalid Option! Try Again.... ', style='bold red')
        pc.print('Do you want to choose again ? (y/n)', style='red')
        answer = input()
        # Compare against both spellings explicitly: "answer == 'y' or 'Y'"
        # is always true because the non-empty string 'Y' is truthy.
        if answer in ('y', 'Y'):
            main()
        elif answer in ('n', 'N'):
            sys.exit(0)
        else:
            print('Not a option! Good Bye!')
            sys.exit(0)
    except Exception as e:
        pc.print(e, style='orange1')


if __name__ == '__main__':
    main()