Add 3 files
Browse files- README.md +7 -5
- index.html +202 -19
- prompts.txt +2 -0
README.md
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: twitter
|
| 3 |
+
emoji: ⚛️
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- QwenSite
|
| 10 |
---
|
| 11 |
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
index.html
CHANGED
|
@@ -1,19 +1,202 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" class="bg-gray-900 text-white">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Social Media Scraper - Terra</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<script src="https://unpkg.com/feather-icons"></script>
|
| 9 |
+
</head>
|
| 10 |
+
<body class="flex h-screen overflow-hidden">
|
| 11 |
+
|
| 12 |
+
<!-- Sidebar -->
|
| 13 |
+
<div class="w-64 bg-gray-800 p-4 flex flex-col space-y-4">
|
| 14 |
+
<h2 class="text-2xl font-bold mb-4 text-blue-400">Terra OSINT</h2>
|
| 15 |
+
<button onclick="showSection('dashboard')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
|
| 16 |
+
<i data-feather="home"></i><span>Dashboard</span>
|
| 17 |
+
</button>
|
| 18 |
+
<button onclick="showSection('config')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
|
| 19 |
+
<i data-feather="settings"></i><span>Settings</span>
|
| 20 |
+
</button>
|
| 21 |
+
<button onclick="showSection('results')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
|
| 22 |
+
<i data-feather="database"></i><span>Results</span>
|
| 23 |
+
</button>
|
| 24 |
+
<button onclick="showSection('logs')" class="flex items-center space-x-2 text-gray-300 hover:text-white">
|
| 25 |
+
<i data-feather="file-text"></i><span>Logs</span>
|
| 26 |
+
</button>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
<!-- Main Content -->
|
| 30 |
+
<div id="dashboard" class="flex-1 p-6 overflow-auto">
|
| 31 |
+
<div class="max-w-3xl mx-auto">
|
| 32 |
+
<h1 class="text-3xl font-bold mb-6">Instagram & Twitter Scraper</h1>
|
| 33 |
+
|
| 34 |
+
<!-- Target Input -->
|
| 35 |
+
<div class="bg-gray-800 p-6 rounded-lg mb-6">
|
| 36 |
+
<label class="block text-lg font-semibold mb-2">Enter Target Username</label>
|
| 37 |
+
<input id="targetUsername" type="text" placeholder="e.g. realDonaldTrump" class="w-full p-2 rounded bg-gray-700 text-white" />
|
| 38 |
+
<div class="mt-4 flex space-x-4">
|
| 39 |
+
<button onclick="startScraping()" class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded">Scrape Instagram</button>
|
| 40 |
+
<button onclick="startScraping()" class="bg-indigo-600 hover:bg-indigo-700 px-4 py-2 rounded">Scrape Twitter</button>
|
| 41 |
+
<button onclick="startScraping()" class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded">Scrape Both</button>
|
| 42 |
+
</div>
|
| 43 |
+
</div>
|
| 44 |
+
|
| 45 |
+
<!-- Status -->
|
| 46 |
+
<div id="status" class="bg-gray-800 p-4 rounded-lg text-center text-gray-400">Ready to scrape</div>
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
|
| 50 |
+
<!-- Config Section -->
|
| 51 |
+
<div id="config" class="hidden flex-1 p-6 overflow-auto">
|
| 52 |
+
<div class="max-w-3xl mx-auto">
|
| 53 |
+
<h1 class="text-3xl font-bold mb-6">Configuration</h1>
|
| 54 |
+
<div class="bg-gray-800 p-6 rounded-lg">
|
| 55 |
+
<label class="block text-lg font-semibold mb-2">Instagram Credentials</label>
|
| 56 |
+
<input id="igUser" type="text" placeholder="Instagram Username" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
|
| 57 |
+
<input id="igPass" type="password" placeholder="Instagram Password" class="w-full mb-4 p-2 rounded bg-gray-700 text-white" />
|
| 58 |
+
|
| 59 |
+
<label class="block text-lg font-semibold mb-2">Twitter API Keys</label>
|
| 60 |
+
<input id="twApiKey" type="text" placeholder="API Key" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
|
| 61 |
+
<input id="twApiKeySecret" type="text" placeholder="API Key Secret" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
|
| 62 |
+
<input id="twAccessToken" type="text" placeholder="Access Token" class="w-full mb-2 p-2 rounded bg-gray-700 text-white" />
|
| 63 |
+
<input id="twAccessTokenSecret" type="text" placeholder="Access Token Secret" class="w-full mb-4 p-2 rounded bg-gray-700 text-white" />
|
| 64 |
+
|
| 65 |
+
<div class="flex space-x-4">
|
| 66 |
+
<button onclick="saveConfig()" class="bg-green-600 hover:bg-green-700 px-4 py-2 rounded">Save</button>
|
| 67 |
+
<button onclick="loadConfig()" class="bg-gray-600 hover:bg-gray-700 px-4 py-2 rounded">Load</button>
|
| 68 |
+
</div>
|
| 69 |
+
</div>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
|
| 73 |
+
<!-- Results Section -->
|
| 74 |
+
<div id="results" class="hidden flex-1 p-6 overflow-auto">
|
| 75 |
+
<div class="max-w-5xl mx-auto">
|
| 76 |
+
<h1 class="text-3xl font-bold mb-6">Scraping Results</h1>
|
| 77 |
+
<div class="bg-gray-800 p-6 rounded-lg">
|
| 78 |
+
<div class="flex space-x-4 mb-4">
|
| 79 |
+
<button onclick="showResultsTab('instagram')" class="bg-pink-600 hover:bg-pink-700 px-4 py-2 rounded">Instagram</button>
|
| 80 |
+
<button onclick="showResultsTab('twitter')" class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded">Twitter</button>
|
| 81 |
+
</div>
|
| 82 |
+
|
| 83 |
+
<div id="instagramResults" class="hidden">
|
| 84 |
+
<h2 class="text-xl font-semibold mb-4">Instagram Mentions & Tags</h2>
|
| 85 |
+
<div id="instagramOutput" class="bg-gray-900 p-4 rounded max-h-96 overflow-y-auto text-sm text-gray-300"></div>
|
| 86 |
+
</div>
|
| 87 |
+
|
| 88 |
+
<div id="twitterResults" class="hidden">
|
| 89 |
+
<h2 class="text-xl font-semibold mb-4">Twitter Mentions & Hashtags</h2>
|
| 90 |
+
<div id="twitterOutput" class="bg-gray-900 p-4 rounded max-h-96 overflow-y-auto text-sm text-gray-300"></div>
|
| 91 |
+
</div>
|
| 92 |
+
</div>
|
| 93 |
+
</div>
|
| 94 |
+
</div>
|
| 95 |
+
|
| 96 |
+
<!-- Logs Section -->
|
| 97 |
+
<div id="logs" class="hidden flex-1 p-6 overflow-auto">
|
| 98 |
+
<div class="max-w-5xl mx-auto">
|
| 99 |
+
<h1 class="text-3xl font-bold mb-6">Scraping Logs</h1>
|
| 100 |
+
<div class="bg-gray-800 p-6 rounded">
|
| 101 |
+
<div id="logOutput" class="bg-black text-green-400 p-4 rounded h-96 overflow-y-auto font-mono text-sm"></div>
|
| 102 |
+
</div>
|
| 103 |
+
</div>
|
| 104 |
+
</div>
|
| 105 |
+
|
| 106 |
+
<script>
|
| 107 |
+
/**
 * Show exactly one of the four top-level panels.
 *
 * Every panel (#dashboard, #config, #results, #logs) first receives the
 * "hidden" class, then the panel whose id equals `sectionId` has it removed.
 *
 * @param {string} sectionId - id of the panel element to reveal.
 */
function showSection(sectionId) {
  const panels = document.querySelectorAll("#dashboard, #config, #results, #logs");
  for (const panel of panels) {
    panel.classList.add("hidden");
  }

  const target = document.getElementById(sectionId);
  target.classList.remove("hidden");
}
|
| 111 |
+
|
| 112 |
+
/**
 * Switch the visible tab inside the Results panel.
 *
 * Both result containers are hidden first; the container whose id is
 * `tab` followed by "Results" is then revealed.
 *
 * @param {string} tab - tab prefix; the markup defines "instagram" and "twitter".
 */
function showResultsTab(tab) {
  ["instagramResults", "twitterResults"].forEach((panelId) => {
    document.getElementById(panelId).classList.add("hidden");
  });

  const activeId = tab + "Results";
  const activePanel = document.getElementById(activeId);
  activePanel.classList.remove("hidden");
}
|
| 117 |
+
|
| 118 |
+
/**
 * Run a simulated scrape for the username typed into #targetUsername.
 *
 * No network request is made: after a 2 second delay a hard-coded placeholder
 * result is rendered into the Results panel and stored in localStorage under
 * "lastResults" / "lastUsername".
 *
 * @param {string} [platform="both"] - "instagram", "twitter" or "both".
 *   Any other value (including a missing argument, which is how the existing
 *   buttons call this function) is treated as "both".
 */
function startScraping(platform = "both") {
  const username = document.getElementById("targetUsername").value.trim();
  if (!username) {
    alert("Please enter a username.");
    return;
  }

  // Only the two single-platform values narrow the run; everything else means both.
  const includeInstagram = platform !== "twitter";
  const includeTwitter = platform !== "instagram";

  const status = document.getElementById("status");
  status.innerText = `Scraping started for ${username}...`;

  log(`[${new Date().toLocaleTimeString()}] Starting scrape for ${username}`);

  // Simulate scraping
  setTimeout(() => {
    const result = {
      instagram: {
        mentions: [],
        tags: includeInstagram
          ? [{ username: "user123", caption: "Check out this post!", url: "#" }]
          : [],
        comments: []
      },
      twitter: {
        mentions: [],
        // Template literal (backticks) so the target username is actually
        // interpolated; a double-quoted string would print "${username}" verbatim.
        tags: includeTwitter
          ? [{ username: "john_doe", text: `Just mentioned @${username}!`, url: "#" }]
          : [],
        tweets: []
      },
      timestamp: new Date().toISOString()
    };

    log(`[${new Date().toLocaleTimeString()}] Scraping completed.`);
    status.innerText = `Scraping completed for ${username}.`;

    // Show results, opening on the tab that matches the requested platform.
    showResultsTab(includeInstagram ? "instagram" : "twitter");
    document.getElementById("instagramOutput").innerText = JSON.stringify(result.instagram, null, 2);
    document.getElementById("twitterOutput").innerText = JSON.stringify(result.twitter, null, 2);
    showSection("results");

    // Save to localStorage. setItem can throw (quota exceeded, storage
    // disabled); the results are already on screen, so just record the failure.
    try {
      localStorage.setItem("lastResults", JSON.stringify(result));
      localStorage.setItem("lastUsername", username);
    } catch (err) {
      log(`[${new Date().toLocaleTimeString()}] Could not save results: ${err.message}`);
    }
  }, 2000);
}
|
| 160 |
+
|
| 161 |
+
/**
 * Append one line to the on-page log console (#logOutput) and scroll to it.
 *
 * The message is inserted as a text node, never as HTML: log lines contain
 * the user-typed target username, so writing them through innerHTML would
 * let that input be parsed as markup.
 *
 * @param {string} message - text of the log line (a newline is appended).
 */
function log(message) {
  const logOutput = document.getElementById("logOutput");

  // Keep the "\n" separators visible; in a plain <div> they would collapse
  // into spaces and every entry would run together on one line.
  logOutput.style.whiteSpace = "pre-wrap";

  logOutput.appendChild(document.createTextNode(message + "\n"));

  // Keep the newest entry in view.
  logOutput.scrollTop = logOutput.scrollHeight;
}
|
| 166 |
+
|
| 167 |
+
/**
 * Persist the values of the Settings form to localStorage under the key
 * "scraperConfig" and tell the user whether that worked.
 *
 * SECURITY NOTE(review): the Instagram password and the Twitter API secrets
 * are written to localStorage as plain JSON, where any script running on this
 * origin can read them. Confirm this is acceptable before using real
 * credentials.
 */
function saveConfig() {
  const fieldValue = (id) => document.getElementById(id).value;

  const config = {
    instagram: {
      username: fieldValue("igUser"),
      password: fieldValue("igPass")
    },
    twitter: {
      api_key: fieldValue("twApiKey"),
      api_key_secret: fieldValue("twApiKeySecret"),
      access_token: fieldValue("twAccessToken"),
      access_token_secret: fieldValue("twAccessTokenSecret")
    }
  };

  // setItem can throw (quota exceeded, storage disabled); report it instead of
  // leaving the user without any feedback.
  try {
    localStorage.setItem("scraperConfig", JSON.stringify(config));
  } catch (err) {
    alert("Configuration could not be saved: " + err.message);
    return;
  }

  alert("Configuration saved.");
}
|
| 183 |
+
|
| 184 |
+
/**
 * Fill the Settings form from the JSON stored in localStorage under
 * "scraperConfig" and tell the user the outcome.
 *
 * Missing fields (or a missing "instagram"/"twitter" section) are loaded as
 * empty strings. A stored value that is not valid JSON, or not an object, is
 * reported as corrupted instead of throwing.
 */
function loadConfig() {
  const raw = localStorage.getItem("scraperConfig");
  if (raw === null) {
    alert("No saved configuration found.");
    return;
  }

  let config;
  try {
    config = JSON.parse(raw);
  } catch (err) {
    config = undefined;
  }
  if (!config || typeof config !== "object") {
    alert("Saved configuration is corrupted and could not be loaded.");
    return;
  }

  // Tolerate a partially saved config: a missing section behaves like an empty one.
  const instagram = config.instagram || {};
  const twitter = config.twitter || {};

  const setField = (id, value) => {
    document.getElementById(id).value = value || "";
  };

  setField("igUser", instagram.username);
  setField("igPass", instagram.password);
  setField("twApiKey", twitter.api_key);
  setField("twApiKeySecret", twitter.api_key_secret);
  setField("twAccessToken", twitter.access_token);
  setField("twAccessTokenSecret", twitter.access_token_secret);

  alert("Configuration loaded.");
}
|
| 198 |
+
|
| 199 |
+
// Ask the Feather Icons library (loaded from unpkg in <head>) to render the
// <i data-feather="..."> placeholders used by the sidebar buttons.
feather.replace();
|
| 200 |
+
</script>
|
| 201 |
+
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-qwensite.hf.space/logo.svg" alt="qwensite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-qwensite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >QwenSite</a> - 🧬 <a href="https://enzostvs-qwensite.hf.space?remix=6ee5ali/twitter" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
|
| 202 |
+
</html>
|
prompts.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# instagram_x_scraper.py import os import time import json import re from datetime import datetime import logging # API libraries import tweepy from instagrapi import Client as InstagrapiClient # Browser automation from selenium import webdriver from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from webdriver_manager.chrome import ChromeDriverManager # Set up logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("scraper.log"), logging.StreamHandler() ] ) logger = logging.getLogger("social_scraper") class SocialMediaScraper: def __init__(self, target_username, config_file="config.json"): self.target_username = target_username self.results = { "instagram": { "mentions": [], "tags": [], "comments": [] }, "twitter": { "mentions": [], "tags": [], "tweets": [] }, "timestamp": datetime.now().isoformat() } # Load configuration try: with open(config_file, 'r') as f: self.config = json.load(f) except FileNotFoundError: logger.error(f"Configuration file {config_file} not found") raise # Initialize clients self.init_instagram_client() self.init_twitter_client() def init_instagram_client(self): """Initialize the Instagram API client""" try: self.ig_client = InstagrapiClient() self.ig_client.login( self.config["instagram"]["username"], self.config["instagram"]["password"] ) logger.info("Instagram client initialized successfully") except Exception as e: logger.error(f"Failed to initialize Instagram client: {e}") self.ig_client = None def init_twitter_client(self): """Initialize the Twitter/X API client""" try: auth = tweepy.OAuth1UserHandler( consumer_key=self.config["twitter"]["api_key"], consumer_secret=self.config["twitter"]["api_key_secret"], 
access_token=self.config["twitter"]["access_token"], access_token_secret=self.config["twitter"]["access_token_secret"] ) self.twitter_client = tweepy.API(auth) logger.info("Twitter/X client initialized successfully") except Exception as e: logger.error(f"Failed to initialize Twitter/X client: {e}") self.twitter_client = None def init_browser(self): """Initialize a browser for web scraping when API fails""" options = Options() if self.config.get("headless", True): options.add_argument("--headless") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") options.add_argument("--disable-notifications") options.add_argument("--disable-infobars") options.add_argument("--disable-extensions") # Set up user-agent to avoid detection options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") # Initialize webdriver service = Service(ChromeDriverManager().install()) self.browser = webdriver.Chrome(service=service, options=options) self.browser.maximize_window() logger.info("Browser initialized successfully") def close_browser(self): """Close the browser if it's been initialized""" if hasattr(self, 'browser'): self.browser.quit() logger.info("Browser closed") def scrape_instagram(self): """Scrape Instagram for mentions, tags, and comments about target user""" if not self.ig_client: logger.error("Instagram client not available. Skipping...") return logger.info(f"Searching Instagram for mentions of {self.target_username}") # Try API approach first try: # 1. Search for hashtags related to the user (will need API access) user_id = self.ig_client.user_id_from_username(self.target_username) logger.info(f"Found target user ID: {user_id}") # 2. 
Get recent posts tagged with the user tagged_posts = self.ig_client.usertag_medias(user_id, 20) for post in tagged_posts: self.results["instagram"]["tags"].append({ "post_id": post.id, "username": post.user.username, "caption": post.caption_text if post.caption_text else "No caption", "timestamp": post.taken_at.isoformat(), "url": f"https://www.instagram.com/p/{post.code}/", "type": "post_tag" }) logger.info(f"Found Instagram tag in post by {post.user.username}") # 3. Search for mentions in comments on recent posts # This requires more complex API usage or browser automation # 4. Find the user's own posts and check comments user_posts = self.ig_client.user_medias(user_id, 20) for post in user_posts: try: comments = self.ig_client.media_comments(post.id, 50) for comment in comments: if not comment.text: continue # Check if comment mentions the target username self.results["instagram"]["comments"].append({ "post_id": post.id, "comment_id": comment.pk, "username": comment.user.username, "text": comment.text, "timestamp": comment.created_at_utc.isoformat(), "url": f"https://www.instagram.com/p/{post.code}/", "type": "comment" }) logger.info(f"Found Instagram comment by {comment.user.username}") except Exception as e: logger.error(f"Error fetching comments for post {post.id}: {e}") except Exception as e: logger.error(f"Error using Instagram API: {e}") logger.info("Falling back to browser automation for Instagram") # Browser fallback approach - this is less reliable and more likely to be detected try: self.init_browser() # Login to Instagram self.browser.get("https://www.instagram.com/accounts/login/") time.sleep(2) # Accept cookies if prompted try: cookie_button = WebDriverWait(self.browser, 5).until( EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Accept') or contains(text(), 'Allow')]")) ) cookie_button.click() time.sleep(1) except: logger.info("No cookie prompt or already accepted") # Login form try: username_field = WebDriverWait(self.browser, 
10).until( EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='username']")) ) password_field = self.browser.find_element(By.CSS_SELECTOR, "input[name='password']") username_field.send_keys(self.config["instagram"]["username"]) password_field.send_keys(self.config["instagram"]["password"]) login_button = self.browser.find_element(By.CSS_SELECTOR, "button[type='submit']") login_button.click() # Wait for login to complete time.sleep(5) except Exception as e: logger.error(f"Failed to login to Instagram: {e}") return # Search for the target user self.browser.get(f"https://www.instagram.com/{self.target_username}/") time.sleep(3) # Visit the tagged posts section self.browser.get(f"https://www.instagram.com/{self.target_username}/tagged/") time.sleep(3) # Collect posts where the user is tagged try: post_links = WebDriverWait(self.browser, 10).until( EC.presence_of_all_elements_located((By.XPATH, "//a[contains(@href, '/p/')]")) ) for i, link in enumerate(post_links[:min(10, len(post_links))]): try: post_url = link.get_attribute('href') # Visit post to get details self.browser.get(post_url) time.sleep(2) # Get username of poster username_elem = WebDriverWait(self.browser, 5).until( EC.presence_of_element_located((By.XPATH, "//a[@class and contains(@href, '/')]")) ) username = username_elem.text # Get post caption try: caption_elem = self.browser.find_element(By.XPATH, "//div[contains(@class, 'caption')]//span") caption = caption_elem.text except: caption = "No caption found" # Add to results self.results["instagram"]["tags"].append({ "post_id": post_url.split("/p/")[1].replace("/", ""), "username": username, "caption": caption, "timestamp": datetime.now().isoformat(), # Accurate timestamp not available in this context "url": post_url, "type": "post_tag (browser)" }) logger.info(f"Found Instagram tag in post by {username} (via browser)") except Exception as e: logger.error(f"Error processing tagged post {i}: {e}") except Exception as e: logger.error(f"Failed to 
find tagged posts: {e}") except Exception as e: logger.error(f"Browser automation for Instagram failed: {e}") finally: self.close_browser() def scrape_twitter(self): """Scrape Twitter/X for mentions, tags, and tweets about target user""" if not self.twitter_client: logger.error("Twitter/X client not available. Skipping...") return logger.info(f"Searching Twitter/X for mentions of {self.target_username}") try: # 1. Search for mentions (@username) mention_query = f"@{self.target_username}" mentions = self.twitter_client.search_tweets(q=mention_query, count=100, tweet_mode="extended") for tweet in mentions: self.results["twitter"]["mentions"].append({ "tweet_id": tweet.id_str, "username": tweet.user.screen_name, "text": tweet.full_text, "timestamp": tweet.created_at.isoformat(), "url": f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id_str}", "type": "mention" }) logger.info(f"Found Twitter mention by @{tweet.user.screen_name}") # 2. Search for hashtags (#username) hashtag_query = f"#{self.target_username}" hashtag_tweets = self.twitter_client.search_tweets(q=hashtag_query, count=100, tweet_mode="extended") for tweet in hashtag_tweets: self.results["twitter"]["tags"].append({ "tweet_id": tweet.id_str, "username": tweet.user.screen_name, "text": tweet.full_text, "timestamp": tweet.created_at.isoformat(), "url": f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id_str}", "type": "hashtag" }) logger.info(f"Found Twitter hashtag by @{tweet.user.screen_name}") # 3. 
Search for regular tweets mentioning the username (without @) name_query = self.target_username name_tweets = self.twitter_client.search_tweets(q=name_query, count=100, tweet_mode="extended") for tweet in name_tweets: # Skip if this is already counted as a mention if tweet.id_str in [t["tweet_id"] for t in self.results["twitter"]["mentions"]]: continue self.results["twitter"]["tweets"].append({ "tweet_id": tweet.id_str, "username": tweet.user.screen_name, "text": tweet.full_text, "timestamp": tweet.created_at.isoformat(), "url": f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id_str}", "type": "text_mention" }) logger.info(f"Found Twitter text mention by @{tweet.user.screen_name}") except Exception as e: logger.error(f"Error using Twitter API: {e}") logger.info("Falling back to browser automation for Twitter") # Browser fallback approach try: self.init_browser() # Login to Twitter self.browser.get("https://twitter.com/login") time.sleep(3) # Handle login - Twitter's login page can be complex and change frequently try: # Username step username_field = WebDriverWait(self.browser, 10).until( EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='username']")) ) username_field.send_keys(self.config["twitter"]["username"]) # Find and click the next button next_button = WebDriverWait(self.browser, 10).until( EC.presence_of_element_located((By.XPATH, "//div[@role='button'][contains(., 'Next') or contains(., 'next')]")) ) next_button.click() time.sleep(2) # Password step password_field = WebDriverWait(self.browser, 10).until( EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='current-password']")) ) password_field.send_keys(self.config["twitter"]["password"]) # Find and click the login button login_button = WebDriverWait(self.browser, 10).until( EC.presence_of_element_located((By.XPATH, "//div[@role='button'][contains(., 'Log in') or contains(., 'login')]")) ) login_button.click() # Wait for login to complete time.sleep(5) except 
Exception as e: logger.error(f"Failed to login to Twitter: {e}") return # Search for mentions search_terms = [ f"@{self.target_username}", # Mentions f"#{self.target_username}", # Hashtags self.target_username # Name in text ] for search_term in search_terms: self.browser.get(f"https://twitter.com/search?q={search_term}&src=typed_query&f=live") time.sleep(5) # Scroll a bit to load more tweets for _ in range(3): self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) # Collect tweets tweets = WebDriverWait(self.browser, 10).until( EC.presence_of_all_elements_located((By.XPATH, "//article[@data-testid='tweet']")) ) result_type = "mentions" if search_term.startswith("@") else "tags" if search_term.startswith("#") else "tweets" for i, tweet in enumerate(tweets[:min(20, len(tweets))]): try: # Extract username username_elem = tweet.find_element(By.XPATH, ".//div[@data-testid='User-Name']//a[contains(@href, '/status/')]") username_parts = username_elem.get_attribute('href').split('/') username = username_parts[3] if len(username_parts) >= 4 else "unknown" # Extract tweet text text_elem = tweet.find_element(By.XPATH, ".//div[@data-testid='tweetText']") text = text_elem.text # Extract tweet URL tweet_url = username_elem.get_attribute('href') tweet_id = tweet_url.split('/status/')[1] if '/status/' in tweet_url else "unknown" self.results["twitter"][result_type].append({ "tweet_id": tweet_id, "username": username, "text": text, "timestamp": datetime.now().isoformat(), # Accurate timestamp not available in this context "url": tweet_url, "type": f"{result_type} (browser)" }) logger.info(f"Found Twitter {result_type} by @{username} (via browser)") except Exception as e: logger.error(f"Error processing tweet {i} for search '{search_term}': {e}") except Exception as e: logger.error(f"Browser automation for Twitter failed: {e}") finally: self.close_browser() def run(self): """Run the complete scraping process""" logger.info(f"Starting social 
media scraping for user: {self.target_username}") self.scrape_instagram() self.scrape_twitter() # Save results to file output_file = f"results_{self.target_username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" with open(output_file, 'w') as f: json.dump(self.results, f, indent=2) logger.info(f"Scraping completed. Results saved to {output_file}") # Summary of results instagram_total = ( len(self.results["instagram"]["mentions"]) + len(self.results["instagram"]["tags"]) + len(self.results["instagram"]["comments"]) ) twitter_total = ( len(self.results["twitter"]["mentions"]) + len(self.results["twitter"]["tags"]) + len(self.results["twitter"]["tweets"]) ) print(f"\n=== RESULTS SUMMARY FOR {self.target_username} ===") print(f"Instagram: {instagram_total} total mentions/tags/comments") print(f"Twitter/X: {twitter_total} total mentions/tags/tweets") print(f"Detailed results saved to: {output_file}") return self.results # Example usage if __name__ == "__main__": # Create a config.json file with your credentials before running # or replace configuration loading with direct values import argparse parser = argparse.ArgumentParser(description='Scrape social media platforms for mentions of a user') parser.add_argument('username', help='Target username to search for') parser.add_argument('--config', default='config.json', help='Path to configuration file') args = parser.parse_args() scraper = SocialMediaScraper(args.username, config_file=args.config) results = scraper.run()
|
| 2 |
+
"""Interactive command-line front end for Terra.

Parses the target username from the command line, shows a platform menu
(Twitter / Instagram / Exit) and then runs a small command prompt that
dispatches typed commands to the selected platform API object.
"""
import argparse
import sys
import os

from rich.console import Console
from pyfiglet import Figlet
from rich.style import Style

from src.instagram import Instagram
from src.twittr import Twitter

pc = Console()

# Each help row is (label, rich style for the label, description).
_TWITTER_HELP = (
    ("FILE=y/n : ", "yellow",
     " Enable/disable output in a '<target username>_<command>.txt' file"),
    ("JSON=y/n : \t", "white",
     "Enable/disable export in a '<target username>_<command>.json' file'"),
    ("tweets\t : ", "bright_cyan", "Get recent tweets of target"),
    ("favtweets : \t", "yellow", "Get recent tweets which are liked by target"),
    ("followers : \t", "bright_green", " Get total followers of Target"),
    ("following : \t", "cyan", "Get total followings of target"),
    ("reset target : \t", "green", " Select new target"),
    ("timeline : \t", "dark_cyan", "Full information of target's account"),
    ("profile pic : ", "bright_magenta", "Download Target's Profile Picture"),
    ("banner : ", "steel_blue1", "Download Target's Profile Banner "),
    ("htags : ", "bright_red", "Get hashtags used by target"),
    ("mentions : ", "pink1",
     "Get users who got mentioned by target in recent tweets"),
)

_INSTAGRAM_HELP = (
    ("FILE=y/n : ", "yellow",
     " Enable/disable output in a '<target username>_<command>.txt' file"),
    ("JSON=y/n : \t", "white",
     "Enable/disable export in a '<target username>_<command>.json' file'"),
    ("locations : \t", "green", "Get all registered addressed by target photos"),
    ("captions : \t", "cyan", "Get target's photos captions"),
    ("comments : \t", "red", "Get total comments of target's posts"),
    ("followers : \t", "yellow", "get target's followers"),
    ("followings : \t", "red", "Get users followed by target"),
    ("followers emails : \t", "green", "Get email of target followers"),
    ("following emails : \t", "yellow", "Get email of users followed by target"),
    ("followers phone : \t", "red", "Get phone number of target followers"),
    ("followings phone : \t", "cyan",
     "Get phone number of users followed by target"),
    ("tags : \t", "yellow", "Get hashtags used by target"),
    ("info : \t", "white", "Target timeline and information"),
    ("likes : \t", "red", "Get total likes of target's posts"),
    ("mediatype : \t", "green", "Get target's posts type (photo or video)"),
    ("photodes : \t", "cyan", "Get description of target's photos"),
    ("photos : \t", "yellow", "Download target's photos in output folder"),
    ("profile pic : \t", "white", "Download target's profile picture"),
    ("stories : \t", "cyan", "Download target's stories"),
    ("tagged : \t", "red", "Get list of users tagged by target"),
    ("reset target : \t\t", "white", "Set new target"),
    ("commenter : \t", "red", "Get a list of user who commented target's photos"),
    ("ttag : \t", "green", "Get a list of user who tagged target"),
)

# Typed toggles -> (name of the API method to call, value to pass).
# The method is looked up only when the toggle is typed, as before.
_OUTPUT_TOGGLES = {
    'FILE=y': ('write_file', True),
    'FILE=n': ('write_file', False),
    'JSON=y': ('json_dump', True),
    'JSON=n': ('json_dump', False),
}


def _print_help_rows(rows):
    """Print each help row: styled label, then its description on the same line."""
    for label, style, description in rows:
        pc.print(label, style=style, end='')
        print(description)


def _print_terminal_commands():
    """Print the footer listing the shell-like commands both prompts accept."""
    print(" ")
    pc.print(" Also supports basic terminal commands : ", style='cyan')
    pc.print("ls : ", style='pink1', end='')
    pc.print("Displaying all Commands ", ":search:", style='bright_white')
    pc.print("exit : ", style='orange_red1', end='')
    pc.print("For Exit from Terra", style='bright_white')
    pc.print("clear : ", style='orchid2', end='')
    pc.print("Clear your Screen", style='bright_white')
    pc.print('back : ', style='purple', end='')
    pc.print('Back to Main Menu', style='yellow')


def twitter_all_commands():
    """Print the list of commands available at the Twitter prompt."""
    _print_help_rows(_TWITTER_HELP)
    _print_terminal_commands()


def insta_all_commands():
    """Print the list of commands available at the Instagram prompt."""
    _print_help_rows(_INSTAGRAM_HELP)
    _print_terminal_commands()


def clear():
    """Clear the terminal using the platform's native command."""
    # Windows uses `cls`; everything else is treated as mac/linux.
    if sys.platform.startswith('win'):
        os.system('cls')
    else:
        os.system('clear')


def banner():
    """Clear the screen and print the Terra banner."""
    clear()
    figlet = Figlet(font='isometric3', justify='right')
    pc.print(figlet.renderText("Terra"), style="bold red")
    pc.print(" OSINT TOOL ON SOCIAL MEDIA NETWORKS ", style="cyan1")
    print(" ")
    pc.print(" @xadhrit (github.com/xadhrit/) ", style='bold red')


def _out():
    """Print a goodbye message and exit with status 0."""
    pc.print("Thank you for using Terra!", style="yellow")
    sys.exit(0)


def _build_parser():
    """Return the command-line parser (target username plus -j / -f flags)."""
    parser = argparse.ArgumentParser(description="Recon with Terra")
    parser.add_argument('target', type=str, help='username of target')
    parser.add_argument('-j', '--json', help='save results in a JSON file',
                        action='store_true')
    parser.add_argument('-f', '--file', help='save results in a Text File',
                        action='store_true')
    return parser


def _command_loop(api, commands, blank_line_before_prompt=False):
    """Read commands forever and dispatch them.

    Args:
        api: platform object; must provide the methods named in
            ``_OUTPUT_TOGGLES`` for the FILE=/JSON= toggles.
        commands: mapping of typed command -> zero-argument callable.
        blank_line_before_prompt: print a spacer line before every prompt
            (the Twitter prompt does this, the Instagram prompt does not).

    The loop only ends through ``SystemExit`` or an exception raised by a
    command, which propagates to the caller.
    """
    while True:
        if blank_line_before_prompt:
            print(" ")
        pc.print("~/Terra Command >$ ", style='purple', end='')
        user_input = input()
        handler = commands.get(user_input)
        if handler:
            handler()
        elif user_input in _OUTPUT_TOGGLES:
            method_name, enabled = _OUTPUT_TOGGLES[user_input]
            getattr(api, method_name)(enabled)
        elif user_input == '':
            print("")
        else:
            pc.print("ILLEGAL COMMAND", style="red")


def _run_twitter(args):
    """Create the Twitter API object and start its command prompt."""
    api = Twitter(args.target, args.file, args.json)
    commands = {
        'ls': twitter_all_commands,
        'help': twitter_all_commands,
        # sys.exit() is used instead of the site-provided `quit` builtin,
        # which is meant for the interactive interpreter only.
        'quit': sys.exit,
        'clear': clear,
        'exit': _out,
        'back': main,
        'reset target': api.reset_target,
        'tweets': api.recent_tweets,
        'favtweets': api.recent_fav,
        'followers': api.get_followers,
        'following': api.get_frnds,
        'timeline': api.user_info,
        'profile pic': api.profile_pic,
        'banner': api.banner_pic,
        'htags': api.get_hashtags,
        'mentions': api.get_mentions,
    }
    _command_loop(api, commands, blank_line_before_prompt=True)


def _run_instagram(args):
    """Create the Instagram API object and start its command prompt."""
    api = Instagram(args.target, args.file, args.json)
    commands = {
        'ls': insta_all_commands,
        'help': insta_all_commands,
        'clear': clear,
        'quit': sys.exit,
        'exit': _out,
        'back': main,
        'locations': api.target_locations,
        'captions': api.__getCaptions__,
        'reset target': api.change_target,
        'comments': api._all_comments,
        'followers': api._followers,
        'followings': api._followings,
        'followers emails': api.followers_email,
        'following emails': api.followings_email,
        'followers phone': api.followers_phoneNumber,
        'followings phone': api.followings_phoneNumber,
        'tags': api._hashtags,
        'timeline': api._user_timeline,
        # The help screen advertises this command as `info`; accept both.
        'info': api._user_timeline,
        'likes': api._total_likes,
        'mediatype': api._media_type,
        'photodes': api._photo_description,
        'photos': api._user_photo,
        'profile pic': api._user_profilepic,
        'stories': api._user_stories,
        'tagged': api._people_who_tagged_by_target,
        'commenter': api._people_who_commented,
        'ttag': api._users_who_tagged,
    }
    _command_loop(api, commands)


def _handle_invalid_option():
    """Tell the user the menu choice was invalid and offer another try."""
    pc.print('Invalid Option! Try Again.... ', style='bold red')
    pc.print('Do you want to choose again ? (y/n)', style='red')
    answer = input()
    # `answer == 'y' or 'Y'` is always true; compare against both spellings.
    if answer in ('y', 'Y'):
        main()
    elif answer in ('n', 'N'):
        sys.exit(0)
    else:
        print('Not a option! Good Bye!')
        sys.exit(0)


def main():
    """Show the platform menu and run the chosen platform's command prompt.

    Command-line arguments are parsed first, so a missing target is
    reported by argparse before anything is drawn. Any ``Exception``
    raised while a prompt is running is printed and ends this call.
    """
    args = _build_parser().parse_args()

    banner()
    pc.print(" \n> 1 for Twitter ", style="bright_yellow")
    pc.print(" \n> 2 for Instagram ", style="green3")
    pc.print(" \n> 3 for Exit ")
    print(" ")
    pc.print("> Choose one option : ", style='purple', end='')
    u_input = input()

    try:
        if u_input == '1':
            _run_twitter(args)
        elif u_input == '2':
            _run_instagram(args)
        elif u_input in ('3', 'exit'):
            sys.exit(0)
        else:
            _handle_invalid_option()
    except Exception as e:
        pc.print(e, style='orange1')


if __name__ == '__main__':
    main()
|