Twitaut / scrape.py
AiDeveloper1's picture
Upload 5 files
417e4b6 verified
import http.client
import json
import os
from urllib.parse import quote

from dotenv import load_dotenv
# Run python-dotenv's loader first so the key lookup below sees any values it
# added to the process environment.
load_dotenv()
# RapidAPI credential; None when the variable is not set.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")
def _first_media_url(post: dict):
    """Return the first photo URL of *post*, else its first video variant URL, else None."""
    media = post.get("media")
    # The payload is external data: tolerate a missing/null/non-dict "media".
    if not isinstance(media, dict):
        return None

    photos = media.get("photo")
    if photos:
        return photos[0].get("media_url_https")

    videos = media.get("video")
    if videos:
        variants = videos[0].get("variants")
        if variants:
            return variants[0].get("url")

    return None


def get_filtered_user_media(username: str, *, timeout: float = 30.0) -> dict:
    """Fetch a user's media timeline and return a trimmed summary of it.

    Sends ``GET /usermedia.php?screenname=<username>`` to
    ``twitter-api45.p.rapidapi.com`` authenticated with ``RAPIDAPI_KEY``.

    Args:
        username: Value sent as the ``screenname`` query parameter. It is
            percent-encoded before being placed in the URL.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        A dict with the keys ``name``, ``screen_name`` (taken from the
        response's ``user.profile`` field), ``avatar``, ``blue_verified``,
        ``media_count`` and ``posts``. ``posts`` is a list of dicts with
        ``text``, ``media_url`` (first photo URL, otherwise first video
        variant URL, otherwise ``None``) and ``created_at``.

    Raises:
        EnvironmentError: If ``RAPIDAPI_KEY`` is unset or empty.
        Exception: If the HTTP status is not 200.
        ValueError: If the response body is not valid JSON or is not a
            JSON object.
    """
    if not RAPIDAPI_KEY:
        raise EnvironmentError("RAPIDAPI_KEY not found in environment variables")

    host = "twitter-api45.p.rapidapi.com"
    headers = {
        'x-rapidapi-key': RAPIDAPI_KEY,
        'x-rapidapi-host': host,
    }
    # Encode everything (safe="") so characters such as '&', '#', '/' or
    # spaces in the name cannot alter the query string.
    screenname = quote(username, safe="")
    endpoint = f"/usermedia.php?screenname={screenname}"

    conn = http.client.HTTPSConnection(host, timeout=timeout)
    try:
        conn.request("GET", endpoint, headers=headers)
        res = conn.getresponse()
        if res.status != 200:
            raise Exception(f"Request failed: {res.status} {res.reason}")
        raw_data = res.read().decode("utf-8")
    finally:
        # Always release the socket, including on the error paths above.
        conn.close()

    try:
        json_data = json.loads(raw_data)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON response from API") from err
    if not isinstance(json_data, dict):
        raise ValueError("Invalid JSON response from API: expected a JSON object")

    # "or" fallbacks also cover keys that are present but null.
    user_info = json_data.get("user") or {}
    timeline = json_data.get("timeline") or []

    return {
        # User-level info
        "name": user_info.get("name"),
        "screen_name": user_info.get("profile"),
        "avatar": user_info.get("avatar"),
        "blue_verified": user_info.get("blue_verified"),
        "media_count": user_info.get("media_count"),
        # Post-level info
        "posts": [
            {
                "text": post.get("text"),
                "media_url": _first_media_url(post),
                "created_at": post.get("created_at"),
            }
            for post in timeline
        ],
    }