"""Fetch Reddit comments from the PullPush search API and save them as CSV.

The script queries the comment-search endpoint with the parameters in
``params``, maps each returned comment onto a fixed column schema, writes
the result to ``reddit_data.csv`` and prints a short preview.
"""

import time

import pandas as pd
import requests

# Parameters
BASE_URL = "https://api.pullpush.io/reddit/search/comment/"
params = {"subreddit": "wallstreetbets", "q": "stock", "size": 100}

# requests has no default timeout; without one a stalled server would hang
# the script forever.
REQUEST_TIMEOUT_SECONDS = 30

# Exact column order of the output CSV.
CSV_COLUMNS = [
    "source",
    "author",
    "title",
    "description",
    "url",
    "publishedAt",
    "content",
]


def _comment_to_record(c):
    """Map one comment dict from the API response onto the CSV schema.

    Missing fields become ``None`` (``NaT`` for the timestamp) instead of
    raising, so a sparse comment still produces a row.
    """
    # ``or ""`` also covers an explicit null permalink, which would otherwise
    # be rendered into the URL as the literal text "None".
    permalink = c.get("permalink") or ""
    return {
        "source": "reddit",  # All come from Reddit
        "author": c.get("author"),  # Reddit username
        "title": None,  # Reddit comments don't have a title
        "description": None,  # Optional
        "url": f"https://reddit.com{permalink}",  # link to comment
        # created_utc is interpreted as seconds since the epoch.
        "publishedAt": pd.to_datetime(c.get("created_utc"), unit="s"),
        "content": c.get("body"),  # actual comment text
    }


resp = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail with a clear HTTP error rather than a KeyError / JSON decode error
# further down when the API answers with a 4xx/5xx status.
resp.raise_for_status()
data = resp.json()["data"]

# Map Reddit fields to your CSV schema
records = [_comment_to_record(c) for c in data]

# Create DataFrame with exact column order
df = pd.DataFrame(records, columns=CSV_COLUMNS)

# Save to CSV
df.to_csv("reddit_data.csv", index=False, encoding="utf-8")

print(f"✅ Saved {len(df)} Reddit comments to reddit_data.csv")
print(df.head())