# Mayur-cinderace's picture
# Pre-EDA
# 2fc949d
import requests
import pandas as pd
import time
# Parameters
BASE_URL = "https://api.pullpush.io/reddit/search/comment/"
params = {"subreddit": "wallstreetbets", "q": "stock", "size": 100}

# Exact column order of the output CSV.
COLUMNS = [
    "source",
    "author",
    "title",
    "description",
    "url",
    "publishedAt",
    "content",
]

# Seconds to wait on the API. Without a timeout, requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 30


def comment_to_record(c: dict) -> dict:
    """Map one comment dict from the API response onto the CSV schema.

    Args:
        c: A single entry of the response's ``data`` list. The keys
            ``author``, ``permalink``, ``created_utc`` and ``body`` are read;
            any of them may be missing.

    Returns:
        A dict whose keys are exactly ``COLUMNS``, in that order.
    """
    # `or ""` also covers an explicit null permalink, which would otherwise
    # end up as the literal text "None" inside the URL.
    permalink = c.get("permalink") or ""
    return {
        "source": "reddit",  # All come from Reddit
        "author": c.get("author"),  # Reddit username
        "title": None,  # Reddit comments don't have a title
        "description": None,  # Optional
        "url": f"https://reddit.com{permalink}",  # link to comment
        # Interpreted as a count of seconds (unit='s'); a missing value
        # yields a null timestamp rather than an error.
        "publishedAt": pd.to_datetime(c.get("created_utc"), unit="s"),
        "content": c.get("body"),  # actual comment text
    }


if __name__ == "__main__":
    resp = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    # Tolerate a response with no (or a null) "data" entry.
    data = resp.json().get("data") or []

    # Map Reddit fields to your CSV schema
    records = [comment_to_record(c) for c in data]

    # Create DataFrame with exact column order
    df = pd.DataFrame(records, columns=COLUMNS)

    # Save to CSV
    df.to_csv("reddit_data.csv", index=False, encoding="utf-8")
    print(f"✅ Saved {len(df)} Reddit comments to reddit_data.csv")
    print(df.head())