Spaces:

Mayur-cinderace
/

investormlops-api

Sleeping

Pre-EDA

2fc949d 5 months ago

1.16 kB

	import requests
	import pandas as pd
	import time

	# Parameters
	BASE_URL = "https://api.pullpush.io/reddit/search/comment/"
	params = {"subreddit": "wallstreetbets", "q": "stock", "size": 100}

	# Seconds to wait on the API; without a timeout a stalled connection
	# would block the script indefinitely.
	REQUEST_TIMEOUT = 30

	# Exact column order of the output CSV.
	CSV_COLUMNS = [
	    "source",
	    "author",
	    "title",
	    "description",
	    "url",
	    "publishedAt",
	    "content",
	]


	def comment_to_record(c):
	    """Map one comment dict returned by the API onto the CSV schema.

	    Args:
	        c: A dict for a single comment. The keys read are ``author``,
	            ``permalink``, ``created_utc`` and ``body``; any of them may
	            be absent.

	    Returns:
	        A dict with one entry per column in ``CSV_COLUMNS``.
	    """
	    # A missing *or* null permalink must not leak the text "None" into
	    # the URL, so both cases collapse to an empty suffix.
	    permalink = c.get("permalink") or ""
	    return {
	        "source": "reddit",  # All come from Reddit
	        "author": c.get("author"),  # Reddit username
	        "title": None,  # Reddit comments don't have a title
	        "description": None,  # Optional
	        "url": f"https://reddit.com{permalink}",  # link to comment
	        # created_utc is interpreted as seconds since the Unix epoch.
	        "publishedAt": pd.to_datetime(c.get("created_utc"), unit='s'),
	        "content": c.get("body"),  # actual comment text
	    }


	def build_dataframe(comments):
	    """Convert an iterable of comment dicts into the output DataFrame.

	    Args:
	        comments: Iterable of comment dicts as accepted by
	            ``comment_to_record``. May be empty.

	    Returns:
	        A DataFrame whose columns are exactly ``CSV_COLUMNS``, in that
	        order, with one row per comment.
	    """
	    records = [comment_to_record(c) for c in comments]
	    return pd.DataFrame(records, columns=CSV_COLUMNS)


	def fetch_comments():
	    """Request comments from ``BASE_URL`` using the module-level ``params``.

	    Returns:
	        The value stored under the ``"data"`` key of the JSON response.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        KeyError: If the JSON response has no ``"data"`` key.
	    """
	    resp = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
	    # Fail on HTTP errors here instead of on a confusing parse/KeyError later.
	    resp.raise_for_status()
	    return resp.json()["data"]


	def main():
	    """Fetch the comments, save them to reddit_data.csv and print a summary."""
	    df = build_dataframe(fetch_comments())

	    # Save to CSV
	    df.to_csv("reddit_data.csv", index=False, encoding="utf-8")
	    print(f"✅ Saved {len(df)} Reddit comments to reddit_data.csv")
	    print(df.head())


	if __name__ == "__main__":
	    main()