File size: 2,112 Bytes
a70eb3d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | import requests, json, re, sys
from openpyxl import Workbook
# Algolia application ID: sent as the X-Algolia-Application-Id header and
# lower-cased to build the API hostname used for queries.
APP = "45BWZJ1SGC"
def get_key():
    """Return the Algolia API key embedded in the YC companies page.

    Downloads https://www.ycombinator.com/companies, locates the
    ``AlgoliaOpts = {...}`` assignment in the page text, parses the braces
    as JSON and returns the value stored under ``"key"``.

    Returns:
        The value of the ``"key"`` entry of the AlgoliaOpts object.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        RuntimeError: If no ``AlgoliaOpts = {...}`` assignment is found.
    """
    r = requests.get("https://www.ycombinator.com/companies",
                     headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
    # Do not try to parse an error page as if it were the real listing.
    r.raise_for_status()
    m = re.search(r'AlgoliaOpts\s*=\s*(\{[^}]*\})', r.text)
    if m is None:
        # Previously this surfaced as an AttributeError on None.group().
        raise RuntimeError(
            "AlgoliaOpts assignment not found in the YC companies page"
        )
    return json.loads(m.group(1))["key"]
# API key fetched from the YC companies page when the module is loaded.
KEY = get_key()

# Query endpoint of the YCCompany_production index on the application's
# "<app id>-dsn.algolia.net" host.
URL = f"https://{APP.lower()}-dsn.algolia.net/1/indexes/YCCompany_production/query"

# Headers attached to every query request.
HDR = {
    "X-Algolia-Application-Id": APP,
    "X-Algolia-API-Key": KEY,
    "Content-Type": "application/json",
}

# Batch labels to export, in the order they are fetched and written.
BATCHES = [
    "Fall 2025",
    "Winter 2026",
    "Spring 2026",
    "Summer 2026",
]
def fetch_batch(batch):
    """Return all hits for one YC batch, following pagination.

    Posts a query with an empty search string and a ``batch:<batch>`` facet
    filter to ``URL``, requesting 1000 hits per page, and keeps requesting
    the next page until the ``nbPages`` value reported by the response has
    been reached.

    Args:
        batch: Batch label used in the facet filter, e.g. "Winter 2026".

    Returns:
        A list with the ``"hits"`` entries of every page, in page order.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    hits = []
    page = 0
    while True:
        body = {"query": "", "facetFilters": [[f"batch:{batch}"]],
                "hitsPerPage": 1000, "page": page}
        r = requests.post(URL, headers=HDR, data=json.dumps(body), timeout=30)
        # Without this check, an error response whose JSON body has no
        # "hits" entry would be silently recorded as an empty batch.
        r.raise_for_status()
        d = r.json()
        hits.extend(d.get("hits", []))
        # A missing "nbPages" counts as 0, which ends the loop immediately.
        if page + 1 >= d.get("nbPages", 0):
            break
        page += 1
    return hits
# Create the workbook with a single titled sheet and its header row.
wb = Workbook()
ws = wb.active
ws.title = "YC Startups"
ws.append([
    "Name", "Batch", "Website", "One-liner", "Location", "Industry",
    "Team Size", "Status", "Hiring", "Tags", "YC Page",
])

# Fetch every configured batch, recording the per-batch hit count.
totals = {}
all_hits = []
for b in BATCHES:
    hits = fetch_batch(b)
    totals[b] = len(hits)
    print(f"{b}: {len(hits)}", flush=True)
    all_hits += hits

# One spreadsheet row per hit: eight fields copied as-is (empty string when
# absent), followed by the derived hiring flag, joined tags and YC page URL.
for h in all_hits:
    hiring = "Yes" if h.get("isHiring") else "No"
    tags = ", ".join(h.get("tags", []) or [])
    yc_page = f"https://www.ycombinator.com/companies/{h.get('slug','')}"
    row = [
        h.get(field, "")
        for field in (
            "name",
            "batch",
            "website",
            "one_liner",
            "all_locations",
            "industry",
            "team_size",
            "status",
        )
    ]
    row += [hiring, tags, yc_page]
    ws.append(row)

# Fixed widths for columns A through K, matching the header order.
widths = [22, 14, 40, 55, 28, 18, 11, 10, 8, 40, 50]
for letter, width in zip("ABCDEFGHIJK", widths):
    ws.column_dimensions[letter].width = width

# Write the file and report the totals.
out = "/home/azureuser/yc_companies.xlsx"
wb.save(out)
grand_total = sum(totals.values())
print(f"\nTOTAL: {grand_total} companies")
print(f"Saved: {out}")
|