import json
import re
import sys

import requests
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
|
|
# Algolia application ID: sent as the X-Algolia-Application-Id header and
# lower-cased to build the query host name.
APP = "45BWZJ1SGC"
|
|
def get_key():
    """Fetch the Algolia API key embedded in the YC companies page.

    Downloads https://www.ycombinator.com/companies, locates the
    ``AlgoliaOpts = {...}`` object in the page text, parses it as JSON and
    returns the value stored under ``"key"``.

    Returns:
        The ``"key"`` entry of the parsed ``AlgoliaOpts`` object.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        RuntimeError: If no ``AlgoliaOpts`` object is found in the page.
        json.JSONDecodeError: If the matched object is not valid JSON.
        KeyError: If the parsed object has no ``"key"`` entry.
    """
    r = requests.get(
        "https://www.ycombinator.com/companies",
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=30,
    )
    # Fail on an error status instead of searching an error page for the key.
    r.raise_for_status()
    m = re.search(r'AlgoliaOpts\s*=\s*(\{[^}]*\})', r.text)
    if m is None:
        # Previously surfaced as an AttributeError on None.group().
        raise RuntimeError("AlgoliaOpts not found in the YC companies page")
    return json.loads(m.group(1))["key"]
|
|
# API key scraped from the YC companies page; this performs a network
# request as soon as the module is executed.
KEY = get_key()

# Query endpoint of the YCCompany_production index; the host name is derived
# from the lower-cased application ID.
URL = f"https://{APP.lower()}-dsn.algolia.net/1/indexes/YCCompany_production/query"

# Headers sent with every query request.
HDR = {
    "X-Algolia-Application-Id": APP,
    "X-Algolia-API-Key": KEY,
    "Content-Type": "application/json",
}


# Batch labels to export, in the order they are fetched and written.
BATCHES = [
    "Fall 2025",
    "Winter 2026",
    "Spring 2026",
    "Summer 2026",
]
|
|
def fetch_batch(batch):
    """Return every hit whose ``batch`` facet equals *batch*.

    Posts an empty-text query filtered on ``batch:<batch>`` to ``URL`` and
    walks the result pages (1000 hits requested per page) until the
    ``nbPages`` count reported by the response is exhausted.

    Args:
        batch: Batch label to filter on, e.g. ``"Winter 2026"``.

    Returns:
        A list of hit dicts in the order the service returned them.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    hits = []
    page = 0
    while True:
        body = {
            "query": "",
            "facetFilters": [[f"batch:{batch}"]],
            "hitsPerPage": 1000,
            "page": page,
        }
        r = requests.post(URL, headers=HDR, data=json.dumps(body), timeout=30)
        # An error payload without "hits"/"nbPages" would otherwise be
        # indistinguishable from a batch that simply has no companies.
        r.raise_for_status()
        d = r.json()
        hits.extend(d.get("hits", []))
        # Pages are zero-based, so the last page index is nbPages - 1.
        if page + 1 >= d.get("nbPages", 0):
            break
        page += 1
    return hits
|
|
# Single-sheet workbook; the first row holds the column headers.
wb = Workbook()
ws = wb.active
ws.title = "YC Startups"
ws.append([
    "Name",
    "Batch",
    "Website",
    "One-liner",
    "Location",
    "Industry",
    "Team Size",
    "Status",
    "Hiring",
    "Tags",
    "YC Page",
])
|
|
# Fetch each batch in turn, recording a per-batch count and accumulating
# every hit for the spreadsheet.
totals = {}
all_hits = []
for batch_name in BATCHES:
    batch_hits = fetch_batch(batch_name)
    count = len(batch_hits)
    totals[batch_name] = count
    # Flushed immediately so progress is visible while later batches load.
    print(f"{batch_name}: {count}", flush=True)
    all_hits += batch_hits
|
|
# One worksheet row per hit, in the same column order as the header row.
for company in all_hits:
    # Fields copied straight from the hit; a missing key becomes "".
    row = [
        company.get(field, "")
        for field in (
            "name",
            "batch",
            "website",
            "one_liner",
            "all_locations",
            "industry",
            "team_size",
            "status",
        )
    ]
    hiring = "Yes" if company.get("isHiring") else "No"
    # `or []` covers a "tags" entry that is present but empty/None.
    tags = ", ".join(company.get("tags", []) or [])
    yc_page = f"https://www.ycombinator.com/companies/{company.get('slug','')}"
    row += [hiring, tags, yc_page]
    ws.append(row)
|
|
# Column widths, one per header column in order. get_column_letter maps the
# 1-based index to its letter and stays valid past column Z, where
# chr(64 + col) would stop producing column letters.
for col, width in enumerate([22, 14, 40, 55, 28, 18, 11, 10, 8, 40, 50], start=1):
    ws.column_dimensions[get_column_letter(col)].width = width
|
|
# Destination path: the first command-line argument when one is given,
# otherwise the original default location.
out = sys.argv[1] if len(sys.argv) > 1 else "/home/azureuser/yc_companies.xlsx"
wb.save(out)
print(f"\nTOTAL: {sum(totals.values())} companies")
print(f"Saved: {out}")
|
|