File size: 2,112 Bytes
a70eb3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import requests, json, re, sys
from openpyxl import Workbook

# Algolia application ID: it forms the API host name in URL and is sent as
# the X-Algolia-Application-Id request header.
APP = "45BWZJ1SGC"

def get_key():
    """Scrape the Algolia API key embedded in the YC companies page.

    Downloads https://www.ycombinator.com/companies, locates the inline
    ``AlgoliaOpts = {...}`` assignment and returns the ``"key"`` entry of
    that JSON object.

    Returns:
        The value stored under ``"key"`` in the embedded ``AlgoliaOpts`` JSON.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        RuntimeError: if no ``AlgoliaOpts = {...}`` assignment is found.
        json.JSONDecodeError: if the captured object is not valid JSON.
        KeyError: if the object has no ``"key"`` entry.
    """
    r = requests.get("https://www.ycombinator.com/companies",
                     headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
    # Fail loudly on 4xx/5xx rather than regex-searching an error page.
    r.raise_for_status()
    # The capture stops at the first closing brace, so it only matches while
    # the embedded object stays flat (no nested braces).
    m = re.search(r'AlgoliaOpts\s*=\s*(\{[^}]*\})', r.text)
    if m is None:
        raise RuntimeError(
            "AlgoliaOpts assignment not found in the YC companies page; "
            "the page layout may have changed"
        )
    return json.loads(m.group(1))["key"]

# The API key is scraped once, when the module is loaded.
KEY = get_key()

# Query endpoint of the YCCompany_production index on the app's DSN host.
URL = f"https://{APP.lower()}-dsn.algolia.net/1/indexes/YCCompany_production/query"

# Headers attached to every search request.
HDR = {
    "X-Algolia-Application-Id": APP,
    "X-Algolia-API-Key": KEY,
    "Content-Type": "application/json",
}

# Batch labels to export, in the order they are fetched and written.
BATCHES = [
    "Fall 2025",
    "Winter 2026",
    "Spring 2026",
    "Summer 2026",
]

def fetch_batch(batch):
    """Return all search hits whose ``batch`` facet equals *batch*.

    Posts an empty-text query to ``URL`` with the ``HDR`` headers, requesting
    up to 1000 hits per page, and keeps requesting the next page until the
    ``nbPages`` value reported by the response has been reached.

    Args:
        batch: Batch label placed in the ``batch:<label>`` facet filter,
            e.g. ``"Winter 2026"``.

    Returns:
        A list with the entries of every page's ``"hits"`` array, in the
        order the pages were returned.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    hits = []
    page = 0
    while True:
        body = {"query": "", "facetFilters": [[f"batch:{batch}"]],
                "hitsPerPage": 1000, "page": page}
        r = requests.post(URL, headers=HDR, data=json.dumps(body), timeout=30)
        # An error payload has neither "hits" nor "nbPages"; without this
        # check a rejected request would look like an empty batch.
        r.raise_for_status()
        d = r.json()
        hits.extend(d.get("hits", []))
        # Pages are zero-based, so stop once the last page has been read.
        if page + 1 >= d.get("nbPages", 0):
            break
        page += 1
    return hits

# Create the workbook with a single sheet and a fixed header row.
wb = Workbook()
ws = wb.active
ws.title = "YC Startups"
ws.append([
    "Name", "Batch", "Website", "One-liner", "Location", "Industry",
    "Team Size", "Status", "Hiring", "Tags", "YC Page",
])

# Download each batch in turn, reporting and recording its size.
totals = {}
all_hits = []
for b in BATCHES:
    hits = fetch_batch(b)
    count = len(hits)
    totals[b] = count
    print(f"{b}: {count}", flush=True)
    all_hits += hits

# One spreadsheet row per hit: eight fields copied as-is (blank when the key
# is absent), followed by three derived columns.
for h in all_hits:
    hiring = "Yes" if h.get("isHiring") else "No"
    tags = ", ".join(h.get("tags", []) or [])
    page_url = f"https://www.ycombinator.com/companies/{h.get('slug','')}"
    plain = [
        h.get(key, "")
        for key in ("name", "batch", "website", "one_liner",
                    "all_locations", "industry", "team_size", "status")
    ]
    ws.append(plain + [hiring, tags, page_url])

# Column widths for A through K, matching the header order above.
widths = [22, 14, 40, 55, 28, 18, 11, 10, 8, 40, 50]
for letter, width in zip("ABCDEFGHIJK", widths):
    ws.column_dimensions[letter].width = width

# Write the file and print a summary.
out = "/home/azureuser/yc_companies.xlsx"
wb.save(out)
print(f"\nTOTAL: {sum(totals.values())} companies")
print(f"Saved: {out}")