# Hugging Face Space: most-active-orgs (multi-seed discovery)
import os
import requests
import pandas as pd
import gradio as gr

# Root of the Hugging Face Hub REST API.
BASE = "https://huggingface.co/api"

# Optional access token from the environment; empty string when unset.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()

# Headers sent with every request; a bearer token is attached only when one
# is configured for the Space.
HEADERS = {"User-Agent": "hf-most-active-orgs-multi-seed/1.0"}
if HF_TOKEN:
    HEADERS = {**HEADERS, "Authorization": f"Bearer {HF_TOKEN}"}
def get_json(url: str):
    """GET *url* and return a ``(payload, error)`` pair.

    On success: ``(decoded_json, None)``. On a non-200 response or any
    exception (network failure, timeout, JSON decode error, ...):
    ``(None, short_error_string)``. Exactly one element is ever non-None.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code == 200:
            # .json() stays inside the try so decode errors are also
            # reported through the error string rather than raised.
            return resp.json(), None
        return None, f"{resp.status_code}: {resp.text[:180]}"
    except Exception as exc:
        return None, f"{type(exc).__name__}: {exc}"
def normalize_plan(p):
    """Normalize a Hub plan identifier to a lower-case canonical name.

    Falsy input (``None``, ``""``, ``0``) yields ``""``. The legacy
    ``"plus"`` label maps to ``"enterprise plus"``; every other plan name is
    returned lower-cased unchanged.

    Fix: the original had a redundant ``if p in ("team", "enterprise"):
    return p`` branch that behaved identically to the fall-through
    ``return p`` — the dead branch is removed with no behavior change.
    """
    if not p:
        return ""
    plan = str(p).lower()
    return "enterprise plus" if plan == "plus" else plan
def fetch_top_models(sort_key: str, limit: int):
    """Fetch the top *limit* models sorted descending by *sort_key*.

    *sort_key* examples: ``downloads``, ``likes``, ``lastModified``.
    Returns the ``(payload, error)`` pair produced by :func:`get_json`.
    """
    query = f"limit={limit}&sort={sort_key}&direction=-1"
    return get_json(f"{BASE}/models?{query}")
def extract_owner_from_model_id(m):
    """Return the owner (namespace) part of a model entry's repo id.

    The API exposes the id under ``modelId`` or ``id`` as ``owner/name``.
    Returns the owner string, or ``None`` when no id is present, the id is
    not a string, or it contains no ``/`` (an unnamespaced model).
    """
    repo_id = m.get("modelId") or m.get("id")
    if not isinstance(repo_id, str):
        return None
    owner, slash, _rest = repo_id.partition("/")
    return owner if slash else None
def build_table(
    top_downloads=500,
    top_likes=500,
    top_recent=500,
    org_scan_limit=3000,
    rows_to_show=400,
):
    """Discover and rank Hugging Face orgs from three model "seed" lists.

    Orgs are collected from the owners of the top models by downloads,
    likes, and recency, then each org's ``/organizations/{org}/overview``
    is fetched and scored.

    Returns a 3-tuple matching the Gradio outputs:
    ``(html_table_or_error, debug_markdown, csv_path_or_None)``.
    """
    # Debug lines accumulated throughout and rendered under "### Debug".
    dbg = []
    dbg.append(f"top_downloads={top_downloads}")
    dbg.append(f"top_likes={top_likes}")
    dbg.append(f"top_recent={top_recent}")
    dbg.append(f"org_scan_limit={org_scan_limit}")
    dbg.append(f"rows_to_show={rows_to_show}")
    dbg.append(f"HF_TOKEN_present={bool(HF_TOKEN)}")
    # 1) Fetch 3 seeds of models; any seed failure aborts with an error pane.
    models_downloads, err_d = fetch_top_models("downloads", int(top_downloads))
    models_likes, err_l = fetch_top_models("likes", int(top_likes))
    models_recent, err_r = fetch_top_models("lastModified", int(top_recent))
    if err_d or not isinstance(models_downloads, list):
        return f"<pre>/api/models downloads failed: {err_d}</pre>", "\n".join(["### Debug"] + dbg), None
    if err_l or not isinstance(models_likes, list):
        return f"<pre>/api/models likes failed: {err_l}</pre>", "\n".join(["### Debug"] + dbg), None
    if err_r or not isinstance(models_recent, list):
        return f"<pre>/api/models lastModified failed: {err_r}</pre>", "\n".join(["### Debug"] + dbg), None
    # 2) Union org handles + track which seed(s) they come from.
    org_sources = {}  # org -> {"from_downloads": 0/1, "from_likes": 0/1, "from_recent": 0/1}
    def mark(org, key):
        # Flag that *org* was seen in seed *key*; ignore missing owners.
        if not org:
            return
        if org not in org_sources:
            org_sources[org] = {"from_downloads": 0, "from_likes": 0, "from_recent": 0}
        org_sources[org][key] = 1
    for m in models_downloads:
        if isinstance(m, dict):
            mark(extract_owner_from_model_id(m), "from_downloads")
    for m in models_likes:
        if isinstance(m, dict):
            mark(extract_owner_from_model_id(m), "from_likes")
    for m in models_recent:
        if isinstance(m, dict):
            mark(extract_owner_from_model_id(m), "from_recent")
    # Cap the number of orgs evaluated; dict order preserves discovery order
    # (downloads seed first, then likes, then recent).
    orgs = list(org_sources.keys())[: int(org_scan_limit)]
    dbg.append(f"orgs_discovered_union={len(orgs)}")
    dbg.append(
        "orgs_by_seed_counts="
        f"downloads={sum(v['from_downloads'] for v in org_sources.values())}, "
        f"likes={sum(v['from_likes'] for v in org_sources.values())}, "
        f"recent={sum(v['from_recent'] for v in org_sources.values())}"
    )
    # 3) Fetch org overviews + score. Failed lookups (users, 404s, ...) are
    # counted and skipped rather than aborting the whole run.
    rows = []
    failures = 0
    for org in orgs:
        data, e = get_json(f"{BASE}/organizations/{org}/overview")
        if e or not isinstance(data, dict):
            failures += 1
            continue
        plan = normalize_plan(data.get("plan"))
        num_models = int(data.get("numModels") or 0)
        num_datasets = int(data.get("numDatasets") or 0)
        num_spaces = int(data.get("numSpaces") or 0)
        num_followers = int(data.get("numFollowers") or 0)
        num_users = int(data.get("numUsers") or 0)
        # "Activity footprint" score (same logic as the author's other Space).
        activity_score = num_models * 3 + num_datasets * 3 + num_spaces * 2 + num_followers
        # Bonus: "multi-seed" score (when an org appears in several seeds).
        src = org_sources.get(org, {})
        seed_score = int(src.get("from_downloads", 0)) + int(src.get("from_likes", 0)) + int(src.get("from_recent", 0))
        rows.append({
            "org": org,
            "plan": plan,
            "seed_score(0-3)": seed_score,
            "from_downloads": int(src.get("from_downloads", 0)),
            "from_likes": int(src.get("from_likes", 0)),
            "from_recent": int(src.get("from_recent", 0)),
            "num_models": num_models,
            "num_datasets": num_datasets,
            "num_spaces": num_spaces,
            "num_users": num_users,
            "num_followers": num_followers,
            "activity_score": activity_score,
        })
    dbg.append(f"orgs_ranked={len(rows)}")
    dbg.append(f"org_overview_failures={failures}")
    if not rows:
        # Nothing ranked: probe the first org once more so the debug panel
        # shows what the overview endpoint actually returned.
        probe_org = orgs[0] if orgs else "N/A"
        probe, probe_err = get_json(f"{BASE}/organizations/{probe_org}/overview") if orgs else (None, "no orgs")
        dbg.append(f"probe_org={probe_org}")
        dbg.append(f"probe_err={probe_err}")
        dbg.append(f"probe_type={type(probe).__name__ if probe is not None else 'None'}")
        dbg.append(f"probe_keys={(list(probe.keys())[:20] if isinstance(probe, dict) else '')}")
        return "<pre>No orgs ranked (see debug)</pre>", "\n".join(["### Debug"] + dbg), None
    # Sort by footprint first, then by how many seeds the org appeared in.
    df = pd.DataFrame(rows).sort_values(
        by=["activity_score", "seed_score(0-3)"],
        ascending=[False, False]
    )
    # Display N rows in the UI.
    df_show = df.head(int(rows_to_show))
    # Save full CSV for download.
    csv_path = "/tmp/most_active_orgs_multi_seed.csv"
    df.to_csv(csv_path, index=False)
    # Render HTML table with scroll.
    html_table = f"""
<div style="max-height: 1100px; overflow:auto; border:1px solid #333; border-radius:10px; padding:8px;">
{df_show.to_html(index=False)}
</div>
"""
    return html_table, "\n".join(["### Debug"] + dbg), csv_path
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🏆 Most Active Orgs (Global) — multi-seed discovery")
    # Intro text (French): orgs are discovered via 3 model seeds
    # (downloads + likes + recency), then ranked via the org overview API.
    gr.Markdown(
        "Découverte des orgs via 3 seeds de modèles: **downloads + likes + récents (lastModified)**, "
        "puis ranking via `/api/organizations/{org}/overview`."
    )
    # Seed sizes: how many top models to pull per sort order.
    with gr.Row():
        top_downloads = gr.Slider(50, 1000, value=500, step=50, label="Top models by downloads (N)")
        top_likes = gr.Slider(50, 1000, value=500, step=50, label="Top models by likes (N)")
        top_recent = gr.Slider(50, 1000, value=500, step=50, label="Top models by recency (lastModified) (N)")
    # Evaluation cap and visible row count.
    with gr.Row():
        org_scan_limit = gr.Slider(100, 8000, value=3000, step=100, label="Max orgs to evaluate (union cap)")
        rows_to_show = gr.Slider(50, 2000, value=400, step=50, label="Rows to display (visible)")
    btn = gr.Button("Refresh Ranking")
    out = gr.HTML()
    # NOTE(review): this `dbg` shadows nothing inside build_table (its `dbg`
    # is local), but a distinct name like `dbg_md` would be clearer.
    dbg = gr.Markdown()
    csv_file = gr.File(label="Download full CSV")
    # Slider values are passed positionally to build_table's parameters.
    btn.click(
        build_table,
        inputs=[top_downloads, top_likes, top_recent, org_scan_limit, rows_to_show],
        outputs=[out, dbg, csv_file],
    )
# Launches at import time — standard pattern for a Hugging Face Space app.
demo.launch()