""" Hackathon Roulette — spin through the Build Small Hackathon and discover projects you'd never find by scrolling. Categorizes every submission and lets you roulette your way to hidden gems (with in-session memory of what you've already seen). """ import random import re import time import html as _html import gradio as gr from huggingface_hub import HfApi # ── Config (the only hackathon this tool follows) ─────────────────────────── HACKATHON_ORG = "build-small-hackathon" HACKATHON_NAME = "Build Small Hackathon" # Org spaces that aren't real submissions — excluded from the roulette. INFRA_SPACES = {"field-guide", "readme", "registration"} # ── Category taxonomy ─────────────────────────────────────────────────────── # Ordered list of (name, emoji, keywords). Categorization is FIRST-MATCH-WINS, # so order runs specific genres/domains first, modality next, and the broad # "Assistants" bucket last as a catch-all. Keywords are matched with a LEFT # word-boundary (\bkw) against a camelCase-split, lowercased blob of # title+name+description+tags — so "radio" never matches "gradio" and "voice" # never matches "invoice", while stems like "transcrib" still catch # "transcribe". Tuned against all live submissions: ~92% land in a real # category; the remaining "Other" are spaces that ship no description and no # topical tags on the Hub, so there is nothing to categorize them by. # NOTE: avoid keywords that match non-topical badge/track tags (e.g. "brand" # hits the "off-brand" achievement tag, "track" hits "track:wood"). 
TAXONOMY = [
    ("Games, RPGs & Interactive Fiction", "🎮", [
        "game", "rpg", "roguelike", "roguelite", "deckbuild", "dungeon", "escape room",
        "escape-room", "text-adventure", "text adventure", "adventure", "misadventure",
        "interactive-fiction", "interactive fiction", "visual novel", "visual-novel", "ttrpg",
        "tabletop", "whodunit", "detective", "courtroom", "tribunal", "verdict", "jury", "puzzle",
        "riddle", "arcade", "poker", "chess", "mafia", "duel", "arena", "akinator", "backrooms",
        "parkour", "phaser", "playable", "gameplay", "roleplay", "role-play", "reverse-turing",
        "reverse turing", "trolley", "board game", "deathmatch", "choose your", "trivia",
        "survival", "maze", "wordle", "sudoku", "betting", "dating sim", "platformer", "sokoban",
        "tower defense", "idle game", "mystery", "heist", "casino", "card game", "gamif", "goblin",
        "brawl", "argue", "debate", "karate", "kung fu", "ninja", "samurai", "dota", "doom",
        "emulat", "gameboy", "game boy", "nintendo", "n64", "royale", "worldcup", "world cup",
        "objection", "trial", "arcane", "fighter", "battle", "combat", "boss fight", "zombie",
        "monster", "tycoon", "racing", "racer", "wrestl", "boxing", "party game", "interrogat",
        "dare", "escape the", "you are a god", "play as", "npc", "pixel game", "bullet hell",
        "stealth game", "sandbox game", "grid royale", "party", "speak to your manager",
        "chaos goblin", "worst", "you decide",
    ]),
    ("Worlds, Sims & Multi-Agent Societies", "🌍", [
        "multi-agent", "multi agent", "simulation", "simulator", "persistent world", "world-build",
        "world building", "world-building", "world model", "colony", "civilization",
        "civilisation", "city-builder", "city builder", "village", "smol town", "ecosystem",
        "terraform", "self-play", "agent-simulation", "trading firm", "rumor economy",
        "gossip economy", "tiny civilization", "living forest", "sandbox", "life sim",
        "tamagotchi", "god game", "council", "swarm", "terrarium", "parliament", "senate",
        "society", "kingdom", "empire", "dynasty", "planet", "universe", "galaxy", "cosmos",
        "forest", "organism", "flock", "hive mind", "tiny minds", "mastermind", "multi-model",
        "multi model", "agent society", "emergent", "cellular automata", "game of life", "conway",
        "ant colony", "aquarium", "habitat", "biome",
    ]),
    ("Health, Medical & Wellness", "🩺", [
        "medical", "clinical", "clinic", "healthcare", "health companion", "medgemma", "patient",
        "symptom", "diagnos", "radiology", "x-ray", "dental", "pharmacy", "pharma", "prescription",
        "medicine", "blood test", "nutrition", "nutri", "pregnan", "caregiver", "elderly",
        "parkinson", "stroke", "vaccine", "neoantigen", "first aid", "physiotherap", "cardiac",
        "heartbeat", "variant effect", "prediction engine", "mental-health", "mental health",
        "emotional support", "anxiety", "therapy", "wellness", "grief", "adhd", "neurodiverg",
        "allergen", "disease", "doctor", "nurse", "hospital", "telemedicine", "surgery", "surgeon",
        "dentist", "cancer", "tumor", "tumour", "diabet", "insulin", "glucose", "blood pressure",
        "asthma", "skin", "derma", "biosignal", "biomarker", "genom", "dna", "protein", "molecul",
        "metaboli", "drug", "medication", "rehab", "addiction", "depression", "autism", "fitness",
        "workout", "exercise", "meditat", "sleep tracker", "calorie", "diet", "vitamin",
        "supplement", "counsel", "mri", "ultrasound", "spoilage",
    ]),
    ("Tutors, Study & Language Learning", "🎓", [
        "tutor", "study buddy", "study partner", "studybuddy", "study copilot", "homework", "exam",
        "flashcard", "quiz", "socratic", "lesson", "teaching", "teach an", "teach the",
        "classroom", "kids", "children", "preschool", "toddler", "language-learning",
        "language learning", "language practice", "lingo", "learn korean", "learn japanese",
        "learn languages", "learn english", "english expression", "duolingo", "reading buddy",
        "read-along", "read along", "dyslexi", "iep", "whiteboard", "cbse", "ncert", "samacheer",
        "scholar", "literature-review", "interview coach", "interview prep", "speech coach",
        "running coach", "gym coach", "curriculum", "educational", "flashcard generator",
        "spaced repetition", "vocabulary", "grammar", "mnemonic", "coach", "mentor", "study",
        "math", "physics", "chemistry", "biology", "school", "student", "academic", "university",
        "college", "mandarin", "spanish", "french", "german", "japanese", "sentence", "essay",
        "spelling", "explainer", "encyclopedia", "admission", "scholarship", "upskill", "tutorial",
        "counsellor", "counselor", "exam prep", "study abroad", "memoriz", "practice partner",
        "skill builder", "wikipedia",
    ]),
    ("Finance, Documents & Business", "💼", [
        "invoice", "accounting", "bookkeep", "ledger", "receipt", "expense", "budget", "taxes",
        "tax filing", "tax season", "finops", "finance", "financ", "financial", "stocks",
        "stock market", "trading", "alpha signal", "sec filing", "insurance", "crm", "inventory",
        "udhaar", "shopkeeper", "kirana", "dukaan", "sales", "pricing", "price comparison",
        "real-estate", "real estate", "ads advisor", "resume", "job description", "reconcil",
        "lease", "legal", "legislation", "contract", "bureaucra", "immigration", "compliance",
        "ocr", "document-ai", "text-to-sql", "spreadsheet", "rfq", "payroll", "procurement",
        "gig work", "freelance", "order desk", "orders", "retail", "shop", "store", "marketplace",
        "ecommerce", "e-commerce", "wallet", "payment", "money", "salary", "wage", "startup",
        "business", "entrepreneur", "marketing", "advertis", "customer", "revenue", "profit",
        "bank", "loan", "mortgage", "crypto", "portfolio", "market", "economy", "logistics",
        "supply chain", "warehouse", "dispatch", "delivery", "vendor", "quote", "estimate",
        "proposal", "pdf", "paperwork", "report generat", "dashboard", "analytics", "excel", "csv",
        "data analysis", "structured data", "audit", "job", "hiring", "recruit", "applicant",
        "rejection", "pitch deck", "pitch practice", "operations", "letter", "decoder", "finops",
        "fin ops", "viscosity", "controller",
    ]),
    ("Scan, Vision & Physical-World Utilities", "📸", [
        "plant", "garden", "crop", "weed", "herbicide", "farm", "beekeep", "honeybee", "apiar",
        "soil", "satellite", "eurosat", "land-use", "solar", "weather", "watering", "fridge",
        "recipe", "meal plan", "cook", "ingredient", "food waste", "upcycl", "mushroom", "forager",
        "edible", "stray animal", "wardrobe", "outfit", "fashion", "stylist", "car diagnostic",
        "appliance fault", "repair", "defect", "building-inspection", "road-defect",
        "object-detection", "yolo", "vlm", "vision-language", "computer-vision", "image classif",
        "segment", "label lens", "posture", "barcode", "football", "soccer", "referee", "sports",
        "photo", "photograph", "camera", "scan", "snap", "detect", "recognition", "classif",
        "classifier", "identif", "vision", "map", "maps", "gps", "navigation", "travel", "route",
        "drone", "aircraft", "vehicle", "automobile", "traffic", "parking", "animal", "bird",
        "insect", "wildlife", "species", "fish", "pet ", "dog", "cat ", "kitten", "food",
        "kitchen", "waste", "recycl", "agriculture", "3d print", "3d-print", "printer", "factory",
        "manufactur", "mechanic", "machine fault", "maintenance", "inspection", "sensor", "iot",
        "gesture", "augmented reality", "junk drawer", "aircraft rarity",
    ]),
    ("Safety, Security & Privacy", "🛡️", [
        "scam", "phishing", "fraud", "online-safety", "online safety", "cybersec", "security",
        "soc analyst", "osint", "threat", "vuln", "pii", "privacy", "redact", "guardrail",
        "firewall", "dark pattern", "deepfake", "captcha", "jailbreak", "safety-eval",
        "prompt-injection", "airlock", "malware", "ransomware", "hacker", "exploit", "encrypt",
        "decrypt", "password", "authentication", "biometric", "surveillance", "moderation",
        "toxic", "abuse", "harassment", "nsfw", "misinformation", "fact check", "fake news",
        "watermark", "ai detector", "deepverify", "scrub", "sensitive leak", "content filter",
        "redac", "anonymiz",
    ]),
    ("Dev, Code, Robotics & Model Tooling", "⚙️", [
        "code review", "code-review", "code-generation", "commit", "repo", "github", "mermaid",
        "flowchart", "diagram", "debug", "rubber duck", "rubber-duck", "pull request", "refactor",
        "decompil", "gpu kernel", "gpu-kernel", "fine-tune", "fine-tuning", "distill", "quantiz",
        "benchmark", "structured json", "data-engineering", "excalidraw", "skill router",
        "skill-router", "formal-verification", "lean4", "mcp-server", "devops", "reachy", "robot",
        "esp32", "arduino", "onnx", "webgpu", "transformers.js", "transformers-js",
        "rl-environment", "openenv", "grpo", "interpretab", "surprisal", "next-token", "abliterat",
        "tokenizer", "regex", "evaluation pipeline", "llm eval", "sql query", "coding",
        "developer", "software", "api ", "cli ", "terminal", "shell", "bash", "python",
        "javascript", "database", "backend", "frontend", "kubernetes", "deploy", "unit test",
        "compile", "kernel", "vram", "cuda", "inference", "latency", "throughput", "embedding",
        "neural", "pytorch", "wasm", "plugin", "firmware", "observability", "monitoring",
        "logging", "codebase", "cognitive architecture", "evaluat", "benchmark suite",
        "prompt engineer", "activation", "emulator", "vla ", "policy network", "skill",
        "recap and shell",
    ]),
    ("Voice, Speech, Music & Translation", "🎙️", [
        "voice", "music", "radio", "voice-clon", "voice clon", "tts", "text-to-speech", "asr",
        "speech-to-text", "speech-to-speech", "speech-translation", "whisper", "voxcpm", "kokoro",
        "f5-tts", "fastconformer", "dubbing", "narration", "narrator", "read aloud", "read-aloud",
        "audiobook", "podcast", "music-generation", "music generator", "lofi", "lo-fi", "lullab",
        "singalong", "karaoke", "lyric", "sheet music", "transcrib", "pronunciation", "recitation",
        "sign language", "sign-language", "sign2voice", "dictation", "translation", "translate",
        "translat", "spoken word", "song", "melody", "chorus", "beat", "rhythm", "remix",
        "compose", "composer", "composition", "instrument", "instrumental", "piano", "guitar",
        "drum", "violin", "saxophone", "synth", "vocal", "singer", "singing", "soundtrack", "midi",
        "harmony", "chord", "ambient", "playlist", "jingle", "anthem", "acoustic", "audio",
        "sound", "speech", "spoken", "multilingual", "subtitle", "caption", "accent", "band",
        "music video",
    ]),
    ("Story, Image & Creative Generation", "🎨", [
        "story", "stories", "storybook", "storytell", "bedtime", "fairytale", "fairy tale",
        "picture book", "comic", "manga", "illustrat", "poem", "poetry", "poetic", "novel",
        "fable", "creative-writing", "creative writing", "narrative", "text-to-image",
        "image-generation", "image generation", "img2img", "image-to-image", "diffusion", "flux",
        "sd-turbo", "sticker", "pixel-art", "pixel art", "typograph", "voxel", "avatar", "manim",
        "animation", "text-to-video", "image-to-video", "claymation", "postcard", "sketch",
        "doodle", "watercolour", "minecraftify", "generative-art", "kinetic typography",
        "icon generator", "logo", "wallpaper", "emoji", "meme", "theater", "theatre", "puppet",
        "myth", "legend", "fantasy", "witch", "wizard", "dragon", "coloring", "collage", "video",
        "film", "tale", "tales", "draw", "drawing", "paint", "painting", "artist", "design",
        "designer", "creative", "render", "scene", "webtoon", "poster", "greeting card",
        "album cover", "cover art", "font", "3d", "canvas", "author", "quill", "writing", "write",
        "clip", "broadcast", "movie", "cinema", "gif", "caricature", "portrait",
    ]),
    ("Companions, Personas & Reflective Toys", "💚", [
        "companion", "waifu", "persona", "character chat", "character with", "in character",
        "virtual character", "desktop pet", "floating desktop", "virtual pet", "tsundere",
        "yandere", "girlfriend", "confidant", "journaling", "voice journal", "mood journal",
        "gratitude journal", "daily journal", "reflection", "oneiro", "oracle", "divination",
        "prophecy", "fortune teller", "magic 8", "tarot", "arcana", "seance", "séance", "occult",
        "constellation", "lives unlived", "unlived", "parallel life", "museum of", "elegy",
        "afterlife", "loneliness", "mindful", "stargazing", "imaginary friend", "pen pal",
        "penpal", "diary", "gratitude", "affirmation", "horoscope", "zodiac", "astrolog",
        "emotion", "feeling", "mood", "empath", "friend", "buddy", "familiar", "creature", "soul",
        "spirit", "ghost", "fairy", "imaginary", "comfort", "vent", "confession", "secret", "wish",
        "memory", "nostalgia", "heart", "love", "romance", "crush", "relationship", "tombstone",
        "raise a", "baby ai", "overthink", "decompress", "dream", "temper", "inner voice",
        "alter ego",
    ]),
    ("Assistants, Agents & Productivity", "💬", [
        "assistant", "co-pilot", "copilot", "chatbot", "chat assistant", "chat with", "agent",
        "helper", "concierge", "butler", "productivity", "to-do", "todo", "task manager",
        "note-taking", "note taking", "notetaking", "meeting notes", "summari", "email", "inbox",
        "calendar", "schedul", "planner", "reminder", "advisor", "recommend", "knowledge base",
        "knowledge hub", "knowledgehub", "second brain", "retrieval-augmented", "rag ",
        "search engine", "web search", "personal ai", "local ai", "offline ai", "home assistant",
        "conversational", "workflow", "support", "docu", "research assistant", "news", "digest",
        "faq", "automation", "ai", "app", "tool", "chat", "bot", "gpt", "llm", "small model",
        "tiny model", "local model", "on-device", "mini", "tiny", "smol", "helps you", "help you",
        "capture", "recall", "organize", "tracker", "logbook", "search", "ask", "query", "answer",
        "wiki", "scrape", "crawl", "pipeline", "integration", "slack", "discord", "telegram",
        "whatsapp", "decision", "prompt", "model", "plan", "vibe", "gemma", "llama", "qwen",
        "nemotron", "minicpm", "smol", "context", "studio", "powered", "pocket", "forge", "32b",
        "14b", "8b", "mind",
    ]),
]

OTHER_NAME, OTHER_EMOJI = "Other", "🎁"

# Emoji lookup for every label the UI can show, including the two pseudo-categories.
CAT_EMOJI = {name: emoji for name, emoji, _ in TAXONOMY}
CAT_EMOJI[OTHER_NAME] = OTHER_EMOJI
CAT_EMOJI["All"] = "🎲"

# Filter choices: All, every real category, then Other (the grab-bag).
ALL_CATEGORIES = ["All"] + [name for name, _, _ in TAXONOMY] + [OTHER_NAME]


def _keyword_pattern(keyword: str) -> re.Pattern:
    """Compile one taxonomy keyword into its search pattern.

    Every keyword gets a LEFT word boundary so stems still work ("transcrib"
    hits "transcribe") while "radio" never hits "gradio". A keyword written
    with a trailing space ("pet ", "api ") asks for a whole-word match; it is
    compiled with a RIGHT word boundary as well instead of a literal space, so
    it also matches at the very end of the text and before punctuation.
    """
    stem = keyword.rstrip()
    pattern = r"\b" + re.escape(stem)
    if stem != keyword:
        pattern += r"\b"
    return re.compile(pattern)


# Pre-compile keyword patterns once, in taxonomy order (categorization is
# first-match-wins). dict.fromkeys drops exact duplicate keywords while
# keeping their order, so no pattern is searched twice.
_COMPILED = [
    (name, [_keyword_pattern(kw) for kw in dict.fromkeys(kws)])
    for name, _, kws in TAXONOMY
]

# Zero-width split point between a lowercase letter/digit and an uppercase
# letter, used to break camelCase names into separate words.
_CAMEL = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")

# Framework / sponsor / model tags that carry no topical signal.
_NOISE_TOK = {
    "gradio", "docker", "static", "build-small-hackathon", "backyard-ai", "backyard ai",
    "tiny-titan", "tiny titan", "off-the-grid", "offgrid", "nemotron", "minicpm", "llama-cpp",
    "openbmb", "zerogpu", "transformers", "nvidia", "modal", "openai", "region:us",
}

# Official hackathon tracks → friendly label (shown as a card badge).
_TRACK_MAP = { "track:backyard": "Backyard", "track:backyard-ai": "Backyard", "track:wood": "Thousand Token Wood", "track:thousand-token-wood": "Thousand Token Wood", "track:thousand_token_wood": "Thousand Token Wood", } # ── Categorization ─────────────────────────────────────────────────────────── def _categorize(title: str, raw: str, desc: str, tags: list) -> str: keep = [t for t in tags if t.lower() not in _NOISE_TOK] blob = f"{title} {raw.replace('-', ' ').replace('_', ' ')} {desc} {' '.join(keep)}" blob = _CAMEL.sub(" ", blob).lower() for name, pats in _COMPILED: if any(p.search(blob) for p in pats): return name return OTHER_NAME def _track_of(tags: list) -> str: for t in tags: lbl = _TRACK_MAP.get(t.lower()) if lbl: return lbl return "" # ── Data loading (cached once per process) ─────────────────────────────────── _CACHE: list | None = None _CACHE_TS: float = 0.0 CACHE_TTL = 600 # re-index at most every 10 min so live submissions join the pool # Populated on every successful load so the UI can explain the pool size. _STATS = {"listed": 0, "private": 0, "infra": 0, "pool": 0} def _card_data(space) -> dict: """Robustly pull the card dict — SpaceCardData is NOT a plain dict on modern huggingface_hub, so isinstance(dict) would silently drop everything.""" cd = getattr(space, "cardData", None) if cd is None: return {} if hasattr(cd, "to_dict"): try: return cd.to_dict() except Exception: pass if isinstance(cd, dict): return dict(cd) return {} def load_spaces(force: bool = False) -> list: """Index every PUBLIC submission in the hackathon org. Private/gated spaces are skipped on purpose: they aren't in the public listing, they'd be dead links for anyone the roulette sends there, and skipping them makes the pool identical whether or not an HF token is present in the environment — so the draw is fair and reproducible. 
""" global _CACHE, _CACHE_TS now = time.time() if not force and _CACHE is not None and (now - _CACHE_TS) < CACHE_TTL: return _CACHE api = HfApi() spaces: list = [] listed = private = infra = 0 try: for s in api.list_spaces(author=HACKATHON_ORG, full=True): listed += 1 if getattr(s, "private", False): private += 1 continue raw = s.id.split("/")[-1] if "/" in s.id else s.id if raw.lower() in INFRA_SPACES: infra += 1 continue cd = _card_data(s) title = str(cd.get("title") or "").strip() desc = str(cd.get("short_description") or cd.get("description") or "").strip()[:320] sdk = str(cd.get("sdk") or "").strip() tags = list(s.tags or []) author = s.author or (s.id.split("/")[0] if "/" in s.id else "?") spaces.append({ "id": s.id, "name": title or raw.replace("-", " ").replace("_", " ").title(), "raw_name": raw, "author": author, "tags": tags, "likes": int(getattr(s, "likes", 0) or 0), "url": f"https://huggingface.co/spaces/{s.id}", "desc": desc, "sdk": sdk, "track": _track_of(tags), "category": _categorize(title, raw, desc, tags), }) except Exception as exc: # never let a network hiccup crash the Space print(f"[hackathon-roulette] load error: {exc}") # Only replace a good cache when we actually got data — a transient fetch # error shouldn't empty the pool mid-event. 
if spaces or _CACHE is None: _CACHE = spaces _CACHE_TS = now if spaces: _STATS.update(listed=listed, private=private, infra=infra, pool=len(spaces)) return _CACHE def _bucket(spaces: list, cat: str) -> list: return [s for s in spaces if cat == "All" or s["category"] == cat] def _display_tags(sp: dict) -> list: out = [] for t in sp["tags"]: tl = t.lower() if tl in _NOISE_TOK: continue if t.startswith(("region:", "license:", "arxiv:", "doi:", "track:", "sponsor:", "achievement:", "base_model:", "dataset:", "pipeline_tag:", "badge-", "badge:")): continue out.append(t) return out # ── HTML builders ───────────────────────────────────────────────────────────── def _stats(seen: list, spaces: list, cat: str) -> str: seen_set = set(seen) bucket = _bucket(spaces, cat) total = len(bucket) found = sum(1 for s in bucket if s["id"] in seen_set) left = total - found pct = round(found / total * 100) if total else 0 cats_html = "" if cat == "All": counts = {} for s in spaces: counts[s["category"]] = counts.get(s["category"], 0) + 1 ordered = [(n, e, counts.get(n, 0)) for n, e, _ in TAXONOMY] ordered.append((OTHER_NAME, OTHER_EMOJI, counts.get(OTHER_NAME, 0))) cats_html = "".join( f'{e} {c}' for n, e, c in ordered if c ) return f"""
""" def _why_note() -> str: pool = _STATS.get("pool", 0) infra = _STATS.get("infra", 0) priv = _STATS.get("private", 0) listed = _STATS.get("listed", 0) or (pool + infra + priv) if not listed: return "" pub = listed - priv # public Spaces — the count the org page shows visitors info = (f"{infra} org info page" + ("s" if infra != 1 else "") + " (field-guide & README)") if priv: # Authenticated: we can actually see and count the private/test spaces, so # reconcile every number — total, private, the public page count, infra. head = f"ⓘ why {pool}, not {listed}?" body = (f"The org has {listed} Spaces in total. " f"{priv} are private or test spaces (so the public page " f"shows {pub}), and {info} aren't competition entries — " f"leaving {pool} real, public submissions to spin through.") else: # Anonymous: the Hub API won't reveal the private spaces, so don't claim a # total that would contradict the higher number on the org page. head = f"ⓘ {pool} public submissions" body = (f"Every public project the Hub's API serves, minus {info}. Private and " f"test Spaces — plus a handful the org page counts that the public API " f"doesn't return — aren't included.") return f"""{inner}
Spin to discover hidden gems from the {HACKATHON_NAME}