<!-- Provenance: commit 3d16fe6 (verified) by jcbowyer. Deploy: Consolidated gold tables, fixed nginx docs routing -->
<!doctype html><html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-guides/huggingface-quickstart" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>🚀 QUICK START: FREE STORAGE WITH HUGGING FACE | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/guides/huggingface-quickstart /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="🚀 QUICK START: FREE STORAGE WITH HUGGING FACE | Open Navigator"/><meta data-rh=true name=description content="TL;DR: Store unlimited data for FREE on Hugging Face!"/><meta data-rh=true property=og:description content="TL;DR: Store unlimited data for FREE on Hugging Face!"/><link data-rh=true rel=icon href=/img/favicon.ico /><link data-rh=true rel=canonical 
href=https://www.communityone.com/docs/guides/huggingface-quickstart /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/guides/huggingface-quickstart hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/guides/huggingface-quickstart hreflang=x-default /><script data-rh=true type=application/ld+json>{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","item":"https://www.communityone.com/docs/for-developers","name":"Developers & Technical Users","position":1},{"@type":"ListItem","item":"https://www.communityone.com/docs/guides/huggingface-quickstart","name":"🚀 QUICK START: FREE STORAGE WITH HUGGING FACE","position":2}]}</script><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs>
<symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
</defs></svg>
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a class="navbar__item navbar__link" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class=sidebarViewport_aRkj><div class=sidebar_njMd><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=true href=/docs/intro><span title="Getting Started" class=categoryLinkLabel_W154>Getting Started</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/intro><span title=Introduction class=linkLabel_WmDU>Introduction</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Open Navigator" class=linkLabel_WmDU>Open Navigator</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-families><span title="Families & Individuals" class=categoryLinkLabel_W154>Families & Individuals</span></a><button aria-label="Collapse sidebar category 'Families & Individuals'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a 
class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/families/community-events><span title="Resources for Families" class=categoryLinkLabel_W154>Resources for Families</span></a></div><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Getting Started with Open Navigator" class=linkLabel_WmDU>Getting Started with Open Navigator</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/citations><span title="Data and Citations" class=linkLabel_WmDU>Data and Citations</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-advocates><span title="Policy Makers & Advocates" class=categoryLinkLabel_W154>Policy Makers & Advocates</span></a><button aria-label="Collapse sidebar category 'Policy Makers & Advocates'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/overview><span title="Understanding the Data" class=categoryLinkLabel_W154>Understanding the Data</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false 
tabindex=0 href=/docs/guides/political-economy><span title="Analysis & Strategy" class=categoryLinkLabel_W154>Analysis & Strategy</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/case-studies/tuscaloosa-complete><span title="Real-World Examples" class=categoryLinkLabel_W154>Real-World Examples</span></a></div></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--active" href=/docs/for-developers><span title="Developers & Technical Users" class=categoryLinkLabel_W154>Developers & Technical Users</span></a><button aria-label="Collapse sidebar category 'Developers & Technical Users'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/quickstart><span title="Setup & Installation" class=categoryLinkLabel_W154>Setup & Installation</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/citations><span title="Data Sources (Technical)" class=categoryLinkLabel_W154>Data Sources (Technical)</span></a></div><li 
class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role=button aria-expanded=true tabindex=0 href=/docs/guides/jurisdiction-setup><span title="How-To Guides" class=categoryLinkLabel_W154>How-To Guides</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/jurisdiction-setup><span title="Jurisdiction Discovery - Quick Start Guide" class=linkLabel_WmDU>Jurisdiction Discovery - Quick Start Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/huggingface-publishing><span title="HuggingFace Dataset Publishing Guide" class=linkLabel_WmDU>HuggingFace Dataset Publishing Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current=page tabindex=0 href=/docs/guides/huggingface-quickstart><span title="🚀 QUICK START: FREE STORAGE WITH HUGGING FACE" class=linkLabel_WmDU>🚀 QUICK START: FREE STORAGE WITH HUGGING FACE</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/huggingface-features><span title="✅ HuggingFace Dataset Sharing Added!" 
class=linkLabel_WmDU>✅ HuggingFace Dataset Sharing Added!</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/huggingface-limits><span title="⚠️ HUGGING FACE FILE LIMITS & SOLUTIONS" class=linkLabel_WmDU>⚠️ HUGGING FACE FILE LIMITS & SOLUTIONS</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/handling-formats><span title="📄 HANDLING MULTIPLE DOCUMENT FORMATS" class=linkLabel_WmDU>📄 HANDLING MULTIPLE DOCUMENT FORMATS</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/document-libraries><span title="📦 INSTALLING DOCUMENT PROCESSING LIBRARIES" class=linkLabel_WmDU>📦 INSTALLING DOCUMENT PROCESSING LIBRARIES</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/scraper-improvements><span title="Scraper Improvements Summary" class=linkLabel_WmDU>Scraper Improvements Summary</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/search-patterns><span title="Scale and Search Patterns: End-to-End Civic Tech Projects" class=linkLabel_WmDU>Scale and Search Patterns: End-to-End Civic Tech Projects</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/guides/split-screen><span title="Split-Screen System: Government Decisions ↔ Community Response" class=linkLabel_WmDU>Split-Screen System: Government Decisions ↔ Community Response</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div 
class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/integrations/mcp-server><span title=Integrations class=categoryLinkLabel_W154>Integrations</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/deployment/databricks-apps><span title=Deployment class=categoryLinkLabel_W154>Deployment</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/development/database-setup><span title=Development class=categoryLinkLabel_W154>Development</span></a></div></ul></ul></nav></div></div></aside><main class=docMainContainer_TBSr><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label=Breadcrumbs><ul class=breadcrumbs><li class=breadcrumbs__item><a aria-label="Home page" class=breadcrumbs__link href=/><svg viewBox="0 0 24 24" class=breadcrumbHomeIcon_YNFT><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill=currentColor /></svg></a><li class=breadcrumbs__item><a class=breadcrumbs__link href=/docs/for-developers><span>Developers & Technical Users</span></a><li class=breadcrumbs__item><span class=breadcrumbs__link>How-To Guides</span><li 
class="breadcrumbs__item breadcrumbs__item--active"><span class=breadcrumbs__link>🚀 QUICK START: FREE STORAGE WITH HUGGING FACE</span></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>🚀 QUICK START: FREE STORAGE WITH HUGGING FACE</h1></header>
<p><strong>TL;DR: Store unlimited data for FREE on Hugging Face!</strong></p>
<p><strong>⚠️ IMPORTANT: Use Parquet format, NOT individual PDFs! See the <a class="" href=/docs/guides/huggingface-limits>file limits guide</a>.</strong></p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-3-minute-setup>⚡ 3-MINUTE SETUP<a href=#-3-minute-setup class=hash-link aria-label="Direct link to ⚡ 3-MINUTE SETUP" title="Direct link to ⚡ 3-MINUTE SETUP" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-create-hugging-face-account-1-minute>1. Create Hugging Face Account (1 minute)<a href=#1-create-hugging-face-account-1-minute class=hash-link aria-label="Direct link to 1. Create Hugging Face Account (1 minute)" title="Direct link to 1. Create Hugging Face Account (1 minute)" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Go to https://huggingface.co/join</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Sign up (FREE)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Verify email</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-get-api-token-1-minute>2. Get API Token (1 minute)<a href=#2-get-api-token-1-minute class=hash-link aria-label="Direct link to 2. Get API Token (1 minute)" title="Direct link to 2. Get API Token (1 minute)" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Go to https://huggingface.co/settings/tokens</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Click "New token"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Name it "oral-health-upload"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Token Type: Write (required for publishing datasets)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Repository permissions: All repositories</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Copy the token (hf_xxxxxxxxxxxx)</span><br/></div></code></pre></div></div>
<p><strong>⚠️ Important: Token Permissions</strong></p>
<ul>
<li class=""><strong>Write</strong> access required for publishing datasets</li>
<li class=""><strong>Read</strong> access sufficient for downloading public datasets only</li>
<li class="">For this project: Use <strong>Write</strong> token to publish your scraped data</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-install--login-1-minute>3. Install & Login (1 minute)<a href=#3-install--login-1-minute class=hash-link aria-label="Direct link to 3. Install & Login (1 minute)" title="Direct link to 3. Install & Login (1 minute)" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">pip install huggingface_hub datasets</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Set your token</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">export HUGGINGFACE_TOKEN="hf_YOUR_TOKEN_HERE"</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=️-critical-file-limits>⚠️ CRITICAL: FILE LIMITS<a href=#️-critical-file-limits class=hash-link aria-label="Direct link to ⚠️ CRITICAL: FILE LIMITS" title="Direct link to ⚠️ CRITICAL: FILE LIMITS" translate=no>​</a></h2>
<p><strong>Hugging Face Limits:</strong></p>
<ul>
<li class="">Files per folder: &lt;10,000</li>
<li class="">Total files per repo: &lt;100,000</li>
<li class="">For large datasets: Use Parquet or WebDataset format</li>
</ul>
<p><strong>Your Scale:</strong></p>
<ul>
<li class="">22,000 jurisdictions × 1,000 docs = 22 MILLION files ❌</li>
</ul>
<p><strong>Solution:</strong></p>
<ul>
<li class="">Extract text from PDFs</li>
<li class="">Store in Parquet format</li>
<li class="">Result: 50 files instead of 22 million ✅</li>
</ul>
<p><strong>See detailed guide:</strong> <a class="" href=/docs/guides/huggingface-limits>Hugging Face File Limits &amp; Solutions</a></p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-upload-your-data>📤 UPLOAD YOUR DATA<a href=#-upload-your-data class=hash-link aria-label="Direct link to 📤 UPLOAD YOUR DATA" title="Direct link to 📤 UPLOAD YOUR DATA" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=option-1-use-the-upload-script-recommended>Option 1: Use the Upload Script (Recommended)<a href=#option-1-use-the-upload-script-recommended class=hash-link aria-label="Direct link to Option 1: Use the Upload Script (Recommended)" title="Direct link to Option 1: Use the Upload Script (Recommended)" translate=no>​</a></h3>
<p><strong>For discovery data:</strong></p>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Go to your project</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">cd /home/developer/projects/open-navigator</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Activate environment</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">source venv/bin/activate</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Upload discovery results</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_to_huggingface.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --repo "YOUR_USERNAME/oral-health-policy-data" \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --discovery</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># View your dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># https://huggingface.co/datasets/YOUR_USERNAME/oral-health-policy-data</span><br/></div></code></pre></div></div>
<p><strong>For meeting PDFs (extract text first!):</strong></p>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># DON'T upload individual PDFs!</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Instead, extract text and save as Parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 1. Create a file with PDF URLs (one per line)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">cat > pdf_urls.txt &lt;&lt; EOF</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">https://tuscaloosaal.suiteonemedia.com/agenda1.pdf</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">https://tuscaloosaal.suiteonemedia.com/agenda2.pdf</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">...</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">EOF</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 2. 
Process PDFs to Parquet (extracts text, deletes PDFs)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_to_huggingface.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --repo "YOUR_USERNAME/oral-health-policy-data" \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --process-pdfs pdf_urls.txt</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 3. Upload the Parquet file (1 file, not thousands!)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_to_huggingface.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --repo "YOUR_USERNAME/oral-health-policy-data" \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --meetings meetings_processed.parquet</span><br/></div></code></pre></div></div>
<hr/>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> huggingface_hub </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> login</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Login</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">login</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">token</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"hf_YOUR_TOKEN"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div 
class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load your data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_csv</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/bronze/discovered_sources/discovery_summary_final.csv'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Convert to dataset</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">dataset </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Dataset</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">from_pandas</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Upload to Hugging Face (FREE!)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">dataset</span><span class="token 
punctuation" style=color:#393A34>.</span><span class="token plain">push_to_hub</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"YOUR_USERNAME/oral-health-policy-data"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> split</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"discovery"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"✅ Data uploaded! View at:"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"https://huggingface.co/datasets/YOUR_USERNAME/oral-health-policy-data"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-cost-breakdown>💰 COST BREAKDOWN<a href=#-cost-breakdown class=hash-link aria-label="Direct link to 💰 COST BREAKDOWN" title="Direct link to 💰 COST BREAKDOWN" translate=no>​</a></h2>
<table><thead><tr><th>What You Get<th>Cost<tbody><tr><td><strong>Unlimited storage</strong> (public datasets)<td><strong>FREE</strong><tr><td>Unlimited downloads<td>FREE<tr><td>Built-in viewer<td>FREE<tr><td>Version control<td>FREE<tr><td>Search & filtering<td>FREE<tr><td>API access<td>FREE<tr><td><strong>TOTAL</strong><td><strong>$0/month</strong> ✅</table>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-storage-comparison>📊 STORAGE COMPARISON<a href=#-storage-comparison class=hash-link aria-label="Direct link to 📊 STORAGE COMPARISON" title="Direct link to 📊 STORAGE COMPARISON" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=bad-approach-expensive>Bad Approach (Expensive)<a href=#bad-approach-expensive class=hash-link aria-label="Direct link to Bad Approach (Expensive)" title="Direct link to Bad Approach (Expensive)" translate=no>​</a></h3>
<div class="language-text codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-text codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">❌ Download all videos: 250 TB = $5,000/month</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">❌ Store all PDFs: 30 TB = $600/month</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">❌ Total: $5,600/month 💸</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=good-approach-free>Good Approach (FREE)<a href=#good-approach-free class=hash-link aria-label="Direct link to Good Approach (FREE)" title="Direct link to Good Approach (FREE)" translate=no>​</a></h3>
<div class="language-text codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-text codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">✅ Store discovery data: 1 GB = FREE</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">✅ Store extracted text: 25 GB = FREE</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">✅ Store oral health subset: 5 GB = FREE</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">✅ Total: $0/month 🎉</span><br/></div></code></pre></div></div>
<p><strong>Savings: $5,600/month → $0/month</strong></p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-what-to-upload>🎯 WHAT TO UPLOAD<a href=#-what-to-upload class=hash-link aria-label="Direct link to 🎯 WHAT TO UPLOAD" title="Direct link to 🎯 WHAT TO UPLOAD" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=-upload-these>✅ Upload These:<a href=#-upload-these class=hash-link aria-label="Direct link to ✅ Upload These:" title="Direct link to ✅ Upload These:" translate=no>​</a></h3>
<ol>
<li class="">
<p><strong>Discovery Results</strong> (~1 GB)</p>
<ul>
<li class="">Jurisdiction websites</li>
<li class="">YouTube channels</li>
<li class="">Meeting platforms</li>
<li class="">Social media links</li>
</ul>
</li>
<li class="">
<p><strong>Meeting Metadata</strong> (~2 GB)</p>
<ul>
<li class="">Meeting dates/titles</li>
<li class="">Agenda item lists</li>
<li class="">Source URLs</li>
</ul>
</li>
<li class="">
<p><strong>Extracted Text</strong> (~25 GB)</p>
<ul>
<li class="">Text from PDFs</li>
<li class="">Meeting transcripts</li>
<li class="">Filtered for oral health</li>
</ul>
</li>
</ol>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=-dont-upload-these>❌ Don't Upload These:<a href=#-dont-upload-these class=hash-link aria-label="Direct link to ❌ Don't Upload These:" title="Direct link to ❌ Don't Upload These:" translate=no>​</a></h3>
<ol>
<li class=""><strong>Videos</strong> - Link to YouTube instead</li>
<li class=""><strong>Full PDFs</strong> - Store text + URL to original</li>
<li class=""><strong>Website HTML</strong> - Just store the data you extracted</li>
<li class=""><strong>Duplicates</strong> - Filter first</li>
</ol>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-example-workflow>📝 EXAMPLE WORKFLOW<a href=#-example-workflow class=hash-link aria-label="Direct link to 📝 EXAMPLE WORKFLOW" title="Direct link to 📝 EXAMPLE WORKFLOW" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=step-1-run-discovery>Step 1: Run Discovery<a href=#step-1-run-discovery class=hash-link aria-label="Direct link to Step 1: Run Discovery" title="Direct link to Step 1: Run Discovery" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Discover all Alabama jurisdictions</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python discovery/comprehensive_discovery_pipeline.py --state AL</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Output: data/bronze/discovered_sources/discovery_summary_AL.csv (~50 KB)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=step-2-upload-to-hugging-face>Step 2: Upload to Hugging Face<a href=#step-2-upload-to-hugging-face class=hash-link aria-label="Direct link to Step 2: Upload to Hugging Face" title="Direct link to Step 2: Upload to Hugging Face" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Upload discovery results</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_to_huggingface.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --repo "YOUR_USERNAME/oral-health-policy-data" \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --discovery</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=step-3-free-up-local-space>Step 3: Free Up Local Space<a href=#step-3-free-up-local-space class=hash-link aria-label="Direct link to Step 3: Free Up Local Space" title="Direct link to Step 3: Free Up Local Space" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Optional: Delete local files (data is safely in cloud)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">rm -rf data/bronze/discovered_sources/*.csv</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># You can always download from Hugging Face later!</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=step-4-share--analyze>Step 4: Share & Analyze<a href=#step-4-share--analyze class=hash-link aria-label="Direct link to Step 4: Share & Analyze" title="Direct link to Step 4: Share & Analyze" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Anyone can now use your data (including you!)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">data </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"YOUR_USERNAME/oral-health-policy-data"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> split</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"discovery"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">alabama </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> data</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">filter</span><span class="token punctuation" style=color:#393A34>(</span><span class="token keyword" 
style=color:#00009f>lambda</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'state'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Alabama jurisdictions: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">alabama</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-continuous-workflow>🔄 CONTINUOUS WORKFLOW<a href=#-continuous-workflow class=hash-link aria-label="Direct link to 🔄 CONTINUOUS WORKFLOW" title="Direct link to 🔄 CONTINUOUS WORKFLOW" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=keep-local-storage-low-100-mb>Keep Local Storage Low (~100 MB)<a href=#keep-local-storage-low-100-mb class=hash-link aria-label="Direct link to Keep Local Storage Low (~100 MB)" title="Direct link to Keep Local Storage Low (~100 MB)" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Process one jurisdiction at a time</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> jurisdiction </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> all_jurisdictions</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># 1. Download PDF (2 MB)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> pdf </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> download_agenda</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">jurisdiction</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># 2. 
Extract text (50 KB)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> text </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> extract_text</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pdf</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># 3. Upload to Hugging Face</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> upload_to_hf</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">text</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># 4. Delete local file</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> os</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">remove</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pdf</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Local storage: Never exceeds 100 MB! 
✅</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-hugging-face-basics>📚 HUGGING FACE BASICS<a href=#-hugging-face-basics class=hash-link aria-label="Direct link to 📚 HUGGING FACE BASICS" title="Direct link to 📚 HUGGING FACE BASICS" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=load-your-data-anywhere>Load Your Data Anywhere<a href=#load-your-data-anywhere class=hash-link aria-label="Direct link to Load Your Data Anywhere" title="Direct link to Load Your Data Anywhere" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load on your laptop</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">data </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"YOUR_USERNAME/oral-health-policy-data"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Or in Google Colab (FREE GPU)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Or on a friend's computer</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token 
comment" style=color:#999988;font-style:italic># Or 5 years from now</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Your data is always available, forever, for FREE!</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=search--filter>Search & Filter<a href=#search--filter class=hash-link aria-label="Direct link to Search & Filter" title="Direct link to Search & Filter" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Find cities with YouTube channels</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">with_youtube </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> data</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">filter</span><span class="token punctuation" style=color:#393A34>(</span><span class="token keyword" style=color:#00009f>lambda</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'youtube_channels'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Find high-quality sources</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">high_quality </span><span class="token operator" 
style=color:#393A34>=</span><span class="token plain"> data</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">filter</span><span class="token punctuation" style=color:#393A34>(</span><span class="token keyword" style=color:#00009f>lambda</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'completeness'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0.8</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Find specific state</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">indiana </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> data</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">filter</span><span class="token punctuation" style=color:#393A34>(</span><span class="token keyword" style=color:#00009f>lambda</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> x</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'state'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" 
style=color:#393A34>==</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'IN'</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=download-subset>Download Subset<a href=#download-subset class=hash-link aria-label="Direct link to Download Subset" title="Direct link to Download Subset" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Only download what you need (save bandwidth)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">oral_health_only </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"YOUR_USERNAME/oral-health-policy-data"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> split</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"oral_health"</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Only the filtered subset</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Maybe only 5 GB instead of 50 GB!</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-benefits>✅ BENEFITS<a href=#-benefits class=hash-link aria-label="Direct link to ✅ BENEFITS" title="Direct link to ✅ BENEFITS" translate=no>&#8203;</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-free-unlimited-storage>1. <strong>FREE Unlimited Storage</strong><a href=#1-free-unlimited-storage class=hash-link aria-label="Direct link to 1-free-unlimited-storage" title="Direct link to 1-free-unlimited-storage" translate=no>&#8203;</a></h3>
<ul>
<li class="">No storage limits for public datasets</li>
<li class="">No bandwidth limits</li>
<li class="">No time limits</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-accessible-anywhere>2. <strong>Accessible Anywhere</strong><a href=#2-accessible-anywhere class=hash-link aria-label="Direct link to 2-accessible-anywhere" title="Direct link to 2-accessible-anywhere" translate=no>&#8203;</a></h3>
<ul>
<li class="">Download from any computer</li>
<li class="">Share with collaborators</li>
<li class="">Use in Google Colab</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-version-control>3. <strong>Version Control</strong><a href=#3-version-control class=hash-link aria-label="Direct link to 3-version-control" title="Direct link to 3-version-control" translate=no>&#8203;</a></h3>
<ul>
<li class="">Git-based system</li>
<li class="">Track all changes</li>
<li class="">Revert if needed</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=4-discovery>4. <strong>Discovery</strong><a href=#4-discovery class=hash-link aria-label="Direct link to 4-discovery" title="Direct link to 4-discovery" translate=no>&#8203;</a></h3>
<ul>
<li class="">Your dataset appears in Hugging Face search</li>
<li class="">Other researchers can use it</li>
<li class="">Builds your portfolio</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=5-integration>5. <strong>Integration</strong><a href=#5-integration class=hash-link aria-label="Direct link to 5-integration" title="Direct link to 5-integration" translate=no>&#8203;</a></h3>
<ul>
<li class="">Works with PyTorch, TensorFlow</li>
<li class="">Built-in data viewer</li>
<li class="">API access</li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-learn-more>🎓 LEARN MORE<a href=#-learn-more class=hash-link aria-label="Direct link to 🎓 LEARN MORE" title="Direct link to 🎓 LEARN MORE" translate=no>&#8203;</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=official-docs>Official Docs<a href=#official-docs class=hash-link aria-label="Direct link to Official Docs" title="Direct link to Official Docs" translate=no>&#8203;</a></h3>
<ul>
<li class=""><strong>Hugging Face Datasets:</strong> <a href=https://huggingface.co/docs/datasets/ target=_blank rel="noopener noreferrer" class="">https://huggingface.co/docs/datasets/</a></li>
<li class=""><strong>Quick Start:</strong> <a href=https://huggingface.co/docs/datasets/quickstart target=_blank rel="noopener noreferrer" class="">https://huggingface.co/docs/datasets/quickstart</a></li>
<li class=""><strong>Upload Guide:</strong> <a href=https://huggingface.co/docs/datasets/upload_dataset target=_blank rel="noopener noreferrer" class="">https://huggingface.co/docs/datasets/upload_dataset</a></li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=examples>Examples<a href=#examples class=hash-link aria-label="Direct link to Examples" title="Direct link to Examples" translate=no>&#8203;</a></h3>
<ul>
<li class=""><strong>MeetingBank:</strong> <a href=https://huggingface.co/datasets/huuuyeah/meetingbank target=_blank rel="noopener noreferrer" class="">https://huggingface.co/datasets/huuuyeah/meetingbank</a></li>
<li class=""><strong>Browse Datasets:</strong> <a href=https://huggingface.co/datasets target=_blank rel="noopener noreferrer" class="">https://huggingface.co/datasets</a></li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-troubleshooting>🆘 TROUBLESHOOTING<a href=#-troubleshooting class=hash-link aria-label="Direct link to 🆘 TROUBLESHOOTING" title="Direct link to 🆘 TROUBLESHOOTING" translate=no>&#8203;</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=authentication-failed>"Authentication failed"<a href=#authentication-failed class=hash-link aria-label='Direct link to "Authentication failed"' title='Direct link to "Authentication failed"' translate=no>&#8203;</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Make sure token is set</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">echo $HUGGINGFACE_TOKEN</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># If empty, set it</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">export HUGGINGFACE_TOKEN="hf_YOUR_TOKEN"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Or login interactively</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">huggingface-cli login</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=permission-denied>"Permission denied"<a href=#permission-denied class=hash-link aria-label='Direct link to "Permission denied"' title='Direct link to "Permission denied"' translate=no>&#8203;</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Make sure repo name includes your username</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># ✅ Correct: "myusername/oral-health-policy-data"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># ❌ Wrong: "oral-health-policy-data"</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=dataset-too-large>"Dataset too large"<a href=#dataset-too-large class=hash-link aria-label='Direct link to "Dataset too large"' title='Direct link to "Dataset too large"' translate=no>&#8203;</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Don't upload raw files!</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Upload processed/filtered data only</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># ❌ Bad: Upload 50 GB of PDFs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># ✅ Good: Upload 5 GB of extracted text</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-next-steps>🎯 NEXT STEPS<a href=#-next-steps class=hash-link aria-label="Direct link to 🎯 NEXT STEPS" title="Direct link to 🎯 NEXT STEPS" translate=no>&#8203;</a></h2>
<ol>
<li class="">✅ Create Hugging Face account</li>
<li class="">✅ Get API token</li>
<li class="">✅ Run discovery for your state</li>
<li class="">✅ Upload to Hugging Face</li>
<li class="">✅ Delete local files to free space</li>
<li class="">✅ Scale to all 22,000+ jurisdictions!</li>
</ol>
<p><strong>Your data is safe in the cloud, FREE, forever!</strong> 🎉</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-pro-tip>💡 PRO TIP<a href=#-pro-tip class=hash-link aria-label="Direct link to 💡 PRO TIP" title="Direct link to 💡 PRO TIP" translate=no>&#8203;</a></h2>
<p>Make your dataset <strong>public</strong> (not private):</p>
<ul>
<li class="">✅ FREE unlimited storage</li>
<li class="">✅ Helps research community</li>
<li class="">✅ Builds your portfolio</li>
<li class="">✅ Appears in search results</li>
</ul>
<p>Private datasets are limited to 100 GB and don't help anyone!</p>
<p><strong>Public = Win-Win-Win</strong> 🏆</div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/guides/huggingface-quickstart.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href=/docs/guides/huggingface-publishing><div class=pagination-nav__sublabel>Previous</div><div class=pagination-nav__label>HuggingFace Dataset Publishing Guide</div></a><a class="pagination-nav__link pagination-nav__link--next" href=/docs/guides/huggingface-features><div class=pagination-nav__sublabel>Next</div><div class=pagination-nav__label>✅ HuggingFace Dataset Sharing Added!</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#-3-minute-setup class="table-of-contents__link toc-highlight">⚡ 3-MINUTE SETUP</a><ul><li><a href=#1-create-hugging-face-account-1-minute class="table-of-contents__link toc-highlight">1. Create Hugging Face Account (1 minute)</a><li><a href=#2-get-api-token-1-minute class="table-of-contents__link toc-highlight">2. Get API Token (1 minute)</a><li><a href=#3-install--login-1-minute class="table-of-contents__link toc-highlight">3. 
Install & Login (1 minute)</a></ul><li><a href=#️-critical-file-limits class="table-of-contents__link toc-highlight">⚠️ CRITICAL: FILE LIMITS</a><li><a href=#-upload-your-data class="table-of-contents__link toc-highlight">πŸ“€ UPLOAD YOUR DATA</a><ul><li><a href=#option-1-use-the-upload-script-recommended class="table-of-contents__link toc-highlight">Option 1: Use the Upload Script (Recommended)</a></ul><li><a href=#-cost-breakdown class="table-of-contents__link toc-highlight">πŸ’° COST BREAKDOWN</a><li><a href=#-storage-comparison class="table-of-contents__link toc-highlight">πŸ“Š STORAGE COMPARISON</a><ul><li><a href=#bad-approach-expensive class="table-of-contents__link toc-highlight">Bad Approach (Expensive)</a><li><a href=#good-approach-free class="table-of-contents__link toc-highlight">Good Approach (FREE)</a></ul><li><a href=#-what-to-upload class="table-of-contents__link toc-highlight">🎯 WHAT TO UPLOAD</a><ul><li><a href=#-upload-these class="table-of-contents__link toc-highlight">βœ… Upload These:</a><li><a href=#-dont-upload-these class="table-of-contents__link toc-highlight">❌ Don't Upload These:</a></ul><li><a href=#-example-workflow class="table-of-contents__link toc-highlight">πŸ“ EXAMPLE WORKFLOW</a><ul><li><a href=#step-1-run-discovery class="table-of-contents__link toc-highlight">Step 1: Run Discovery</a><li><a href=#step-2-upload-to-hugging-face class="table-of-contents__link toc-highlight">Step 2: Upload to Hugging Face</a><li><a href=#step-3-free-up-local-space class="table-of-contents__link toc-highlight">Step 3: Free Up Local Space</a><li><a href=#step-4-share--analyze class="table-of-contents__link toc-highlight">Step 4: Share & Analyze</a></ul><li><a href=#-continuous-workflow class="table-of-contents__link toc-highlight">πŸ”„ CONTINUOUS WORKFLOW</a><ul><li><a href=#keep-local-storage-low-100-mb class="table-of-contents__link toc-highlight">Keep Local Storage Low (~100 MB)</a></ul><li><a href=#-hugging-face-basics 
class="table-of-contents__link toc-highlight">📚 HUGGING FACE BASICS</a><ul><li><a href=#load-your-data-anywhere class="table-of-contents__link toc-highlight">Load Your Data Anywhere</a><li><a href=#search--filter class="table-of-contents__link toc-highlight">Search & Filter</a><li><a href=#download-subset class="table-of-contents__link toc-highlight">Download Subset</a></ul><li><a href=#-benefits class="table-of-contents__link toc-highlight">✅ BENEFITS</a><ul><li><a href=#1-free-unlimited-storage class="table-of-contents__link toc-highlight">1. <strong>FREE Unlimited Storage</strong></a><li><a href=#2-accessible-anywhere class="table-of-contents__link toc-highlight">2. <strong>Accessible Anywhere</strong></a><li><a href=#3-version-control class="table-of-contents__link toc-highlight">3. <strong>Version Control</strong></a><li><a href=#4-discovery class="table-of-contents__link toc-highlight">4. <strong>Discovery</strong></a><li><a href=#5-integration class="table-of-contents__link toc-highlight">5. 
<strong>Integration</strong></a></ul><li><a href=#-learn-more class="table-of-contents__link toc-highlight">🎓 LEARN MORE</a><ul><li><a href=#official-docs class="table-of-contents__link toc-highlight">Official Docs</a><li><a href=#examples class="table-of-contents__link toc-highlight">Examples</a></ul><li><a href=#-troubleshooting class="table-of-contents__link toc-highlight">🆘 TROUBLESHOOTING</a><ul><li><a href=#authentication-failed class="table-of-contents__link toc-highlight">"Authentication failed"</a><li><a href=#permission-denied class="table-of-contents__link toc-highlight">"Permission denied"</a><li><a href=#dataset-too-large class="table-of-contents__link toc-highlight">"Dataset too large"</a></ul><li><a href=#-next-steps class="table-of-contents__link toc-highlight">🎯 NEXT STEPS</a><li><a href=#-pro-tip class="table-of-contents__link toc-highlight">💡 PRO TIP</a></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use 
href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" 
class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body>