File size: 45,457 Bytes
3d16fe6
896453f
 
1f7780e
896453f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcf298e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
<!doctype html><html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-guides/state-split-data" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>State-Split Data Files (Deprecated) | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/guides/state-split-data /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="State-Split Data Files (Deprecated) | Open Navigator"/><meta data-rh=true name=description content="This approach of splitting files into separate state files is deprecated."/><meta data-rh=true property=og:description content="This approach of splitting files into separate state files is deprecated."/><link data-rh=true rel=icon href=/img/favicon.ico /><link data-rh=true rel=canonical 
href=https://www.communityone.com/docs/guides/state-split-data /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/guides/state-split-data hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/guides/state-split-data hreflang=x-default /><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs>
<symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
</defs></svg>
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a class="navbar__item navbar__link" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><main class="docMainContainer_TBSr docMainContainerEnhanced_lQrH"><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>State-Split Data Files (Deprecated)</h1></header>
<div class="theme-admonition theme-admonition-warning alert alert--warning">
<p><strong>Deprecated</strong></p>
<p>This approach of splitting files into separate state files is <strong>deprecated</strong>.</p>
<p>Use <strong><a class="" href=/docs/guides/partitioned-datasets>Partitioned Datasets</a></strong> instead for:</p>
<ul>
<li class="">Same efficiency as separate files</li>
<li class="">Ability to query across states</li>
<li class="">Better analytics tool support</li>
<li class="">Simpler data management</li>
</ul>
</div>
<p>All gold parquet files with state information were previously split into state-specific files. This has been replaced by partitioned datasets which offer the same benefits with better queryability.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-changed>What Changed<a href=#what-changed class=hash-link aria-label="Direct link to What Changed" title="Direct link to What Changed" translate=no></a></h2>
<p>Instead of downloading one massive file with all states:</p>
<ul>
<li class=""><code>nonprofits_organizations.parquet</code> (72 MB, 1.9M records)</li>
</ul>
<p>With state-split files, you can download just the state(s) you need:</p>
<ul>
<li class=""><code>nonprofits_organizations_AL.parquet</code> (Alabama only, ~1 MB)</li>
<li class=""><code>nonprofits_organizations_CA.parquet</code> (California only, ~8 MB)</li>
<li class=""><code>nonprofits_organizations_TX.parquet</code> (Texas only, ~6 MB)</li>
</ul>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=benefits>Benefits<a href=#benefits class=hash-link aria-label="Direct link to Benefits" title="Direct link to Benefits" translate=no></a></h2>
<ol>
<li class=""><strong>Smaller Downloads</strong>: Only download the data you need</li>
<li class=""><strong>Faster Queries</strong>: Load and analyze state-specific data faster</li>
<li class=""><strong>Better Organization</strong>: Easier to manage and share state-level datasets</li>
<li class=""><strong>HuggingFace Friendly</strong>: Avoids file size limits, enables state-specific repos</li>
</ol>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=file-structure>File Structure<a href=#file-structure class=hash-link aria-label="Direct link to File Structure" title="Direct link to File Structure" translate=no></a></h2>
<p>State-split files are located in <code>data/gold/by_state/</code>:</p>
<div class="language-text codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-text codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">data/gold/by_state/</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── nonprofits_organizations_AL.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── nonprofits_organizations_AK.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── nonprofits_locations_AL.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── jurisdictions_cities_AL.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── jurisdictions_counties_AL.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">├── jurisdictions_school_districts_AL.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">└── ... (388 total files)</span><br/></div></code></pre></div></div>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=files-that-were-split>Files That Were Split<a href=#files-that-were-split class=hash-link aria-label="Direct link to Files That Were Split" title="Direct link to Files That Were Split" translate=no></a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=nonprofit-data-62-statesterritories-each>Nonprofit Data (62 states/territories each)<a href=#nonprofit-data-62-statesterritories-each class=hash-link aria-label="Direct link to Nonprofit Data (62 states/territories each)" title="Direct link to Nonprofit Data (62 states/territories each)" translate=no></a></h3>
<ul>
<li class=""><code>nonprofits_organizations_*.parquet</code> - Organization details</li>
<li class=""><code>nonprofits_locations_*.parquet</code> - Geographic locations</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=jurisdiction-data-52-states-each>Jurisdiction Data (52 states/territories each)<a href=#jurisdiction-data-52-states-each class=hash-link aria-label="Direct link to Jurisdiction Data (52 states/territories each)" title="Direct link to Jurisdiction Data (52 states/territories each)" translate=no></a></h3>
<ul>
<li class=""><code>jurisdictions_cities_*.parquet</code> - Cities and municipalities</li>
<li class=""><code>jurisdictions_counties_*.parquet</code> - Counties</li>
<li class=""><code>jurisdictions_school_districts_*.parquet</code> - School districts</li>
<li class=""><code>jurisdictions_townships_*.parquet</code> - Townships</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=other-data-56-states-each>Other Data (56 states/territories each)<a href=#other-data-56-states-each class=hash-link aria-label="Direct link to Other Data (56 states/territories each)" title="Direct link to Other Data (56 states/territories each)" translate=no></a></h3>
<ul>
<li class=""><code>domains_gsa_domains_*.parquet</code> - Government domains</li>
</ul>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=usage>Usage<a href=#usage class=hash-link aria-label="Direct link to Usage" title="Direct link to Usage" translate=no></a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=load-alabama-nonprofits>Load Alabama Nonprofits<a href=#load-alabama-nonprofits class=hash-link aria-label="Direct link to Load Alabama Nonprofits" title="Direct link to Load Alabama Nonprofits" translate=no></a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load only Alabama data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/by_state/nonprofits_organizations_AL.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Alabama nonprofits: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" 
style=color:#393A34>(</span><span class="token string-interpolation interpolation">df</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=load-multiple-states>Load Multiple States<a href=#load-multiple-states class=hash-link aria-label="Direct link to Load Multiple States" title="Direct link to Load Multiple States" translate=no></a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> pathlib </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Path</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load all southeastern states</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">states </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'GA'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'FL'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'MS'</span><span class="token punctuation" 
style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'TN'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'SC'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'NC'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">dfs </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> state </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> states</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    path </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f'data/gold/by_state/nonprofits_organizations_</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">state</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>.parquet'</span><span 
class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">path</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    dfs</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Combine into one DataFrame</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">southeast </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">concat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">dfs</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> ignore_index</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Southeast nonprofits: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">southeast</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=recreate-full-dataset>Recreate Full Dataset<a href=#recreate-full-dataset class=hash-link aria-label="Direct link to Recreate Full Dataset" title="Direct link to Recreate Full Dataset" translate=no></a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> pathlib </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Path</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load all nonprofit organization files</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">files </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Path</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/by_state'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">glob</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'nonprofits_organizations_*.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">dfs 
</span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">f</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> f </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> files</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Combine</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">full_dataset </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">concat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">dfs</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> ignore_index</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span 
class="token string-interpolation string" style=color:#e3116c>f"All nonprofits: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">full_dataset</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=managing-state-splits>Managing State Splits<a href=#managing-state-splits class=hash-link aria-label="Direct link to Managing State Splits" title="Direct link to Managing State Splits" translate=no></a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=createupdate-state-splits>Create/Update State Splits<a href=#createupdate-state-splits class=hash-link aria-label="Direct link to Create/Update State Splits" title="Direct link to Create/Update State Splits" translate=no></a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Split all files by state</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/split_gold_by_state.py --all</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Split specific file</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/split_gold_by_state.py --file nonprofits_organizations.parquet</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Dry run (see what would happen)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/split_gold_by_state.py --all --dry-run</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># View statistics</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/split_gold_by_state.py --stats</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=upload-to-huggingface>Upload to HuggingFace<a href=#upload-to-huggingface class=hash-link aria-label="Direct link to Upload to HuggingFace" title="Direct link to Upload to HuggingFace" translate=no></a></h3>
<p>Upload state-specific datasets to HuggingFace for public access:</p>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Upload all states</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_state_splits_to_hf.py --all</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Upload Alabama only</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_state_splits_to_hf.py --state AL</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Upload multiple states</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_state_splits_to_hf.py --states AL AK AZ CA</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Dry run</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/upload_state_splits_to_hf.py --all --dry-run</span><br/></div></code></pre></div></div>
<p>This creates state-specific repos on HuggingFace, for example:</p>
<ul>
<li class=""><code>CommunityOne/one-data-AL</code> - All Alabama data</li>
<li class=""><code>CommunityOne/one-data-CA</code> - All California data</li>
<li class=""><code>CommunityOne/one-data-TX</code> - All Texas data</li>
</ul>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=statistics>Statistics<a href=#statistics class=hash-link aria-label="Direct link to Statistics" title="Direct link to Statistics" translate=no></a></h2>
<p><strong>Total State-Split Files</strong>: 388 files<br/>
<strong>Total Size</strong>: 172 MB<br/>
<strong>States/Territories</strong>: 62 (all US states, DC, territories, military addresses)</p>
<p><strong>File Breakdown</strong>:</p>
<ul>
<li class="">62 nonprofit organization files</li>
<li class="">62 nonprofit location files</li>
<li class="">56 government domain files</li>
<li class="">52 jurisdiction city files</li>
<li class="">52 jurisdiction county files</li>
<li class="">52 jurisdiction school district files</li>
<li class="">52 jurisdiction township files</li>
</ul>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=notes>Notes<a href=#notes class=hash-link aria-label="Direct link to Notes" title="Direct link to Notes" translate=no></a></h2>
<ul>
<li class="">Original monolithic files are still in <code>data/gold/</code> for backward compatibility</li>
<li class="">State-split files use standard 2-letter state codes (AL, AK, AZ, etc.)</li>
<li class="">Includes US territories: PR, VI, GU, AS, MP</li>
<li class="">Includes military addresses: AA, AE, AP</li>
<li class="">Some datasets have fewer state files because no data exists for some states or territories</li>
</ul></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/guides/state-split-data.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#what-changed class="table-of-contents__link toc-highlight">What Changed</a><li><a href=#benefits class="table-of-contents__link toc-highlight">Benefits</a><li><a href=#file-structure class="table-of-contents__link toc-highlight">File Structure</a><li><a href=#files-that-were-split class="table-of-contents__link toc-highlight">Files That Were Split</a><ul><li><a href=#nonprofit-data-62-statesterritories-each class="table-of-contents__link toc-highlight">Nonprofit Data (62 states/territories each)</a><li><a href=#jurisdiction-data-52-states-each class="table-of-contents__link toc-highlight">Jurisdiction Data (52 states each)</a><li><a href=#other-data-56-states-each class="table-of-contents__link toc-highlight">Other Data (56 states each)</a></ul><li><a href=#usage class="table-of-contents__link toc-highlight">Usage</a><ul><li><a href=#load-alabama-nonprofits class="table-of-contents__link toc-highlight">Load Alabama Nonprofits</a><li><a href=#load-multiple-states class="table-of-contents__link toc-highlight">Load Multiple 
States</a><li><a href=#recreate-full-dataset class="table-of-contents__link toc-highlight">Recreate Full Dataset</a></ul><li><a href=#managing-state-splits class="table-of-contents__link toc-highlight">Managing State Splits</a><ul><li><a href=#createupdate-state-splits class="table-of-contents__link toc-highlight">Create/Update State Splits</a><li><a href=#upload-to-huggingface class="table-of-contents__link toc-highlight">Upload to HuggingFace</a></ul><li><a href=#statistics class="table-of-contents__link toc-highlight">Statistics</a><li><a href=#notes class="table-of-contents__link toc-highlight">Notes</a></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" 
class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" 
class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body>