Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| <html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-data-sources/form-990-xml" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>Form 990 XML Data (GivingTuesday Data Lake) | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/data-sources/form-990-xml /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="Form 990 XML Data (GivingTuesday Data Lake) | Open Navigator"/><meta data-rh=true name=description content="Extract detailed financial data from IRS Form 990 XML filings using GivingTuesday's 990 Data Infrastructure."/><meta data-rh=true property=og:description content="Extract detailed financial data from IRS Form 990 XML filings using GivingTuesday's 990 Data Infrastructure."/><link data-rh=true rel=icon 
href=/img/favicon.ico /><link data-rh=true rel=canonical href=https://www.communityone.com/docs/data-sources/form-990-xml /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/data-sources/form-990-xml hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/data-sources/form-990-xml hreflang=x-default /><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs> | |
| <symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol> | |
| </defs></svg> | |
| <script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a class="navbar__item navbar__link" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><main class="docMainContainer_TBSr docMainContainerEnhanced_lQrH"><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Form 990 XML Data (GivingTuesday Data Lake)</h1></header> | |
| <p>Extract detailed financial data from IRS Form 990 XML filings using <a href=https://990data.givingtuesday.org/ target=_blank rel="noopener noreferrer" class="">GivingTuesday's 990 Data Infrastructure</a>.</p> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-overview>🎯 Overview<a href=#-overview class=hash-link aria-label="Direct link to 🎯 Overview" title="Direct link to 🎯 Overview" translate=no>&#8203;</a></h2> | |
| <p><strong>Current data:</strong> IRS EO-BMF CSV files (basic info - name, EIN, address, NTEE code)<br/> | |
| <strong>Enhancement:</strong> Form 990 XML filings from GivingTuesday Data Lake (detailed financials - revenue, expenses, programs, grants)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-is-the-givingtuesday-990-data-lake>What is the GivingTuesday 990 Data Lake?<a href=#what-is-the-givingtuesday-990-data-lake class=hash-link aria-label="Direct link to What is the GivingTuesday 990 Data Lake?" title="Direct link to What is the GivingTuesday 990 Data Lake?" translate=no>&#8203;</a></h3> | |
| <p>The <a href=https://990data.givingtuesday.org/ target=_blank rel="noopener noreferrer" class="">990 Data Infrastructure</a> is a collaborative data lake of clean, standardized 990 data in XML format maintained by GivingTuesday. This is the rawest form of 990 data in GivingTuesday's infrastructure.</p> | |
| <p><strong>Data Lake Structure:</strong></p> | |
| <ul> | |
| <li class=""><strong>Bucket</strong>: <code>gt990datalake-rawdata</code> (AWS S3, us-east-1 Virginia)</li> | |
| <li class=""><strong>Access</strong>: Public, no AWS credentials required (<code>--no-sign-request</code>)</li> | |
| <li class=""><strong>E-filed 990s</strong>: <code>EfileData/XmlFiles/</code> (individual XML returns)</li> | |
| <li class=""><strong>Indices</strong>: <code>Indices/990xmls/</code> (CSV files listing all available 990s)</li> | |
| </ul> | |
| <p><strong>Console Access</strong>: <a href=https://us-east-1.console.aws.amazon.com/s3/buckets/gt990datalake-rawdata target=_blank rel="noopener noreferrer" class="">https://us-east-1.console.aws.amazon.com/s3/buckets/gt990datalake-rawdata</a></p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=whats-the-difference>What's the Difference?<a href=#whats-the-difference class=hash-link aria-label="Direct link to What's the Difference?" title="Direct link to What's the Difference?" translate=no>&#8203;</a></h3> | |
| <!-- Comparison of Form 990 data sources; each header cell carries scope=col so assistive technology can associate it with its column. --><table><thead><tr><th scope=col>Data Source<th scope=col>Type<th scope=col>Records<th scope=col>Data Richness<th scope=col>Access Method<th scope=col>Best For<tbody><tr><td><strong>EO-BMF CSV</strong> ✅ Currently using<td>Basic registry<td>1.9M+<td>⭐ Low<td>Direct download<td>Initial org list<tr><td><strong>Google BigQuery</strong> ⚡ Recommended<td>SQL queries<td>5M+<td>⭐⭐⭐⭐⭐ High<td>SQL (serverless)<td><strong>Bulk mission/website extraction</strong><tr><td><strong>GivingTuesday Data Lake</strong> 🚀 Advanced<td>XML files<td>5.4M+<td>⭐⭐⭐⭐⭐ Very High<td>S3 download<td>Detailed parsing, custom fields</table> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-what-additional-data-you-can-get>📊 What Additional Data You Can Get<a href=#-what-additional-data-you-can-get class=hash-link aria-label="Direct link to 📊 What Additional Data You Can Get" title="Direct link to 📊 What Additional Data You Can Get" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=from-form-990-xml>From Form 990 XML:<a href=#from-form-990-xml class=hash-link aria-label="Direct link to From Form 990 XML:" title="Direct link to From Form 990 XML:" translate=no>&#8203;</a></h3> | |
| <ul> | |
| <li class=""><strong>Financials</strong>: Total revenue, program revenue, contributions, grants, investment income</li> | |
| <li class=""><strong>Expenses</strong>: Total expenses, program expenses, administrative, fundraising</li> | |
| <li class=""><strong>Assets</strong>: Total assets, liabilities, net assets</li> | |
| <li class=""><strong>Programs</strong>: Program service descriptions, accomplishments, expenses per program</li> | |
| <li class=""><strong>Governance</strong>: Board members, officer compensation, key employees</li> | |
| <li class=""><strong>Grants</strong>: Grants awarded, grant recipients</li> | |
| <li class=""><strong>Mission</strong>: Detailed mission statement and program descriptions</li> | |
| <li class=""><strong>Activities</strong>: Legislative activities, political expenditures, lobbying</li> | |
| </ul> | |
| <p><strong>Example:</strong> Instead of just knowing "Alabama Oral Health Foundation exists," you get:</p> | |
| <ul> | |
| <li class="">Revenue: $2.5M</li> | |
| <li class="">Program expenses: $1.8M</li> | |
| <li class="">Grants awarded: $500K to 10 community health centers</li> | |
| <li class="">Mission: "Improve oral health access in underserved communities"</li> | |
| <li class="">Officers: CEO Sarah Johnson ($150K salary)</li> | |
| <li class="">Website: <a href=https://alabamaoralhealth.org target=_blank rel="noopener noreferrer" class="">https://alabamaoralhealth.org</a></li> | |
| </ul> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-google-bigquery-recommended-for-bulk-queries>⚡ Google BigQuery (Recommended for Bulk Queries)<a href=#-google-bigquery-recommended-for-bulk-queries class=hash-link aria-label="Direct link to ⚡ Google BigQuery (Recommended for Bulk Queries)" title="Direct link to ⚡ Google BigQuery (Recommended for Bulk Queries)" translate=no>&#8203;</a></h2> | |
| <p><strong>Fastest way to enrich 1M+ organizations with missions and websites!</strong></p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=why-bigquery>Why BigQuery?<a href=#why-bigquery class=hash-link aria-label="Direct link to Why BigQuery?" title="Direct link to Why BigQuery?" translate=no>&#8203;</a></h3> | |
| <p>Google Cloud hosts the complete IRS 990 dataset in BigQuery - a serverless SQL database that lets you query <strong>5 million Form 990s in seconds</strong> without downloading any files.</p> | |
| <p><strong>Key advantages:</strong></p> | |
| <ul> | |
| <li class="">✅ <strong>No downloads</strong>: Query directly in the cloud</li> | |
| <li class="">✅ <strong>Blazing fast</strong>: Bulk queries complete in &lt;30 seconds</li> | |
| <li class="">✅ <strong>Free tier</strong>: First 1 TB/month is free (enough for most research)</li> | |
| <li class="">✅ <strong>SQL interface</strong>: Easy to extract specific fields</li> | |
| <li class="">✅ <strong>No infrastructure</strong>: Serverless, nothing to manage</li> | |
| </ul> | |
| <p><strong>Cost:</strong> Form 990 text fields are small - you can query <strong>all 1.9M nonprofits for ~$0</strong> using the free tier.</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=quick-start>Quick Start<a href=#quick-start class=hash-link aria-label="Direct link to Quick Start" title="Direct link to Quick Start" translate=no>&#8203;</a></h3> | |
| <p><strong>1. Set up Google Cloud (one-time)</strong></p> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Install Google Cloud SDK</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Visit: https://cloud.google.com/sdk/docs/install</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Authenticate</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">gcloud auth login</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Set project</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">gcloud config set project YOUR_PROJECT_ID</span><br/></div></code></pre></div></div> | |
| <p><strong>2. Extract mission statements & websites for all Alabama health orgs</strong></p> | |
| <div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-sql codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic>-- Query in BigQuery Console or via bq CLI</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> ein</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> organization_name</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> website_address_txt </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> website</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> activity_or_mission_desc </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> mission</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> total_revenue_current_year </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> revenue</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div 
class=token-line style=color:#393A34><span class="token plain"> total_expenses_current_year </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> expenses</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> tax_period</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> </span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token identifier">bigquery-public-data.irs_990.irs_990_2023</span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>WHERE</span><span class="token plain"> state </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'AL'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token operator" style=color:#393A34>AND</span><span class="token plain"> ntee_code </span><span class="token operator" style=color:#393A34>LIKE</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'E%'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token operator" style=color:#393A34>AND</span><span class="token plain"> activity_or_mission_desc </span><span class="token operator" style=color:#393A34>IS</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>NOT</span><span class="token plain"> </span><span class="token boolean" 
style=color:#36acaa>NULL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>ORDER</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>BY</span><span class="token plain"> total_revenue_current_year </span><span class="token keyword" style=color:#00009f>DESC</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>LIMIT</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>10000</span><span class="token punctuation" style=color:#393A34>;</span><br/></div></code></pre></div></div> | |
| <p><strong>3. Run query from Python</strong></p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> google</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">cloud </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> bigquery</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Initialize BigQuery client</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">client </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> bigquery</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">Client</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Query 
for Alabama + Michigan health nonprofits with missions</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">query </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>SELECT </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> ein,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> organization_name,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> website_address_txt AS website,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> activity_or_mission_desc AS mission,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> total_revenue_current_year AS revenue,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> total_expenses_current_year AS expenses,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> total_assets_eoy AS assets,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> state,</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> tax_period</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>FROM 
`bigquery-public-data.irs_990.irs_990_2023`</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>WHERE state IN ('AL', 'MI')</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> AND ntee_code LIKE 'E%'</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> AND total_revenue_current_year > 0</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> AND activity_or_mission_desc IS NOT NULL</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Execute query and load to DataFrame</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> client</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">query</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">query</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">to_dataframe</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token 
keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Retrieved </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">df</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> organizations with missions"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Clean XML tags from mission text</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> re</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> 
df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">replace</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>r'<[^>]+>'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> regex</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">strip</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Save locally</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">to_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_990_bigquery.parquet'</span><span class="token punctuation" 
style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <p><strong>4. Merge with existing nonprofit data</strong></p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load your existing nonprofit data (from IRS EO-BMF)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">orgs </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_organizations.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Existing orgs: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" 
style=color:#393A34>(</span><span class="token string-interpolation interpolation">orgs</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load BigQuery results with missions & websites</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">bq_data </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_990_bigquery.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"BigQuery results: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span 
class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">bq_data</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Merge on EIN</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">enriched </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> orgs</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">merge</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> bq_data</span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ein'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'website'</span><span class="token 
punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'revenue'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'expenses'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'assets'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> on</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'ein'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> how</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'left'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> suffixes</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'_990'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Fill missing data from 990 fields</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'mission'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">columns</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'website'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token keyword" 
style=color:#00009f>in</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">columns</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'website'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'website_990'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Show enrichment stats</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">missions_added </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">notna</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">sum</span><span class="token punctuation" style=color:#393A34>(</span><span 
class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">websites_added </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'website'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">notna</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">sum</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"✅ Missions: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">missions_added</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> (</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation number" style=color:#36acaa>100</span><span class="token string-interpolation interpolation operator" 
style=color:#393A34>*</span><span class="token string-interpolation interpolation">missions_added</span><span class="token string-interpolation interpolation operator" style=color:#393A34>/</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">.1f</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>%)"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"✅ Websites: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">websites_added</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> (</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation number" style=color:#36acaa>100</span><span class="token 
string-interpolation interpolation operator" style=color:#393A34>*</span><span class="token string-interpolation interpolation">websites_added</span><span class="token string-interpolation interpolation operator" style=color:#393A34>/</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">.1f</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>%)"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Save enriched dataset</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">enriched</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">to_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_enriched_bigquery.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"💾 Saved </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> enriched organizations"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <p><strong>Expected results:</strong></p> | |
| <ul> | |
| <li class="">30-50% of orgs will have missions (larger orgs file Form 990)</li> | |
| <li class="">20-40% will have websites listed</li> | |
| <li class="">100% of orgs matched by EIN will have revenue/expense data (the merge is a left join, so unmatched orgs keep empty 990 fields)</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=bigquery-table-structure>BigQuery Table Structure<a href=#bigquery-table-structure class=hash-link aria-label="Direct link to BigQuery Table Structure" title="Direct link to BigQuery Table Structure" translate=no>&#8203;</a></h3> | |
| <p>The IRS 990 dataset is organized into <strong>multiple tables</strong> matching Form 990 schedules:</p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=master-index-table>Master Index Table<a href=#master-index-table class=hash-link aria-label="Direct link to Master Index Table" title="Direct link to Master Index Table" translate=no>&#8203;</a></h4> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_index</code></strong></p> | |
| <ul> | |
| <li class="">Links all returns together</li> | |
| <li class="">Fields: <code>ein</code>, <code>organization_name</code>, <code>tax_period</code>, <code>return_id</code> (foreign key)</li> | |
| </ul> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=main-return-tables-by-year>Main Return Tables (by year)<a href=#main-return-tables-by-year class=hash-link aria-label="Direct link to Main Return Tables (by year)" title="Direct link to Main Return Tables (by year)" translate=no>&#8203;</a></h4> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_YYYY</code></strong> - Full Form 990</p> | |
| <ul> | |
| <li class=""><strong>Mission</strong>: <code>activity_or_mission_desc</code> ✅</li> | |
| <li class=""><strong>Website</strong>: <code>website_address_txt</code> ✅</li> | |
| <li class=""><strong>Financials</strong>: <code>total_revenue_current_year</code>, <code>total_expenses_current_year</code></li> | |
| <li class=""><strong>Assets</strong>: <code>total_assets_eoy</code>, <code>total_liabilities_eoy</code></li> | |
| <li class=""><strong>State/NTEE</strong>: <code>state</code>, <code>ntee_code</code></li> | |
| </ul> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_ez_YYYY</code></strong> - Form 990-EZ (smaller orgs)</p> | |
| <ul> | |
| <li class=""><strong>Mission</strong>: <code>mission_description</code> ✅</li> | |
| <li class=""><strong>Website</strong>: <code>website_address_txt</code> ✅</li> | |
| <li class=""><strong>Financials</strong>: <code>total_revenue</code>, <code>total_expenses</code></li> | |
| </ul> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_pf_YYYY</code></strong> - Form 990-PF (Private Foundations)</p> | |
| <ul> | |
| <li class=""><strong>Grants</strong>: Largest grants awarded (for grantmakers)</li> | |
| <li class=""><strong>Financials</strong>: Foundation-specific fields</li> | |
| </ul> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=schedule-tables-detailed-information>Schedule Tables (Detailed Information)<a href=#schedule-tables-detailed-information class=hash-link aria-label="Direct link to Schedule Tables (Detailed Information)" title="Direct link to Schedule Tables (Detailed Information)" translate=no>&#8203;</a></h4> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_schedule_a_YYYY</code></strong></p> | |
| <ul> | |
| <li class="">Public charity status and public support calculations</li> | |
| </ul> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_schedule_j_YYYY</code></strong></p> | |
| <ul> | |
| <li class=""><strong>Executive compensation</strong> (CEO, CFO, board member salaries) 💰</li> | |
| </ul> | |
| <p><strong><code>bigquery-public-data.irs_990.irs_990_schedule_r_YYYY</code></strong></p> | |
| <ul> | |
| <li class="">Related organizations and transactions</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=complete-field-mapping>Complete Field Mapping<a href=#complete-field-mapping class=hash-link aria-label="Direct link to Complete Field Mapping" title="Direct link to Complete Field Mapping" translate=no>&#8203;</a></h3> | |
| <table><thead><tr><th>Data Point<th>Table Name<th>Field Name<th>Notes<tbody><tr><td><strong>Organization Name</strong><td><code>irs_990_index</code><td><code>organization_name</code><td>Master list<tr><td><strong>EIN</strong> (Primary Key)<td>All tables<td><code>ein</code><td>9-digit ID<tr><td><strong>Mission (990-EZ)</strong><td><code>irs_990_ez_YYYY</code><td><code>mission_description</code><td>Smaller orgs<tr><td><strong>Mission (Full 990)</strong><td><code>irs_990_YYYY</code><td><code>activity_or_mission_desc</code><td>Larger orgs<tr><td><strong>Website URL</strong><td><code>irs_990_YYYY</code>, <code>irs_990_ez_YYYY</code><td><code>website_address_txt</code><td>Both forms<tr><td><strong>Total Revenue</strong><td><code>irs_990_YYYY</code><td><code>total_revenue_current_year</code><td>Annual revenue<tr><td><strong>Total Expenses</strong><td><code>irs_990_YYYY</code><td><code>total_expenses_current_year</code><td>Annual expenses<tr><td><strong>Program Service Revenue</strong><td><code>irs_990_YYYY</code><td><code>program_service_revenue</code><td>Program revenue<tr><td><strong>Assets</strong><td><code>irs_990_YYYY</code><td><code>total_assets_eoy</code><td>End of year<tr><td><strong>Liabilities</strong><td><code>irs_990_YYYY</code><td><code>total_liabilities_eoy</code><td>End of year<tr><td><strong>Executive Salaries</strong><td><code>irs_990_schedule_j_YYYY</code><td>Compensation fields<td>CEO, CFO pay<tr><td><strong>Grants Paid</strong><td><code>irs_990_pf_YYYY</code><td>Grant fields<td>For foundations<tr><td><strong>Tax Period</strong><td>All tables<td><code>tax_period</code><td>YYYYMMDD format<tr><td><strong>State</strong><td><code>irs_990_YYYY</code><td><code>state</code><td>2-letter code</table> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=query-examples-for-all-key-fields>Query Examples for All Key Fields<a href=#query-examples-for-all-key-fields class=hash-link aria-label="Direct link to Query Examples for All Key Fields" title="Direct link to Query Examples for All Key Fields" translate=no>&#8203;</a></h3> | |
| <p><strong>Extract mission, website, AND revenue from both 990 and 990-EZ:</strong></p> | |
| <div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-sql codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic>-- Combine Full 990 and 990-EZ for complete coverage</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>WITH</span><span class="token plain"> full_990 </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> ein</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> activity_or_mission_desc </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> mission</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> website_address_txt </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> website</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> total_revenue_current_year </span><span class="token keyword" 
style=color:#00009f>AS</span><span class="token plain"> revenue</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> total_expenses_current_year </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> expenses</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'990'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> form_type</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> </span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token identifier">bigquery-public-data.irs_990.irs_990_2023</span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>WHERE</span><span class="token plain"> state </span><span class="token operator" style=color:#393A34>IN</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'MI'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token operator" style=color:#393A34>AND</span><span class="token plain"> 
activity_or_mission_desc </span><span class="token operator" style=color:#393A34>IS</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>NOT</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>NULL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">ez_990 </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> ein</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> mission_description </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> mission</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> website_address_txt </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> website</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> total_revenue </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> revenue</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> total_expenses </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> expenses</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'990-EZ'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> form_type</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> </span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token identifier">bigquery-public-data.irs_990.irs_990_ez_2023</span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>WHERE</span><span class="token plain"> state </span><span class="token operator" style=color:#393A34>IN</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'MI'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token operator" style=color:#393A34>AND</span><span class="token plain"> mission_description </span><span class="token operator" 
style=color:#393A34>IS</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>NOT</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>NULL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>*</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> full_990</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>UNION</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>ALL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>*</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> ez_990</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>ORDER</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>BY</span><span class="token plain"> revenue </span><span class="token keyword" style=color:#00009f>DESC</span><span class="token punctuation" style=color:#393A34>;</span><br/></div></code></pre></div></div> | |
| <p><strong>Add executive compensation:</strong></p> | |
| <div class="language-sql codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-sql codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic>-- Get mission + CEO salary</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>SELECT</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">ein</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">activity_or_mission_desc </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> mission</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">website_address_txt </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> website</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">total_revenue_current_year </span><span class="token keyword" 
style=color:#00009f>AS</span><span class="token plain"> revenue</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> j</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">compensation_amount </span><span class="token keyword" style=color:#00009f>AS</span><span class="token plain"> ceo_compensation</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>FROM</span><span class="token plain"> </span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token identifier">bigquery-public-data.irs_990.irs_990_2023</span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token plain"> f</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>LEFT</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>JOIN</span><span class="token plain"> </span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token identifier">bigquery-public-data.irs_990.irs_990_schedule_j_2023</span><span class="token identifier punctuation" style=color:#393A34>`</span><span class="token plain"> j</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>ON</span><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">ein </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> j</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">ein</span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain"></span><span class="token keyword" style=color:#00009f>WHERE</span><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">state </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'AL'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token operator" style=color:#393A34>AND</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">j</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title_txt </span><span class="token operator" style=color:#393A34>LIKE</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'%CEO%'</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>OR</span><span class="token plain"> j</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title_txt </span><span class="token operator" style=color:#393A34>LIKE</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'%President%'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>ORDER</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>BY</span><span class="token plain"> revenue </span><span class="token keyword" style=color:#00009f>DESC</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>LIMIT</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>100</span><span class="token punctuation" style=color:#393A34>;</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=data-cleaning-tips>Data Cleaning Tips<a href=#data-cleaning-tips class=hash-link aria-label="Direct link to Data Cleaning Tips" title="Direct link to Data Cleaning Tips" translate=no>&#8203;</a></h3> | |
| <p><strong>The catch:</strong> Some fields contain leftover XML tags (like <code>&lt;MissionDesc&gt;</code>). Strip them with a regex:</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> re</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Clean XML tags from missions</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">replace</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>r'<[^>]+>'</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> regex</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Trim whitespace</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">strip</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Remove common 
artifacts</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'mission_clean'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">replace</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>r'\s+'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>' '</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> regex</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=cost-estimation>Cost Estimation<a href=#cost-estimation class=hash-link aria-label="Direct link to Cost Estimation" title="Direct link to Cost Estimation" translate=no>&#8203;</a></h3> | |
| <p><strong>Free tier:</strong> 1 TB of queries per month (resets monthly)</p> | |
| <p><strong>Typical query costs:</strong></p> | |
| <ul> | |
| <li class="">Extract missions for 1M orgs: <strong>~50 GB scanned = FREE</strong></li> | |
| <li class="">Extract all fields for 1M orgs: <strong>~200 GB scanned = FREE</strong></li> | |
| <li class="">Full table scan of all years: <strong>~2 TB = $10</strong> (one-time cost)</li> | |
| </ul> | |
| <p><strong>Tip:</strong> Use <code>WHERE</code> clauses to filter by state/NTEE to reduce data scanned.</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-givingtuesday-data-lake-for-advanced-parsing>GivingTuesday Data Lake (For Advanced Parsing)<a href=#-givingtuesday-data-lake-for-advanced-parsing class=hash-link aria-label="Direct link to GivingTuesday Data Lake (For Advanced Parsing)" title="Direct link to GivingTuesday Data Lake (For Advanced Parsing)" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=option-1-via-aws-console-free-account>Option 1: Via AWS Console (Free Account)<a href=#option-1-via-aws-console-free-account class=hash-link aria-label="Direct link to Option 1: Via AWS Console (Free Account)" title="Direct link to Option 1: Via AWS Console (Free Account)" translate=no>&#8203;</a></h3> | |
| <ol> | |
| <li class="">Visit <a href=https://aws.amazon.com target=_blank rel="noopener noreferrer" class="">aws.amazon.com</a> and create a free AWS account (a credit card is required for identity verification, but accessing this data incurs no charges)</li> | |
| <li class="">Log in to AWS Console</li> | |
| <li class="">Open the data lake: <a href="https://s3.console.aws.amazon.com/s3/buckets/gt990datalake-rawdata/?region=us-east-1&amp;tab=objects" target=_blank rel="noopener noreferrer" class="">https://s3.console.aws.amazon.com/s3/buckets/gt990datalake-rawdata/?region=us-east-1&amp;tab=objects</a></li> | |
| </ol> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=option-2-via-command-line-recommended-for-automation>Option 2: Via Command Line (Recommended for Automation)<a href=#option-2-via-command-line-recommended-for-automation class=hash-link aria-label="Direct link to Option 2: Via Command Line (Recommended for Automation)" title="Direct link to Option 2: Via Command Line (Recommended for Automation)" translate=no>&#8203;</a></h3> | |
| <p><strong>Prerequisites</strong>: Install <a href=https://aws.amazon.com/cli/ target=_blank rel="noopener noreferrer" class="">AWS CLI</a></p> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># List main bucket contents</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">aws s3 ls gt990datalake-rawdata --no-sign-request</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># List indices (CSV files listing all 990s)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">aws s3 ls gt990datalake-rawdata/Indices/990xmls/ --no-sign-request</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Download the latest index</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">aws s3 cp \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> s3://gt990datalake-rawdata/Indices/990xmls/index_all_years_efiledata_xmls_created_on_2023-10-29.csv \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> data/cache/form990_index.csv \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --no-sign-request</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Download a specific 990 XML</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">aws s3 cp 
\</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> s3://gt990datalake-rawdata/EfileData/XmlFiles/[OBJECT_ID]_public.xml \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> data/cache/form_990_xml/ \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --no-sign-request</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=option-3-automated-python-integration>Option 3: Automated Python Integration<a href=#option-3-automated-python-integration class=hash-link aria-label="Direct link to Option 3: Automated Python Integration" title="Direct link to Option 3: Automated Python Integration" translate=no>&#8203;</a></h3> | |
| <p>Use our enrichment script that automates downloading and parsing:</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> boto3</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> botocore </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> UNSIGNED</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> botocore</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">config </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Config</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Configure S3 client for GivingTuesday Data Lake (no credentials needed)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">s3 </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> boto3</span><span 
class="token punctuation" style=color:#393A34>.</span><span class="token plain">client</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'s3'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> region_name</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'us-east-1'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> config</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">Config</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">signature_version</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">UNSIGNED</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Download index to find available 990s</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">index_response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> s3</span><span class="token punctuation" 
style=color:#393A34>.</span><span class="token plain">get_object</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> Bucket</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'gt990datalake-rawdata'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> Key</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'Indices/990xmls/index_all_years_efiledata_xmls_created_on_2023-10-29.csv'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Read EIN as text: keeps leading zeros and matches the string comparison below</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">index_df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_csv</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">index_response</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'Body'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> dtype</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>'EIN'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation 
string" style=color:#e3116c>f"Available 990s: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">index_df</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Columns: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">index_df</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">columns</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">tolist</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span 
class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Find 990s for a specific EIN</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">ein </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"123456789"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">org_filings </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> index_df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">index_df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'EIN'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> ein</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Download latest filing</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span 
class="token builtin">len</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">org_filings</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> latest </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> org_filings</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">iloc</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> xml_key </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"EfileData/XmlFiles/</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">latest</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>[</span><span class="token string-interpolation interpolation string" style=color:#e3116c>'OBJECT_ID'</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>]</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>_public.xml"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> xml_obj </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> s3</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get_object</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">Bucket</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'gt990datalake-rawdata'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Key</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">xml_key</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> xml_content </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> xml_obj</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'Body'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Parse with xmltodict (simplified approach)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> xmltodict</span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> doc </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> xmltodict</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">xml_content</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Extract fields from doc['Return']['ReturnData']['IRS990']</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-automated-enrichment-script>🤖 Automated Enrichment Script<a href=#-automated-enrichment-script class=hash-link aria-label="Direct link to 🤖 Automated Enrichment Script" title="Direct link to 🤖 Automated Enrichment Script" translate=no>&#8203;</a></h2> | |
| <p>We provide <strong><a class="" href=/scripts/enrich_nonprofits_gt990.py>scripts/enrich_nonprofits_gt990.py</a></strong> - a complete automated solution.</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=quick-start-1>Quick Start<a href=#quick-start-1 class=hash-link aria-label="Direct link to Quick Start" title="Direct link to Quick Start" translate=no>&#8203;</a></h3> | |
| <p><strong>Step 1: Download Index (One-Time Setup)</strong></p> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Install dependencies</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">pip install boto3 xmltodict pandas pyarrow tqdm loguru</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Download GivingTuesday Data Lake index (~200MB CSV, 1M+ records)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/enrich_nonprofits_gt990.py --download-index</span><br/></div></code></pre></div></div> | |
| <p>This creates <code>data/cache/form990_gt_index.parquet</code> for fast EIN→OBJECT_ID lookups.</p> | |
| <p><strong>Step 2: Enrich Your Data</strong></p> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Enrich all Tuscaloosa nonprofits</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/enrich_nonprofits_gt990.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --input data/gold/nonprofits_tuscaloosa.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --output data/gold/nonprofits_tuscaloosa_form990.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --concurrent 20</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Enrich Alabama + Michigan health orgs</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/enrich_nonprofits_gt990.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --input data/gold/nonprofits_organizations.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --output data/gold/nonprofits_990_enriched.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --states AL MI \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --ntee E \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --concurrent 50</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" 
style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Test with sample</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/enrich_nonprofits_gt990.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --input data/gold/nonprofits_organizations.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --output /tmp/test_990.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --sample 100</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=features>Features<a href=#features class=hash-link aria-label="Direct link to Features" title="Direct link to Features" translate=no>&#8203;</a></h3> | |
| <p>✅ <strong>Index-based lookup</strong>: Uses OBJECT_ID from GivingTuesday index (no filename guessing)<br/> | |
| <!-- -->✅ <strong>Async/parallel</strong>: Process 20-50 organizations concurrently<br/> | |
| <!-- -->✅ <strong>Smart caching</strong>: JSON cache prevents re-downloading same 990s<br/> | |
| <!-- -->✅ <strong>Automatic retries</strong>: Handles S3 errors gracefully<br/> | |
| <!-- -->✅ <strong>Progress tracking</strong>: tqdm progress bar with ETA<br/> | |
| <!-- -->✅ <strong>Comprehensive logging</strong>: Detailed logs with statistics</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=performance>Performance<a href=#performance class=hash-link aria-label="Direct link to Performance" title="Direct link to Performance" translate=no>&#8203;</a></h3> | |
| <ul> | |
| <li class=""><strong>Speed</strong>: ~2-3 sec/org (download + parse)</li> | |
| <li class=""><strong>Concurrent=20</strong>: ~450 orgs/hour</li> | |
| <li class=""><strong>Concurrent=50</strong>: ~1,100 orgs/hour</li> | |
| <li class=""><strong>1,000 orgs @ 50% success</strong>: ~15-20 minutes</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=enriched-fields>Enriched Fields<a href=#enriched-fields class=hash-link aria-label="Direct link to Enriched Fields" title="Direct link to Enriched Fields" translate=no>&#8203;</a></h3> | |
| <p>The script adds these columns to your DataFrame:</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">form_990_status </span><span class="token comment" style=color:#999988;font-style:italic># 'found', 'not_found', or 'parse_error'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_tax_year </span><span class="token comment" style=color:#999988;font-style:italic># e.g., 202312</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_filing_type </span><span class="token comment" style=color:#999988;font-style:italic># 990, 990EZ, or 990PF</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_total_revenue </span><span class="token comment" style=color:#999988;font-style:italic># Total revenue</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_total_expenses </span><span class="token comment" style=color:#999988;font-style:italic># Total expenses</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_net_income </span><span class="token comment" style=color:#999988;font-style:italic># Revenue - Expenses</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_contributions </span><span class="token comment" style=color:#999988;font-style:italic># Donations and grants received</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain">form_990_program_revenue </span><span class="token comment" style=color:#999988;font-style:italic># Revenue from programs/services</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_investment_income </span><span class="token comment" style=color:#999988;font-style:italic># Investment income</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_program_expenses </span><span class="token comment" style=color:#999988;font-style:italic># Program service expenses</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_admin_expenses </span><span class="token comment" style=color:#999988;font-style:italic># Administrative expenses</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_fundraising_expenses </span><span class="token comment" style=color:#999988;font-style:italic># Fundraising expenses</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_total_assets </span><span class="token comment" style=color:#999988;font-style:italic># Total assets</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_total_liabilities </span><span class="token comment" style=color:#999988;font-style:italic># Total liabilities</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_net_assets </span><span class="token comment" style=color:#999988;font-style:italic># Assets - Liabilities</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_grants_paid </span><span 
class="token comment" style=color:#999988;font-style:italic># Grants awarded to others</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_mission </span><span class="token comment" style=color:#999988;font-style:italic># Mission statement</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">form_990_last_updated </span><span class="token comment" style=color:#999988;font-style:italic># Timestamp of enrichment</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Filter to your state</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">alabama_filings </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> filings_index</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">filings_index</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'State'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Alabama filings: </span><span 
class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">alabama_filings</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Sample columns</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># EIN, OrganizationName, State, URL, SubmittedOn, TaxPeriod</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-integration-with-current-data>🔗 Integration with Current Data<a href=#-integration-with-current-data class=hash-link aria-label="Direct link to 🔗 Integration with Current Data" title="Direct link to 🔗 Integration with Current Data" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=enrich-existing-nonprofits>Enrich Existing Nonprofits<a href=#enrich-existing-nonprofits class=hash-link aria-label="Direct link to Enrich Existing Nonprofits" title="Direct link to Enrich Existing Nonprofits" translate=no>&#8203;</a></h3> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> pandas </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> pd</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> form990_parser </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Form990Parser</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> boto3</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Load your current nonprofit data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">orgs </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_organizations.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Filter to Alabama health organizations (NTEE code E)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">health_orgs </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'state'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>&</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ntee_code'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">startswith</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'E'</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"> na</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>False</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Alabama health nonprofits: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">health_orgs</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span 
class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Enrich with Form 990 data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">s3 </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> boto3</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">client</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'s3'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> region_name</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'us-east-1'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">parser </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Form990Parser</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">enriched </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> idx</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> org </span><span class="token keyword" 
style=color:#00009f>in</span><span class="token plain"> health_orgs</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">iterrows</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> ein </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> org</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ein'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Try to find most recent 990</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>try</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Construct likely S3 key (simplified - actual naming varies)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> key </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation 
interpolation">ein</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>_202312_990.xml"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> xml_obj </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> s3</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get_object</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">Bucket</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>'irs-form-990'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Key</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">key</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> xml_content </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> xml_obj</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'Body'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> filing_data </span><span class="token operator" 
style=color:#393A34>=</span><span class="token plain"> parser</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse_xml</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">xml_content</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Merge with org data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> org_enriched </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> org</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">to_dict</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> org_enriched</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">update</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">filing_data</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">org_enriched</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>except</span><span class="token plain"> Exception </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> e</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># No 990 found for this org</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>continue</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">enriched_df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">DataFrame</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">enriched</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">enriched_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">to_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/gold/nonprofits_alabama_health_990.parquet'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Enriched </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched_df</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> organizations with Form 990 data"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-data-schema>💾 Data Schema<a href=#-data-schema class=hash-link aria-label="Direct link to 💾 Data Schema" title="Direct link to 💾 Data Schema" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=form-990-parser-output>Form 990 Parser Output<a href=#form-990-parser-output class=hash-link aria-label="Direct link to Form 990 Parser Output" title="Direct link to Form 990 Parser Output" translate=no>&#8203;</a></h3> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Basic Info</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'ein'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'123456789'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'organization_name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Alabama Oral Health Foundation'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'tax_year'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>2023</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
string" style=color:#e3116c>'tax_period'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'202312'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Financials</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'total_revenue'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>2500000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'total_expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>2100000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'net_income'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>400000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'total_assets'</span><span class="token punctuation" 
style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>5000000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'total_liabilities'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>500000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Revenue Breakdown</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'contributions_grants'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>1200000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'program_service_revenue'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>800000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'investment_income'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> 
</span><span class="token number" style=color:#36acaa>300000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'other_revenue'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>200000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Expense Breakdown</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'program_expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>1800000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'administrative_expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>200000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'fundraising_expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" 
style=color:#36acaa>100000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Programs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'program_service_descriptions'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'description'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Community dental clinics'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>1000000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'grants'</span><span class="token punctuation" style=color:#393A34>:</span><span 
class="token plain"> </span><span class="token number" style=color:#36acaa>200000</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'description'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'School fluoride programs'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'expenses'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>500000</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'grants'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>100000</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" 
style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Governance</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'officers'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Sarah Johnson'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'CEO'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'compensation'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token 
number" style=color:#36acaa>150000</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'John Smith'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'CFO'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'compensation'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>120000</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" 
style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Mission</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'mission_statement'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Improve oral health access in underserved communities...'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'program_accomplishments'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Served 10,000 patients in 2023...'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>}</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-performance-considerations>📊 Performance Considerations<a href=#-performance-considerations class=hash-link aria-label="Direct link to 📊 Performance Considerations" title="Direct link to 📊 Performance Considerations" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=data-volume>Data Volume<a href=#data-volume class=hash-link aria-label="Direct link to Data Volume" title="Direct link to Data Volume" translate=no>&#8203;</a></h3> | |
| <ul> | |
| <li class=""><strong>Form 990 XMLs</strong>: ~300,000 new filings per year</li> | |
| <li class=""><strong>Typical XML size</strong>: 500KB - 5MB</li> | |
| <li class=""><strong>Total storage</strong>: ~500GB for all historical 990s (2011-present)</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=processing-speed>Processing Speed<a href=#processing-speed class=hash-link aria-label="Direct link to Processing Speed" title="Direct link to Processing Speed" translate=no>&#8203;</a></h3> | |
| <p><strong>Sequential (current approach):</strong></p> | |
| <ul> | |
| <li class="">Download + parse: ~2-5 seconds per 990</li> | |
| <li class="">300,000 filings × 3 sec = <strong>250 hours</strong> 😱</li> | |
| </ul> | |
| <p><strong>Async parallel (recommended):</strong></p> | |
| <ul> | |
| <li class="">50 concurrent workers</li> | |
| <li class="">300,000 filings × 3 sec / 50 = <strong>5 hours</strong> ⚡</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=smart-strategies>Smart Strategies<a href=#smart-strategies class=hash-link aria-label="Direct link to Smart Strategies" title="Direct link to Smart Strategies" translate=no>&#8203;</a></h3> | |
| <ol> | |
| <li class=""> | |
| <p><strong>Filter first</strong>: Only download 990s for organizations you care about</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Only health orgs in your states</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">health_eins </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'state'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isin</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'AL'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'MI'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>&</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
punctuation" style=color:#393A34>(</span><span class="token plain">orgs</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ntee_code'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">startswith</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'E'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ein'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">tolist</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Result: ~50,000 instead of 300,000 = 1 hour</span><br/></div></code></pre></div></div> | |
| </li> | |
| <li class=""> | |
| <p><strong>Use index files</strong>: Download the index first, filter, then fetch XMLs</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Get index</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">index </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_json</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'https://s3.amazonaws.com/irs-form-990/index_2023.json'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Filter to your EINs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">relevant </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> index</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">index</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'EIN'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isin</span><span class="token punctuation" style=color:#393A34>(</span><span 
class="token plain">health_eins</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Only download these</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> url </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> relevant</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'URL'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># download and parse</span><br/></div></code></pre></div></div> | |
| </li> | |
| <li class=""> | |
| <p><strong>Cache aggressively</strong>: 990s don't change after filing</p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">cache_dir </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Path</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'data/cache/form_990_xml'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">cache_file </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> cache_dir </span><span class="token operator" style=color:#393A34>/</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">ein</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>_</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">tax_year</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>.parquet"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain"></span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> cache_file</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">exists</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> pd</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read_parquet</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">cache_file</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>else</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># download, parse, cache</span><br/></div></code></pre></div></div> | |
| </li> | |
| </ol> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-use-cases>🎯 Use Cases<a href=#-use-cases class=hash-link aria-label="Direct link to 🎯 Use Cases" title="Direct link to 🎯 Use Cases" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-financial-health-analysis>1. Financial Health Analysis<a href=#1-financial-health-analysis class=hash-link aria-label="Direct link to 1. Financial Health Analysis" title="Direct link to 1. Financial Health Analysis" translate=no>&#8203;</a></h3> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Which nonprofits are most financially stable?</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'efficiency_ratio'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'program_expenses'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>/</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'total_expenses'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'reserve_months'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" 
style=color:#e3116c>'net_assets'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>/</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'total_expenses'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>/</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>12</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">efficient </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'efficiency_ratio'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0.75</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># >75% on programs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token 
string-interpolation string" style=color:#e3116c>f"Efficient organizations: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">efficient</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-grant-research>2. Grant Research<a href=#2-grant-research class=hash-link aria-label="Direct link to 2. Grant Research" title="Direct link to 2. Grant Research" translate=no>&#8203;</a></h3> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Who's giving grants in oral health?</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">grantmakers </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'grants_paid'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>&</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'ntee_code'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span 
class="token punctuation" style=color:#393A34>.</span><span class="token plain">startswith</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'E'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Oral health grantmakers: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">grantmakers</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token 
punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Total grants: $</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">grantmakers</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>[</span><span class="token string-interpolation interpolation string" style=color:#e3116c>'grants_paid'</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>]</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation builtin">sum</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,.0f</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-program-discovery>3. Program Discovery<a href=#3-program-discovery class=hash-link aria-label="Direct link to 3. Program Discovery" title="Direct link to 3. Program Discovery" translate=no>&#8203;</a></h3> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Find organizations running specific programs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">fluoride_programs </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'program_service_descriptions'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">contains</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'fluoride'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>case</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>False</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> na</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>False</span><span class="token punctuation" style=color:#393A34>)</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Orgs with fluoride programs: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">fluoride_programs</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>:</span><span class="token string-interpolation interpolation format-spec">,</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-next-steps>🚀 Next Steps<a href=#-next-steps class=hash-link aria-label="Direct link to 🚀 Next Steps" title="Direct link to 🚀 Next Steps" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=quick-test>Quick Test<a href=#quick-test class=hash-link aria-label="Direct link to Quick Test" title="Direct link to Quick Test" translate=no>&#8203;</a></h3> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Install dependencies</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">pip install form-990-xml-parser boto3</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># Test with single organization</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python -c "</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">from form990_parser import Form990Parser</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">import boto3</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">s3 = boto3.client('s3', region_name='us-east-1')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">ein = '631307851' # Delta Dental of Alabama (example)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">try:</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> # Try to fetch 2023 filing</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> key = f'{ein}_202312_990.xml'</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> obj = 
s3.get_object(Bucket='irs-form-990', Key=key)</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> parser = Form990Parser()</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> data = parser.parse_xml(obj['Body'].read())</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> print(f'Organization: {data.get(\"organization_name\")}')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> print(f'Revenue: \${data.get(\"total_revenue\", 0):,.0f}')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> print(f'Assets: \${data.get(\"total_assets\", 0):,.0f}')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">except Exception as e:</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> print(f'Error: {e}')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">"</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=full-integration>Full Integration<a href=#full-integration class=hash-link aria-label="Direct link to Full Integration" title="Direct link to Full Integration" translate=no>&#8203;</a></h3> | |
| <p>Create a new enrichment pipeline:</p> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># Create new script</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python scripts/enrich_nonprofits_form990.py \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --input data/gold/nonprofits_organizations.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --output data/gold/nonprofits_organizations_990.parquet \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --states AL MI \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --ntee E \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> --concurrent 50</span><br/></div></code></pre></div></div> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-resources>📚 Resources<a href=#-resources class=hash-link aria-label="Direct link to 📚 Resources" title="Direct link to 📚 Resources" translate=no>&#8203;</a></h2> | |
| <ul> | |
| <li class=""><strong>Giving Tuesday GitHub</strong>: <a href=https://github.com/Giving-Tuesday target=_blank rel="noopener noreferrer" class="">https://github.com/Giving-Tuesday</a></li> | |
| <li class=""><strong>Form 990 XML Parser</strong>: <a href=https://github.com/Giving-Tuesday/form-990-xml-parser target=_blank rel="noopener noreferrer" class="">https://github.com/Giving-Tuesday/form-990-xml-parser</a></li> | |
| <li class=""><strong>Form 990 XML Mapper</strong>: <a href=https://github.com/Giving-Tuesday/form-990-xml-mapper target=_blank rel="noopener noreferrer" class="">https://github.com/Giving-Tuesday/form-990-xml-mapper</a></li> | |
| <li class=""><strong>IRS 990 AWS Bucket</strong>: <a href=https://registry.opendata.aws/irs990/ target=_blank rel="noopener noreferrer" class="">https://registry.opendata.aws/irs990/</a></li> | |
| <li class=""><strong>IRS Index Files</strong>: <a href=https://s3.amazonaws.com/irs-form-990/index_YYYY.json target=_blank rel="noopener noreferrer" class="">https://s3.amazonaws.com/irs-form-990/index_YYYY.json</a></li> | |
| <li class=""><strong>990 Schema Documentation</strong>: <a href=https://www.irs.gov/e-file-providers/current-valid-xml-schemas-and-business-rules target=_blank rel="noopener noreferrer" class="">https://www.irs.gov/e-file-providers/current-valid-xml-schemas-and-business-rules</a></li> | |
| </ul> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-faq>❓ FAQ<a href=#-faq class=hash-link aria-label="Direct link to ❓ FAQ" title="Direct link to ❓ FAQ" translate=no>&#8203;</a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=q-are-we-currently-downloading-xml>Q: Are we currently downloading XML?<a href=#q-are-we-currently-downloading-xml class=hash-link aria-label="Direct link to Q: Are we currently downloading XML?" title="Direct link to Q: Are we currently downloading XML?" translate=no>&#8203;</a></h3> | |
| <p><strong>A: No.</strong> Currently using EO-BMF CSV files (basic data). Form 990 XML would be an enhancement for detailed financials.</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=q-can-we-use-giving-tuesday-libraries>Q: Can we use Giving Tuesday libraries?<a href=#q-can-we-use-giving-tuesday-libraries class=hash-link aria-label="Direct link to Q: Can we use Giving Tuesday libraries?" title="Direct link to Q: Can we use Giving Tuesday libraries?" translate=no>&#8203;</a></h3> | |
| <p><strong>A: Yes!</strong> They're open source and designed exactly for this purpose. Would provide much richer data.</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=q-how-much-data-is-it>Q: How much data is it?<a href=#q-how-much-data-is-it class=hash-link aria-label="Direct link to Q: How much data is it?" title="Direct link to Q: How much data is it?" translate=no>&#8203;</a></h3> | |
| <p><strong>A:</strong></p> | |
| <ul> | |
| <li class="">All 990s (2011-present): ~500GB</li> | |
| <li class="">Alabama only: ~5GB</li> | |
| <li class="">Alabama health orgs: ~500MB</li> | |
| <li class="">Very manageable!</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=q-whats-the-license>Q: What's the license?<a href=#q-whats-the-license class=hash-link aria-label="Direct link to Q: What's the license?" title="Direct link to Q: What's the license?" translate=no>&#8203;</a></h3> | |
| <p><strong>A:</strong> Public domain (U.S. Government data) + Giving Tuesday tools are open source</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=q-integration-effort>Q: Integration effort?<a href=#q-integration-effort class=hash-link aria-label="Direct link to Q: Integration effort?" title="Direct link to Q: Integration effort?" translate=no>&#8203;</a></h3> | |
| <p><strong>A:</strong> Low - can reuse existing async enrichment patterns. Estimated: 1-2 days for initial integration.</div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/data-sources/form-990-xml.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#-overview class="table-of-contents__link toc-highlight">π― Overview</a><ul><li><a href=#what-is-the-givingtuesday-990-data-lake class="table-of-contents__link toc-highlight">What is the GivingTuesday 990 Data Lake?</a><li><a href=#whats-the-difference class="table-of-contents__link toc-highlight">What's the Difference?</a></ul><li><a href=#-what-additional-data-you-can-get class="table-of-contents__link toc-highlight">π What Additional Data You Can Get</a><ul><li><a href=#from-form-990-xml class="table-of-contents__link toc-highlight">From Form 990 XML:</a></ul><li><a href=#-google-bigquery-recommended-for-bulk-queries class="table-of-contents__link toc-highlight">β‘ Google BigQuery (Recommended for Bulk Queries)</a><ul><li><a href=#why-bigquery class="table-of-contents__link toc-highlight">Why BigQuery?</a><li><a href=#quick-start class="table-of-contents__link toc-highlight">Quick Start</a><li><a 
href=#bigquery-table-structure class="table-of-contents__link toc-highlight">BigQuery Table Structure</a><ul><li><a href=#master-index-table class="table-of-contents__link toc-highlight">Master Index Table</a><li><a href=#main-return-tables-by-year class="table-of-contents__link toc-highlight">Main Return Tables (by year)</a><li><a href=#schedule-tables-detailed-information class="table-of-contents__link toc-highlight">Schedule Tables (Detailed Information)</a></ul><li><a href=#complete-field-mapping class="table-of-contents__link toc-highlight">Complete Field Mapping</a><li><a href=#query-examples-for-all-key-fields class="table-of-contents__link toc-highlight">Query Examples for All Key Fields</a><li><a href=#data-cleaning-tips class="table-of-contents__link toc-highlight">Data Cleaning Tips</a><li><a href=#cost-estimation class="table-of-contents__link toc-highlight">Cost Estimation</a></ul><li><a href=#-givingtuesday-data-lake-for-advanced-parsing class="table-of-contents__link toc-highlight">π GivingTuesday Data Lake (For Advanced Parsing)</a><ul><li><a href=#option-1-via-aws-console-free-account class="table-of-contents__link toc-highlight">Option 1: Via AWS Console (Free Account)</a><li><a href=#option-2-via-command-line-recommended-for-automation class="table-of-contents__link toc-highlight">Option 2: Via Command Line (Recommended for Automation)</a><li><a href=#option-3-automated-python-integration class="table-of-contents__link toc-highlight">Option 3: Automated Python Integration</a></ul><li><a href=#-automated-enrichment-script class="table-of-contents__link toc-highlight">π€ Automated Enrichment Script</a><ul><li><a href=#quick-start-1 class="table-of-contents__link toc-highlight">Quick Start</a><li><a href=#features class="table-of-contents__link toc-highlight">Features</a><li><a href=#performance class="table-of-contents__link toc-highlight">Performance</a><li><a href=#enriched-fields class="table-of-contents__link toc-highlight">Enriched 
Fields</a></ul><li><a href=#-integration-with-current-data class="table-of-contents__link toc-highlight">π Integration with Current Data</a><ul><li><a href=#enrich-existing-nonprofits class="table-of-contents__link toc-highlight">Enrich Existing Nonprofits</a></ul><li><a href=#-data-schema class="table-of-contents__link toc-highlight">πΎ Data Schema</a><ul><li><a href=#form-990-parser-output class="table-of-contents__link toc-highlight">Form 990 Parser Output</a></ul><li><a href=#-performance-considerations class="table-of-contents__link toc-highlight">π Performance Considerations</a><ul><li><a href=#data-volume class="table-of-contents__link toc-highlight">Data Volume</a><li><a href=#processing-speed class="table-of-contents__link toc-highlight">Processing Speed</a><li><a href=#smart-strategies class="table-of-contents__link toc-highlight">Smart Strategies</a></ul><li><a href=#-use-cases class="table-of-contents__link toc-highlight">π― Use Cases</a><ul><li><a href=#1-financial-health-analysis class="table-of-contents__link toc-highlight">1. Financial Health Analysis</a><li><a href=#2-grant-research class="table-of-contents__link toc-highlight">2. Grant Research</a><li><a href=#3-program-discovery class="table-of-contents__link toc-highlight">3. 
Program Discovery</a></ul><li><a href=#-next-steps class="table-of-contents__link toc-highlight">π Next Steps</a><ul><li><a href=#quick-test class="table-of-contents__link toc-highlight">Quick Test</a><li><a href=#full-integration class="table-of-contents__link toc-highlight">Full Integration</a></ul><li><a href=#-resources class="table-of-contents__link toc-highlight">π Resources</a><li><a href=#-faq class="table-of-contents__link toc-highlight">β FAQ</a><ul><li><a href=#q-are-we-currently-downloading-xml class="table-of-contents__link toc-highlight">Q: Are we currently downloading XML?</a><li><a href=#q-can-we-use-giving-tuesday-libraries class="table-of-contents__link toc-highlight">Q: Can we use Giving Tuesday libraries?</a><li><a href=#q-how-much-data-is-it class="table-of-contents__link toc-highlight">Q: How much data is it?</a><li><a href=#q-whats-the-license class="table-of-contents__link toc-highlight">Q: What's the license?</a><li><a href=#q-integration-effort class="table-of-contents__link toc-highlight">Q: Integration effort?</a></ul></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank 
rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" 
class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body> |