<!-- Deploy 3d16fe6 (verified) by jcbowyer: Consolidated gold tables, fixed nginx docs routing -->
<!doctype html><html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-data-sources/huggingface-datasets" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>βœ… Confirmed: HuggingFace Datasets That WILL Help | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/data-sources/huggingface-datasets /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="βœ… Confirmed: HuggingFace Datasets That WILL Help | Open Navigator"/><meta data-rh=true name=description content="Quick Answer: YES, 2 of 4 will help significantly!"/><meta data-rh=true property=og:description content="Quick Answer: YES, 2 of 4 will help significantly!"/><link data-rh=true rel=icon href=/img/favicon.ico /><link data-rh=true rel=canonical 
href=https://www.communityone.com/docs/data-sources/huggingface-datasets /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/data-sources/huggingface-datasets hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/data-sources/huggingface-datasets hreflang=x-default /><script data-rh=true type=application/ld+json>{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","item":"https://www.communityone.com/docs/data-sources/huggingface-datasets","name":"βœ… Confirmed: HuggingFace Datasets That WILL Help","position":1}]}</script><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs>
<symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
</defs></svg>
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a aria-current=page class="navbar__item navbar__link navbar__link--active" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class=sidebarViewport_aRkj><div class=sidebar_njMd><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role=button aria-expanded=true href=/docs/for-developers><span title="Developers & Technical Users" class=categoryLinkLabel_W154>Developers & Technical Users</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/for-developers><span title="For Developers & Technical Users" class=linkLabel_WmDU>For Developers & Technical Users</span></a><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/quickstart><span title="Setup & Installation" class=categoryLinkLabel_W154>Setup & Installation</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role=button aria-expanded=true tabindex=0 href=/docs/data-sources/citations><span title="Data Sources (Technical)" 
class=categoryLinkLabel_W154>Data Sources (Technical)</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/citations><span title="Data and Citations" class=linkLabel_WmDU>Data and Citations</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/legal-compliance><span title="Legal & Compliance" class=linkLabel_WmDU>Legal & Compliance</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/data-model-erd><span title="Data Model & Entity Relationship Diagram" class=linkLabel_WmDU>Data Model & Entity Relationship Diagram</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/jurisdiction-discovery><span title="Jurisdiction Discovery System" class=linkLabel_WmDU>Jurisdiction Discovery System</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/census-data><span title="Census Bureau Data URL Fix" class=linkLabel_WmDU>Census Bureau Data URL Fix</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current=page tabindex=0 href=/docs/data-sources/huggingface-datasets><span title="βœ… Confirmed: HuggingFace Datasets That WILL Help" class=linkLabel_WmDU>βœ… Confirmed: HuggingFace Datasets That WILL Help</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/url-datasets><span title="🎯 ANSWER: Yes, You Should Look at Those Datasets!" 
class=linkLabel_WmDU>🎯 ANSWER: Yes, You Should Look at Those Datasets!</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/youtube-discovery><span title="YouTube Channel Discovery - Issues & Solutions" class=linkLabel_WmDU>YouTube Channel Discovery - Issues & Solutions</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/video-channels><span title="Video Channel Discovery: Current State & Enhancement Plan" class=linkLabel_WmDU>Video Channel Discovery: Current State & Enhancement Plan</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/open-source-repositories><span title="Open Source Repository Data Sources" class=linkLabel_WmDU>Open Source Repository Data Sources</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/ballot-election-sources><span title="Ballot Measures & Election Results" class=linkLabel_WmDU>Ballot Measures & Election Results</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/polling-survey-sources><span title="Public Opinion & Survey Data" class=linkLabel_WmDU>Public Opinion & Survey Data</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/factcheck-sources><span title="Fact-Checking & Claim Verification" class=linkLabel_WmDU>Fact-Checking & Claim Verification</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div 
class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/guides/jurisdiction-setup><span title="How-To Guides" class=categoryLinkLabel_W154>How-To Guides</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/integrations/mcp-server><span title=Integrations class=categoryLinkLabel_W154>Integrations</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/deployment/databricks-apps><span title=Deployment class=categoryLinkLabel_W154>Deployment</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/development/database-setup><span title=Development class=categoryLinkLabel_W154>Development</span></a></div></ul></ul></nav></div></div></aside><main class=docMainContainer_TBSr><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label=Breadcrumbs><ul class=breadcrumbs><li class=breadcrumbs__item><a aria-label="Home page" class=breadcrumbs__link href=/><svg viewBox="0 0 24 24" class=breadcrumbHomeIcon_YNFT><path 
d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill=currentColor /></svg></a><li class=breadcrumbs__item><span class=breadcrumbs__link>Developers & Technical Users</span><li class=breadcrumbs__item><span class=breadcrumbs__link>Data Sources (Technical)</span><li class="breadcrumbs__item breadcrumbs__item--active"><span class=breadcrumbs__link>✅ Confirmed: HuggingFace Datasets That WILL Help</span></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>✅ Confirmed: HuggingFace Datasets That WILL Help</h1></header>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=quick-answer-yes-2-of-4-will-help-significantly>Quick Answer: YES, 2 of 4 will help significantly!<a href=#quick-answer-yes-2-of-4-will-help-significantly class=hash-link aria-label="Direct link to Quick Answer: YES, 2 of 4 will help significantly!" title="Direct link to Quick Answer: YES, 2 of 4 will help significantly!" translate=no>​</a></h2>
<table><thead><tr><th>Dataset<th>Status<th>Usefulness<th>Priority<tbody><tr><td><strong>MeetingBank</strong><td>✅ <strong>READY TO USE</strong><td>🔥 <strong>VERY HIGH</strong><td><strong>USE IMMEDIATELY</strong><tr><td><strong>LocalView</strong><td>✅ Already covered<td>HIGH<td>Download from Harvard<tr><td><strong>Council Data Project</strong><td>✅ Already covered<td>HIGH<td>Already integrated<tr><td><strong>CivicBand</strong><td>⚠️ Limited access<td>MEDIUM<td>Scrape municipality list</table>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-meetingbank--new-use-this>1. MeetingBank 🔥 (NEW! USE THIS!)<a href=#1-meetingbank--new-use-this class=hash-link aria-label="Direct link to 1. MeetingBank 🔥 (NEW! USE THIS!)" title="Direct link to 1. MeetingBank 🔥 (NEW! USE THIS!)" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-it-is>What It Is:<a href=#what-it-is class=hash-link aria-label="Direct link to What It Is:" title="Direct link to What It Is:" translate=no>​</a></h3>
<p><strong>A benchmark dataset from 6 major U.S. cities specifically designed for meeting summarization</strong></p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=urls>URLs:<a href=#urls class=hash-link aria-label="Direct link to URLs:" title="Direct link to URLs:" translate=no>​</a></h3>
<ul>
<li class=""><strong>HuggingFace (text)</strong>: <a href=https://huggingface.co/datasets/huuuyeah/meetingbank target=_blank rel="noopener noreferrer" class="">https://huggingface.co/datasets/huuuyeah/meetingbank</a></li>
<li class=""><strong>HuggingFace (audio)</strong>: <a href=https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio target=_blank rel="noopener noreferrer" class="">https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio</a></li>
<li class=""><strong>Zenodo (all files)</strong>: <a href=https://zenodo.org/record/7989108 target=_blank rel="noopener noreferrer" class="">https://zenodo.org/record/7989108</a></li>
<li class=""><strong>Archive.org (videos)</strong>:<!-- -->
<ul>
<li class=""><a href=https://archive.org/details/meetingbank-alameda target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-alameda</a></li>
<li class=""><a href=https://archive.org/details/meetingbank-boston target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-boston</a></li>
<li class=""><a href=https://archive.org/details/meetingbank-denver target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-denver</a></li>
<li class=""><a href=https://archive.org/details/meetingbank-long-beach target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-long-beach</a></li>
<li class=""><a href=https://archive.org/details/meetingbank-king-county target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-king-county</a></li>
<li class=""><a href=https://archive.org/details/meetingbank-seattle target=_blank rel="noopener noreferrer" class="">https://archive.org/details/meetingbank-seattle</a></li>
</ul>
</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-you-get>What You Get:<a href=#what-you-get class=hash-link aria-label="Direct link to What You Get:" title="Direct link to What You Get:" translate=no>​</a></h3>
<p>✅ <strong>1,366 city council meetings</strong> from 6 cities:</p>
<ul>
<li class="">Alameda, CA</li>
<li class="">Boston, MA</li>
<li class="">Denver, CO</li>
<li class="">King County, WA</li>
<li class="">Long Beach, CA</li>
<li class="">Seattle, WA</li>
</ul>
<p>✅ <strong>3,579 hours of video</strong></p>
<p>✅ <strong>Full transcripts</strong> (average 28,000 tokens per meeting)</p>
<p>✅ <strong>PDF meeting minutes & agendas</strong></p>
<p>✅ <strong>Human-written summaries</strong> (ground truth for evaluation)</p>
<p>✅ <strong>Machine-generated summaries</strong> (from 6 different systems)</p>
<p>✅ <strong>6,892 segment-level summarization instances</strong> for training</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=why-this-is-perfect-for-your-project>Why This Is PERFECT for Your Project:<a href=#why-this-is-perfect-for-your-project class=hash-link aria-label="Direct link to Why This Is PERFECT for Your Project:" title="Direct link to Why This Is PERFECT for Your Project:" translate=no>​</a></h3>
<ol>
<li class="">
<p><strong>Immediate prototyping</strong>: Download from HuggingFace in 5 minutes</p>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">meetingbank </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"huuuyeah/meetingbank"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> instance </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> meetingbank</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'train'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>[</span><span 
class="token string" style=color:#e3116c>'id'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'summary'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'transcript'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
</li>
<li class="">
<p><strong>Quality validation</strong>: Compare your AI summarization against human-written summaries</p>
</li>
<li class="">
<p><strong>URL discovery</strong>: Each meeting has source URLs to city websites</p>
</li>
<li class="">
<p><strong>Benchmark your oral health keyword detection</strong>: Test against 1,366 real transcripts</p>
</li>
<li class="">
<p><strong>Training data</strong>: If you want to fine-tune models for oral health policy</p>
</li>
</ol>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=paper>Paper:<a href=#paper class=hash-link aria-label="Direct link to Paper:" title="Direct link to Paper:" translate=no>​</a></h3>
<p>"MeetingBank: A Benchmark Dataset for Meeting Summarization"<br/>
<!-- -->ACL 2023 (Association for Computational Linguistics)<br/>
<a href=https://arxiv.org/abs/2305.17529 target=_blank rel="noopener noreferrer" class="">https://arxiv.org/abs/2305.17529</a></p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=-action-plan>🎯 ACTION PLAN:<a href=#-action-plan class=hash-link aria-label="Direct link to 🎯 ACTION PLAN:" title="Direct link to 🎯 ACTION PLAN:" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># 1. Install HuggingFace datasets</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">pip install datasets</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 2. Download MeetingBank</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python -c "</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">from datasets import load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">meetingbank = load_dataset('huuuyeah/meetingbank')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">print('Loaded', len(meetingbank['train']), 'training instances')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 3. 
Create discovery/meetingbank_ingestion.py</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># - Parse meetings</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># - Extract URLs</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># - Load to Bronze layer</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># - Run keyword detection on transcripts</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># - Evaluate against human summaries</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=expected-roi>Expected ROI:<a href=#expected-roi class=hash-link aria-label="Direct link to Expected ROI:" title="Direct link to Expected ROI:" translate=no>​</a></h3>
<ul>
<li class=""><strong>Time</strong>: 2 hours to integrate</li>
<li class=""><strong>Value</strong>: 1,366 meetings with transcripts + summaries + URLs</li>
<li class=""><strong>Quality</strong>: Academic benchmark (peer-reviewed, ACL published)</li>
<li class=""><strong>Coverage</strong>: 6 major cities (all large, high-value for advocacy)</li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-localview--already-covered>2. LocalView ✅ (Already Covered)<a href=#2-localview--already-covered class=hash-link aria-label="Direct link to 2. LocalView ✅ (Already Covered)" title="Direct link to 2. LocalView ✅ (Already Covered)" translate=no>​</a></h2>
<p><strong>Status</strong>: Already identified in previous investigation<br/>
<strong>Location</strong>: Harvard Dataverse (doi:10.7910/DVN/NJTBEM)<br/>
<strong>Coverage</strong>: 1,000-10,000 jurisdictions<br/>
<strong>Action</strong>: Download from Harvard (already documented)</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-council-data-project--already-covered>3. Council Data Project ✅ (Already Covered)<a href=#3-council-data-project--already-covered class=hash-link aria-label="Direct link to 3. Council Data Project ✅ (Already Covered)" title="Direct link to 3. Council Data Project ✅ (Already Covered)" translate=no>​</a></h2>
<p><strong>Status</strong>: Already integrated in <a class="" href=/docs/discovery/external_url_datasets.py><code>external_url_datasets.py</code></a><br/>
<strong>Coverage</strong>: 20+ cities with full pipelines<br/>
<strong>Action</strong>: Already coded, just run the script</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=4-civicband-️-limited-usefulness>4. CivicBand ⚠️ (Limited Usefulness)<a href=#4-civicband-️-limited-usefulness class=hash-link aria-label="Direct link to 4. CivicBand ⚠️ (Limited Usefulness)" title="Direct link to 4. CivicBand ⚠️ (Limited Usefulness)" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-it-is-1>What It Is:<a href=#what-it-is-1 class=hash-link aria-label="Direct link to What It Is:" title="Direct link to What It Is:" translate=no>​</a></h3>
<p>"Largest public collection of civic meeting and election finance data"<br/>
<!-- -->Website: <a href=https://civic.band/ target=_blank rel="noopener noreferrer" class="">https://civic.band/</a></p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-exists>What Exists:<a href=#what-exists class=hash-link aria-label="Direct link to What Exists:" title="Direct link to What Exists:" translate=no>​</a></h3>
<p>✅ <strong>1,031 municipalities tracked</strong><br/>
<!-- -->✅ Millions of pages scraped (meeting minutes, agendas)<br/>
<!-- -->✅ Search interface available<br/>
<!-- -->✅ Publicly browsable</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=the-problem>The Problem:<a href=#the-problem class=hash-link aria-label="Direct link to The Problem:" title="Direct link to The Problem:" translate=no>​</a></h3>
<p>❌ <strong>"Dataset access is via their platform; raw dumps require coordination"</strong></p>
<ul>
<li class="">Can't directly download bulk URL list</li>
<li class="">Would need to contact founder (Philip James: <a href=mailto:hello@civic.band target=_blank rel="noopener noreferrer" class="">hello@civic.band</a>)</li>
<li class="">Or scrape the municipality list from their website</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-you-can-get>What You CAN Get:<a href=#what-you-can-get class=hash-link aria-label="Direct link to What You CAN Get:" title="Direct link to What You CAN Get:" translate=no>​</a></h3>
<p>The list of 1,031 municipalities is publicly visible on their site. You could:</p>
<ol>
<li class=""><strong>Scrape the municipality list</strong> (city names + states)</li>
<li class=""><strong>Match against your Census data</strong> to get FIPS codes</li>
<li class=""><strong>Use as verification</strong> (these 1,031 are confirmed to have meeting data)</li>
</ol>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=limited-value-because>Limited Value Because:<a href=#limited-value-because class=hash-link aria-label="Direct link to Limited Value Because:" title="Direct link to Limited Value Because:" translate=no>​</a></h3>
<ul>
<li class="">Can't get direct URLs (need to coordinate with founder)</li>
<li class="">Already have larger coverage from LocalView (1,000-10,000 jurisdictions)</li>
<li class="">Already have premium coverage from CDP (20 cities)</li>
<li class="">CivicBand's main value is their <em>content</em> (scraped minutes), not URLs</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=possible-action>Possible Action:<a href=#possible-action class=hash-link aria-label="Direct link to Possible Action:" title="Direct link to Possible Action:" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Scrape CivicBand's municipality list</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> requests</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> bs4 </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> BeautifulSoup</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> requests</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"https://civic.band/"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">soup </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> BeautifulSoup</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token 
plain">text</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'html.parser'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Parse the table of municipalities</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Match against Census data</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Use as validation list</span><br/></div></code></pre></div></div>
<p><strong>Estimated value</strong>: MEDIUM (validation only, not bulk URLs)</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-revised-priority-ranking>📊 Revised Priority Ranking<a href=#-revised-priority-ranking class=hash-link aria-label="Direct link to 📊 Revised Priority Ranking" title="Direct link to 📊 Revised Priority Ranking" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=immediate-do-this-week>IMMEDIATE (Do This Week):<a href=#immediate-do-this-week class=hash-link aria-label="Direct link to IMMEDIATE (Do This Week):" title="Direct link to IMMEDIATE (Do This Week):" translate=no>​</a></h3>
<ol>
<li class="">🔥 <strong>Download MeetingBank</strong> (2 hours)<!-- -->
<ul>
<li class="">HuggingFace dataset ready to use</li>
<li class="">1,366 meetings with transcripts, summaries, URLs</li>
<li class="">Perfect for prototyping and evaluation</li>
</ul>
</li>
</ol>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=high-priority-do-this-month>HIGH PRIORITY (Do This Month):<a href=#high-priority-do-this-month class=hash-link aria-label="Direct link to HIGH PRIORITY (Do This Month):" title="Direct link to HIGH PRIORITY (Do This Month):" translate=no>​</a></h3>
<ol start=2>
<li class="">
<p>✅ <strong>Download LocalView</strong> (1 day)</p>
<ul>
<li class="">Harvard Dataverse</li>
<li class="">1,000-10,000 jurisdictions</li>
</ul>
</li>
<li class="">
<p>✅ <strong>Run CDP integration</strong> (2 hours)</p>
<ul>
<li class="">Already coded</li>
<li class="">20 premium cities</li>
</ul>
</li>
</ol>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=medium-priority-optional>MEDIUM PRIORITY (Optional):<a href=#medium-priority-optional class=hash-link aria-label="Direct link to MEDIUM PRIORITY (Optional):" title="Direct link to MEDIUM PRIORITY (Optional):" translate=no>​</a></h3>
<ol start=4>
<li class="">⚠️ <strong>Scrape CivicBand list</strong> (4 hours)<!-- -->
<ul>
<li class="">1,031 municipality names</li>
<li class="">Use for validation</li>
<li class="">Or contact founder for bulk access</li>
</ul>
</li>
</ol>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-updated-integration-code>🎯 Updated Integration Code<a href=#-updated-integration-code class=hash-link aria-label="Direct link to 🎯 Updated Integration Code" title="Direct link to 🎯 Updated Integration Code" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=add-meetingbank-to-your-pipeline>Add MeetingBank to your pipeline:<a href=#add-meetingbank-to-your-pipeline class=hash-link aria-label="Direct link to Add MeetingBank to your pipeline:" title="Direct link to Add MeetingBank to your pipeline:" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># discovery/meetingbank_ingestion.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> pyspark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">sql </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> SparkSession</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> loguru </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> logger</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>load_meetingbank_to_bronze</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token plain">spark</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> SparkSession</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token builtin">dict</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Load MeetingBank dataset to Bronze layer.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> MeetingBank contains 1,366 city council meetings from 6 major cities</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> with full transcripts, summaries, and source URLs.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">info</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"Loading MeetingBank dataset from HuggingFace"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Download from HuggingFace</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> meetingbank </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"huuuyeah/meetingbank"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> meetings </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> split </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'train'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'validation'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'test'</span><span class="token 
punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> instance </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> meetingbank</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">split</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> meetings</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"meeting_id"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'id'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"jurisdiction_name"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" 
style=color:#e3116c>'city'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Unknown'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"state_code"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'state'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'Unknown'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"transcript"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'transcript'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"summary_human"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token 
string" style=color:#e3116c>'summary'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"source_url"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'url'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"date"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'date'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"has_transcript"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span 
class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"has_summary"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>True</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"has_url"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'url'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"transcript_length"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">len</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'transcript'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" 
style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"source"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"meetingbank"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Convert to DataFrame</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> spark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">createDataFrame</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">meetings</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Write to Bronze layer</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> output_path </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span 
class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">settings</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">delta_lake_path</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>/bronze/meetingbank_meetings"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">write \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">format</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"delta"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">mode</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"overwrite"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> \</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">save</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">output_path</span><span class="token punctuation" style=color:#393A34>)</span><span 
class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">info</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"βœ… Loaded </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">meetings</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> meetings from MeetingBank"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"total_meetings"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">len</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">meetings</span><span class="token punctuation" style=color:#393A34>)</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"cities"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>6</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"source"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"meetingbank"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=test-your-keyword-detection>Test your keyword detection:<a href=#test-your-keyword-detection class=hash-link aria-label="Direct link to Test your keyword detection:" title="Direct link to Test your keyword detection:" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Test keyword detection on MeetingBank transcripts</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> alerts</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">keyword_monitor </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> KeywordAlertSystem</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">meetingbank </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"huuuyeah/meetingbank"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">alert_system </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> KeywordAlertSystem</span><span class="token 
punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Test on first 10 meetings</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> instance </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> meetingbank</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'train'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">select</span><span class="token punctuation" style=color:#393A34>(</span><span class="token builtin">range</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>10</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    matches </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> alert_system</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">_find_keywords_in_text</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'transcript'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" 
style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        alert_system</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">KEYWORD_CATEGORIES</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> matches</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Meeting </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">instance</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>[</span><span class="token string-interpolation interpolation string" style=color:#e3116c>'id'</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>]</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span 
class="token string-interpolation interpolation">matches</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> oral health keywords found"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>match</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> matches</span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>:</span><span class="token number" style=color:#36acaa>3</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain">  </span><span class="token comment" style=color:#999988;font-style:italic># Show first 3</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">            </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f" - </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation keyword" style=color:#00009f>match</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" 
style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> (</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation keyword" style=color:#00009f>match</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">category</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>)"</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=evaluate-your-ai-summarization>Evaluate your AI summarization:<a href=#evaluate-your-ai-summarization class=hash-link aria-label="Direct link to Evaluate your AI summarization:" title="Direct link to Evaluate your AI summarization:" translate=no>​</a></h3>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Compare your summaries against human-written ground truth</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> extraction</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">summarizer </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> MeetingSummarizer</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datasets </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">summarizer </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> MeetingSummarizer</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">meetingbank </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> load_dataset</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" 
style=color:#e3116c>"huuuyeah/meetingbank"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> instance </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> meetingbank</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'test'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">select</span><span class="token punctuation" style=color:#393A34>(</span><span class="token builtin">range</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>10</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token comment" style=color:#999988;font-style:italic># Generate your summary</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    your_summary </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> summarizer</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">summarize</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        event</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain">  </span><span class="token comment" 
style=color:#999988;font-style:italic># Create MeetingEvent from instance</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        full_text</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'transcript'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">        focus_on_health</span><span class="token operator" style=color:#393A34>=</span><span class="token boolean" style=color:#36acaa>False</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token comment" style=color:#999988;font-style:italic># Compare against human summary</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    human_summary </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> instance</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'summary'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>print</span><span class="token 
punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Meeting: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">instance</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>[</span><span class="token string-interpolation interpolation string" style=color:#e3116c>'id'</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>]</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Your summary: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">your_summary</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">executive_summary</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" 
style=color:#e3116c>f"Human summary: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">human_summary</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Quality: </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">your_summary</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">confidence_score</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">    </span><span class="token keyword" style=color:#00009f>print</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-expected-outcomes>📈 Expected Outcomes<a href=#-expected-outcomes class=hash-link aria-label="Direct link to 📈 Expected Outcomes" title="Direct link to 📈 Expected Outcomes" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=before-meetingbank>Before MeetingBank:<a href=#before-meetingbank class=hash-link aria-label="Direct link to Before MeetingBank:" title="Direct link to Before MeetingBank:" translate=no>​</a></h3>
<ul>
<li class="">76 URLs discovered (15% match rate)</li>
<li class="">No evaluation benchmark</li>
<li class="">No ground truth for summarization</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=after-meetingbank>After MeetingBank:<a href=#after-meetingbank class=hash-link aria-label="Direct link to After MeetingBank:" title="Direct link to After MeetingBank:" translate=no>​</a></h3>
<ul>
<li class=""><strong>+1,366 meetings</strong> with transcripts</li>
<li class=""><strong>+6 major cities</strong> with verified URLs</li>
<li class=""><strong>Academic benchmark</strong> for evaluation</li>
<li class=""><strong>Human summaries</strong> for quality validation</li>
<li class=""><strong>Total meetings</strong>: 1,366 ready to analyze immediately</li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=-final-recommendation>🚀 Final Recommendation<a href=#-final-recommendation class=hash-link aria-label="Direct link to 🚀 Final Recommendation" title="Direct link to 🚀 Final Recommendation" translate=no>​</a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=do-this-first-2-hours>DO THIS FIRST (2 hours):<a href=#do-this-first-2-hours class=hash-link aria-label="Direct link to DO THIS FIRST (2 hours):" title="Direct link to DO THIS FIRST (2 hours):" translate=no>​</a></h3>
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"># 1. Install HuggingFace datasets</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">pip install datasets</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 2. Download MeetingBank</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">python -c "</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">from datasets import load_dataset</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">meetingbank = load_dataset('huuuyeah/meetingbank')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">print(f'✅ Downloaded {len(meetingbank[\"train\"])} meetings')</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">"</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 3. Create integration script</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># See code example above</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 4. 
Test your keyword detection</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># See test code above</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># 5. Evaluate your summarization</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"># See evaluation code above</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=expected-result>Expected Result:<a href=#expected-result class=hash-link aria-label="Direct link to Expected Result:" title="Direct link to Expected Result:" translate=no>​</a></h3>
<ul>
<li class=""><strong>Immediate access</strong> to 1,366 meetings</li>
<li class=""><strong>6 major cities</strong> for prototyping</li>
<li class=""><strong>Academic quality</strong> benchmark</li>
<li class=""><strong>Proven ROI</strong>: Published at a top NLP conference (ACL 2023)</li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=summary-table>Summary Table<a href=#summary-table class=hash-link aria-label="Direct link to Summary Table" title="Direct link to Summary Table" translate=no>​</a></h2>
<table><thead><tr><th>Dataset<th>Available?<th>Download Time<th>Meetings<th>Usefulness<tbody><tr><td><strong>MeetingBank</strong><td>✅ <strong>YES</strong> (HuggingFace)<td><strong>5 minutes</strong><td><strong>1,366</strong><td>🔥 <strong>VERY HIGH</strong><tr><td><strong>LocalView</strong><td>✅ YES (Harvard)<td>1 day<td>1,000-10,000<td>🔥 VERY HIGH<tr><td><strong>CDP</strong><td>✅ YES (already coded)<td>2 hours<td>20 cities<td>🔥 HIGH<tr><td><strong>CivicBand</strong><td>⚠️ PARTIAL (need coordination)<td>4 hours<td>1,031 list<td>🟡 MEDIUM</table>
<p><strong>Bottom line</strong>: MeetingBank is the fastest win! Download it today and start testing your summarization and keyword detection on real city council meeting transcripts.</div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/data-sources/huggingface-datasets.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href=/docs/data-sources/census-data><div class=pagination-nav__sublabel>Previous</div><div class=pagination-nav__label>Census Bureau Data URL Fix</div></a><a class="pagination-nav__link pagination-nav__link--next" href=/docs/data-sources/url-datasets><div class=pagination-nav__sublabel>Next</div><div class=pagination-nav__label>🎯 ANSWER: Yes, You Should Look at Those Datasets!</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#quick-answer-yes-2-of-4-will-help-significantly class="table-of-contents__link toc-highlight">Quick Answer: YES, 2 of 4 will help significantly!</a><li><a href=#1-meetingbank--new-use-this class="table-of-contents__link toc-highlight">1. MeetingBank 🔥 (NEW! 
USE THIS!)</a><ul><li><a href=#what-it-is class="table-of-contents__link toc-highlight">What It Is:</a><li><a href=#urls class="table-of-contents__link toc-highlight">URLs:</a><li><a href=#what-you-get class="table-of-contents__link toc-highlight">What You Get:</a><li><a href=#why-this-is-perfect-for-your-project class="table-of-contents__link toc-highlight">Why This Is PERFECT for Your Project:</a><li><a href=#paper class="table-of-contents__link toc-highlight">Paper:</a><li><a href=#-action-plan class="table-of-contents__link toc-highlight">🎯 ACTION PLAN:</a><li><a href=#expected-roi class="table-of-contents__link toc-highlight">Expected ROI:</a></ul><li><a href=#2-localview--already-covered class="table-of-contents__link toc-highlight">2. LocalView ✅ (Already Covered)</a><li><a href=#3-council-data-project--already-covered class="table-of-contents__link toc-highlight">3. Council Data Project ✅ (Already Covered)</a><li><a href=#4-civicband-️-limited-usefulness class="table-of-contents__link toc-highlight">4. 
CivicBand ⚠️ (Limited Usefulness)</a><ul><li><a href=#what-it-is-1 class="table-of-contents__link toc-highlight">What It Is:</a><li><a href=#what-exists class="table-of-contents__link toc-highlight">What Exists:</a><li><a href=#the-problem class="table-of-contents__link toc-highlight">The Problem:</a><li><a href=#what-you-can-get class="table-of-contents__link toc-highlight">What You CAN Get:</a><li><a href=#limited-value-because class="table-of-contents__link toc-highlight">Limited Value Because:</a><li><a href=#possible-action class="table-of-contents__link toc-highlight">Possible Action:</a></ul><li><a href=#-revised-priority-ranking class="table-of-contents__link toc-highlight">📊 Revised Priority Ranking</a><ul><li><a href=#immediate-do-this-week class="table-of-contents__link toc-highlight">IMMEDIATE (Do This Week):</a><li><a href=#high-priority-do-this-month class="table-of-contents__link toc-highlight">HIGH PRIORITY (Do This Month):</a><li><a href=#medium-priority-optional class="table-of-contents__link toc-highlight">MEDIUM PRIORITY (Optional):</a></ul><li><a href=#-updated-integration-code class="table-of-contents__link toc-highlight">🎯 Updated Integration Code</a><ul><li><a href=#add-meetingbank-to-your-pipeline class="table-of-contents__link toc-highlight">Add MeetingBank to your pipeline:</a><li><a href=#test-your-keyword-detection class="table-of-contents__link toc-highlight">Test your keyword detection:</a><li><a href=#evaluate-your-ai-summarization class="table-of-contents__link toc-highlight">Evaluate your AI summarization:</a></ul><li><a href=#-expected-outcomes class="table-of-contents__link toc-highlight">📈 Expected Outcomes</a><ul><li><a href=#before-meetingbank class="table-of-contents__link toc-highlight">Before MeetingBank:</a><li><a href=#after-meetingbank class="table-of-contents__link toc-highlight">After MeetingBank:</a></ul><li><a href=#-final-recommendation class="table-of-contents__link toc-highlight">🚀 Final 
Recommendation</a><ul><li><a href=#do-this-first-2-hours class="table-of-contents__link toc-highlight">DO THIS FIRST (2 hours):</a><li><a href=#expected-result class="table-of-contents__link toc-highlight">Expected Result:</a></ul><li><a href=#summary-table class="table-of-contents__link toc-highlight">Summary Table</a></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use 
href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li 
class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body>