Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| <html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-integrations/overview" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>Integration Guide: Reusing Open-Source Municipal Scraping Logic | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/integrations/overview /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="Integration Guide: Reusing Open-Source Municipal Scraping Logic | Open Navigator"/><meta data-rh=true name=description content=Overview /><meta data-rh=true property=og:description content=Overview /><link data-rh=true rel=icon href=/img/favicon.ico /><link data-rh=true rel=canonical href=https://www.communityone.com/docs/integrations/overview /><link data-rh=true rel=alternate 
href=https://www.communityone.com/docs/integrations/overview hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/integrations/overview hreflang=x-default /><script data-rh=true type=application/ld+json>{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","item":"https://www.communityone.com/docs/for-developers","name":"Developers & Technical Users","position":1},{"@type":"ListItem","item":"https://www.communityone.com/docs/integrations/overview","name":"Integration Guide: Reusing Open-Source Municipal Scraping Logic","position":2}]}</script><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs> | |
| <symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol> | |
| </defs></svg> | |
| <script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a class="navbar__item navbar__link" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class=sidebarViewport_aRkj><div class=sidebar_njMd><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=true href=/docs/intro><span title="Getting Started" class=categoryLinkLabel_W154>Getting Started</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/intro><span title=Introduction class=linkLabel_WmDU>Introduction</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Open Navigator" class=linkLabel_WmDU>Open Navigator</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-families><span title="Families & Individuals" class=categoryLinkLabel_W154>Families & Individuals</span></a><button aria-label="Collapse sidebar category 'Families & Individuals'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a 
class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/families/community-events><span title="Resources for Families" class=categoryLinkLabel_W154>Resources for Families</span></a></div><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Getting Started with Open Navigator" class=linkLabel_WmDU>Getting Started with Open Navigator</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/citations><span title="Data and Citations" class=linkLabel_WmDU>Data and Citations</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-advocates><span title="Policy Makers & Advocates" class=categoryLinkLabel_W154>Policy Makers & Advocates</span></a><button aria-label="Collapse sidebar category 'Policy Makers & Advocates'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/overview><span title="Understanding the Data" class=categoryLinkLabel_W154>Understanding the Data</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false 
tabindex=0 href=/docs/guides/political-economy><span title="Analysis & Strategy" class=categoryLinkLabel_W154>Analysis & Strategy</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/case-studies/tuscaloosa-complete><span title="Real-World Examples" class=categoryLinkLabel_W154>Real-World Examples</span></a></div></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--active" href=/docs/for-developers><span title="Developers & Technical Users" class=categoryLinkLabel_W154>Developers & Technical Users</span></a><button aria-label="Collapse sidebar category 'Developers & Technical Users'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/quickstart><span title="Setup & Installation" class=categoryLinkLabel_W154>Setup & Installation</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/citations><span title="Data Sources (Technical)" class=categoryLinkLabel_W154>Data Sources (Technical)</span></a></div><li 
class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/guides/jurisdiction-setup><span title="How-To Guides" class=categoryLinkLabel_W154>How-To Guides</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role=button aria-expanded=true tabindex=0 href=/docs/integrations/mcp-server><span title=Integrations class=categoryLinkLabel_W154>Integrations</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/mcp-server><span title="Model Context Protocol (MCP) Server" class=linkLabel_WmDU>Model Context Protocol (MCP) Server</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/grants-gov-api><span title="Grants.gov API Integration" class=linkLabel_WmDU>Grants.gov API Integration</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-political-contributions><span title="FEC Political Contributions" class=linkLabel_WmDU>FEC Political Contributions</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-campaign-finance><span title="FEC Campaign Finance Integration" class=linkLabel_WmDU>FEC Campaign Finance Integration</span></a><li class="theme-doc-sidebar-item-link 
theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/dataverse-summary><span title="🎉 Harvard Dataverse Integration - Complete!" class=linkLabel_WmDU>🎉 Harvard Dataverse Integration - Complete!</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/dataverse><span title="📚 Dataverse API Integration" class=linkLabel_WmDU>📚 Dataverse API Integration</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-automated><span title="Automated eBoard Scraping Solutions" class=linkLabel_WmDU>Automated eBoard Scraping Solutions</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-cookies><span title="eBoard Cookie Extraction Guide" class=linkLabel_WmDU>eBoard Cookie Extraction Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-manual><span title="eBoard Platform Manual Download Guide" class=linkLabel_WmDU>eBoard Platform Manual Download Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-integration-summary><span title="FEC Campaign Finance Integration - Implementation Summary" class=linkLabel_WmDU>FEC Campaign Finance Integration - Implementation Summary</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/frontend><span title="Frontend Integration Guide" class=linkLabel_WmDU>Frontend Integration Guide</span></a><li class="theme-doc-sidebar-item-link 
theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/localview><span title="📚 LocalView Integration Guide" class=linkLabel_WmDU>📚 LocalView Integration Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current=page tabindex=0 href=/docs/integrations/overview><span title="Integration Guide: Reusing Open-Source Municipal Scraping Logic" class=linkLabel_WmDU>Integration Guide: Reusing Open-Source Municipal Scraping Logic</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/deployment/databricks-apps><span title=Deployment class=categoryLinkLabel_W154>Deployment</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/development/database-setup><span title=Development class=categoryLinkLabel_W154>Development</span></a></div></ul></ul></nav></div></div></aside><main class=docMainContainer_TBSr><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label=Breadcrumbs><ul class=breadcrumbs><li class=breadcrumbs__item><a aria-label="Home page" class=breadcrumbs__link href=/><svg viewBox="0 0 24 24" class=breadcrumbHomeIcon_YNFT><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 
3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill=currentColor /></svg></a><li class=breadcrumbs__item><a class=breadcrumbs__link href=/docs/for-developers><span>Developers & Technical Users</span></a><li class=breadcrumbs__item><span class=breadcrumbs__link>Integrations</span><li class="breadcrumbs__item breadcrumbs__item--active"><span class=breadcrumbs__link>Integration Guide: Reusing Open-Source Municipal Scraping Logic</span></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Integration Guide: Reusing Open-Source Municipal Scraping Logic</h1></header> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=overview>Overview<a href=#overview class=hash-link aria-label="Direct link to Overview" title="Direct link to Overview" translate=no></a></h2> | |
| <p>This guide shows how to integrate proven patterns from established open-source projects into the Oral Health Policy Pulse scraping pipeline.</p> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=current-state>Current State<a href=#current-state class=hash-link aria-label="Direct link to Current State" title="Direct link to Current State" translate=no></a></h2> | |
| <p>✅ <strong>You already have:</strong></p> | |
| <ul> | |
| <li class="">Census Gazetteer data with 85,302 jurisdictions (names + FIPS codes)</li> | |
| <li class="">GSA .gov domain matching</li> | |
| <li class="">76 discovered URLs ready for scraping</li> | |
| <li class="">Legistar platform references in the codebase</li> | |
| <li class="">Base ScraperAgent class in <code>agents/scraper.py</code></li> | |
| </ul> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-civic-scraper-integration>1. Civic Scraper Integration<a href=#1-civic-scraper-integration class=hash-link aria-label="Direct link to 1. Civic Scraper Integration" title="Direct link to 1. Civic Scraper Integration" translate=no></a></h2> | |
| <p><strong>Repository:</strong> <code>biglocalnews/civic-scraper</code><br/> | |
| <strong>License:</strong> Apache 2.0 (✅ Compatible)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt>What to Adopt:<a href=#what-to-adopt class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-platform-detection-logic>A. Platform Detection Logic<a href=#a-platform-detection-logic class=hash-link aria-label="Direct link to A. Platform Detection Logic" title="Direct link to A. Platform Detection Logic" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent platform detection</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Location: civic_scraper/platforms/__init__.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">PLATFORMS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> LegistarScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> GranicusScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" 
style=color:#e3116c>'calagenda'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> CalAgendaScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'civicplus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> CivicPlusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Auto-detect which platform a URL uses"""</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar.com'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Legistar/'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>elif</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus.com'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Mediasite/'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span 
class="token string" style=color:#e3116c>'granicus'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ... more patterns</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Add <code>discovery/platform_detector.py</code>, modeled on civic-scraper's URL-matching patterns.</p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-document-downloader-with-retry-logic>B. Document Downloader with Retry Logic<a href=#b-document-downloader-with-retry-logic class=hash-link aria-label="Direct link to B. Document Downloader with Retry Logic" title="Direct link to B. Document Downloader with Retry Logic" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># civic_scraper/download.py has robust downloading</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Features:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Exponential backoff</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Content-type validation</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Duplicate detection via hash</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Progress tracking</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token 
function" style=color:#d73a49>download_document</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> session</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> httpx</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">AsyncClient</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token builtin">bytes</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Download with retries and validation"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> attempt </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> </span><span class="token builtin">range</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>3</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>try</span><span class="token punctuation" style=color:#393A34>:</span><span 
class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> session</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> timeout</span><span class="token operator" style=color:#393A34>=</span><span class="token number" style=color:#36acaa>30.0</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">raise_for_status</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Validate it's actually a document</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> content_type </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">headers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" 
style=color:#e3116c>'content-type'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'pdf'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> content_type </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'html'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> content_type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">content</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>except</span><span class="token plain"> Exception </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> e</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> attempt </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> </span><span 
class="token number" style=color:#36acaa>2</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>raise</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> asyncio</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">sleep</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>2</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>**</span><span class="token plain"> attempt</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Enhance <code>agents/scraper.py</code> with their retry patterns</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-city-scrapers-integration>2. City Scrapers Integration<a href=#2-city-scrapers-integration class=hash-link aria-label="Direct link to 2. City Scrapers Integration" title="Direct link to 2. City Scrapers Integration" translate=no></a></h2> | |
| <p><strong>Repository:</strong> <code>City-Bureau/city-scrapers</code> |
| <strong>License:</strong> MIT (✅ Compatible)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-1>What to Adopt:<a href=#what-to-adopt-1 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-standardized-event-schema>A. Standardized Event Schema<a href=#a-standardized-event-schema class=hash-link aria-label="Direct link to A. Standardized Event Schema" title="Direct link to A. Standardized Event Schema" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They normalize all meeting data to a common format</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># city_scrapers/core/models.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Event</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> description</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
classification</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Board", "Commission", "Council"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> start</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> end</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> all_day</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> links</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Dict</span><span class="token punctuation" 
style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># [{"title": "Agenda", "href": "..."}]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> source</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Classification types they use:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">CLASSIFICATIONS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Board"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Commission"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Committee"</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Council"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Town Hall"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Public Hearing"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Create <code>models/meeting_event.py</code> with this schema for your Silver layer</p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-scraper-testing-framework>B. Scraper Testing Framework<a href=#b-scraper-testing-framework class=hash-link aria-label="Direct link to B. Scraper Testing Framework" title="Direct link to B. Scraper Testing Framework" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent test patterns</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># tests/test_scrapers.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>test_scraper</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Test with frozen HTML responses"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scraper </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> CityScraper</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Use saved HTML files to avoid live requests during testing</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>with</span><span class="token plain"> </span><span class="token builtin">open</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'tests/fixtures/sample_calendar.html'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> results </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> scraper</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> </span><span class="token builtin">len</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">results</span><span class="token 
punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> results</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> results</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">source</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Add a <code>tests/fixtures/</code> directory with sample HTML from different platforms</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-council-data-project-cdp-integration>3. Council Data Project (CDP) Integration<a href=#3-council-data-project-cdp-integration class=hash-link aria-label="Direct link to 3. Council Data Project (CDP) Integration" title="Direct link to 3. Council Data Project (CDP) Integration" translate=no></a></h2> | |
| <p><strong>Repository:</strong> <code>CouncilDataProject/cdp-scrapers</code> | |
| <strong>License:</strong> MIT (✅ Compatible)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-2>What to Adopt:<a href=#what-to-adopt-2 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-generic-ingestion-pipeline>A. Generic Ingestion Pipeline<a href=#a-generic-ingestion-pipeline class=hash-link aria-label="Direct link to A. Generic Ingestion Pipeline" title="Direct link to A. Generic Ingestion Pipeline" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># CDP has a beautiful generic scraper pipeline</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># cdp_scrapers/scraper_utils.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">IngestionModel</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Standard format for ingested data"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> sessions</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Session</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Individual meetings</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Session</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> video_uri</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> session_datetime</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> session_index</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">int</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> caption_uri</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator 
annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">EventMinutesItem</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> minutes_item</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> MinutesItem</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>reduced_list</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">items</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> key_attr</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span 
class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Deduplicate items by a key attribute"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> seen </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token builtin">set</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> result </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> item </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> items</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> key </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token builtin">getattr</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token plain">item</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> key_attr</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> key </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> seen</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> seen</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">add</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">key</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> result</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">item</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> result</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Create <code>models/ingestion.py</code> based on CDP's ingestion schemas</p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-video-transcript-integration-future>B. Video Transcript Integration (Future)<a href=#b-video-transcript-integration-future class=hash-link aria-label="Direct link to B. Video Transcript Integration (Future)" title="Direct link to B. Video Transcript Integration (Future)" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># CDP processes meeting videos into searchable transcripts</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># This is advanced but incredibly valuable</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># They use:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - AWS Transcribe / Google Speech-to-Text</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Sentence indexing with timestamps</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Speaker diarization (who said what)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token 
comment" style=color:#999988;font-style:italic># You could add this in Phase 2 after document scraping works</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Document this in <code>docs/ROADMAP.md</code> for future implementation</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=4-engagic-integration>4. Engagic Integration<a href=#4-engagic-integration class=hash-link aria-label="Direct link to 4. Engagic Integration" title="Direct link to 4. Engagic Integration" translate=no></a></h2> | |
| <p><strong>Repository:</strong> <code>Engagic/engagic</code> | |
<strong>License:</strong> Check the repository (likely AGPL; verify compatibility before reusing code)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-3>What to Adopt:<a href=#what-to-adopt-3 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-matter-tracking-across-meetings>A. "Matter" Tracking Across Meetings<a href=#a-matter-tracking-across-meetings class=hash-link aria-label='Direct link to A. "Matter" Tracking Across Meetings' title='Direct link to A. "Matter" Tracking Across Meetings' translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Engagic tracks individual legislative items across meetings</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># This is PERFECT for oral health policy tracking</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Matter</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> matter_id</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> matter_number</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># "Bill 2024-001"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token builtin">type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Ordinance", "Resolution", "Motion"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> first_introduced</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> status</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Introduced", "Committee", "Passed", "Failed"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> votes</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Vote</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> related_documents</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span 
class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Track how a fluoridation ordinance evolves:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 1: Introduced (just mentioned in minutes)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 2: Committee review (document link added)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 3: Public hearing (comments recorded)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 4: Final vote (result captured)</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Create <code>models/matter.py</code> for tracking policy evolution</p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-llm-powered-document-parsing>B. LLM-Powered Document Parsing<a href=#b-llm-powered-document-parsing class=hash-link aria-label="Direct link to B. LLM-Powered Document Parsing" title="Direct link to B. LLM-Powered Document Parsing" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Engagic uses LLMs to extract structure from "blob" PDFs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># You already have OpenAI configured!</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>extract_agenda_items</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pdf_text</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">AgendaItem</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Use GPT to extract structured items from unstructured text"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> prompt </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Extract agenda items from this meeting minutes text.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> For each item, identify:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Item number</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Title</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Description </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Any votes or decisions</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Keywords related to health, dental, fluoride, water, public health</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Return JSON array.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> 
"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> openai_client</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">chat</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">completions</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">create</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> model</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"gpt-4o-mini"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> messages</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"role"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"system"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"content"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span 
class="token string" style=color:#e3116c>"You extract structured data from government documents"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"role"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"user"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"content"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">prompt</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>\n\n</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">pdf_text</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> response_format</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"type"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"json_object"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> json</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">loads</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">choices</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">message</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">content</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Add <code>extraction/llm_parser.py</code> using your existing OpenAI setup</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=5-councilmatic-integration>5. Councilmatic Integration<a href=#5-councilmatic-integration class=hash-link aria-label="Direct link to 5. Councilmatic Integration" title="Direct link to 5. Councilmatic Integration" translate=no></a></h2> | |
| <p><strong>Repository:</strong> <code>datamade/councilmatic-starter-template</code> | |
| <strong>License:</strong> MIT (✅ Compatible)</p> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-4>What to Adopt:<a href=#what-to-adopt-4 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-personorganization-tracking>A. Person/Organization Tracking<a href=#a-personorganization-tracking class=hash-link aria-label="Direct link to A. Person/Organization Tracking" title="Direct link to A. Person/Organization Tracking" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Councilmatic tracks who voted on what</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Useful for understanding power dynamics around oral health policy</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Person</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> role</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Council 
Member", "Mayor", "Commissioner"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> district</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> party</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Vote</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> motion</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> option</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token 
comment" style=color:#999988;font-style:italic># "yes", "no", "abstain"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> person</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Person</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> date</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Add these models to <code>models/governance.py</code></p> | |
| <h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-search-interface-patterns>B. Search Interface Patterns<a href=#b-search-interface-patterns class=hash-link aria-label="Direct link to B. Search Interface Patterns" title="Direct link to B. Search Interface Patterns" translate=no></a></h4> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent search UX</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># filters.py shows what users want:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">SEARCH_FILTERS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"date_range"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"topic"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["health", "water", "budget"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"organization"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> 
</span><span class="token comment" style=color:#999988;font-style:italic># Which board/commission</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"document_type"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["agenda", "minutes", "transcript"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"status"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["pending", "passed", "failed"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Your FastAPI endpoints could mirror this</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@app</span><span class="token decorator annotation punctuation" style=color:#393A34>.</span><span class="token decorator annotation punctuation" style=color:#393A34>get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"/api/search"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>search_documents</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> query</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> topics</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Query</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>"oral_health"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"fluoridation"</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
date_from</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">date</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> date_to</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">date</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Search scraped documents with filters"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Query your Delta Lake Gold layer</span><br/></div></code></pre></div></div> | |
| <p><strong>Your Action:</strong> Add to <code>api/routes/search.py</code> (create the file if it doesn't exist)</p> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=implementation-priorities>Implementation Priorities<a href=#implementation-priorities class=hash-link aria-label="Direct link to Implementation Priorities" title="Direct link to Implementation Priorities" translate=no></a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-1-foundation-week-1>Phase 1: Foundation (Week 1)<a href=#phase-1-foundation-week-1 class=hash-link aria-label="Direct link to Phase 1: Foundation (Week 1)" title="Direct link to Phase 1: Foundation (Week 1)" translate=no></a></h3> | |
| <ul class="contains-task-list containsTaskList_mC6p"> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Platform Detection</strong> - Add <code>discovery/platform_detector.py</code> from Civic Scraper patterns</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Standardized Schema</strong> - Create <code>models/meeting_event.py</code> from City Scrapers</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Enhanced Downloader</strong> - Improve <code>agents/scraper.py</code> retry logic</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-2-scraping-week-2-3>Phase 2: Scraping (Weeks 2-3)<a href=#phase-2-scraping-week-2-3 class=hash-link aria-label="Direct link to Phase 2: Scraping (Weeks 2-3)" title="Direct link to Phase 2: Scraping (Weeks 2-3)" translate=no></a></h3> | |
| <ul class="contains-task-list containsTaskList_mC6p"> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Legistar Scraper</strong> - Implement full Legistar support using Civic Scraper patterns</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Generic HTML Parser</strong> - Use BeautifulSoup patterns from City Scrapers</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>PDF Extraction</strong> - Add pypdf (formerly PyPDF2)/pdfplumber support</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-3-intelligence-week-4>Phase 3: Intelligence (Week 4)<a href=#phase-3-intelligence-week-4 class=hash-link aria-label="Direct link to Phase 3: Intelligence (Week 4)" title="Direct link to Phase 3: Intelligence (Week 4)" translate=no></a></h3> | |
| <ul class="contains-task-list containsTaskList_mC6p"> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>LLM Parser</strong> - Add <code>extraction/llm_parser.py</code> from Engagic patterns</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Matter Tracking</strong> - Create <code>models/matter.py</code> for policy evolution</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Keyword Detection</strong> - Detect oral health, fluoridation, and dental policy keywords</li> | |
| </ul> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-4-scale-week-5>Phase 4: Scale (Week 5+)<a href=#phase-4-scale-week-5 class=hash-link aria-label="Direct link to Phase 4: Scale (Week 5+)" title="Direct link to Phase 4: Scale (Week 5+)" translate=no></a></h3> | |
| <ul class="contains-task-list containsTaskList_mC6p"> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Test All 76 URLs</strong> - Run full scraper on discovered targets</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Expand to All Municipalities</strong> - Process all 32,333 jurisdictions</li> | |
| <li class=task-list-item><input type=checkbox disabled/> <strong>Video Transcripts</strong> - CDP-style video processing (future)</li> | |
| </ul> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=code-snippets-to-add-now>Code Snippets to Add Now<a href=#code-snippets-to-add-now class=hash-link aria-label="Direct link to Code Snippets to Add Now" title="Direct link to Code Snippets to Add Now" translate=no></a></h2> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-platform-detector>1. Platform Detector<a href=#1-platform-detector class=hash-link aria-label="Direct link to 1. Platform Detector" title="Direct link to 1. Platform Detector" translate=no></a></h3> | |
| <p><strong>File:</strong> <code>discovery/platform_detector.py</code></p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Platform detection for municipal websites.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Based on patterns from biglocalnews/civic-scraper.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> typing </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Optional</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> urllib</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> urlparse</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">PLATFORM_PATTERNS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" 
style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Legistar/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/LegislationDetail.aspx'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Calendar.aspx'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><span class="token string" style=color:#e3116c>'granicus.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Mediasite/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/ViewPublisher.php'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'municode'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'municode.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/meeting_minutes'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" 
style=color:#e3116c>'civicplus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'civicplus.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/AgendaCenter/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/DocumentCenter/'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" 
style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Detect which platform a municipality website uses.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Args:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> url: Municipality website URL</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Returns:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Platform name or None if unknown</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> url_lower </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>.</span><span 
class="token plain">lower</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> platform</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> patterns </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> PLATFORM_PATTERNS</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">items</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token builtin">any</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pattern</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">lower</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url_lower </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> pattern </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> patterns</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> platform</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>get_scraper_class</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">platform</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Get appropriate scraper class for platform"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">legistar </span><span class="token keyword" 
style=color:#00009f>import</span><span class="token plain"> LegistarScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">granicus </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> GranicusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">generic </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> GenericScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scrapers </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> LegistarScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> GranicusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" 
style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">platform</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> GenericScraper</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-meeting-event-model>2. Meeting Event Model<a href=#2-meeting-event-model class=hash-link aria-label="Direct link to 2. Meeting Event Model" title="Direct link to 2. Meeting Event Model" translate=no></a></h3> | |
| <p><strong>File:</strong> <code>models/meeting_event.py</code></p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Standardized meeting event model.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Based on City Scrapers schema.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> dataclasses </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> dataclass</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> field</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datetime </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> typing </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> 
List</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> address</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> city</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" 
style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Link</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Agenda", "Minutes", "Video"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> href</span><span class="token punctuation" 
style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> content_type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "application/pdf", "text/html"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">MeetingEvent</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Normalized representation of a government meeting.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Compatible with City Scrapers 
format.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Core identification</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token builtin">id</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Hash of source_url + start_time</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> description</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> classification</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Board", "Commission", "Council", "Committee"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># Temporal</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> start</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> end</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> all_day</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>False</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Spatial</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Location</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># Content</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> links</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Link</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token builtin">list</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> source</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Original URL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Metadata</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> jurisdiction_name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token 
operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state_code</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> fips_code</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scraped_at</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">utcnow</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Health policy relevance (your special sauce!)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> oral_health_relevant</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>False</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> keywords_found</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token builtin">list</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> confidence_score</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">float</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0.0</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>to_dict</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">self</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Convert to dictionary for Delta Lake storage"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'id'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">id</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'description'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">description</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'classification'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">classification</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'start'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">start</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
string" style=color:#e3116c>'end'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">end</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">end </span><span class="token keyword" style=color:#00009f>else</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'all_day'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">all_day</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'location_name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">location</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">name</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><span class="token string" style=color:#e3116c>'location_address'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">location</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">address</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'links'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> l</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'href'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> l</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">href</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> l </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">links</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'source'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">source</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'jurisdiction_name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">jurisdiction_name</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'state_code'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">state_code</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'fips_code'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">fips_code</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'scraped_at'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> 
self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">scraped_at</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'oral_health_relevant'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">oral_health_relevant</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'keywords_found'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">keywords_found</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'confidence_score'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">confidence_score</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><br/></div></code></pre></div></div> | |
| <h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-enhanced-discovery-pipeline>3. Enhanced Discovery Pipeline<a href=#3-enhanced-discovery-pipeline class=hash-link aria-label="Direct link to 3. Enhanced Discovery Pipeline" title="Direct link to 3. Enhanced Discovery Pipeline" translate=no></a></h3> | |
| <p><strong>Add to:</strong> <code>discovery/discovery_pipeline.py</code></p> | |
| <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>discover_platform_capabilities</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">self</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> For each discovered URL, detect which platform it uses.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> This prepares optimal scraping strategies.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> discovery</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">platform_detector </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> 
detect_platform</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">info</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"Detecting platforms for discovered URLs..."</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> silver_path </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">settings</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">delta_lake_path</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>/silver/discovered_urls"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> urls_df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">spark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">format</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"delta"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">load</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">silver_path</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_urls </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> row </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> urls_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">take</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">urls_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">count</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> row_dict </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> row</span><span class="token punctuation" style=color:#393A34>.</span><span class="token 
plain">asDict</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> url </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'url'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Detect platform</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> platform </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'platform'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>else</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'generic'</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'scraper_ready'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>is</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_urls</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">row_dict</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Write back to Silver layer with platform info</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> pyspark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">sql </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Row</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_df </span><span class="token operator" 
style=color:#393A34>=</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">spark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">createDataFrame</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Row</span><span class="token punctuation" style=color:#393A34>(</span><span class="token operator" style=color:#393A34>**</span><span class="token plain">u</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> u </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> enriched_urls</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">write</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">format</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"delta"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">mode</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"overwrite"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">save</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">silver_path</span><span 
class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">success</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Platform detection complete - </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched_urls</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> URLs analyzed"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> enriched_urls</span><br/></div></code></pre></div></div> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=next-steps>Next Steps<a href=#next-steps class=hash-link aria-label="Direct link to Next Steps" title="Direct link to Next Steps" translate=no></a></h2> | |
| <ol> | |
| <li class=""><strong>Review Licenses</strong> - All of the projects mentioned use permissive licenses (MIT/Apache 2.0), but double-check each repository's LICENSE file before reusing code</li> | |
| <li class=""><strong>Clone Repos Locally</strong> - Study their code structure:<!-- --> | |
| <div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">cd /tmp</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">git clone https://github.com/biglocalnews/civic-scraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">git clone https://github.com/city-scrapers/city-scrapers</span><br/></div></code></pre></div></div> | |
| </li> | |
| <li class=""><strong>Add Attribution</strong> - In your <code>README.md</code>, credit these projects</li> | |
| <li class=""><strong>Start with Platform Detector</strong> - Implement <code>discovery/platform_detector.py</code> first</li> | |
| <li class=""><strong>Test with Your 76 URLs</strong> - Run platform detection on your discovered URLs</li> | |
| </ol> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=resources>Resources<a href=#resources class=hash-link aria-label="Direct link to Resources" title="Direct link to Resources" translate=no></a></h2> | |
| <ul> | |
| <li class=""><strong>Civic Scraper Docs</strong>: <a href=https://github.com/biglocalnews/civic-scraper/wiki target=_blank rel="noopener noreferrer" class="">https://github.com/biglocalnews/civic-scraper/wiki</a></li> | |
| <li class=""><strong>City Scrapers Tutorial</strong>: <a href=https://cityscrapers.org/docs/development/ target=_blank rel="noopener noreferrer" class="">https://cityscrapers.org/docs/development/</a></li> | |
| <li class=""><strong>CDP Architecture</strong>: <a href=https://councildataproject.org/ target=_blank rel="noopener noreferrer" class="">https://councildataproject.org/</a></li> | |
| <li class=""><strong>Legistar API Docs</strong>: <a href=https://webapi.legistar.com/Home/Examples target=_blank rel="noopener noreferrer" class="">https://webapi.legistar.com/Home/Examples</a></li> | |
| </ul> | |
| <hr/> | |
| <h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=questions-to-consider>Questions to Consider<a href=#questions-to-consider class=hash-link aria-label="Direct link to Questions to Consider" title="Direct link to Questions to Consider" translate=no></a></h2> | |
| <ol> | |
| <li class=""><strong>Do you want video transcript support?</strong> (CDP pattern, requires AWS/GCP credits)</li> | |
| <li class=""><strong>How important is real-time tracking?</strong> (vs. batch processing)</li> | |
| <li class=""><strong>Will you expose a public API?</strong> (Councilmatic patterns are useful here)</li> | |
| <li class=""><strong>Do you need to track voting records?</strong> (Councilmatic person/vote models)</li> | |
| </ol> | |
| <p>Let me know which phase you want to implement first!</div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/integrations/overview.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href=/docs/integrations/localview><div class=pagination-nav__sublabel>Previous</div><div class=pagination-nav__label>📚 LocalView Integration Guide</div></a><a class="pagination-nav__link pagination-nav__link--next" href=/docs/deployment/databricks-apps><div class=pagination-nav__sublabel>Next</div><div class=pagination-nav__label>Databricks Apps Deployment Guide</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#overview class="table-of-contents__link toc-highlight">Overview</a><li><a href=#current-state class="table-of-contents__link toc-highlight">Current State</a><li><a href=#1-civic-scraper-integration class="table-of-contents__link toc-highlight">1. Civic Scraper Integration</a><ul><li><a href=#what-to-adopt class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-platform-detection-logic class="table-of-contents__link toc-highlight">A. 
Platform Detection Logic</a><li><a href=#b-document-downloader-with-retry-logic class="table-of-contents__link toc-highlight">B. Document Downloader with Retry Logic</a></ul></ul><li><a href=#2-city-scrapers-integration class="table-of-contents__link toc-highlight">2. City Scrapers Integration</a><ul><li><a href=#what-to-adopt-1 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-standardized-event-schema class="table-of-contents__link toc-highlight">A. Standardized Event Schema</a><li><a href=#b-scraper-testing-framework class="table-of-contents__link toc-highlight">B. Scraper Testing Framework</a></ul></ul><li><a href=#3-council-data-project-cdp-integration class="table-of-contents__link toc-highlight">3. Council Data Project (CDP) Integration</a><ul><li><a href=#what-to-adopt-2 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-generic-ingestion-pipeline class="table-of-contents__link toc-highlight">A. Generic Ingestion Pipeline</a><li><a href=#b-video-transcript-integration-future class="table-of-contents__link toc-highlight">B. Video Transcript Integration (Future)</a></ul></ul><li><a href=#4-engagic-integration class="table-of-contents__link toc-highlight">4. Engagic Integration</a><ul><li><a href=#what-to-adopt-3 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-matter-tracking-across-meetings class="table-of-contents__link toc-highlight">A. "Matter" Tracking Across Meetings</a><li><a href=#b-llm-powered-document-parsing class="table-of-contents__link toc-highlight">B. LLM-Powered Document Parsing</a></ul></ul><li><a href=#5-councilmatic-integration class="table-of-contents__link toc-highlight">5. Councilmatic Integration</a><ul><li><a href=#what-to-adopt-4 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-personorganization-tracking class="table-of-contents__link toc-highlight">A. 
Person/Organization Tracking</a><li><a href=#b-search-interface-patterns class="table-of-contents__link toc-highlight">B. Search Interface Patterns</a></ul></ul><li><a href=#implementation-priorities class="table-of-contents__link toc-highlight">Implementation Priorities</a><ul><li><a href=#phase-1-foundation-week-1 class="table-of-contents__link toc-highlight">Phase 1: Foundation (Week 1)</a><li><a href=#phase-2-scraping-week-2-3 class="table-of-contents__link toc-highlight">Phase 2: Scraping (Week 2-3)</a><li><a href=#phase-3-intelligence-week-4 class="table-of-contents__link toc-highlight">Phase 3: Intelligence (Week 4)</a><li><a href=#phase-4-scale-week-5 class="table-of-contents__link toc-highlight">Phase 4: Scale (Week 5+)</a></ul><li><a href=#code-snippets-to-add-now class="table-of-contents__link toc-highlight">Code Snippets to Add Now</a><ul><li><a href=#1-platform-detector class="table-of-contents__link toc-highlight">1. Platform Detector</a><li><a href=#2-meeting-event-model class="table-of-contents__link toc-highlight">2. Meeting Event Model</a><li><a href=#3-enhanced-discovery-pipeline class="table-of-contents__link toc-highlight">3. 
Enhanced Discovery Pipeline</a></ul><li><a href=#next-steps class="table-of-contents__link toc-highlight">Next Steps</a><li><a href=#resources class="table-of-contents__link toc-highlight">Resources</a><li><a href=#questions-to-consider class="table-of-contents__link toc-highlight">Questions to Consider</a></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use 
href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li 
class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body> |