Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 178,326 Bytes
3d16fe6 896453f 1f7780e 896453f fcf298e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | <!doctype html><html lang=en dir=ltr class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-integrations/overview" data-has-hydrated=false><head><meta charset=UTF-8><meta name=generator content="Docusaurus v3.10.0"><title data-rh=true>Integration Guide: Reusing Open-Source Municipal Scraping Logic | Open Navigator</title><meta data-rh=true name=viewport content="width=device-width, initial-scale=1.0"/><meta data-rh=true property=og:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true name=twitter:image content=https://www.communityone.com/img/docusaurus-social-card.jpg /><meta data-rh=true property=og:url content=https://www.communityone.com/docs/integrations/overview /><meta data-rh=true property=og:locale content=en /><meta data-rh=true name=docusaurus_locale content=en /><meta data-rh=true name=docsearch:language content=en /><meta data-rh=true name=keywords content="civic engagement, policy tracking, meeting minutes, nonprofit tracking, municipal government, advocacy, open data, local government"/><meta data-rh=true property=og:type content=website /><meta data-rh=true property=og:site_name content="Open Navigator"/><meta data-rh=true name=twitter:card content=summary_large_image /><meta data-rh=true name=docusaurus_version content=current /><meta data-rh=true name=docusaurus_tag content=docs-default-current /><meta data-rh=true name=docsearch:version content=current /><meta data-rh=true 
name=docsearch:docusaurus_tag content=docs-default-current /><meta data-rh=true property=og:title content="Integration Guide: Reusing Open-Source Municipal Scraping Logic | Open Navigator"/><meta data-rh=true name=description content=Overview /><meta data-rh=true property=og:description content=Overview /><link data-rh=true rel=icon href=/img/favicon.ico /><link data-rh=true rel=canonical href=https://www.communityone.com/docs/integrations/overview /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/integrations/overview hreflang=en /><link data-rh=true rel=alternate href=https://www.communityone.com/docs/integrations/overview hreflang=x-default /><script data-rh=true type=application/ld+json>{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","item":"https://www.communityone.com/docs/for-developers","name":"Developers & Technical Users","position":1},{"@type":"ListItem","item":"https://www.communityone.com/docs/integrations/overview","name":"Integration Guide: Reusing Open-Source Municipal Scraping Logic","position":2}]}</script><link rel=alternate type=application/rss+xml href=/blog/rss.xml title="Open Navigator RSS Feed"><link rel=alternate type=application/atom+xml href=/blog/atom.xml title="Open Navigator Atom Feed"><link rel=preconnect href=https://www.google-analytics.com><link rel=preconnect href=https://www.googletagmanager.com><script async src="https://www.googletagmanager.com/gtag/js?id=G-5EQV815915"></script><script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-5EQV815915",{anonymize_ip:!0})</script><link rel=stylesheet href=/assets/css/styles.c89d6b2d.css /><script src=/assets/js/runtime~main.c8fa085e.js defer></script><script src=/assets/js/main.6e24e536.js defer></script></head><body><svg style="display: none;"><defs>
<symbol id=theme-svg-external-link viewBox="0 0 24 24"><path fill=currentColor d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
</defs></svg>
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme-7e9")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><link rel=preload as=image href=/img/communityone_logo.svg /><script type=application/ld+json>{"@context":"https://schema.org","@type":"Organization","address":{"@type":"PostalAddress","addressCountry":"US","addressLocality":"Tuscaloosa","addressRegion":"AL","postalCode":"35406","streetAddress":"5617 Lakeridge Court"},"contactPoint":{"@type":"ContactPoint","availableLanguage":["English"],"contactType":"Customer Service","email":"johnbowyer@communityone.com"},"description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI. 
The open path to everything local.","email":"johnbowyer@communityone.com","legalName":"CommunityOne","logo":"https://www.communityone.com/img/communityone_logo.svg","name":"CommunityOne","sameAs":["https://www.facebook.com/communityone","https://www.instagram.com/communityone","https://twitter.com/communityone","https://www.linkedin.com/company/communityone","https://www.youtube.com/@communityone","https://discord.gg/communityone","https://github.com/getcommunityone/open-navigator"],"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"WebSite","alternateName":"CommunityOne Open Navigator","description":"AI-powered civic engagement platform tracking jurisdictions, nonprofits, and government meetings","name":"Open Navigator","potentialAction":{"@type":"SearchAction","query-input":"required name=search_term_string","target":{"@type":"EntryPoint","urlTemplate":"https://www.communityone.com/search?q={search_term_string}"}},"url":"https://www.communityone.com"}</script><script type=application/ld+json>{"@context":"https://schema.org","@type":"SoftwareApplication","aggregateRating":{"@type":"AggregateRating","ratingCount":"1","ratingValue":"5"},"applicationCategory":"BusinessApplication","description":"Track 90,000+ jurisdictions, 1.8M nonprofits, and analyze meeting minutes with AI","featureList":["Track 90,000+ jurisdictions","Monitor 1.8M nonprofits","Analyze meeting minutes","Legislative bill tracking","Campaign finance data"],"name":"Open Navigator","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"operatingSystem":"Web","screenshot":"https://www.communityone.com/img/docusaurus-social-card.jpg","softwareVersion":"1.0.0"}</script><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="theme-layout-navbar navbar navbar--fixed-top"><div class=navbar__inner><div 
class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a href=https://www.communityone.com target=_self rel="noopener noreferrer" class=navbar__brand><div class=navbar__logo><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"/><img src=/img/communityone_logo.svg alt="CommunityOne Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"/></div><b class="navbar__title text--truncate">Open Navigator Home</b></a><a class="navbar__item navbar__link" href=/docs/intro>Getting Started</a><a class="navbar__item navbar__link" href=/docs/for-families>Families & Individuals</a><a class="navbar__item navbar__link" href=/docs/for-advocates>Policy Makers</a><a class="navbar__item navbar__link" href=/docs/for-developers>Developers</a><a class="navbar__item navbar__link" href=/docs/data-sources/citations>Data and Terms</a><a class="navbar__item navbar__link" href=/blog>Blog</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill=currentColor 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 aria-hidden=true class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill=currentColor d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"/></svg></button></div><div class=navbarSearchContainer_Bca1></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div 
id=__docusaurus_skipToContent_fallback class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class=docsWrapper_hBAB><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type=button></button><div class=docRoot_UBD9><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class=sidebarViewport_aRkj><div class=sidebar_njMd><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=true href=/docs/intro><span title="Getting Started" class=categoryLinkLabel_W154>Getting Started</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/intro><span title=Introduction class=linkLabel_WmDU>Introduction</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Open Navigator" class=linkLabel_WmDU>Open Navigator</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-families><span title="Families & Individuals" class=categoryLinkLabel_W154>Families & Individuals</span></a><button aria-label="Collapse sidebar category 'Families & Individuals'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a 
class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/families/community-events><span title="Resources for Families" class=categoryLinkLabel_W154>Resources for Families</span></a></div><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/open-navigator><span title="Getting Started with Open Navigator" class=linkLabel_WmDU>Getting Started with Open Navigator</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class=menu__link tabindex=0 href=/docs/data-sources/citations><span title="Data and Citations" class=linkLabel_WmDU>Data and Citations</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist" href=/docs/for-advocates><span title="Policy Makers & Advocates" class=categoryLinkLabel_W154>Policy Makers & Advocates</span></a><button aria-label="Collapse sidebar category 'Policy Makers & Advocates'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/overview><span title="Understanding the Data" class=categoryLinkLabel_W154>Understanding the Data</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false 
tabindex=0 href=/docs/guides/political-economy><span title="Analysis & Strategy" class=categoryLinkLabel_W154>Analysis & Strategy</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/case-studies/tuscaloosa-complete><span title="Real-World Examples" class=categoryLinkLabel_W154>Real-World Examples</span></a></div></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--active" href=/docs/for-developers><span title="Developers & Technical Users" class=categoryLinkLabel_W154>Developers & Technical Users</span></a><button aria-label="Collapse sidebar category 'Developers & Technical Users'" aria-expanded=true type=button class="clean-btn menu__caret"></button></div><ul class=menu__list><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/quickstart><span title="Setup & Installation" class=categoryLinkLabel_W154>Setup & Installation</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/data-sources/citations><span title="Data Sources (Technical)" class=categoryLinkLabel_W154>Data Sources (Technical)</span></a></div><li 
class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/guides/jurisdiction-setup><span title="How-To Guides" class=categoryLinkLabel_W154>How-To Guides</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" role=button aria-expanded=true tabindex=0 href=/docs/integrations/mcp-server><span title=Integrations class=categoryLinkLabel_W154>Integrations</span></a></div><ul class=menu__list><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/mcp-server><span title="Model Context Protocol (MCP) Server" class=linkLabel_WmDU>Model Context Protocol (MCP) Server</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/grants-gov-api><span title="Grants.gov API Integration" class=linkLabel_WmDU>Grants.gov API Integration</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-political-contributions><span title="FEC Political Contributions" class=linkLabel_WmDU>FEC Political Contributions</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-campaign-finance><span title="FEC Campaign Finance Integration" class=linkLabel_WmDU>FEC Campaign Finance Integration</span></a><li class="theme-doc-sidebar-item-link 
theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/dataverse-summary><span title="🎉 Harvard Dataverse Integration - Complete!" class=linkLabel_WmDU>🎉 Harvard Dataverse Integration - Complete!</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/dataverse><span title="📚 Dataverse API Integration" class=linkLabel_WmDU>📚 Dataverse API Integration</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-automated><span title="Automated eBoard Scraping Solutions" class=linkLabel_WmDU>Automated eBoard Scraping Solutions</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-cookies><span title="eBoard Cookie Extraction Guide" class=linkLabel_WmDU>eBoard Cookie Extraction Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/eboard-manual><span title="eBoard Platform Manual Download Guide" class=linkLabel_WmDU>eBoard Platform Manual Download Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/fec-integration-summary><span title="FEC Campaign Finance Integration - Implementation Summary" class=linkLabel_WmDU>FEC Campaign Finance Integration - Implementation Summary</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/frontend><span title="Frontend Integration Guide" class=linkLabel_WmDU>Frontend Integration Guide</span></a><li class="theme-doc-sidebar-item-link 
theme-doc-sidebar-item-link-level-3 menu__list-item"><a class=menu__link tabindex=0 href=/docs/integrations/localview><span title="📚 LocalView Integration Guide" class=linkLabel_WmDU>📚 LocalView Integration Guide</span></a><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-3 menu__list-item"><a class="menu__link menu__link--active" aria-current=page tabindex=0 href=/docs/integrations/overview><span title="Integration Guide: Reusing Open-Source Municipal Scraping Logic" class=linkLabel_WmDU>Integration Guide: Reusing Open-Source Municipal Scraping Logic</span></a></ul><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/deployment/databricks-apps><span title=Deployment class=categoryLinkLabel_W154>Deployment</span></a></div><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class=menu__list-item-collapsible><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role=button aria-expanded=false tabindex=0 href=/docs/development/database-setup><span title=Development class=categoryLinkLabel_W154>Development</span></a></div></ul></ul></nav></div></div></aside><main class=docMainContainer_TBSr><div class="container padding-top--md padding-bottom--lg"><div class=row><div class="col docItemCol_VOVn"><div class=docItemContainer_Djhp><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label=Breadcrumbs><ul class=breadcrumbs><li class=breadcrumbs__item><a aria-label="Home page" class=breadcrumbs__link href=/><svg viewBox="0 0 24 24" class=breadcrumbHomeIcon_YNFT><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 
3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill=currentColor /></svg></a><li class=breadcrumbs__item><a class=breadcrumbs__link href=/docs/for-developers><span>Developers & Technical Users</span></a><li class=breadcrumbs__item><span class=breadcrumbs__link>Integrations</span><li class="breadcrumbs__item breadcrumbs__item--active"><span class=breadcrumbs__link>Integration Guide: Reusing Open-Source Municipal Scraping Logic</span></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type=button class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Integration Guide: Reusing Open-Source Municipal Scraping Logic</h1></header>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=overview>Overview<a href=#overview class=hash-link aria-label="Direct link to Overview" title="Direct link to Overview" translate=no></a></h2>
<p>This guide shows how to integrate proven patterns from established open-source projects into the Oral Health Policy Pulse scraping pipeline.</p>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=current-state>Current State<a href=#current-state class=hash-link aria-label="Direct link to Current State" title="Direct link to Current State" translate=no></a></h2>
<p>✅ <strong>You already have:</strong></p>
<ul>
<li>Census Gazetteer data with 85,302 jurisdictions (names + FIPS codes)</li>
<li>GSA .gov domain matching</li>
<li>76 discovered URLs ready for scraping</li>
<li>Legistar platform references in the codebase</li>
<li>Base ScraperAgent class in <code>agents/scraper.py</code></li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-civic-scraper-integration>1. Civic Scraper Integration<a href=#1-civic-scraper-integration class=hash-link aria-label="Direct link to 1. Civic Scraper Integration" title="Direct link to 1. Civic Scraper Integration" translate=no></a></h2>
<p><strong>Repository:</strong> <code>biglocalnews/civic-scraper</code><br/>
<strong>License:</strong> Apache 2.0 (✅ Compatible)</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt>What to Adopt:<a href=#what-to-adopt class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-platform-detection-logic>A. Platform Detection Logic<a href=#a-platform-detection-logic class=hash-link aria-label="Direct link to A. Platform Detection Logic" title="Direct link to A. Platform Detection Logic" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent platform detection</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Location: civic_scraper/platforms/__init__.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">PLATFORMS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> LegistarScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> GranicusScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" 
style=color:#e3116c>'calagenda'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> CalAgendaScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'civicplus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> CivicPlusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Auto-detect which platform a URL uses"""</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar.com'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Legistar/'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>elif</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus.com'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Mediasite/'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span 
class="token string" style=color:#e3116c>'granicus'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ... more patterns</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Add <code>discovery/platform_detector.py</code> using their patterns</p>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-document-downloader-with-retry-logic>B. Document Downloader with Retry Logic<a href=#b-document-downloader-with-retry-logic class=hash-link aria-label="Direct link to B. Document Downloader with Retry Logic" title="Direct link to B. Document Downloader with Retry Logic" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># civic_scraper/download.py has robust downloading</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Features:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Exponential backoff</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Content-type validation</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Duplicate detection via hash</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Progress tracking</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token 
function" style=color:#d73a49>download_document</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> session</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> httpx</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">AsyncClient</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token builtin">bytes</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Download with retries and validation"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> attempt </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> </span><span class="token builtin">range</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>3</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>try</span><span class="token punctuation" style=color:#393A34>:</span><span 
class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> session</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> timeout</span><span class="token operator" style=color:#393A34>=</span><span class="token number" style=color:#36acaa>30.0</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">raise_for_status</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Validate it's actually a document</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> content_type </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">headers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" 
style=color:#e3116c>'content-type'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>''</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'pdf'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> content_type </span><span class="token keyword" style=color:#00009f>or</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'html'</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> content_type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">content</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>except</span><span class="token plain"> Exception </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> e</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> attempt </span><span class="token operator" style=color:#393A34>==</span><span class="token plain"> </span><span 
class="token number" style=color:#36acaa>2</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>raise</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> asyncio</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">sleep</span><span class="token punctuation" style=color:#393A34>(</span><span class="token number" style=color:#36acaa>2</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>**</span><span class="token plain"> attempt</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Enhance <code>agents/scraper.py</code> with their retry patterns</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-city-scrapers-integration>2. City Scrapers Integration<a href=#2-city-scrapers-integration class=hash-link aria-label="Direct link to 2. City Scrapers Integration" title="Direct link to 2. City Scrapers Integration" translate=no></a></h2>
<p><strong>Repository:</strong> <code>city-scrapers/city-scrapers</code><br/>
<strong>License:</strong> MIT (✅ Compatible)</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-1>What to Adopt:<a href=#what-to-adopt-1 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-standardized-event-schema>A. Standardized Event Schema<a href=#a-standardized-event-schema class=hash-link aria-label="Direct link to A. Standardized Event Schema" title="Direct link to A. Standardized Event Schema" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They normalize all meeting data to a common format</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># city_scrapers/core/models.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Event</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> description</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
classification</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Board", "Commission", "Council"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> start</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> end</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> all_day</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> links</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Dict</span><span class="token punctuation" 
style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># [{"title": "Agenda", "href": "..."}]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> source</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Classification types they use:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">CLASSIFICATIONS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Board"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Commission"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Committee"</span><span 
class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Council"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Town Hall"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"Public Hearing"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Create <code>models/meeting_event.py</code> with this schema for your Silver layer</p>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-scraper-testing-framework>B. Scraper Testing Framework<a href=#b-scraper-testing-framework class=hash-link aria-label="Direct link to B. Scraper Testing Framework" title="Direct link to B. Scraper Testing Framework" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent test patterns</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># tests/test_scrapers.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>test_scraper</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Test with frozen HTML responses"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scraper </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> CityScraper</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Use saved HTML files to avoid live requests during testing</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>with</span><span class="token plain"> </span><span class="token builtin">open</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>'tests/fixtures/sample_calendar.html'</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>as</span><span class="token plain"> f</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> results </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> scraper</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">f</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> </span><span class="token builtin">len</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">results</span><span class="token 
punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>></span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> results</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>assert</span><span class="token plain"> results</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">source</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Add a <code>tests/fixtures/</code> directory with sample HTML from different platforms.</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-council-data-project-cdp-integration>3. Council Data Project (CDP) Integration<a href=#3-council-data-project-cdp-integration class=hash-link aria-label="Direct link to 3. Council Data Project (CDP) Integration" title="Direct link to 3. Council Data Project (CDP) Integration" translate=no></a></h2>
<p><strong>Repository:</strong> <code>CouncilDataProject/cdp-scrapers</code><br/>
<strong>License:</strong> MIT (✅ Compatible)</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-2>What to Adopt:<a href=#what-to-adopt-2 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-generic-ingestion-pipeline>A. Generic Ingestion Pipeline<a href=#a-generic-ingestion-pipeline class=hash-link aria-label="Direct link to A. Generic Ingestion Pipeline" title="Direct link to A. Generic Ingestion Pipeline" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># CDP has a beautiful generic scraper pipeline</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># cdp_scrapers/scraper_utils.py</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">IngestionModel</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Standard format for ingested data"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> sessions</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Session</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Individual meetings</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token 
plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Session</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> video_uri</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> session_datetime</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> session_index</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">int</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> caption_uri</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator 
annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">EventMinutesItem</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> minutes_item</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> MinutesItem</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>reduced_list</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">items</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> key_attr</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span 
class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Deduplicate items by a key attribute"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> seen </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token builtin">set</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> result </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> item </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> items</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> key </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token builtin">getattr</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token plain">item</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> key_attr</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> key </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> seen</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> seen</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">add</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">key</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> result</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">item</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> result</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Create <code>models/ingestion.py</code> based on their schemas</p>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-video-transcript-integration-future>B. Video Transcript Integration (Future)<a href=#b-video-transcript-integration-future class=hash-link aria-label="Direct link to B. Video Transcript Integration (Future)" title="Direct link to B. Video Transcript Integration (Future)" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># CDP processes meeting videos into searchable transcripts</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># This is advanced but incredibly valuable</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># They use:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - AWS Transcribe / Google Speech-to-Text</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Sentence indexing with timestamps</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># - Speaker diarization (who said what)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" 
style=color:#999988;font-style:italic># You could add this in Phase 2 after document scraping works</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Document in <code>docs/ROADMAP.md</code> for future implementation</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=4-engagic-integration>4. Engagic Integration<a href=#4-engagic-integration class=hash-link aria-label="Direct link to 4. Engagic Integration" title="Direct link to 4. Engagic Integration" translate=no></a></h2>
<p><strong>Repository:</strong> <code>Engagic/engagic</code><br/>
<strong>License:</strong> Check repo (likely AGPL)</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-3>What to Adopt:<a href=#what-to-adopt-3 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-matter-tracking-across-meetings>A. "Matter" Tracking Across Meetings<a href=#a-matter-tracking-across-meetings class=hash-link aria-label='Direct link to A. "Matter" Tracking Across Meetings' title='Direct link to A. "Matter" Tracking Across Meetings' translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Engagic tracks individual legislative items across meetings</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># This is PERFECT for oral health policy tracking</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Matter</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> matter_id</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> matter_number</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># "Bill 2024-001"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token builtin">type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Ordinance", "Resolution", "Motion"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> first_introduced</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> status</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Introduced", "Committee", "Passed", "Failed"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> votes</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Vote</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> related_documents</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span 
class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Track how a fluoridation ordinance evolves:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 1: Introduced (just mentioned in minutes)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 2: Committee review (document link added)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 3: Public hearing (comments recorded)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Meeting 4: Final vote (result captured)</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Create <code>models/matter.py</code> for tracking policy evolution</p>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-llm-powered-document-parsing>B. LLM-Powered Document Parsing<a href=#b-llm-powered-document-parsing class=hash-link aria-label="Direct link to B. LLM-Powered Document Parsing" title="Direct link to B. LLM-Powered Document Parsing" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Engagic uses LLMs to extract structure from "blob" PDFs</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># You already have OpenAI configured!</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>extract_agenda_items</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pdf_text</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">AgendaItem</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Use GPT to extract structured items from unstructured text"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> prompt </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Extract agenda items from this meeting minutes text.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> For each item, identify:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Item number</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Title</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Description </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Any votes or decisions</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> - Keywords related to health, dental, fluoride, water, public health</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Return JSON array.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> 
"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> response </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>await</span><span class="token plain"> openai_client</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">chat</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">completions</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">create</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> model</span><span class="token operator" style=color:#393A34>=</span><span class="token string" style=color:#e3116c>"gpt-4o-mini"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> messages</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"role"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"system"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"content"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span 
class="token string" style=color:#e3116c>"You extract structured data from government documents"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"role"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"user"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"content"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">prompt</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>\n\n</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">pdf_text</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> response_format</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>"type"</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"json_object"</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> json</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">loads</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">response</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">choices</span><span class="token punctuation" style=color:#393A34>[</span><span class="token number" style=color:#36acaa>0</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">message</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">content</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Add <code>extraction/llm_parser.py</code> using your existing OpenAI setup</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=5-councilmatic-integration>5. Councilmatic Integration<a href=#5-councilmatic-integration class=hash-link aria-label="Direct link to 5. Councilmatic Integration" title="Direct link to 5. Councilmatic Integration" translate=no></a></h2>
<p><strong>Repository:</strong> <code>datamade/councilmatic-starter-template</code><br/>
<strong>License:</strong> MIT (✅ Compatible)</p>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=what-to-adopt-4>What to Adopt:<a href=#what-to-adopt-4 class=hash-link aria-label="Direct link to What to Adopt:" title="Direct link to What to Adopt:" translate=no></a></h3>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=a-personorganization-tracking>A. Person/Organization Tracking<a href=#a-personorganization-tracking class=hash-link aria-label="Direct link to A. Person/Organization Tracking" title="Direct link to A. Person/Organization Tracking" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># Councilmatic tracks who voted on what</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Useful for understanding power dynamics around oral health policy</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Person</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> role</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Council 
Member", "Mayor", "Commissioner"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> district</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> party</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Vote</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> motion</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> option</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token 
comment" style=color:#999988;font-style:italic># "yes", "no", "abstain"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> person</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Person</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> date</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Add the <code>Person</code> and <code>Vote</code> models to <code>models/governance.py</code></p>
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id=b-search-interface-patterns>B. Search Interface Patterns<a href=#b-search-interface-patterns class=hash-link aria-label="Direct link to B. Search Interface Patterns" title="Direct link to B. Search Interface Patterns" translate=no></a></h4>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token comment" style=color:#999988;font-style:italic># They have excellent search UX</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># filters.py shows what users want:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">SEARCH_FILTERS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"date_range"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"topic"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["health", "water", "budget"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"organization"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> 
</span><span class="token comment" style=color:#999988;font-style:italic># Which board/commission</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"document_type"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["agenda", "minutes", "transcript"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>"status"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># ["pending", "passed", "failed"]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token comment" style=color:#999988;font-style:italic># Your FastAPI endpoints could mirror this</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@app</span><span class="token decorator annotation punctuation" style=color:#393A34>.</span><span class="token decorator annotation punctuation" style=color:#393A34>get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"/api/search"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>search_documents</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> query</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> topics</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> Query</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default</span><span class="token operator" style=color:#393A34>=</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>"oral_health"</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>"fluoridation"</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
date_from</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">date</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> date_to</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">date</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Search scraped documents with filters"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Query your Delta Lake Gold layer</span><br/></div></code></pre></div></div>
<p><strong>Your Action:</strong> Add to <code>api/routes/search.py</code> (create it if it doesn't exist)</p>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=implementation-priorities>Implementation Priorities<a href=#implementation-priorities class=hash-link aria-label="Direct link to Implementation Priorities" title="Direct link to Implementation Priorities" translate=no></a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-1-foundation-week-1>Phase 1: Foundation (Week 1)<a href=#phase-1-foundation-week-1 class=hash-link aria-label="Direct link to Phase 1: Foundation (Week 1)" title="Direct link to Phase 1: Foundation (Week 1)" translate=no></a></h3>
<ul class="contains-task-list containsTaskList_mC6p">
<li class=task-list-item><input type=checkbox disabled/> <strong>Platform Detection</strong> - Add <code>discovery/platform_detector.py</code> from Civic Scraper patterns</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Standardized Schema</strong> - Create <code>models/meeting_event.py</code> from City Scrapers</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Enhanced Downloader</strong> - Improve <code>agents/scraper.py</code> retry logic</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-2-scraping-week-2-3>Phase 2: Scraping (Week 2-3)<a href=#phase-2-scraping-week-2-3 class=hash-link aria-label="Direct link to Phase 2: Scraping (Week 2-3)" title="Direct link to Phase 2: Scraping (Week 2-3)" translate=no></a></h3>
<ul class="contains-task-list containsTaskList_mC6p">
<li class=task-list-item><input type=checkbox disabled/> <strong>Legistar Scraper</strong> - Implement full Legistar support using Civic Scraper patterns</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Generic HTML Parser</strong> - Use BeautifulSoup patterns from City Scrapers</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>PDF Extraction</strong> - Add pypdf (formerly PyPDF2) or pdfplumber support</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-3-intelligence-week-4>Phase 3: Intelligence (Week 4)<a href=#phase-3-intelligence-week-4 class=hash-link aria-label="Direct link to Phase 3: Intelligence (Week 4)" title="Direct link to Phase 3: Intelligence (Week 4)" translate=no></a></h3>
<ul class="contains-task-list containsTaskList_mC6p">
<li class=task-list-item><input type=checkbox disabled/> <strong>LLM Parser</strong> - Add <code>extraction/llm_parser.py</code> from Engagic patterns</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Matter Tracking</strong> - Create <code>models/matter.py</code> for policy evolution</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Keyword Detection</strong> - Detect oral health, fluoridation, and dental policy keywords</li>
</ul>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=phase-4-scale-week-5>Phase 4: Scale (Week 5+)<a href=#phase-4-scale-week-5 class=hash-link aria-label="Direct link to Phase 4: Scale (Week 5+)" title="Direct link to Phase 4: Scale (Week 5+)" translate=no></a></h3>
<ul class="contains-task-list containsTaskList_mC6p">
<li class=task-list-item><input type=checkbox disabled/> <strong>Test All 76 URLs</strong> - Run full scraper on discovered targets</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Expand to All Municipalities</strong> - Process all 32,333 jurisdictions</li>
<li class=task-list-item><input type=checkbox disabled/> <strong>Video Transcripts</strong> - CDP-style video processing (future)</li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=code-snippets-to-add-now>Code Snippets to Add Now<a href=#code-snippets-to-add-now class=hash-link aria-label="Direct link to Code Snippets to Add Now" title="Direct link to Code Snippets to Add Now" translate=no></a></h2>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=1-platform-detector>1. Platform Detector<a href=#1-platform-detector class=hash-link aria-label="Direct link to 1. Platform Detector" title="Direct link to 1. Platform Detector" translate=no></a></h3>
<p><strong>File:</strong> <code>discovery/platform_detector.py</code></p>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Platform detection for municipal websites.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Based on patterns from biglocalnews/civic-scraper.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> typing </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Optional</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> urllib</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">parse </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> urlparse</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain">PLATFORM_PATTERNS </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" 
style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Legistar/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/LegislationDetail.aspx'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Calendar.aspx'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><span class="token string" style=color:#e3116c>'granicus.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/Mediasite/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/ViewPublisher.php'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'municode'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'municode.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/meeting_minutes'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" 
style=color:#e3116c>'civicplus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'civicplus.com'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/AgendaCenter/'</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'/DocumentCenter/'</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" 
style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Detect which platform a municipality website uses.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Args:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> url: Municipality website URL</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> </span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Returns:</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Platform name or None if unknown</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> url_lower </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> url</span><span class="token punctuation" style=color:#393A34>.</span><span 
class="token plain">lower</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> platform</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> patterns </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> PLATFORM_PATTERNS</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">items</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> </span><span class="token builtin">any</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">pattern</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">lower</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> url_lower </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> pattern </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> patterns</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token 
plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> platform</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>get_scraper_class</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">platform</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Get appropriate scraper class for platform"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">legistar </span><span class="token keyword" 
style=color:#00009f>import</span><span class="token plain"> LegistarScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">granicus </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> GranicusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">generic </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> GenericScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scrapers </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'legistar'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> LegistarScraper</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'granicus'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> GranicusScraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" 
style=color:#393A34>}</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> scrapers</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">get</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">platform</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> GenericScraper</span><span class="token punctuation" style=color:#393A34>)</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=2-meeting-event-model>2. Meeting Event Model<a href=#2-meeting-event-model class=hash-link aria-label="Direct link to 2. Meeting Event Model" title="Direct link to 2. Meeting Event Model" translate=no></a></h3>
<p><strong>File:</strong> <code>models/meeting_event.py</code></p>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Standardized meeting event model.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>Based on City Scrapers schema.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> dataclasses </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> dataclass</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> field</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> datetime </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> typing </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> 
List</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> address</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> city</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" 
style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">Link</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Agenda", "Minutes", "Video"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> href</span><span class="token punctuation" 
style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> content_type</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "application/pdf", "text/html"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain" style=display:inline-block></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token decorator annotation punctuation" style=color:#393A34>@dataclass</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"></span><span class="token keyword" style=color:#00009f>class</span><span class="token plain"> </span><span class="token class-name">MeetingEvent</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Normalized representation of a government meeting.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> Compatible with City Scrapers 
format.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Core identification</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token builtin">id</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Hash of source_url + start_time</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> title</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> description</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> classification</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># "Board", "Commission", "Council", "Committee"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># Temporal</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> start</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> end</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> all_day</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>False</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Spatial</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> location</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Location</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" 
style=color:#999988;font-style:italic># Content</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> links</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Link</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token builtin">list</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> source</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Original URL</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Metadata</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> jurisdiction_name</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token 
operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> state_code</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">str</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> fips_code</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> Optional</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> scraped_at</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> datetime </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token plain">datetime</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">utcnow</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Health policy relevance (your special sauce!)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> oral_health_relevant</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">bool</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>False</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> keywords_found</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> List</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> field</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">default_factory</span><span class="token operator" style=color:#393A34>=</span><span class="token builtin">list</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> confidence_score</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token builtin">float</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token number" style=color:#36acaa>0.0</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>to_dict</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">self</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>-</span><span class="token operator" style=color:#393A34>></span><span class="token plain"> Dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token builtin">str</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> Any</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""Convert to dictionary for Delta Lake storage"""</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>{</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'id'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">id</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'description'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">description</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'classification'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">classification</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'start'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">start</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token 
string" style=color:#e3116c>'end'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">end</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">end </span><span class="token keyword" style=color:#00009f>else</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'all_day'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">all_day</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'location_name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">location</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">name</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> 
</span><span class="token string" style=color:#e3116c>'location_address'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">location</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">address</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'links'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>{</span><span class="token string" style=color:#e3116c>'title'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> l</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">title</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'href'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> l</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">href</span><span class="token punctuation" style=color:#393A34>}</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> l </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">links</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'source'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">source</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'jurisdiction_name'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">jurisdiction_name</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'state_code'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">state_code</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'fips_code'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">fips_code</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'scraped_at'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> 
self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">scraped_at</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">isoformat</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'oral_health_relevant'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">oral_health_relevant</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'keywords_found'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">keywords_found</span><span class="token punctuation" style=color:#393A34>,</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token string" style=color:#e3116c>'confidence_score'</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">confidence_score</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>}</span><br/></div></code></pre></div></div>
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id=3-enhanced-discovery-pipeline>3. Enhanced Discovery Pipeline<a href=#3-enhanced-discovery-pipeline class=hash-link aria-label="Direct link to 3. Enhanced Discovery Pipeline" title="Direct link to 3. Enhanced Discovery Pipeline" translate=no></a></h3>
<p><strong>Add to:</strong> <code>discovery/discovery_pipeline.py</code></p>
<div class="language-python codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-python codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>async</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>def</span><span class="token plain"> </span><span class="token function" style=color:#d73a49>discover_platform_capabilities</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">self</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token triple-quoted-string string" style=color:#e3116c>"""</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> For each discovered URL, detect which platform it uses.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> This prepares optimal scraping strategies.</span><br/></div><div class=token-line style=color:#393A34><span class="token triple-quoted-string string" style=color:#e3116c> """</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> discovery</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">platform_detector </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> 
detect_platform</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">info</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"Detecting platforms for discovered URLs..."</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> silver_path </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token string-interpolation string" style=color:#e3116c>f"</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation">settings</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>.</span><span class="token string-interpolation interpolation">delta_lake_path</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c>/silver/discovered_urls"</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> urls_df </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">spark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">read</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">format</span><span class="token punctuation" 
style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"delta"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">load</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">silver_path</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_urls </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> </span><span class="token punctuation" style=color:#393A34>[</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> row </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> urls_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">take</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">urls_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">count</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>:</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> row_dict </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> row</span><span class="token punctuation" style=color:#393A34>.</span><span class="token 
plain">asDict</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> url </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'url'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Detect platform</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> platform </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> detect_platform</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">url</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'platform'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>if</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>else</span><span class="token plain"> </span><span class="token string" style=color:#e3116c>'generic'</span><span class="token plain"></span><br/></div><div class=token-line 
style=color:#393A34><span class="token plain"> row_dict</span><span class="token punctuation" style=color:#393A34>[</span><span class="token string" style=color:#e3116c>'scraper_ready'</span><span class="token punctuation" style=color:#393A34>]</span><span class="token plain"> </span><span class="token operator" style=color:#393A34>=</span><span class="token plain"> platform </span><span class="token keyword" style=color:#00009f>is</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>not</span><span class="token plain"> </span><span class="token boolean" style=color:#36acaa>None</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_urls</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">append</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">row_dict</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token comment" style=color:#999988;font-style:italic># Write back to Silver layer with platform info</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>from</span><span class="token plain"> pyspark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">sql </span><span class="token keyword" style=color:#00009f>import</span><span class="token plain"> Row</span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_df </span><span class="token operator" 
style=color:#393A34>=</span><span class="token plain"> self</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">spark</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">createDataFrame</span><span class="token punctuation" style=color:#393A34>(</span><span class="token punctuation" style=color:#393A34>[</span><span class="token plain">Row</span><span class="token punctuation" style=color:#393A34>(</span><span class="token operator" style=color:#393A34>**</span><span class="token plain">u</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"> </span><span class="token keyword" style=color:#00009f>for</span><span class="token plain"> u </span><span class="token keyword" style=color:#00009f>in</span><span class="token plain"> enriched_urls</span><span class="token punctuation" style=color:#393A34>]</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> enriched_df</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">write</span><span class="token punctuation" style=color:#393A34>.</span><span class="token builtin">format</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"delta"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">mode</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string" style=color:#e3116c>"overwrite"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">save</span><span class="token punctuation" style=color:#393A34>(</span><span class="token plain">silver_path</span><span 
class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> logger</span><span class="token punctuation" style=color:#393A34>.</span><span class="token plain">success</span><span class="token punctuation" style=color:#393A34>(</span><span class="token string-interpolation string" style=color:#e3116c>f"Platform detection complete - </span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>{</span><span class="token string-interpolation interpolation builtin">len</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>(</span><span class="token string-interpolation interpolation">enriched_urls</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>)</span><span class="token string-interpolation interpolation punctuation" style=color:#393A34>}</span><span class="token string-interpolation string" style=color:#e3116c> URLs analyzed"</span><span class="token punctuation" style=color:#393A34>)</span><span class="token plain"></span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><br/></div><div class=token-line style=color:#393A34><span class="token plain"> </span><span class="token keyword" style=color:#00009f>return</span><span class="token plain"> enriched_urls</span><br/></div></code></pre></div></div>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=next-steps>Next Steps<a href=#next-steps class=hash-link aria-label="Direct link to Next Steps" title="Direct link to Next Steps" translate=no></a></h2>
<ol>
<li class=""><strong>Review Licenses</strong> - All mentioned projects use permissive licenses (MIT/Apache 2.0), but double-check each repository's LICENSE file before reusing its code</li>
<li class=""><strong>Clone Repos Locally</strong> - Study their code structure:<!-- -->
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style=--prism-color:#393A34;--prism-background-color:#f6f8fa><div class=codeBlockContent_QJqH><pre tabindex=0 class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style=color:#393A34;background-color:#f6f8fa><code class=codeBlockLines_e6Vv><div class=token-line style=color:#393A34><span class="token plain">cd /tmp</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">git clone https://github.com/biglocalnews/civic-scraper</span><br/></div><div class=token-line style=color:#393A34><span class="token plain">git clone https://github.com/city-scrapers/city-scrapers</span><br/></div></code></pre></div></div>
</li>
<li class=""><strong>Add Attribution</strong> - In your <code>README.md</code>, credit these projects</li>
<li class=""><strong>Start with Platform Detector</strong> - Implement <code>discovery/platform_detector.py</code> first</li>
<li class=""><strong>Test with Your 76 URLs</strong> - Run platform detection on your discovered URLs</li>
</ol>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=resources>Resources<a href=#resources class=hash-link aria-label="Direct link to Resources" title="Direct link to Resources" translate=no></a></h2>
<ul>
<li class=""><strong>Civic Scraper Docs</strong>: <a href=https://github.com/biglocalnews/civic-scraper/wiki target=_blank rel="noopener noreferrer" class="">https://github.com/biglocalnews/civic-scraper/wiki</a></li>
<li class=""><strong>City Scrapers Tutorial</strong>: <a href=https://cityscrapers.org/docs/development/ target=_blank rel="noopener noreferrer" class="">https://cityscrapers.org/docs/development/</a></li>
<li class=""><strong>CDP Architecture</strong>: <a href=https://councildataproject.org/ target=_blank rel="noopener noreferrer" class="">https://councildataproject.org/</a></li>
<li class=""><strong>Legistar API Docs</strong>: <a href=https://webapi.legistar.com/Home/Examples target=_blank rel="noopener noreferrer" class="">https://webapi.legistar.com/Home/Examples</a></li>
</ul>
<hr/>
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id=questions-to-consider>Questions to Consider<a href=#questions-to-consider class=hash-link aria-label="Direct link to Questions to Consider" title="Direct link to Questions to Consider" translate=no></a></h2>
<ol>
<li class=""><strong>Do you want video transcript support?</strong> (CDP pattern, requires AWS/GCP credits)</li>
<li class=""><strong>How important is real-time tracking?</strong> (vs batch processing)</li>
<li class=""><strong>Will you expose a public API?</strong> (Councilmatic patterns are useful here)</li>
<li class=""><strong>Do you need to track voting records?</strong> (Councilmatic person/vote models)</li>
</ol>
<p>Let me know which phase you want to implement first!</div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href=https://github.com/getcommunityone/open-navigator-for-engagement/tree/main/website/docs/integrations/overview.md target=_blank rel="noopener noreferrer" class=theme-edit-this-page><svg fill=currentColor height=20 width=20 viewBox="0 0 40 40" class=iconEdit_Z9Sw aria-hidden=true><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"/></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href=/docs/integrations/localview><div class=pagination-nav__sublabel>Previous</div><div class=pagination-nav__label>📚 LocalView Integration Guide</div></a><a class="pagination-nav__link pagination-nav__link--next" href=/docs/deployment/databricks-apps><div class=pagination-nav__sublabel>Next</div><div class=pagination-nav__label>Databricks Apps Deployment Guide</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#overview class="table-of-contents__link toc-highlight">Overview</a><li><a href=#current-state class="table-of-contents__link toc-highlight">Current State</a><li><a href=#1-civic-scraper-integration class="table-of-contents__link toc-highlight">1. Civic Scraper Integration</a><ul><li><a href=#what-to-adopt class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-platform-detection-logic class="table-of-contents__link toc-highlight">A. 
Platform Detection Logic</a><li><a href=#b-document-downloader-with-retry-logic class="table-of-contents__link toc-highlight">B. Document Downloader with Retry Logic</a></ul></ul><li><a href=#2-city-scrapers-integration class="table-of-contents__link toc-highlight">2. City Scrapers Integration</a><ul><li><a href=#what-to-adopt-1 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-standardized-event-schema class="table-of-contents__link toc-highlight">A. Standardized Event Schema</a><li><a href=#b-scraper-testing-framework class="table-of-contents__link toc-highlight">B. Scraper Testing Framework</a></ul></ul><li><a href=#3-council-data-project-cdp-integration class="table-of-contents__link toc-highlight">3. Council Data Project (CDP) Integration</a><ul><li><a href=#what-to-adopt-2 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-generic-ingestion-pipeline class="table-of-contents__link toc-highlight">A. Generic Ingestion Pipeline</a><li><a href=#b-video-transcript-integration-future class="table-of-contents__link toc-highlight">B. Video Transcript Integration (Future)</a></ul></ul><li><a href=#4-engagic-integration class="table-of-contents__link toc-highlight">4. Engagic Integration</a><ul><li><a href=#what-to-adopt-3 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-matter-tracking-across-meetings class="table-of-contents__link toc-highlight">A. "Matter" Tracking Across Meetings</a><li><a href=#b-llm-powered-document-parsing class="table-of-contents__link toc-highlight">B. LLM-Powered Document Parsing</a></ul></ul><li><a href=#5-councilmatic-integration class="table-of-contents__link toc-highlight">5. Councilmatic Integration</a><ul><li><a href=#what-to-adopt-4 class="table-of-contents__link toc-highlight">What to Adopt:</a><ul><li><a href=#a-personorganization-tracking class="table-of-contents__link toc-highlight">A. 
Person/Organization Tracking</a><li><a href=#b-search-interface-patterns class="table-of-contents__link toc-highlight">B. Search Interface Patterns</a></ul></ul><li><a href=#implementation-priorities class="table-of-contents__link toc-highlight">Implementation Priorities</a><ul><li><a href=#phase-1-foundation-week-1 class="table-of-contents__link toc-highlight">Phase 1: Foundation (Week 1)</a><li><a href=#phase-2-scraping-week-2-3 class="table-of-contents__link toc-highlight">Phase 2: Scraping (Week 2-3)</a><li><a href=#phase-3-intelligence-week-4 class="table-of-contents__link toc-highlight">Phase 3: Intelligence (Week 4)</a><li><a href=#phase-4-scale-week-5 class="table-of-contents__link toc-highlight">Phase 4: Scale (Week 5+)</a></ul><li><a href=#code-snippets-to-add-now class="table-of-contents__link toc-highlight">Code Snippets to Add Now</a><ul><li><a href=#1-platform-detector class="table-of-contents__link toc-highlight">1. Platform Detector</a><li><a href=#2-meeting-event-model class="table-of-contents__link toc-highlight">2. Meeting Event Model</a><li><a href=#3-enhanced-discovery-pipeline class="table-of-contents__link toc-highlight">3. 
Enhanced Discovery Pipeline</a></ul><li><a href=#next-steps class="table-of-contents__link toc-highlight">Next Steps</a><li><a href=#resources class="table-of-contents__link toc-highlight">Resources</a><li><a href=#questions-to-consider class="table-of-contents__link toc-highlight">Questions to Consider</a></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Documentation</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/docs/intro>Getting Started</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/citations>Citations & Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/data-sources/overview>Data Sources</a><li class=footer__item><a class=footer__link-item href=/docs/for-developers>For Developers</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Resources</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.communityone.com target=_blank rel="noopener noreferrer" class=footer__link-item>Launch Open Navigator<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement target=_blank rel="noopener noreferrer" class=footer__link-item>GitHub<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.groundvue.org/ target=_blank rel="noopener noreferrer" class=footer__link-item>GroundVue (Partner)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use 
href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Community</div><ul class="footer__items clean-list"><li class=footer__item><a href=https://www.instagram.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>Instagram<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.facebook.com/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>Facebook<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://x.com/getcommunityone/ target=_blank rel="noopener noreferrer" class=footer__link-item>X (Twitter)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.linkedin.com/company/getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>LinkedIn<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://www.youtube.com/@getcommunityone target=_blank rel="noopener noreferrer" class=footer__link-item>YouTube<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a><li class=footer__item><a href=https://discord.gg/uH6Dytek target=_blank rel="noopener noreferrer" class=footer__link-item>Discord<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>Legal</div><ul class="footer__items clean-list"><li 
class=footer__item><a class=footer__link-item href=/docs/legal/privacy-policy>Privacy Policy</a><li class=footer__item><a class=footer__link-item href=/docs/legal/terms-of-service>Terms of Service</a><li class=footer__item><a class=footer__link-item href=/docs/legal/data-provider-terms>Data Provider Terms</a></ul></div><div class="theme-layout-footer-column col footer__col"><div class=footer__title>More</div><ul class="footer__items clean-list"><li class=footer__item><a class=footer__link-item href=/blog>Blog</a><li class=footer__item><a href=https://github.com/getcommunityone/open-navigator-for-engagement/blob/main/LICENSE target=_blank rel="noopener noreferrer" class=footer__link-item>License (MIT)<svg width=13.5 height=13.5 aria-label="(opens in new tab)" class=iconExternalLink_nPIU><use href=#theme-svg-external-link /></svg></a></ul></div></div><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2026 Community One. Built with Docusaurus.</div></div></div></footer></div></body> |