Spaces:

cjber
/

planning-ai

Build error

App Files Files Community

cjber committed on Mar 7, 2025

Commit

2d44ef6

1 Parent(s): 5767260

fix: allow for specifying chapters

Browse files

Files changed (27) hide show

.streamlit/config.toml +0 -1
app.py +114 -45
data/covers/Overview_of_Public_Submissions.docx +3 -0
data/covers/{cover2 1.pdf → Overview_of_Public_Submissions.pdf} +0 -0
data/covers/Summaries_of_Public_Submissions.docx +3 -0
data/covers/{cover_summary_responses.pdf → Summaries_of_Public_Submissions.pdf} +0 -0
data/covers/reference.docx +3 -0
data/raw/chapters.txt +5 -0
data/raw/title.txt +1 -0
logo.png +0 -0
planning_ai/chains/fix_chain.py +7 -3
planning_ai/chains/map_chain.py +4 -70
planning_ai/chains/policy_chain.py +4 -6
planning_ai/chains/prompts/chapters.txt +26 -0
planning_ai/chains/prompts/map.txt +4 -11
planning_ai/chains/prompts/policy.txt +6 -8
planning_ai/chains/prompts/themes.txt +0 -40
planning_ai/chains/themes_chain.py +35 -18
planning_ai/chapters.py +0 -25
planning_ai/documents/document.py +62 -133
planning_ai/documents/themes.txt +3 -1
planning_ai/main.py +17 -25
planning_ai/nodes/hallucination_node.py +4 -6
planning_ai/nodes/map_node.py +18 -55
planning_ai/nodes/reduce_node.py +20 -60
reports/DOCS/_extensions/nrennie/PrettyPDF/logo.png +0 -0
uv.lock +37 -36

.streamlit/config.toml CHANGED Viewed

@@ -3,4 +3,3 @@ primaryColor="#0A3D91"
 backgroundColor="#f0f0f5"
 secondaryBackgroundColor="#e0e0ef"
 textColor="#262730"
-font="sans serif"

 backgroundColor="#f0f0f5"
 secondaryBackgroundColor="#e0e0ef"
 textColor="#262730"

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import base64
 import time
 from os import getenv
 import polars as pl
 import py7zr
 import streamlit as st
 import streamlit_authenticator as stauth
 from streamlit_extras.stylable_container import stylable_container
 from planning_ai.common.utils import Paths
@@ -150,16 +153,10 @@ authenticator = stauth.Authenticate(
 UPLOAD_DIR = Paths.RAW / "gcpt3"
-def handle_authentication():
-    """Handle user authentication."""
-    try:
-        authenticator.login()
-    except Exception as e:
-        st.error(e)
 def initialize_session_state():
     """Initialize session state variables."""
     if "files_extracted" not in st.session_state:
         st.session_state["files_extracted"] = False
     if "completed" not in st.session_state:
@@ -170,6 +167,72 @@ def initialize_session_state():
         st.session_state["end_time"] = None
 def upload_and_extract_files():
     """Handle file upload and extraction."""
     main1, main2 = st.columns(2)
@@ -185,11 +248,7 @@ def upload_and_extract_files():
     2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
                 """
         )
-        st.write("---")
-        st.title("Select Document Type")
-        doc_type = st.selectbox(
-            "Select the type of document:", ["Themes & Policies", "SPT"]
-        )
     with main2:
         st.title("Upload JDi files")
         st.write(
@@ -243,10 +302,9 @@ def upload_and_extract_files():
                     )
                 except Exception as e:
                     st.error(f"Failed to extract files {e}")
-    return doc_type
-def build_report(doc_type):
     """Build the report from extracted files."""
     # Remove old files
     _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
@@ -277,7 +335,7 @@ def build_report(doc_type):
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report...", show_time=True):
-                report_main(doc_type=doc_type)
                 st.session_state["end_time"] = time.time()
                 st.session_state["completed"] = True
                 total_time = (
@@ -319,10 +377,10 @@ def display_download_buttons():
     with st.expander("**Executive Reports**"):
         for i, rep in enumerate(representations_documents):
             summaries_pdf_path = (
-                Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.pdf"
             )
             summaries_docx_path = (
-                Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.docx"
             )
             with st.container():
                 st.subheader(f"Executive Report for {rep}")
@@ -332,7 +390,7 @@ def display_download_buttons():
                         st.download_button(
                             label="Download PDF Version",
                             data=pdf_file,
-                            file_name=f"Summary_of_Submitted_Representations-{rep}.pdf",
                             mime="application/pdf",
                             use_container_width=True,
                             key=f"exec_pdf_{i}_{hash(rep)}",
@@ -343,7 +401,7 @@ def display_download_buttons():
                             st.download_button(
                                 label="Download DOCX Version",
                                 data=docx_file,
-                                file_name=f"Summary_of_Submitted_Representations-{rep}.docx",
                                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                                 use_container_width=True,
                                 key=f"exec_docx_{i}_{hash(rep)}",
@@ -355,8 +413,12 @@ def display_download_buttons():
     # Create a container for the Representation Summaries
     with st.expander("**Representation Summaries**"):
         for i, rep in enumerate(representations_documents):
-            report_pdf_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
-            report_docx_path = Paths.SUMMARY / f"Summary_Documents-{rep}.docx"
             with st.container():
                 st.subheader(f"Representation Summary for {rep}")
                 col1, col2 = st.columns(2)
@@ -365,7 +427,7 @@ def display_download_buttons():
                         st.download_button(
                             label="Download PDF Version",
                             data=pdf_file,
-                            file_name=f"Summary_Documents-{rep}.pdf",
                             mime="application/pdf",
                             use_container_width=True,
                             key=f"rep_pdf_{i}_{hash(rep)}",
@@ -376,7 +438,7 @@ def display_download_buttons():
                             st.download_button(
                                 label="Download DOCX Version",
                                 data=docx_file,
-                                file_name=f"Summary_Documents-{rep}.docx",
                                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                                 use_container_width=True,
                                 key=f"rep_docx_{i}_{hash(rep)}",
@@ -387,13 +449,16 @@ def display_download_buttons():
 def reset_session():
-    st.session_state["completed"] = False
     st.session_state["files_extracted"] = False
 def main():
     """Main function to run the Streamlit app."""
-    handle_authentication()
     initialize_session_state()
     # Handle authentication states
@@ -407,28 +472,32 @@ def main():
         reset_session()
         return
-    # Authenticated user flow
-    with stylable_container(
-        key="Logout",
-        css_styles="""
-        button {
-            float: right;
-        }
-        """,
-    ):
-        authenticator.logout()  # show logout button
-    # Step 1: Upload and extract files
-    if not st.session_state["files_extracted"]:
-        doc_type = upload_and_extract_files()
-    # Step 2: Build report if files are ready
-    if st.session_state["files_extracted"]:
-        build_report(doc_type)
-    # Step 3: Show download buttons when complete
-    if st.session_state["completed"]:
-        display_download_buttons()
 if __name__ == "__main__":

 import base64
+import re
 import time
 from os import getenv
 import polars as pl
 import py7zr
+import requests
 import streamlit as st
 import streamlit_authenticator as stauth
+from bs4 import BeautifulSoup
 from streamlit_extras.stylable_container import stylable_container
 from planning_ai.common.utils import Paths
 UPLOAD_DIR = Paths.RAW / "gcpt3"
 def initialize_session_state():
     """Initialize session state variables."""
+    if "chapters" not in st.session_state:
+        st.session_state["chapters"] = False
     if "files_extracted" not in st.session_state:
         st.session_state["files_extracted"] = False
     if "completed" not in st.session_state:
         st.session_state["end_time"] = None
+def get_chapters(consultation_url: str):
+    if not consultation_url:
+        return "None", ["None"]
+    response = requests.get(consultation_url)
+    if not response.ok:
+        st.error("Failed to fetch consultation document")
+        return "", []
+    soup = BeautifulSoup(response.text, "html.parser")
+    h2_tags = soup.find_all("h2")
+    if not len(h2_tags) >= 2:
+        st.error("Invalid page format - not enough <h2> headers")
+        return "", []
+    first_h2 = h2_tags[0]
+    second_h2 = h2_tags[1]
+    # Collect links between the first and second <h2>
+    links_between = []
+    for sibling in first_h2.find_all_next():
+        if sibling == second_h2:  # Stop when reaching the second <h2>
+            break
+        if sibling.name == "a":  # If it's a link
+            link_text = sibling.text.strip()
+            if link_text:
+                links_between.append(link_text)
+    cleaned_links = [re.sub(r"\s*\(.*?\)$", "", link) for link in links_between]
+    cleaned_title = first_h2.text.strip()
+    return cleaned_title, cleaned_links
+def specify_chapters():
+    st.title("Specify Chapters")
+    st.write(
+        "Please specify the Consultation Document URL from the Consultation Hub. This will autopopulate the chapter headings for the final document. \n\n**Please ensure that the final chapter headings are correct.**"
+    )
+    chapters = []
+    consultation_url = st.text_input(
+        "Consultation Document URL",
+        key="consultation_url",
+        placeholder="https://oc2.greatercambridgeplanning.org/document/1314",
+    )
+    title, chapters = get_chapters(consultation_url)
+    st.write(f"**Title:** {title}")
+    st.write("**Chapters:**", "\n- " + "\n- ".join(chapters))
+    st.write(
+        "**If the chapter headings are incorrect, please add them manually below, separated by commas.**"
+    )
+    chapters = st.text_input(
+        "Chapter Headings",
+        key="chapter_headings",
+        placeholder=", ".join(chapters),
+        value=", ".join(chapters),
+    )
+    chapters = [chapter.strip() for chapter in chapters.split(",")]
+    with open(Paths.RAW / "chapters.txt", "w") as f:
+        f.write("\n".join(chapters))
+    with open(Paths.RAW / "title.txt", "w") as f:
+        f.write(title)
+    st.button(
+        "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
+    )
 def upload_and_extract_files():
     """Handle file upload and extraction."""
     main1, main2 = st.columns(2)
     2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
                 """
         )
     with main2:
         st.title("Upload JDi files")
         st.write(
                     )
                 except Exception as e:
                     st.error(f"Failed to extract files {e}")
+def build_report():
     """Build the report from extracted files."""
     # Remove old files
     _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report...", show_time=True):
+                report_main()
                 st.session_state["end_time"] = time.time()
                 st.session_state["completed"] = True
                 total_time = (
     with st.expander("**Executive Reports**"):
         for i, rep in enumerate(representations_documents):
             summaries_pdf_path = (
+                Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.pdf"
             )
             summaries_docx_path = (
+                Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.docx"
             )
             with st.container():
                 st.subheader(f"Executive Report for {rep}")
                         st.download_button(
                             label="Download PDF Version",
                             data=pdf_file,
+                            file_name=f"Overview_of_Public_Submissions-{rep}.pdf",
                             mime="application/pdf",
                             use_container_width=True,
                             key=f"exec_pdf_{i}_{hash(rep)}",
                             st.download_button(
                                 label="Download DOCX Version",
                                 data=docx_file,
+                                file_name=f"Overview_of_Public_Submissions-{rep}.docx",
                                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                                 use_container_width=True,
                                 key=f"exec_docx_{i}_{hash(rep)}",
     # Create a container for the Representation Summaries
     with st.expander("**Representation Summaries**"):
         for i, rep in enumerate(representations_documents):
+            report_pdf_path = (
+                Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.pdf"
+            )
+            report_docx_path = (
+                Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.docx"
+            )
             with st.container():
                 st.subheader(f"Representation Summary for {rep}")
                 col1, col2 = st.columns(2)
                         st.download_button(
                             label="Download PDF Version",
                             data=pdf_file,
+                            file_name=f"Summaries_of_Public_Submissions-{rep}.pdf",
                             mime="application/pdf",
                             use_container_width=True,
                             key=f"rep_pdf_{i}_{hash(rep)}",
                             st.download_button(
                                 label="Download DOCX Version",
                                 data=docx_file,
+                                file_name=f"Summaries_of_Public_Submissions-{rep}.docx",
                                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                                 use_container_width=True,
                                 key=f"rep_docx_{i}_{hash(rep)}",
 def reset_session():
+    st.session_state["chapters"] = False
     st.session_state["files_extracted"] = False
+    st.session_state["completed"] = False
+    st.session_state["start_time"] = None
+    st.session_state["end_time"] = None
 def main():
     """Main function to run the Streamlit app."""
+    authenticator.login()
     initialize_session_state()
     # Handle authentication states
         reset_session()
         return
+    if st.session_state["authentication_status"]:
+        with stylable_container(
+            key="Logout",
+            css_styles="""
+            button {
+                float: right;
+            }
+            """,
+        ):
+            authenticator.logout()  # show logout button
+        # Step 1: Specify chapters
+        if not st.session_state["chapters"]:
+            specify_chapters()
+        # Step 2: Upload and extract files
+        if not st.session_state["files_extracted"] and st.session_state["chapters"]:
+            upload_and_extract_files()
+        # Step 3: Build report if files are ready
+        if st.session_state["files_extracted"]:
+            build_report()
+        # Step 4: Show download buttons when complete
+        if st.session_state["completed"]:
+            display_download_buttons()
 if __name__ == "__main__":

data/covers/Overview_of_Public_Submissions.docx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d25f4fe1da660ebcb4655a27b9a8bca9a1dab73962900f858309a683b8fbc58d
+size 1595802

data/covers/{cover2 1.pdf → Overview_of_Public_Submissions.pdf} RENAMED Viewed

Binary files a/data/covers/cover2 1.pdf and b/data/covers/Overview_of_Public_Submissions.pdf differ

data/covers/Summaries_of_Public_Submissions.docx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6bc5191e10d13f15337f649b829c6f210bdda541bcde3ad4fe05d63f099a5b0
+size 1595689

data/covers/{cover_summary_responses.pdf → Summaries_of_Public_Submissions.pdf} RENAMED Viewed

Binary files a/data/covers/cover_summary_responses.pdf and b/data/covers/Summaries_of_Public_Submissions.pdf differ

data/covers/reference.docx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b4999b22cc124005ceb7adfc0ade6977005841b9af2a5c3dea717ae6b3fafe0
+size 5057

data/raw/chapters.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+Chapter 1: Introduction and purpose
+Chapter 2: Ambitions for the Campus and development to date
+Chapter 3: Site context
+Chapter 4: Cambridge Biomedical Campus development principles
+Chapter 5: Obligations and mitigation

data/raw/title.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ Draft Cambridge Biomedical Campus Supplementary Planning Document

logo.png ADDED Viewed

planning_ai/chains/fix_chain.py CHANGED Viewed

@@ -1,9 +1,15 @@
-from planning_ai.chains.map_chain import create_dynamic_map_chain
 from planning_ai.common.utils import Paths
 with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
     fix_template = f.read()
 if __name__ == "__main__":
     test_document = """
     The Local Plan proposes a mass development north-west of Cambridge despite marked growth
@@ -12,7 +18,6 @@ if __name__ == "__main__":
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
     test_themes = {"Great Places", "Homes", "Climate Change"}
-    fix_chain = create_dynamic_map_chain(test_themes, fix_template)
     result = fix_chain.invoke(
         {
             "summary": "This plan is great because they are building a nuclear power plant.",
@@ -20,4 +25,3 @@ if __name__ == "__main__":
             "context": test_document,
         }
     )
-    __import__("pprint").pprint(dict(result))

+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
 from planning_ai.common.utils import Paths
+from planning_ai.llms.llm import GPT4o
 with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
     fix_template = f.read()
+fix_prompt = ChatPromptTemplate.from_messages([("system", fix_template)])
+fix_chain = fix_prompt | GPT4o | StrOutputParser()
 if __name__ == "__main__":
     test_document = """
     The Local Plan proposes a mass development north-west of Cambridge despite marked growth
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
     test_themes = {"Great Places", "Homes", "Climate Change"}
     result = fix_chain.invoke(
         {
             "summary": "This plan is great because they are building a nuclear power plant.",
             "context": test_document,
         }
     )

planning_ai/chains/map_chain.py CHANGED Viewed

@@ -1,77 +1,14 @@
-from enum import Enum, auto
-from typing import Optional, Type
 from langchain_core.prompts import ChatPromptTemplate
-from pydantic import BaseModel, create_model
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
-from planning_ai.themes import THEMES_AND_POLICIES
 with open(Paths.PROMPTS / "map.txt", "r") as f:
     map_template = f.read()
-def create_policy_enum(
-    policy_groups: list[str], name: str = "DynamicPolicyEnum"
-) -> Enum:
-    """
-    Create a dynamic enum for policies based on the given policy groups.
-    Args:
-        policy_groups (list[str]): A set of policy group names.
-        name (str): Name of the enum to be created.
-    Returns:
-        Type[Enum]: A dynamically created Enum class for the policies.
-    """
-    return Enum(name, {policy: auto() for policy in policy_groups})
-def create_brief_summary_model(policy_enum: Enum) -> Type[BaseModel]:
-    """
-    Dynamically create a BriefSummary model using the provided policy enum.
-    Args:
-        policy_enum (Type[Enum]): The dynamically created policy enum.
-    Returns:
-        Type[BaseModel]: A dynamically generated Pydantic model for BriefSummary.
-    """
-    class Policy(BaseModel):
-        policy: policy_enum
-        note: str
-    return create_model(
-        "DynamicBriefSummary",
-        summary=(str, ...),
-        policies=(Optional[list[Policy]], ...),
-        __module__=__name__,
-        __config__={"extra": "forbid"},
-    )
-def create_dynamic_map_chain(themes, prompt: str, doc_type: str):
-    policy_groups = []
-    for theme in themes:
-        if theme in THEMES_AND_POLICIES:
-            policy_groups.extend(THEMES_AND_POLICIES[theme])
-    PolicyEnum = create_policy_enum(policy_groups)
-    DynamicBriefSummary = create_brief_summary_model(PolicyEnum)
-    SLLM = GPT4o.with_structured_output(DynamicBriefSummary, strict=True)
-    prompt = (
-        f"{prompt}\n\nAvailable Policies:\n\n- "
-        + "\n- ".join(policy_groups)
-        + "\n\nContext:\n\n{context}"
-    )
-    map_prompt = ChatPromptTemplate.from_messages([("system", prompt)])
-    return map_prompt | SLLM
 if __name__ == "__main__":
     test_document = """
@@ -80,8 +17,5 @@ if __name__ == "__main__":
     the major settlement of Cambourne has been created - now over the projected 3,000 homes and
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
-    test_themes = {"Homes", "Great Places"}
-    dynamic_map_chain = create_dynamic_map_chain(test_themes, prompt=map_template)
-    result = dynamic_map_chain.invoke({"context": test_document, "themes": test_themes})
-    __import__("pprint").pprint(dict(result))

+from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
 with open(Paths.PROMPTS / "map.txt", "r") as f:
     map_template = f.read()
+map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
+map_chain = map_prompt | GPT4o | StrOutputParser()
 if __name__ == "__main__":
     test_document = """
     the major settlement of Cambourne has been created - now over the projected 3,000 homes and
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
+    result = map_chain.invoke({"context": test_document})

planning_ai/chains/policy_chain.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from typing import Optional
 from langchain_core.prompts import ChatPromptTemplate
-from pydantic import BaseModel
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
@@ -13,8 +13,8 @@ with open(Paths.PROMPTS / "policy.txt", "r") as f:
 class Policy(BaseModel):
     """Return condensed details and their associated doc_ids"""
-    detail: str
-    doc_id: list[int]
 class PolicyList(BaseModel):
@@ -37,7 +37,5 @@ if __name__ == "__main__":
     ]
     test_docids = [1, 13, 21]
-    result = policy_chain.invoke(
-        {"theme": "Climate Change", "policy": test_policy, "details": test_bullet}
-    )
     print(result)

 from typing import Optional
 from langchain_core.prompts import ChatPromptTemplate
+from pydantic import BaseModel, Field
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
 class Policy(BaseModel):
     """Return condensed details and their associated doc_ids"""
+    detail: str = Field(description="The policy detail")
+    doc_id: list[int] = Field(description="The associated doc_ids")
 class PolicyList(BaseModel):
     ]
     test_docids = [1, 13, 21]
+    result = policy_chain.invoke({"chapter": "Climate Change", "details": test_bullet})
     print(result)

planning_ai/chains/prompts/chapters.txt ADDED Viewed

	@@ -0,0 +1,26 @@

+Please identify any relevant topics that accurately relate to the provided representation. For each topic, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the topic is present or connected in the document. You may select none, one, or multiple topics as applicable.
+---
+### **Representation Content:**
+{document}
+---
+### **Key Guidelines:**
+- **0 (Not Relevant)**: The topic is **not present** or does not apply to the representation.
+- **1-2 (Low Relevance)**: The topic is **mentioned briefly** but without substantial impact or significance to the representation's key messages.
+- **3 (Moderate Relevance)**: The topic is **discussed** with some importance, but it may not be a central focus.
+- **4 (High Relevance)**: The topic is **significantly discussed** and closely aligns with the main ideas or objectives of the representation.
+- **5 (Highly Relevant)**: The topic is **central** to the representation and its message, making it crucial for understanding the overall content.
+---
+### **Task:**
+1. **Topic identification**: Identify related topics in the document from those provided. Focus on capturing topics that are explicitly mentioned or strongly implied. Avoid inferring new topics beyond those stated. Select **only** relevant topics; do not include those that are only indirectly related to the content.
+2. **Topic scores**: For each identified topic, assign a score denoting its relevance based on the guidelines provided. Ensure the score aligns with the relevance of the topic within the document.
+3. **Topic Notes**: For each identified topic, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Topic notes may overlap. If a note does not have a clear link to the topic, omit both the topic and the note.

planning_ai/chains/prompts/map.txt CHANGED Viewed

@@ -1,16 +1,9 @@
-You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback. The policy document contains a set of policies, some of which will be relevant to the representation. Your task is to identify these policies and extract relevant sections from the representation that correspond to them.
-Your tasks are as follows:
-1. **Summary**: Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section.
-2. **Policy Identification**: Carefully review the representation and identify all relevant policies from the provided list. Focus on capturing policies that are explicitly mentioned or strongly implied. Avoid inferring new policies beyond those stated. Select **only** relevant policies; do not include those that are only indirectly related to the content.
-3. **Policy Notes**: For each identified policy, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Policy notes may overlap. If a note does not have a clear link to the policy, omit both the policy and the note.
-Your output must be formatted in valid JSON as specified. Ensure clarity and accuracy in your extraction process.
 **Always use British English**
-Select policies from the provided list using their exact names only:

+You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback.
+Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section. If the document does not provide any information, or refers only to a document that you do not have access to, state this, and do not make assumptions.
 **Always use British English**
+Response:
+{context}

planning_ai/chains/prompts/policy.txt CHANGED Viewed

@@ -1,13 +1,13 @@
-You are tasked with refining a list of details related to a specific planning policy and theme. Your goal is to:
 1. Extract and emphasise the core action or idea from each detail.
-2. Remove any non-essential context, such as the policy name or irrelevant details, along with their associated document IDs.
 3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
-4. Exclude any details that do not pertain to the policy **and** theme provided.
-It is most important to ensure that all information contained within the final details are clearly related to their associated policy. The removal of too much contextual information may result in bullet points that do not clearly relate with the associated policy. Do not be afraid to omit details and citations that do not relate with the provided theme and policy. Do not attempt to find a tangible link, it is likely that there may be none.
-Ensure that all returned details use proper sentence structure. Only include document IDs within the 'doc_id' JSON attribute; **not** in the 'details' output.
 **Always use British English**
@@ -15,9 +15,7 @@ Ensure that all returned details use proper sentence structure. Only include doc
 **Provided information**
-Theme: {theme}
-Policy: {policy}
 Details:

+You are tasked with refining a list of details relating to a specific topic in a policy document. Your goal is to:
 1. Extract and emphasise the core action or idea from each detail.
+2. Remove any non-essential context, such as the topic name or irrelevant details, along with their associated document IDs.
 3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
+4. Exclude any details that do not pertain to the chapter provided.
+It is most important to ensure that all information contained within the final details is clearly related to its associated topic. The removal of too much contextual information may result in bullet points that do not clearly relate to the associated topic. Do not be afraid to omit details and citations that do not relate to the provided topic. Do not attempt to find a tangible link; it is likely that there may be none.
+You must return the **details** and **doc_ids** separately. Do **not** include document IDs within the **details** text.
 **Always use British English**
 **Provided information**
+Topic: {chapter}
 Details:

planning_ai/chains/prompts/themes.txt DELETED Viewed

@@ -1,40 +0,0 @@
-Please identify any relevant themes from the list below that accurately relate to the document. For each theme, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the theme is present or connected in the document. You may select none, one, or multiple themes as applicable.
----
-### **Available Themes:**
-**Climate change:** Help Cambridge transition to net zero carbon by 2050, by ensuring that development is sited in places that help to limit carbon emissions, is designed to the highest achievable standards for energy and water use, and is resilient to current and future climate risks.
-**Biodiversity and green spaces:** Increase and improve our network of habitats for wildlife, and green spaces for people, ensuring that development leaves the natural environment better than it was before.
-**Wellbeing and social inclusion:** Help people in Greater Cambridge to lead healthier and happier lives, ensuring that everyone benefits from the development of new homes and jobs.
-**Great places:** Sustain the unique character of Cambridge and South Cambridgeshire, and complement it with beautiful and distinctive development, creating a place where people want to live, work and play.
-**Jobs:** Encourage a flourishing and mixed economy in Greater Cambridge which includes a wide range of jobs, while maintaining our area's global reputation for innovation.
-**Homes:** Plan for enough housing to meet our needs, including significant quantities of housing that is affordable to rent and buy, and different kinds of homes to suit our diverse communities.
-**Infrastructure:** Plan for transport, water, energy and digital networks; and health, education and cultural facilities; in the right places and built at the right times to serve our growing communities.
----
-### **Document Content:**
-{document}
----
-### **Key Guidelines:**
-- **0 (Not Relevant)**: The theme is **not present** or does not apply to the document.
-- **1-2 (Low Relevance)**: The theme is **mentioned briefly** but without substantial impact or significance to the document's key messages.
-- **3 (Moderate Relevance)**: The theme is **discussed** with some importance, but it may not be a central focus.
-- **4 (High Relevance)**: The theme is **significantly discussed** and closely aligns with the main ideas or objectives of the document.
-- **5 (Highly Relevant)**: The theme is **central** to the document and its message, making it crucial for understanding the overall content.
----
-### **Task:**
-For each theme, assess the relevance of the theme in the document and provide a score. This will allow us to better understand which themes are central to the document's content, enabling a more targeted and accurate summary.

planning_ai/chains/themes_chain.py CHANGED Viewed

@@ -7,34 +7,51 @@ from pydantic import BaseModel
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
-class Theme(Enum):
-    climate_change = "Climate Change"
-    biodiversity = "Biodiversity and Green Spaces"
-    wellbeing = "Wellbeing and Social Inclusion"
-    great_places = "Great Places"
-    jobs = "Jobs"
-    homes = "Homes"
-    infrastructure = "Infrastructure"
-class ThemeScore(BaseModel):
-    theme: Theme
     score: int
-class ThemeSelector(BaseModel):
-    themes: Optional[list[ThemeScore]]
-with open(Paths.PROMPTS / "themes.txt", "r") as f:
-    themes_template = f.read()
-themes_prompt = ChatPromptTemplate.from_messages([("system", themes_template)])
-SLLM = GPT4o.with_structured_output(ThemeSelector, strict=True)
-themes_chain = themes_prompt | SLLM
 if __name__ == "__main__":
@@ -45,5 +62,5 @@ if __name__ == "__main__":
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
-    result = themes_chain.invoke({"document": test_document})
     __import__("pprint").pprint(dict(result))

 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
+# Read the chapter lines from the file
+with open(Paths.RAW / "chapters.txt", "r") as f:
+    chapters = [line.strip() for line in f.readlines() if line.strip()]
+def create_dynamic_enum(chapters):
+    """
+    Dynamically create an Enum class from the provided chapters list.
+    The names of the enum members are derived from the chapter names.
+    """
+    # Prepare the enum name and value mappings
+    enum_members = {}
+    for chapter in chapters:
+        # Create valid Python identifier by replacing spaces with underscores and removing special characters
+        name = chapter.replace(" ", "_").replace("-", "_").replace("'", "")
+        name = "".join(c for c in name if c.isalnum() or c == "_")
+        # Assign each name and value
+        enum_members[name] = chapter
+    # Create the Enum class dynamically
+    return Enum("Chapter", enum_members)
+# Create the dynamic enum
+Chapter = create_dynamic_enum(chapters)
+class ChapterScore(BaseModel):
+    chapter: Chapter
     score: int
+    description: str
+class ChapterSelector(BaseModel):
+    chapters: Optional[list[ChapterScore]]
+with open(Paths.PROMPTS / "chapters.txt", "r") as f:
+    chapters_template = f.read()
+chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
+SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
+chapters_chain = chapters_prompt | SLLM
 if __name__ == "__main__":
     Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
     """
+    result = chapters_chain.invoke({"document": test_document})
     __import__("pprint").pprint(dict(result))

planning_ai/chapters.py DELETED Viewed

@@ -1,25 +0,0 @@
-[
-    "Introduction",
-    "Approach to Planning Obligations",
-    "How to use this Supplementary Planning Document",
-    "Affordable Housing",
-    "Green Infrastructure",
-    "Biodiversity",
-    "Community Facilities",
-    "Social and Community Support Services",
-    "Libraries and Lifelong Learning",
-    "Transport and Highways",
-    "Education",
-    "Public Art",
-    "Burial Space",
-    "Public Open Space",
-    "Indoor Sports, including Swimming",
-    "Public Realm",
-    "Waste and Recycling",
-    "Emergency Services",
-    "Planning Obligations to support local employment and skills",
-    "Planning Obligations to support affordable workspace",
-    "Public Rights of Way",
-    "Healthcare",
-    "Other Potential Development Specific Requirements",
-]

planning_ai/documents/document.py CHANGED Viewed

@@ -30,6 +30,7 @@ def _process_postcodes(final):
     """
     documents = final["documents"]
     postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
     postcodes = (
         pl.DataFrame({"postcode": postcodes})["postcode"]
         .value_counts()
@@ -42,13 +43,30 @@ def _process_postcodes(final):
     postcodes = postcodes.join(onspd, on="postcode", how="left")
     outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
     pcs_url = "https://api.postcodes.io/postcodes"
-    outside_pcs = outside_pcs.with_columns(
-        pl.col("postcode")
-        .map_elements(
-            lambda x: requests.get(f"{pcs_url}/{x}").json()["result"]["admin_ward"],
-            return_dtype=pl.String,
-        )
-        .alias("osward")
     )
     return postcodes.drop_nulls(subset=["osward"]), outside_pcs
@@ -63,16 +81,16 @@ def _process_policies(final):
         tuple: A tuple containing strings of support, object, and other policies.
     """
-    def process_policy_group(policy_group, theme, stance):
         details = "".join(
-            f"\n### {row['policies']}\n\n"
             + "".join(
                 f"- {detail} {doc_id}\n"
                 for detail, doc_id in zip(row["detail"], row["doc_id"])
             )
-            for row in policy_group.rows(named=True)
         )
-        return f"## {theme} - {stance}\n\n{details}\n"
     policies_df = final["policies"]
@@ -80,16 +98,15 @@ def _process_policies(final):
     object_policies = ""
     other_policies = ""
-    for (theme, stance), policy in policies_df.group_by(
         ["themes", "stance"], maintain_order=True
     ):
         if stance == "Support":
-            support_policies += process_policy_group(policy, theme, stance)
         elif stance == "Object":
-            object_policies += process_policy_group(policy, theme, stance)
         else:
-            other_policies += process_policy_group(policy, theme, stance)
     return support_policies, object_policies, other_policies
@@ -130,9 +147,8 @@ def _process_themes(final):
         str: A markdown table of themes with their counts and percentages.
     """
     documents = final["documents"]
-    themes = Counter(
-        [theme["theme"].value for doc in documents for theme in doc["themes"]]
-    )
     themes = pl.DataFrame(themes).transpose(include_header=True)
     themes_breakdown = themes.with_columns(
         ((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
@@ -255,7 +271,6 @@ def fig_oa(postcodes, rep):
 def fig_wards(postcodes, rep):
-    camb_lads = gpd.read_parquet(Paths.RAW / "camb_lads.parquet")
     ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
     ward_pcs = postcodes.group_by("osward").sum()
     ward_boundaries_prop = ward_boundaries.merge(
@@ -264,7 +279,7 @@ def fig_wards(postcodes, rep):
     _, ax = plt.subplots(figsize=(8, 8))
     ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
-    camb_lads.plot(ax=ax, color="white", edgecolor="gray", linewidth=0.5)
     ward_boundaries_prop.plot(
         ax=ax,
         column="count",
@@ -272,7 +287,6 @@ def fig_wards(postcodes, rep):
         legend=True,
         legend_kwds={"label": "Number of Representations"},
     )
-    ward_boundaries.plot(ax=ax, color="none", edgecolor="grey", linewidth=0.5)
     cbar = ax.get_figure().axes[-1]  # Get the colorbar axis
     cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
@@ -347,42 +361,6 @@ def build_final_report(out, rep):
         .unique("id")
         .collect()
     )
-    unused_documents = out["generate_final_report"]["unused_documents"]
-    unused_pdfs = (
-        pl.DataFrame(
-            [
-                doc["metadata"]
-                for doc in unused_documents
-                if "representations_id" not in doc["metadata"]
-            ]
-        )
-        .select(["id", "pdf_id", "page_label"])
-        .rename({"pdf_id": "representations_id"})
-        .with_columns(
-            pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
-        )
-    )
-    unused_docs = (
-        pl.DataFrame(
-            [
-                doc["metadata"]
-                for doc in unused_documents
-                if "representations_id" in doc["metadata"]
-            ]
-        )
-        .select(["id", "representations_id"])
-        .with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
-    )
-    unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
-        {
-            "id": "JDi ID",
-            "representations_id": "Representations ID",
-            "page_label": "Page Number",
-            "type": "Type",
-        }
-    )
-    unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
     support_policies, object_policies, other_policies = _process_policies(final)
     postcodes, outside_pcs = _process_postcodes(final)
@@ -396,13 +374,16 @@ def build_final_report(out, rep):
     outside_pcs = (
         outside_pcs.group_by("osward")
         .sum()[["osward", "count"]]
-        .rename({"osward": "Ward", "count": "Number of Representations"})
         .to_pandas()
         .to_markdown(index=False)
     )
     quarto_doc = (
-        f"---\ntitle: '**{rep}**'\n"
         r"""
 mainfont: Liberation Sans
 fontsize: 12pt
@@ -438,7 +419,7 @@ header-includes: |
         "\n# Executive Summary\n\n"
         f"{final['executive']}\n\n"
         f"There were a total of {len(responses):,} responses. Of these, representations left "
-        "comment, or indicated the following support and objection of the plan:\n\n"
         f"{stances}\n\n"
         "# Introduction\n\n"
         f"{introduction_paragraph}\n\n"
@@ -446,32 +427,25 @@ header-includes: |
         f"{figures_paragraph}\n\n"
         f"![Total number of representations submitted by Ward within Greater Cambridgeshire\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
         f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
-        f"![Proportional frequency of representations submitted by 2021 Output Area relative to the national average\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
-        f"![Distribution of representations submitted by the of index of multiple deprivation (2019) relative to the national average\\label{{fig-imd}}](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
         r"\newpage"
         "\n\n# Themes and Policies\n\n"
         f"{themes_paragraph}\n\n"
-        f": Breakdown of representation themes {{#tbl:themes}}\n\n{themes}\n\n"
         "## Supporting Representations\n\n"
-        "The following section presents a list of all points raised in representations that support the plan"
-        ", grouped by theme and policy.\n\n"
         f"{support_policies or '_No supporting representations._'}\n\n"
         "## Objecting Representations\n\n"
-        "The following section presents a list of all points raised in representations that object to "
-        "the plan, grouped by theme and policy.\n\n"
         f"{object_policies or '_No objecting representations._'}\n\n"
         "## Comment\n\n"
-        "The following section presents a list of all points raised in representations that do not support "
-        "or object to the plan, grouped by theme and policy.\n\n"
         f"{other_policies or '_No other representations._'}\n\n"
-        "## Unused Documents\n\n"
-        "For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused."
-        "These documents are typically very short, and contain information that provides no relation to policies or themes.\n\n TODO: expand."
-        f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
     )
-    out_path = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.md"
-    out_file = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}"
     with open(out_path, "w") as f:
         f.write(quarto_doc)
     try:
@@ -486,7 +460,7 @@ header-includes: |
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pdftk",
-            "data/covers/cover2 1.pdf",
             f"{out_file}.pdf",
             "cat",
             "output",
@@ -507,7 +481,7 @@ header-includes: |
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pandoc",
-            "data/covers/Cover2 1.docx",
             f"{out_file}.docx",
             "-o",
             f"{out_file}.docx",
@@ -516,60 +490,22 @@ header-includes: |
         ]
         subprocess.run(command, check=True, capture_output=True)
     except subprocess.CalledProcessError as e:
-        logging.error(
-            f"Error during Summary_of_Submitted_Representations.md render: {e}"
-        )
 def build_summaries_document(out, rep):
     sub = r"Document ID: \[\d+\]\n\n"
     summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
-    unused_documents = out["generate_final_report"]["unused_documents"]
-    unused_pdfs = (
-        pl.DataFrame(
-            [
-                doc["metadata"]
-                for doc in unused_documents
-                if "representations_id" not in doc["metadata"]
-            ]
-        )
-        .select(["id", "pdf_id", "page_label"])
-        .rename({"pdf_id": "representations_id"})
-        .with_columns(
-            pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
-        )
-    )
-    unused_docs = (
-        pl.DataFrame(
-            [
-                doc["metadata"]
-                for doc in unused_documents
-                if "representations_id" in doc["metadata"]
-            ]
-        )
-        .select(["id", "representations_id"])
-        .with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
-    )
-    unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
-        {
-            "id": "JDi ID",
-            "representations_id": "Representations ID",
-            "page_label": "Page Number",
-            "type": "Type",
-        }
-    )
-    unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
     full_text = "".join(
-        f"**Document ID**: {document['doc_id']}\n\n"
         f"**Representations ID**: {document['document'].metadata['id']}\n\n"
-        f"**Representations Name**: {document['document'].metadata['representations_document']}\n\n"
-        f"\n\n{re.sub(sub, '', document['summary'].summary)}\n\n"
         "---\n\n"
         for document in out["generate_final_report"]["documents"]
     )
     header = (
-        f"---\ntitle: '**{rep}**'\n"
         r"""
 mainfont: Liberation Sans
 fontsize: 12pt
@@ -604,18 +540,11 @@ header-includes: |
 """
         f"\n{summary_intro}\n\n"
     )
-    unused_text = (
-        "\n\n## Unused Documents\n\n"
-        "For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused.\n\n"
-        f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
-        "These documents are typically very short, and contain information that provides no relation to policies or themes."
-    )
-    out_path = Paths.SUMMARY / f"Summary_Documents-{rep}.md"
-    out_file = Paths.SUMMARY / f"Summary_Documents-{rep}"
     with open(out_path, "w") as f:
-        f.write(f"{header}{full_text}{unused_text}")
     try:
         command = [
             "pandoc",
@@ -628,7 +557,7 @@ header-includes: |
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pdftk",
-            "data/covers/cover_summary_responses.pdf",
             f"{out_file}.pdf",
             "cat",
             "output",
@@ -649,7 +578,7 @@ header-includes: |
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pandoc",
-            "data/covers/Cover 1.docx",
             f"{out_file}.docx",
             "-o",
             f"{out_file}.docx",

     """
     documents = final["documents"]
     postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
+    missing = ["Missing" for pcd in postcodes if not pcd]
     postcodes = (
         pl.DataFrame({"postcode": postcodes})["postcode"]
         .value_counts()
     postcodes = postcodes.join(onspd, on="postcode", how="left")
     outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
     pcs_url = "https://api.postcodes.io/postcodes"
+    def get_pcs(postcode):
+        response = requests.get(f"{pcs_url}/{postcode}")
+        if response.status_code == 200:
+            out = response.json()["result"]
+            admin_ward = out.get("admin_ward")
+            admin_district = out.get("admin_district")
+            return f"{admin_ward}, {admin_district}"
+        else:
+            return "Unknown"
+    outside_pcs = pl.concat(
+        [
+            outside_pcs.with_columns(
+                pl.col("postcode")
+                .map_elements(lambda x: get_pcs(x), return_dtype=pl.String)
+                .alias("osward")
+            )
+            .select(["postcode", "osward", "count"])
+            .with_columns(pl.col("count").cast(pl.Int32)),
+            pl.DataFrame(
+                {"postcode": missing, "osward": "Unknown", "count": len(missing)}
+            ).with_columns(pl.col("count").cast(pl.Int32)),
+        ],
     )
     return postcodes.drop_nulls(subset=["osward"]), outside_pcs
         tuple: A tuple containing strings of support, object, and other policies.
     """
+    def process_policy_group(policy_group):
         details = "".join(
+            f"\n### {row['themes']} - {row['stance']}\n\n"
             + "".join(
                 f"- {detail} {doc_id}\n"
                 for detail, doc_id in zip(row["detail"], row["doc_id"])
             )
+            for row in policy_group.rows(named=True)[:1]
         )
+        return details
     policies_df = final["policies"]
     object_policies = ""
     other_policies = ""
+    for (_, stance), policy in policies_df.group_by(
         ["themes", "stance"], maintain_order=True
     ):
         if stance == "Support":
+            support_policies += process_policy_group(policy)
         elif stance == "Object":
+            object_policies += process_policy_group(policy)
         else:
+            other_policies += process_policy_group(policy)
     return support_policies, object_policies, other_policies
         str: A markdown table of themes with their counts and percentages.
     """
     documents = final["documents"]
+    documents[0]["themes"]
+    themes = Counter([theme["chapter"] for doc in documents for theme in doc["themes"]])
     themes = pl.DataFrame(themes).transpose(include_header=True)
     themes_breakdown = themes.with_columns(
         ((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
 def fig_wards(postcodes, rep):
     ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
     ward_pcs = postcodes.group_by("osward").sum()
     ward_boundaries_prop = ward_boundaries.merge(
     _, ax = plt.subplots(figsize=(8, 8))
     ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
+    ward_boundaries.plot(ax=ax, color="white", edgecolor="grey", linewidth=0.5)
     ward_boundaries_prop.plot(
         ax=ax,
         column="count",
         legend=True,
         legend_kwds={"label": "Number of Representations"},
     )
     cbar = ax.get_figure().axes[-1]  # Get the colorbar axis
     cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
         .unique("id")
         .collect()
     )
     support_policies, object_policies, other_policies = _process_policies(final)
     postcodes, outside_pcs = _process_postcodes(final)
     outside_pcs = (
         outside_pcs.group_by("osward")
         .sum()[["osward", "count"]]
+        .filter(pl.col("osward") != "Unknown")
+        .rename(
+            {"osward": "Ward, Local Authority", "count": "Number of Representations"}
+        )
         .to_pandas()
         .to_markdown(index=False)
     )
     quarto_doc = (
+        f"---\ntitle: '**Overview of Public Submissions: {rep}**'\n"
         r"""
 mainfont: Liberation Sans
 fontsize: 12pt
         "\n# Executive Summary\n\n"
         f"{final['executive']}\n\n"
         f"There were a total of {len(responses):,} responses. Of these, representations left "
+        "comment, or indicated the following support and objection of the consultation document:\n\n"
         f"{stances}\n\n"
         "# Introduction\n\n"
         f"{introduction_paragraph}\n\n"
         f"{figures_paragraph}\n\n"
         f"![Total number of representations submitted by Ward within Greater Cambridgeshire\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
         f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
+        f"![The proportion of representations submitted by 2021 Output Area Classification^[Wyszomierski, J., Longley, P.A., Singleton, A.D., Gale, C. & O’Brien, O. (2024) A neighbourhood Output Area Classification from the 2021 and 2022 UK censuses. The Geographical Journal, 190, e12550. Available from: https://doi.org/10.1111/geoj.12550] relative to the national average\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
+        f"![The proportion of representations submitted by the Index of Multiple Deprivation (2019) relative to the national average](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
         r"\newpage"
         "\n\n# Themes and Policies\n\n"
         f"{themes_paragraph}\n\n"
+        f": Breakdown of representations by section {{#tbl:themes}}\n\n{themes}\n\n"
         "## Supporting Representations\n\n"
+        "The following section presents a list of all points raised in representations that support the consultation document, grouped by sections."
         f"{support_policies or '_No supporting representations._'}\n\n"
         "## Objecting Representations\n\n"
+        "The following section presents a list of all points raised in representations that object to the consultation document, grouped by sections."
         f"{object_policies or '_No objecting representations._'}\n\n"
         "## Comment\n\n"
+        "The following section presents a list of all points raised in representations that do not support or object to the consultation document, grouped by sections."
         f"{other_policies or '_No other representations._'}\n\n"
     )
+    out_path = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.md"
+    out_file = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}"
     with open(out_path, "w") as f:
         f.write(quarto_doc)
     try:
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pdftk",
+            "data/covers/Overview_of_Public_Submissions.pdf",
             f"{out_file}.pdf",
             "cat",
             "output",
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pandoc",
+            "data/covers/Overview_of_Public_Submissions.docx",
             f"{out_file}.docx",
             "-o",
             f"{out_file}.docx",
         ]
         subprocess.run(command, check=True, capture_output=True)
     except subprocess.CalledProcessError as e:
+        logging.error(f"Error during Overview_of_Public_Submissions render: {e}")
 def build_summaries_document(out, rep):
     sub = r"Document ID: \[\d+\]\n\n"
     summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
     full_text = "".join(
+        f"**Part ID**: {document['doc_id']}\n\n"
         f"**Representations ID**: {document['document'].metadata['id']}\n\n"
+        f"\n\n{re.sub(sub, '', document['summary'])}\n\n"
         "---\n\n"
         for document in out["generate_final_report"]["documents"]
     )
     header = (
+        f"---\ntitle: '**Summaries of Public Submissions: {rep}**'\n"
         r"""
 mainfont: Liberation Sans
 fontsize: 12pt
 """
         f"\n{summary_intro}\n\n"
     )
+    out_path = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.md"
+    out_file = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}"
     with open(out_path, "w") as f:
+        f.write(f"{header}{full_text}")
     try:
         command = [
             "pandoc",
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pdftk",
+            "data/covers/Summaries_of_Public_Submissions.pdf",
             f"{out_file}.pdf",
             "cat",
             "output",
         subprocess.run(command, check=True, capture_output=True)
         command = [
             "pandoc",
+            "data/covers/Summaries_of_Public_Submissions.docx",
             f"{out_file}.docx",
             "-o",
             f"{out_file}.docx",

planning_ai/documents/themes.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	- The following section provides a detailed breakdown of notable details from the representations, grouped by the Themes and Policies set out in the Greater Cambridgeshire Local Plan. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or provide a general comment. This section offers a comprehensive overview of those key issues raised by members of the public or organisations with respect to these Themes and Policies. We have incorporated citations into each point (see numbers in square brackets) to indicate the specific document^[Each representation made can have a number of documents associated with them. Each document ID is unique, and can be referenced in the Summary Responses report.] where each representation was made, thereby promoting transparency of sources. Finally, @tbl-themes provides a breakdown of the number of submissions that relate to each Theme (noting that submissions may be associated with more than one Theme).


1	+ The following section provides a detailed breakdown of notable details from the representations, grouped by the Themes and Policies set out in the Greater Cambridgeshire Local Plan. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or provide a general comment.
2	+
3	+ This section offers a comprehensive overview of the key issues raised by members of the public or organisations with respect to sections of the consultation document. We have incorporated citations into each point (see numbers in square brackets) to indicate the specific part^[Each representation consists of a text entry and, optionally, a further attached piece of evidence. Attachments are split into pages, each of which, along with the text entry, is referred to here as a "part". Each representation can therefore comprise multiple parts, which are given a Part ID. These can be cross-referenced with the Summaries of Public Submissions report.] of the representation where points were made, thereby promoting transparency of sources. Finally, @tbl:themes provides a breakdown of the number of submissions that relate to each Theme (noting that submissions may be associated with more than one Theme).

planning_ai/main.py CHANGED Viewed

@@ -13,7 +13,7 @@ from planning_ai.graph import create_graph
 from planning_ai.logging import logger
-def read_docs(representations_document: str, doc_type: str):
     logger.warning("Reading documents...")
     df = (
         pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
@@ -83,36 +83,28 @@ def read_docs(representations_document: str, doc_type: str):
             if doc.page_content and len(doc.page_content.split(" ")) > 25
         }.values()
     )
-    return [
-        {"document": doc, "filename": doc.metadata["filename"], "doc_type": doc_type}
-        for doc in docs
-    ]
-def main(doc_type: str = "Themes & Policies"):
-    representations_documents = (
-        pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
-        .select(pl.col("representations_document"))
-        .unique()
-        .collect()["representations_document"]
-        .to_list()
-    )
-    for rep in representations_documents:
-        docs = read_docs(rep, doc_type)
-        n_docs = len(docs)
-        logger.info(f"{n_docs} documents being processed!")
-        app = create_graph()
-        step = None
-        for step in app.stream({"documents": docs, "n_docs": n_docs}):
-            print(step.keys())
-        if step is None:
-            raise ValueError("No steps were processed!")
-        build_final_report(step, rep)
-        build_summaries_document(step, rep)
 if __name__ == "__main__":

 from planning_ai.logging import logger
+def read_docs(representations_document: str):
     logger.warning("Reading documents...")
     df = (
         pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
             if doc.page_content and len(doc.page_content.split(" ")) > 25
         }.values()
     )
+    return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
+def main():
+    with open(Paths.RAW / "title.txt", "r") as f:
+        rep = f.read().strip()
+    docs = read_docs(rep)
+    n_docs = len(docs)
+    logger.info(f"{n_docs} documents being processed!")
+    app = create_graph()
+    step = None
+    for step in app.stream({"documents": docs, "n_docs": n_docs}):
+        print(step.keys())
+    if step is None:
+        raise ValueError("No steps were processed!")
+    build_final_report(step, rep)
+    build_summaries_document(step, rep)
 if __name__ == "__main__":

planning_ai/nodes/hallucination_node.py CHANGED Viewed

@@ -1,8 +1,7 @@
 from langgraph.types import Send
-from planning_ai.chains.fix_chain import fix_template
 from planning_ai.chains.hallucination_chain import hallucination_chain
-from planning_ai.chains.map_chain import create_dynamic_map_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
@@ -35,7 +34,7 @@ def check_hallucination(state: DocumentState):
     try:
         response = hallucination_chain.invoke(
-            {"document": state["document"], "summary": state["summary"].summary}
         )
         is_hallucinated = response.score == 0
         refinement_attempts = state["refinement_attempts"] + 1
@@ -83,13 +82,12 @@ def fix_hallucination(state: DocumentState):
         hallucinations.
     """
     logger.warning(f"Fixing hallucinations for document {state['filename']}")
-    themes = [theme["theme"].value for theme in state["themes"]]
-    fix_chain = create_dynamic_map_chain(themes, fix_template)
     try:
         response = fix_chain.invoke(
             {
                 "context": state["document"],
-                "summary": state["summary"].summary,
                 "explanation": state["hallucination"].explanation,
             }
         )

 from langgraph.types import Send
+from planning_ai.chains.fix_chain import fix_chain
 from planning_ai.chains.hallucination_chain import hallucination_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
     try:
         response = hallucination_chain.invoke(
+            {"document": state["document"], "summary": state["summary"]}
         )
         is_hallucinated = response.score == 0
         refinement_attempts = state["refinement_attempts"] + 1
         hallucinations.
     """
     logger.warning(f"Fixing hallucinations for document {state['filename']}")
     try:
         response = fix_chain.invoke(
             {
                 "context": state["document"],
+                "summary": state["summary"],
                 "explanation": state["hallucination"].explanation,
             }
         )

planning_ai/nodes/map_node.py CHANGED Viewed

@@ -4,8 +4,8 @@ from langgraph.types import Send
 from presidio_analyzer import AnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine
-from planning_ai.chains.map_chain import create_dynamic_map_chain, map_template
-from planning_ai.chains.themes_chain import themes_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
@@ -15,7 +15,7 @@ anonymizer = AnonymizerEngine()
 nlp = spacy.load("en_core_web_lg")
-def retrieve_themes(state: DocumentState) -> DocumentState:
     """Retrieve themes from a document's content.
     This function uses the `themes_chain` to extract themes from the document's
@@ -28,39 +28,23 @@ def retrieve_themes(state: DocumentState) -> DocumentState:
         DocumentState: The updated document state with themes and scores.
     """
     try:
-        result = themes_chain.invoke({"document": state["document"].page_content})
-        if not result.themes:
             state["themes"] = []
             return state
-        themes = [theme.model_dump() for theme in result.themes]
     except Exception as e:
         logger.error(f"Theme selection error: {e}")
-        themes = []
-    state["themes"] = [d for d in themes if d["score"] >= 4]
-    return state
-def add_entities(state: OverallState) -> OverallState:
-    """Add named entities to all documents in the state.
-    This function processes each document using a spaCy NLP pipeline to extract
-    named entities and adds them to the document state.
-    Args:
-        state (OverallState): The overall state containing multiple documents.
-    Returns:
-        OverallState: The updated state with entities added to each document.
-    """
-    logger.info("Adding entities to all documents.")
-    for idx, document in enumerate(
-        nlp.pipe(
-            [doc["document"].page_content for doc in state["documents"]],
-        )
-    ):
-        state["documents"][idx]["entities"] = [
-            {"entity": ent.text, "label": ent.label_} for ent in document.ents
-        ]
     return state
@@ -102,30 +86,9 @@ def generate_summary(state: DocumentState) -> dict:
     logger.info(f"Starting PII removal for: {state['filename']}")
     state["document"].page_content = remove_pii(state["document"].page_content)
-    if state["doc_type"] == "Themes & Policies":
-        logger.info(f"Retrieving themes for: {state['filename']}")
-        state = retrieve_themes(state)
-    elif state["doc_type"] == "SPT":
-        logger.info(f"Retrieving SPT for: {state['filename']}")
-        state = retrieve_spt(state)
-    if not state["themes"]:
-        logger.warning(f"No themes found for {state['filename']}")
-        return {
-            "documents": [
-                {
-                    **state,
-                    "summary": "",
-                    "refinement_attempts": 0,
-                    "is_hallucinated": True,
-                    "processed": True,
-                    "failed": True,
-                }
-            ]
-        }
-    themes = [theme["theme"].value for theme in state["themes"]]
-    map_chain = create_dynamic_map_chain(themes=themes, prompt=map_template)
     try:
         response = map_chain.invoke({"context": state["document"].page_content})
     except Exception as e:

 from presidio_analyzer import AnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine
+from planning_ai.chains.map_chain import map_chain
+from planning_ai.chains.themes_chain import chapters_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
 nlp = spacy.load("en_core_web_lg")
+def retrieve_chapters(state: DocumentState) -> DocumentState:
     """Retrieve themes from a document's content.
     This function uses the `themes_chain` to extract themes from the document's
         DocumentState: The updated document state with themes and scores.
     """
     try:
+        result = chapters_chain.invoke({"document": state["document"].page_content})
+        if not result.chapters:
             state["themes"] = []
             return state
+        chapters = [chapter.model_dump() for chapter in result.chapters]
+        chapters = [
+            {
+                "chapter": chapter["chapter"].value,
+                "score": chapter["score"],
+                "description": chapter["description"],
+            }
+            for chapter in chapters
+        ]
     except Exception as e:
         logger.error(f"Theme selection error: {e}")
+        chapters = []
+    state["themes"] = [d for d in chapters if d["score"] >= 4]
     return state
     logger.info(f"Starting PII removal for: {state['filename']}")
     state["document"].page_content = remove_pii(state["document"].page_content)
+    logger.info(f"Retrieving themes for: {state['filename']}")
+    state = retrieve_chapters(state)
     try:
         response = map_chain.invoke({"context": state["document"].page_content})
     except Exception as e:

planning_ai/nodes/reduce_node.py CHANGED Viewed

@@ -1,66 +1,27 @@
-import json
-from pathlib import Path
 import polars as pl
 from planning_ai.chains.policy_chain import policy_chain
 from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
 from planning_ai.logging import logger
 from planning_ai.states import OverallState
-from planning_ai.themes import THEMES_AND_POLICIES
-def save_summaries_to_json(docs):
-    """Saves summaries to JSON files.
-    Args:
-        out (list): A list of summary dictionaries.
-    """
-    out = [
-        {
-            "document": doc["document"].model_dump()["page_content"],
-            **doc["document"].metadata,
-            "filename": doc["filename"],
-            "entities": doc["entities"],
-            "themes": doc["themes"].model_dump(),
-            "summary": doc["summary"].model_dump()["summary"],
-            "policies": doc["policies"],
-            "notes": doc["notes"],
-            "refinement_attempts": doc["refinement_attempts"],
-            "hallucination": doc["hallucination"].model_dump(),
-            "is_hallucinated": doc["is_hallucinated"],
-            "failed": doc["failed"],
-        }
-        for doc in docs
-    ]
-    for doc in out:
-        filename = Path(str(doc["filename"])).stem
-        with open(f"data/out/summaries/{filename}.json", "w") as f:
-            json.dump(doc, f)
 def extract_policies_from_docs(docs):
-    policies = {"doc_id": [], "themes": [], "policies": [], "details": [], "stance": []}
     for doc in docs:
-        if not doc["summary"].policies:
-            continue
-        for policy in doc["summary"].policies:
-            for theme, p in THEMES_AND_POLICIES.items():
-                if policy.policy.name in p:
-                    policies["doc_id"].append(doc["doc_id"])
-                    policies["themes"].append(theme)
-                    policies["policies"].append(policy.policy.name)
-                    policies["details"].append(policy.note)
-                    policies["stance"].append(
-                        doc["document"].metadata["representations_support/object"]
-                    )
     return pl.DataFrame(policies)
 def add_doc_id(final_docs):
     out_docs = []
     for id, doc in enumerate(final_docs):
-        doc["summary"].summary = f"Document ID: [{id}]\n\n{doc['summary'].summary}"
         doc["doc_id"] = id
         out_docs.append(doc)
     return out_docs
@@ -76,7 +37,7 @@ def batch_generate_executive_summaries(summaries):
         list: A list of final responses.
     """
     summaries_text = [
-        f"Document ID: {[s['doc_id']]}\n\n{s['summary'].summary}" for s in summaries
     ]
     final_responses = []
     batch_size = 50
@@ -93,11 +54,11 @@ def batch_generate_executive_summaries(summaries):
 def generate_policy_output(policy_groups):
     out = []
     for policy in (
-        policy_groups.group_by(["themes", "policies", "stance"])
         .agg(pl.col("details"), pl.col("doc_id"))
         .rows(named=True)
     ):
-        logger.info(f"Processing policies: {policy['policies']}...")
         zipped = [
             f"{bullet} Doc ID: {id}"
             for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
@@ -105,20 +66,15 @@ def generate_policy_output(policy_groups):
         try:
             reduced = policy_chain.invoke(
                 {
-                    "theme": policy["themes"],
-                    "policy": policy["policies"],
                     "details": zipped,
                 }
             )
             out.extend(policy | p for p in reduced.dict()["policies"])
         except Exception as e:
-            logger.error(f"Failed to generate policies for {policy['policies']}: {e}")
             continue
-    return (
-        pl.DataFrame(out)
-        .group_by(["themes", "policies", "stance"])
-        .agg(["detail", "doc_id"])
-    )
 def generate_final_report(state: OverallState):
@@ -129,9 +85,13 @@ def generate_final_report(state: OverallState):
 def final_output(final_docs):
-    docs = [doc for doc in final_docs if not doc["failed"]]
-    failed_docs = [doc["document"].model_dump() for doc in final_docs if doc["failed"]]
     docs = add_doc_id(docs)
     policy_groups = extract_policies_from_docs(docs)

 import polars as pl
 from planning_ai.chains.policy_chain import policy_chain
 from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
 from planning_ai.logging import logger
 from planning_ai.states import OverallState
 def extract_policies_from_docs(docs):
+    policies = {"doc_id": [], "themes": [], "details": [], "stance": []}
     for doc in docs:
+        for policy in doc["themes"]:
+            policies["doc_id"].append(doc["doc_id"])
+            policies["themes"].append(policy["chapter"])
+            policies["details"].append(policy["description"])
+            policies["stance"].append(
+                doc["document"].metadata["representations_support/object"]
+            )
     return pl.DataFrame(policies)
 def add_doc_id(final_docs):
     out_docs = []
     for id, doc in enumerate(final_docs):
         doc["doc_id"] = id
         out_docs.append(doc)
     return out_docs
         list: A list of final responses.
     """
     summaries_text = [
+        f"Document ID: {[s['doc_id']]}\n\n{s['summary']}" for s in summaries
     ]
     final_responses = []
     batch_size = 50
 def generate_policy_output(policy_groups):
     out = []
     for policy in (
+        policy_groups.group_by(["themes", "stance"])
         .agg(pl.col("details"), pl.col("doc_id"))
         .rows(named=True)
     ):
+        logger.info(f"Processing chapter: {policy['themes']}...")
         zipped = [
             f"{bullet} Doc ID: {id}"
             for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
         try:
             reduced = policy_chain.invoke(
                 {
+                    "chapter": policy["themes"],
                     "details": zipped,
                 }
             )
             out.extend(policy | p for p in reduced.dict()["policies"])
         except Exception as e:
+            logger.error(f"Failed to generate policies for {policy['themes']}: {e}")
             continue
+    return pl.DataFrame(out).group_by(["themes", "stance"]).agg(["detail", "doc_id"])
 def generate_final_report(state: OverallState):
 def final_output(final_docs):
+    docs = [doc for doc in final_docs if doc["themes"] and not doc["failed"]]
+    failed_docs = [
+        doc["document"].model_dump()
+        for doc in final_docs
+        if not doc["themes"] or doc["failed"]
+    ]
     docs = add_doc_id(docs)
     policy_groups = extract_policies_from_docs(docs)

reports/DOCS/_extensions/nrennie/PrettyPDF/logo.png ADDED Viewed

uv.lock CHANGED Viewed

@@ -833,18 +833,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
 ]
-[[package]]
-name = "extra-streamlit-components"
-version = "0.1.71"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "streamlit" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/16/a7/580b13af828ef38888196f8b2c03fa97afa89cdb7946438ca5f3271e9a81/extra_streamlit_components-0.1.71.tar.gz", hash = "sha256:d18314cf2ed009f95641882b50aa3bdb11b6a0eb6403fb43dbc8af1722419617", size = 2250093 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/25/57/1115e9b974478fac83ba9cd79def8b3770a91b7a9001c46a76491071f2fe/extra_streamlit_components-0.1.71-py3-none-any.whl", hash = "sha256:c8e6f98446adecd3002756362e50d0669693b7673afaa89cebfced6415cc6bd3", size = 4858597 },
-]
 [[package]]
 name = "faker"
 version = "36.2.2"
@@ -1043,14 +1031,14 @@ wheels = [
 [[package]]
 name = "googleapis-common-protos"
-version = "1.69.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "protobuf" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c2/92/6bb11dad062ad7cc40665d0a8986193d54f1a0032b510e84e7182df9e661/googleapis_common_protos-1.69.0.tar.gz", hash = "sha256:5a46d58af72846f59009b9c4710425b9af2139555c71837081706b213b298187", size = 61264 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1d/66/0025e2b7a2ae353acea03cf9d4a96ae32ef02c116944e2eb11f559cf4b7b/googleapis_common_protos-1.69.0-py2.py3-none-any.whl", hash = "sha256:17835fdc4fa8da1d61cfe2d4d5d57becf7c61d4112f8d81c67eaa9d7ce43042d", size = 169749 },
 ]
 [[package]]
@@ -1834,15 +1822,15 @@ wheels = [
 [[package]]
 name = "langgraph-checkpoint"
-version = "2.0.16"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "msgpack" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/01/66/5d4a2013a84c511be289bb4a5ef91cbaad28c091b6b366fdb79710a1458b/langgraph_checkpoint-2.0.16.tar.gz", hash = "sha256:49ba8cfa12b2aae845ccc3b1fbd1d7a8d3a6c4a2e387ab3a92fca40dd3d4baa5", size = 34206 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7c/63/03bc3dd304ead45b53313cab8727329e1d139a2d220f2d030c72242c860e/langgraph_checkpoint-2.0.16-py3-none-any.whl", hash = "sha256:dfab51076a6eddb5f9e146cfe1b977e3dd6419168b2afa23ff3f4e47973bf06f", size = 38291 },
 ]
 [[package]]
@@ -1860,20 +1848,20 @@ wheels = [
 [[package]]
 name = "langgraph-sdk"
-version = "0.1.53"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
     { name = "orjson" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/39/b2/a261cfbf91a4499396ba0993cf5601076301dd22883d3c0901e905253917/langgraph_sdk-0.1.53.tar.gz", hash = "sha256:12906ed965905fa27e0c28d9fa07dc6fd89e6895ff321ff049fdf3965d057cc4", size = 42369 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fc/97/3492a07b454cc74bf49938e83f0a95c608a8bc5c3dda338091d3c66e3ec5/langgraph_sdk-0.1.53-py3-none-any.whl", hash = "sha256:4fab62caad73661ffe4c3ababedcd0d7bfaaba986bee4416b9c28948458a3af5", size = 45441 },
 ]
 [[package]]
 name = "langsmith"
-version = "0.3.11"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
@@ -1884,9 +1872,9 @@ dependencies = [
     { name = "requests-toolbelt" },
     { name = "zstandard" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ea/34/c4c0eddad03e00457cd6be1a88c288cd4419da8d368d8f519a29abe5392c/langsmith-0.3.11.tar.gz", hash = "sha256:ddf29d24352e99de79c9618aaf95679214324e146c5d3d9475a7ddd2870018b1", size = 323815 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ff/68/514ffa62860202a5a0a3acbf5c05017ef9df38d4437d2cb44a3cf93d617b/langsmith-0.3.11-py3-none-any.whl", hash = "sha256:0cca22737ef07d3b038a437c141deda37e00add56022582680188b681bec095e", size = 335265 },
 ]
 [[package]]
@@ -2236,11 +2224,11 @@ wheels = [
 [[package]]
 name = "narwhals"
-version = "1.29.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e6/f7/caa23ebc4aed3ef2314441c44e1d842e701adc6af57587ffda9263c03b6e/narwhals-1.29.0.tar.gz", hash = "sha256:1021c345d56c66ff0cc8e6d03ca8c543d01ffc411630973a5cb69ee86824d823", size = 248349 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ee/f6/1fcd6b3d0e21d9b75e71ae68fbc92bbb9b9b1f4f33dd81c61d8f53378b30/narwhals-1.29.0-py3-none-any.whl", hash = "sha256:653aa8e5eb435816e7b50c8def17e7e5e3324c2ffd8a3eec03fef85792e9cf5e", size = 305214 },
 ]
 [[package]]
@@ -2844,7 +2832,7 @@ wheels = [
 [[package]]
 name = "posthog"
-version = "3.18.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "backoff" },
@@ -2854,9 +2842,9 @@ dependencies = [
     { name = "requests" },
     { name = "six" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a5/1c/aa6bb26491108e9e350cd7af4d4b0a54d48c755cc76b2c2d90ef2916b8b3/posthog-3.18.1.tar.gz", hash = "sha256:ce115b8422f26c57cd4143499115b741f5683c93d0b5b87bab391579aaef084b", size = 65573 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/04/c2/407c8cf3edf4fe33b82de3fee11178d083ee0b6e3eb28ff8072caaa85907/posthog-3.18.1-py2.py3-none-any.whl", hash = "sha256:6865104b7cf3a5b13949e2bc2aab9b37b5fbf5f9e045fa55b9eabe21b3850200", size = 76762 },
 ]
 [[package]]
@@ -3416,11 +3404,11 @@ wheels = [
 [[package]]
 name = "python-json-logger"
-version = "3.2.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e3/c4/358cd13daa1d912ef795010897a483ab2f0b41c9ea1b35235a8b2f7d15a7/python_json_logger-3.2.1.tar.gz", hash = "sha256:8eb0554ea17cb75b05d2848bc14fb02fbdbd9d6972120781b974380bfa162008", size = 16287 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4b/72/2f30cf26664fcfa0bd8ec5ee62ec90c03bd485e4a294d92aabc76c5203a5/python_json_logger-3.2.1-py3-none-any.whl", hash = "sha256:cdc17047eb5374bd311e748b42f99d71223f3b0e186f4206cc5d52aefe85b090", size = 14924 },
 ]
 [[package]]
@@ -4031,19 +4019,20 @@ wheels = [
 [[package]]
 name = "streamlit-authenticator"
-version = "0.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "bcrypt" },
     { name = "captcha" },
     { name = "cryptography" },
-    { name = "extra-streamlit-components" },
     { name = "pyjwt" },
     { name = "pyyaml" },
     { name = "streamlit" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/83/47/837b158e1a5b0d187d20c6be22c46d84d12a8d3e8d7113b67ebb33e221c9/streamlit_authenticator-0.4.2-py3-none-any.whl", hash = "sha256:442acccef6af65e2b0feb15d5e9f68707f204c1d31c60673690d87179c7ca5b2", size = 43197 },
 ]
 [[package]]
@@ -4140,6 +4129,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
 ]
 [[package]]
 name = "streamlit-keyup"
 version = "0.3.0"

     { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
 ]
 [[package]]
 name = "faker"
 version = "36.2.2"
 [[package]]
 name = "googleapis-common-protos"
+version = "1.69.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "protobuf" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/41/4f/d8be74b88621131dfd1ed70e5aff2c47f2bdf2289a70736bbf3eb0e7bc70/googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1", size = 144514 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/cb/2f4aa605b16df1e031dd7c322c597613eef933e8dd5b6a4414330b21e791/googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5", size = 293229 },
 ]
 [[package]]
 [[package]]
 name = "langgraph-checkpoint"
+version = "2.0.17"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "msgpack" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/90/92/f0d6c3e2b2e131c687a9ec87c6e1a430287c430160038e8dfaa4d0db9aab/langgraph_checkpoint-2.0.17.tar.gz", hash = "sha256:255c249f03369c41252f888bc1e1e481bf4fdecf6b3854a39e4935dc34152bc0", size = 34932 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/4d/61/35faa34145ddaffc70eb851b85561c96bbad4718ea6f34ef3c717e748c15/langgraph_checkpoint-2.0.17-py3-none-any.whl", hash = "sha256:7da9cd9af41bda5074afef0dcdbd6fa9a050f68beed9d7f80593a253412bf020", size = 39103 },
 ]
 [[package]]
 [[package]]
 name = "langgraph-sdk"
+version = "0.1.55"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
     { name = "orjson" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/7a/6c/8286151a21124dc0189b57495541c2e3cace317056f60feb04076b438f82/langgraph_sdk-0.1.55.tar.gz", hash = "sha256:89a0240157a27822cc4edd1c9e72bc852e20f5c71165a4c9b91eeffa11fd6a6b", size = 42690 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/4e/64/4b75f4b57f0c8f39bdb43aa74b1d2edcdb604b5baa58465ccc54b8b906c5/langgraph_sdk-0.1.55-py3-none-any.whl", hash = "sha256:266e92a558eb738da1ef04c29fbfc2157cd3a977b80905d9509a2cb79331f8fc", size = 45785 },
 ]
 [[package]]
 name = "langsmith"
+version = "0.3.12"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
     { name = "requests-toolbelt" },
     { name = "zstandard" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/48/32/0ef5ad579ae096f40fc108b6920e742267a0e9c07d778c1d381586616715/langsmith-0.3.12.tar.gz", hash = "sha256:045b49d0401d0e985d025ff0cf69743ab9a429e309ce5d533eab3c774d004bc2", size = 324149 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/92/9702c45974c4dbea978f8af1cfb077677b96f98df12b1638be8eff5ae5ff/langsmith-0.3.12-py3-none-any.whl", hash = "sha256:cf7926bd12d56adbd74a294ebbfc5a34c413172bfbdcd763175cc472b45afbea", size = 335663 },
 ]
 [[package]]
 [[package]]
 name = "narwhals"
+version = "1.29.1"
 source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/17/7d35094da0820ae941d8ce51842f253da36c6f95360ea0afabfc18bc02c6/narwhals-1.29.1.tar.gz", hash = "sha256:c408acf09e90c116f247cf34f24a3a89d147e3e235b1d3c708cfd1960baf320a", size = 251464 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/f1/22/380df533b08a57bc9013bb5714f33c571e1447828d83213a66adaefc0a04/narwhals-1.29.1-py3-none-any.whl", hash = "sha256:2f68cfbb2562672c4dfa54f158ed8c2828e9920ef784981cd9114e419c444216", size = 308220 },
 ]
 [[package]]
 [[package]]
 name = "posthog"
+version = "3.19.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "backoff" },
     { name = "requests" },
     { name = "six" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/d4/fe/5f785ed1514caa4687738e112716904d4fce8752068f9cad2afaa8207b05/posthog-3.19.0.tar.gz", hash = "sha256:7fe5c9e494fc2cca9baa2bd8074c0844d572df46a54378101bc20eec2776027e", size = 66172 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/5c/28/57f95743a17af817ea56f0f3aa280af677884f0d6b59c8edf4c30fc5ac2e/posthog-3.19.0-py2.py3-none-any.whl", hash = "sha256:c294bc0a939e21ecf88d625496f8073cc566c28ec2a917a47d5d32ba33e90a7f", size = 77800 },
 ]
 [[package]]
 [[package]]
 name = "python-json-logger"
+version = "3.3.0"
 source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/de/d3144a0bceede957f961e975f3752760fbe390d57fbe194baf709d8f1f7b/python_json_logger-3.3.0.tar.gz", hash = "sha256:12b7e74b17775e7d565129296105bbe3910842d9d0eb083fc83a6a617aa8df84", size = 16642 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 },
 ]
 [[package]]
 [[package]]
 name = "streamlit-authenticator"
+version = "0.4.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "bcrypt" },
     { name = "captcha" },
     { name = "cryptography" },
     { name = "pyjwt" },
     { name = "pyyaml" },
     { name = "streamlit" },
+    { name = "streamlit-javascript" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/bf/6d/7c59285447bdb18c03e37888f8d1f593e241f4af275c36e46fddced8590a/streamlit_authenticator-0.4.3.tar.gz", hash = "sha256:d05dd8656bdff0b4881b40c36108bf068e7aad8424c20cf87c9e657aac8d4a92", size = 39752 }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/5a/039d6303526306740d214a8edd360a465a1710491b3cb0284799bbd7131c/streamlit_authenticator-0.4.3-py3-none-any.whl", hash = "sha256:1f7347d480cf71c76bb915cff0a62516d087115ad1e72f99708b944705baf78d", size = 43196 },
 ]
 [[package]]
     { url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
 ]
+[[package]]
+name = "streamlit-javascript"
+version = "0.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "streamlit" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a5/69/723ebc7a58057b1e6f54d0c23c86488054d756e0e470daf4db703fe02a63/streamlit-javascript-0.1.5.tar.gz", hash = "sha256:9da5176522a0acf2c39d3b0bec9f856fdd8ea3c70bb1066841a546ab1348ae1d", size = 512374 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/81/0c9e9e4d2dab97224efe105ba44f47b259f37d8e7673b94f5b5523fe2c8f/streamlit_javascript-0.1.5-py3-none-any.whl", hash = "sha256:36ca4d8c46fd5b6526d1a705530472e03cb6e5bb24694330649f21d5c436d280", size = 518387 },
+]
 [[package]]
 name = "streamlit-keyup"
 version = "0.3.0"