Spaces:
Build error
Build error
fix: allow for specifying chapters
Browse files- .streamlit/config.toml +0 -1
- app.py +114 -45
- data/covers/Overview_of_Public_Submissions.docx +3 -0
- data/covers/{cover2 1.pdf → Overview_of_Public_Submissions.pdf} +0 -0
- data/covers/Summaries_of_Public_Submissions.docx +3 -0
- data/covers/{cover_summary_responses.pdf → Summaries_of_Public_Submissions.pdf} +0 -0
- data/covers/reference.docx +3 -0
- data/raw/chapters.txt +5 -0
- data/raw/title.txt +1 -0
- logo.png +0 -0
- planning_ai/chains/fix_chain.py +7 -3
- planning_ai/chains/map_chain.py +4 -70
- planning_ai/chains/policy_chain.py +4 -6
- planning_ai/chains/prompts/chapters.txt +26 -0
- planning_ai/chains/prompts/map.txt +4 -11
- planning_ai/chains/prompts/policy.txt +6 -8
- planning_ai/chains/prompts/themes.txt +0 -40
- planning_ai/chains/themes_chain.py +35 -18
- planning_ai/chapters.py +0 -25
- planning_ai/documents/document.py +62 -133
- planning_ai/documents/themes.txt +3 -1
- planning_ai/main.py +17 -25
- planning_ai/nodes/hallucination_node.py +4 -6
- planning_ai/nodes/map_node.py +18 -55
- planning_ai/nodes/reduce_node.py +20 -60
- reports/DOCS/_extensions/nrennie/PrettyPDF/logo.png +0 -0
- uv.lock +37 -36
.streamlit/config.toml
CHANGED
|
@@ -3,4 +3,3 @@ primaryColor="#0A3D91"
|
|
| 3 |
backgroundColor="#f0f0f5"
|
| 4 |
secondaryBackgroundColor="#e0e0ef"
|
| 5 |
textColor="#262730"
|
| 6 |
-
font="sans serif"
|
|
|
|
| 3 |
backgroundColor="#f0f0f5"
|
| 4 |
secondaryBackgroundColor="#e0e0ef"
|
| 5 |
textColor="#262730"
|
|
|
app.py
CHANGED
|
@@ -1,11 +1,14 @@
|
|
| 1 |
import base64
|
|
|
|
| 2 |
import time
|
| 3 |
from os import getenv
|
| 4 |
|
| 5 |
import polars as pl
|
| 6 |
import py7zr
|
|
|
|
| 7 |
import streamlit as st
|
| 8 |
import streamlit_authenticator as stauth
|
|
|
|
| 9 |
from streamlit_extras.stylable_container import stylable_container
|
| 10 |
|
| 11 |
from planning_ai.common.utils import Paths
|
|
@@ -150,16 +153,10 @@ authenticator = stauth.Authenticate(
|
|
| 150 |
UPLOAD_DIR = Paths.RAW / "gcpt3"
|
| 151 |
|
| 152 |
|
| 153 |
-
def handle_authentication():
|
| 154 |
-
"""Handle user authentication."""
|
| 155 |
-
try:
|
| 156 |
-
authenticator.login()
|
| 157 |
-
except Exception as e:
|
| 158 |
-
st.error(e)
|
| 159 |
-
|
| 160 |
-
|
| 161 |
def initialize_session_state():
|
| 162 |
"""Initialize session state variables."""
|
|
|
|
|
|
|
| 163 |
if "files_extracted" not in st.session_state:
|
| 164 |
st.session_state["files_extracted"] = False
|
| 165 |
if "completed" not in st.session_state:
|
|
@@ -170,6 +167,72 @@ def initialize_session_state():
|
|
| 170 |
st.session_state["end_time"] = None
|
| 171 |
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
def upload_and_extract_files():
|
| 174 |
"""Handle file upload and extraction."""
|
| 175 |
main1, main2 = st.columns(2)
|
|
@@ -185,11 +248,7 @@ def upload_and_extract_files():
|
|
| 185 |
2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
|
| 186 |
"""
|
| 187 |
)
|
| 188 |
-
|
| 189 |
-
st.title("Select Document Type")
|
| 190 |
-
doc_type = st.selectbox(
|
| 191 |
-
"Select the type of document:", ["Themes & Policies", "SPT"]
|
| 192 |
-
)
|
| 193 |
with main2:
|
| 194 |
st.title("Upload JDi files")
|
| 195 |
st.write(
|
|
@@ -243,10 +302,9 @@ def upload_and_extract_files():
|
|
| 243 |
)
|
| 244 |
except Exception as e:
|
| 245 |
st.error(f"Failed to extract files {e}")
|
| 246 |
-
return doc_type
|
| 247 |
|
| 248 |
|
| 249 |
-
def build_report(
|
| 250 |
"""Build the report from extracted files."""
|
| 251 |
# Remove old files
|
| 252 |
_ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
|
|
@@ -277,7 +335,7 @@ def build_report(doc_type):
|
|
| 277 |
except Exception as e:
|
| 278 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
| 279 |
with st.spinner("Building report...", show_time=True):
|
| 280 |
-
report_main(
|
| 281 |
st.session_state["end_time"] = time.time()
|
| 282 |
st.session_state["completed"] = True
|
| 283 |
total_time = (
|
|
@@ -319,10 +377,10 @@ def display_download_buttons():
|
|
| 319 |
with st.expander("**Executive Reports**"):
|
| 320 |
for i, rep in enumerate(representations_documents):
|
| 321 |
summaries_pdf_path = (
|
| 322 |
-
Paths.SUMMARY / f"
|
| 323 |
)
|
| 324 |
summaries_docx_path = (
|
| 325 |
-
Paths.SUMMARY / f"
|
| 326 |
)
|
| 327 |
with st.container():
|
| 328 |
st.subheader(f"Executive Report for {rep}")
|
|
@@ -332,7 +390,7 @@ def display_download_buttons():
|
|
| 332 |
st.download_button(
|
| 333 |
label="Download PDF Version",
|
| 334 |
data=pdf_file,
|
| 335 |
-
file_name=f"
|
| 336 |
mime="application/pdf",
|
| 337 |
use_container_width=True,
|
| 338 |
key=f"exec_pdf_{i}_{hash(rep)}",
|
|
@@ -343,7 +401,7 @@ def display_download_buttons():
|
|
| 343 |
st.download_button(
|
| 344 |
label="Download DOCX Version",
|
| 345 |
data=docx_file,
|
| 346 |
-
file_name=f"
|
| 347 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 348 |
use_container_width=True,
|
| 349 |
key=f"exec_docx_{i}_{hash(rep)}",
|
|
@@ -355,8 +413,12 @@ def display_download_buttons():
|
|
| 355 |
# Create a container for the Representation Summaries
|
| 356 |
with st.expander("**Representation Summaries**"):
|
| 357 |
for i, rep in enumerate(representations_documents):
|
| 358 |
-
report_pdf_path =
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
with st.container():
|
| 361 |
st.subheader(f"Representation Summary for {rep}")
|
| 362 |
col1, col2 = st.columns(2)
|
|
@@ -365,7 +427,7 @@ def display_download_buttons():
|
|
| 365 |
st.download_button(
|
| 366 |
label="Download PDF Version",
|
| 367 |
data=pdf_file,
|
| 368 |
-
file_name=f"
|
| 369 |
mime="application/pdf",
|
| 370 |
use_container_width=True,
|
| 371 |
key=f"rep_pdf_{i}_{hash(rep)}",
|
|
@@ -376,7 +438,7 @@ def display_download_buttons():
|
|
| 376 |
st.download_button(
|
| 377 |
label="Download DOCX Version",
|
| 378 |
data=docx_file,
|
| 379 |
-
file_name=f"
|
| 380 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 381 |
use_container_width=True,
|
| 382 |
key=f"rep_docx_{i}_{hash(rep)}",
|
|
@@ -387,13 +449,16 @@ def display_download_buttons():
|
|
| 387 |
|
| 388 |
|
| 389 |
def reset_session():
|
| 390 |
-
st.session_state["
|
| 391 |
st.session_state["files_extracted"] = False
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
|
| 394 |
def main():
|
| 395 |
"""Main function to run the Streamlit app."""
|
| 396 |
-
|
| 397 |
initialize_session_state()
|
| 398 |
|
| 399 |
# Handle authentication states
|
|
@@ -407,28 +472,32 @@ def main():
|
|
| 407 |
reset_session()
|
| 408 |
return
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
|
| 433 |
|
| 434 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import base64
|
| 2 |
+
import re
|
| 3 |
import time
|
| 4 |
from os import getenv
|
| 5 |
|
| 6 |
import polars as pl
|
| 7 |
import py7zr
|
| 8 |
+
import requests
|
| 9 |
import streamlit as st
|
| 10 |
import streamlit_authenticator as stauth
|
| 11 |
+
from bs4 import BeautifulSoup
|
| 12 |
from streamlit_extras.stylable_container import stylable_container
|
| 13 |
|
| 14 |
from planning_ai.common.utils import Paths
|
|
|
|
| 153 |
UPLOAD_DIR = Paths.RAW / "gcpt3"
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def initialize_session_state():
|
| 157 |
"""Initialize session state variables."""
|
| 158 |
+
if "chapters" not in st.session_state:
|
| 159 |
+
st.session_state["chapters"] = False
|
| 160 |
if "files_extracted" not in st.session_state:
|
| 161 |
st.session_state["files_extracted"] = False
|
| 162 |
if "completed" not in st.session_state:
|
|
|
|
| 167 |
st.session_state["end_time"] = None
|
| 168 |
|
| 169 |
|
| 170 |
+
def get_chapters(consultation_url: str) -> tuple[str, list[str]]:
    """Scrape the document title and chapter headings from a consultation page.

    The page is expected to contain at least two ``<h2>`` elements. The text
    of the first one is used as the document title, and the text of every
    link found between the first and the second ``<h2>`` is used as a chapter
    heading (with a trailing parenthetical suffix removed).

    Args:
        consultation_url: URL of the consultation document. A falsy value
            means that no URL has been entered.

    Returns:
        A ``(title, chapters)`` tuple. ``("None", ["None"])`` is returned when
        no URL was supplied. ``("", [])`` is returned, after showing an error
        in the Streamlit UI, when the page cannot be fetched or does not
        contain at least two ``<h2>`` elements.
    """
    if not consultation_url:
        return "None", ["None"]

    try:
        # The URL is free text typed by the user: a malformed address, a DNS
        # failure or an unresponsive server must not crash or hang the app.
        response = requests.get(consultation_url, timeout=30)
    except requests.RequestException as exc:
        st.error(f"Failed to fetch consultation document: {exc}")
        return "", []
    if not response.ok:
        st.error("Failed to fetch consultation document")
        return "", []

    soup = BeautifulSoup(response.text, "html.parser")
    h2_tags = soup.find_all("h2")
    if len(h2_tags) < 2:
        st.error("Invalid page format - not enough <h2> headers")
        return "", []

    first_h2, second_h2 = h2_tags[0], h2_tags[1]
    # Compiled once, outside the loop; strips a trailing "(...)" suffix.
    trailing_parenthetical = re.compile(r"\s*\(.*?\)$")

    # Collect the text of every link between the first and second <h2>.
    chapters = []
    for element in first_h2.find_all_next():
        if element is second_h2:  # Stop when reaching the second <h2>
            break
        if element.name != "a":
            continue
        link_text = element.text.strip()
        if link_text:
            chapters.append(trailing_parenthetical.sub("", link_text))

    return first_h2.text.strip(), chapters
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def specify_chapters():
    """Render the "Specify Chapters" step and persist the chosen headings.

    The user enters a consultation document URL; ``get_chapters`` is called
    with it and the returned title and chapter headings are displayed. The
    headings pre-fill a comma-separated text input so they can be corrected
    by hand.

    Every time this function runs, the headings currently in the text input
    are written to ``Paths.RAW / "chapters.txt"`` (one per line) and the
    title to ``Paths.RAW / "title.txt"``. Clicking "Save Chapters" sets
    ``st.session_state["chapters"]`` to ``True``.
    """
    st.title("Specify Chapters")
    st.write(
        "Please specify the Consultation Document URL from the Consultation Hub. This will autopopulate the chapter headings for the final document. \n\n**Please ensure that the final chapter headings are correct.**"
    )

    consultation_url = st.text_input(
        "Consultation Document URL",
        key="consultation_url",
        placeholder="https://oc2.greatercambridgeplanning.org/document/1314",
    )
    title, scraped_chapters = get_chapters(consultation_url)
    st.write(f"**Title:** {title}")
    st.write("**Chapters:**", "\n- " + "\n- ".join(scraped_chapters))
    st.write(
        "**If the chapter headings are incorrect, please add them manually below, separated by commas.**"
    )

    suggested_headings = ", ".join(scraped_chapters)
    headings_text = st.text_input(
        "Chapter Headings",
        key="chapter_headings",
        placeholder=suggested_headings,
        value=suggested_headings,
    )

    # Drop blank entries: splitting an empty input, or one with a trailing or
    # doubled comma, would otherwise write empty chapter headings to disk.
    # NOTE(review): a heading that itself contains a comma is split in two.
    stripped_headings = (heading.strip() for heading in headings_text.split(","))
    chapters = [heading for heading in stripped_headings if heading]

    # An explicit encoding keeps non-ASCII headings independent of the
    # platform's default locale.
    with open(Paths.RAW / "chapters.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(chapters))
    with open(Paths.RAW / "title.txt", "w", encoding="utf-8") as f:
        f.write(title)

    st.button(
        "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
    )
|
| 234 |
+
|
| 235 |
+
|
| 236 |
def upload_and_extract_files():
|
| 237 |
"""Handle file upload and extraction."""
|
| 238 |
main1, main2 = st.columns(2)
|
|
|
|
| 248 |
2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
|
| 249 |
"""
|
| 250 |
)
|
| 251 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
with main2:
|
| 253 |
st.title("Upload JDi files")
|
| 254 |
st.write(
|
|
|
|
| 302 |
)
|
| 303 |
except Exception as e:
|
| 304 |
st.error(f"Failed to extract files {e}")
|
|
|
|
| 305 |
|
| 306 |
|
| 307 |
+
def build_report():
|
| 308 |
"""Build the report from extracted files."""
|
| 309 |
# Remove old files
|
| 310 |
_ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
|
|
|
|
| 335 |
except Exception as e:
|
| 336 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
| 337 |
with st.spinner("Building report...", show_time=True):
|
| 338 |
+
report_main()
|
| 339 |
st.session_state["end_time"] = time.time()
|
| 340 |
st.session_state["completed"] = True
|
| 341 |
total_time = (
|
|
|
|
| 377 |
with st.expander("**Executive Reports**"):
|
| 378 |
for i, rep in enumerate(representations_documents):
|
| 379 |
summaries_pdf_path = (
|
| 380 |
+
Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.pdf"
|
| 381 |
)
|
| 382 |
summaries_docx_path = (
|
| 383 |
+
Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.docx"
|
| 384 |
)
|
| 385 |
with st.container():
|
| 386 |
st.subheader(f"Executive Report for {rep}")
|
|
|
|
| 390 |
st.download_button(
|
| 391 |
label="Download PDF Version",
|
| 392 |
data=pdf_file,
|
| 393 |
+
file_name=f"Overview_of_Public_Submissions-{rep}.pdf",
|
| 394 |
mime="application/pdf",
|
| 395 |
use_container_width=True,
|
| 396 |
key=f"exec_pdf_{i}_{hash(rep)}",
|
|
|
|
| 401 |
st.download_button(
|
| 402 |
label="Download DOCX Version",
|
| 403 |
data=docx_file,
|
| 404 |
+
file_name=f"Overview_of_Public_Submissions-{rep}.docx",
|
| 405 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 406 |
use_container_width=True,
|
| 407 |
key=f"exec_docx_{i}_{hash(rep)}",
|
|
|
|
| 413 |
# Create a container for the Representation Summaries
|
| 414 |
with st.expander("**Representation Summaries**"):
|
| 415 |
for i, rep in enumerate(representations_documents):
|
| 416 |
+
report_pdf_path = (
|
| 417 |
+
Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.pdf"
|
| 418 |
+
)
|
| 419 |
+
report_docx_path = (
|
| 420 |
+
Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.docx"
|
| 421 |
+
)
|
| 422 |
with st.container():
|
| 423 |
st.subheader(f"Representation Summary for {rep}")
|
| 424 |
col1, col2 = st.columns(2)
|
|
|
|
| 427 |
st.download_button(
|
| 428 |
label="Download PDF Version",
|
| 429 |
data=pdf_file,
|
| 430 |
+
file_name=f"Summaries_of_Public_Submissions-{rep}.pdf",
|
| 431 |
mime="application/pdf",
|
| 432 |
use_container_width=True,
|
| 433 |
key=f"rep_pdf_{i}_{hash(rep)}",
|
|
|
|
| 438 |
st.download_button(
|
| 439 |
label="Download DOCX Version",
|
| 440 |
data=docx_file,
|
| 441 |
+
file_name=f"Summaries_of_Public_Submissions-{rep}.docx",
|
| 442 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 443 |
use_container_width=True,
|
| 444 |
key=f"rep_docx_{i}_{hash(rep)}",
|
|
|
|
| 449 |
|
| 450 |
|
| 451 |
def reset_session():
    """Put the workflow flags and timers in the session state back to their cleared values."""
    cleared_state = {
        "chapters": False,
        "files_extracted": False,
        "completed": False,
        "start_time": None,
        "end_time": None,
    }
    # Assigned one key at a time, in the same order as before.
    for state_key, cleared_value in cleared_state.items():
        st.session_state[state_key] = cleared_value
|
| 457 |
|
| 458 |
|
| 459 |
def main():
|
| 460 |
"""Main function to run the Streamlit app."""
|
| 461 |
+
authenticator.login()
|
| 462 |
initialize_session_state()
|
| 463 |
|
| 464 |
# Handle authentication states
|
|
|
|
| 472 |
reset_session()
|
| 473 |
return
|
| 474 |
|
| 475 |
+
if st.session_state["authentication_status"]:
|
| 476 |
+
with stylable_container(
|
| 477 |
+
key="Logout",
|
| 478 |
+
css_styles="""
|
| 479 |
+
button {
|
| 480 |
+
float: right;
|
| 481 |
+
}
|
| 482 |
+
""",
|
| 483 |
+
):
|
| 484 |
+
authenticator.logout() # show logout button
|
| 485 |
+
|
| 486 |
+
# Step 1: Specify chapters
|
| 487 |
+
if not st.session_state["chapters"]:
|
| 488 |
+
specify_chapters()
|
| 489 |
|
| 490 |
+
# Step 2: Upload and extract files
|
| 491 |
+
if not st.session_state["files_extracted"] and st.session_state["chapters"]:
|
| 492 |
+
upload_and_extract_files()
|
| 493 |
|
| 494 |
+
# Step 3: Build report if files are ready
|
| 495 |
+
if st.session_state["files_extracted"]:
|
| 496 |
+
build_report()
|
| 497 |
|
| 498 |
+
# Step 4: Show download buttons when complete
|
| 499 |
+
if st.session_state["completed"]:
|
| 500 |
+
display_download_buttons()
|
| 501 |
|
| 502 |
|
| 503 |
if __name__ == "__main__":
|
data/covers/Overview_of_Public_Submissions.docx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d25f4fe1da660ebcb4655a27b9a8bca9a1dab73962900f858309a683b8fbc58d
|
| 3 |
+
size 1595802
|
data/covers/{cover2 1.pdf → Overview_of_Public_Submissions.pdf}
RENAMED
|
Binary files a/data/covers/cover2 1.pdf and b/data/covers/Overview_of_Public_Submissions.pdf differ
|
|
|
data/covers/Summaries_of_Public_Submissions.docx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6bc5191e10d13f15337f649b829c6f210bdda541bcde3ad4fe05d63f099a5b0
|
| 3 |
+
size 1595689
|
data/covers/{cover_summary_responses.pdf → Summaries_of_Public_Submissions.pdf}
RENAMED
|
Binary files a/data/covers/cover_summary_responses.pdf and b/data/covers/Summaries_of_Public_Submissions.pdf differ
|
|
|
data/covers/reference.docx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b4999b22cc124005ceb7adfc0ade6977005841b9af2a5c3dea717ae6b3fafe0
|
| 3 |
+
size 5057
|
data/raw/chapters.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Chapter 1: Introduction and purpose
|
| 2 |
+
Chapter 2: Ambitions for the Campus and development to date
|
| 3 |
+
Chapter 3: Site context
|
| 4 |
+
Chapter 4: Cambridge Biomedical Campus development principles
|
| 5 |
+
Chapter 5: Obligations and mitigation
|
data/raw/title.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Draft Cambridge Biomedical Campus Supplementary Planning Document
|
logo.png
ADDED
|
planning_ai/chains/fix_chain.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
| 1 |
-
from
|
|
|
|
|
|
|
| 2 |
from planning_ai.common.utils import Paths
|
|
|
|
| 3 |
|
| 4 |
with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
|
| 5 |
fix_template = f.read()
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
if __name__ == "__main__":
|
| 8 |
test_document = """
|
| 9 |
The Local Plan proposes a mass development north-west of Cambridge despite marked growth
|
|
@@ -12,7 +18,6 @@ if __name__ == "__main__":
|
|
| 12 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 13 |
"""
|
| 14 |
test_themes = {"Great Places", "Homes", "Climate Change"}
|
| 15 |
-
fix_chain = create_dynamic_map_chain(test_themes, fix_template)
|
| 16 |
result = fix_chain.invoke(
|
| 17 |
{
|
| 18 |
"summary": "This plan is great because they are building a nuclear power plant.",
|
|
@@ -20,4 +25,3 @@ if __name__ == "__main__":
|
|
| 20 |
"context": test_document,
|
| 21 |
}
|
| 22 |
)
|
| 23 |
-
__import__("pprint").pprint(dict(result))
|
|
|
|
| 1 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
from planning_ai.common.utils import Paths
|
| 5 |
+
from planning_ai.llms.llm import GPT4o
|
| 6 |
|
| 7 |
with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
|
| 8 |
fix_template = f.read()
|
| 9 |
|
| 10 |
+
fix_prompt = ChatPromptTemplate.from_messages([("system", fix_template)])
|
| 11 |
+
fix_chain = fix_prompt | GPT4o | StrOutputParser()
|
| 12 |
+
|
| 13 |
if __name__ == "__main__":
|
| 14 |
test_document = """
|
| 15 |
The Local Plan proposes a mass development north-west of Cambridge despite marked growth
|
|
|
|
| 18 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 19 |
"""
|
| 20 |
test_themes = {"Great Places", "Homes", "Climate Change"}
|
|
|
|
| 21 |
result = fix_chain.invoke(
|
| 22 |
{
|
| 23 |
"summary": "This plan is great because they are building a nuclear power plant.",
|
|
|
|
| 25 |
"context": test_document,
|
| 26 |
}
|
| 27 |
)
|
|
|
planning_ai/chains/map_chain.py
CHANGED
|
@@ -1,77 +1,14 @@
|
|
| 1 |
-
from
|
| 2 |
-
from typing import Optional, Type
|
| 3 |
-
|
| 4 |
from langchain_core.prompts import ChatPromptTemplate
|
| 5 |
-
from pydantic import BaseModel, create_model
|
| 6 |
|
| 7 |
from planning_ai.common.utils import Paths
|
| 8 |
from planning_ai.llms.llm import GPT4o
|
| 9 |
-
from planning_ai.themes import THEMES_AND_POLICIES
|
| 10 |
|
| 11 |
with open(Paths.PROMPTS / "map.txt", "r") as f:
|
| 12 |
map_template = f.read()
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
policy_groups: list[str], name: str = "DynamicPolicyEnum"
|
| 17 |
-
) -> Enum:
|
| 18 |
-
"""
|
| 19 |
-
Create a dynamic enum for policies based on the given policy groups.
|
| 20 |
-
|
| 21 |
-
Args:
|
| 22 |
-
policy_groups (list[str]): A set of policy group names.
|
| 23 |
-
name (str): Name of the enum to be created.
|
| 24 |
-
|
| 25 |
-
Returns:
|
| 26 |
-
Type[Enum]: A dynamically created Enum class for the policies.
|
| 27 |
-
"""
|
| 28 |
-
return Enum(name, {policy: auto() for policy in policy_groups})
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def create_brief_summary_model(policy_enum: Enum) -> Type[BaseModel]:
|
| 32 |
-
"""
|
| 33 |
-
Dynamically create a BriefSummary model using the provided policy enum.
|
| 34 |
-
|
| 35 |
-
Args:
|
| 36 |
-
policy_enum (Type[Enum]): The dynamically created policy enum.
|
| 37 |
-
|
| 38 |
-
Returns:
|
| 39 |
-
Type[BaseModel]: A dynamically generated Pydantic model for BriefSummary.
|
| 40 |
-
"""
|
| 41 |
-
|
| 42 |
-
class Policy(BaseModel):
|
| 43 |
-
policy: policy_enum
|
| 44 |
-
note: str
|
| 45 |
-
|
| 46 |
-
return create_model(
|
| 47 |
-
"DynamicBriefSummary",
|
| 48 |
-
summary=(str, ...),
|
| 49 |
-
policies=(Optional[list[Policy]], ...),
|
| 50 |
-
__module__=__name__,
|
| 51 |
-
__config__={"extra": "forbid"},
|
| 52 |
-
)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def create_dynamic_map_chain(themes, prompt: str, doc_type: str):
|
| 56 |
-
|
| 57 |
-
policy_groups = []
|
| 58 |
-
for theme in themes:
|
| 59 |
-
if theme in THEMES_AND_POLICIES:
|
| 60 |
-
policy_groups.extend(THEMES_AND_POLICIES[theme])
|
| 61 |
-
|
| 62 |
-
PolicyEnum = create_policy_enum(policy_groups)
|
| 63 |
-
DynamicBriefSummary = create_brief_summary_model(PolicyEnum)
|
| 64 |
-
|
| 65 |
-
SLLM = GPT4o.with_structured_output(DynamicBriefSummary, strict=True)
|
| 66 |
-
|
| 67 |
-
prompt = (
|
| 68 |
-
f"{prompt}\n\nAvailable Policies:\n\n- "
|
| 69 |
-
+ "\n- ".join(policy_groups)
|
| 70 |
-
+ "\n\nContext:\n\n{context}"
|
| 71 |
-
)
|
| 72 |
-
map_prompt = ChatPromptTemplate.from_messages([("system", prompt)])
|
| 73 |
-
return map_prompt | SLLM
|
| 74 |
-
|
| 75 |
|
| 76 |
if __name__ == "__main__":
|
| 77 |
test_document = """
|
|
@@ -80,8 +17,5 @@ if __name__ == "__main__":
|
|
| 80 |
the major settlement of Cambourne has been created - now over the projected 3,000 homes and
|
| 81 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 82 |
"""
|
| 83 |
-
test_themes = {"Homes", "Great Places"}
|
| 84 |
|
| 85 |
-
|
| 86 |
-
result = dynamic_map_chain.invoke({"context": test_document, "themes": test_themes})
|
| 87 |
-
__import__("pprint").pprint(dict(result))
|
|
|
|
| 1 |
+
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
|
|
|
| 2 |
from langchain_core.prompts import ChatPromptTemplate
|
|
|
|
| 3 |
|
| 4 |
from planning_ai.common.utils import Paths
|
| 5 |
from planning_ai.llms.llm import GPT4o
|
|
|
|
| 6 |
|
| 7 |
with open(Paths.PROMPTS / "map.txt", "r") as f:
|
| 8 |
map_template = f.read()
|
| 9 |
|
| 10 |
+
map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
|
| 11 |
+
map_chain = map_prompt | GPT4o | StrOutputParser()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
if __name__ == "__main__":
|
| 14 |
test_document = """
|
|
|
|
| 17 |
the major settlement of Cambourne has been created - now over the projected 3,000 homes and
|
| 18 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 19 |
"""
|
|
|
|
| 20 |
|
| 21 |
+
result = map_chain.invoke({"context": test_document})
|
|
|
|
|
|
planning_ai/chains/policy_chain.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from typing import Optional
|
| 2 |
|
| 3 |
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
-
from pydantic import BaseModel
|
| 5 |
|
| 6 |
from planning_ai.common.utils import Paths
|
| 7 |
from planning_ai.llms.llm import GPT4o
|
|
@@ -13,8 +13,8 @@ with open(Paths.PROMPTS / "policy.txt", "r") as f:
|
|
| 13 |
class Policy(BaseModel):
|
| 14 |
"""Return condensed details and their associated doc_ids"""
|
| 15 |
|
| 16 |
-
detail: str
|
| 17 |
-
doc_id: list[int]
|
| 18 |
|
| 19 |
|
| 20 |
class PolicyList(BaseModel):
|
|
@@ -37,7 +37,5 @@ if __name__ == "__main__":
|
|
| 37 |
]
|
| 38 |
test_docids = [1, 13, 21]
|
| 39 |
|
| 40 |
-
result = policy_chain.invoke(
|
| 41 |
-
{"theme": "Climate Change", "policy": test_policy, "details": test_bullet}
|
| 42 |
-
)
|
| 43 |
print(result)
|
|
|
|
| 1 |
from typing import Optional
|
| 2 |
|
| 3 |
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from pydantic import BaseModel, Field
|
| 5 |
|
| 6 |
from planning_ai.common.utils import Paths
|
| 7 |
from planning_ai.llms.llm import GPT4o
|
|
|
|
| 13 |
class Policy(BaseModel):
    """Return condensed details and their associated doc_ids"""

    # Both fields are required; the ellipsis makes that explicit.
    detail: str = Field(..., description="The policy detail")
    doc_id: list[int] = Field(..., description="The associated doc_ids")
|
| 18 |
|
| 19 |
|
| 20 |
class PolicyList(BaseModel):
|
|
|
|
| 37 |
]
|
| 38 |
test_docids = [1, 13, 21]
|
| 39 |
|
| 40 |
+
result = policy_chain.invoke({"chapter": "Climate Change", "details": test_bullet})
|
|
|
|
|
|
|
| 41 |
print(result)
|
planning_ai/chains/prompts/chapters.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please identify any relevant topics that accurately relate to the provided representation. For each topic, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the topic is present or connected in the document. You may select none, one, or multiple topics as applicable.
|
| 2 |
+
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
### **Representation Content:**
|
| 6 |
+
|
| 7 |
+
{document}
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
### **Key Guidelines:**
|
| 12 |
+
- **0 (Not Relevant)**: The topic is **not present** or does not apply to the representation.
|
| 13 |
+
- **1-2 (Low Relevance)**: The topic is **mentioned briefly** but without substantial impact or significance to the representation's key messages.
|
| 14 |
+
- **3 (Moderate Relevance)**: The topic is **discussed** with some importance, but it may not be a central focus.
|
| 15 |
+
- **4 (High Relevance)**: The topic is **significantly discussed** and closely aligns with the main ideas or objectives of the representation.
|
| 16 |
+
- **5 (Highly Relevant)**: The topic is **central** to the representation and its message, making it crucial for understanding the overall content.
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
### **Task:**
|
| 21 |
+
|
| 22 |
+
1. **Topic identification**: Identify related topics in the document from those provided. Focus on capturing topics that are explicitly mentioned or strongly implied. Avoid inferring new topics beyond those stated. Select **only** relevant topics; do not include those that are only indirectly related to the content.
|
| 23 |
+
|
| 24 |
+
2. **Topic scores**: For each identified topic attribute a score denoting the relevance based on the guidelines provided. Ensure the score aligns with the relevance of the topic within the document.
|
| 25 |
+
|
| 26 |
+
3. **Topic Notes**: For each identified topic, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Topic notes may overlap. If a note does not have a clear link to the topic, omit both the topic and the note.
|
planning_ai/chains/prompts/map.txt
CHANGED
|
@@ -1,16 +1,9 @@
|
|
| 1 |
-
You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback.
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
1. **Summary**: Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section.
|
| 6 |
-
|
| 7 |
-
2. **Policy Identification**: Carefully review the representation and identify all relevant policies from the provided list. Focus on capturing policies that are explicitly mentioned or strongly implied. Avoid inferring new policies beyond those stated. Select **only** relevant policies, do not include those that are only to the content indirectly.
|
| 8 |
-
|
| 9 |
-
3. **Policy Notes**: For each identified policy, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Policy notes may overlap. If a note does not have a clear link to the policy, omit both the policy and the note.
|
| 10 |
-
|
| 11 |
-
Your output must be formatted in valid JSON as specified. Ensure clarity and accuracy in your extraction process.
|
| 12 |
|
| 13 |
**Always use British English**
|
| 14 |
|
| 15 |
-
|
| 16 |
|
|
|
|
|
|
| 1 |
+
You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback.
|
| 2 |
|
| 3 |
+
Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section. If the document does not provide any information, or refers only to a document that you do not have access to, state this, and do not make assumptions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
**Always use British English**
|
| 6 |
|
| 7 |
+
Response:
|
| 8 |
|
| 9 |
+
{context}
|
planning_ai/chains/prompts/policy.txt
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
-
You are tasked with refining a list of details
|
| 2 |
|
| 3 |
1. Extract and emphasise the core action or idea from each detail.
|
| 4 |
-
2. Remove any non-essential context, such as the
|
| 5 |
3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
|
| 6 |
-
4. Exclude any details that do not pertain to the
|
| 7 |
|
| 8 |
-
It is most important to ensure that all information contained within the final details are clearly related to their associated
|
| 9 |
|
| 10 |
-
|
| 11 |
|
| 12 |
**Always use British English**
|
| 13 |
|
|
@@ -15,9 +15,7 @@ Ensure that all returned details use proper sentence structure. Only include doc
|
|
| 15 |
|
| 16 |
**Provided information**
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
Policy: {policy}
|
| 21 |
|
| 22 |
Details:
|
| 23 |
|
|
|
|
| 1 |
+
You are tasked with refining a list of details relating to a specific topic in a policy document. Your goal is to:
|
| 2 |
|
| 3 |
1. Extract and emphasise the core action or idea from each detail.
|
| 4 |
+
2. Remove any non-essential context, such as the topic name or irrelevant details, along with their associated document IDs.
|
| 5 |
3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
|
| 6 |
+
4. Exclude any details that do not pertain to the chapter provided.
|
| 7 |
|
| 8 |
+
It is most important to ensure that all information contained within the final details is clearly related to its associated topic. The removal of too much contextual information may result in bullet points that do not clearly relate to the associated topic. Do not be afraid to omit details and citations that do not relate to the provided topic. Do not attempt to find a tangible link; it is likely that there is none.
|
| 9 |
|
| 10 |
+
You must return the **details** and **doc_ids** separately. Do **not** include document IDs within the **details** text.
|
| 11 |
|
| 12 |
**Always use British English**
|
| 13 |
|
|
|
|
| 15 |
|
| 16 |
**Provided information**
|
| 17 |
|
| 18 |
+
Topic: {chapter}
|
|
|
|
|
|
|
| 19 |
|
| 20 |
Details:
|
| 21 |
|
planning_ai/chains/prompts/themes.txt
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
Please identify any relevant themes from the list below that accurately relate to the document. For each theme, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the theme is present or connected in the document. You may select none, one, or multiple themes as applicable.
|
| 2 |
-
|
| 3 |
-
---
|
| 4 |
-
|
| 5 |
-
### **Available Themes:**
|
| 6 |
-
|
| 7 |
-
**Climate change:** Help Cambridge transition to net zero carbon by 2050, by ensuring that development is sited in places that help to limit carbon emissions, is designed to the highest achievable standards for energy and water use, and is resilient to current and future climate risks.
|
| 8 |
-
|
| 9 |
-
**Biodiversity and green spaces:** Increase and improve our network of habitats for wildlife, and green spaces for people, ensuring that development leaves the natural environment better than it was before.
|
| 10 |
-
|
| 11 |
-
**Wellbeing and social inclusion:** Help people in Greater Cambridge to lead healthier and happier lives, ensuring that everyone benefits from the development of new homes and jobs.
|
| 12 |
-
|
| 13 |
-
**Great places:** Sustain the unique character of Cambridge and South Cambridgeshire, and complement it with beautiful and distinctive development, creating a place where people want to live, work and play.
|
| 14 |
-
|
| 15 |
-
**Jobs:** Encourage a flourishing and mixed economy in Greater Cambridge which includes a wide range of jobs, while maintaining our area's global reputation for innovation.
|
| 16 |
-
|
| 17 |
-
**Homes:** Plan for enough housing to meet our needs, including significant quantities of housing that is affordable to rent and buy, and different kinds of homes to suit our diverse communities.
|
| 18 |
-
|
| 19 |
-
**Infrastructure:** Plan for transport, water, energy and digital networks; and health, education and cultural facilities; in the right places and built at the right times to serve our growing communities.
|
| 20 |
-
|
| 21 |
-
---
|
| 22 |
-
|
| 23 |
-
### **Document Content:**
|
| 24 |
-
|
| 25 |
-
{document}
|
| 26 |
-
|
| 27 |
-
---
|
| 28 |
-
|
| 29 |
-
### **Key Guidelines:**
|
| 30 |
-
- **0 (Not Relevant)**: The theme is **not present** or does not apply to the document.
|
| 31 |
-
- **1-2 (Low Relevance)**: The theme is **mentioned briefly** but without substantial impact or significance to the document's key messages.
|
| 32 |
-
- **3 (Moderate Relevance)**: The theme is **discussed** with some importance, but it may not be a central focus.
|
| 33 |
-
- **4 (High Relevance)**: The theme is **significantly discussed** and closely aligns with the main ideas or objectives of the document.
|
| 34 |
-
- **5 (Highly Relevant)**: The theme is **central** to the document and its message, making it crucial for understanding the overall content.
|
| 35 |
-
|
| 36 |
-
---
|
| 37 |
-
|
| 38 |
-
### **Task:**
|
| 39 |
-
|
| 40 |
-
For each theme, assess the relevance of the theme in the document and provide a score. This will allow us to better understand which themes are central to the document's content, enabling a more targeted and accurate summary.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
planning_ai/chains/themes_chain.py
CHANGED
|
@@ -7,34 +7,51 @@ from pydantic import BaseModel
|
|
| 7 |
from planning_ai.common.utils import Paths
|
| 8 |
from planning_ai.llms.llm import GPT4o
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
-
class
|
| 22 |
-
|
| 23 |
score: int
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
-
class
|
| 27 |
-
|
| 28 |
|
| 29 |
|
| 30 |
-
with open(Paths.PROMPTS / "
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
|
| 35 |
-
SLLM = GPT4o.with_structured_output(
|
| 36 |
|
| 37 |
-
|
| 38 |
|
| 39 |
|
| 40 |
if __name__ == "__main__":
|
|
@@ -45,5 +62,5 @@ if __name__ == "__main__":
|
|
| 45 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 46 |
"""
|
| 47 |
|
| 48 |
-
result =
|
| 49 |
__import__("pprint").pprint(dict(result))
|
|
|
|
| 7 |
from planning_ai.common.utils import Paths
|
| 8 |
from planning_ai.llms.llm import GPT4o
|
| 9 |
|
| 10 |
+
# Read the chapter lines from the file
|
| 11 |
+
with open(Paths.RAW / "chapters.txt", "r") as f:
|
| 12 |
+
chapters = [line.strip() for line in f.readlines() if line.strip()]
|
| 13 |
|
| 14 |
+
|
| 15 |
+
def create_dynamic_enum(chapters):
    """
    Dynamically create the ``Chapter`` Enum class from a list of chapter titles.

    Each member's value is the chapter title exactly as given. Each member's
    name is derived from the title: spaces and hyphens become underscores and
    every other character that is not alphanumeric or an underscore is dropped.

    Args:
        chapters: Iterable of chapter title strings.

    Returns:
        An ``Enum`` subclass named ``Chapter`` with one member per distinct
        chapter title, in the order the titles were first seen.
    """
    enum_members = {}
    seen_titles = set()
    for index, chapter in enumerate(chapters, start=1):
        # An exact repeat of a title adds nothing new; keep the first one only.
        if chapter in seen_titles:
            continue
        seen_titles.add(chapter)

        base = chapter.replace(" ", "_").replace("-", "_")
        base = "".join(c for c in base if c.isalnum() or c == "_")
        if not base:
            # Title consisted only of punctuation; fall back to a positional name
            # rather than registering a member with an empty name.
            base = f"Chapter_{index}"

        # Distinct titles can sanitise to the same identifier (e.g. "Public Art"
        # and "Public-Art"). Without a suffix the later title would silently
        # replace the earlier one and a chapter would vanish from the enum.
        name = base
        suffix = 2
        while name in enum_members:
            name = f"{base}_{suffix}"
            suffix += 1

        enum_members[name] = chapter

    # Create the Enum class dynamically
    return Enum("Chapter", enum_members)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Create the dynamic enum
|
| 34 |
+
Chapter = create_dynamic_enum(chapters)
|
| 35 |
|
| 36 |
|
| 37 |
+
class ChapterScore(BaseModel):
|
| 38 |
+
chapter: Chapter
|
| 39 |
score: int
|
| 40 |
+
description: str
|
| 41 |
|
| 42 |
|
| 43 |
+
class ChapterSelector(BaseModel):
|
| 44 |
+
chapters: Optional[list[ChapterScore]]
|
| 45 |
|
| 46 |
|
| 47 |
+
with open(Paths.PROMPTS / "chapters.txt", "r") as f:
|
| 48 |
+
chapters_template = f.read()
|
| 49 |
|
| 50 |
+
chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
|
| 51 |
|
| 52 |
+
SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
|
| 53 |
|
| 54 |
+
chapters_chain = chapters_prompt | SLLM
|
| 55 |
|
| 56 |
|
| 57 |
if __name__ == "__main__":
|
|
|
|
| 62 |
Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
|
| 63 |
"""
|
| 64 |
|
| 65 |
+
result = chapters_chain.invoke({"document": test_document})
|
| 66 |
__import__("pprint").pprint(dict(result))
|
planning_ai/chapters.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
[
|
| 2 |
-
"Introduction",
|
| 3 |
-
"Approach to Planning Obligations",
|
| 4 |
-
"How to use this Supplementary Planning Document",
|
| 5 |
-
"Affordable Housing",
|
| 6 |
-
"Green Infrastructure",
|
| 7 |
-
"Biodiversity",
|
| 8 |
-
"Community Facilities",
|
| 9 |
-
"Social and Community Support Services",
|
| 10 |
-
"Libraries and Lifelong Learning",
|
| 11 |
-
"Transport and Highways",
|
| 12 |
-
"Education",
|
| 13 |
-
"Public Art",
|
| 14 |
-
"Burial Space",
|
| 15 |
-
"Public Open Space",
|
| 16 |
-
"Indoor Sports, including Swimming",
|
| 17 |
-
"Public Realm",
|
| 18 |
-
"Waste and Recycling",
|
| 19 |
-
"Emergency Services",
|
| 20 |
-
"Planning Obligations to support local employment and skills",
|
| 21 |
-
"Planning Obligations to support affordable workspace",
|
| 22 |
-
"Public Rights of Way",
|
| 23 |
-
"Healthcare",
|
| 24 |
-
"Other Potential Development Specific Requirements",
|
| 25 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
planning_ai/documents/document.py
CHANGED
|
@@ -30,6 +30,7 @@ def _process_postcodes(final):
|
|
| 30 |
"""
|
| 31 |
documents = final["documents"]
|
| 32 |
postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
|
|
|
|
| 33 |
postcodes = (
|
| 34 |
pl.DataFrame({"postcode": postcodes})["postcode"]
|
| 35 |
.value_counts()
|
|
@@ -42,13 +43,30 @@ def _process_postcodes(final):
|
|
| 42 |
postcodes = postcodes.join(onspd, on="postcode", how="left")
|
| 43 |
outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
|
| 44 |
pcs_url = "https://api.postcodes.io/postcodes"
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
.
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
)
|
| 53 |
return postcodes.drop_nulls(subset=["osward"]), outside_pcs
|
| 54 |
|
|
@@ -63,16 +81,16 @@ def _process_policies(final):
|
|
| 63 |
tuple: A tuple containing strings of support, object, and other policies.
|
| 64 |
"""
|
| 65 |
|
| 66 |
-
def process_policy_group(policy_group
|
| 67 |
details = "".join(
|
| 68 |
-
f"\n### {row['
|
| 69 |
+ "".join(
|
| 70 |
f"- {detail} {doc_id}\n"
|
| 71 |
for detail, doc_id in zip(row["detail"], row["doc_id"])
|
| 72 |
)
|
| 73 |
-
for row in policy_group.rows(named=True)
|
| 74 |
)
|
| 75 |
-
return
|
| 76 |
|
| 77 |
policies_df = final["policies"]
|
| 78 |
|
|
@@ -80,16 +98,15 @@ def _process_policies(final):
|
|
| 80 |
object_policies = ""
|
| 81 |
other_policies = ""
|
| 82 |
|
| 83 |
-
for (
|
| 84 |
["themes", "stance"], maintain_order=True
|
| 85 |
):
|
| 86 |
if stance == "Support":
|
| 87 |
-
support_policies += process_policy_group(policy
|
| 88 |
elif stance == "Object":
|
| 89 |
-
object_policies += process_policy_group(policy
|
| 90 |
else:
|
| 91 |
-
other_policies += process_policy_group(policy
|
| 92 |
-
|
| 93 |
return support_policies, object_policies, other_policies
|
| 94 |
|
| 95 |
|
|
@@ -130,9 +147,8 @@ def _process_themes(final):
|
|
| 130 |
str: A markdown table of themes with their counts and percentages.
|
| 131 |
"""
|
| 132 |
documents = final["documents"]
|
| 133 |
-
themes
|
| 134 |
-
|
| 135 |
-
)
|
| 136 |
themes = pl.DataFrame(themes).transpose(include_header=True)
|
| 137 |
themes_breakdown = themes.with_columns(
|
| 138 |
((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
|
|
@@ -255,7 +271,6 @@ def fig_oa(postcodes, rep):
|
|
| 255 |
|
| 256 |
|
| 257 |
def fig_wards(postcodes, rep):
|
| 258 |
-
camb_lads = gpd.read_parquet(Paths.RAW / "camb_lads.parquet")
|
| 259 |
ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
|
| 260 |
ward_pcs = postcodes.group_by("osward").sum()
|
| 261 |
ward_boundaries_prop = ward_boundaries.merge(
|
|
@@ -264,7 +279,7 @@ def fig_wards(postcodes, rep):
|
|
| 264 |
|
| 265 |
_, ax = plt.subplots(figsize=(8, 8))
|
| 266 |
ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
|
| 267 |
-
|
| 268 |
ward_boundaries_prop.plot(
|
| 269 |
ax=ax,
|
| 270 |
column="count",
|
|
@@ -272,7 +287,6 @@ def fig_wards(postcodes, rep):
|
|
| 272 |
legend=True,
|
| 273 |
legend_kwds={"label": "Number of Representations"},
|
| 274 |
)
|
| 275 |
-
ward_boundaries.plot(ax=ax, color="none", edgecolor="grey", linewidth=0.5)
|
| 276 |
cbar = ax.get_figure().axes[-1] # Get the colorbar axis
|
| 277 |
cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
|
| 278 |
|
|
@@ -347,42 +361,6 @@ def build_final_report(out, rep):
|
|
| 347 |
.unique("id")
|
| 348 |
.collect()
|
| 349 |
)
|
| 350 |
-
unused_documents = out["generate_final_report"]["unused_documents"]
|
| 351 |
-
|
| 352 |
-
unused_pdfs = (
|
| 353 |
-
pl.DataFrame(
|
| 354 |
-
[
|
| 355 |
-
doc["metadata"]
|
| 356 |
-
for doc in unused_documents
|
| 357 |
-
if "representations_id" not in doc["metadata"]
|
| 358 |
-
]
|
| 359 |
-
)
|
| 360 |
-
.select(["id", "pdf_id", "page_label"])
|
| 361 |
-
.rename({"pdf_id": "representations_id"})
|
| 362 |
-
.with_columns(
|
| 363 |
-
pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
|
| 364 |
-
)
|
| 365 |
-
)
|
| 366 |
-
unused_docs = (
|
| 367 |
-
pl.DataFrame(
|
| 368 |
-
[
|
| 369 |
-
doc["metadata"]
|
| 370 |
-
for doc in unused_documents
|
| 371 |
-
if "representations_id" in doc["metadata"]
|
| 372 |
-
]
|
| 373 |
-
)
|
| 374 |
-
.select(["id", "representations_id"])
|
| 375 |
-
.with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
|
| 376 |
-
)
|
| 377 |
-
unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
|
| 378 |
-
{
|
| 379 |
-
"id": "JDi ID",
|
| 380 |
-
"representations_id": "Representations ID",
|
| 381 |
-
"page_label": "Page Number",
|
| 382 |
-
"type": "Type",
|
| 383 |
-
}
|
| 384 |
-
)
|
| 385 |
-
unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
|
| 386 |
|
| 387 |
support_policies, object_policies, other_policies = _process_policies(final)
|
| 388 |
postcodes, outside_pcs = _process_postcodes(final)
|
|
@@ -396,13 +374,16 @@ def build_final_report(out, rep):
|
|
| 396 |
outside_pcs = (
|
| 397 |
outside_pcs.group_by("osward")
|
| 398 |
.sum()[["osward", "count"]]
|
| 399 |
-
.
|
|
|
|
|
|
|
|
|
|
| 400 |
.to_pandas()
|
| 401 |
.to_markdown(index=False)
|
| 402 |
)
|
| 403 |
|
| 404 |
quarto_doc = (
|
| 405 |
-
f"---\ntitle: '**{rep}**'\n"
|
| 406 |
r"""
|
| 407 |
mainfont: Liberation Sans
|
| 408 |
fontsize: 12pt
|
|
@@ -438,7 +419,7 @@ header-includes: |
|
|
| 438 |
"\n# Executive Summary\n\n"
|
| 439 |
f"{final['executive']}\n\n"
|
| 440 |
f"There were a total of {len(responses):,} responses. Of these, representations left "
|
| 441 |
-
"comment, or indicated the following support and objection of the
|
| 442 |
f"{stances}\n\n"
|
| 443 |
"# Introduction\n\n"
|
| 444 |
f"{introduction_paragraph}\n\n"
|
|
@@ -446,32 +427,25 @@ header-includes: |
|
|
| 446 |
f"{figures_paragraph}\n\n"
|
| 447 |
f"\n\n"
|
| 448 |
f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
|
| 449 |
-
f"![
|
| 450 |
-
f"![
|
| 451 |
r"\newpage"
|
| 452 |
"\n\n# Themes and Policies\n\n"
|
| 453 |
f"{themes_paragraph}\n\n"
|
| 454 |
-
f": Breakdown of
|
| 455 |
"## Supporting Representations\n\n"
|
| 456 |
-
"The following section presents a list of all points raised in representations that support the
|
| 457 |
-
", grouped by theme and policy.\n\n"
|
| 458 |
f"{support_policies or '_No supporting representations._'}\n\n"
|
| 459 |
"## Objecting Representations\n\n"
|
| 460 |
-
"The following section presents a list of all points raised in representations that object to "
|
| 461 |
-
"the plan, grouped by theme and policy.\n\n"
|
| 462 |
f"{object_policies or '_No objecting representations._'}\n\n"
|
| 463 |
"## Comment\n\n"
|
| 464 |
-
"The following section presents a list of all points raised in representations that do not support "
|
| 465 |
-
"or object to the plan, grouped by theme and policy.\n\n"
|
| 466 |
f"{other_policies or '_No other representations._'}\n\n"
|
| 467 |
-
"## Unused Documents\n\n"
|
| 468 |
-
"For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused."
|
| 469 |
-
"These documents are typically very short, and contain information that provides no relation to policies or themes.\n\n TODO: expand."
|
| 470 |
-
f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
|
| 471 |
)
|
| 472 |
|
| 473 |
-
out_path = Paths.SUMMARY / f"
|
| 474 |
-
out_file = Paths.SUMMARY / f"
|
| 475 |
with open(out_path, "w") as f:
|
| 476 |
f.write(quarto_doc)
|
| 477 |
try:
|
|
@@ -486,7 +460,7 @@ header-includes: |
|
|
| 486 |
subprocess.run(command, check=True, capture_output=True)
|
| 487 |
command = [
|
| 488 |
"pdftk",
|
| 489 |
-
"data/covers/
|
| 490 |
f"{out_file}.pdf",
|
| 491 |
"cat",
|
| 492 |
"output",
|
|
@@ -507,7 +481,7 @@ header-includes: |
|
|
| 507 |
subprocess.run(command, check=True, capture_output=True)
|
| 508 |
command = [
|
| 509 |
"pandoc",
|
| 510 |
-
"data/covers/
|
| 511 |
f"{out_file}.docx",
|
| 512 |
"-o",
|
| 513 |
f"{out_file}.docx",
|
|
@@ -516,60 +490,22 @@ header-includes: |
|
|
| 516 |
]
|
| 517 |
subprocess.run(command, check=True, capture_output=True)
|
| 518 |
except subprocess.CalledProcessError as e:
|
| 519 |
-
logging.error(
|
| 520 |
-
f"Error during Summary_of_Submitted_Representations.md render: {e}"
|
| 521 |
-
)
|
| 522 |
|
| 523 |
|
| 524 |
def build_summaries_document(out, rep):
|
| 525 |
sub = r"Document ID: \[\d+\]\n\n"
|
| 526 |
summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
unused_pdfs = (
|
| 530 |
-
pl.DataFrame(
|
| 531 |
-
[
|
| 532 |
-
doc["metadata"]
|
| 533 |
-
for doc in unused_documents
|
| 534 |
-
if "representations_id" not in doc["metadata"]
|
| 535 |
-
]
|
| 536 |
-
)
|
| 537 |
-
.select(["id", "pdf_id", "page_label"])
|
| 538 |
-
.rename({"pdf_id": "representations_id"})
|
| 539 |
-
.with_columns(
|
| 540 |
-
pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
|
| 541 |
-
)
|
| 542 |
-
)
|
| 543 |
-
unused_docs = (
|
| 544 |
-
pl.DataFrame(
|
| 545 |
-
[
|
| 546 |
-
doc["metadata"]
|
| 547 |
-
for doc in unused_documents
|
| 548 |
-
if "representations_id" in doc["metadata"]
|
| 549 |
-
]
|
| 550 |
-
)
|
| 551 |
-
.select(["id", "representations_id"])
|
| 552 |
-
.with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
|
| 553 |
-
)
|
| 554 |
-
unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
|
| 555 |
-
{
|
| 556 |
-
"id": "JDi ID",
|
| 557 |
-
"representations_id": "Representations ID",
|
| 558 |
-
"page_label": "Page Number",
|
| 559 |
-
"type": "Type",
|
| 560 |
-
}
|
| 561 |
-
)
|
| 562 |
-
unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
|
| 563 |
full_text = "".join(
|
| 564 |
-
f"**
|
| 565 |
f"**Representations ID**: {document['document'].metadata['id']}\n\n"
|
| 566 |
-
f"
|
| 567 |
-
f"\n\n{re.sub(sub, '', document['summary'].summary)}\n\n"
|
| 568 |
"---\n\n"
|
| 569 |
for document in out["generate_final_report"]["documents"]
|
| 570 |
)
|
| 571 |
header = (
|
| 572 |
-
f"---\ntitle: '**{rep}**'\n"
|
| 573 |
r"""
|
| 574 |
mainfont: Liberation Sans
|
| 575 |
fontsize: 12pt
|
|
@@ -604,18 +540,11 @@ header-includes: |
|
|
| 604 |
"""
|
| 605 |
f"\n{summary_intro}\n\n"
|
| 606 |
)
|
| 607 |
-
unused_text = (
|
| 608 |
-
"\n\n## Unused Documents\n\n"
|
| 609 |
-
"For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused.\n\n"
|
| 610 |
-
f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
|
| 611 |
-
"These documents are typically very short, and contain information that provides no relation to policies or themes."
|
| 612 |
-
)
|
| 613 |
|
| 614 |
-
out_path = Paths.SUMMARY / f"
|
| 615 |
-
out_file = Paths.SUMMARY / f"
|
| 616 |
with open(out_path, "w") as f:
|
| 617 |
-
f.write(f"{header}{full_text}
|
| 618 |
-
|
| 619 |
try:
|
| 620 |
command = [
|
| 621 |
"pandoc",
|
|
@@ -628,7 +557,7 @@ header-includes: |
|
|
| 628 |
subprocess.run(command, check=True, capture_output=True)
|
| 629 |
command = [
|
| 630 |
"pdftk",
|
| 631 |
-
"data/covers/
|
| 632 |
f"{out_file}.pdf",
|
| 633 |
"cat",
|
| 634 |
"output",
|
|
@@ -649,7 +578,7 @@ header-includes: |
|
|
| 649 |
subprocess.run(command, check=True, capture_output=True)
|
| 650 |
command = [
|
| 651 |
"pandoc",
|
| 652 |
-
"data/covers/
|
| 653 |
f"{out_file}.docx",
|
| 654 |
"-o",
|
| 655 |
f"{out_file}.docx",
|
|
|
|
| 30 |
"""
|
| 31 |
documents = final["documents"]
|
| 32 |
postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
|
| 33 |
+
missing = ["Missing" for pcd in postcodes if not pcd]
|
| 34 |
postcodes = (
|
| 35 |
pl.DataFrame({"postcode": postcodes})["postcode"]
|
| 36 |
.value_counts()
|
|
|
|
| 43 |
postcodes = postcodes.join(onspd, on="postcode", how="left")
|
| 44 |
outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
|
| 45 |
pcs_url = "https://api.postcodes.io/postcodes"
|
| 46 |
+
|
| 47 |
+
def get_pcs(postcode):
|
| 48 |
+
response = requests.get(f"{pcs_url}/{postcode}")
|
| 49 |
+
if response.status_code == 200:
|
| 50 |
+
out = response.json()["result"]
|
| 51 |
+
admin_ward = out.get("admin_ward")
|
| 52 |
+
admin_district = out.get("admin_district")
|
| 53 |
+
return f"{admin_ward}, {admin_district}"
|
| 54 |
+
else:
|
| 55 |
+
return "Unknown"
|
| 56 |
+
|
| 57 |
+
outside_pcs = pl.concat(
|
| 58 |
+
[
|
| 59 |
+
outside_pcs.with_columns(
|
| 60 |
+
pl.col("postcode")
|
| 61 |
+
.map_elements(lambda x: get_pcs(x), return_dtype=pl.String)
|
| 62 |
+
.alias("osward")
|
| 63 |
+
)
|
| 64 |
+
.select(["postcode", "osward", "count"])
|
| 65 |
+
.with_columns(pl.col("count").cast(pl.Int32)),
|
| 66 |
+
pl.DataFrame(
|
| 67 |
+
{"postcode": missing, "osward": "Unknown", "count": len(missing)}
|
| 68 |
+
).with_columns(pl.col("count").cast(pl.Int32)),
|
| 69 |
+
],
|
| 70 |
)
|
| 71 |
return postcodes.drop_nulls(subset=["osward"]), outside_pcs
|
| 72 |
|
|
|
|
| 81 |
tuple: A tuple containing strings of support, object, and other policies.
|
| 82 |
"""
|
| 83 |
|
| 84 |
+
def process_policy_group(policy_group):
|
| 85 |
details = "".join(
|
| 86 |
+
f"\n### {row['themes']} - {row['stance']}\n\n"
|
| 87 |
+ "".join(
|
| 88 |
f"- {detail} {doc_id}\n"
|
| 89 |
for detail, doc_id in zip(row["detail"], row["doc_id"])
|
| 90 |
)
|
| 91 |
+
for row in policy_group.rows(named=True)[:1]
|
| 92 |
)
|
| 93 |
+
return details
|
| 94 |
|
| 95 |
policies_df = final["policies"]
|
| 96 |
|
|
|
|
| 98 |
object_policies = ""
|
| 99 |
other_policies = ""
|
| 100 |
|
| 101 |
+
for (_, stance), policy in policies_df.group_by(
|
| 102 |
["themes", "stance"], maintain_order=True
|
| 103 |
):
|
| 104 |
if stance == "Support":
|
| 105 |
+
support_policies += process_policy_group(policy)
|
| 106 |
elif stance == "Object":
|
| 107 |
+
object_policies += process_policy_group(policy)
|
| 108 |
else:
|
| 109 |
+
other_policies += process_policy_group(policy)
|
|
|
|
| 110 |
return support_policies, object_policies, other_policies
|
| 111 |
|
| 112 |
|
|
|
|
| 147 |
str: A markdown table of themes with their counts and percentages.
|
| 148 |
"""
|
| 149 |
documents = final["documents"]
|
| 150 |
+
documents[0]["themes"]
|
| 151 |
+
themes = Counter([theme["chapter"] for doc in documents for theme in doc["themes"]])
|
|
|
|
| 152 |
themes = pl.DataFrame(themes).transpose(include_header=True)
|
| 153 |
themes_breakdown = themes.with_columns(
|
| 154 |
((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
|
|
|
|
| 271 |
|
| 272 |
|
| 273 |
def fig_wards(postcodes, rep):
|
|
|
|
| 274 |
ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
|
| 275 |
ward_pcs = postcodes.group_by("osward").sum()
|
| 276 |
ward_boundaries_prop = ward_boundaries.merge(
|
|
|
|
| 279 |
|
| 280 |
_, ax = plt.subplots(figsize=(8, 8))
|
| 281 |
ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
|
| 282 |
+
ward_boundaries.plot(ax=ax, color="white", edgecolor="grey", linewidth=0.5)
|
| 283 |
ward_boundaries_prop.plot(
|
| 284 |
ax=ax,
|
| 285 |
column="count",
|
|
|
|
| 287 |
legend=True,
|
| 288 |
legend_kwds={"label": "Number of Representations"},
|
| 289 |
)
|
|
|
|
| 290 |
cbar = ax.get_figure().axes[-1] # Get the colorbar axis
|
| 291 |
cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
|
| 292 |
|
|
|
|
| 361 |
.unique("id")
|
| 362 |
.collect()
|
| 363 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
support_policies, object_policies, other_policies = _process_policies(final)
|
| 366 |
postcodes, outside_pcs = _process_postcodes(final)
|
|
|
|
| 374 |
outside_pcs = (
|
| 375 |
outside_pcs.group_by("osward")
|
| 376 |
.sum()[["osward", "count"]]
|
| 377 |
+
.filter(pl.col("osward") != "Unknown")
|
| 378 |
+
.rename(
|
| 379 |
+
{"osward": "Ward, Local Authority", "count": "Number of Representations"}
|
| 380 |
+
)
|
| 381 |
.to_pandas()
|
| 382 |
.to_markdown(index=False)
|
| 383 |
)
|
| 384 |
|
| 385 |
quarto_doc = (
|
| 386 |
+
f"---\ntitle: '**Overview of Public Submissions: {rep}**'\n"
|
| 387 |
r"""
|
| 388 |
mainfont: Liberation Sans
|
| 389 |
fontsize: 12pt
|
|
|
|
| 419 |
"\n# Executive Summary\n\n"
|
| 420 |
f"{final['executive']}\n\n"
|
| 421 |
f"There were a total of {len(responses):,} responses. Of these, representations left "
|
| 422 |
+
"comment, or indicated the following support and objection of the consultation document:\n\n"
|
| 423 |
f"{stances}\n\n"
|
| 424 |
"# Introduction\n\n"
|
| 425 |
f"{introduction_paragraph}\n\n"
|
|
|
|
| 427 |
f"{figures_paragraph}\n\n"
|
| 428 |
f"\n\n"
|
| 429 |
f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
|
| 430 |
+
f"![The proportion of representations submitted by 2021 Output Area Classification^[Wyszomierski, J., Longley, P.A., Singleton, A.D., Gale, C. & O’Brien, O. (2024) A neighbourhood Output Area Classification from the 2021 and 2022 UK censuses. The Geographical Journal, 190, e12550. Available from: https://doi.org/10.1111/geoj.12550] relative to the national average\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
|
| 431 |
+
f"\n\n"
|
| 432 |
r"\newpage"
|
| 433 |
"\n\n# Themes and Policies\n\n"
|
| 434 |
f"{themes_paragraph}\n\n"
|
| 435 |
+
f": Breakdown of representations by section {{#tbl:themes}}\n\n{themes}\n\n"
|
| 436 |
"## Supporting Representations\n\n"
|
| 437 |
+
"The following section presents a list of all points raised in representations that support the consultation document, grouped by sections."
|
|
|
|
| 438 |
f"{support_policies or '_No supporting representations._'}\n\n"
|
| 439 |
"## Objecting Representations\n\n"
|
| 440 |
+
"The following section presents a list of all points raised in representations that object to the consultation document, grouped by sections."
|
|
|
|
| 441 |
f"{object_policies or '_No objecting representations._'}\n\n"
|
| 442 |
"## Comment\n\n"
|
| 443 |
+
"The following section presents a list of all points raised in representations that do not support or object to the consultation document, grouped by sections."
|
|
|
|
| 444 |
f"{other_policies or '_No other representations._'}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
)
|
| 446 |
|
| 447 |
+
out_path = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.md"
|
| 448 |
+
out_file = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}"
|
| 449 |
with open(out_path, "w") as f:
|
| 450 |
f.write(quarto_doc)
|
| 451 |
try:
|
|
|
|
| 460 |
subprocess.run(command, check=True, capture_output=True)
|
| 461 |
command = [
|
| 462 |
"pdftk",
|
| 463 |
+
"data/covers/Overview_of_Public_Submissions.pdf",
|
| 464 |
f"{out_file}.pdf",
|
| 465 |
"cat",
|
| 466 |
"output",
|
|
|
|
| 481 |
subprocess.run(command, check=True, capture_output=True)
|
| 482 |
command = [
|
| 483 |
"pandoc",
|
| 484 |
+
"data/covers/Overview_of_Public_Submissions.docx",
|
| 485 |
f"{out_file}.docx",
|
| 486 |
"-o",
|
| 487 |
f"{out_file}.docx",
|
|
|
|
| 490 |
]
|
| 491 |
subprocess.run(command, check=True, capture_output=True)
|
| 492 |
except subprocess.CalledProcessError as e:
|
| 493 |
+
logging.error(f"Error during Overview_of_Public_Submissions render: {e}")
|
|
|
|
|
|
|
| 494 |
|
| 495 |
|
| 496 |
def build_summaries_document(out, rep):
|
| 497 |
sub = r"Document ID: \[\d+\]\n\n"
|
| 498 |
summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
|
| 499 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
full_text = "".join(
|
| 501 |
+
f"**Part ID**: {document['doc_id']}\n\n"
|
| 502 |
f"**Representations ID**: {document['document'].metadata['id']}\n\n"
|
| 503 |
+
f"\n\n{re.sub(sub, '', document['summary'])}\n\n"
|
|
|
|
| 504 |
"---\n\n"
|
| 505 |
for document in out["generate_final_report"]["documents"]
|
| 506 |
)
|
| 507 |
header = (
|
| 508 |
+
f"---\ntitle: '**Summaries of Public Submissions: {rep}**'\n"
|
| 509 |
r"""
|
| 510 |
mainfont: Liberation Sans
|
| 511 |
fontsize: 12pt
|
|
|
|
| 540 |
"""
|
| 541 |
f"\n{summary_intro}\n\n"
|
| 542 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
|
| 544 |
+
out_path = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.md"
|
| 545 |
+
out_file = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}"
|
| 546 |
with open(out_path, "w") as f:
|
| 547 |
+
f.write(f"{header}{full_text}")
|
|
|
|
| 548 |
try:
|
| 549 |
command = [
|
| 550 |
"pandoc",
|
|
|
|
| 557 |
subprocess.run(command, check=True, capture_output=True)
|
| 558 |
command = [
|
| 559 |
"pdftk",
|
| 560 |
+
"data/covers/Summaries_of_Public_Submissions.pdf",
|
| 561 |
f"{out_file}.pdf",
|
| 562 |
"cat",
|
| 563 |
"output",
|
|
|
|
| 578 |
subprocess.run(command, check=True, capture_output=True)
|
| 579 |
command = [
|
| 580 |
"pandoc",
|
| 581 |
+
"data/covers/Summaries_of_Public_Submissions.docx",
|
| 582 |
f"{out_file}.docx",
|
| 583 |
"-o",
|
| 584 |
f"{out_file}.docx",
|
planning_ai/documents/themes.txt
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
The following section provides a detailed breakdown of notable details from the **representations**, grouped by the **Themes and Policies** set out in the **Greater Cambridgeshire Local Plan**. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or provide a general comment.
|
|
|
|
|
|
|
|
|
| 1 |
+
The following section provides a detailed breakdown of notable details from the **representations**, grouped by the **Themes and Policies** set out in the **Greater Cambridge Local Plan**. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or providing a general comment.
|
| 2 |
+
|
| 3 |
+
This section offers a comprehensive overview of those key issues raised by members of the public or organisations with respect to sections of the consultation document. We have incorporated citations into each point (see numbers in square brackets) to indicate the specific part^[Each representation can be a text entry and optionally a further attached piece of evidence. Attachments are split into pages, and along with the text entry are referred to here as a "part". Each representation can therefore comprise multiple parts, which are given a Part ID. These can be cross referenced to the Summaries of Public Submissions report.] of the representation where points were made, thereby promoting transparency of sources. Finally, @tbl:themes provides a breakdown of the number of submissions that relate to each Theme (noting that submissions may be associated with more than one Theme).
|
planning_ai/main.py
CHANGED
|
@@ -13,7 +13,7 @@ from planning_ai.graph import create_graph
|
|
| 13 |
from planning_ai.logging import logger
|
| 14 |
|
| 15 |
|
| 16 |
-
def read_docs(representations_document: str
|
| 17 |
logger.warning("Reading documents...")
|
| 18 |
df = (
|
| 19 |
pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
|
|
@@ -83,36 +83,28 @@ def read_docs(representations_document: str, doc_type: str):
|
|
| 83 |
if doc.page_content and len(doc.page_content.split(" ")) > 25
|
| 84 |
}.values()
|
| 85 |
)
|
| 86 |
-
return [
|
| 87 |
-
{"document": doc, "filename": doc.metadata["filename"], "doc_type": doc_type}
|
| 88 |
-
for doc in docs
|
| 89 |
-
]
|
| 90 |
|
| 91 |
|
| 92 |
-
def main(
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
.to_list()
|
| 99 |
-
)
|
| 100 |
-
for rep in representations_documents:
|
| 101 |
-
docs = read_docs(rep, doc_type)
|
| 102 |
-
n_docs = len(docs)
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
|
|
|
| 13 |
from planning_ai.logging import logger
|
| 14 |
|
| 15 |
|
| 16 |
+
def read_docs(representations_document: str):
|
| 17 |
logger.warning("Reading documents...")
|
| 18 |
df = (
|
| 19 |
pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
|
|
|
|
| 83 |
if doc.page_content and len(doc.page_content.split(" ")) > 25
|
| 84 |
}.values()
|
| 85 |
)
|
| 86 |
+
return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
+
def main():
|
| 90 |
+
with open(Paths.RAW / "title.txt", "r") as f:
|
| 91 |
+
rep = f.read().strip()
|
| 92 |
+
|
| 93 |
+
docs = read_docs(rep)
|
| 94 |
+
n_docs = len(docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
logger.info(f"{n_docs} documents being processed!")
|
| 97 |
+
app = create_graph()
|
| 98 |
|
| 99 |
+
step = None
|
| 100 |
+
for step in app.stream({"documents": docs, "n_docs": n_docs}):
|
| 101 |
+
print(step.keys())
|
| 102 |
|
| 103 |
+
if step is None:
|
| 104 |
+
raise ValueError("No steps were processed!")
|
| 105 |
|
| 106 |
+
build_final_report(step, rep)
|
| 107 |
+
build_summaries_document(step, rep)
|
| 108 |
|
| 109 |
|
| 110 |
if __name__ == "__main__":
|
planning_ai/nodes/hallucination_node.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
from langgraph.types import Send
|
| 2 |
|
| 3 |
-
from planning_ai.chains.fix_chain import
|
| 4 |
from planning_ai.chains.hallucination_chain import hallucination_chain
|
| 5 |
-
from planning_ai.chains.map_chain import create_dynamic_map_chain
|
| 6 |
from planning_ai.logging import logger
|
| 7 |
from planning_ai.states import DocumentState, OverallState
|
| 8 |
|
|
@@ -35,7 +34,7 @@ def check_hallucination(state: DocumentState):
|
|
| 35 |
|
| 36 |
try:
|
| 37 |
response = hallucination_chain.invoke(
|
| 38 |
-
{"document": state["document"], "summary": state["summary"]
|
| 39 |
)
|
| 40 |
is_hallucinated = response.score == 0
|
| 41 |
refinement_attempts = state["refinement_attempts"] + 1
|
|
@@ -83,13 +82,12 @@ def fix_hallucination(state: DocumentState):
|
|
| 83 |
hallucinations.
|
| 84 |
"""
|
| 85 |
logger.warning(f"Fixing hallucinations for document {state['filename']}")
|
| 86 |
-
|
| 87 |
-
fix_chain = create_dynamic_map_chain(themes, fix_template)
|
| 88 |
try:
|
| 89 |
response = fix_chain.invoke(
|
| 90 |
{
|
| 91 |
"context": state["document"],
|
| 92 |
-
"summary": state["summary"]
|
| 93 |
"explanation": state["hallucination"].explanation,
|
| 94 |
}
|
| 95 |
)
|
|
|
|
| 1 |
from langgraph.types import Send
|
| 2 |
|
| 3 |
+
from planning_ai.chains.fix_chain import fix_chain
|
| 4 |
from planning_ai.chains.hallucination_chain import hallucination_chain
|
|
|
|
| 5 |
from planning_ai.logging import logger
|
| 6 |
from planning_ai.states import DocumentState, OverallState
|
| 7 |
|
|
|
|
| 34 |
|
| 35 |
try:
|
| 36 |
response = hallucination_chain.invoke(
|
| 37 |
+
{"document": state["document"], "summary": state["summary"]}
|
| 38 |
)
|
| 39 |
is_hallucinated = response.score == 0
|
| 40 |
refinement_attempts = state["refinement_attempts"] + 1
|
|
|
|
| 82 |
hallucinations.
|
| 83 |
"""
|
| 84 |
logger.warning(f"Fixing hallucinations for document {state['filename']}")
|
| 85 |
+
|
|
|
|
| 86 |
try:
|
| 87 |
response = fix_chain.invoke(
|
| 88 |
{
|
| 89 |
"context": state["document"],
|
| 90 |
+
"summary": state["summary"],
|
| 91 |
"explanation": state["hallucination"].explanation,
|
| 92 |
}
|
| 93 |
)
|
planning_ai/nodes/map_node.py
CHANGED
|
@@ -4,8 +4,8 @@ from langgraph.types import Send
|
|
| 4 |
from presidio_analyzer import AnalyzerEngine
|
| 5 |
from presidio_anonymizer import AnonymizerEngine
|
| 6 |
|
| 7 |
-
from planning_ai.chains.map_chain import
|
| 8 |
-
from planning_ai.chains.themes_chain import
|
| 9 |
from planning_ai.logging import logger
|
| 10 |
from planning_ai.states import DocumentState, OverallState
|
| 11 |
|
|
@@ -15,7 +15,7 @@ anonymizer = AnonymizerEngine()
|
|
| 15 |
nlp = spacy.load("en_core_web_lg")
|
| 16 |
|
| 17 |
|
| 18 |
-
def
|
| 19 |
"""Retrieve themes from a document's content.
|
| 20 |
|
| 21 |
This function uses the `themes_chain` to extract themes from the document's
|
|
@@ -28,39 +28,23 @@ def retrieve_themes(state: DocumentState) -> DocumentState:
|
|
| 28 |
DocumentState: The updated document state with themes and scores.
|
| 29 |
"""
|
| 30 |
try:
|
| 31 |
-
result =
|
| 32 |
-
if not result.
|
| 33 |
state["themes"] = []
|
| 34 |
return state
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
except Exception as e:
|
| 37 |
logger.error(f"Theme selection error: {e}")
|
| 38 |
-
|
| 39 |
-
state["themes"] = [d for d in
|
| 40 |
-
return state
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
def add_entities(state: OverallState) -> OverallState:
|
| 44 |
-
"""Add named entities to all documents in the state.
|
| 45 |
-
|
| 46 |
-
This function processes each document using a spaCy NLP pipeline to extract
|
| 47 |
-
named entities and adds them to the document state.
|
| 48 |
-
|
| 49 |
-
Args:
|
| 50 |
-
state (OverallState): The overall state containing multiple documents.
|
| 51 |
-
|
| 52 |
-
Returns:
|
| 53 |
-
OverallState: The updated state with entities added to each document.
|
| 54 |
-
"""
|
| 55 |
-
logger.info("Adding entities to all documents.")
|
| 56 |
-
for idx, document in enumerate(
|
| 57 |
-
nlp.pipe(
|
| 58 |
-
[doc["document"].page_content for doc in state["documents"]],
|
| 59 |
-
)
|
| 60 |
-
):
|
| 61 |
-
state["documents"][idx]["entities"] = [
|
| 62 |
-
{"entity": ent.text, "label": ent.label_} for ent in document.ents
|
| 63 |
-
]
|
| 64 |
return state
|
| 65 |
|
| 66 |
|
|
@@ -102,30 +86,9 @@ def generate_summary(state: DocumentState) -> dict:
|
|
| 102 |
logger.info(f"Starting PII removal for: {state['filename']}")
|
| 103 |
state["document"].page_content = remove_pii(state["document"].page_content)
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
state = retrieve_themes(state)
|
| 108 |
-
elif state["doc_type"] == "SPT":
|
| 109 |
-
logger.info(f"Retrieving SPT for: {state['filename']}")
|
| 110 |
-
state = retrieve_spt(state)
|
| 111 |
-
|
| 112 |
-
if not state["themes"]:
|
| 113 |
-
logger.warning(f"No themes found for {state['filename']}")
|
| 114 |
-
return {
|
| 115 |
-
"documents": [
|
| 116 |
-
{
|
| 117 |
-
**state,
|
| 118 |
-
"summary": "",
|
| 119 |
-
"refinement_attempts": 0,
|
| 120 |
-
"is_hallucinated": True,
|
| 121 |
-
"processed": True,
|
| 122 |
-
"failed": True,
|
| 123 |
-
}
|
| 124 |
-
]
|
| 125 |
-
}
|
| 126 |
|
| 127 |
-
themes = [theme["theme"].value for theme in state["themes"]]
|
| 128 |
-
map_chain = create_dynamic_map_chain(themes=themes, prompt=map_template)
|
| 129 |
try:
|
| 130 |
response = map_chain.invoke({"context": state["document"].page_content})
|
| 131 |
except Exception as e:
|
|
|
|
| 4 |
from presidio_analyzer import AnalyzerEngine
|
| 5 |
from presidio_anonymizer import AnonymizerEngine
|
| 6 |
|
| 7 |
+
from planning_ai.chains.map_chain import map_chain
|
| 8 |
+
from planning_ai.chains.themes_chain import chapters_chain
|
| 9 |
from planning_ai.logging import logger
|
| 10 |
from planning_ai.states import DocumentState, OverallState
|
| 11 |
|
|
|
|
| 15 |
nlp = spacy.load("en_core_web_lg")
|
| 16 |
|
| 17 |
|
| 18 |
+
def retrieve_chapters(state: DocumentState) -> DocumentState:
|
| 19 |
"""Retrieve themes from a document's content.
|
| 20 |
|
| 21 |
This function uses the `themes_chain` to extract themes from the document's
|
|
|
|
| 28 |
DocumentState: The updated document state with themes and scores.
|
| 29 |
"""
|
| 30 |
try:
|
| 31 |
+
result = chapters_chain.invoke({"document": state["document"].page_content})
|
| 32 |
+
if not result.chapters:
|
| 33 |
state["themes"] = []
|
| 34 |
return state
|
| 35 |
+
chapters = [chapter.model_dump() for chapter in result.chapters]
|
| 36 |
+
chapters = [
|
| 37 |
+
{
|
| 38 |
+
"chapter": chapter["chapter"].value,
|
| 39 |
+
"score": chapter["score"],
|
| 40 |
+
"description": chapter["description"],
|
| 41 |
+
}
|
| 42 |
+
for chapter in chapters
|
| 43 |
+
]
|
| 44 |
except Exception as e:
|
| 45 |
logger.error(f"Theme selection error: {e}")
|
| 46 |
+
chapters = []
|
| 47 |
+
state["themes"] = [d for d in chapters if d["score"] >= 4]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
return state
|
| 49 |
|
| 50 |
|
|
|
|
| 86 |
logger.info(f"Starting PII removal for: {state['filename']}")
|
| 87 |
state["document"].page_content = remove_pii(state["document"].page_content)
|
| 88 |
|
| 89 |
+
logger.info(f"Retrieving themes for: {state['filename']}")
|
| 90 |
+
state = retrieve_chapters(state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
|
|
|
|
|
|
| 92 |
try:
|
| 93 |
response = map_chain.invoke({"context": state["document"].page_content})
|
| 94 |
except Exception as e:
|
planning_ai/nodes/reduce_node.py
CHANGED
|
@@ -1,66 +1,27 @@
|
|
| 1 |
-
import json
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
|
| 4 |
import polars as pl
|
| 5 |
|
| 6 |
from planning_ai.chains.policy_chain import policy_chain
|
| 7 |
from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
|
| 8 |
from planning_ai.logging import logger
|
| 9 |
from planning_ai.states import OverallState
|
| 10 |
-
from planning_ai.themes import THEMES_AND_POLICIES
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def save_summaries_to_json(docs):
|
| 14 |
-
"""Saves summaries to JSON files.
|
| 15 |
-
|
| 16 |
-
Args:
|
| 17 |
-
out (list): A list of summary dictionaries.
|
| 18 |
-
"""
|
| 19 |
-
out = [
|
| 20 |
-
{
|
| 21 |
-
"document": doc["document"].model_dump()["page_content"],
|
| 22 |
-
**doc["document"].metadata,
|
| 23 |
-
"filename": doc["filename"],
|
| 24 |
-
"entities": doc["entities"],
|
| 25 |
-
"themes": doc["themes"].model_dump(),
|
| 26 |
-
"summary": doc["summary"].model_dump()["summary"],
|
| 27 |
-
"policies": doc["policies"],
|
| 28 |
-
"notes": doc["notes"],
|
| 29 |
-
"refinement_attempts": doc["refinement_attempts"],
|
| 30 |
-
"hallucination": doc["hallucination"].model_dump(),
|
| 31 |
-
"is_hallucinated": doc["is_hallucinated"],
|
| 32 |
-
"failed": doc["failed"],
|
| 33 |
-
}
|
| 34 |
-
for doc in docs
|
| 35 |
-
]
|
| 36 |
-
for doc in out:
|
| 37 |
-
filename = Path(str(doc["filename"])).stem
|
| 38 |
-
with open(f"data/out/summaries/{filename}.json", "w") as f:
|
| 39 |
-
json.dump(doc, f)
|
| 40 |
|
| 41 |
|
| 42 |
def extract_policies_from_docs(docs):
|
| 43 |
-
policies = {"doc_id": [], "themes": [], "
|
| 44 |
for doc in docs:
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
policies["policies"].append(policy.policy.name)
|
| 53 |
-
policies["details"].append(policy.note)
|
| 54 |
-
policies["stance"].append(
|
| 55 |
-
doc["document"].metadata["representations_support/object"]
|
| 56 |
-
)
|
| 57 |
return pl.DataFrame(policies)
|
| 58 |
|
| 59 |
|
| 60 |
def add_doc_id(final_docs):
|
| 61 |
out_docs = []
|
| 62 |
for id, doc in enumerate(final_docs):
|
| 63 |
-
doc["summary"].summary = f"Document ID: [{id}]\n\n{doc['summary'].summary}"
|
| 64 |
doc["doc_id"] = id
|
| 65 |
out_docs.append(doc)
|
| 66 |
return out_docs
|
|
@@ -76,7 +37,7 @@ def batch_generate_executive_summaries(summaries):
|
|
| 76 |
list: A list of final responses.
|
| 77 |
"""
|
| 78 |
summaries_text = [
|
| 79 |
-
f"Document ID: {[s['doc_id']]}\n\n{s['summary']
|
| 80 |
]
|
| 81 |
final_responses = []
|
| 82 |
batch_size = 50
|
|
@@ -93,11 +54,11 @@ def batch_generate_executive_summaries(summaries):
|
|
| 93 |
def generate_policy_output(policy_groups):
|
| 94 |
out = []
|
| 95 |
for policy in (
|
| 96 |
-
policy_groups.group_by(["themes", "
|
| 97 |
.agg(pl.col("details"), pl.col("doc_id"))
|
| 98 |
.rows(named=True)
|
| 99 |
):
|
| 100 |
-
logger.info(f"Processing
|
| 101 |
zipped = [
|
| 102 |
f"{bullet} Doc ID: {id}"
|
| 103 |
for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
|
|
@@ -105,20 +66,15 @@ def generate_policy_output(policy_groups):
|
|
| 105 |
try:
|
| 106 |
reduced = policy_chain.invoke(
|
| 107 |
{
|
| 108 |
-
"
|
| 109 |
-
"policy": policy["policies"],
|
| 110 |
"details": zipped,
|
| 111 |
}
|
| 112 |
)
|
| 113 |
out.extend(policy | p for p in reduced.dict()["policies"])
|
| 114 |
except Exception as e:
|
| 115 |
-
logger.error(f"Failed to generate policies for {policy['
|
| 116 |
continue
|
| 117 |
-
return (
|
| 118 |
-
pl.DataFrame(out)
|
| 119 |
-
.group_by(["themes", "policies", "stance"])
|
| 120 |
-
.agg(["detail", "doc_id"])
|
| 121 |
-
)
|
| 122 |
|
| 123 |
|
| 124 |
def generate_final_report(state: OverallState):
|
|
@@ -129,9 +85,13 @@ def generate_final_report(state: OverallState):
|
|
| 129 |
|
| 130 |
|
| 131 |
def final_output(final_docs):
|
| 132 |
-
docs = [doc for doc in final_docs if not doc["failed"]]
|
| 133 |
|
| 134 |
-
failed_docs = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
docs = add_doc_id(docs)
|
| 136 |
|
| 137 |
policy_groups = extract_policies_from_docs(docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import polars as pl
|
| 2 |
|
| 3 |
from planning_ai.chains.policy_chain import policy_chain
|
| 4 |
from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
|
| 5 |
from planning_ai.logging import logger
|
| 6 |
from planning_ai.states import OverallState
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
def extract_policies_from_docs(docs):
|
| 10 |
+
policies = {"doc_id": [], "themes": [], "details": [], "stance": []}
|
| 11 |
for doc in docs:
|
| 12 |
+
for policy in doc["themes"]:
|
| 13 |
+
policies["doc_id"].append(doc["doc_id"])
|
| 14 |
+
policies["themes"].append(policy["chapter"])
|
| 15 |
+
policies["details"].append(policy["description"])
|
| 16 |
+
policies["stance"].append(
|
| 17 |
+
doc["document"].metadata["representations_support/object"]
|
| 18 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
return pl.DataFrame(policies)
|
| 20 |
|
| 21 |
|
| 22 |
def add_doc_id(final_docs):
|
| 23 |
out_docs = []
|
| 24 |
for id, doc in enumerate(final_docs):
|
|
|
|
| 25 |
doc["doc_id"] = id
|
| 26 |
out_docs.append(doc)
|
| 27 |
return out_docs
|
|
|
|
| 37 |
list: A list of final responses.
|
| 38 |
"""
|
| 39 |
summaries_text = [
|
| 40 |
+
f"Document ID: {[s['doc_id']]}\n\n{s['summary']}" for s in summaries
|
| 41 |
]
|
| 42 |
final_responses = []
|
| 43 |
batch_size = 50
|
|
|
|
| 54 |
def generate_policy_output(policy_groups):
|
| 55 |
out = []
|
| 56 |
for policy in (
|
| 57 |
+
policy_groups.group_by(["themes", "stance"])
|
| 58 |
.agg(pl.col("details"), pl.col("doc_id"))
|
| 59 |
.rows(named=True)
|
| 60 |
):
|
| 61 |
+
logger.info(f"Processing chapter: {policy['themes']}...")
|
| 62 |
zipped = [
|
| 63 |
f"{bullet} Doc ID: {id}"
|
| 64 |
for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
|
|
|
|
| 66 |
try:
|
| 67 |
reduced = policy_chain.invoke(
|
| 68 |
{
|
| 69 |
+
"chapter": policy["themes"],
|
|
|
|
| 70 |
"details": zipped,
|
| 71 |
}
|
| 72 |
)
|
| 73 |
out.extend(policy | p for p in reduced.dict()["policies"])
|
| 74 |
except Exception as e:
|
| 75 |
+
logger.error(f"Failed to generate policies for {policy['themes']}: {e}")
|
| 76 |
continue
|
| 77 |
+
return pl.DataFrame(out).group_by(["themes", "stance"]).agg(["detail", "doc_id"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
def generate_final_report(state: OverallState):
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
def final_output(final_docs):
|
| 88 |
+
docs = [doc for doc in final_docs if doc["themes"] and not doc["failed"]]
|
| 89 |
|
| 90 |
+
failed_docs = [
|
| 91 |
+
doc["document"].model_dump()
|
| 92 |
+
for doc in final_docs
|
| 93 |
+
if not doc["themes"] or doc["failed"]
|
| 94 |
+
]
|
| 95 |
docs = add_doc_id(docs)
|
| 96 |
|
| 97 |
policy_groups = extract_policies_from_docs(docs)
|
reports/DOCS/_extensions/nrennie/PrettyPDF/logo.png
ADDED
|
uv.lock
CHANGED
|
@@ -833,18 +833,6 @@ wheels = [
|
|
| 833 |
{ url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
|
| 834 |
]
|
| 835 |
|
| 836 |
-
[[package]]
|
| 837 |
-
name = "extra-streamlit-components"
|
| 838 |
-
version = "0.1.71"
|
| 839 |
-
source = { registry = "https://pypi.org/simple" }
|
| 840 |
-
dependencies = [
|
| 841 |
-
{ name = "streamlit" },
|
| 842 |
-
]
|
| 843 |
-
sdist = { url = "https://files.pythonhosted.org/packages/16/a7/580b13af828ef38888196f8b2c03fa97afa89cdb7946438ca5f3271e9a81/extra_streamlit_components-0.1.71.tar.gz", hash = "sha256:d18314cf2ed009f95641882b50aa3bdb11b6a0eb6403fb43dbc8af1722419617", size = 2250093 }
|
| 844 |
-
wheels = [
|
| 845 |
-
{ url = "https://files.pythonhosted.org/packages/25/57/1115e9b974478fac83ba9cd79def8b3770a91b7a9001c46a76491071f2fe/extra_streamlit_components-0.1.71-py3-none-any.whl", hash = "sha256:c8e6f98446adecd3002756362e50d0669693b7673afaa89cebfced6415cc6bd3", size = 4858597 },
|
| 846 |
-
]
|
| 847 |
-
|
| 848 |
[[package]]
|
| 849 |
name = "faker"
|
| 850 |
version = "36.2.2"
|
|
@@ -1043,14 +1031,14 @@ wheels = [
|
|
| 1043 |
|
| 1044 |
[[package]]
|
| 1045 |
name = "googleapis-common-protos"
|
| 1046 |
-
version = "1.69.
|
| 1047 |
source = { registry = "https://pypi.org/simple" }
|
| 1048 |
dependencies = [
|
| 1049 |
{ name = "protobuf" },
|
| 1050 |
]
|
| 1051 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1052 |
wheels = [
|
| 1053 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1054 |
]
|
| 1055 |
|
| 1056 |
[[package]]
|
|
@@ -1834,15 +1822,15 @@ wheels = [
|
|
| 1834 |
|
| 1835 |
[[package]]
|
| 1836 |
name = "langgraph-checkpoint"
|
| 1837 |
-
version = "2.0.
|
| 1838 |
source = { registry = "https://pypi.org/simple" }
|
| 1839 |
dependencies = [
|
| 1840 |
{ name = "langchain-core" },
|
| 1841 |
{ name = "msgpack" },
|
| 1842 |
]
|
| 1843 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1844 |
wheels = [
|
| 1845 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1846 |
]
|
| 1847 |
|
| 1848 |
[[package]]
|
|
@@ -1860,20 +1848,20 @@ wheels = [
|
|
| 1860 |
|
| 1861 |
[[package]]
|
| 1862 |
name = "langgraph-sdk"
|
| 1863 |
-
version = "0.1.
|
| 1864 |
source = { registry = "https://pypi.org/simple" }
|
| 1865 |
dependencies = [
|
| 1866 |
{ name = "httpx" },
|
| 1867 |
{ name = "orjson" },
|
| 1868 |
]
|
| 1869 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1870 |
wheels = [
|
| 1871 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1872 |
]
|
| 1873 |
|
| 1874 |
[[package]]
|
| 1875 |
name = "langsmith"
|
| 1876 |
-
version = "0.3.
|
| 1877 |
source = { registry = "https://pypi.org/simple" }
|
| 1878 |
dependencies = [
|
| 1879 |
{ name = "httpx" },
|
|
@@ -1884,9 +1872,9 @@ dependencies = [
|
|
| 1884 |
{ name = "requests-toolbelt" },
|
| 1885 |
{ name = "zstandard" },
|
| 1886 |
]
|
| 1887 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1888 |
wheels = [
|
| 1889 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1890 |
]
|
| 1891 |
|
| 1892 |
[[package]]
|
|
@@ -2236,11 +2224,11 @@ wheels = [
|
|
| 2236 |
|
| 2237 |
[[package]]
|
| 2238 |
name = "narwhals"
|
| 2239 |
-
version = "1.29.
|
| 2240 |
source = { registry = "https://pypi.org/simple" }
|
| 2241 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 2242 |
wheels = [
|
| 2243 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2244 |
]
|
| 2245 |
|
| 2246 |
[[package]]
|
|
@@ -2844,7 +2832,7 @@ wheels = [
|
|
| 2844 |
|
| 2845 |
[[package]]
|
| 2846 |
name = "posthog"
|
| 2847 |
-
version = "3.
|
| 2848 |
source = { registry = "https://pypi.org/simple" }
|
| 2849 |
dependencies = [
|
| 2850 |
{ name = "backoff" },
|
|
@@ -2854,9 +2842,9 @@ dependencies = [
|
|
| 2854 |
{ name = "requests" },
|
| 2855 |
{ name = "six" },
|
| 2856 |
]
|
| 2857 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 2858 |
wheels = [
|
| 2859 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2860 |
]
|
| 2861 |
|
| 2862 |
[[package]]
|
|
@@ -3416,11 +3404,11 @@ wheels = [
|
|
| 3416 |
|
| 3417 |
[[package]]
|
| 3418 |
name = "python-json-logger"
|
| 3419 |
-
version = "3.
|
| 3420 |
source = { registry = "https://pypi.org/simple" }
|
| 3421 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 3422 |
wheels = [
|
| 3423 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 3424 |
]
|
| 3425 |
|
| 3426 |
[[package]]
|
|
@@ -4031,19 +4019,20 @@ wheels = [
|
|
| 4031 |
|
| 4032 |
[[package]]
|
| 4033 |
name = "streamlit-authenticator"
|
| 4034 |
-
version = "0.4.
|
| 4035 |
source = { registry = "https://pypi.org/simple" }
|
| 4036 |
dependencies = [
|
| 4037 |
{ name = "bcrypt" },
|
| 4038 |
{ name = "captcha" },
|
| 4039 |
{ name = "cryptography" },
|
| 4040 |
-
{ name = "extra-streamlit-components" },
|
| 4041 |
{ name = "pyjwt" },
|
| 4042 |
{ name = "pyyaml" },
|
| 4043 |
{ name = "streamlit" },
|
|
|
|
| 4044 |
]
|
|
|
|
| 4045 |
wheels = [
|
| 4046 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 4047 |
]
|
| 4048 |
|
| 4049 |
[[package]]
|
|
@@ -4140,6 +4129,18 @@ wheels = [
|
|
| 4140 |
{ url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
|
| 4141 |
]
|
| 4142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4143 |
[[package]]
|
| 4144 |
name = "streamlit-keyup"
|
| 4145 |
version = "0.3.0"
|
|
|
|
| 833 |
{ url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
|
| 834 |
]
|
| 835 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 836 |
[[package]]
|
| 837 |
name = "faker"
|
| 838 |
version = "36.2.2"
|
|
|
|
| 1031 |
|
| 1032 |
[[package]]
|
| 1033 |
name = "googleapis-common-protos"
|
| 1034 |
+
version = "1.69.1"
|
| 1035 |
source = { registry = "https://pypi.org/simple" }
|
| 1036 |
dependencies = [
|
| 1037 |
{ name = "protobuf" },
|
| 1038 |
]
|
| 1039 |
+
sdist = { url = "https://files.pythonhosted.org/packages/41/4f/d8be74b88621131dfd1ed70e5aff2c47f2bdf2289a70736bbf3eb0e7bc70/googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1", size = 144514 }
|
| 1040 |
wheels = [
|
| 1041 |
+
{ url = "https://files.pythonhosted.org/packages/16/cb/2f4aa605b16df1e031dd7c322c597613eef933e8dd5b6a4414330b21e791/googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5", size = 293229 },
|
| 1042 |
]
|
| 1043 |
|
| 1044 |
[[package]]
|
|
|
|
| 1822 |
|
| 1823 |
[[package]]
|
| 1824 |
name = "langgraph-checkpoint"
|
| 1825 |
+
version = "2.0.17"
|
| 1826 |
source = { registry = "https://pypi.org/simple" }
|
| 1827 |
dependencies = [
|
| 1828 |
{ name = "langchain-core" },
|
| 1829 |
{ name = "msgpack" },
|
| 1830 |
]
|
| 1831 |
+
sdist = { url = "https://files.pythonhosted.org/packages/90/92/f0d6c3e2b2e131c687a9ec87c6e1a430287c430160038e8dfaa4d0db9aab/langgraph_checkpoint-2.0.17.tar.gz", hash = "sha256:255c249f03369c41252f888bc1e1e481bf4fdecf6b3854a39e4935dc34152bc0", size = 34932 }
|
| 1832 |
wheels = [
|
| 1833 |
+
{ url = "https://files.pythonhosted.org/packages/4d/61/35faa34145ddaffc70eb851b85561c96bbad4718ea6f34ef3c717e748c15/langgraph_checkpoint-2.0.17-py3-none-any.whl", hash = "sha256:7da9cd9af41bda5074afef0dcdbd6fa9a050f68beed9d7f80593a253412bf020", size = 39103 },
|
| 1834 |
]
|
| 1835 |
|
| 1836 |
[[package]]
|
|
|
|
| 1848 |
|
| 1849 |
[[package]]
|
| 1850 |
name = "langgraph-sdk"
|
| 1851 |
+
version = "0.1.55"
|
| 1852 |
source = { registry = "https://pypi.org/simple" }
|
| 1853 |
dependencies = [
|
| 1854 |
{ name = "httpx" },
|
| 1855 |
{ name = "orjson" },
|
| 1856 |
]
|
| 1857 |
+
sdist = { url = "https://files.pythonhosted.org/packages/7a/6c/8286151a21124dc0189b57495541c2e3cace317056f60feb04076b438f82/langgraph_sdk-0.1.55.tar.gz", hash = "sha256:89a0240157a27822cc4edd1c9e72bc852e20f5c71165a4c9b91eeffa11fd6a6b", size = 42690 }
|
| 1858 |
wheels = [
|
| 1859 |
+
{ url = "https://files.pythonhosted.org/packages/4e/64/4b75f4b57f0c8f39bdb43aa74b1d2edcdb604b5baa58465ccc54b8b906c5/langgraph_sdk-0.1.55-py3-none-any.whl", hash = "sha256:266e92a558eb738da1ef04c29fbfc2157cd3a977b80905d9509a2cb79331f8fc", size = 45785 },
|
| 1860 |
]
|
| 1861 |
|
| 1862 |
[[package]]
|
| 1863 |
name = "langsmith"
|
| 1864 |
+
version = "0.3.12"
|
| 1865 |
source = { registry = "https://pypi.org/simple" }
|
| 1866 |
dependencies = [
|
| 1867 |
{ name = "httpx" },
|
|
|
|
| 1872 |
{ name = "requests-toolbelt" },
|
| 1873 |
{ name = "zstandard" },
|
| 1874 |
]
|
| 1875 |
+
sdist = { url = "https://files.pythonhosted.org/packages/48/32/0ef5ad579ae096f40fc108b6920e742267a0e9c07d778c1d381586616715/langsmith-0.3.12.tar.gz", hash = "sha256:045b49d0401d0e985d025ff0cf69743ab9a429e309ce5d533eab3c774d004bc2", size = 324149 }
|
| 1876 |
wheels = [
|
| 1877 |
+
{ url = "https://files.pythonhosted.org/packages/9a/92/9702c45974c4dbea978f8af1cfb077677b96f98df12b1638be8eff5ae5ff/langsmith-0.3.12-py3-none-any.whl", hash = "sha256:cf7926bd12d56adbd74a294ebbfc5a34c413172bfbdcd763175cc472b45afbea", size = 335663 },
|
| 1878 |
]
|
| 1879 |
|
| 1880 |
[[package]]
|
|
|
|
| 2224 |
|
| 2225 |
[[package]]
|
| 2226 |
name = "narwhals"
|
| 2227 |
+
version = "1.29.1"
|
| 2228 |
source = { registry = "https://pypi.org/simple" }
|
| 2229 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a7/17/7d35094da0820ae941d8ce51842f253da36c6f95360ea0afabfc18bc02c6/narwhals-1.29.1.tar.gz", hash = "sha256:c408acf09e90c116f247cf34f24a3a89d147e3e235b1d3c708cfd1960baf320a", size = 251464 }
|
| 2230 |
wheels = [
|
| 2231 |
+
{ url = "https://files.pythonhosted.org/packages/f1/22/380df533b08a57bc9013bb5714f33c571e1447828d83213a66adaefc0a04/narwhals-1.29.1-py3-none-any.whl", hash = "sha256:2f68cfbb2562672c4dfa54f158ed8c2828e9920ef784981cd9114e419c444216", size = 308220 },
|
| 2232 |
]
|
| 2233 |
|
| 2234 |
[[package]]
|
|
|
|
| 2832 |
|
| 2833 |
[[package]]
|
| 2834 |
name = "posthog"
|
| 2835 |
+
version = "3.19.0"
|
| 2836 |
source = { registry = "https://pypi.org/simple" }
|
| 2837 |
dependencies = [
|
| 2838 |
{ name = "backoff" },
|
|
|
|
| 2842 |
{ name = "requests" },
|
| 2843 |
{ name = "six" },
|
| 2844 |
]
|
| 2845 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d4/fe/5f785ed1514caa4687738e112716904d4fce8752068f9cad2afaa8207b05/posthog-3.19.0.tar.gz", hash = "sha256:7fe5c9e494fc2cca9baa2bd8074c0844d572df46a54378101bc20eec2776027e", size = 66172 }
|
| 2846 |
wheels = [
|
| 2847 |
+
{ url = "https://files.pythonhosted.org/packages/5c/28/57f95743a17af817ea56f0f3aa280af677884f0d6b59c8edf4c30fc5ac2e/posthog-3.19.0-py2.py3-none-any.whl", hash = "sha256:c294bc0a939e21ecf88d625496f8073cc566c28ec2a917a47d5d32ba33e90a7f", size = 77800 },
|
| 2848 |
]
|
| 2849 |
|
| 2850 |
[[package]]
|
|
|
|
| 3404 |
|
| 3405 |
[[package]]
|
| 3406 |
name = "python-json-logger"
|
| 3407 |
+
version = "3.3.0"
|
| 3408 |
source = { registry = "https://pypi.org/simple" }
|
| 3409 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9e/de/d3144a0bceede957f961e975f3752760fbe390d57fbe194baf709d8f1f7b/python_json_logger-3.3.0.tar.gz", hash = "sha256:12b7e74b17775e7d565129296105bbe3910842d9d0eb083fc83a6a617aa8df84", size = 16642 }
|
| 3410 |
wheels = [
|
| 3411 |
+
{ url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 },
|
| 3412 |
]
|
| 3413 |
|
| 3414 |
[[package]]
|
|
|
|
| 4019 |
|
| 4020 |
[[package]]
|
| 4021 |
name = "streamlit-authenticator"
|
| 4022 |
+
version = "0.4.3"
|
| 4023 |
source = { registry = "https://pypi.org/simple" }
|
| 4024 |
dependencies = [
|
| 4025 |
{ name = "bcrypt" },
|
| 4026 |
{ name = "captcha" },
|
| 4027 |
{ name = "cryptography" },
|
|
|
|
| 4028 |
{ name = "pyjwt" },
|
| 4029 |
{ name = "pyyaml" },
|
| 4030 |
{ name = "streamlit" },
|
| 4031 |
+
{ name = "streamlit-javascript" },
|
| 4032 |
]
|
| 4033 |
+
sdist = { url = "https://files.pythonhosted.org/packages/bf/6d/7c59285447bdb18c03e37888f8d1f593e241f4af275c36e46fddced8590a/streamlit_authenticator-0.4.3.tar.gz", hash = "sha256:d05dd8656bdff0b4881b40c36108bf068e7aad8424c20cf87c9e657aac8d4a92", size = 39752 }
|
| 4034 |
wheels = [
|
| 4035 |
+
{ url = "https://files.pythonhosted.org/packages/ed/5a/039d6303526306740d214a8edd360a465a1710491b3cb0284799bbd7131c/streamlit_authenticator-0.4.3-py3-none-any.whl", hash = "sha256:1f7347d480cf71c76bb915cff0a62516d087115ad1e72f99708b944705baf78d", size = 43196 },
|
| 4036 |
]
|
| 4037 |
|
| 4038 |
[[package]]
|
|
|
|
| 4129 |
{ url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
|
| 4130 |
]
|
| 4131 |
|
| 4132 |
+
[[package]]
|
| 4133 |
+
name = "streamlit-javascript"
|
| 4134 |
+
version = "0.1.5"
|
| 4135 |
+
source = { registry = "https://pypi.org/simple" }
|
| 4136 |
+
dependencies = [
|
| 4137 |
+
{ name = "streamlit" },
|
| 4138 |
+
]
|
| 4139 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a5/69/723ebc7a58057b1e6f54d0c23c86488054d756e0e470daf4db703fe02a63/streamlit-javascript-0.1.5.tar.gz", hash = "sha256:9da5176522a0acf2c39d3b0bec9f856fdd8ea3c70bb1066841a546ab1348ae1d", size = 512374 }
|
| 4140 |
+
wheels = [
|
| 4141 |
+
{ url = "https://files.pythonhosted.org/packages/87/81/0c9e9e4d2dab97224efe105ba44f47b259f37d8e7673b94f5b5523fe2c8f/streamlit_javascript-0.1.5-py3-none-any.whl", hash = "sha256:36ca4d8c46fd5b6526d1a705530472e03cb6e5bb24694330649f21d5c436d280", size = 518387 },
|
| 4142 |
+
]
|
| 4143 |
+
|
| 4144 |
[[package]]
|
| 4145 |
name = "streamlit-keyup"
|
| 4146 |
version = "0.3.0"
|