cjber committed on
Commit
2d44ef6
·
1 Parent(s): 5767260

fix: allow for specifying chapters

Browse files
.streamlit/config.toml CHANGED
@@ -3,4 +3,3 @@ primaryColor="#0A3D91"
3
  backgroundColor="#f0f0f5"
4
  secondaryBackgroundColor="#e0e0ef"
5
  textColor="#262730"
6
- font="sans serif"
 
3
  backgroundColor="#f0f0f5"
4
  secondaryBackgroundColor="#e0e0ef"
5
  textColor="#262730"
 
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import base64
 
2
  import time
3
  from os import getenv
4
 
5
  import polars as pl
6
  import py7zr
 
7
  import streamlit as st
8
  import streamlit_authenticator as stauth
 
9
  from streamlit_extras.stylable_container import stylable_container
10
 
11
  from planning_ai.common.utils import Paths
@@ -150,16 +153,10 @@ authenticator = stauth.Authenticate(
150
  UPLOAD_DIR = Paths.RAW / "gcpt3"
151
 
152
 
153
- def handle_authentication():
154
- """Handle user authentication."""
155
- try:
156
- authenticator.login()
157
- except Exception as e:
158
- st.error(e)
159
-
160
-
161
  def initialize_session_state():
162
  """Initialize session state variables."""
 
 
163
  if "files_extracted" not in st.session_state:
164
  st.session_state["files_extracted"] = False
165
  if "completed" not in st.session_state:
@@ -170,6 +167,72 @@ def initialize_session_state():
170
  st.session_state["end_time"] = None
171
 
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  def upload_and_extract_files():
174
  """Handle file upload and extraction."""
175
  main1, main2 = st.columns(2)
@@ -185,11 +248,7 @@ def upload_and_extract_files():
185
  2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
186
  """
187
  )
188
- st.write("---")
189
- st.title("Select Document Type")
190
- doc_type = st.selectbox(
191
- "Select the type of document:", ["Themes & Policies", "SPT"]
192
- )
193
  with main2:
194
  st.title("Upload JDi files")
195
  st.write(
@@ -243,10 +302,9 @@ def upload_and_extract_files():
243
  )
244
  except Exception as e:
245
  st.error(f"Failed to extract files {e}")
246
- return doc_type
247
 
248
 
249
- def build_report(doc_type):
250
  """Build the report from extracted files."""
251
  # Remove old files
252
  _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
@@ -277,7 +335,7 @@ def build_report(doc_type):
277
  except Exception as e:
278
  st.error(f"An error occurred during PDF text extraction: {e}")
279
  with st.spinner("Building report...", show_time=True):
280
- report_main(doc_type=doc_type)
281
  st.session_state["end_time"] = time.time()
282
  st.session_state["completed"] = True
283
  total_time = (
@@ -319,10 +377,10 @@ def display_download_buttons():
319
  with st.expander("**Executive Reports**"):
320
  for i, rep in enumerate(representations_documents):
321
  summaries_pdf_path = (
322
- Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.pdf"
323
  )
324
  summaries_docx_path = (
325
- Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.docx"
326
  )
327
  with st.container():
328
  st.subheader(f"Executive Report for {rep}")
@@ -332,7 +390,7 @@ def display_download_buttons():
332
  st.download_button(
333
  label="Download PDF Version",
334
  data=pdf_file,
335
- file_name=f"Summary_of_Submitted_Representations-{rep}.pdf",
336
  mime="application/pdf",
337
  use_container_width=True,
338
  key=f"exec_pdf_{i}_{hash(rep)}",
@@ -343,7 +401,7 @@ def display_download_buttons():
343
  st.download_button(
344
  label="Download DOCX Version",
345
  data=docx_file,
346
- file_name=f"Summary_of_Submitted_Representations-{rep}.docx",
347
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
348
  use_container_width=True,
349
  key=f"exec_docx_{i}_{hash(rep)}",
@@ -355,8 +413,12 @@ def display_download_buttons():
355
  # Create a container for the Representation Summaries
356
  with st.expander("**Representation Summaries**"):
357
  for i, rep in enumerate(representations_documents):
358
- report_pdf_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
359
- report_docx_path = Paths.SUMMARY / f"Summary_Documents-{rep}.docx"
 
 
 
 
360
  with st.container():
361
  st.subheader(f"Representation Summary for {rep}")
362
  col1, col2 = st.columns(2)
@@ -365,7 +427,7 @@ def display_download_buttons():
365
  st.download_button(
366
  label="Download PDF Version",
367
  data=pdf_file,
368
- file_name=f"Summary_Documents-{rep}.pdf",
369
  mime="application/pdf",
370
  use_container_width=True,
371
  key=f"rep_pdf_{i}_{hash(rep)}",
@@ -376,7 +438,7 @@ def display_download_buttons():
376
  st.download_button(
377
  label="Download DOCX Version",
378
  data=docx_file,
379
- file_name=f"Summary_Documents-{rep}.docx",
380
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
381
  use_container_width=True,
382
  key=f"rep_docx_{i}_{hash(rep)}",
@@ -387,13 +449,16 @@ def display_download_buttons():
387
 
388
 
389
  def reset_session():
390
- st.session_state["completed"] = False
391
  st.session_state["files_extracted"] = False
 
 
 
392
 
393
 
394
  def main():
395
  """Main function to run the Streamlit app."""
396
- handle_authentication()
397
  initialize_session_state()
398
 
399
  # Handle authentication states
@@ -407,28 +472,32 @@ def main():
407
  reset_session()
408
  return
409
 
410
- # Authenticated user flow
411
- with stylable_container(
412
- key="Logout",
413
- css_styles="""
414
- button {
415
- float: right;
416
- }
417
- """,
418
- ):
419
- authenticator.logout() # show logout button
 
 
 
 
420
 
421
- # Step 1: Upload and extract files
422
- if not st.session_state["files_extracted"]:
423
- doc_type = upload_and_extract_files()
424
 
425
- # Step 2: Build report if files are ready
426
- if st.session_state["files_extracted"]:
427
- build_report(doc_type)
428
 
429
- # Step 3: Show download buttons when complete
430
- if st.session_state["completed"]:
431
- display_download_buttons()
432
 
433
 
434
  if __name__ == "__main__":
 
1
  import base64
2
+ import re
3
  import time
4
  from os import getenv
5
 
6
  import polars as pl
7
  import py7zr
8
+ import requests
9
  import streamlit as st
10
  import streamlit_authenticator as stauth
11
+ from bs4 import BeautifulSoup
12
  from streamlit_extras.stylable_container import stylable_container
13
 
14
  from planning_ai.common.utils import Paths
 
153
  UPLOAD_DIR = Paths.RAW / "gcpt3"
154
 
155
 
 
 
 
 
 
 
 
 
156
  def initialize_session_state():
157
  """Initialize session state variables."""
158
+ if "chapters" not in st.session_state:
159
+ st.session_state["chapters"] = False
160
  if "files_extracted" not in st.session_state:
161
  st.session_state["files_extracted"] = False
162
  if "completed" not in st.session_state:
 
167
  st.session_state["end_time"] = None
168
 
169
 
170
def get_chapters(
    consultation_url: str, timeout: float = 10.0
) -> tuple[str, list[str]]:
    """Scrape a document title and its chapter headings from a consultation page.

    The text of the first ``<h2>`` on the page is used as the title. The text
    of every non-empty ``<a>`` element located between the first and the second
    ``<h2>`` is collected as a chapter heading, with a trailing parenthesised
    suffix removed.

    Args:
        consultation_url: URL of the consultation document. When empty, no
            request is made.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot stall the app indefinitely.

    Returns:
        A ``(title, chapters)`` tuple:

        * ``("None", ["None"])`` when ``consultation_url`` is empty;
        * ``("", [])`` (after reporting via ``st.error``) when the page cannot
          be fetched or contains fewer than two ``<h2>`` elements;
        * otherwise the scraped title and list of headings.
    """
    if not consultation_url:
        return "None", ["None"]

    # The URL is typed by the user, so malformed or unreachable addresses are
    # expected; report them in the UI instead of raising out of the script.
    try:
        response = requests.get(consultation_url, timeout=timeout)
    except requests.RequestException as exc:
        st.error(f"Failed to fetch consultation document: {exc}")
        return "", []
    if not response.ok:
        st.error("Failed to fetch consultation document")
        return "", []

    soup = BeautifulSoup(response.text, "html.parser")
    h2_tags = soup.find_all("h2")
    if len(h2_tags) < 2:
        st.error("Invalid page format - not enough <h2> headers")
        return "", []

    first_h2, second_h2 = h2_tags[0], h2_tags[1]

    # Walk the document in order from the first <h2> and gather link texts
    # until the second <h2> is reached.
    links_between = []
    for element in first_h2.find_all_next():
        # Identity check: we want this exact node, and it avoids a structural
        # comparison against every element visited.
        if element is second_h2:
            break
        if element.name == "a":
            link_text = element.text.strip()
            if link_text:
                links_between.append(link_text)

    # NOTE: the pattern removes everything from the first "(" that can reach a
    # closing ")" at the end of the string, so "A (b) c (3)" becomes "A".
    cleaned_links = [re.sub(r"\s*\(.*?\)$", "", link) for link in links_between]
    cleaned_title = first_h2.text.strip()
    return cleaned_title, cleaned_links
199
+
200
+
201
def specify_chapters():
    """Render the "Specify Chapters" step and persist the chosen headings.

    Asks for a consultation document URL, shows the title and chapter headings
    returned by ``get_chapters`` for it, and offers a comma-separated text
    field (pre-filled with those headings) so they can be corrected by hand.

    Side effects:
        * On every render, the current headings are written one per line to
          ``Paths.RAW / "chapters.txt"`` and the title to
          ``Paths.RAW / "title.txt"``; this happens regardless of whether the
          button has been pressed.
        * Clicking "Save Chapters" sets ``st.session_state["chapters"]`` to
          ``True``.
    """
    st.title("Specify Chapters")
    st.write(
        "Please specify the Consultation Document URL from the Consultation Hub. This will autopopulate the chapter headings for the final document. \n\n**Please ensure that the final chapter headings are correct.**"
    )

    consultation_url = st.text_input(
        "Consultation Document URL",
        key="consultation_url",
        placeholder="https://oc2.greatercambridgeplanning.org/document/1314",
    )
    title, scraped_chapters = get_chapters(consultation_url)
    st.write(f"**Title:** {title}")
    st.write("**Chapters:**", "\n- " + "\n- ".join(scraped_chapters))
    st.write(
        "**If the chapter headings are incorrect, please add them manually below, separated by commas.**"
    )

    suggested_headings = ", ".join(scraped_chapters)
    raw_headings = st.text_input(
        "Chapter Headings",
        key="chapter_headings",
        placeholder=suggested_headings,
        value=suggested_headings,
    )

    # Drop empty entries so that blank input, trailing commas or doubled commas
    # do not end up as blank chapter lines in chapters.txt.
    stripped_headings = (heading.strip() for heading in raw_headings.split(","))
    chapters = [heading for heading in stripped_headings if heading]

    with open(Paths.RAW / "chapters.txt", "w") as f:
        f.write("\n".join(chapters))
    with open(Paths.RAW / "title.txt", "w") as f:
        f.write(title)

    st.button(
        "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
    )
234
+
235
+
236
  def upload_and_extract_files():
237
  """Handle file upload and extraction."""
238
  main1, main2 = st.columns(2)
 
248
  2. **Executive Report** documents contain first an executive summary of the key points extracted from response documents, following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally this document details **Themes and Policies**, where key themes and policies by response are highlighted, with notable information from responses bullet-pointed. This document contains inline citations, which relate back to the numbers associated with responses in the **Representation Summary Documents**. Citations are included to allow readers to manually verify the claims and points made by the AI model.
249
  """
250
  )
251
+
 
 
 
 
252
  with main2:
253
  st.title("Upload JDi files")
254
  st.write(
 
302
  )
303
  except Exception as e:
304
  st.error(f"Failed to extract files {e}")
 
305
 
306
 
307
+ def build_report():
308
  """Build the report from extracted files."""
309
  # Remove old files
310
  _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
 
335
  except Exception as e:
336
  st.error(f"An error occurred during PDF text extraction: {e}")
337
  with st.spinner("Building report...", show_time=True):
338
+ report_main()
339
  st.session_state["end_time"] = time.time()
340
  st.session_state["completed"] = True
341
  total_time = (
 
377
  with st.expander("**Executive Reports**"):
378
  for i, rep in enumerate(representations_documents):
379
  summaries_pdf_path = (
380
+ Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.pdf"
381
  )
382
  summaries_docx_path = (
383
+ Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.docx"
384
  )
385
  with st.container():
386
  st.subheader(f"Executive Report for {rep}")
 
390
  st.download_button(
391
  label="Download PDF Version",
392
  data=pdf_file,
393
+ file_name=f"Overview_of_Public_Submissions-{rep}.pdf",
394
  mime="application/pdf",
395
  use_container_width=True,
396
  key=f"exec_pdf_{i}_{hash(rep)}",
 
401
  st.download_button(
402
  label="Download DOCX Version",
403
  data=docx_file,
404
+ file_name=f"Overview_of_Public_Submissions-{rep}.docx",
405
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
406
  use_container_width=True,
407
  key=f"exec_docx_{i}_{hash(rep)}",
 
413
  # Create a container for the Representation Summaries
414
  with st.expander("**Representation Summaries**"):
415
  for i, rep in enumerate(representations_documents):
416
+ report_pdf_path = (
417
+ Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.pdf"
418
+ )
419
+ report_docx_path = (
420
+ Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.docx"
421
+ )
422
  with st.container():
423
  st.subheader(f"Representation Summary for {rep}")
424
  col1, col2 = st.columns(2)
 
427
  st.download_button(
428
  label="Download PDF Version",
429
  data=pdf_file,
430
+ file_name=f"Summaries_of_Public_Submissions-{rep}.pdf",
431
  mime="application/pdf",
432
  use_container_width=True,
433
  key=f"rep_pdf_{i}_{hash(rep)}",
 
438
  st.download_button(
439
  label="Download DOCX Version",
440
  data=docx_file,
441
+ file_name=f"Summaries_of_Public_Submissions-{rep}.docx",
442
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
443
  use_container_width=True,
444
  key=f"rep_docx_{i}_{hash(rep)}",
 
449
 
450
 
451
def reset_session():
    """Restore the workflow flags and timing values in ``st.session_state``.

    The three progress flags are set back to ``False`` and the two timestamps
    to ``None``.
    """
    defaults = {
        "chapters": False,
        "files_extracted": False,
        "completed": False,
        "start_time": None,
        "end_time": None,
    }
    for state_key, default_value in defaults.items():
        st.session_state[state_key] = default_value
457
 
458
 
459
  def main():
460
  """Main function to run the Streamlit app."""
461
+ authenticator.login()
462
  initialize_session_state()
463
 
464
  # Handle authentication states
 
472
  reset_session()
473
  return
474
 
475
+ if st.session_state["authentication_status"]:
476
+ with stylable_container(
477
+ key="Logout",
478
+ css_styles="""
479
+ button {
480
+ float: right;
481
+ }
482
+ """,
483
+ ):
484
+ authenticator.logout() # show logout button
485
+
486
+ # Step 1: Specify chapters
487
+ if not st.session_state["chapters"]:
488
+ specify_chapters()
489
 
490
+ # Step 2: Upload and extract files
491
+ if not st.session_state["files_extracted"] and st.session_state["chapters"]:
492
+ upload_and_extract_files()
493
 
494
+ # Step 3: Build report if files are ready
495
+ if st.session_state["files_extracted"]:
496
+ build_report()
497
 
498
+ # Step 4: Show download buttons when complete
499
+ if st.session_state["completed"]:
500
+ display_download_buttons()
501
 
502
 
503
  if __name__ == "__main__":
data/covers/Overview_of_Public_Submissions.docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d25f4fe1da660ebcb4655a27b9a8bca9a1dab73962900f858309a683b8fbc58d
3
+ size 1595802
data/covers/{cover2 1.pdf → Overview_of_Public_Submissions.pdf} RENAMED
Binary files a/data/covers/cover2 1.pdf and b/data/covers/Overview_of_Public_Submissions.pdf differ
 
data/covers/Summaries_of_Public_Submissions.docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6bc5191e10d13f15337f649b829c6f210bdda541bcde3ad4fe05d63f099a5b0
3
+ size 1595689
data/covers/{cover_summary_responses.pdf → Summaries_of_Public_Submissions.pdf} RENAMED
Binary files a/data/covers/cover_summary_responses.pdf and b/data/covers/Summaries_of_Public_Submissions.pdf differ
 
data/covers/reference.docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b4999b22cc124005ceb7adfc0ade6977005841b9af2a5c3dea717ae6b3fafe0
3
+ size 5057
data/raw/chapters.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Chapter 1: Introduction and purpose
2
+ Chapter 2: Ambitions for the Campus and development to date
3
+ Chapter 3: Site context
4
+ Chapter 4: Cambridge Biomedical Campus development principles
5
+ Chapter 5: Obligations and mitigation
data/raw/title.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Draft Cambridge Biomedical Campus Supplementary Planning Document
logo.png ADDED
planning_ai/chains/fix_chain.py CHANGED
@@ -1,9 +1,15 @@
1
- from planning_ai.chains.map_chain import create_dynamic_map_chain
 
 
2
  from planning_ai.common.utils import Paths
 
3
 
4
  with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
5
  fix_template = f.read()
6
 
 
 
 
7
  if __name__ == "__main__":
8
  test_document = """
9
  The Local Plan proposes a mass development north-west of Cambridge despite marked growth
@@ -12,7 +18,6 @@ if __name__ == "__main__":
12
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
13
  """
14
  test_themes = {"Great Places", "Homes", "Climate Change"}
15
- fix_chain = create_dynamic_map_chain(test_themes, fix_template)
16
  result = fix_chain.invoke(
17
  {
18
  "summary": "This plan is great because they are building a nuclear power plant.",
@@ -20,4 +25,3 @@ if __name__ == "__main__":
20
  "context": test_document,
21
  }
22
  )
23
- __import__("pprint").pprint(dict(result))
 
1
+ from langchain_core.output_parsers import StrOutputParser
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+
4
  from planning_ai.common.utils import Paths
5
+ from planning_ai.llms.llm import GPT4o
6
 
7
  with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
8
  fix_template = f.read()
9
 
10
+ fix_prompt = ChatPromptTemplate.from_messages([("system", fix_template)])
11
+ fix_chain = fix_prompt | GPT4o | StrOutputParser()
12
+
13
  if __name__ == "__main__":
14
  test_document = """
15
  The Local Plan proposes a mass development north-west of Cambridge despite marked growth
 
18
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
19
  """
20
  test_themes = {"Great Places", "Homes", "Climate Change"}
 
21
  result = fix_chain.invoke(
22
  {
23
  "summary": "This plan is great because they are building a nuclear power plant.",
 
25
  "context": test_document,
26
  }
27
  )
 
planning_ai/chains/map_chain.py CHANGED
@@ -1,77 +1,14 @@
1
- from enum import Enum, auto
2
- from typing import Optional, Type
3
-
4
  from langchain_core.prompts import ChatPromptTemplate
5
- from pydantic import BaseModel, create_model
6
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import GPT4o
9
- from planning_ai.themes import THEMES_AND_POLICIES
10
 
11
  with open(Paths.PROMPTS / "map.txt", "r") as f:
12
  map_template = f.read()
13
 
14
-
15
- def create_policy_enum(
16
- policy_groups: list[str], name: str = "DynamicPolicyEnum"
17
- ) -> Enum:
18
- """
19
- Create a dynamic enum for policies based on the given policy groups.
20
-
21
- Args:
22
- policy_groups (list[str]): A set of policy group names.
23
- name (str): Name of the enum to be created.
24
-
25
- Returns:
26
- Type[Enum]: A dynamically created Enum class for the policies.
27
- """
28
- return Enum(name, {policy: auto() for policy in policy_groups})
29
-
30
-
31
- def create_brief_summary_model(policy_enum: Enum) -> Type[BaseModel]:
32
- """
33
- Dynamically create a BriefSummary model using the provided policy enum.
34
-
35
- Args:
36
- policy_enum (Type[Enum]): The dynamically created policy enum.
37
-
38
- Returns:
39
- Type[BaseModel]: A dynamically generated Pydantic model for BriefSummary.
40
- """
41
-
42
- class Policy(BaseModel):
43
- policy: policy_enum
44
- note: str
45
-
46
- return create_model(
47
- "DynamicBriefSummary",
48
- summary=(str, ...),
49
- policies=(Optional[list[Policy]], ...),
50
- __module__=__name__,
51
- __config__={"extra": "forbid"},
52
- )
53
-
54
-
55
- def create_dynamic_map_chain(themes, prompt: str, doc_type: str):
56
-
57
- policy_groups = []
58
- for theme in themes:
59
- if theme in THEMES_AND_POLICIES:
60
- policy_groups.extend(THEMES_AND_POLICIES[theme])
61
-
62
- PolicyEnum = create_policy_enum(policy_groups)
63
- DynamicBriefSummary = create_brief_summary_model(PolicyEnum)
64
-
65
- SLLM = GPT4o.with_structured_output(DynamicBriefSummary, strict=True)
66
-
67
- prompt = (
68
- f"{prompt}\n\nAvailable Policies:\n\n- "
69
- + "\n- ".join(policy_groups)
70
- + "\n\nContext:\n\n{context}"
71
- )
72
- map_prompt = ChatPromptTemplate.from_messages([("system", prompt)])
73
- return map_prompt | SLLM
74
-
75
 
76
  if __name__ == "__main__":
77
  test_document = """
@@ -80,8 +17,5 @@ if __name__ == "__main__":
80
  the major settlement of Cambourne has been created - now over the projected 3,000 homes and
81
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
82
  """
83
- test_themes = {"Homes", "Great Places"}
84
 
85
- dynamic_map_chain = create_dynamic_map_chain(test_themes, prompt=map_template)
86
- result = dynamic_map_chain.invoke({"context": test_document, "themes": test_themes})
87
- __import__("pprint").pprint(dict(result))
 
1
+ from langchain_core.output_parsers import StrOutputParser
 
 
2
  from langchain_core.prompts import ChatPromptTemplate
 
3
 
4
  from planning_ai.common.utils import Paths
5
  from planning_ai.llms.llm import GPT4o
 
6
 
7
  with open(Paths.PROMPTS / "map.txt", "r") as f:
8
  map_template = f.read()
9
 
10
+ map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
11
+ map_chain = map_prompt | GPT4o | StrOutputParser()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  if __name__ == "__main__":
14
  test_document = """
 
17
  the major settlement of Cambourne has been created - now over the projected 3,000 homes and
18
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
19
  """
 
20
 
21
+ result = map_chain.invoke({"context": test_document})
 
 
planning_ai/chains/policy_chain.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Optional
2
 
3
  from langchain_core.prompts import ChatPromptTemplate
4
- from pydantic import BaseModel
5
 
6
  from planning_ai.common.utils import Paths
7
  from planning_ai.llms.llm import GPT4o
@@ -13,8 +13,8 @@ with open(Paths.PROMPTS / "policy.txt", "r") as f:
13
  class Policy(BaseModel):
14
  """Return condensed details and their associated doc_ids"""
15
 
16
- detail: str
17
- doc_id: list[int]
18
 
19
 
20
  class PolicyList(BaseModel):
@@ -37,7 +37,5 @@ if __name__ == "__main__":
37
  ]
38
  test_docids = [1, 13, 21]
39
 
40
- result = policy_chain.invoke(
41
- {"theme": "Climate Change", "policy": test_policy, "details": test_bullet}
42
- )
43
  print(result)
 
1
  from typing import Optional
2
 
3
  from langchain_core.prompts import ChatPromptTemplate
4
+ from pydantic import BaseModel, Field
5
 
6
  from planning_ai.common.utils import Paths
7
  from planning_ai.llms.llm import GPT4o
 
13
# Pairs one condensed detail with the IDs of the documents it is associated with.
# NOTE(review): pydantic includes the class docstring and the Field descriptions
# in the generated JSON schema; presumably that schema is what the model is
# shown, so treat those strings as prompt text - confirm before rewording them.
class Policy(BaseModel):
    """Return condensed details and their associated doc_ids"""

    # Text of the condensed detail.
    detail: str = Field(description="The policy detail")
    # Integer IDs of the documents associated with this detail.
    doc_id: list[int] = Field(description="The associated doc_ids")
18
 
19
 
20
  class PolicyList(BaseModel):
 
37
  ]
38
  test_docids = [1, 13, 21]
39
 
40
+ result = policy_chain.invoke({"chapter": "Climate Change", "details": test_bullet})
 
 
41
  print(result)
planning_ai/chains/prompts/chapters.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please identify any relevant topics that accurately relate to the provided representation. For each topic, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the topic is present or connected in the document. You may select none, one, or multiple topics as applicable.
2
+
3
+ ---
4
+
5
+ ### **Representation Content:**
6
+
7
+ {document}
8
+
9
+ ---
10
+
11
+ ### **Key Guidelines:**
12
+ - **0 (Not Relevant)**: The topic is **not present** or does not apply to the representation.
13
+ - **1-2 (Low Relevance)**: The topic is **mentioned briefly** but without substantial impact or significance to the representation's key messages.
14
+ - **3 (Moderate Relevance)**: The topic is **discussed** with some importance, but it may not be a central focus.
15
+ - **4 (High Relevance)**: The topic is **significantly discussed** and closely aligns with the main ideas or objectives of the representation.
16
+ - **5 (Highly Relevant)**: The topic is **central** to the representation and its message, making it crucial for understanding the overall content.
17
+
18
+ ---
19
+
20
+ ### **Task:**
21
+
22
+ 1. **Topic identification**: Identify related topics in the document from those provided. Focus on capturing topics that are explicitly mentioned or strongly implied. Avoid inferring new topics beyond those stated. Select **only** relevant topics; do not include those that are only indirectly related to the content.
23
+
24
+ 2. **Topic scores**: For each identified topic attribute a score denoting the relevance based on the guidelines provided. Ensure the score aligns with the relevance of the topic within the document.
25
+
26
+ 3. **Topic Notes**: For each identified topic, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Topic notes may overlap. If a note does not have a clear link to the topic, omit both the topic and the note.
planning_ai/chains/prompts/map.txt CHANGED
@@ -1,16 +1,9 @@
1
- You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback. The policy document contains a set of policies some of which will be relevant to the representation Your task is to identify these policies and extract relevant sections from the representation that correspond to them.
2
 
3
- Your tasks are as follows:
4
-
5
- 1. **Summary**: Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section.
6
-
7
- 2. **Policy Identification**: Carefully review the representation and identify all relevant policies from the provided list. Focus on capturing policies that are explicitly mentioned or strongly implied. Avoid inferring new policies beyond those stated. Select **only** relevant policies, do not include those that are only to the content indirectly.
8
-
9
- 3. **Policy Notes**: For each identified policy, state information from the representation that **directly** relates to it. Ensure the **full** context is retained, so the section can be understood independently. Policy notes may overlap. If a note does not have a clear link to the policy, omit both the policy and the note.
10
-
11
- Your output must be formatted in valid JSON as specified. Ensure clarity and accuracy in your extraction process.
12
 
13
  **Always use British English**
14
 
15
- Select policies from the provided list using their exact names only:
16
 
 
 
1
+ You have been provided with a response to a policy document, known as a representation. The representation highlights key points with respect to the policy document and provides feedback.
2
 
3
+ Provide a concise summary of the representation, highlighting the main points and any significant details. If the representation makes direct reference to the content of the policy document, please include a reference to the relevant section. If the document does not provide any information, or refers only to a document that you do not have access to, state this, and do not make assumptions.
 
 
 
 
 
 
 
 
4
 
5
  **Always use British English**
6
 
7
+ Response:
8
 
9
+ {context}
planning_ai/chains/prompts/policy.txt CHANGED
@@ -1,13 +1,13 @@
1
- You are tasked with refining a list of details related to a specific planning policy and theme. Your goal is to:
2
 
3
  1. Extract and emphasise the core action or idea from each detail.
4
- 2. Remove any non-essential context, such as the policy name or irrelevant details, along with their associated document IDs.
5
  3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
6
- 4. Exclude any details that do not pertain to the policy **and** theme provided.
7
 
8
- It is most important to ensure that all information contained within the final details are clearly related to their associated policy. The removal of too much contextual information may result in bullet points that do not clearly relate with the associated policy. Do not be afraid to omit details and citations that do not relate with the provided theme and policy. Do not attempt to find a tangible link, it is likely that there may be none.
9
 
10
- Ensure that all returned details use proper sentence structure. Only include document IDs within the 'doc_id' JSON attribute; **not** in the 'details' output.
11
 
12
  **Always use British English**
13
 
@@ -15,9 +15,7 @@ Ensure that all returned details use proper sentence structure. Only include doc
15
 
16
  **Provided information**
17
 
18
- Theme: {theme}
19
-
20
- Policy: {policy}
21
 
22
  Details:
23
 
 
1
+ You are tasked with refining a list of details relating to a specific topic in a policy document. Your goal is to:
2
 
3
  1. Extract and emphasise the core action or idea from each detail.
4
+ 2. Remove any non-essential context, such as the topic name or irrelevant details, along with their associated document IDs.
5
  3. Combine details that convey **identical** points into a single, concise point, merging their related document IDs.
6
+ 4. Exclude any details that do not pertain to the topic provided.
7
 
8
+ It is most important to ensure that all information contained within the final details is clearly related to the associated topic. The removal of too much contextual information may result in bullet points that do not clearly relate to the associated topic. Do not be afraid to omit details and citations that do not relate to the provided topic. Do not attempt to find a tangible link; it is likely that there may be none.
9
 
10
+ You must return the **details** and **doc_ids** separately. Do **not** include document IDs within the **details** text.
11
 
12
  **Always use British English**
13
 
 
15
 
16
  **Provided information**
17
 
18
+ Topic: {chapter}
 
 
19
 
20
  Details:
21
 
planning_ai/chains/prompts/themes.txt DELETED
@@ -1,40 +0,0 @@
1
- Please identify any relevant themes from the list below that accurately relate to the document. For each theme, provide a relevance score between **0 (not relevant)** and **5 (highly relevant)** based on how strongly the theme is present or connected in the document. You may select none, one, or multiple themes as applicable.
2
-
3
- ---
4
-
5
- ### **Available Themes:**
6
-
7
- **Climate change:** Help Cambridge transition to net zero carbon by 2050, by ensuring that development is sited in places that help to limit carbon emissions, is designed to the highest achievable standards for energy and water use, and is resilient to current and future climate risks.
8
-
9
- **Biodiversity and green spaces:** Increase and improve our network of habitats for wildlife, and green spaces for people, ensuring that development leaves the natural environment better than it was before.
10
-
11
- **Wellbeing and social inclusion:** Help people in Greater Cambridge to lead healthier and happier lives, ensuring that everyone benefits from the development of new homes and jobs.
12
-
13
- **Great places:** Sustain the unique character of Cambridge and South Cambridgeshire, and complement it with beautiful and distinctive development, creating a place where people want to live, work and play.
14
-
15
- **Jobs:** Encourage a flourishing and mixed economy in Greater Cambridge which includes a wide range of jobs, while maintaining our area's global reputation for innovation.
16
-
17
- **Homes:** Plan for enough housing to meet our needs, including significant quantities of housing that is affordable to rent and buy, and different kinds of homes to suit our diverse communities.
18
-
19
- **Infrastructure:** Plan for transport, water, energy and digital networks; and health, education and cultural facilities; in the right places and built at the right times to serve our growing communities.
20
-
21
- ---
22
-
23
- ### **Document Content:**
24
-
25
- {document}
26
-
27
- ---
28
-
29
- ### **Key Guidelines:**
30
- - **0 (Not Relevant)**: The theme is **not present** or does not apply to the document.
31
- - **1-2 (Low Relevance)**: The theme is **mentioned briefly** but without substantial impact or significance to the document's key messages.
32
- - **3 (Moderate Relevance)**: The theme is **discussed** with some importance, but it may not be a central focus.
33
- - **4 (High Relevance)**: The theme is **significantly discussed** and closely aligns with the main ideas or objectives of the document.
34
- - **5 (Highly Relevant)**: The theme is **central** to the document and its message, making it crucial for understanding the overall content.
35
-
36
- ---
37
-
38
- ### **Task:**
39
-
40
- For each theme, assess the relevance of the theme in the document and provide a score. This will allow us to better understand which themes are central to the document's content, enabling a more targeted and accurate summary.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
planning_ai/chains/themes_chain.py CHANGED
@@ -7,34 +7,51 @@ from pydantic import BaseModel
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import GPT4o
9
 
 
 
 
10
 
11
- class Theme(Enum):
12
- climate_change = "Climate Change"
13
- biodiversity = "Biodiversity and Green Spaces"
14
- wellbeing = "Wellbeing and Social Inclusion"
15
- great_places = "Great Places"
16
- jobs = "Jobs"
17
- homes = "Homes"
18
- infrastructure = "Infrastructure"
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
- class ThemeScore(BaseModel):
22
- theme: Theme
23
  score: int
 
24
 
25
 
26
- class ThemeSelector(BaseModel):
27
- themes: Optional[list[ThemeScore]]
28
 
29
 
30
- with open(Paths.PROMPTS / "themes.txt", "r") as f:
31
- themes_template = f.read()
32
 
33
- themes_prompt = ChatPromptTemplate.from_messages([("system", themes_template)])
34
 
35
- SLLM = GPT4o.with_structured_output(ThemeSelector, strict=True)
36
 
37
- themes_chain = themes_prompt | SLLM
38
 
39
 
40
  if __name__ == "__main__":
@@ -45,5 +62,5 @@ if __name__ == "__main__":
45
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
46
  """
47
 
48
- result = themes_chain.invoke({"document": test_document})
49
  __import__("pprint").pprint(dict(result))
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import GPT4o
9
 
10
+ # Read the chapter lines from the file
11
+ with open(Paths.RAW / "chapters.txt", "r") as f:
12
+ chapters = [line.strip() for line in f.readlines() if line.strip()]
13
 
14
+
15
+ def create_dynamic_enum(chapters):
16
+ """
17
+ Dynamically create an Enum class from the provided chapters list.
18
+ The names of the enum members are derived from the chapter names.
19
+ """
20
+ # Prepare the enum name and value mappings
21
+ enum_members = {}
22
+ for chapter in chapters:
23
+ # Create valid Python identifier by replacing spaces with underscores and removing special characters
24
+ name = chapter.replace(" ", "_").replace("-", "_").replace("'", "")
25
+ name = "".join(c for c in name if c.isalnum() or c == "_")
26
+ # Assign each name and value
27
+ enum_members[name] = chapter
28
+
29
+ # Create the Enum class dynamically
30
+ return Enum("Chapter", enum_members)
31
+
32
+
33
+ # Create the dynamic enum
34
+ Chapter = create_dynamic_enum(chapters)
35
 
36
 
37
+ class ChapterScore(BaseModel):
38
+ chapter: Chapter
39
  score: int
40
+ description: str
41
 
42
 
43
+ class ChapterSelector(BaseModel):
44
+ chapters: Optional[list[ChapterScore]]
45
 
46
 
47
+ with open(Paths.PROMPTS / "chapters.txt", "r") as f:
48
+ chapters_template = f.read()
49
 
50
+ chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
51
 
52
+ SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
53
 
54
+ chapters_chain = chapters_prompt | SLLM
55
 
56
 
57
  if __name__ == "__main__":
 
62
  Papworth Everard has grown beyond recognition. This in itself is a matter of concern.
63
  """
64
 
65
+ result = chapters_chain.invoke({"document": test_document})
66
  __import__("pprint").pprint(dict(result))
planning_ai/chapters.py DELETED
@@ -1,25 +0,0 @@
1
- [
2
- "Introduction",
3
- "Approach to Planning Obligations",
4
- "How to use this Supplementary Planning Document",
5
- "Affordable Housing",
6
- "Green Infrastructure",
7
- "Biodiversity",
8
- "Community Facilities",
9
- "Social and Community Support Services",
10
- "Libraries and Lifelong Learning",
11
- "Transport and Highways",
12
- "Education",
13
- "Public Art",
14
- "Burial Space",
15
- "Public Open Space",
16
- "Indoor Sports, including Swimming",
17
- "Public Realm",
18
- "Waste and Recycling",
19
- "Emergency Services",
20
- "Planning Obligations to support local employment and skills",
21
- "Planning Obligations to support affordable workspace",
22
- "Public Rights of Way",
23
- "Healthcare",
24
- "Other Potential Development Specific Requirements",
25
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
planning_ai/documents/document.py CHANGED
@@ -30,6 +30,7 @@ def _process_postcodes(final):
30
  """
31
  documents = final["documents"]
32
  postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
 
33
  postcodes = (
34
  pl.DataFrame({"postcode": postcodes})["postcode"]
35
  .value_counts()
@@ -42,13 +43,30 @@ def _process_postcodes(final):
42
  postcodes = postcodes.join(onspd, on="postcode", how="left")
43
  outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
44
  pcs_url = "https://api.postcodes.io/postcodes"
45
- outside_pcs = outside_pcs.with_columns(
46
- pl.col("postcode")
47
- .map_elements(
48
- lambda x: requests.get(f"{pcs_url}/{x}").json()["result"]["admin_ward"],
49
- return_dtype=pl.String,
50
- )
51
- .alias("osward")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  )
53
  return postcodes.drop_nulls(subset=["osward"]), outside_pcs
54
 
@@ -63,16 +81,16 @@ def _process_policies(final):
63
  tuple: A tuple containing strings of support, object, and other policies.
64
  """
65
 
66
- def process_policy_group(policy_group, theme, stance):
67
  details = "".join(
68
- f"\n### {row['policies']}\n\n"
69
  + "".join(
70
  f"- {detail} {doc_id}\n"
71
  for detail, doc_id in zip(row["detail"], row["doc_id"])
72
  )
73
- for row in policy_group.rows(named=True)
74
  )
75
- return f"## {theme} - {stance}\n\n{details}\n"
76
 
77
  policies_df = final["policies"]
78
 
@@ -80,16 +98,15 @@ def _process_policies(final):
80
  object_policies = ""
81
  other_policies = ""
82
 
83
- for (theme, stance), policy in policies_df.group_by(
84
  ["themes", "stance"], maintain_order=True
85
  ):
86
  if stance == "Support":
87
- support_policies += process_policy_group(policy, theme, stance)
88
  elif stance == "Object":
89
- object_policies += process_policy_group(policy, theme, stance)
90
  else:
91
- other_policies += process_policy_group(policy, theme, stance)
92
-
93
  return support_policies, object_policies, other_policies
94
 
95
 
@@ -130,9 +147,8 @@ def _process_themes(final):
130
  str: A markdown table of themes with their counts and percentages.
131
  """
132
  documents = final["documents"]
133
- themes = Counter(
134
- [theme["theme"].value for doc in documents for theme in doc["themes"]]
135
- )
136
  themes = pl.DataFrame(themes).transpose(include_header=True)
137
  themes_breakdown = themes.with_columns(
138
  ((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
@@ -255,7 +271,6 @@ def fig_oa(postcodes, rep):
255
 
256
 
257
  def fig_wards(postcodes, rep):
258
- camb_lads = gpd.read_parquet(Paths.RAW / "camb_lads.parquet")
259
  ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
260
  ward_pcs = postcodes.group_by("osward").sum()
261
  ward_boundaries_prop = ward_boundaries.merge(
@@ -264,7 +279,7 @@ def fig_wards(postcodes, rep):
264
 
265
  _, ax = plt.subplots(figsize=(8, 8))
266
  ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
267
- camb_lads.plot(ax=ax, color="white", edgecolor="gray", linewidth=0.5)
268
  ward_boundaries_prop.plot(
269
  ax=ax,
270
  column="count",
@@ -272,7 +287,6 @@ def fig_wards(postcodes, rep):
272
  legend=True,
273
  legend_kwds={"label": "Number of Representations"},
274
  )
275
- ward_boundaries.plot(ax=ax, color="none", edgecolor="grey", linewidth=0.5)
276
  cbar = ax.get_figure().axes[-1] # Get the colorbar axis
277
  cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
278
 
@@ -347,42 +361,6 @@ def build_final_report(out, rep):
347
  .unique("id")
348
  .collect()
349
  )
350
- unused_documents = out["generate_final_report"]["unused_documents"]
351
-
352
- unused_pdfs = (
353
- pl.DataFrame(
354
- [
355
- doc["metadata"]
356
- for doc in unused_documents
357
- if "representations_id" not in doc["metadata"]
358
- ]
359
- )
360
- .select(["id", "pdf_id", "page_label"])
361
- .rename({"pdf_id": "representations_id"})
362
- .with_columns(
363
- pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
364
- )
365
- )
366
- unused_docs = (
367
- pl.DataFrame(
368
- [
369
- doc["metadata"]
370
- for doc in unused_documents
371
- if "representations_id" in doc["metadata"]
372
- ]
373
- )
374
- .select(["id", "representations_id"])
375
- .with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
376
- )
377
- unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
378
- {
379
- "id": "JDi ID",
380
- "representations_id": "Representations ID",
381
- "page_label": "Page Number",
382
- "type": "Type",
383
- }
384
- )
385
- unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
386
 
387
  support_policies, object_policies, other_policies = _process_policies(final)
388
  postcodes, outside_pcs = _process_postcodes(final)
@@ -396,13 +374,16 @@ def build_final_report(out, rep):
396
  outside_pcs = (
397
  outside_pcs.group_by("osward")
398
  .sum()[["osward", "count"]]
399
- .rename({"osward": "Ward", "count": "Number of Representations"})
 
 
 
400
  .to_pandas()
401
  .to_markdown(index=False)
402
  )
403
 
404
  quarto_doc = (
405
- f"---\ntitle: '**{rep}**'\n"
406
  r"""
407
  mainfont: Liberation Sans
408
  fontsize: 12pt
@@ -438,7 +419,7 @@ header-includes: |
438
  "\n# Executive Summary\n\n"
439
  f"{final['executive']}\n\n"
440
  f"There were a total of {len(responses):,} responses. Of these, representations left "
441
- "comment, or indicated the following support and objection of the plan:\n\n"
442
  f"{stances}\n\n"
443
  "# Introduction\n\n"
444
  f"{introduction_paragraph}\n\n"
@@ -446,32 +427,25 @@ header-includes: |
446
  f"{figures_paragraph}\n\n"
447
  f"![Total number of representations submitted by Ward within Greater Cambridgeshire\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
448
  f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
449
- f"![Proportional frequency of representations submitted by 2021 Output Area relative to the national average\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
450
- f"![Distribution of representations submitted by the of index of multiple deprivation (2019) relative to the national average\\label{{fig-imd}}](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
451
  r"\newpage"
452
  "\n\n# Themes and Policies\n\n"
453
  f"{themes_paragraph}\n\n"
454
- f": Breakdown of representation themes {{#tbl:themes}}\n\n{themes}\n\n"
455
  "## Supporting Representations\n\n"
456
- "The following section presents a list of all points raised in representations that support the plan"
457
- ", grouped by theme and policy.\n\n"
458
  f"{support_policies or '_No supporting representations._'}\n\n"
459
  "## Objecting Representations\n\n"
460
- "The following section presents a list of all points raised in representations that object to "
461
- "the plan, grouped by theme and policy.\n\n"
462
  f"{object_policies or '_No objecting representations._'}\n\n"
463
  "## Comment\n\n"
464
- "The following section presents a list of all points raised in representations that do not support "
465
- "or object to the plan, grouped by theme and policy.\n\n"
466
  f"{other_policies or '_No other representations._'}\n\n"
467
- "## Unused Documents\n\n"
468
- "For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused."
469
- "These documents are typically very short, and contain information that provides no relation to policies or themes.\n\n TODO: expand."
470
- f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
471
  )
472
 
473
- out_path = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.md"
474
- out_file = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}"
475
  with open(out_path, "w") as f:
476
  f.write(quarto_doc)
477
  try:
@@ -486,7 +460,7 @@ header-includes: |
486
  subprocess.run(command, check=True, capture_output=True)
487
  command = [
488
  "pdftk",
489
- "data/covers/cover2 1.pdf",
490
  f"{out_file}.pdf",
491
  "cat",
492
  "output",
@@ -507,7 +481,7 @@ header-includes: |
507
  subprocess.run(command, check=True, capture_output=True)
508
  command = [
509
  "pandoc",
510
- "data/covers/Cover2 1.docx",
511
  f"{out_file}.docx",
512
  "-o",
513
  f"{out_file}.docx",
@@ -516,60 +490,22 @@ header-includes: |
516
  ]
517
  subprocess.run(command, check=True, capture_output=True)
518
  except subprocess.CalledProcessError as e:
519
- logging.error(
520
- f"Error during Summary_of_Submitted_Representations.md render: {e}"
521
- )
522
 
523
 
524
  def build_summaries_document(out, rep):
525
  sub = r"Document ID: \[\d+\]\n\n"
526
  summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
527
- unused_documents = out["generate_final_report"]["unused_documents"]
528
-
529
- unused_pdfs = (
530
- pl.DataFrame(
531
- [
532
- doc["metadata"]
533
- for doc in unused_documents
534
- if "representations_id" not in doc["metadata"]
535
- ]
536
- )
537
- .select(["id", "pdf_id", "page_label"])
538
- .rename({"pdf_id": "representations_id"})
539
- .with_columns(
540
- pl.col("representations_id").cast(pl.Int64), pl.lit("PDF").alias("type")
541
- )
542
- )
543
- unused_docs = (
544
- pl.DataFrame(
545
- [
546
- doc["metadata"]
547
- for doc in unused_documents
548
- if "representations_id" in doc["metadata"]
549
- ]
550
- )
551
- .select(["id", "representations_id"])
552
- .with_columns(pl.lit("").alias("page_label"), pl.lit("Text").alias("type"))
553
- )
554
- unused_tbl = pl.concat([unused_pdfs, unused_docs]).rename(
555
- {
556
- "id": "JDi ID",
557
- "representations_id": "Representations ID",
558
- "page_label": "Page Number",
559
- "type": "Type",
560
- }
561
- )
562
- unused_tbl = unused_tbl.to_pandas().to_markdown(index=False)
563
  full_text = "".join(
564
- f"**Document ID**: {document['doc_id']}\n\n"
565
  f"**Representations ID**: {document['document'].metadata['id']}\n\n"
566
- f"**Representations Name**: {document['document'].metadata['representations_document']}\n\n"
567
- f"\n\n{re.sub(sub, '', document['summary'].summary)}\n\n"
568
  "---\n\n"
569
  for document in out["generate_final_report"]["documents"]
570
  )
571
  header = (
572
- f"---\ntitle: '**{rep}**'\n"
573
  r"""
574
  mainfont: Liberation Sans
575
  fontsize: 12pt
@@ -604,18 +540,11 @@ header-includes: |
604
  """
605
  f"\n{summary_intro}\n\n"
606
  )
607
- unused_text = (
608
- "\n\n## Unused Documents\n\n"
609
- "For full transparency, this section details those documents that were excluded from this report on Table @tbl:unused.\n\n"
610
- f": Unused representations {{#tbl:unused}}\n\n{unused_tbl}\n\n"
611
- "These documents are typically very short, and contain information that provides no relation to policies or themes."
612
- )
613
 
614
- out_path = Paths.SUMMARY / f"Summary_Documents-{rep}.md"
615
- out_file = Paths.SUMMARY / f"Summary_Documents-{rep}"
616
  with open(out_path, "w") as f:
617
- f.write(f"{header}{full_text}{unused_text}")
618
-
619
  try:
620
  command = [
621
  "pandoc",
@@ -628,7 +557,7 @@ header-includes: |
628
  subprocess.run(command, check=True, capture_output=True)
629
  command = [
630
  "pdftk",
631
- "data/covers/cover_summary_responses.pdf",
632
  f"{out_file}.pdf",
633
  "cat",
634
  "output",
@@ -649,7 +578,7 @@ header-includes: |
649
  subprocess.run(command, check=True, capture_output=True)
650
  command = [
651
  "pandoc",
652
- "data/covers/Cover 1.docx",
653
  f"{out_file}.docx",
654
  "-o",
655
  f"{out_file}.docx",
 
30
  """
31
  documents = final["documents"]
32
  postcodes = [doc["document"].metadata["respondentpostcode"] for doc in documents]
33
+ missing = ["Missing" for pcd in postcodes if not pcd]
34
  postcodes = (
35
  pl.DataFrame({"postcode": postcodes})["postcode"]
36
  .value_counts()
 
43
  postcodes = postcodes.join(onspd, on="postcode", how="left")
44
  outside_pcs = postcodes.filter(pl.col("osward").is_null()).drop_nulls("postcode")
45
  pcs_url = "https://api.postcodes.io/postcodes"
46
+
47
+ def get_pcs(postcode):
48
+ response = requests.get(f"{pcs_url}/{postcode}")
49
+ if response.status_code == 200:
50
+ out = response.json()["result"]
51
+ admin_ward = out.get("admin_ward")
52
+ admin_district = out.get("admin_district")
53
+ return f"{admin_ward}, {admin_district}"
54
+ else:
55
+ return "Unknown"
56
+
57
+ outside_pcs = pl.concat(
58
+ [
59
+ outside_pcs.with_columns(
60
+ pl.col("postcode")
61
+ .map_elements(lambda x: get_pcs(x), return_dtype=pl.String)
62
+ .alias("osward")
63
+ )
64
+ .select(["postcode", "osward", "count"])
65
+ .with_columns(pl.col("count").cast(pl.Int32)),
66
+ pl.DataFrame(
67
+ {"postcode": missing, "osward": "Unknown", "count": len(missing)}
68
+ ).with_columns(pl.col("count").cast(pl.Int32)),
69
+ ],
70
  )
71
  return postcodes.drop_nulls(subset=["osward"]), outside_pcs
72
 
 
81
  tuple: A tuple containing strings of support, object, and other policies.
82
  """
83
 
84
+ def process_policy_group(policy_group):
85
  details = "".join(
86
+ f"\n### {row['themes']} - {row['stance']}\n\n"
87
  + "".join(
88
  f"- {detail} {doc_id}\n"
89
  for detail, doc_id in zip(row["detail"], row["doc_id"])
90
  )
91
+ for row in policy_group.rows(named=True)[:1]
92
  )
93
+ return details
94
 
95
  policies_df = final["policies"]
96
 
 
98
  object_policies = ""
99
  other_policies = ""
100
 
101
+ for (_, stance), policy in policies_df.group_by(
102
  ["themes", "stance"], maintain_order=True
103
  ):
104
  if stance == "Support":
105
+ support_policies += process_policy_group(policy)
106
  elif stance == "Object":
107
+ object_policies += process_policy_group(policy)
108
  else:
109
+ other_policies += process_policy_group(policy)
 
110
  return support_policies, object_policies, other_policies
111
 
112
 
 
147
  str: A markdown table of themes with their counts and percentages.
148
  """
149
  documents = final["documents"]
150
+ documents[0]["themes"]
151
+ themes = Counter([theme["chapter"] for doc in documents for theme in doc["themes"]])
 
152
  themes = pl.DataFrame(themes).transpose(include_header=True)
153
  themes_breakdown = themes.with_columns(
154
  ((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
 
271
 
272
 
273
  def fig_wards(postcodes, rep):
 
274
  ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
275
  ward_pcs = postcodes.group_by("osward").sum()
276
  ward_boundaries_prop = ward_boundaries.merge(
 
279
 
280
  _, ax = plt.subplots(figsize=(8, 8))
281
  ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=1.5)
282
+ ward_boundaries.plot(ax=ax, color="white", edgecolor="grey", linewidth=0.5)
283
  ward_boundaries_prop.plot(
284
  ax=ax,
285
  column="count",
 
287
  legend=True,
288
  legend_kwds={"label": "Number of Representations"},
289
  )
 
290
  cbar = ax.get_figure().axes[-1] # Get the colorbar axis
291
  cbar.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x)}"))
292
 
 
361
  .unique("id")
362
  .collect()
363
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
  support_policies, object_policies, other_policies = _process_policies(final)
366
  postcodes, outside_pcs = _process_postcodes(final)
 
374
  outside_pcs = (
375
  outside_pcs.group_by("osward")
376
  .sum()[["osward", "count"]]
377
+ .filter(pl.col("osward") != "Unknown")
378
+ .rename(
379
+ {"osward": "Ward, Local Authority", "count": "Number of Representations"}
380
+ )
381
  .to_pandas()
382
  .to_markdown(index=False)
383
  )
384
 
385
  quarto_doc = (
386
+ f"---\ntitle: '**Overview of Public Submissions: {rep}**'\n"
387
  r"""
388
  mainfont: Liberation Sans
389
  fontsize: 12pt
 
419
  "\n# Executive Summary\n\n"
420
  f"{final['executive']}\n\n"
421
  f"There were a total of {len(responses):,} responses. Of these, representations left "
422
+ "comment, or indicated the following support and objection of the consultation document:\n\n"
423
  f"{stances}\n\n"
424
  "# Introduction\n\n"
425
  f"{introduction_paragraph}\n\n"
 
427
  f"{figures_paragraph}\n\n"
428
  f"![Total number of representations submitted by Ward within Greater Cambridgeshire\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
429
  f": Postcodes outside the Greater Cambridge Ward areas {{#tbl:outside}}\n\n{outside_pcs}n\n"
430
+ f"![The proportion of representations submitted by 2021 Output Area Classification^[Wyszomierski, J., Longley, P.A., Singleton, A.D., Gale, C. & O’Brien, O. (2024) A neighbourhood Output Area Classification from the 2021 and 2022 UK censuses. The Geographical Journal, 190, e12550. Available from: https://doi.org/10.1111/geoj.12550] relative to the national average\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
431
+ f"![The proportion of representations submitted by the Index of Multiple Deprivation (2019) relative to the national average](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
432
  r"\newpage"
433
  "\n\n# Themes and Policies\n\n"
434
  f"{themes_paragraph}\n\n"
435
+ f": Breakdown of representations by section {{#tbl:themes}}\n\n{themes}\n\n"
436
  "## Supporting Representations\n\n"
437
+ "The following section presents a list of all points raised in representations that support the consultation document, grouped by sections."
 
438
  f"{support_policies or '_No supporting representations._'}\n\n"
439
  "## Objecting Representations\n\n"
440
+ "The following section presents a list of all points raised in representations that object to the consultation document, grouped by sections."
 
441
  f"{object_policies or '_No objecting representations._'}\n\n"
442
  "## Comment\n\n"
443
+ "The following section presents a list of all points raised in representations that do not support or object to the consultation document, grouped by sections."
 
444
  f"{other_policies or '_No other representations._'}\n\n"
 
 
 
 
445
  )
446
 
447
+ out_path = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.md"
448
+ out_file = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}"
449
  with open(out_path, "w") as f:
450
  f.write(quarto_doc)
451
  try:
 
460
  subprocess.run(command, check=True, capture_output=True)
461
  command = [
462
  "pdftk",
463
+ "data/covers/Overview_of_Public_Submissions.pdf",
464
  f"{out_file}.pdf",
465
  "cat",
466
  "output",
 
481
  subprocess.run(command, check=True, capture_output=True)
482
  command = [
483
  "pandoc",
484
+ "data/covers/Overview_of_Public_Submissions.docx",
485
  f"{out_file}.docx",
486
  "-o",
487
  f"{out_file}.docx",
 
490
  ]
491
  subprocess.run(command, check=True, capture_output=True)
492
  except subprocess.CalledProcessError as e:
493
+ logging.error(f"Error during Overview_of_Public_Submissions render: {e}")
 
 
494
 
495
 
496
  def build_summaries_document(out, rep):
497
  sub = r"Document ID: \[\d+\]\n\n"
498
  summary_intro = load_txt("planning_ai/documents/summary_intro.txt")
499
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  full_text = "".join(
501
+ f"**Part ID**: {document['doc_id']}\n\n"
502
  f"**Representations ID**: {document['document'].metadata['id']}\n\n"
503
+ f"\n\n{re.sub(sub, '', document['summary'])}\n\n"
 
504
  "---\n\n"
505
  for document in out["generate_final_report"]["documents"]
506
  )
507
  header = (
508
+ f"---\ntitle: '**Summaries of Public Submissions: {rep}**'\n"
509
  r"""
510
  mainfont: Liberation Sans
511
  fontsize: 12pt
 
540
  """
541
  f"\n{summary_intro}\n\n"
542
  )
 
 
 
 
 
 
543
 
544
+ out_path = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.md"
545
+ out_file = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}"
546
  with open(out_path, "w") as f:
547
+ f.write(f"{header}{full_text}")
 
548
  try:
549
  command = [
550
  "pandoc",
 
557
  subprocess.run(command, check=True, capture_output=True)
558
  command = [
559
  "pdftk",
560
+ "data/covers/Summaries_of_Public_Submissions.pdf",
561
  f"{out_file}.pdf",
562
  "cat",
563
  "output",
 
578
  subprocess.run(command, check=True, capture_output=True)
579
  command = [
580
  "pandoc",
581
+ "data/covers/Summaries_of_Public_Submissions.docx",
582
  f"{out_file}.docx",
583
  "-o",
584
  f"{out_file}.docx",
planning_ai/documents/themes.txt CHANGED
@@ -1 +1,3 @@
1
- The following section provides a detailed breakdown of notable details from the **representations**, grouped by the **Themes and Policies** set out in the **Greater Cambridgeshire Local Plan**. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or provide a general comment. This section offers a comprehensive overview of those key issues raised by members of the public or organisations with respect to these Themes and Policies. We have incorporated citations into each point (see numbers in square brackets) to indicate the specific document^[Each representation made can have a number of documents associated with them. Each document ID is unique, and can be referenced in the **Summary Responses** report.] where each representation was made, thereby promoting transparency of sources. Finally, @tbl-themes provides a breakdown of the number of submissions that relate to each Theme (noting that submissions may be associated with more than one Theme).
 
 
 
1
+ The following section provides a detailed breakdown of notable details from the **representations**, grouped by the **Themes and Policies** set out in the **Greater Cambridgeshire Local Plan**. Both the Themes and associated Policies are automatically determined through an analysis of the summary content by an LLM agent. Each Theme is organised according to whether representations were specified as supportive, opposed, or providing a general comment.
2
+
3
+ This section offers a comprehensive overview of the key issues raised by members of the public or organisations with respect to sections of the consultation document. We have incorporated citations into each point (see numbers in square brackets) to indicate the specific part^[Each representation can be a text entry and optionally a further attached piece of evidence. Attachments are split into pages, and along with the text entry are referred to here as a "part". Each representation can therefore comprise multiple parts, which are given a Part ID. These can be cross-referenced to the Summaries of Public Submissions report.] of the representation where points were made, thereby promoting transparency of sources. Finally, @tbl:themes provides a breakdown of the number of submissions that relate to each Theme (noting that submissions may be associated with more than one Theme).
planning_ai/main.py CHANGED
@@ -13,7 +13,7 @@ from planning_ai.graph import create_graph
13
  from planning_ai.logging import logger
14
 
15
 
16
- def read_docs(representations_document: str, doc_type: str):
17
  logger.warning("Reading documents...")
18
  df = (
19
  pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
@@ -83,36 +83,28 @@ def read_docs(representations_document: str, doc_type: str):
83
  if doc.page_content and len(doc.page_content.split(" ")) > 25
84
  }.values()
85
  )
86
- return [
87
- {"document": doc, "filename": doc.metadata["filename"], "doc_type": doc_type}
88
- for doc in docs
89
- ]
90
 
91
 
92
- def main(doc_type: str = "Themes & Policies"):
93
- representations_documents = (
94
- pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
95
- .select(pl.col("representations_document"))
96
- .unique()
97
- .collect()["representations_document"]
98
- .to_list()
99
- )
100
- for rep in representations_documents:
101
- docs = read_docs(rep, doc_type)
102
- n_docs = len(docs)
103
 
104
- logger.info(f"{n_docs} documents being processed!")
105
- app = create_graph()
106
 
107
- step = None
108
- for step in app.stream({"documents": docs, "n_docs": n_docs}):
109
- print(step.keys())
110
 
111
- if step is None:
112
- raise ValueError("No steps were processed!")
113
 
114
- build_final_report(step, rep)
115
- build_summaries_document(step, rep)
116
 
117
 
118
  if __name__ == "__main__":
 
13
  from planning_ai.logging import logger
14
 
15
 
16
+ def read_docs(representations_document: str):
17
  logger.warning("Reading documents...")
18
  df = (
19
  pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
 
83
  if doc.page_content and len(doc.page_content.split(" ")) > 25
84
  }.values()
85
  )
86
+ return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
 
 
 
87
 
88
 
89
+ def main():
90
+ with open(Paths.RAW / "title.txt", "r") as f:
91
+ rep = f.read().strip()
92
+
93
+ docs = read_docs(rep)
94
+ n_docs = len(docs)
 
 
 
 
 
95
 
96
+ logger.info(f"{n_docs} documents being processed!")
97
+ app = create_graph()
98
 
99
+ step = None
100
+ for step in app.stream({"documents": docs, "n_docs": n_docs}):
101
+ print(step.keys())
102
 
103
+ if step is None:
104
+ raise ValueError("No steps were processed!")
105
 
106
+ build_final_report(step, rep)
107
+ build_summaries_document(step, rep)
108
 
109
 
110
  if __name__ == "__main__":
planning_ai/nodes/hallucination_node.py CHANGED
@@ -1,8 +1,7 @@
1
  from langgraph.types import Send
2
 
3
- from planning_ai.chains.fix_chain import fix_template
4
  from planning_ai.chains.hallucination_chain import hallucination_chain
5
- from planning_ai.chains.map_chain import create_dynamic_map_chain
6
  from planning_ai.logging import logger
7
  from planning_ai.states import DocumentState, OverallState
8
 
@@ -35,7 +34,7 @@ def check_hallucination(state: DocumentState):
35
 
36
  try:
37
  response = hallucination_chain.invoke(
38
- {"document": state["document"], "summary": state["summary"].summary}
39
  )
40
  is_hallucinated = response.score == 0
41
  refinement_attempts = state["refinement_attempts"] + 1
@@ -83,13 +82,12 @@ def fix_hallucination(state: DocumentState):
83
  hallucinations.
84
  """
85
  logger.warning(f"Fixing hallucinations for document {state['filename']}")
86
- themes = [theme["theme"].value for theme in state["themes"]]
87
- fix_chain = create_dynamic_map_chain(themes, fix_template)
88
  try:
89
  response = fix_chain.invoke(
90
  {
91
  "context": state["document"],
92
- "summary": state["summary"].summary,
93
  "explanation": state["hallucination"].explanation,
94
  }
95
  )
 
1
  from langgraph.types import Send
2
 
3
+ from planning_ai.chains.fix_chain import fix_chain
4
  from planning_ai.chains.hallucination_chain import hallucination_chain
 
5
  from planning_ai.logging import logger
6
  from planning_ai.states import DocumentState, OverallState
7
 
 
34
 
35
  try:
36
  response = hallucination_chain.invoke(
37
+ {"document": state["document"], "summary": state["summary"]}
38
  )
39
  is_hallucinated = response.score == 0
40
  refinement_attempts = state["refinement_attempts"] + 1
 
82
  hallucinations.
83
  """
84
  logger.warning(f"Fixing hallucinations for document {state['filename']}")
85
+
 
86
  try:
87
  response = fix_chain.invoke(
88
  {
89
  "context": state["document"],
90
+ "summary": state["summary"],
91
  "explanation": state["hallucination"].explanation,
92
  }
93
  )
planning_ai/nodes/map_node.py CHANGED
@@ -4,8 +4,8 @@ from langgraph.types import Send
4
  from presidio_analyzer import AnalyzerEngine
5
  from presidio_anonymizer import AnonymizerEngine
6
 
7
- from planning_ai.chains.map_chain import create_dynamic_map_chain, map_template
8
- from planning_ai.chains.themes_chain import themes_chain
9
  from planning_ai.logging import logger
10
  from planning_ai.states import DocumentState, OverallState
11
 
@@ -15,7 +15,7 @@ anonymizer = AnonymizerEngine()
15
  nlp = spacy.load("en_core_web_lg")
16
 
17
 
18
- def retrieve_themes(state: DocumentState) -> DocumentState:
19
  """Retrieve themes from a document's content.
20
 
21
  This function uses the `themes_chain` to extract themes from the document's
@@ -28,39 +28,23 @@ def retrieve_themes(state: DocumentState) -> DocumentState:
28
  DocumentState: The updated document state with themes and scores.
29
  """
30
  try:
31
- result = themes_chain.invoke({"document": state["document"].page_content})
32
- if not result.themes:
33
  state["themes"] = []
34
  return state
35
- themes = [theme.model_dump() for theme in result.themes]
 
 
 
 
 
 
 
 
36
  except Exception as e:
37
  logger.error(f"Theme selection error: {e}")
38
- themes = []
39
- state["themes"] = [d for d in themes if d["score"] >= 4]
40
- return state
41
-
42
-
43
- def add_entities(state: OverallState) -> OverallState:
44
- """Add named entities to all documents in the state.
45
-
46
- This function processes each document using a spaCy NLP pipeline to extract
47
- named entities and adds them to the document state.
48
-
49
- Args:
50
- state (OverallState): The overall state containing multiple documents.
51
-
52
- Returns:
53
- OverallState: The updated state with entities added to each document.
54
- """
55
- logger.info("Adding entities to all documents.")
56
- for idx, document in enumerate(
57
- nlp.pipe(
58
- [doc["document"].page_content for doc in state["documents"]],
59
- )
60
- ):
61
- state["documents"][idx]["entities"] = [
62
- {"entity": ent.text, "label": ent.label_} for ent in document.ents
63
- ]
64
  return state
65
 
66
 
@@ -102,30 +86,9 @@ def generate_summary(state: DocumentState) -> dict:
102
  logger.info(f"Starting PII removal for: {state['filename']}")
103
  state["document"].page_content = remove_pii(state["document"].page_content)
104
 
105
- if state["doc_type"] == "Themes & Policies":
106
- logger.info(f"Retrieving themes for: {state['filename']}")
107
- state = retrieve_themes(state)
108
- elif state["doc_type"] == "SPT":
109
- logger.info(f"Retrieving SPT for: {state['filename']}")
110
- state = retrieve_spt(state)
111
-
112
- if not state["themes"]:
113
- logger.warning(f"No themes found for {state['filename']}")
114
- return {
115
- "documents": [
116
- {
117
- **state,
118
- "summary": "",
119
- "refinement_attempts": 0,
120
- "is_hallucinated": True,
121
- "processed": True,
122
- "failed": True,
123
- }
124
- ]
125
- }
126
 
127
- themes = [theme["theme"].value for theme in state["themes"]]
128
- map_chain = create_dynamic_map_chain(themes=themes, prompt=map_template)
129
  try:
130
  response = map_chain.invoke({"context": state["document"].page_content})
131
  except Exception as e:
 
4
  from presidio_analyzer import AnalyzerEngine
5
  from presidio_anonymizer import AnonymizerEngine
6
 
7
+ from planning_ai.chains.map_chain import map_chain
8
+ from planning_ai.chains.themes_chain import chapters_chain
9
  from planning_ai.logging import logger
10
  from planning_ai.states import DocumentState, OverallState
11
 
 
15
  nlp = spacy.load("en_core_web_lg")
16
 
17
 
18
+ def retrieve_chapters(state: DocumentState) -> DocumentState:
19
  """Retrieve themes from a document's content.
20
 
21
  This function uses the `themes_chain` to extract themes from the document's
 
28
  DocumentState: The updated document state with themes and scores.
29
  """
30
  try:
31
+ result = chapters_chain.invoke({"document": state["document"].page_content})
32
+ if not result.chapters:
33
  state["themes"] = []
34
  return state
35
+ chapters = [chapter.model_dump() for chapter in result.chapters]
36
+ chapters = [
37
+ {
38
+ "chapter": chapter["chapter"].value,
39
+ "score": chapter["score"],
40
+ "description": chapter["description"],
41
+ }
42
+ for chapter in chapters
43
+ ]
44
  except Exception as e:
45
  logger.error(f"Theme selection error: {e}")
46
+ chapters = []
47
+ state["themes"] = [d for d in chapters if d["score"] >= 4]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  return state
49
 
50
 
 
86
  logger.info(f"Starting PII removal for: {state['filename']}")
87
  state["document"].page_content = remove_pii(state["document"].page_content)
88
 
89
+ logger.info(f"Retrieving themes for: {state['filename']}")
90
+ state = retrieve_chapters(state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
 
 
92
  try:
93
  response = map_chain.invoke({"context": state["document"].page_content})
94
  except Exception as e:
planning_ai/nodes/reduce_node.py CHANGED
@@ -1,66 +1,27 @@
1
- import json
2
- from pathlib import Path
3
-
4
  import polars as pl
5
 
6
  from planning_ai.chains.policy_chain import policy_chain
7
  from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
8
  from planning_ai.logging import logger
9
  from planning_ai.states import OverallState
10
- from planning_ai.themes import THEMES_AND_POLICIES
11
-
12
-
13
- def save_summaries_to_json(docs):
14
- """Saves summaries to JSON files.
15
-
16
- Args:
17
- out (list): A list of summary dictionaries.
18
- """
19
- out = [
20
- {
21
- "document": doc["document"].model_dump()["page_content"],
22
- **doc["document"].metadata,
23
- "filename": doc["filename"],
24
- "entities": doc["entities"],
25
- "themes": doc["themes"].model_dump(),
26
- "summary": doc["summary"].model_dump()["summary"],
27
- "policies": doc["policies"],
28
- "notes": doc["notes"],
29
- "refinement_attempts": doc["refinement_attempts"],
30
- "hallucination": doc["hallucination"].model_dump(),
31
- "is_hallucinated": doc["is_hallucinated"],
32
- "failed": doc["failed"],
33
- }
34
- for doc in docs
35
- ]
36
- for doc in out:
37
- filename = Path(str(doc["filename"])).stem
38
- with open(f"data/out/summaries/{filename}.json", "w") as f:
39
- json.dump(doc, f)
40
 
41
 
42
  def extract_policies_from_docs(docs):
43
- policies = {"doc_id": [], "themes": [], "policies": [], "details": [], "stance": []}
44
  for doc in docs:
45
- if not doc["summary"].policies:
46
- continue
47
- for policy in doc["summary"].policies:
48
- for theme, p in THEMES_AND_POLICIES.items():
49
- if policy.policy.name in p:
50
- policies["doc_id"].append(doc["doc_id"])
51
- policies["themes"].append(theme)
52
- policies["policies"].append(policy.policy.name)
53
- policies["details"].append(policy.note)
54
- policies["stance"].append(
55
- doc["document"].metadata["representations_support/object"]
56
- )
57
  return pl.DataFrame(policies)
58
 
59
 
60
  def add_doc_id(final_docs):
61
  out_docs = []
62
  for id, doc in enumerate(final_docs):
63
- doc["summary"].summary = f"Document ID: [{id}]\n\n{doc['summary'].summary}"
64
  doc["doc_id"] = id
65
  out_docs.append(doc)
66
  return out_docs
@@ -76,7 +37,7 @@ def batch_generate_executive_summaries(summaries):
76
  list: A list of final responses.
77
  """
78
  summaries_text = [
79
- f"Document ID: {[s['doc_id']]}\n\n{s['summary'].summary}" for s in summaries
80
  ]
81
  final_responses = []
82
  batch_size = 50
@@ -93,11 +54,11 @@ def batch_generate_executive_summaries(summaries):
93
  def generate_policy_output(policy_groups):
94
  out = []
95
  for policy in (
96
- policy_groups.group_by(["themes", "policies", "stance"])
97
  .agg(pl.col("details"), pl.col("doc_id"))
98
  .rows(named=True)
99
  ):
100
- logger.info(f"Processing policies: {policy['policies']}...")
101
  zipped = [
102
  f"{bullet} Doc ID: {id}"
103
  for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
@@ -105,20 +66,15 @@ def generate_policy_output(policy_groups):
105
  try:
106
  reduced = policy_chain.invoke(
107
  {
108
- "theme": policy["themes"],
109
- "policy": policy["policies"],
110
  "details": zipped,
111
  }
112
  )
113
  out.extend(policy | p for p in reduced.dict()["policies"])
114
  except Exception as e:
115
- logger.error(f"Failed to generate policies for {policy['policies']}: {e}")
116
  continue
117
- return (
118
- pl.DataFrame(out)
119
- .group_by(["themes", "policies", "stance"])
120
- .agg(["detail", "doc_id"])
121
- )
122
 
123
 
124
  def generate_final_report(state: OverallState):
@@ -129,9 +85,13 @@ def generate_final_report(state: OverallState):
129
 
130
 
131
  def final_output(final_docs):
132
- docs = [doc for doc in final_docs if not doc["failed"]]
133
 
134
- failed_docs = [doc["document"].model_dump() for doc in final_docs if doc["failed"]]
 
 
 
 
135
  docs = add_doc_id(docs)
136
 
137
  policy_groups = extract_policies_from_docs(docs)
 
 
 
 
1
  import polars as pl
2
 
3
  from planning_ai.chains.policy_chain import policy_chain
4
  from planning_ai.chains.reduce_chain import reduce_chain, reduce_chain_final
5
  from planning_ai.logging import logger
6
  from planning_ai.states import OverallState
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def extract_policies_from_docs(docs):
10
+ policies = {"doc_id": [], "themes": [], "details": [], "stance": []}
11
  for doc in docs:
12
+ for policy in doc["themes"]:
13
+ policies["doc_id"].append(doc["doc_id"])
14
+ policies["themes"].append(policy["chapter"])
15
+ policies["details"].append(policy["description"])
16
+ policies["stance"].append(
17
+ doc["document"].metadata["representations_support/object"]
18
+ )
 
 
 
 
 
19
  return pl.DataFrame(policies)
20
 
21
 
22
  def add_doc_id(final_docs):
23
  out_docs = []
24
  for id, doc in enumerate(final_docs):
 
25
  doc["doc_id"] = id
26
  out_docs.append(doc)
27
  return out_docs
 
37
  list: A list of final responses.
38
  """
39
  summaries_text = [
40
+ f"Document ID: {[s['doc_id']]}\n\n{s['summary']}" for s in summaries
41
  ]
42
  final_responses = []
43
  batch_size = 50
 
54
  def generate_policy_output(policy_groups):
55
  out = []
56
  for policy in (
57
+ policy_groups.group_by(["themes", "stance"])
58
  .agg(pl.col("details"), pl.col("doc_id"))
59
  .rows(named=True)
60
  ):
61
+ logger.info(f"Processing chapter: {policy['themes']}...")
62
  zipped = [
63
  f"{bullet} Doc ID: {id}"
64
  for (bullet, id) in zip(policy["details"], policy["doc_id"], strict=True)
 
66
  try:
67
  reduced = policy_chain.invoke(
68
  {
69
+ "chapter": policy["themes"],
 
70
  "details": zipped,
71
  }
72
  )
73
  out.extend(policy | p for p in reduced.dict()["policies"])
74
  except Exception as e:
75
+ logger.error(f"Failed to generate policies for {policy['themes']}: {e}")
76
  continue
77
+ return pl.DataFrame(out).group_by(["themes", "stance"]).agg(["detail", "doc_id"])
 
 
 
 
78
 
79
 
80
  def generate_final_report(state: OverallState):
 
85
 
86
 
87
  def final_output(final_docs):
88
+ docs = [doc for doc in final_docs if doc["themes"] and not doc["failed"]]
89
 
90
+ failed_docs = [
91
+ doc["document"].model_dump()
92
+ for doc in final_docs
93
+ if not doc["themes"] or doc["failed"]
94
+ ]
95
  docs = add_doc_id(docs)
96
 
97
  policy_groups = extract_policies_from_docs(docs)
reports/DOCS/_extensions/nrennie/PrettyPDF/logo.png ADDED
uv.lock CHANGED
@@ -833,18 +833,6 @@ wheels = [
833
  { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
834
  ]
835
 
836
- [[package]]
837
- name = "extra-streamlit-components"
838
- version = "0.1.71"
839
- source = { registry = "https://pypi.org/simple" }
840
- dependencies = [
841
- { name = "streamlit" },
842
- ]
843
- sdist = { url = "https://files.pythonhosted.org/packages/16/a7/580b13af828ef38888196f8b2c03fa97afa89cdb7946438ca5f3271e9a81/extra_streamlit_components-0.1.71.tar.gz", hash = "sha256:d18314cf2ed009f95641882b50aa3bdb11b6a0eb6403fb43dbc8af1722419617", size = 2250093 }
844
- wheels = [
845
- { url = "https://files.pythonhosted.org/packages/25/57/1115e9b974478fac83ba9cd79def8b3770a91b7a9001c46a76491071f2fe/extra_streamlit_components-0.1.71-py3-none-any.whl", hash = "sha256:c8e6f98446adecd3002756362e50d0669693b7673afaa89cebfced6415cc6bd3", size = 4858597 },
846
- ]
847
-
848
  [[package]]
849
  name = "faker"
850
  version = "36.2.2"
@@ -1043,14 +1031,14 @@ wheels = [
1043
 
1044
  [[package]]
1045
  name = "googleapis-common-protos"
1046
- version = "1.69.0"
1047
  source = { registry = "https://pypi.org/simple" }
1048
  dependencies = [
1049
  { name = "protobuf" },
1050
  ]
1051
- sdist = { url = "https://files.pythonhosted.org/packages/c2/92/6bb11dad062ad7cc40665d0a8986193d54f1a0032b510e84e7182df9e661/googleapis_common_protos-1.69.0.tar.gz", hash = "sha256:5a46d58af72846f59009b9c4710425b9af2139555c71837081706b213b298187", size = 61264 }
1052
  wheels = [
1053
- { url = "https://files.pythonhosted.org/packages/1d/66/0025e2b7a2ae353acea03cf9d4a96ae32ef02c116944e2eb11f559cf4b7b/googleapis_common_protos-1.69.0-py2.py3-none-any.whl", hash = "sha256:17835fdc4fa8da1d61cfe2d4d5d57becf7c61d4112f8d81c67eaa9d7ce43042d", size = 169749 },
1054
  ]
1055
 
1056
  [[package]]
@@ -1834,15 +1822,15 @@ wheels = [
1834
 
1835
  [[package]]
1836
  name = "langgraph-checkpoint"
1837
- version = "2.0.16"
1838
  source = { registry = "https://pypi.org/simple" }
1839
  dependencies = [
1840
  { name = "langchain-core" },
1841
  { name = "msgpack" },
1842
  ]
1843
- sdist = { url = "https://files.pythonhosted.org/packages/01/66/5d4a2013a84c511be289bb4a5ef91cbaad28c091b6b366fdb79710a1458b/langgraph_checkpoint-2.0.16.tar.gz", hash = "sha256:49ba8cfa12b2aae845ccc3b1fbd1d7a8d3a6c4a2e387ab3a92fca40dd3d4baa5", size = 34206 }
1844
  wheels = [
1845
- { url = "https://files.pythonhosted.org/packages/7c/63/03bc3dd304ead45b53313cab8727329e1d139a2d220f2d030c72242c860e/langgraph_checkpoint-2.0.16-py3-none-any.whl", hash = "sha256:dfab51076a6eddb5f9e146cfe1b977e3dd6419168b2afa23ff3f4e47973bf06f", size = 38291 },
1846
  ]
1847
 
1848
  [[package]]
@@ -1860,20 +1848,20 @@ wheels = [
1860
 
1861
  [[package]]
1862
  name = "langgraph-sdk"
1863
- version = "0.1.53"
1864
  source = { registry = "https://pypi.org/simple" }
1865
  dependencies = [
1866
  { name = "httpx" },
1867
  { name = "orjson" },
1868
  ]
1869
- sdist = { url = "https://files.pythonhosted.org/packages/39/b2/a261cfbf91a4499396ba0993cf5601076301dd22883d3c0901e905253917/langgraph_sdk-0.1.53.tar.gz", hash = "sha256:12906ed965905fa27e0c28d9fa07dc6fd89e6895ff321ff049fdf3965d057cc4", size = 42369 }
1870
  wheels = [
1871
- { url = "https://files.pythonhosted.org/packages/fc/97/3492a07b454cc74bf49938e83f0a95c608a8bc5c3dda338091d3c66e3ec5/langgraph_sdk-0.1.53-py3-none-any.whl", hash = "sha256:4fab62caad73661ffe4c3ababedcd0d7bfaaba986bee4416b9c28948458a3af5", size = 45441 },
1872
  ]
1873
 
1874
  [[package]]
1875
  name = "langsmith"
1876
- version = "0.3.11"
1877
  source = { registry = "https://pypi.org/simple" }
1878
  dependencies = [
1879
  { name = "httpx" },
@@ -1884,9 +1872,9 @@ dependencies = [
1884
  { name = "requests-toolbelt" },
1885
  { name = "zstandard" },
1886
  ]
1887
- sdist = { url = "https://files.pythonhosted.org/packages/ea/34/c4c0eddad03e00457cd6be1a88c288cd4419da8d368d8f519a29abe5392c/langsmith-0.3.11.tar.gz", hash = "sha256:ddf29d24352e99de79c9618aaf95679214324e146c5d3d9475a7ddd2870018b1", size = 323815 }
1888
  wheels = [
1889
- { url = "https://files.pythonhosted.org/packages/ff/68/514ffa62860202a5a0a3acbf5c05017ef9df38d4437d2cb44a3cf93d617b/langsmith-0.3.11-py3-none-any.whl", hash = "sha256:0cca22737ef07d3b038a437c141deda37e00add56022582680188b681bec095e", size = 335265 },
1890
  ]
1891
 
1892
  [[package]]
@@ -2236,11 +2224,11 @@ wheels = [
2236
 
2237
  [[package]]
2238
  name = "narwhals"
2239
- version = "1.29.0"
2240
  source = { registry = "https://pypi.org/simple" }
2241
- sdist = { url = "https://files.pythonhosted.org/packages/e6/f7/caa23ebc4aed3ef2314441c44e1d842e701adc6af57587ffda9263c03b6e/narwhals-1.29.0.tar.gz", hash = "sha256:1021c345d56c66ff0cc8e6d03ca8c543d01ffc411630973a5cb69ee86824d823", size = 248349 }
2242
  wheels = [
2243
- { url = "https://files.pythonhosted.org/packages/ee/f6/1fcd6b3d0e21d9b75e71ae68fbc92bbb9b9b1f4f33dd81c61d8f53378b30/narwhals-1.29.0-py3-none-any.whl", hash = "sha256:653aa8e5eb435816e7b50c8def17e7e5e3324c2ffd8a3eec03fef85792e9cf5e", size = 305214 },
2244
  ]
2245
 
2246
  [[package]]
@@ -2844,7 +2832,7 @@ wheels = [
2844
 
2845
  [[package]]
2846
  name = "posthog"
2847
- version = "3.18.1"
2848
  source = { registry = "https://pypi.org/simple" }
2849
  dependencies = [
2850
  { name = "backoff" },
@@ -2854,9 +2842,9 @@ dependencies = [
2854
  { name = "requests" },
2855
  { name = "six" },
2856
  ]
2857
- sdist = { url = "https://files.pythonhosted.org/packages/a5/1c/aa6bb26491108e9e350cd7af4d4b0a54d48c755cc76b2c2d90ef2916b8b3/posthog-3.18.1.tar.gz", hash = "sha256:ce115b8422f26c57cd4143499115b741f5683c93d0b5b87bab391579aaef084b", size = 65573 }
2858
  wheels = [
2859
- { url = "https://files.pythonhosted.org/packages/04/c2/407c8cf3edf4fe33b82de3fee11178d083ee0b6e3eb28ff8072caaa85907/posthog-3.18.1-py2.py3-none-any.whl", hash = "sha256:6865104b7cf3a5b13949e2bc2aab9b37b5fbf5f9e045fa55b9eabe21b3850200", size = 76762 },
2860
  ]
2861
 
2862
  [[package]]
@@ -3416,11 +3404,11 @@ wheels = [
3416
 
3417
  [[package]]
3418
  name = "python-json-logger"
3419
- version = "3.2.1"
3420
  source = { registry = "https://pypi.org/simple" }
3421
- sdist = { url = "https://files.pythonhosted.org/packages/e3/c4/358cd13daa1d912ef795010897a483ab2f0b41c9ea1b35235a8b2f7d15a7/python_json_logger-3.2.1.tar.gz", hash = "sha256:8eb0554ea17cb75b05d2848bc14fb02fbdbd9d6972120781b974380bfa162008", size = 16287 }
3422
  wheels = [
3423
- { url = "https://files.pythonhosted.org/packages/4b/72/2f30cf26664fcfa0bd8ec5ee62ec90c03bd485e4a294d92aabc76c5203a5/python_json_logger-3.2.1-py3-none-any.whl", hash = "sha256:cdc17047eb5374bd311e748b42f99d71223f3b0e186f4206cc5d52aefe85b090", size = 14924 },
3424
  ]
3425
 
3426
  [[package]]
@@ -4031,19 +4019,20 @@ wheels = [
4031
 
4032
  [[package]]
4033
  name = "streamlit-authenticator"
4034
- version = "0.4.2"
4035
  source = { registry = "https://pypi.org/simple" }
4036
  dependencies = [
4037
  { name = "bcrypt" },
4038
  { name = "captcha" },
4039
  { name = "cryptography" },
4040
- { name = "extra-streamlit-components" },
4041
  { name = "pyjwt" },
4042
  { name = "pyyaml" },
4043
  { name = "streamlit" },
 
4044
  ]
 
4045
  wheels = [
4046
- { url = "https://files.pythonhosted.org/packages/83/47/837b158e1a5b0d187d20c6be22c46d84d12a8d3e8d7113b67ebb33e221c9/streamlit_authenticator-0.4.2-py3-none-any.whl", hash = "sha256:442acccef6af65e2b0feb15d5e9f68707f204c1d31c60673690d87179c7ca5b2", size = 43197 },
4047
  ]
4048
 
4049
  [[package]]
@@ -4140,6 +4129,18 @@ wheels = [
4140
  { url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
4141
  ]
4142
 
 
 
 
 
 
 
 
 
 
 
 
 
4143
  [[package]]
4144
  name = "streamlit-keyup"
4145
  version = "0.3.0"
 
833
  { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
834
  ]
835
 
 
 
 
 
 
 
 
 
 
 
 
 
836
  [[package]]
837
  name = "faker"
838
  version = "36.2.2"
 
1031
 
1032
  [[package]]
1033
  name = "googleapis-common-protos"
1034
+ version = "1.69.1"
1035
  source = { registry = "https://pypi.org/simple" }
1036
  dependencies = [
1037
  { name = "protobuf" },
1038
  ]
1039
+ sdist = { url = "https://files.pythonhosted.org/packages/41/4f/d8be74b88621131dfd1ed70e5aff2c47f2bdf2289a70736bbf3eb0e7bc70/googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1", size = 144514 }
1040
  wheels = [
1041
+ { url = "https://files.pythonhosted.org/packages/16/cb/2f4aa605b16df1e031dd7c322c597613eef933e8dd5b6a4414330b21e791/googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5", size = 293229 },
1042
  ]
1043
 
1044
  [[package]]
 
1822
 
1823
  [[package]]
1824
  name = "langgraph-checkpoint"
1825
+ version = "2.0.17"
1826
  source = { registry = "https://pypi.org/simple" }
1827
  dependencies = [
1828
  { name = "langchain-core" },
1829
  { name = "msgpack" },
1830
  ]
1831
+ sdist = { url = "https://files.pythonhosted.org/packages/90/92/f0d6c3e2b2e131c687a9ec87c6e1a430287c430160038e8dfaa4d0db9aab/langgraph_checkpoint-2.0.17.tar.gz", hash = "sha256:255c249f03369c41252f888bc1e1e481bf4fdecf6b3854a39e4935dc34152bc0", size = 34932 }
1832
  wheels = [
1833
+ { url = "https://files.pythonhosted.org/packages/4d/61/35faa34145ddaffc70eb851b85561c96bbad4718ea6f34ef3c717e748c15/langgraph_checkpoint-2.0.17-py3-none-any.whl", hash = "sha256:7da9cd9af41bda5074afef0dcdbd6fa9a050f68beed9d7f80593a253412bf020", size = 39103 },
1834
  ]
1835
 
1836
  [[package]]
 
1848
 
1849
  [[package]]
1850
  name = "langgraph-sdk"
1851
+ version = "0.1.55"
1852
  source = { registry = "https://pypi.org/simple" }
1853
  dependencies = [
1854
  { name = "httpx" },
1855
  { name = "orjson" },
1856
  ]
1857
+ sdist = { url = "https://files.pythonhosted.org/packages/7a/6c/8286151a21124dc0189b57495541c2e3cace317056f60feb04076b438f82/langgraph_sdk-0.1.55.tar.gz", hash = "sha256:89a0240157a27822cc4edd1c9e72bc852e20f5c71165a4c9b91eeffa11fd6a6b", size = 42690 }
1858
  wheels = [
1859
+ { url = "https://files.pythonhosted.org/packages/4e/64/4b75f4b57f0c8f39bdb43aa74b1d2edcdb604b5baa58465ccc54b8b906c5/langgraph_sdk-0.1.55-py3-none-any.whl", hash = "sha256:266e92a558eb738da1ef04c29fbfc2157cd3a977b80905d9509a2cb79331f8fc", size = 45785 },
1860
  ]
1861
 
1862
  [[package]]
1863
  name = "langsmith"
1864
+ version = "0.3.12"
1865
  source = { registry = "https://pypi.org/simple" }
1866
  dependencies = [
1867
  { name = "httpx" },
 
1872
  { name = "requests-toolbelt" },
1873
  { name = "zstandard" },
1874
  ]
1875
+ sdist = { url = "https://files.pythonhosted.org/packages/48/32/0ef5ad579ae096f40fc108b6920e742267a0e9c07d778c1d381586616715/langsmith-0.3.12.tar.gz", hash = "sha256:045b49d0401d0e985d025ff0cf69743ab9a429e309ce5d533eab3c774d004bc2", size = 324149 }
1876
  wheels = [
1877
+ { url = "https://files.pythonhosted.org/packages/9a/92/9702c45974c4dbea978f8af1cfb077677b96f98df12b1638be8eff5ae5ff/langsmith-0.3.12-py3-none-any.whl", hash = "sha256:cf7926bd12d56adbd74a294ebbfc5a34c413172bfbdcd763175cc472b45afbea", size = 335663 },
1878
  ]
1879
 
1880
  [[package]]
 
2224
 
2225
  [[package]]
2226
  name = "narwhals"
2227
+ version = "1.29.1"
2228
  source = { registry = "https://pypi.org/simple" }
2229
+ sdist = { url = "https://files.pythonhosted.org/packages/a7/17/7d35094da0820ae941d8ce51842f253da36c6f95360ea0afabfc18bc02c6/narwhals-1.29.1.tar.gz", hash = "sha256:c408acf09e90c116f247cf34f24a3a89d147e3e235b1d3c708cfd1960baf320a", size = 251464 }
2230
  wheels = [
2231
+ { url = "https://files.pythonhosted.org/packages/f1/22/380df533b08a57bc9013bb5714f33c571e1447828d83213a66adaefc0a04/narwhals-1.29.1-py3-none-any.whl", hash = "sha256:2f68cfbb2562672c4dfa54f158ed8c2828e9920ef784981cd9114e419c444216", size = 308220 },
2232
  ]
2233
 
2234
  [[package]]
 
2832
 
2833
  [[package]]
2834
  name = "posthog"
2835
+ version = "3.19.0"
2836
  source = { registry = "https://pypi.org/simple" }
2837
  dependencies = [
2838
  { name = "backoff" },
 
2842
  { name = "requests" },
2843
  { name = "six" },
2844
  ]
2845
+ sdist = { url = "https://files.pythonhosted.org/packages/d4/fe/5f785ed1514caa4687738e112716904d4fce8752068f9cad2afaa8207b05/posthog-3.19.0.tar.gz", hash = "sha256:7fe5c9e494fc2cca9baa2bd8074c0844d572df46a54378101bc20eec2776027e", size = 66172 }
2846
  wheels = [
2847
+ { url = "https://files.pythonhosted.org/packages/5c/28/57f95743a17af817ea56f0f3aa280af677884f0d6b59c8edf4c30fc5ac2e/posthog-3.19.0-py2.py3-none-any.whl", hash = "sha256:c294bc0a939e21ecf88d625496f8073cc566c28ec2a917a47d5d32ba33e90a7f", size = 77800 },
2848
  ]
2849
 
2850
  [[package]]
 
3404
 
3405
  [[package]]
3406
  name = "python-json-logger"
3407
+ version = "3.3.0"
3408
  source = { registry = "https://pypi.org/simple" }
3409
+ sdist = { url = "https://files.pythonhosted.org/packages/9e/de/d3144a0bceede957f961e975f3752760fbe390d57fbe194baf709d8f1f7b/python_json_logger-3.3.0.tar.gz", hash = "sha256:12b7e74b17775e7d565129296105bbe3910842d9d0eb083fc83a6a617aa8df84", size = 16642 }
3410
  wheels = [
3411
+ { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 },
3412
  ]
3413
 
3414
  [[package]]
 
4019
 
4020
  [[package]]
4021
  name = "streamlit-authenticator"
4022
+ version = "0.4.3"
4023
  source = { registry = "https://pypi.org/simple" }
4024
  dependencies = [
4025
  { name = "bcrypt" },
4026
  { name = "captcha" },
4027
  { name = "cryptography" },
 
4028
  { name = "pyjwt" },
4029
  { name = "pyyaml" },
4030
  { name = "streamlit" },
4031
+ { name = "streamlit-javascript" },
4032
  ]
4033
+ sdist = { url = "https://files.pythonhosted.org/packages/bf/6d/7c59285447bdb18c03e37888f8d1f593e241f4af275c36e46fddced8590a/streamlit_authenticator-0.4.3.tar.gz", hash = "sha256:d05dd8656bdff0b4881b40c36108bf068e7aad8424c20cf87c9e657aac8d4a92", size = 39752 }
4034
  wheels = [
4035
+ { url = "https://files.pythonhosted.org/packages/ed/5a/039d6303526306740d214a8edd360a465a1710491b3cb0284799bbd7131c/streamlit_authenticator-0.4.3-py3-none-any.whl", hash = "sha256:1f7347d480cf71c76bb915cff0a62516d087115ad1e72f99708b944705baf78d", size = 43196 },
4036
  ]
4037
 
4038
  [[package]]
 
4129
  { url = "https://files.pythonhosted.org/packages/5e/cf/1dba1380eb3b632f1f86c80533a3fca1376a938517044048122adf816a90/streamlit_image_coordinates-0.1.9-py3-none-any.whl", hash = "sha256:e577d475707ce8a3f7be1825027af6b4d7b609a456f4b25b794756ed2436ab06", size = 7049 },
4130
  ]
4131
 
4132
+ [[package]]
4133
+ name = "streamlit-javascript"
4134
+ version = "0.1.5"
4135
+ source = { registry = "https://pypi.org/simple" }
4136
+ dependencies = [
4137
+ { name = "streamlit" },
4138
+ ]
4139
+ sdist = { url = "https://files.pythonhosted.org/packages/a5/69/723ebc7a58057b1e6f54d0c23c86488054d756e0e470daf4db703fe02a63/streamlit-javascript-0.1.5.tar.gz", hash = "sha256:9da5176522a0acf2c39d3b0bec9f856fdd8ea3c70bb1066841a546ab1348ae1d", size = 512374 }
4140
+ wheels = [
4141
+ { url = "https://files.pythonhosted.org/packages/87/81/0c9e9e4d2dab97224efe105ba44f47b259f37d8e7673b94f5b5523fe2c8f/streamlit_javascript-0.1.5-py3-none-any.whl", hash = "sha256:36ca4d8c46fd5b6526d1a705530472e03cb6e5bb24694330649f21d5c436d280", size = 518387 },
4142
+ ]
4143
+
4144
  [[package]]
4145
  name = "streamlit-keyup"
4146
  version = "0.3.0"