Ryan2219 commited on
Commit
44a7676
·
verified ·
1 Parent(s): b5bb6b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1157 -1155
app.py CHANGED
@@ -1,1156 +1,1158 @@
1
- import gradio as gr
2
- import json
3
- import os
4
- import re
5
- from google import genai
6
- from google.genai import types
7
- import chromadb
8
- from chromadb.utils import embedding_functions
9
- from collections import Counter
10
- import base64
11
- import io
12
- from PIL import Image
13
- import matplotlib.pyplot as plt
14
- import openai
15
- from datetime import datetime
16
- import threading
17
-
18
- os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
19
- os.environ["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY")
20
-
21
# Global state for the interface
class InterfaceState:
    """Thread-safe shared state for the Gradio interface.

    Worker threads append log/analysis lines, gallery images, and staged
    Gemini image parts; the UI thread reads them. All mutation happens
    under a single lock.
    """

    def __init__(self):
        self.log_messages = []          # markdown log lines shown in the UI
        self.analysis_messages = []     # detailed analyst output lines
        self.current_chapter = ""       # last fetched code-chapter text
        self.current_images = []        # PIL images for the gallery
        self.staged_audit_images = []   # image parts staged for the audit turn
        self.final_answer = ""
        self.done = False
        self.lock = threading.Lock()

    def add_log(self, message):
        """Append a timestamped log line and return the joined log markdown."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        with self.lock:
            self.log_messages.append(f"**[{timestamp}]** {message}")
            # Join while still holding the lock so we return a consistent
            # snapshot even when several threads log concurrently.
            return "\n\n".join(self.log_messages)

    def add_analysis(self, message):
        """Append a timestamped analysis line and return the joined markdown."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        with self.lock:
            self.analysis_messages.append(f"**[{timestamp}]** {message}")
            return "\n\n".join(self.analysis_messages)

    def set_chapter(self, chapter_text):
        """Replace the currently displayed chapter text; returns it unchanged."""
        with self.lock:
            self.current_chapter = chapter_text
        return chapter_text

    def add_image(self, img_pil):
        """Add a PIL image to the gallery and return a copy of the gallery list."""
        with self.lock:
            self.current_images.append(img_pil)
            return self.current_images.copy()

    def add_staged_image_part(self, image_part):
        """Thread-safe method to stage images for the Gemini Audit."""
        with self.lock:
            self.staged_audit_images.append(image_part)
            # Log it so we can verify it happened in the console
            print(f"DEBUG: Staged image part. Total staged: {len(self.staged_audit_images)}")

    def get_staged_images(self):
        """Safely retrieve the staged images for the audit turn."""
        with self.lock:
            return list(self.staged_audit_images)  # Return a copy to prevent mutation

    def clear(self):
        """Reset all per-run state back to its initial values."""
        with self.lock:
            self.log_messages.clear()
            self.analysis_messages.clear()
            self.current_chapter = ""
            self.current_images.clear()
            # Fix: the original forgot to clear the staged audit images,
            # so parts leaked from one run into the next audit turn.
            self.staged_audit_images.clear()
            self.final_answer = ""
            self.done = False
75
-
76
-
77
-
78
# Single shared UI/state object used by every tool function below.
state = InterfaceState()

# Load your data (same as original)
# Per-page sheet metadata (titles etc.), keyed by page number.
with open('Preprocessed Files/page_metadata.json', 'r') as json_file:
    page_metadata = json.load(json_file)
# JSON object keys are always strings; re-key by integer page number.
page_metadata = {int(k): v for k, v in page_metadata.items()}

# OCR-extracted text for each page, indexed by page number.
with open('Preprocessed Files/text_list.json', 'r') as json_file:
    text_list = json.load(json_file)

# Tile coordinate metadata: page number -> tile index -> tile info.
with open('Preprocessed Files/tile_metadata.json', 'r') as json_file:
    tile_metadata = json.load(json_file)
# Re-key both nesting levels from JSON strings to ints.
tile_metadata = {
    int(outer_k): {
        int(inner_k): inner_v
        for inner_k, inner_v in outer_v.items()
    }
    for outer_k, outer_v in tile_metadata.items()
}
97
-
98
def load_fullpage_images(folder="Images"):
    """Return raw PNG bytes for every full-page image in *folder*.

    Files named ``page_<N>_fullpage.png`` are collected and returned
    ordered by their page number ``N``; other files are ignored.
    """
    pattern = re.compile(r"page_(\d+)_fullpage\.png")

    numbered = []
    for name in os.listdir(folder):
        hit = pattern.search(name)
        if hit:
            numbered.append((int(hit.group(1)), name))

    # Sort strictly by page number (stable for any unlikely duplicates).
    numbered.sort(key=lambda entry: entry[0])

    payloads = []
    for _page, name in numbered:
        with open(os.path.join(folder, name), "rb") as handle:
            payloads.append(handle.read())
    return payloads
114
-
115
def load_tile_images(page, folder='Tiles'):
    """Return raw PNG bytes for every tile of *page*, ordered by tile index.

    Args:
        page: Page number whose tiles should be collected.
        folder: Directory containing tile images named
            ``page_<page>_tile_<idx>.png`` (defaults to ``'Tiles'``,
            matching the original hard-coded path).

    Returns:
        list[bytes]: Tile image bytes sorted by ascending tile index;
        empty when no tiles exist for the page.
    """
    # Raw f-string: the original used a plain f-string, so "\d" and "\."
    # were invalid string escapes (SyntaxWarning today, an error in future
    # CPython) rather than intentional regex escapes.
    pattern = re.compile(rf"page_{page}_tile_(\d+)\.png")

    tile_files = []
    for name in os.listdir(folder):
        match = pattern.search(name)
        if match:
            # group(1) is the *tile* index (the original misleadingly
            # called this page_num).
            tile_files.append((int(match.group(1)), name))
    tile_files.sort(key=lambda entry: entry[0])

    image_bytes_list = []
    for _tile_idx, name in tile_files:
        with open(os.path.join(folder, name), "rb") as handle:
            image_bytes_list.append(handle.read())
    return image_bytes_list
131
-
132
# Cache of full-page PNG bytes, indexed by page number.
image_bytes_list = load_fullpage_images()

# Pre-load tile bytes for every page that actually has tiles on disk.
tile_bytes = {}
for page in range(44):
    tile_list = load_tile_images(page)
    if tile_list:
        # Reuse the list we already read; the original called
        # load_tile_images(page) a second time here, re-reading every
        # tile file from disk for no benefit.
        tile_bytes[page] = tile_list

# Vector Code Base
chroma_client = chromadb.PersistentClient(path="nyc_code_db")
embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)

all_pending_images = []
146
-
147
- # Modified tool functions with Gradio updates
148
def search_page_text(page_number: int, research_goal: str):
    """Run a fast LLM signal-extraction pass over one page's OCR text.

    Sends ``text_list[page_number]`` plus *research_goal* to an OpenAI chat
    model and logs the resulting markdown analysis to the shared state.

    Args:
        page_number: Index into the module-level ``text_list`` /
            ``page_metadata`` structures.
        research_goal: Free-text goal used to constrain the extraction.

    Returns:
        dict: ``{"page": page_number, "summary": <markdown analysis str>}``.
    """
    state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')

    state.add_analysis(
        f'🔍 Searching page {page_metadata[page_number]["sheet_title"]} with prompt\n{research_goal}'
    )

    raw_text = text_list[page_number]

    # NOTE(review): a new client is constructed per call; relies on
    # OPENAI_API_KEY being present in the environment.
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": """
You are a Fast NYC Plans Examiner Signal Agent.

Your ONLY job is to extract **code-relevant signals** from the OCR text of a SINGLE drawing page.
You do NOT interpret the law and you do NOT summarize design intent.

Your output will be used to CONSTRAIN a downstream legal research agent.

========================
WHAT TO EXTRACT
========================
Look only for information that determines which parts of the NYC Code apply such as:

- Occupancy classification (e.g., R-2, A-3, M, S, F, mixed-use)
- Building height (stories, feet, high-rise indicators)
- Construction type (I, II, III, IV, V)
- Fire protection systems (sprinklers, standpipes, fire alarm, smoke control)
- Means of egress references (stairs, exits, exit access, doors, corridors)
- Structural system hints (steel, concrete, load-bearing walls, columns, transfer girders)
- Mechanical / fuel / plumbing system mentions (boilers, gas piping, HVAC type, shafts)
- Zoning or special district references (if present)
- Scope flags (new building, alteration, addition, change of occupancy, retrofit)

However only return relevant signals to the provided research goal.

========================
OUTPUT FORMAT (STRICT MARKDOWN)
========================
Return ONLY the following sections:

### Code-Relevant Signals
- Bullet list of extracted facts

### Likely Governing Code Domains
- One-line list chosen from: Administrative, Building, Mechanical, FuelGas, Plumbing, Fire

### Text Evidence
- Short quoted snippets from the page that support each signal

========================
RULES
========================
- Do NOT speculate
- If a signal is not present, omit it
- Prefer exact phrases over paraphrase
- Keep total length under 500 words
- No legal conclusions, no compliance advice
"""},
            {"role": "user", "content": f"PAGE TEXT:\n{raw_text}\n\nRESEARCH GOAL: {research_goal}\n\nReturn a breif but comprehensive Markdown summary of your findings and justification with text snippets."}
        ]
    )

    analysis_text = response.choices[0].message.content

    state.add_analysis(
        f"🟦 Text Analyst (Page {page_number})\n{analysis_text}"
    )

    return {
        "page": page_number,
        "summary": analysis_text
    }
223
-
224
def discover_code_locations(query: str):
    """Semantic search over the NYC code vector DB.

    Queries the Chroma collection for the 25 nearest sections and returns
    a markdown discovery report: the five most-hit (code, chapter) pairs
    followed by a per-section snippet list.
    """
    state.add_log(f'📚 Searching NYC Code for: **{query}**')

    hits = collection.query(
        query_texts=[query],
        n_results=25,
        include=["metadatas", "documents"]
    )

    metas = hits['metadatas'][0]
    if not metas:
        return "No results found. Try a different technical keyword."
    docs = hits['documents'][0]

    # Tally which (code volume, chapter) pairs dominate the top hits.
    pair_counts = Counter(
        f"{meta['code_type']} | Ch. {meta['parent_major']}" for meta in metas
    )
    chapter_summary = "\n".join(
        f"- {pair} ({count} hits)" for pair, count in pair_counts.most_common(5)
    )

    section_reports = [
        f"ID: {meta['section_full']} | Code: {meta['code_type']} | Chapter: {meta['parent_major']}\n"
        f"Snippet: {doc}"
        for meta, doc in zip(metas, docs)
    ]

    return (
        "### CODE DISCOVERY REPORT ###\n"
        f"MOST RELEVANT CHAPTERS:\n{chapter_summary}\n\n"
        "TOP RELEVANT SECTIONS:\n" +
        "\n---\n".join(section_reports)
    )
259
-
260
def fetch_full_chapter(code_type: str, chapter_id: str):
    """Fetch, de-duplicate and format the full legal text of one chapter.

    Pulls every section of (*code_type*, *chapter_id*) from the Chroma
    collection, collapses duplicated "[CONT.]:" continuation fragments,
    pushes the formatted text to the chapter display, and returns it.
    On any failure an error string is returned instead of raising.
    """
    state.add_log(f'📖 Fetching Chapter **{chapter_id}** from **{code_type}** code')

    try:
        chapter_data = collection.get(
            where={
                "$and": [
                    {"code_type": {"$eq": code_type}},
                    {"parent_major": {"$eq": chapter_id}}
                ]
            },
            include=["documents", "metadatas"]
        )

        documents = chapter_data['documents']
        if not documents:
            return f"No documentation found for {code_type} Chapter {chapter_id}."

        # Order sections by their full section identifier (string sort).
        ordered = sorted(
            zip(chapter_data['metadatas'], documents),
            key=lambda pair: pair[0]['section_full']
        )

        parts = [f"## FULL LEGAL TEXT: {code_type.upper()} CODE - CHAPTER {chapter_id}\n\n"]
        for meta, doc in ordered:
            # Keep the first occurrence of each distinct "[CONT.]:" chunk,
            # preserving order, to strip duplicated continuations.
            seen = []
            for fragment in doc.split("[CONT.]:"):
                stripped = fragment.strip()
                if stripped and stripped not in seen:
                    seen.append(stripped)
            clean_doc = " ".join(seen)
            parts.append(f"### SECTION {meta['section_full']}\n{clean_doc}\n\n---\n\n")

        full_text = "".join(parts)

        # Update the chapter display
        state.set_chapter(full_text)

        return full_text

    except Exception as e:
        return f"Error retrieving chapter content: {str(e)}"
300
-
301
def nyc_legal_sub_agent(research_goal: str):
    """Run the legal-research sub-agent loop against the OpenAI API.

    Drives a tool-calling chat loop (max 20 turns) in which the model may
    call ``discover_code_locations`` and ``fetch_full_chapter``, then
    returns the model's final text report.

    Args:
        research_goal: Focused legal topic to investigate.

    Returns:
        The final assistant message content (a markdown legal report).
        NOTE(review): may be ``None`` if the loop ends on a tool-call turn
        without a final text message — confirm callers tolerate that.
    """
    state.add_log(f'⚖️ Investigating NYC Code for: **{research_goal}**')

    state.add_analysis(
        f"⚖️ Legal Analyst is searching\n{research_goal}"
    )

    client = openai.OpenAI()

    # JSON schemas exposed to the model for its two research tools.
    internal_tools = [
        {
            "type": "function",
            "function": {
                "name": "discover_code_locations",
                "description": "Scans NYC code in a semantic vector database. Use this FIRST to find which chapters/sections are relevant.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "semantic search string for a vector database (Not a keyword search use a full sentence)"}
                    },
                    "required": ["query"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "fetch_full_chapter",
                "description": "Retrieves the full legal text of a specific chapter for deep analysis.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code_type": {
                            "type": "string",
                            "enum": ["Administrative", "Building", "FuelGas", "Mechanical", "Plumbing"],
                            "description": "The specific NYC code volume to search."
                        },
                        "chapter_id": {"type": "string", "description": "The chapter number string"}
                    },
                    "required": ["code_type", "chapter_id"]
                }
            }
        }
    ]

    messages = [
        {"role": "system", "content": """
You are a Senior NYC Building Code Consultant and Legal Research Agent.

Your task is to produce a **definitive, citation-backed legal report** that can be used directly by a downstream orchestration agent.
Accuracy, traceability, and completeness matter more than brevity.

========================
PRIMARY OBJECTIVE
========================
Given a research goal, identify and analyze relevant NYC Code provisions, including:
- Governing sections
- Exceptions
- Cross-references
- Related chapters that modify, limit, or expand the rule

Every legal claim MUST be supported by a specific code citation.

You are operating in FAST LEGAL MODE.

SEARCH BUDGET:
- Maximum of 2 calls to `discover_code_locations`
- Maximum of 2 calls to `fetch_full_chapter`

STOP CONDITIONS:
- If the first chapter fetch contains governing text AND exceptions, STOP and synthesize.
- Only fetch a second chapter if the first chapter explicitly cross-references another chapter.

PRIORITY ORDER:
1) Governing rule section
2) Exceptions
3) Cross-references that MODIFY the rule
Ignore definitions and administrative content unless directly referenced.

GOOD ENOUGH STANDARD:
If you can identify:
- The governing section
- At least one exception or limitation
You must STOP and report.

========================
TOOL STRATEGY (MANDATORY)
========================
This is a semantic vector database, NOT a keyword index. Always search in full English questions.

1) FIRST — Call `discover_code_locations`
- Use a natural-language query describing the legal requirement you are trying to find
- Example: "What NYC Building Code sections regulate emergency egress width in residential buildings"
NEVER use a keyword search thi will not work you are searching a vector database.
If you know what chaoter you need call the fetch_full_chapter tool instead.
If you perform TWO consecutive `discover_code_locations` calls
and both return no new relevant chapters or sections:

You MUST stop searching and do one of the following:
- Conclude that the table/section does NOT exist as a standalone provision in the NYC Code corpus, OR
- Conclude that the requirement is embedded within the previously retrieved sections

Then proceed to report findings using the closest governing section.

DO NOT continue reformulating the same query.
You MUST NOT call `discover_code_locations` more than once for the same legal concept.
If a new query is semantically similar to a prior query, STOP and move forward with analysis.

2) SECOND — Call `fetch_full_chapter`
- If multiple relevant sections appear in the same chapter
- OR if a section contains exceptions, references, or conditional language
- OR if you know what section of the code is relevant and want to see a full chapter

3) THIRD — Follow Cross-References
- If a section says "See Section X", "As required by Chapter Y", or "Except as permitted in..."
- You MUST search and retrieve those sections as well

4) STOP ONLY WHEN
- All exceptions are reviewed
- All cross-references are resolved
- No additional modifying sections remain

========================
OUTPUT FORMAT (STRICT)
========================
Return a structured legal report in the following format:

### Legal Summary
Brief, plain-language explanation of what the code requires.

### Governing Code Sections
- **[Code Type] §[Section Number] — [Title]**
- Summary:
- Key Requirements:
- Applicability Conditions:
- Exceptions:

### Cross-References Analyzed
- **§[Section Number] — [Title]**
- Why It Matters:
- Impact on Main Rule:

### Edge Cases & Enforcement Notes
- Special conditions (building type, occupancy class, height, system type, jurisdictional notes)
- Common misinterpretations
- DOB or FDNY enforcement implications (if relevant)

### Compliance Checklist
- Bullet list of actionable compliance steps derived from the code

========================
QUALITY RULES
========================
- NEVER summarize without citing
- NEVER assume jurisdiction, building type, or occupancy unless the code explicitly states it
- If legal text is ambiguous, flag it as **Interpretive**
- Prefer quoting short legal phrases when clarity matters

========================
TONE
========================
Professional. Precise. Legal-research quality. No speculation.
"""},
        {"role": "user", "content": f"Analyze the NYC building code with this goal: {research_goal}"}
    ]

    # Tool-calling loop: up to 20 round trips before giving up.
    for _ in range(20):
        response = client.chat.completions.create(
            model="gpt-5-mini",
            messages=messages,
            tools=internal_tools,
            tool_choice="auto"
        )

        msg = response.choices[0].message
        # The SDK message object is appended directly; the OpenAI client
        # accepts its own message objects in the history.
        messages.append(msg)

        if not msg.tool_calls:
            # No more tool requests: msg.content holds the final report.
            break

        for tool_call in msg.tool_calls:
            func_name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)

            # NOTE(review): if the model ever names a tool outside these two,
            # `result` from the previous iteration would be reused (or be
            # unbound on the first iteration).
            if func_name == "discover_code_locations":
                result = discover_code_locations(args['query'])
            elif func_name == "fetch_full_chapter":
                result = fetch_full_chapter(args['code_type'], args['chapter_id'])

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })

    state.add_analysis(
        f"🟨 Legal Analyst\n{msg.content}"
    )

    return msg.content
502
-
503
def merge_tiles(tile_indexes: list[int], page_num: int):
    """Stitch the requested tiles of one page into a single PNG image.

    Tile pixel offsets come from ``tile_metadata[page_num][i]['coords']``;
    tiles are pasted onto a white canvas shifted so the top-left tile
    lands at (0, 0).

    Args:
        tile_indexes: Tile IDs to stitch (must be valid for the page).
        page_num: Page whose tile bytes and coordinates are used.

    Returns:
        PNG-encoded bytes of the stitched image, or ``None`` when
        *tile_indexes* is empty.

    Raises:
        ValueError: If a tile index is out of range or has no image bytes.
    """
    state.add_log(f'🔬 Stitching tiles **{tile_indexes}** from page **{page_num}**')

    images = []
    positions = []

    tiles = tile_bytes[page_num]
    tiles_coords_dict = tile_metadata[page_num]

    for index in tile_indexes:
        if index < 0 or index >= len(tiles):
            raise ValueError(f"Tile index {index} out of range")

        img_bytes = tiles[index]
        if img_bytes is None:
            raise ValueError(f"No image bytes found for tile {index}")

        img = Image.open(io.BytesIO(img_bytes)).convert('RGBA')
        images.append(img)

        # Absolute pixel position of this tile on the original page.
        x = tiles_coords_dict[index]['coords'][0]
        y = tiles_coords_dict[index]['coords'][1]
        positions.append((x, y))

    if not images:
        return None

    # Normalise so the canvas origin matches the top-left-most tile.
    min_x = min(x for x, y in positions)
    min_y = min(y for x, y in positions)
    normalized_positions = [(x - min_x, y - min_y) for x, y in positions]

    # Canvas must be large enough to contain every shifted tile.
    total_width = max(pos[0] + img.width for pos, img in zip(normalized_positions, images))
    total_height = max(pos[1] + img.height for pos, img in zip(normalized_positions, images))

    stitched_image = Image.new('RGB', (total_width, total_height), (255, 255, 255))

    for img, pos in zip(images, normalized_positions):
        stitched_image.paste(img, pos)

    # Add to image gallery

    # Re-encode the composed canvas as PNG bytes for downstream use.
    output_buffer = io.BytesIO()
    stitched_image.save(output_buffer, format='PNG')
    stitched_bytes = output_buffer.getvalue()

    return stitched_bytes
549
-
550
def extract_json(s: str):
    """Parse the JSON object embedded in *s*.

    Takes the span from the first ``{`` to the last ``}`` of the stripped
    input and decodes it; raises ValueError when no such span exists.
    """
    stripped = s.strip()
    start = stripped.find("{")
    end = stripped.rfind("}")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON object found in model output:\n" + repr(stripped))
    return json.loads(stripped[start:end + 1])
558
-
559
def sanitize_tile_indices(data):
    """
    Forcefully converts various LLM outputs into a clean list of integers.
    Handles: [1, 2], ["1", "2"], "1, 2, 3", "[1, 2, 3]", and None.
    """
    if not data:
        return []

    # Lists: keep every element that survives an int() round-trip.
    if isinstance(data, list):
        cleaned = []
        for entry in data:
            try:
                cleaned.append(int(str(entry).strip()))
            except (ValueError, TypeError):
                continue
        return cleaned

    # Strings: pull out every run of digits, whatever the surrounding text.
    if isinstance(data, str):
        return [int(token) for token in re.findall(r'\d+', data)]

    # Any other truthy type is unusable.
    return []
585
-
586
def execute_page_expert(expert_instructions: str, page_num: int):
    """Run the visual Page Expert agent loop over one drawing page.

    Sends the full-page image, OCR text, and tile grid map to a
    vision-capable OpenAI model, letting it call ``merge_tiles`` for
    zoomed views, and returns its final strict-JSON findings packet.
    Side effects: stitched/full-page images are pushed to the gallery and
    staged on ``state`` for the later Gemini audit.

    Args:
        expert_instructions: Planner instruction describing what to verify.
        page_num: Index into ``image_bytes_list`` / ``text_list`` /
            ``tile_metadata``.

    Returns:
        dict parsed from the model's final JSON output.

    Raises:
        RuntimeError: If no parseable final JSON is produced in MAX_TURNS.
    """
    state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
    state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
    state.add_log(f'📄 Attaching full-page context for page **{page_num}**')
    state.add_analysis(
        f"📄 Full-page context attached for page `{page_num}`"
    )

    # Show the full page in the gallery before analysis starts.
    full_page_img = Image.open(
        io.BytesIO(image_bytes_list[page_num])
    )
    state.add_image(full_page_img)

    client = openai.OpenAI()

    # Single tool exposed to the model: tile stitching for zoomed views.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "merge_tiles",
                "description": "Stitches high-resolution image tiles together into a single zoomed-in view. Use this to read small text, dimensions, or symbols.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "tile_indexes": {
                            "type": "array",
                            "items": {"type": "integer"},
                            "description": "A list of integer tile IDs from the Grid Map to stitch together."
                        }
                    },
                    "required": ["tile_indexes"]
                }
            }
        }
    ]

    page_text = text_list[page_num]
    relevant_tile_meta = tile_metadata[page_num]
    b64_full_page = base64.b64encode(image_bytes_list[page_num]).decode()

    system_prompt = """
You are a Lead AEC Visual Investigator supporting a Compliance Planner.

Your mission is to extract **verifiable, high-fidelity evidence** from this drawing page.
You must ground every claim in either:
- a **Zoomed Tile Image** (via `merge_tiles`) or
- a **Direct Text Quote** from the OCR page text.

Guesses, assumptions, and general descriptions are not allowed.

========================
MANDATORY WORKFLOW
========================
1) ORIENT
- Review the full-page image and the Grid Map to identify candidate regions.
- Decide which tiles likely contain the required evidence. Utilize the tile metadata to assist with this tasl.

2) ZOOM (REQUIRED)
- You MUST call `merge_tiles(tile_indexes=[...])` before making ANY factual claim about symbols, dimensions, labels, or locations.
- Always request ALL tiles needed in a SINGLE call.
- If the first zoom is insufficient, call again with additional tiles.
- Call the zoom until you have found all relevant tiles, refer to the tile metadata to assist in your search.

3) VERIFY
- Read the zoomed image carefully.
- Extract exact values, tags, room names, and directional cues.

4) REPORT
- Return the Findings Packet in strict JSON format.

========================
WHAT COUNTS AS PROOF
========================
- Dimension values (e.g., “36\"”, “1 HR RATED”)
- Explicit labels (e.g., “EXIT”, “STAIR A”, “R-2”, “COLUMN C3”)
- Symbol legends that define a mark
- Path continuity that can be visually traced across tiles
- OCR text snippets

========================
FINDINGS RULES
========================
- Every bullet in `findings` MUST cite either:
- `[Tile <ID>]` or
- `"Quoted text"`
- If a claim cannot be verified from the zoomed tiles or text, mark it as **Unverified**.
- Be comprehensive in this report, your supervisor only has access to the report you give in findings, not the full page text or other image data you have.
- Do NOT repeat planner instructions — only report what you observe.

========================
VISUAL POINTERS RULES
========================
- Exclude orientation-only or whitespace tiles.
- Include ALL tiles needed to re-trace a path or confirm a relationship.
- **Your superviser will ONLY see the tiles that you reference here, be comprehensive when returning these tiles.**

========================
FULL PAGE USEFULNESS
========================
Set `true` ONLY if the finding requires spatial context across the entire page, or if your zoom is missing information.
(e.g., tracing egress path, riser continuity, system routing).
Otherwise set `false`.

========================
JSON FORMAT (STRICT)
========================
{
"findings": "<markdown string with bullet points and citations>",
"visual_pointers": [list of <int>],
"textual_evidence": ["<exact quotes from PAGE TEXT>"],
"full_page_usefulness": <true|false>,
"limitations": "<what could not be verified and why>"
}

========================
FAILURE CONDITIONS
========================
- If no relevant evidence exists on this page, return:
{
"findings": "No relevant technical evidence found for the planner's instruction.",
"visual_pointers": [],
"textual_evidence": [],
"full_page_usefulness": false,
"limitations": "This page does not contain the requested information or it is not legible at available resolution."
}

Return ONLY valid JSON.
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": f"Planner Instruction:\n{expert_instructions}"},
                {"type": "text", "text": f"Page Context:\n{page_text}"},
                {"type": "text", "text": f"Available Grid Map:\n{json.dumps(relevant_tile_meta)}"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{b64_full_page}"
                    }
                }
            ]
        }
    ]

    MAX_TURNS = 3

    for turn in range(MAX_TURNS):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto"
        )

        msg = response.choices[0].message
        messages.append(msg)

        # A content message may be the final JSON packet: try to parse it.
        if msg.content:
            try:
                res = extract_json(msg.content)

                state.add_analysis(
                    f"🟨 Page Analyst\n{res.get('findings','')}"
                )
                raw_pointers = res.get("visual_pointers", [])
                tile_idxs = sanitize_tile_indices(raw_pointers)

                # Fix: the original also compared the list against the
                # string '[]', a condition that is always true for a list.
                if tile_idxs:
                    stitched_bytes = merge_tiles(
                        tile_indexes=tile_idxs,
                        page_num=page_num
                    )

                    state.add_log(f'📸 Staging {len(tile_idxs)} tiles for final audit...')

                    # Store these to use AFTER the chat finishes
                    state.add_staged_image_part(
                        types.Part.from_bytes(
                            data=stitched_bytes,  # 'data=' is required here
                            mime_type="image/png"
                        )
                    )

                    stitched_img = Image.open(
                        io.BytesIO(stitched_bytes)
                    )
                    state.add_image(stitched_img)

                    # Also stage the full page for spatial context.
                    state.add_staged_image_part(
                        types.Part.from_bytes(
                            data=image_bytes_list[page_num],  # 'data=' is required here
                            mime_type="image/png"
                        )
                    )

                return res
            except Exception:
                # Fix: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. Content that is not the
                # final JSON (e.g. commentary around a tool call) is
                # expected here, so we fall through to tool handling.
                pass

        # Execute any requested zooms and feed the images back in.
        if msg.tool_calls:
            tool_results = []
            image_blocks = []

            for call in msg.tool_calls:
                if call.function.name == "merge_tiles":
                    args = json.loads(call.function.arguments)
                    idxs = args["tile_indexes"]

                    stitched_bytes = merge_tiles(
                        tile_indexes=idxs,
                        page_num=page_num
                    )

                    b64_tile = base64.b64encode(stitched_bytes).decode()

                    tool_results.append({
                        "role": "tool",
                        "tool_call_id": call.id,
                        "content": json.dumps({
                            "status": "success",
                            "tiles": idxs
                        })
                    })

                    # Zoomed images go back as a user turn because the
                    # tool role cannot carry image content.
                    image_blocks.append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{b64_tile}"
                            }
                        }
                    )

            for tool_msg in tool_results:
                messages.append(tool_msg)

            messages.append({
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Here are the high-resolution zooms you requested. Analyze exits, locations, and any capacity labels."
                    },
                    *image_blocks
                ]
            })

            continue

        # Neither parseable JSON nor tool calls: prod for the final answer.
        messages.append({
            "role": "user",
            "content": "Return the FINAL JSON now."
        })

    raise RuntimeError("No FINAL JSON output from Page Expert")
848
-
849
# Set up Gemini planner
# The three specialist sub-agents the planner can dispatch to.
tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
import time  # NOTE(review): mid-file import; conventionally belongs with the imports at the top of the file
planner = genai.Client()
planner_model = "gemini-3-flash-preview"
854
- planner_prompt = f"""
855
- You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.
856
-
857
- Your role is to coordinate specialist sub-agents and deliver a **proof-carrying compliance verdict**
858
- based ONLY on:
859
- - OCR-extracted drawing text
860
- - High-resolution visual evidence (tile zooms)
861
- - Official NYC Code citations
862
-
863
- You must NOT speculate or rely on architectural norms.
864
-
865
- ========================
866
- DRAWING INDEX (Page Metadata)
867
- ========================
868
- Use this index to select pages for visual inspection.
869
- Avoid irrelevant sheets (e.g., Site, Civil, Utility, Stormwater) unless zoning or site compliance is explicitly required.
870
- {json.dumps(page_metadata)}
871
-
872
- ========================
873
- SPECIALIST SUB-AGENTS
874
- ========================
875
- None of these agents have access to your chat history or internal thought process.
876
- They know only how to access information (text, images or code) and what information you give them in the research goal.
877
- If they need more context or specific instructions YOU MUST PROVIDE IT WHEN CALLING THEM in the research goal.
878
-
879
- 1) `search_page_text`
880
- Purpose: FAST signal extractor.
881
- Use to identify code-triggering facts:
882
- - Occupancy classification
883
- - Building height / stories / high-rise
884
- - Construction type
885
- - Scope of work (new, alteration, addition, change of occupancy)
886
- - Fire protection systems
887
- Output is used ONLY to constrain legal research.
888
-
889
- 2) `nyc_legal_sub_agent`
890
- Purpose: Definitive legal authority.
891
- Use to retrieve governing NYC Code sections, exceptions, and cross-references.
892
- Always pass a focused topic derived from Phase 1 signals.
893
-
894
- 3) `execute_page_expert`
895
- Purpose: High-resolution visual verification.
896
- Use to confirm compliance or non-compliance by zooming tiles.
897
- This agent provides the ONLY acceptable visual proof.
898
-
899
- ========================
900
- MANDATORY PHASED WORKFLOW
901
- ========================
902
- PHASE 1 — SIGNAL EXTRACTION
903
- - Use `search_page_text` on candidate pages to determine:
904
- occupancy, height, construction type, system presence, and scope.
905
- - If signals are missing or ambiguous, expand to additional pages.
906
- - Do NOT proceed until you have enough facts to define legal applicability.
907
-
908
- PHASE 2 — LEGAL SCOPING
909
- - Convert Phase 1 signals into a focused legal topic.
910
- - Call `nyc_legal_sub_agent`.
911
- - Extract governing sections, exceptions, and edge cases.
912
-
913
- PHASE 3 — VISUAL VERIFICATION
914
- - Identify the SINGLE most relevant page for proof.
915
- - Call `execute_page_expert` with precise instructions tied to legal requirements
916
- (e.g., “Verify exit door clear width at Stair A serving R-2 occupancy”).
917
- - Ensure returned findings include tile IDs and/or text quotes.
918
-
919
- PHASE 4 — SYNTHESIS & VERDICT
920
- - Compare visual findings directly against legal requirements.
921
- - Resolve conflicts:
922
- - If legal text and visual evidence disagree → flag as **Non-Compliant or Ambiguous**
923
- - If evidence is missing → flag as **Unverified**
924
- - Cite both:
925
- - NYC Code Section(s)
926
- - Tile ID(s) or OCR quotes
927
-
928
- ========================
929
- FINAL OUTPUT FORMAT (STRICT MARKDOWN)
930
- ========================
931
- ### Compliance Verdict
932
- **Status:** Compliant | Non-Compliant | Unverified | Ambiguous
933
-
934
- ### Legal Basis
935
- - **[Code Type] §[Section] — [Title]**
936
- - Requirement:
937
- - Exceptions Considered:
938
-
939
- ### Visual Evidence
940
- - Finding: <short statement>
941
- - Proof: [Tile ID(s)] or "Quoted OCR Text"
942
-
943
- ### Reasoning
944
- - Step-by-step comparison between legal requirement and observed condition
945
-
946
- ### Limitations
947
- - What could not be verified and why
948
-
949
- ========================
950
- CONTROL RULES
951
- ========================
952
- - NEVER call `nyc_legal_sub_agent` before `search_page_text`
953
- - NEVER issue a final verdict without calling `execute_page_expert`
954
- - If no page contains sufficient proof, return **Unverified**
955
- - Prefer false negatives over false positives
956
- *** CRITICAL VISUAL PROTOCOL ***
957
- - When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
958
- - When you see this, your ONLY response must be: "Awaiting visual proof."
959
- - DO NOT attempt to guess the verdict.
960
- - DO NOT complain about missing images.
961
- - Simply wait. The user will immediately send the images in the next turn.
962
-
963
-
964
- ========================
965
- QUALITY STANDARD
966
- ========================
967
- This output should be defensible to a DOB plan examiner or legal reviewer.
968
- Every claim must be traceable to law and evidence.
969
- """
970
-
971
# Build the Gemini generation config: the planner prompt becomes the system
# instruction and the declared tool functions are exposed for function calling.
config = types.GenerateContentConfig(
    system_instruction=planner_prompt,
    tools=tools_list
)

# Module-level chat session.
# NOTE(review): agent_worker creates its own session with the same config,
# so this instance looks unused — confirm before removing.
chat = planner.chats.create(model=planner_model, config=config)
977
-
978
-
979
def agent_worker(user_question):
    """Background worker that drives the planner chat end-to-end.

    Runs in a daemon thread and communicates exclusively through the shared
    module-level `state` (InterfaceState). Phases 1-3 are the standard tool
    loop; Phase 4 sends any staged images back for a final "visual audit".

    Args:
        user_question: The compliance question typed by the user.
    """
    state.clear()
    state.add_log(f'🚀 Starting analysis for: **{user_question}**')
    state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")

    # 1. Initialize the Stateful Chat
    chat = planner.chats.create(model=planner_model, config=config)
    response = chat.send_message(user_question)

    # 2. Track images throughout the conversation

    # 3. Standard Tool Loop (Phases 1-3)
    # NOTE(review): only parts[0] is inspected for a function call; if the
    # model emits a text part before a tool call, the loop exits early —
    # confirm this is the intended termination condition.
    while response.candidates[0].content.parts[0].function_call:
        tool_responses = []

        for part in response.candidates[0].content.parts:
            if part.function_call:
                name = part.function_call.name
                args = part.function_call.args
                state.add_log(f'🛠️ Tool Call: **{name}**')

                # Dispatch by name to the module-level tool function.
                func = globals()[name]
                result = func(**args)

                tool_responses.append(
                    types.Part.from_function_response(name=name, response={"result": result})
                )

        # Send tool results back to the stateful chat
        response = chat.send_message(tool_responses)

    # -----------------------------------------------------------------
    # PHASE 4: THE POST-CHAT HANDOFF (The "Visual Audit")
    # -----------------------------------------------------------------

    # At this point, the while loop has ended.
    # 'response.text' contains the model's preliminary answer.

    audit_images = state.get_staged_images()

    if audit_images:
        state.add_log(f"👁️ Preliminary answer received. Performing audit with {len(audit_images)} images...")

        # 1. Construct the audit parts
        # Ensure 'text=' is used for the Part constructor
        audit_parts = [
            types.Part.from_text(
                text="You have provided a preliminary verdict. Now, look at these images "
                "to verify your findings. If the visual evidence contradicts your "
                "text-based search, update your verdict now. "
            ),
            *audit_images
        ]

        try:
            # 2. Send directly through the 'chat' session
            # This automatically appends to history and maintains the session state
            final_response = chat.send_message(audit_parts)

            state.final_answer = final_response.text

        except Exception as e:
            # If the above fails, try the explicit message keyword
            state.add_log("🔄 Retrying audit with explicit message keyword...")
            final_response = chat.send_message(message=audit_parts)
            state.final_answer = final_response.text

    else:
        state.add_log("⚠️ No images found in state. Skipping visual audit.")
        state.final_answer = response.text

    state.add_log('🏁 **ANALYSIS COMPLETE**')
    # Signals run_agentic_workflow's polling loop to emit its final snapshot.
    state.done = True
1052
-
1053
-
1054
def run_agentic_workflow(user_question):
    """Gradio generator: launch the agent worker and stream UI snapshots.

    Starts `agent_worker` on a daemon thread, then polls the shared `state`
    roughly four times a second, yielding (logs, analysis, chapter, images,
    final_answer) tuples. A final snapshot with the completed answer is
    yielded once the worker sets `state.done`.

    Args:
        user_question: Text from the Gradio question box.

    Yields:
        Tuples matching the five Gradio output components.
    """
    # BUG FIX: `time` is used below but was never imported at module level;
    # the original raised NameError on the first poll cycle.
    import time

    state.done = False
    state.final_answer = ""

    thread = threading.Thread(
        target=agent_worker,
        args=(user_question,),
        daemon=True
    )
    thread.start()

    # Stream intermediate snapshots while the worker is running.
    while not state.done:
        with state.lock:
            logs = "\n\n".join(state.log_messages)
            analysis = "\n\n".join(state.analysis_messages)
            chapter = state.current_chapter
            images = list(state.current_images)

        yield (
            logs,
            analysis,
            chapter,
            images,
            "*Analysis in progress...*"
        )
        time.sleep(0.25)

    # Final snapshot, including the completed verdict.
    with state.lock:
        logs = "\n\n".join(state.log_messages)
        analysis = "\n\n".join(state.analysis_messages)
        chapter = state.current_chapter
        images = list(state.current_images)
        final = state.final_answer

    yield (
        logs,
        analysis,
        chapter,
        images,
        final
    )
1095
-
1096
-
1097
# Build Gradio Interface
# Three-column layout: question + log | sub-agent analysis | code chapter,
# followed by an image gallery and the final verdict panel.
with gr.Blocks(title="AEC Compliance Agent") as demo:
    gr.Markdown("# 🏗️ AEC Compliance Analysis Agent")
    gr.Markdown("Ask questions about NYC Building Code compliance for your construction drawings.")

    with gr.Row():
        with gr.Column(scale=1):
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., Does this building comply with egress requirements for 738 occupants?",
                lines=3
            )
            submit_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

            gr.Markdown("### 📋 Analysis Log")
            log_output = gr.Markdown(value="", height=400)

        with gr.Column(scale=1):
            gr.Markdown("### 🧠 Sub-Agent Analysis")
            analysis_output = gr.Markdown(value="", height=600)

        with gr.Column(scale=1):
            gr.Markdown("### 📖 Code Chapter")
            chapter_output = gr.Markdown(value="*No chapter loaded yet*", height=600)

    with gr.Row():
        gr.Markdown("### 🖼️ Retrieved Images")

    with gr.Row():
        image_gallery = gr.Gallery(
            label="Visual Evidence",
            show_label=True,
            columns=2,
            height=400,
            object_fit="contain"
        )

    with gr.Row():
        gr.Markdown("### ✅ Final Compliance Verdict")

    with gr.Row():
        final_output = gr.Markdown(value="*Analysis pending...*")

    # run_agentic_workflow is a generator, so these outputs stream updates
    # while the worker thread runs.
    submit_btn.click(
        fn=run_agentic_workflow,
        inputs=[question_input],
        outputs=[
            log_output,
            analysis_output,  # NEW SLOT
            chapter_output,
            image_gallery,
            final_output
        ]
    )
1151
-
1152
if __name__ == "__main__":
    # Queue is required so the generator-based handler can stream updates.
    demo.queue().launch(
        inbrowser=True,
        # NOTE(review): if the PASSWORD env var is unset this passes None as
        # the credential — confirm it is always provided in deployment.
        auth=("username", os.getenv("PASSWORD"))  # only share the password
    )
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import re
5
+ from google import genai
6
+ from google.genai import types
7
+ import chromadb
8
+ from chromadb.utils import embedding_functions
9
+ from collections import Counter
10
+ import base64
11
+ import io
12
+ from PIL import Image
13
+ import matplotlib.pyplot as plt
14
+ import openai
15
+ from datetime import datetime
16
+ import threading
17
+
18
# Re-export the API keys. The original wrote `os.environ[k] = os.getenv(k)`
# directly, which raises TypeError at import time when a variable is unset
# (environ values must be strings, never None). Guard against that; setting
# an already-present variable to itself is a harmless no-op.
for _key in ("OPENAI_API_KEY", "GEMINI_API_KEY"):
    _value = os.getenv(_key)
    if _value is not None:
        os.environ[_key] = _value
20
+
21
# Global state for the interface
class InterfaceState:
    """Thread-safe state shared between the agent worker thread and the UI.

    All mutators take `self.lock` so the daemon worker thread and the Gradio
    polling generator can safely read/write the same fields.
    """

    def __init__(self):
        self.log_messages = []         # timestamped markdown log entries
        self.analysis_messages = []    # sub-agent analysis transcript
        self.current_chapter = ""      # last fetched code-chapter text
        self.current_images = []       # PIL images for the gallery
        self.staged_audit_images = []  # image Parts queued for the Gemini audit turn
        self.final_answer = ""         # final verdict markdown
        self.done = False              # set True when the worker finishes
        self.lock = threading.Lock()

    def add_log(self, message):
        """Append a timestamped log entry; return the full joined log."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        with self.lock:
            self.log_messages.append(f"**[{timestamp}]** {message}")
        return "\n\n".join(self.log_messages)

    def add_analysis(self, message):
        """Append a timestamped analysis entry; return the joined transcript."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        with self.lock:
            self.analysis_messages.append(f"**[{timestamp}]** {message}")
        return "\n\n".join(self.analysis_messages)

    def set_chapter(self, chapter_text):
        """Replace the currently displayed code-chapter text."""
        with self.lock:
            self.current_chapter = chapter_text
        return chapter_text

    def add_image(self, img_pil):
        """Append a PIL image for the gallery; return a copy of the list."""
        with self.lock:
            self.current_images.append(img_pil)
        return self.current_images.copy()

    def add_staged_image_part(self, image_part):
        """Thread-safe method to stage images for the Gemini Audit."""
        with self.lock:
            self.staged_audit_images.append(image_part)
            # Log it so we can verify it happened in the console
            print(f"DEBUG: Staged image part. Total staged: {len(self.staged_audit_images)}")

    def get_staged_images(self):
        """Safely retrieve the staged images for the audit turn."""
        with self.lock:
            return list(self.staged_audit_images)  # Return a copy to prevent mutation

    def clear(self):
        """Reset all per-run state before a new analysis starts.

        BUG FIX: `staged_audit_images` was previously not cleared here, so
        images staged during one run leaked into the visual audit of the
        next run.
        """
        with self.lock:
            self.log_messages.clear()
            self.analysis_messages.clear()
            self.current_chapter = ""
            self.current_images.clear()
            self.staged_audit_images.clear()  # was missing in the original
            self.final_answer = ""
            self.done = False
75
+
76
+
77
+
78
state = InterfaceState()

# Load your data (same as original)
# Per-page sheet metadata; JSON keys arrive as strings, so convert to int.
with open('Preprocessed Files/page_metadata.json', 'r') as json_file:
    page_metadata = json.load(json_file)
page_metadata = {int(k): v for k, v in page_metadata.items()}

# OCR text for every page, indexed by page number.
with open('Preprocessed Files/text_list.json', 'r') as json_file:
    text_list = json.load(json_file)

# Tile metadata: page -> tile index -> {coords, ...}. Both key levels are
# stringified in JSON, so both are converted back to ints.
with open('Preprocessed Files/tile_metadata.json', 'r') as json_file:
    tile_metadata = json.load(json_file)
tile_metadata = {
    int(outer_k): {
        int(inner_k): inner_v
        for inner_k, inner_v in outer_v.items()
    }
    for outer_k, outer_v in tile_metadata.items()
}
97
+
98
def load_fullpage_images(folder="Images"):
    """Read every ``page_<N>_fullpage.png`` in *folder*, ordered by page number.

    Returns:
        List of raw PNG byte strings, sorted by the page number embedded in
        each filename.
    """
    pattern = re.compile(r"page_(\d+)_fullpage\.png")

    # Collect (page number, filename) pairs for every matching file.
    numbered = []
    for entry in os.listdir(folder):
        hit = pattern.search(entry)
        if hit:
            numbered.append((int(hit.group(1)), entry))

    # Read file contents in ascending page order.
    pages = []
    for _, filename in sorted(numbered, key=lambda pair: pair[0]):
        with open(os.path.join(folder, filename), "rb") as handle:
            pages.append(handle.read())
    return pages
114
+
115
def load_tile_images(page, folder='Tiles'):
    """Load all tile images for *page*, ordered by tile index.

    Args:
        page: Integer page number whose tiles should be loaded.
        folder: Directory containing ``page_<page>_tile_<i>.png`` files.
            Defaults to ``'Tiles'`` (backward compatible with the original
            hard-coded folder).

    Returns:
        List of raw PNG byte strings sorted by tile index.
    """
    # BUG FIX: the original used a plain f-string pattern where ``\d`` is an
    # invalid escape sequence (SyntaxWarning on modern Python); use a raw
    # f-string instead. `page` is an int, so direct interpolation is safe.
    pattern = re.compile(rf"page_{page}_tile_(\d+)\.png")

    tile_files = []
    for f in os.listdir(folder):
        match = pattern.search(f)
        if match:
            # The captured group is the TILE index (the original misleadingly
            # named it `page_num`).
            tile_index = int(match.group(1))
            tile_files.append((tile_index, f))
    tile_files.sort(key=lambda x: x[0])

    image_bytes_list = []
    for _, filename in tile_files:
        path = os.path.join(folder, filename)
        with open(path, "rb") as fh:
            image_bytes_list.append(fh.read())
    return image_bytes_list
131
+
132
# Cache every full-page PNG once at startup.
image_bytes_list = load_fullpage_images()

# Cache tiles per page. BUG FIX: the original called load_tile_images(page)
# twice per page — once for the emptiness check and again for storage —
# doubling the disk reads. Load once and reuse the result.
tile_bytes = {}
for page in range(44):
    tile_list = load_tile_images(page)
    if tile_list:
        tile_bytes[page] = tile_list
139
+
140
# Vector Code Base
# Persistent Chroma collection holding the NYC building-code corpus,
# embedded with the all-MiniLM-L6-v2 sentence-transformer model.
chroma_client = chromadb.PersistentClient(path="nyc_code_db")
embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)

# NOTE(review): appears unused in the visible code — confirm before removing.
all_pending_images = []
146
+
147
# Modified tool functions with Gradio updates
def search_page_text(page_number: int, research_goal: str):
    """Fast signal extraction: summarize code-relevant facts from one page.

    Sends the page's OCR text (module-level `text_list`) plus the planner's
    research goal to the OpenAI "signal agent" prompt and returns its
    markdown summary. Also mirrors progress into the shared UI `state`.

    Args:
        page_number: Key into `text_list` and `page_metadata`.
        research_goal: Focus instruction provided by the planner.

    Returns:
        dict: {"page": page_number, "summary": <markdown string>}.
    """
    state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')

    state.add_analysis(
        f'🔍 Searching page {page_metadata[page_number]["sheet_title"]} with prompt\n{research_goal}'
    )

    raw_text = text_list[page_number]

    client = openai.OpenAI()
    # NOTE(review): model name "gpt-5-mini" is taken verbatim from the
    # original source — confirm it is available on this account.
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": """
You are a Fast NYC Plans Examiner Signal Agent.

Your ONLY job is to extract **code-relevant signals** from the OCR text of a SINGLE drawing page.
You do NOT interpret the law and you do NOT summarize design intent.

Your output will be used to CONSTRAIN a downstream legal research agent.

========================
WHAT TO EXTRACT
========================
Look only for information that determines which parts of the NYC Code apply such as:

- Occupancy classification (e.g., R-2, A-3, M, S, F, mixed-use)
- Building height (stories, feet, high-rise indicators)
- Construction type (I, II, III, IV, V)
- Fire protection systems (sprinklers, standpipes, fire alarm, smoke control)
- Means of egress references (stairs, exits, exit access, doors, corridors)
- Structural system hints (steel, concrete, load-bearing walls, columns, transfer girders)
- Mechanical / fuel / plumbing system mentions (boilers, gas piping, HVAC type, shafts)
- Zoning or special district references (if present)
- Scope flags (new building, alteration, addition, change of occupancy, retrofit)

However only return relevant signals to the provided research goal.

========================
OUTPUT FORMAT (STRICT MARKDOWN)
========================
Return ONLY the following sections:

### Code-Relevant Signals
- Bullet list of extracted facts

### Likely Governing Code Domains
- One-line list chosen from: Administrative, Building, Mechanical, FuelGas, Plumbing, Fire

### Text Evidence
- Short quoted snippets from the page that support each signal

========================
RULES
========================
- Do NOT speculate
- If a signal is not present, omit it
- Prefer exact phrases over paraphrase
- Keep total length under 500 words
- No legal conclusions, no compliance advice
"""},
            {"role": "user", "content": f"PAGE TEXT:\n{raw_text}\n\nRESEARCH GOAL: {research_goal}\n\nReturn a breif but comprehensive Markdown summary of your findings and justification with text snippets."}
        ]
    )

    analysis_text = response.choices[0].message.content

    state.add_analysis(
        f"🟦 Text Analyst (Page {page_number})\n{analysis_text}"
    )

    return {
        "page": page_number,
        "summary": analysis_text
    }
223
+
224
def discover_code_locations(query: str):
    """Semantic search over the NYC code vector DB.

    Queries the Chroma `collection` with *query*, tallies which code volume /
    chapter combinations dominate the hits, and returns a markdown-ish
    discovery report (or a no-results hint).
    """
    state.add_log(f'📚 Searching NYC Code for: **{query}**')

    hits = collection.query(
        query_texts=[query],
        n_results=25,
        include=["metadatas", "documents"]
    )

    metas = hits['metadatas'][0]
    if not metas:
        return "No results found. Try a different technical keyword."
    docs = hits['documents'][0]

    # Count hits per "code volume | chapter" pair and keep the top five.
    tally = Counter(f"{m['code_type']} | Ch. {m['parent_major']}" for m in metas)
    chapter_summary = "\n".join(
        f"- {pair} ({count} hits)" for pair, count in tally.most_common(5)
    )

    # One short report line per matching section.
    section_reports = [
        f"ID: {m['section_full']} | Code: {m['code_type']} | Chapter: {m['parent_major']}\n"
        f"Snippet: {doc}"
        for m, doc in zip(metas, docs)
    ]

    return (
        "### CODE DISCOVERY REPORT ###\n"
        f"MOST RELEVANT CHAPTERS:\n{chapter_summary}\n\n"
        "TOP RELEVANT SECTIONS:\n" +
        "\n---\n".join(section_reports)
    )
259
+
260
def fetch_full_chapter(code_type: str, chapter_id: str):
    """Return the full, de-duplicated legal text of one code chapter.

    Fetches every stored section for (code_type, chapter_id) from the Chroma
    collection, strips duplicate "[CONT.]:" fragments, pushes the assembled
    text into the UI chapter panel, and returns it. On any failure a short
    error string is returned instead of raising.
    """
    state.add_log(f'📖 Fetching Chapter **{chapter_id}** from **{code_type}** code')

    try:
        chapter_data = collection.get(
            where={
                "$and": [
                    {"code_type": {"$eq": code_type}},
                    {"parent_major": {"$eq": chapter_id}}
                ]
            },
            include=["documents", "metadatas"]
        )

        if not chapter_data['documents']:
            return f"No documentation found for {code_type} Chapter {chapter_id}."

        # Present sections in order of their full section identifier.
        ordered = sorted(
            zip(chapter_data['metadatas'], chapter_data['documents']),
            key=lambda pair: pair[0]['section_full']
        )

        full_text = f"## FULL LEGAL TEXT: {code_type.upper()} CODE - CHAPTER {chapter_id}\n\n"

        for meta, doc in ordered:
            # Chunked sections were stored with "[CONT.]:" separators; drop
            # empty and duplicate fragments before re-joining.
            seen = []
            for fragment in doc.split("[CONT.]:"):
                fragment = fragment.strip()
                if fragment and fragment not in seen:
                    seen.append(fragment)

            clean_doc = " ".join(seen)
            full_text += f"### SECTION {meta['section_full']}\n{clean_doc}\n\n---\n\n"

        # Update the chapter display
        state.set_chapter(full_text)

        return full_text

    except Exception as e:
        return f"Error retrieving chapter content: {str(e)}"
300
+
301
def nyc_legal_sub_agent(research_goal: str):
    """Run the GPT legal-research sub-agent for a focused NYC-code question.

    Loops over OpenAI tool calls (vector discovery + full chapter fetches,
    capped at 20 round-trips) until the model produces its final report,
    mirroring progress into the shared UI `state`.

    Args:
        research_goal: Focused legal topic derived from page signals.

    Returns:
        The agent's final markdown report (empty string if none produced).
    """
    state.add_log(f'⚖️ Investigating NYC Code for: **{research_goal}**')

    state.add_analysis(
        f"⚖️ Legal Analyst is searching\n{research_goal}"
    )

    client = openai.OpenAI()

    # Tool schema exposed to the sub-agent; each maps to a module-level
    # function of the same name.
    internal_tools = [
        {
            "type": "function",
            "function": {
                "name": "discover_code_locations",
                "description": "Scans NYC code in a semantic vector database. Use this FIRST to find which chapters/sections are relevant.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "semantic search string for a vector database (Not a keyword search use a full sentence)"}
                    },
                    "required": ["query"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "fetch_full_chapter",
                "description": "Retrieves the full legal text of a specific chapter for deep analysis.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code_type": {
                            "type": "string",
                            "enum": ["Administrative", "Building", "FuelGas", "Mechanical", "Plumbing"],
                            "description": "The specific NYC code volume to search."
                        },
                        "chapter_id": {"type": "string", "description": "The chapter number string"}
                    },
                    "required": ["code_type", "chapter_id"]
                }
            }
        }
    ]

    messages = [
        {"role": "system", "content": """
You are a Senior NYC Building Code Consultant and Legal Research Agent.

Your task is to produce a **definitive, citation-backed legal report** that can be used directly by a downstream orchestration agent.
Accuracy, traceability, and completeness matter more than brevity.

========================
PRIMARY OBJECTIVE
========================
Given a research goal, identify and analyze relevant NYC Code provisions, including:
- Governing sections
- Exceptions
- Cross-references
- Related chapters that modify, limit, or expand the rule

Every legal claim MUST be supported by a specific code citation.

You are operating in FAST LEGAL MODE.

SEARCH BUDGET:
- Maximum of 2 calls to `discover_code_locations`
- Maximum of 2 calls to `fetch_full_chapter`

STOP CONDITIONS:
- If the first chapter fetch contains governing text AND exceptions, STOP and synthesize.
- Only fetch a second chapter if the first chapter explicitly cross-references another chapter.

PRIORITY ORDER:
1) Governing rule section
2) Exceptions
3) Cross-references that MODIFY the rule
Ignore definitions and administrative content unless directly referenced.

GOOD ENOUGH STANDARD:
If you can identify:
- The governing section
- At least one exception or limitation
You must STOP and report.

========================
TOOL STRATEGY (MANDATORY)
========================
This is a semantic vector database, NOT a keyword index. Always search in full English questions.

1) FIRST — Call `discover_code_locations`
- Use a natural-language query describing the legal requirement you are trying to find
- Example: "What NYC Building Code sections regulate emergency egress width in residential buildings"
NEVER use a keyword search thi will not work you are searching a vector database.
If you know what chaoter you need call the fetch_full_chapter tool instead.
If you perform TWO consecutive `discover_code_locations` calls
and both return no new relevant chapters or sections:

You MUST stop searching and do one of the following:
- Conclude that the table/section does NOT exist as a standalone provision in the NYC Code corpus, OR
- Conclude that the requirement is embedded within the previously retrieved sections

Then proceed to report findings using the closest governing section.

DO NOT continue reformulating the same query.
You MUST NOT call `discover_code_locations` more than once for the same legal concept.
If a new query is semantically similar to a prior query, STOP and move forward with analysis.

2) SECOND — Call `fetch_full_chapter`
- If multiple relevant sections appear in the same chapter
- OR if a section contains exceptions, references, or conditional language
- OR if you know what section of the code is relevant and want to see a full chapter

3) THIRD — Follow Cross-References
- If a section says "See Section X", "As required by Chapter Y", or "Except as permitted in..."
- You MUST search and retrieve those sections as well

4) STOP ONLY WHEN
- All exceptions are reviewed
- All cross-references are resolved
- No additional modifying sections remain

========================
OUTPUT FORMAT (STRICT)
========================
Return a structured legal report in the following format:

### Legal Summary
Brief, plain-language explanation of what the code requires.

### Governing Code Sections
- **[Code Type] §[Section Number] — [Title]**
- Summary:
- Key Requirements:
- Applicability Conditions:
- Exceptions:

### Cross-References Analyzed
- **§[Section Number] — [Title]**
- Why It Matters:
- Impact on Main Rule:

### Edge Cases & Enforcement Notes
- Special conditions (building type, occupancy class, height, system type, jurisdictional notes)
- Common misinterpretations
- DOB or FDNY enforcement implications (if relevant)

### Compliance Checklist
- Bullet list of actionable compliance steps derived from the code

========================
QUALITY RULES
========================
- NEVER summarize without citing
- NEVER assume jurisdiction, building type, or occupancy unless the code explicitly states it
- If legal text is ambiguous, flag it as **Interpretive**
- Prefer quoting short legal phrases when clarity matters

========================
TONE
========================
Professional. Precise. Legal-research quality. No speculation.
"""},
        {"role": "user", "content": f"Analyze the NYC building code with this goal: {research_goal}"}
    ]

    # Bounded tool loop: at most 20 round-trips before giving up.
    for _ in range(20):
        response = client.chat.completions.create(
            model="gpt-5-mini",
            messages=messages,
            tools=internal_tools,
            tool_choice="auto"
        )

        msg = response.choices[0].message
        messages.append(msg)

        if not msg.tool_calls:
            break

        for tool_call in msg.tool_calls:
            func_name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)

            if func_name == "discover_code_locations":
                result = discover_code_locations(args['query'])
            elif func_name == "fetch_full_chapter":
                result = fetch_full_chapter(args['code_type'], args['chapter_id'])
            else:
                # BUG FIX: an unrecognized tool name previously left `result`
                # unbound and crashed with NameError; surface it to the model
                # instead so it can recover.
                result = f"Unknown tool requested: {func_name}"

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })

    # `msg.content` can be None when the loop exits by exhaustion while the
    # model was still requesting tools; normalize to a string for callers.
    final_report = msg.content or ""

    state.add_analysis(
        f"🟨 Legal Analyst\n{final_report}"
    )

    return final_report
502
+
503
def merge_tiles(tile_indexes: list[int], page_num: int):
    """Stitch the requested tiles of a page into a single PNG.

    Tiles are placed at their original page coordinates (from
    `tile_metadata`), normalized so the stitched canvas starts at (0, 0).

    Returns:
        PNG bytes of the stitched image, or None when no tiles were given.

    Raises:
        ValueError: If a tile index is out of range or has no stored bytes.
    """
    state.add_log(f'🔬 Stitching tiles **{tile_indexes}** from page **{page_num}**')

    tiles = tile_bytes[page_num]
    coords_by_tile = tile_metadata[page_num]

    # Decode each requested tile and pair it with its page coordinates.
    loaded = []
    for idx in tile_indexes:
        if idx < 0 or idx >= len(tiles):
            raise ValueError(f"Tile index {idx} out of range")

        raw = tiles[idx]
        if raw is None:
            raise ValueError(f"No image bytes found for tile {idx}")

        tile_img = Image.open(io.BytesIO(raw)).convert('RGBA')
        tile_x = coords_by_tile[idx]['coords'][0]
        tile_y = coords_by_tile[idx]['coords'][1]
        loaded.append((tile_img, (tile_x, tile_y)))

    if not loaded:
        return None

    # Shift all tiles so the top-left-most tile lands at the origin.
    min_x = min(pos[0] for _, pos in loaded)
    min_y = min(pos[1] for _, pos in loaded)
    placed = [(img, (x - min_x, y - min_y)) for img, (x, y) in loaded]

    canvas_w = max(px + img.width for img, (px, py) in placed)
    canvas_h = max(py + img.height for img, (px, py) in placed)

    # White RGB canvas; RGBA tiles are pasted onto it.
    canvas = Image.new('RGB', (canvas_w, canvas_h), (255, 255, 255))
    for img, pos in placed:
        canvas.paste(img, pos)

    buffer = io.BytesIO()
    canvas.save(buffer, format='PNG')
    return buffer.getvalue()
549
+
550
def extract_json(s: str):
    """Parse the outermost brace-delimited JSON object embedded in *s*.

    Takes everything between the first '{' and the last '}' of the stripped
    input and runs it through json.loads.

    Raises:
        ValueError: If no '{...}' span exists in the input.
    """
    stripped = s.strip()
    opening = stripped.find("{")
    closing = stripped.rfind("}")
    if opening == -1 or closing == -1 or closing < opening:
        raise ValueError("No JSON object found in model output:\n" + repr(stripped))
    return json.loads(stripped[opening:closing + 1])
558
+
559
def sanitize_tile_indices(data):
    """Coerce messy LLM tile-index output into a clean list of ints.

    Accepts real lists ([1, 2]), stringly-typed lists (["1", "2"]),
    delimited strings ("1, 2, 3" or "[1, 2, 3]"), and falsy values
    (None, "", [] -> empty list).
    """
    if not data:
        return []

    if isinstance(data, str):
        # Pull every run of digits out of the string, whatever the delimiters.
        return [int(token) for token in re.findall(r'\d+', data)]

    if isinstance(data, list):
        cleaned = []
        for element in data:
            try:
                # Also handles string elements like ["1", " 2 "].
                cleaned.append(int(str(element).strip()))
            except (ValueError, TypeError):
                pass
        return cleaned

    return []
585
+
586
+ def execute_page_expert(expert_instructions: str, page_num: int):
587
+ state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
588
+ state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
589
+ state.add_log(f'📄 Attaching full-page context for page **{page_num}**')
590
+ state.add_analysis(
591
+ f"📄 Full-page context attached for page `{page_num}`"
592
+ )
593
+
594
+ full_page_img = Image.open(
595
+ io.BytesIO(image_bytes_list[page_num])
596
+ )
597
+ state.add_image(full_page_img)
598
+
599
+ client = openai.OpenAI()
600
+
601
+ tools = [
602
+ {
603
+ "type": "function",
604
+ "function": {
605
+ "name": "merge_tiles",
606
+ "description": "Stitches high-resolution image tiles together into a single zoomed-in view. Use this to read small text, dimensions, or symbols.",
607
+ "parameters": {
608
+ "type": "object",
609
+ "properties": {
610
+ "tile_indexes": {
611
+ "type": "array",
612
+ "items": {"type": "integer"},
613
+ "description": "A list of integer tile IDs from the Grid Map to stitch together."
614
+ }
615
+ },
616
+ "required": ["tile_indexes"]
617
+ }
618
+ }
619
+ }
620
+ ]
621
+
622
+ page_text = text_list[page_num]
623
+ relevant_tile_meta = tile_metadata[page_num]
624
+ b64_full_page = base64.b64encode(image_bytes_list[page_num]).decode()
625
+
626
+ system_prompt = """
627
+ You are a Lead AEC Visual Investigator supporting a Compliance Planner.
628
+
629
+ Your mission is to extract **verifiable, high-fidelity evidence** from this drawing page.
630
+ You must ground every claim in either:
631
+ - a **Zoomed Tile Image** (via `merge_tiles`) or
632
+ - a **Direct Text Quote** from the OCR page text.
633
+
634
+ Guesses, assumptions, and general descriptions are not allowed.
635
+
636
+ ========================
637
+ MANDATORY WORKFLOW
638
+ ========================
639
+ 1) ORIENT
640
+ - Review the full-page image and the Grid Map to identify candidate regions.
641
+ - Decide which tiles likely contain the required evidence. Utilize the tile metadata to assist with this tasl.
642
+
643
+ 2) ZOOM (REQUIRED)
644
+ - You MUST call `merge_tiles(tile_indexes=[...])` before making ANY factual claim about symbols, dimensions, labels, or locations.
645
+ - Always request ALL tiles needed in a SINGLE call.
646
+ - If the first zoom is insufficient, call again with additional tiles.
647
+ - Call the zoom until you have found all relevant tiles, refer to the tile metadata to assist in your search.
648
+
649
+ 3) VERIFY
650
+ - Read the zoomed image carefully.
651
+ - Extract exact values, tags, room names, and directional cues.
652
+
653
+ 4) REPORT
654
+ - Return the Findings Packet in strict JSON format.
655
+
656
+ ========================
657
+ WHAT COUNTS AS PROOF
658
+ ========================
659
+ - Dimension values (e.g., “36\"”, “1 HR RATED”)
660
+ - Explicit labels (e.g., “EXIT”, “STAIR A”, “R-2”, “COLUMN C3”)
661
+ - Symbol legends that define a mark
662
+ - Path continuity that can be visually traced across tiles
663
+ - OCR text snippets
664
+
665
+ ========================
666
+ FINDINGS RULES
667
+ ========================
668
+ - Every bullet in `findings` MUST cite either:
669
+ - `[Tile <ID>]` or
670
+ - `"Quoted text"`
671
+ - If a claim cannot be verified from the zoomed tiles or text, mark it as **Unverified**.
672
+ - Be comprehensive in this report, your supervisor only has access to the report you give in findings, not the full page text or other image data you have.
673
+ - Do NOT repeat planner instructions — only report what you observe.
674
+
675
+ ========================
676
+ VISUAL POINTERS RULES
677
+ ========================
678
+ - Exclude orientation-only or whitespace tiles.
679
+ - Include ALL tiles needed to re-trace a path or confirm a relationship.
680
+ - **Your superviser will ONLY see the tiles that you reference here, be comprehensive when returning these tiles.**
681
+
682
+ ========================
683
+ FULL PAGE USEFULNESS
684
+ ========================
685
+ Set `true` ONLY if the finding requires spatial context across the entire page, or if your zoom is missing information.
686
+ (e.g., tracing egress path, riser continuity, system routing).
687
+ Otherwise set `false`.
688
+
689
+ ========================
690
+ JSON FORMAT (STRICT)
691
+ ========================
692
+ {
693
+ "findings": "<markdown string with bullet points and citations>",
694
+ "visual_pointers": [list of <int>],
695
+ "textual_evidence": ["<exact quotes from PAGE TEXT>"],
696
+ "full_page_usefulness": <true|false>,
697
+ "limitations": "<what could not be verified and why>"
698
+ }
699
+
700
+ ========================
701
+ FAILURE CONDITIONS
702
+ ========================
703
+ - If no relevant evidence exists on this page, return:
704
+ {
705
+ "findings": "No relevant technical evidence found for the planner's instruction.",
706
+ "visual_pointers": [],
707
+ "textual_evidence": [],
708
+ "full_page_usefulness": false,
709
+ "limitations": "This page does not contain the requested information or it is not legible at available resolution."
710
+ }
711
+
712
+ Return ONLY valid JSON.
713
+ """
714
+
715
+ messages = [
716
+ {"role": "system", "content": system_prompt},
717
+ {
718
+ "role": "user",
719
+ "content": [
720
+ {"type": "text", "text": f"Planner Instruction:\n{expert_instructions}"},
721
+ {"type": "text", "text": f"Page Context:\n{page_text}"},
722
+ {"type": "text", "text": f"Available Grid Map:\n{json.dumps(relevant_tile_meta)}"},
723
+ {
724
+ "type": "image_url",
725
+ "image_url": {
726
+ "url": f"data:image/png;base64,{b64_full_page}"
727
+ }
728
+ }
729
+ ]
730
+ }
731
+ ]
732
+
733
+ MAX_TURNS = 3
734
+
735
+ for turn in range(MAX_TURNS):
736
+ response = client.chat.completions.create(
737
+ model="gpt-4o",
738
+ messages=messages,
739
+ tools=tools,
740
+ tool_choice="auto"
741
+ )
742
+
743
+ msg = response.choices[0].message
744
+ messages.append(msg)
745
+
746
+ if msg.content:
747
+ try:
748
+ res = extract_json(msg.content)
749
+
750
+
751
+ state.add_analysis(
752
+ f"🟨 Page Analyst\n{res.get('findings','')}"
753
+ )
754
+ raw_pointers = res.get("visual_pointers", [])
755
+ tile_idxs = sanitize_tile_indices(raw_pointers)
756
+
757
+
758
+ if tile_idxs and tile_idxs != '[]':
759
+ stitched_bytes = merge_tiles(
760
+ tile_indexes=tile_idxs,
761
+ page_num=page_num
762
+ )
763
+
764
+ state.add_log(f'📸 Staging {len(tile_idxs)} tiles for final audit...')
765
+
766
+ # Store these to use AFTER the chat finishes
767
+ state.add_staged_image_part(
768
+ types.Part.from_bytes(
769
+ data=stitched_bytes, # <-- 'data=' is required here
770
+ mime_type="image/png"
771
+ )
772
+ )
773
+
774
+
775
+ stitched_img = Image.open(
776
+ io.BytesIO(stitched_bytes)
777
+ )
778
+ state.add_image(stitched_img)
779
+
780
+
781
+ state.add_staged_image_part(
782
+ types.Part.from_bytes(
783
+ data=image_bytes_list[page_num], # <-- 'data=' is required here
784
+ mime_type="image/png"
785
+ )
786
+ )
787
+
788
+ return res
789
+ except:
790
+ pass
791
+
792
+ if msg.tool_calls:
793
+ tool_results = []
794
+ image_blocks = []
795
+
796
+ for call in msg.tool_calls:
797
+ if call.function.name == "merge_tiles":
798
+ args = json.loads(call.function.arguments)
799
+ idxs = args["tile_indexes"]
800
+
801
+ stitched_bytes = merge_tiles(
802
+ tile_indexes=idxs,
803
+ page_num=page_num
804
+ )
805
+
806
+ b64_tile = base64.b64encode(stitched_bytes).decode()
807
+
808
+ tool_results.append({
809
+ "role": "tool",
810
+ "tool_call_id": call.id,
811
+ "content": json.dumps({
812
+ "status": "success",
813
+ "tiles": idxs
814
+ })
815
+ })
816
+
817
+ image_blocks.append(
818
+ {
819
+ "type": "image_url",
820
+ "image_url": {
821
+ "url": f"data:image/png;base64,{b64_tile}"
822
+ }
823
+ }
824
+ )
825
+
826
+ for tool_msg in tool_results:
827
+ messages.append(tool_msg)
828
+
829
+ messages.append({
830
+ "role": "user",
831
+ "content": [
832
+ {
833
+ "type": "text",
834
+ "text": "Here are the high-resolution zooms you requested. Analyze exits, locations, and any capacity labels."
835
+ },
836
+ *image_blocks
837
+ ]
838
+ })
839
+
840
+ continue
841
+
842
+ messages.append({
843
+ "role": "user",
844
+ "content": "Return the FINAL JSON now."
845
+ })
846
+
847
+ raise RuntimeError("No FINAL JSON output from Page Expert")
848
+
849
# ---------------------------------------------------------------------------
# Gemini planner setup (module level).
# ---------------------------------------------------------------------------
# Tools exposed to the planner: plain Python callables that the google-genai
# SDK wraps as function-calling tools (dispatched by name in agent_worker).
tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
import time  # used by run_agentic_workflow's polling loop

planner = genai.Client()
planner_model = "gemini-3-flash-preview"

# System instruction for the planner. Note: this is an f-string — the page
# metadata index is baked in at import time via {json.dumps(page_metadata)}.
planner_prompt = f"""
You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.

Your role is to coordinate specialist sub-agents and deliver a **proof-carrying compliance verdict**
based ONLY on:
- OCR-extracted drawing text
- High-resolution visual evidence (tile zooms)
- Official NYC Code citations

You must NOT speculate or rely on architectural norms.

========================
DRAWING INDEX (Page Metadata)
========================
Use this index to select pages for visual inspection.
Avoid irrelevant sheets (e.g., Site, Civil, Utility, Stormwater) unless zoning or site compliance is explicitly required.
{json.dumps(page_metadata)}

========================
SPECIALIST SUB-AGENTS
========================
None of these agents have access to your chat history or internal thought process.
They know only how to access information (text, images or code) and what information you give them in the research goal.
If they need more context or specific instructions YOU MUST PROVIDE IT WHEN CALLING THEM in the research goal.

1) `search_page_text`
Purpose: FAST signal extractor.
Use to identify code-triggering facts:
- Occupancy classification
- Building height / stories / high-rise
- Construction type
- Scope of work (new, alteration, addition, change of occupancy)
- Fire protection systems
Output is used ONLY to constrain legal research.

2) `nyc_legal_sub_agent`
Purpose: Definitive legal authority.
Use to retrieve governing NYC Code sections, exceptions, and cross-references.
Always pass a focused topic derived from Phase 1 signals.

3) `execute_page_expert`
Purpose: High-resolution visual verification.
Use to confirm compliance or non-compliance by zooming tiles.
This agent provides the ONLY acceptable visual proof.

========================
MANDATORY PHASED WORKFLOW
========================
PHASE 1 — SIGNAL EXTRACTION
- Use `search_page_text` on candidate pages to determine:
occupancy, height, construction type, system presence, and scope.
- If signals are missing or ambiguous, expand to additional pages.
- Do NOT proceed until you have enough facts to define legal applicability.

PHASE 2 — LEGAL SCOPING
- Convert Phase 1 signals into a focused legal topic.
- Call `nyc_legal_sub_agent`.
- Extract governing sections, exceptions, and edge cases.

PHASE 3 — VISUAL VERIFICATION
- Identify the SINGLE most relevant page for proof.
- Call `execute_page_expert` with precise instructions tied to legal requirements
(e.g., “Verify exit door clear width at Stair A serving R-2 occupancy”).
- Ensure returned findings include tile IDs and/or text quotes.

PHASE 4 — SYNTHESIS & VERDICT
- Compare visual findings directly against legal requirements.
- Resolve conflicts:
- If legal text and visual evidence disagree → flag as **Non-Compliant or Ambiguous**
- If evidence is missing → flag as **Unverified**
- Cite both:
- NYC Code Section(s)
- Tile ID(s) or OCR quotes

**NEVER CALL THE SAME AGENT FOR THE SAME TASK TWICE REFER TO PREVIOUS ANSWERS WHEN ABLE**

========================
FINAL OUTPUT FORMAT (STRICT MARKDOWN)
========================
### Compliance Verdict
**Status:** Compliant | Non-Compliant | Unverified | Ambiguous

### Legal Basis
- **[Code Type] §[Section] — [Title]**
- Requirement:
- Exceptions Considered:

### Visual Evidence
- Finding: <short statement>
- Proof: [Tile ID(s)] or "Quoted OCR Text"

### Reasoning
- Step-by-step comparison between legal requirement and observed condition

### Limitations
- What could not be verified and why

========================
CONTROL RULES
========================
- NEVER call `nyc_legal_sub_agent` before `search_page_text`
- NEVER issue a final verdict without calling `execute_page_expert`
- If no page contains sufficient proof, return **Unverified**
- Prefer false negatives over false positives
*** CRITICAL VISUAL PROTOCOL ***
- When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
- When you see this, your ONLY response must be: "Awaiting visual proof."
- DO NOT attempt to guess the verdict.
- DO NOT complain about missing images.
- Simply wait. The user will immediately send the images in the next turn.


========================
QUALITY STANDARD
========================
This output should be defensible to a DOB plan examiner or legal reviewer.
Every claim must be traceable to law and evidence.
"""

config = types.GenerateContentConfig(
    system_instruction=planner_prompt,
    tools=tools_list,
)

# NOTE: the previous version also created a module-level chat session here
# (`chat = planner.chats.create(...)`). It was never used — agent_worker
# creates its own per-request session — so the import-time remote call has
# been removed.
981
def agent_worker(user_question):
    """Background worker that drives the full planner → tools → audit pipeline.

    Runs a stateful Gemini chat, dispatching every tool call the planner
    requests, then performs a final visual audit using the tile images staged
    by the page expert. All output is published through the shared ``state``
    object. ``state.done`` is ALWAYS set on exit (success or failure) so the
    UI polling generator in ``run_agentic_workflow`` terminates.

    Args:
        user_question: The compliance question typed by the user.
    """
    try:
        state.clear()
        state.add_log(f'🚀 Starting analysis for: **{user_question}**')
        state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")

        # 1. Initialize the stateful chat session for this request.
        chat = planner.chats.create(model=planner_model, config=config)
        response = chat.send_message(user_question)

        # 2. Tool loop (Phases 1-3): keep serving tool calls until the model
        #    replies with no function calls at all. We check EVERY part, not
        #    just parts[0] — the model may interleave a text part before the
        #    function-call part, which would have ended the loop prematurely.
        while any(part.function_call for part in response.candidates[0].content.parts):
            tool_responses = []

            for part in response.candidates[0].content.parts:
                if part.function_call:
                    name = part.function_call.name
                    args = part.function_call.args
                    state.add_log(f'🛠️ Tool Call: **{name}**')

                    # Tools are dispatched by name from module globals; the
                    # planner is restricted to the declared tools_list.
                    func = globals()[name]
                    result = func(**args)

                    tool_responses.append(
                        types.Part.from_function_response(name=name, response={"result": result})
                    )

            # Send tool results back to the stateful chat.
            response = chat.send_message(tool_responses)

        # ------------------------------------------------------------------
        # PHASE 4: the post-chat handoff (the "Visual Audit").
        # 'response.text' now holds the model's preliminary answer; re-ground
        # it against the high-resolution tile images the page expert staged.
        # ------------------------------------------------------------------
        audit_images = state.get_staged_images()

        if audit_images:
            state.add_log(f"👁️ Preliminary answer received. Performing audit with {len(audit_images)} images...")

            # Construct the audit turn: instruction text plus every staged image.
            audit_parts = [
                types.Part.from_text(
                    text="You have provided a preliminary verdict. Now, look at these images "
                         "to verify your findings. If the visual evidence contradicts your "
                         "text-based search, update your verdict now. "
                ),
                *audit_images
            ]

            try:
                # Send through the same 'chat' session so history is preserved.
                final_response = chat.send_message(audit_parts)
                state.final_answer = final_response.text
            except Exception:
                # Some SDK versions require the explicit 'message' keyword.
                state.add_log("🔄 Retrying audit with explicit message keyword...")
                final_response = chat.send_message(message=audit_parts)
                state.final_answer = final_response.text
        else:
            state.add_log("⚠️ No images found in state. Skipping visual audit.")
            state.final_answer = response.text

        state.add_log('🏁 **ANALYSIS COMPLETE**')
    except Exception as exc:
        # Surface failures in the UI instead of dying silently in the thread.
        state.add_log(f'❌ Worker failed: {exc}')
        state.final_answer = f"**Error:** {exc}"
    finally:
        # Always unblock the polling generator, even on error — otherwise
        # run_agentic_workflow would spin forever.
        state.done = True
1056
def run_agentic_workflow(user_question):
    """Gradio event handler: launch the agent worker and stream UI updates.

    Starts ``agent_worker`` on a daemon thread, then yields
    (logs, analysis, chapter, images, verdict) tuples roughly four times a
    second until the worker flags completion, finishing with one final tuple
    that carries the real verdict.
    """
    state.done = False
    state.final_answer = ""

    worker = threading.Thread(
        target=agent_worker,
        args=(user_question,),
        daemon=True,
    )
    worker.start()

    def _snapshot():
        # Copy everything we render while holding the lock, so the worker
        # thread can keep mutating shared state safely.
        with state.lock:
            return (
                "\n\n".join(state.log_messages),
                "\n\n".join(state.analysis_messages),
                state.current_chapter,
                list(state.current_images),
                state.final_answer,
            )

    while not state.done:
        logs, analysis, chapter, images, _ = _snapshot()
        yield (
            logs,
            analysis,
            chapter,
            images,
            "*Analysis in progress...*",
        )
        time.sleep(0.25)

    # Final emission: same snapshot, but with the worker's real answer.
    logs, analysis, chapter, images, final = _snapshot()
    yield (
        logs,
        analysis,
        chapter,
        images,
        final,
    )
1099
# ---------------------------------------------------------------------------
# Gradio front-end: three-column layout (log | sub-agent analysis | chapter),
# an evidence gallery, and the final verdict panel.
# ---------------------------------------------------------------------------
with gr.Blocks(title="AEC Compliance Agent") as demo:
    gr.Markdown("# 🏗️ AEC Compliance Analysis Agent")
    gr.Markdown("Ask questions about NYC Building Code compliance for your construction drawings.")

    with gr.Row():
        # Left column: question entry + running analysis log.
        with gr.Column(scale=1):
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., Does this building comply with egress requirements for 738 occupants?",
                lines=3,
            )
            submit_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

            gr.Markdown("### 📋 Analysis Log")
            log_output = gr.Markdown(value="", height=400)

        # Middle column: findings streamed from the sub-agents.
        with gr.Column(scale=1):
            gr.Markdown("### 🧠 Sub-Agent Analysis")
            analysis_output = gr.Markdown(value="", height=600)

        # Right column: retrieved code chapter text.
        with gr.Column(scale=1):
            gr.Markdown("### 📖 Code Chapter")
            chapter_output = gr.Markdown(value="*No chapter loaded yet*", height=600)

    with gr.Row():
        gr.Markdown("### 🖼️ Retrieved Images")

    with gr.Row():
        image_gallery = gr.Gallery(
            label="Visual Evidence",
            show_label=True,
            columns=2,
            height=400,
            object_fit="contain",
        )

    with gr.Row():
        gr.Markdown("### ✅ Final Compliance Verdict")

    with gr.Row():
        final_output = gr.Markdown(value="*Analysis pending...*")

    # Output order must match the tuples yielded by run_agentic_workflow.
    workflow_outputs = [
        log_output,
        analysis_output,
        chapter_output,
        image_gallery,
        final_output,
    ]
    submit_btn.click(
        fn=run_agentic_workflow,
        inputs=[question_input],
        outputs=workflow_outputs,
    )
1154
if __name__ == "__main__":
    # Fail fast if the auth password is missing: passing None as the second
    # element of gradio's auth tuple would otherwise break (or misconfigure)
    # authentication at launch time.
    password = os.getenv("PASSWORD")
    if not password:
        raise RuntimeError("Set the PASSWORD environment variable to launch with authentication.")
    demo.queue().launch(
        inbrowser=True,
        auth=("username", password),  # fixed username; only the password is secret
    )