Spaces:

NYSERDA-CRE-Working-Group
/

Code_compliance_Check

Sleeping

App Files Files Community

Ryan2219 committed on Jan 23

Commit

b5bb6b7

verified ·

1 Parent(s): b2d54a4

Upload app.py

Browse files

Files changed (1) hide show

app.py +135 -67

app.py CHANGED Viewed

@@ -25,6 +25,7 @@ class InterfaceState:
         self.analysis_messages = []
         self.current_chapter = ""
         self.current_images = []
         self.final_answer = ""
         self.done = False
         self.lock = threading.Lock()
@@ -50,6 +51,18 @@ class InterfaceState:
         with self.lock:
             self.current_images.append(img_pil)
             return self.current_images.copy()
     def clear(self):
         with self.lock:
@@ -59,6 +72,8 @@ class InterfaceState:
             self.current_images.clear()
             self.final_answer = ""
             self.done = False
 state = InterfaceState()
@@ -127,6 +142,8 @@ chroma_client = chromadb.PersistentClient(path="nyc_code_db")
 embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
 collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)
 # Modified tool functions with Gradio updates
 def search_page_text(page_number: int, research_goal: str):
     state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')
@@ -539,6 +556,33 @@ def extract_json(s: str):
     json_str = s[start:end+1]
     return json.loads(json_str)
 def execute_page_expert(expert_instructions: str, page_num: int):
     state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
     state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
@@ -686,7 +730,7 @@ def execute_page_expert(expert_instructions: str, page_num: int):
         }
     ]
-    MAX_TURNS = 10
     for turn in range(MAX_TURNS):
         response = client.chat.completions.create(
@@ -702,23 +746,46 @@ def execute_page_expert(expert_instructions: str, page_num: int):
         if msg.content:
             try:
                 res = extract_json(msg.content)
                 state.add_analysis(
                     f"🟨 Page Analyst\n{res.get('findings','')}"
                 )
-                tile_idxs = res.get("visual_pointers", [])
-                stitched_bytes = merge_tiles(
-                    tile_indexes=tile_idxs,
-                    page_num=page_num
                 )
-                stitched_img = Image.open(
-                    io.BytesIO(stitched_bytes)
-                )
-                state.add_image(stitched_img)
-                return extract_json(msg.content)
             except:
                 pass
@@ -783,7 +850,7 @@ def execute_page_expert(expert_instructions: str, page_num: int):
 tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
 import time
 planner = genai.Client()
-planner_model = "gemini-3-pro-preview"
 planner_prompt = f"""
     You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.
@@ -886,6 +953,13 @@ planner_prompt = f"""
     - NEVER issue a final verdict without calling `execute_page_expert`
     - If no page contains sufficient proof, return **Unverified**
     - Prefer false negatives over false positives
     ========================
     QUALITY STANDARD
@@ -907,81 +981,75 @@ def agent_worker(user_question):
     state.add_log(f'🚀 Starting analysis for: **{user_question}**')
     state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")
     response = chat.send_message(user_question)
     while response.candidates[0].content.parts[0].function_call:
         tool_responses = []
-        pending_images = []
         for part in response.candidates[0].content.parts:
             if part.function_call:
                 name = part.function_call.name
                 args = part.function_call.args
-                state.add_log(f'🛠️ Planner calling: **{name}**')
-                state.add_analysis(
-                    f"### 🛠️ Tool Call: `{name}`\n"
-                    f"```json\n{json.dumps(args, indent=2)}\n```"
-                )
                 func = globals()[name]
                 result = func(**args)
-                # -----------------------------
-                # STREAM REAL TOOL OUTPUTS
-                # -----------------------------
-                # search_page_text
-                # execute_page_expert
-                if name == "execute_page_expert":
-                    tile_idxs = result.get("visual_pointers", [])
-                    page_num = args.get("page_num")
-                    if tile_idxs:
-                        state.add_log(f'📸 Stitching high-res proof for tiles: **{tile_idxs}**')
-                        state.add_analysis(
-                            f"📸 Visual proof requested for tiles `{tile_idxs}` on page `{page_num}`"
-                        )
-                        stitched_bytes = merge_tiles(
-                            tile_indexes=tile_idxs,
-                            page_num=page_num
-                        )
-                        pending_images.append(
-                            types.Part.from_bytes(stitched_bytes, mime_type="image/png")
-                            )
-                        pending_images.append(types.Part.from_bytes(
-                                image_bytes_list[page_num],
-                                mime_type="image/png"
-                            ))
                 tool_responses.append(
-                    types.Part.from_function_response(
-                        name=name,
-                        response={"result": result}
-                    )
                 )
-        state.add_analysis("🧠 Returning tool outputs to planner...")
         response = chat.send_message(tool_responses)
-        if pending_images:
-            state.add_log(f'📸 Sending {len(pending_images)} images to Planner...')
-            # We send the images with a prompt telling the model what they are
-            pending_images.insert(0, "Here is the visual proof generated by the tool. Please use this to confirm your final answer:")
-            # This generates the ACTUAL final answer that sees the image
-            response = chat.send_message(pending_images)
     state.add_log('🏁 **ANALYSIS COMPLETE**')
-    state.add_analysis("✅ Planner finished. Final verdict generated.")
-    state.final_answer = response.text
     state.done = True
 def run_agentic_workflow(user_question):
     state.done = False

         self.analysis_messages = []
         self.current_chapter = ""
         self.current_images = []
+        self.staged_audit_images = []
         self.final_answer = ""
         self.done = False
         self.lock = threading.Lock()
         with self.lock:
             self.current_images.append(img_pil)
             return self.current_images.copy()
+    def add_staged_image_part(self, image_part):
+        """Stage one image part for the post-chat visual audit.
+
+        Appends ``image_part`` to ``self.staged_audit_images`` while holding
+        ``self.lock``, then prints the running total to stdout. Callers
+        visible in this diff pass the result of ``types.Part.from_bytes(...)``.
+        """
+        with self.lock:
+            self.staged_audit_images.append(image_part)
+            # Console trace (stdout, not the UI log) confirming the part was staged.
+            print(f"DEBUG: Staged image part. Total staged: {len(self.staged_audit_images)}")
+    def get_staged_images(self):
+        """Return a snapshot of the image parts staged for the audit turn.
+
+        The list is copied while holding ``self.lock``; the staged parts
+        themselves are not copied and the internal list is not cleared here.
+        """
+        with self.lock:
+            return list(self.staged_audit_images) # Shallow copy: callers cannot mutate the internal list
     def clear(self):
         with self.lock:
             self.current_images.clear()
             self.final_answer = ""
             self.done = False
 state = InterfaceState()
 embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
 collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)
+all_pending_images = []
 # Modified tool functions with Gradio updates
 def search_page_text(page_number: int, research_goal: str):
     state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')
     json_str = s[start:end+1]
     return json.loads(json_str)
+def sanitize_tile_indices(data):
+    """
+    Convert a loosely-typed LLM "tile indices" value into a list of ints.
+    Accepted shapes:
+      - list/tuple of ints, numeric strings or integral floats:
+        [1, 2], ["1", " 2 "], [1.0, "2.0"]
+      - a single string containing digits: "1, 2, 3", "[1, 2, 3]"
+      - a single non-zero scalar index: 7, 7.0
+      - None / empty / other falsy values -> []
+    Elements that cannot be read as a whole number (None, "abc", 2.5,
+    booleans) are skipped rather than raising. Order and duplicates are
+    preserved.
+    Returns:
+        list[int]: the cleaned indices; empty when nothing usable was found.
+    """
+    def _to_index(item):
+        # Return ``item`` as an int, or None when it is not a whole number.
+        text = str(item).strip()
+        try:
+            # Exact path first so large integers never round-trip via float.
+            return int(text)
+        except (ValueError, TypeError):
+            pass
+        try:
+            number = float(text)
+        except (ValueError, TypeError):
+            return None
+        # JSON numbers often arrive as 1.0; accept those, reject 2.5/nan/inf.
+        return int(number) if number.is_integer() else None
+    if not data:
+        return []
+    # A bare string may hold several indices in any punctuation; pull out
+    # every run of digits.
+    if isinstance(data, str):
+        return [int(n) for n in re.findall(r'\d+', data)]
+    # Treat a lone scalar like a one-element list so "visual_pointers": 3 works.
+    items = data if isinstance(data, (list, tuple)) else [data]
+    clean_list = []
+    for item in items:
+        index = _to_index(item)
+        if index is not None:
+            clean_list.append(index)
+    return clean_list
 def execute_page_expert(expert_instructions: str, page_num: int):
     state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
     state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
         }
     ]
+    MAX_TURNS = 3
     for turn in range(MAX_TURNS):
         response = client.chat.completions.create(
         if msg.content:
             try:
                 res = extract_json(msg.content)
                 state.add_analysis(
                     f"🟨 Page Analyst\n{res.get('findings','')}"
                 )
+                raw_pointers = res.get("visual_pointers", [])
+                tile_idxs = sanitize_tile_indices(raw_pointers)
+                if tile_idxs and tile_idxs != '[]':
+                    stitched_bytes = merge_tiles(
+                        tile_indexes=tile_idxs,
+                        page_num=page_num
+                    )
+                    state.add_log(f'📸 Staging {len(tile_idxs)} tiles for final audit...')
+                    # Store these to use AFTER the chat finishes
+                    state.add_staged_image_part(
+                        types.Part.from_bytes(
+                            data=stitched_bytes,  # <-- 'data=' is required here
+                            mime_type="image/png"
+                        )
+                    )
+                    stitched_img = Image.open(
+                        io.BytesIO(stitched_bytes)
+                    )
+                    state.add_image(stitched_img)
+                state.add_staged_image_part(
+                    types.Part.from_bytes(
+                        data=image_bytes_list[page_num],  # <-- 'data=' is required here
+                        mime_type="image/png"
+                    )
                 )
+                return res
             except:
                 pass
 tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
 import time
 planner = genai.Client()
+planner_model = "gemini-3-flash-preview"
 planner_prompt = f"""
     You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.
     - NEVER issue a final verdict without calling `execute_page_expert`
     - If no page contains sufficient proof, return **Unverified**
     - Prefer false negatives over false positives
+    *** CRITICAL VISUAL PROTOCOL ***
+    - When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
+    - When you see this, your ONLY response must be: "Awaiting visual proof."
+    - DO NOT attempt to guess the verdict.
+    - DO NOT complain about missing images.
+    - Simply wait. The user will immediately send the images in the next turn.
     ========================
     QUALITY STANDARD
     state.add_log(f'🚀 Starting analysis for: **{user_question}**')
     state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")
+    # 1. Initialize the Stateful Chat
+    chat = planner.chats.create(model=planner_model, config=config)
     response = chat.send_message(user_question)
+    # 2. Track images throughout the conversation
+    # 3. Standard Tool Loop (Phases 1-3)
     while response.candidates[0].content.parts[0].function_call:
         tool_responses = []
         for part in response.candidates[0].content.parts:
             if part.function_call:
                 name = part.function_call.name
                 args = part.function_call.args
+                state.add_log(f'🛠️ Tool Call: **{name}**')
                 func = globals()[name]
                 result = func(**args)
                 tool_responses.append(
+                    types.Part.from_function_response(name=name, response={"result": result})
                 )
+        # Send tool results back to the stateful chat
         response = chat.send_message(tool_responses)
+    # -----------------------------------------------------------------
+    # PHASE 4: THE POST-CHAT HANDOFF (The "Visual Audit")
+    # -----------------------------------------------------------------
+    # At this point, the while loop has ended.
+    # 'response.text' contains the model's preliminary answer.
+    audit_images = state.get_staged_images()
+    if audit_images:
+        state.add_log(f"👁️ Preliminary answer received. Performing audit with {len(audit_images)} images...")
+        # 1. Construct the audit parts
+        # Ensure 'text=' is used for the Part constructor
+        audit_parts = [
+            types.Part.from_text(
+                text="You have provided a preliminary verdict. Now, look at these images "
+                     "to verify your findings. If the visual evidence contradicts your "
+                     "text-based search, update your verdict now. "
+                ),
+            *audit_images
+        ]
+        try:
+            # 2. Send directly through the 'chat' session
+            # This automatically appends to history and maintains the session state
+            final_response = chat.send_message(audit_parts)
+            state.final_answer = final_response.text
+        except Exception as e:
+            # If the above fails, try the explicit message keyword
+            state.add_log("🔄 Retrying audit with explicit message keyword...")
+            final_response = chat.send_message(message=audit_parts)
+            state.final_answer = final_response.text
+    else:
+        state.add_log("⚠️ No images found in state. Skipping visual audit.")
+        state.final_answer = response.text
     state.add_log('🏁 **ANALYSIS COMPLETE**')
     state.done = True
 def run_agentic_workflow(user_question):
     state.done = False