Ryan2219 committed on
Commit
2ef8f78
·
verified ·
1 Parent(s): 9271105

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -24
app.py CHANGED
@@ -931,29 +931,23 @@ def agent_worker(user_question):
931
  func = globals()[name]
932
  result = func(**args)
933
 
934
- # --- SPECIAL HANDLING FOR VISUAL EXPERT ---
935
  if name == "execute_page_expert":
936
  tile_idxs = result.get("visual_pointers", [])
937
  page_num = args.get("page_num")
938
 
939
  if tile_idxs:
940
  state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
941
-
942
- # A) Prepare the images
943
  stitched_bytes = merge_tiles(tile_idxs, page_num)
944
 
 
945
  pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
946
  pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
 
 
 
947
 
948
- # B) OVERWRITE the text result to force the "Pause"
949
- # This tells the LLM: "Don't answer yet!"
950
- result = {
951
- "status": "SUCCESS",
952
- "visual_proof_status": "VISUAL_PROOF_PENDING",
953
- "instruction": "Images have been generated. STOP. Reply 'Awaiting visual proof' and wait for the next user message containing the images."
954
- }
955
-
956
- # Append the standard function response
957
  tool_responses.append(
958
  types.Part.from_function_response(
959
  name=name,
@@ -961,31 +955,48 @@ def agent_worker(user_question):
961
  )
962
  )
963
 
964
- # 3. Send the Tool Output (Closes the Function Turn)
965
- # The model will read "VISUAL_PROOF_PENDING" and should reply "Awaiting visual proof"
966
- state.add_analysis("🧠 Sending tool results (expecting pause)...")
967
- response = chat.send_message(tool_responses)
968
 
969
- # 4. Check if we have pending images to inject
 
 
 
970
  if pending_images:
971
- state.add_log(f'📸 Uploading {len(pending_images)} images to Planner context...')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972
 
973
- # Create the payload with images + Context Wrapper
974
  image_message = [
975
  types.Part.from_text(
976
- "Here is the VISUAL PROOF generated by the execute_page_expert tool.\n"
977
- "Please analyze these images to confirm the compliance verdict."
978
  )
979
  ] + pending_images
980
 
981
- # Send the images as a NEW User Turn
982
- # This triggers the ACTUAL analysis and final verdict
983
  response = chat.send_message(image_message)
 
 
 
 
984
 
985
- # 5. Final Output
986
  state.add_log('🏁 **ANALYSIS COMPLETE**')
987
  state.final_answer = response.text
988
  state.done = True
 
989
 
990
  def run_agentic_workflow(user_question):
991
  state.done = False
 
931
  func = globals()[name]
932
  result = func(**args)
933
 
934
+ # 1. Handle Visuals
935
  if name == "execute_page_expert":
936
  tile_idxs = result.get("visual_pointers", [])
937
  page_num = args.get("page_num")
938
 
939
  if tile_idxs:
940
  state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
 
 
941
  stitched_bytes = merge_tiles(tile_idxs, page_num)
942
 
943
+ # Add to pending images
944
  pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
945
  pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
946
+
947
+ # Update text result to reference the incoming images
948
+ result["note"] = "Visual evidence generated. See next message for images."
949
 
950
+ # 2. Collect the Tool Response Part
 
 
 
 
 
 
 
 
951
  tool_responses.append(
952
  types.Part.from_function_response(
953
  name=name,
 
955
  )
956
  )
957
 
958
+ state.add_analysis("🧠 Injecting tool outputs and sending images...")
 
 
 
959
 
960
+ # =========================================================================
961
+ # THE GPT-STYLE FIX: Manual History Injection
962
+ # =========================================================================
963
+
964
  if pending_images:
965
+ # Step A: Manually append the Tool Responses to history.
966
+ # We create a Content object (or dict) with role='function'.
967
+ # This "closes" the function loop in the history without triggering the model yet.
968
+
969
+ # Note: Depending on your specific SDK version, you might need
970
+ # from google.ai.generativelanguage_v1beta.types import Content
971
+ # But usually a dict works fine in the python SDK:
972
+
973
+ tool_content = {
974
+ "role": "function",
975
+ "parts": tool_responses
976
+ }
977
+ chat.history.append(tool_content)
978
+
979
+ # Step B: Send the images as the "User" follow-up.
980
+ # The model sees: [FunctionCall] -> [FunctionResponse] -> [User Images]
981
+ # It will now generate the Verdict based on both.
982
 
 
983
  image_message = [
984
  types.Part.from_text(
985
+ "Here is the VISUAL PROOF generated by the tool. "
986
+ "Analyze these images to confirm the compliance verdict."
987
  )
988
  ] + pending_images
989
 
 
 
990
  response = chat.send_message(image_message)
991
+
992
+ else:
993
+ # Standard path: If no images, just send the tool response normally
994
+ response = chat.send_message(tool_responses)
995
 
 
996
  state.add_log('🏁 **ANALYSIS COMPLETE**')
997
  state.final_answer = response.text
998
  state.done = True
999
+
1000
 
1001
  def run_agentic_workflow(user_question):
1002
  state.done = False