Ryan2219 committed on
Commit
2ef8f78
·
verified ·
1 Parent(s): 9271105

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -24
app.py CHANGED
@@ -931,29 +931,23 @@ def agent_worker(user_question):
931
  func = globals()[name]
932
  result = func(**args)
933
 
934
- # --- SPECIAL HANDLING FOR VISUAL EXPERT ---
935
  if name == "execute_page_expert":
936
  tile_idxs = result.get("visual_pointers", [])
937
  page_num = args.get("page_num")
938
 
939
  if tile_idxs:
940
  state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
941
-
942
- # A) Prepare the images
943
  stitched_bytes = merge_tiles(tile_idxs, page_num)
944
 
 
945
  pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
946
  pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
 
 
 
947
 
948
- # B) OVERWRITE the text result to force the "Pause"
949
- # This tells the LLM: "Don't answer yet!"
950
- result = {
951
- "status": "SUCCESS",
952
- "visual_proof_status": "VISUAL_PROOF_PENDING",
953
- "instruction": "Images have been generated. STOP. Reply 'Awaiting visual proof' and wait for the next user message containing the images."
954
- }
955
-
956
- # Append the standard function response
957
  tool_responses.append(
958
  types.Part.from_function_response(
959
  name=name,
@@ -961,31 +955,48 @@ def agent_worker(user_question):
961
  )
962
  )
963
 
964
- # 3. Send the Tool Output (Closes the Function Turn)
965
- # The model will read "VISUAL_PROOF_PENDING" and should reply "Awaiting visual proof"
966
- state.add_analysis("🧠 Sending tool results (expecting pause)...")
967
- response = chat.send_message(tool_responses)
968
 
969
- # 4. Check if we have pending images to inject
 
 
 
970
  if pending_images:
971
- state.add_log(f'📸 Uploading {len(pending_images)} images to Planner context...')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972
 
973
- # Create the payload with images + Context Wrapper
974
  image_message = [
975
  types.Part.from_text(
976
- "Here is the VISUAL PROOF generated by the execute_page_expert tool.\n"
977
- "Please analyze these images to confirm the compliance verdict."
978
  )
979
  ] + pending_images
980
 
981
- # Send the images as a NEW User Turn
982
- # This triggers the ACTUAL analysis and final verdict
983
  response = chat.send_message(image_message)
 
 
 
 
984
 
985
- # 5. Final Output
986
  state.add_log('🏁 **ANALYSIS COMPLETE**')
987
  state.final_answer = response.text
988
  state.done = True
 
989
 
990
  def run_agentic_workflow(user_question):
991
  state.done = False
 
931
  func = globals()[name]
932
  result = func(**args)
933
 
934
+ # 1. Handle Visuals
935
  if name == "execute_page_expert":
936
  tile_idxs = result.get("visual_pointers", [])
937
  page_num = args.get("page_num")
938
 
939
  if tile_idxs:
940
  state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
 
 
941
  stitched_bytes = merge_tiles(tile_idxs, page_num)
942
 
943
+ # Add to pending images
944
  pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
945
  pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
946
+
947
+ # Update text result to reference the incoming images
948
+ result["note"] = "Visual evidence generated. See next message for images."
949
 
950
+ # 2. Collect the Tool Response Part
 
 
 
 
 
 
 
 
951
  tool_responses.append(
952
  types.Part.from_function_response(
953
  name=name,
 
955
  )
956
  )
957
 
958
+ state.add_analysis("🧠 Injecting tool outputs and sending images...")
 
 
 
959
 
960
+ # =========================================================================
961
+ # THE GPT-STYLE FIX: Manual History Injection
962
+ # =========================================================================
963
+
964
  if pending_images:
965
+ # Step A: Manually append the Tool Responses to history.
966
+ # We create a Content object (or dict) with role='function'.
967
+ # This "closes" the function loop in the history without triggering the model yet.
968
+
969
+ # Note: Depending on your specific SDK version, you might need
970
+ # from google.ai.generativelanguage_v1beta.types import Content
971
+ # But usually a dict works fine in the python SDK:
972
+
973
+ tool_content = {
974
+ "role": "function",
975
+ "parts": tool_responses
976
+ }
977
+ chat.history.append(tool_content)
978
+
979
+ # Step B: Send the images as the "User" follow-up.
980
+ # The model sees: [FunctionCall] -> [FunctionResponse] -> [User Images]
981
+ # It will now generate the Verdict based on both.
982
 
 
983
  image_message = [
984
  types.Part.from_text(
985
+ "Here is the VISUAL PROOF generated by the tool. "
986
+ "Analyze these images to confirm the compliance verdict."
987
  )
988
  ] + pending_images
989
 
 
 
990
  response = chat.send_message(image_message)
991
+
992
+ else:
993
+ # Standard path: If no images, just send the tool response normally
994
+ response = chat.send_message(tool_responses)
995
 
 
996
  state.add_log('🏁 **ANALYSIS COMPLETE**')
997
  state.final_answer = response.text
998
  state.done = True
999
+
1000
 
1001
  def run_agentic_workflow(user_question):
1002
  state.done = False