Ryan2219 committed on
Commit
b2d54a4
·
verified ·
1 Parent(s): 2ef8f78

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -55
app.py CHANGED
@@ -886,13 +886,6 @@ planner_prompt = f"""
886
  - NEVER issue a final verdict without calling `execute_page_expert`
887
  - If no page contains sufficient proof, return **Unverified**
888
  - Prefer false negatives over false positives
889
- *** CRITICAL VISUAL PROTOCOL ***
890
- - When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
891
- - When you see this, your ONLY response must be: "Awaiting visual proof."
892
- - DO NOT attempt to guess the verdict.
893
- - DO NOT complain about missing images.
894
- - Simply wait. The user will immediately send the images in the next turn.
895
-
896
 
897
  ========================
898
  QUALITY STANDARD
@@ -912,91 +905,83 @@ chat = planner.chats.create(model=planner_model, config=config)
912
  def agent_worker(user_question):
913
  state.clear()
914
  state.add_log(f'🚀 Starting analysis for: **{user_question}**')
 
915
 
916
- # 1. Start the conversation
917
  response = chat.send_message(user_question)
918
 
919
- # 2. Loop through tool calls
920
  while response.candidates[0].content.parts[0].function_call:
921
  tool_responses = []
922
- pending_images = [] # Reset every turn
923
 
924
  for part in response.candidates[0].content.parts:
925
  if part.function_call:
926
  name = part.function_call.name
927
  args = part.function_call.args
 
928
  state.add_log(f'🛠️ Planner calling: **{name}**')
 
 
 
 
929
 
930
- # Execute tool
931
  func = globals()[name]
932
  result = func(**args)
933
 
934
- # 1. Handle Visuals
 
 
 
 
 
 
935
  if name == "execute_page_expert":
 
936
  tile_idxs = result.get("visual_pointers", [])
937
  page_num = args.get("page_num")
938
 
939
  if tile_idxs:
940
- state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
941
- stitched_bytes = merge_tiles(tile_idxs, page_num)
 
 
 
 
 
 
 
942
 
943
- # Add to pending images
944
- pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
945
- pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
 
 
 
 
946
 
947
- # Update text result to reference the incoming images
948
- result["note"] = "Visual evidence generated. See next message for images."
949
 
950
- # 2. Collect the Tool Response Part
951
  tool_responses.append(
952
  types.Part.from_function_response(
953
  name=name,
954
  response={"result": result}
955
  )
956
  )
 
957
 
958
- state.add_analysis("🧠 Injecting tool outputs and sending images...")
959
-
960
- # =========================================================================
961
- # THE GPT-STYLE FIX: Manual History Injection
962
- # =========================================================================
963
-
964
  if pending_images:
965
- # Step A: Manually append the Tool Responses to history.
966
- # We create a Content object (or dict) with role='function'.
967
- # This "closes" the function loop in the history without triggering the model yet.
968
-
969
- # Note: Depending on your specific SDK version, you might need
970
- # from google.ai.generativelanguage_v1beta.types import Content
971
- # But usually a dict works fine in the python SDK:
972
-
973
- tool_content = {
974
- "role": "function",
975
- "parts": tool_responses
976
- }
977
- chat.history.append(tool_content)
978
-
979
- # Step B: Send the images as the "User" follow-up.
980
- # The model sees: [FunctionCall] -> [FunctionResponse] -> [User Images]
981
- # It will now generate the Verdict based on both.
982
-
983
- image_message = [
984
- types.Part.from_text(
985
- "Here is the VISUAL PROOF generated by the tool. "
986
- "Analyze these images to confirm the compliance verdict."
987
- )
988
- ] + pending_images
989
 
990
- response = chat.send_message(image_message)
 
991
 
992
- else:
993
- # Standard path: If no images, just send the tool response normally
994
- response = chat.send_message(tool_responses)
995
 
996
  state.add_log('🏁 **ANALYSIS COMPLETE**')
 
997
  state.final_answer = response.text
998
  state.done = True
999
-
1000
 
1001
  def run_agentic_workflow(user_question):
1002
  state.done = False
 
886
  - NEVER issue a final verdict without calling `execute_page_expert`
887
  - If no page contains sufficient proof, return **Unverified**
888
  - Prefer false negatives over false positives
 
 
 
 
 
 
 
889
 
890
  ========================
891
  QUALITY STANDARD
 
905
  def agent_worker(user_question):
906
  state.clear()
907
  state.add_log(f'🚀 Starting analysis for: **{user_question}**')
908
+ state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")
909
 
 
910
  response = chat.send_message(user_question)
911
 
 
912
  while response.candidates[0].content.parts[0].function_call:
913
  tool_responses = []
914
+ pending_images = []
915
 
916
  for part in response.candidates[0].content.parts:
917
  if part.function_call:
918
  name = part.function_call.name
919
  args = part.function_call.args
920
+
921
  state.add_log(f'🛠️ Planner calling: **{name}**')
922
+ state.add_analysis(
923
+ f"### 🛠️ Tool Call: `{name}`\n"
924
+ f"```json\n{json.dumps(args, indent=2)}\n```"
925
+ )
926
 
 
927
  func = globals()[name]
928
  result = func(**args)
929
 
930
+ # -----------------------------
931
+ # STREAM REAL TOOL OUTPUTS
932
+ # -----------------------------
933
+
934
+ # search_page_text
935
+ # execute_page_expert
936
+
937
  if name == "execute_page_expert":
938
+
939
  tile_idxs = result.get("visual_pointers", [])
940
  page_num = args.get("page_num")
941
 
942
  if tile_idxs:
943
+ state.add_log(f'📸 Stitching high-res proof for tiles: **{tile_idxs}**')
944
+ state.add_analysis(
945
+ f"📸 Visual proof requested for tiles `{tile_idxs}` on page `{page_num}`"
946
+ )
947
+
948
+ stitched_bytes = merge_tiles(
949
+ tile_indexes=tile_idxs,
950
+ page_num=page_num
951
+ )
952
 
953
+ pending_images.append(
954
+ types.Part.from_bytes(stitched_bytes, mime_type="image/png")
955
+ )
956
+ pending_images.append(types.Part.from_bytes(
957
+ image_bytes_list[page_num],
958
+ mime_type="image/png"
959
+ ))
960
 
 
 
961
 
 
962
  tool_responses.append(
963
  types.Part.from_function_response(
964
  name=name,
965
  response={"result": result}
966
  )
967
  )
968
+
969
 
970
+ state.add_analysis("🧠 Returning tool outputs to planner...")
971
+ response = chat.send_message(tool_responses)
 
 
 
 
972
  if pending_images:
973
+ state.add_log(f'📸 Sending {len(pending_images)} images to Planner...')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974
 
975
+ # We send the images with a prompt telling the model what they are
976
+ pending_images.insert(0, "Here is the visual proof generated by the tool. Please use this to confirm your final answer:")
977
 
978
+ # This generates the ACTUAL final answer that sees the image
979
+ response = chat.send_message(pending_images)
 
980
 
981
  state.add_log('🏁 **ANALYSIS COMPLETE**')
982
+ state.add_analysis("✅ Planner finished. Final verdict generated.")
983
  state.final_answer = response.text
984
  state.done = True
 
985
 
986
  def run_agentic_workflow(user_question):
987
  state.done = False