Upload app.py
Browse files
app.py
CHANGED
|
@@ -931,29 +931,23 @@ def agent_worker(user_question):
|
|
| 931 |
func = globals()[name]
|
| 932 |
result = func(**args)
|
| 933 |
|
| 934 |
-
#
|
| 935 |
if name == "execute_page_expert":
|
| 936 |
tile_idxs = result.get("visual_pointers", [])
|
| 937 |
page_num = args.get("page_num")
|
| 938 |
|
| 939 |
if tile_idxs:
|
| 940 |
state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
|
| 941 |
-
|
| 942 |
-
# A) Prepare the images
|
| 943 |
stitched_bytes = merge_tiles(tile_idxs, page_num)
|
| 944 |
|
|
|
|
| 945 |
pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
|
| 946 |
pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
|
|
|
|
|
|
|
|
|
|
| 947 |
|
| 948 |
-
|
| 949 |
-
# This tells the LLM: "Don't answer yet!"
|
| 950 |
-
result = {
|
| 951 |
-
"status": "SUCCESS",
|
| 952 |
-
"visual_proof_status": "VISUAL_PROOF_PENDING",
|
| 953 |
-
"instruction": "Images have been generated. STOP. Reply 'Awaiting visual proof' and wait for the next user message containing the images."
|
| 954 |
-
}
|
| 955 |
-
|
| 956 |
-
# Append the standard function response
|
| 957 |
tool_responses.append(
|
| 958 |
types.Part.from_function_response(
|
| 959 |
name=name,
|
|
@@ -961,31 +955,48 @@ def agent_worker(user_question):
|
|
| 961 |
)
|
| 962 |
)
|
| 963 |
|
| 964 |
-
|
| 965 |
-
# The model will read "VISUAL_PROOF_PENDING" and should reply "Awaiting visual proof"
|
| 966 |
-
state.add_analysis("🧠 Sending tool results (expecting pause)...")
|
| 967 |
-
response = chat.send_message(tool_responses)
|
| 968 |
|
| 969 |
-
#
|
|
|
|
|
|
|
|
|
|
| 970 |
if pending_images:
|
| 971 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 972 |
|
| 973 |
-
# Create the payload with images + Context Wrapper
|
| 974 |
image_message = [
|
| 975 |
types.Part.from_text(
|
| 976 |
-
"Here is the VISUAL PROOF generated by the
|
| 977 |
-
"
|
| 978 |
)
|
| 979 |
] + pending_images
|
| 980 |
|
| 981 |
-
# Send the images as a NEW User Turn
|
| 982 |
-
# This triggers the ACTUAL analysis and final verdict
|
| 983 |
response = chat.send_message(image_message)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
|
| 985 |
-
# 5. Final Output
|
| 986 |
state.add_log('🏁 **ANALYSIS COMPLETE**')
|
| 987 |
state.final_answer = response.text
|
| 988 |
state.done = True
|
|
|
|
| 989 |
|
| 990 |
def run_agentic_workflow(user_question):
|
| 991 |
state.done = False
|
|
|
|
| 931 |
func = globals()[name]
|
| 932 |
result = func(**args)
|
| 933 |
|
| 934 |
+
# 1. Handle Visuals
|
| 935 |
if name == "execute_page_expert":
|
| 936 |
tile_idxs = result.get("visual_pointers", [])
|
| 937 |
page_num = args.get("page_num")
|
| 938 |
|
| 939 |
if tile_idxs:
|
| 940 |
state.add_log(f'📸 Staging images for tiles: {tile_idxs}')
|
|
|
|
|
|
|
| 941 |
stitched_bytes = merge_tiles(tile_idxs, page_num)
|
| 942 |
|
| 943 |
+
# Add to pending images
|
| 944 |
pending_images.append(types.Part.from_bytes(stitched_bytes, mime_type="image/png"))
|
| 945 |
pending_images.append(types.Part.from_bytes(image_bytes_list[page_num], mime_type="image/png"))
|
| 946 |
+
|
| 947 |
+
# Update text result to reference the incoming images
|
| 948 |
+
result["note"] = "Visual evidence generated. See next message for images."
|
| 949 |
|
| 950 |
+
# 2. Collect the Tool Response Part
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
tool_responses.append(
|
| 952 |
types.Part.from_function_response(
|
| 953 |
name=name,
|
|
|
|
| 955 |
)
|
| 956 |
)
|
| 957 |
|
| 958 |
+
state.add_analysis("🧠 Injecting tool outputs and sending images...")
|
|
|
|
|
|
|
|
|
|
| 959 |
|
| 960 |
+
# =========================================================================
|
| 961 |
+
# THE GPT-STYLE FIX: Manual History Injection
|
| 962 |
+
# =========================================================================
|
| 963 |
+
|
| 964 |
if pending_images:
|
| 965 |
+
# Step A: Manually append the Tool Responses to history.
|
| 966 |
+
# We create a Content object (or dict) with role='function'.
|
| 967 |
+
# This "closes" the function loop in the history without triggering the model yet.
|
| 968 |
+
|
| 969 |
+
# Note: Depending on your specific SDK version, you might need
|
| 970 |
+
# from google.ai.generativelanguage_v1beta.types import Content
|
| 971 |
+
# But usually a dict works fine in the python SDK:
|
| 972 |
+
|
| 973 |
+
tool_content = {
|
| 974 |
+
"role": "function",
|
| 975 |
+
"parts": tool_responses
|
| 976 |
+
}
|
| 977 |
+
chat.history.append(tool_content)
|
| 978 |
+
|
| 979 |
+
# Step B: Send the images as the "User" follow-up.
|
| 980 |
+
# The model sees: [FunctionCall] -> [FunctionResponse] -> [User Images]
|
| 981 |
+
# It will now generate the Verdict based on both.
|
| 982 |
|
|
|
|
| 983 |
image_message = [
|
| 984 |
types.Part.from_text(
|
| 985 |
+
"Here is the VISUAL PROOF generated by the tool. "
|
| 986 |
+
"Analyze these images to confirm the compliance verdict."
|
| 987 |
)
|
| 988 |
] + pending_images
|
| 989 |
|
|
|
|
|
|
|
| 990 |
response = chat.send_message(image_message)
|
| 991 |
+
|
| 992 |
+
else:
|
| 993 |
+
# Standard path: If no images, just send the tool response normally
|
| 994 |
+
response = chat.send_message(tool_responses)
|
| 995 |
|
|
|
|
| 996 |
state.add_log('🏁 **ANALYSIS COMPLETE**')
|
| 997 |
state.final_answer = response.text
|
| 998 |
state.done = True
|
| 999 |
+
|
| 1000 |
|
| 1001 |
def run_agentic_workflow(user_question):
|
| 1002 |
state.done = False
|