Upload app.py
Browse files
app.py
CHANGED
|
@@ -886,13 +886,6 @@ planner_prompt = f"""
|
|
| 886 |
- NEVER issue a final verdict without calling `execute_page_expert`
|
| 887 |
- If no page contains sufficient proof, return **Unverified**
|
| 888 |
- Prefer false negatives over false positives
|
| 889 |
-
*** CRITICAL VISUAL PROTOCOL ***
|
| 890 |
-
- When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
|
| 891 |
-
- When you see this, your ONLY response must be: "Awaiting visual proof."
|
| 892 |
-
- DO NOT attempt to guess the verdict.
|
| 893 |
-
- DO NOT complain about missing images.
|
| 894 |
-
- Simply wait. The user will immediately send the images in the next turn.
|
| 895 |
-
|
| 896 |
|
| 897 |
========================
|
| 898 |
QUALITY STANDARD
|
|
@@ -912,91 +905,83 @@ chat = planner.chats.create(model=planner_model, config=config)
|
|
| 912 |
def agent_worker(user_question):
|
| 913 |
state.clear()
|
| 914 |
state.add_log(f'🚀 Starting analysis for: **{user_question}**')
|
|
|
|
| 915 |
|
| 916 |
-
# 1. Start the conversation
|
| 917 |
response = chat.send_message(user_question)
|
| 918 |
|
| 919 |
-
# 2. Loop through tool calls
|
| 920 |
while response.candidates[0].content.parts[0].function_call:
|
| 921 |
tool_responses = []
|
| 922 |
-
pending_images = []
|
| 923 |
|
| 924 |
for part in response.candidates[0].content.parts:
|
| 925 |
if part.function_call:
|
| 926 |
name = part.function_call.name
|
| 927 |
args = part.function_call.args
|
|
|
|
| 928 |
state.add_log(f'🛠️ Planner calling: **{name}**')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
|
| 930 |
-
# Execute tool
|
| 931 |
func = globals()[name]
|
| 932 |
result = func(**args)
|
| 933 |
|
| 934 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 935 |
if name == "execute_page_expert":
|
|
|
|
| 936 |
tile_idxs = result.get("visual_pointers", [])
|
| 937 |
page_num = args.get("page_num")
|
| 938 |
|
| 939 |
if tile_idxs:
|
| 940 |
-
state.add_log(f'📸
|
| 941 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
|
| 947 |
-
# Update text result to reference the incoming images
|
| 948 |
-
result["note"] = "Visual evidence generated. See next message for images."
|
| 949 |
|
| 950 |
-
# 2. Collect the Tool Response Part
|
| 951 |
tool_responses.append(
|
| 952 |
types.Part.from_function_response(
|
| 953 |
name=name,
|
| 954 |
response={"result": result}
|
| 955 |
)
|
| 956 |
)
|
|
|
|
| 957 |
|
| 958 |
-
state.add_analysis("🧠
|
| 959 |
-
|
| 960 |
-
# =========================================================================
|
| 961 |
-
# THE GPT-STYLE FIX: Manual History Injection
|
| 962 |
-
# =========================================================================
|
| 963 |
-
|
| 964 |
if pending_images:
|
| 965 |
-
|
| 966 |
-
# We create a Content object (or dict) with role='function'.
|
| 967 |
-
# This "closes" the function loop in the history without triggering the model yet.
|
| 968 |
-
|
| 969 |
-
# Note: Depending on your specific SDK version, you might need
|
| 970 |
-
# from google.ai.generativelanguage_v1beta.types import Content
|
| 971 |
-
# But usually a dict works fine in the python SDK:
|
| 972 |
-
|
| 973 |
-
tool_content = {
|
| 974 |
-
"role": "function",
|
| 975 |
-
"parts": tool_responses
|
| 976 |
-
}
|
| 977 |
-
chat.history.append(tool_content)
|
| 978 |
-
|
| 979 |
-
# Step B: Send the images as the "User" follow-up.
|
| 980 |
-
# The model sees: [FunctionCall] -> [FunctionResponse] -> [User Images]
|
| 981 |
-
# It will now generate the Verdict based on both.
|
| 982 |
-
|
| 983 |
-
image_message = [
|
| 984 |
-
types.Part.from_text(
|
| 985 |
-
"Here is the VISUAL PROOF generated by the tool. "
|
| 986 |
-
"Analyze these images to confirm the compliance verdict."
|
| 987 |
-
)
|
| 988 |
-
] + pending_images
|
| 989 |
|
| 990 |
-
|
|
|
|
| 991 |
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
response = chat.send_message(tool_responses)
|
| 995 |
|
| 996 |
state.add_log('🏁 **ANALYSIS COMPLETE**')
|
|
|
|
| 997 |
state.final_answer = response.text
|
| 998 |
state.done = True
|
| 999 |
-
|
| 1000 |
|
| 1001 |
def run_agentic_workflow(user_question):
|
| 1002 |
state.done = False
|
|
|
|
| 886 |
- NEVER issue a final verdict without calling `execute_page_expert`
|
| 887 |
- If no page contains sufficient proof, return **Unverified**
|
| 888 |
- Prefer false negatives over false positives
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
|
| 890 |
========================
|
| 891 |
QUALITY STANDARD
|
|
|
|
| 905 |
def _has_function_call(response):
    """Return True if any part of the first candidate carries a function call.

    Missing or empty ``candidates``, ``content`` or ``parts`` are treated as
    "no function call" instead of raising, so the caller's loop ends cleanly
    on an empty or blocked reply.
    """
    candidates = response.candidates or []
    if not candidates:
        return False
    content = candidates[0].content
    parts = (content.parts if content is not None else None) or []
    return any(part.function_call for part in parts)


def agent_worker(user_question):
    """Run the planner tool-calling loop for one question and publish the result.

    Sends ``user_question`` to the module-level ``chat``, executes every tool
    the planner requests, returns the tool outputs to the planner, and repeats
    until a reply contains no function call. Progress is reported through
    ``state.add_log`` / ``state.add_analysis``; the final reply text is stored
    in ``state.final_answer`` and ``state.done`` is set to True.

    Args:
        user_question: The question text forwarded to the planner chat.
    """
    state.clear()
    state.add_log(f'🚀 Starting analysis for: **{user_question}**')
    state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")

    response = chat.send_message(user_question)

    # Check every part, not only the first: a reply may lead with a text part
    # and carry the function call in a later one.
    while _has_function_call(response):
        tool_responses = []
        pending_images = []

        for part in response.candidates[0].content.parts:
            if not part.function_call:
                continue

            name = part.function_call.name
            args = part.function_call.args

            state.add_log(f'🛠️ Planner calling: **{name}**')
            state.add_analysis(
                f"### 🛠️ Tool Call: `{name}`\n"
                f"```json\n{json.dumps(args, indent=2)}\n```"
            )

            # NOTE(review): `name` is chosen by the model, and resolving it
            # through globals() lets it reach any module-level callable.
            # Consider an explicit {tool_name: function} registry instead.
            func = globals()[name]
            result = func(**args)

            if name == "execute_page_expert":
                tile_idxs = result.get("visual_pointers", [])
                page_num = args.get("page_num")

                if tile_idxs:
                    state.add_log(f'📸 Stitching high-res proof for tiles: **{tile_idxs}**')
                    state.add_analysis(
                        f"📸 Visual proof requested for tiles `{tile_idxs}` on page `{page_num}`"
                    )

                    stitched_bytes = merge_tiles(
                        tile_indexes=tile_idxs,
                        page_num=page_num,
                    )

                    # `data` is passed by keyword: Part.from_bytes takes
                    # keyword-only arguments in current google-genai releases.
                    pending_images.append(
                        types.Part.from_bytes(
                            data=stitched_bytes,
                            mime_type="image/png",
                        )
                    )
                    # The page image from image_bytes_list is sent alongside
                    # the stitched tiles.
                    pending_images.append(
                        types.Part.from_bytes(
                            data=image_bytes_list[page_num],
                            mime_type="image/png",
                        )
                    )

            tool_responses.append(
                types.Part.from_function_response(
                    name=name,
                    response={"result": result},
                )
            )

        state.add_analysis("🧠 Returning tool outputs to planner...")
        response = chat.send_message(tool_responses)

        if pending_images:
            state.add_log(f'📸 Sending {len(pending_images)} images to Planner...')

            # Lead with a text part telling the model what the images are.
            pending_images.insert(0, "Here is the visual proof generated by the tool. Please use this to confirm your final answer:")

            # NOTE(review): this reply replaces the one obtained from the tool
            # outputs above. If that earlier reply itself requested more
            # tools, those calls are never answered - confirm the API accepts
            # a user turn at that point.
            response = chat.send_message(pending_images)

    state.add_log('🏁 **ANALYSIS COMPLETE**')
    state.add_analysis("✅ Planner finished. Final verdict generated.")
    state.final_answer = response.text
    state.done = True
|
|
|
|
| 985 |
|
| 986 |
def run_agentic_workflow(user_question):
|
| 987 |
state.done = False
|