Upload app.py
Browse files
app.py
CHANGED
|
@@ -25,6 +25,7 @@ class InterfaceState:
|
|
| 25 |
self.analysis_messages = []
|
| 26 |
self.current_chapter = ""
|
| 27 |
self.current_images = []
|
|
|
|
| 28 |
self.final_answer = ""
|
| 29 |
self.done = False
|
| 30 |
self.lock = threading.Lock()
|
|
@@ -50,6 +51,18 @@ class InterfaceState:
|
|
| 50 |
with self.lock:
|
| 51 |
self.current_images.append(img_pil)
|
| 52 |
return self.current_images.copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def clear(self):
|
| 55 |
with self.lock:
|
|
@@ -59,6 +72,8 @@ class InterfaceState:
|
|
| 59 |
self.current_images.clear()
|
| 60 |
self.final_answer = ""
|
| 61 |
self.done = False
|
|
|
|
|
|
|
| 62 |
|
| 63 |
state = InterfaceState()
|
| 64 |
|
|
@@ -127,6 +142,8 @@ chroma_client = chromadb.PersistentClient(path="nyc_code_db")
|
|
| 127 |
embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
|
| 128 |
collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)
|
| 129 |
|
|
|
|
|
|
|
| 130 |
# Modified tool functions with Gradio updates
|
| 131 |
def search_page_text(page_number: int, research_goal: str):
|
| 132 |
state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')
|
|
@@ -539,6 +556,33 @@ def extract_json(s: str):
|
|
| 539 |
json_str = s[start:end+1]
|
| 540 |
return json.loads(json_str)
|
| 541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
def execute_page_expert(expert_instructions: str, page_num: int):
|
| 543 |
state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
|
| 544 |
state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
|
|
@@ -686,7 +730,7 @@ def execute_page_expert(expert_instructions: str, page_num: int):
|
|
| 686 |
}
|
| 687 |
]
|
| 688 |
|
| 689 |
-
MAX_TURNS =
|
| 690 |
|
| 691 |
for turn in range(MAX_TURNS):
|
| 692 |
response = client.chat.completions.create(
|
|
@@ -702,23 +746,46 @@ def execute_page_expert(expert_instructions: str, page_num: int):
|
|
| 702 |
if msg.content:
|
| 703 |
try:
|
| 704 |
res = extract_json(msg.content)
|
|
|
|
|
|
|
| 705 |
state.add_analysis(
|
| 706 |
f"🟨 Page Analyst\n{res.get('findings','')}"
|
| 707 |
)
|
| 708 |
-
|
| 709 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
)
|
| 715 |
|
| 716 |
-
|
| 717 |
-
io.BytesIO(stitched_bytes)
|
| 718 |
-
)
|
| 719 |
-
state.add_image(stitched_img)
|
| 720 |
-
|
| 721 |
-
return extract_json(msg.content)
|
| 722 |
except:
|
| 723 |
pass
|
| 724 |
|
|
@@ -783,7 +850,7 @@ def execute_page_expert(expert_instructions: str, page_num: int):
|
|
| 783 |
tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
|
| 784 |
import time
|
| 785 |
planner = genai.Client()
|
| 786 |
-
planner_model = "gemini-3-
|
| 787 |
planner_prompt = f"""
|
| 788 |
You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.
|
| 789 |
|
|
@@ -886,6 +953,13 @@ planner_prompt = f"""
|
|
| 886 |
- NEVER issue a final verdict without calling `execute_page_expert`
|
| 887 |
- If no page contains sufficient proof, return **Unverified**
|
| 888 |
- Prefer false negatives over false positives
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
|
| 890 |
========================
|
| 891 |
QUALITY STANDARD
|
|
@@ -907,81 +981,75 @@ def agent_worker(user_question):
|
|
| 907 |
state.add_log(f'🚀 Starting analysis for: **{user_question}**')
|
| 908 |
state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")
|
| 909 |
|
|
|
|
|
|
|
| 910 |
response = chat.send_message(user_question)
|
| 911 |
|
|
|
|
|
|
|
|
|
|
| 912 |
while response.candidates[0].content.parts[0].function_call:
|
| 913 |
tool_responses = []
|
| 914 |
-
pending_images = []
|
| 915 |
|
| 916 |
for part in response.candidates[0].content.parts:
|
| 917 |
if part.function_call:
|
| 918 |
name = part.function_call.name
|
| 919 |
args = part.function_call.args
|
| 920 |
-
|
| 921 |
-
state.add_log(f'🛠️ Planner calling: **{name}**')
|
| 922 |
-
state.add_analysis(
|
| 923 |
-
f"### 🛠️ Tool Call: `{name}`\n"
|
| 924 |
-
f"```json\n{json.dumps(args, indent=2)}\n```"
|
| 925 |
-
)
|
| 926 |
|
| 927 |
func = globals()[name]
|
| 928 |
result = func(**args)
|
| 929 |
|
| 930 |
-
# -----------------------------
|
| 931 |
-
# STREAM REAL TOOL OUTPUTS
|
| 932 |
-
# -----------------------------
|
| 933 |
-
|
| 934 |
-
# search_page_text
|
| 935 |
-
# execute_page_expert
|
| 936 |
-
|
| 937 |
-
if name == "execute_page_expert":
|
| 938 |
-
|
| 939 |
-
tile_idxs = result.get("visual_pointers", [])
|
| 940 |
-
page_num = args.get("page_num")
|
| 941 |
-
|
| 942 |
-
if tile_idxs:
|
| 943 |
-
state.add_log(f'📸 Stitching high-res proof for tiles: **{tile_idxs}**')
|
| 944 |
-
state.add_analysis(
|
| 945 |
-
f"📸 Visual proof requested for tiles `{tile_idxs}` on page `{page_num}`"
|
| 946 |
-
)
|
| 947 |
-
|
| 948 |
-
stitched_bytes = merge_tiles(
|
| 949 |
-
tile_indexes=tile_idxs,
|
| 950 |
-
page_num=page_num
|
| 951 |
-
)
|
| 952 |
-
|
| 953 |
-
pending_images.append(
|
| 954 |
-
types.Part.from_bytes(stitched_bytes, mime_type="image/png")
|
| 955 |
-
)
|
| 956 |
-
pending_images.append(types.Part.from_bytes(
|
| 957 |
-
image_bytes_list[page_num],
|
| 958 |
-
mime_type="image/png"
|
| 959 |
-
))
|
| 960 |
-
|
| 961 |
-
|
| 962 |
tool_responses.append(
|
| 963 |
-
types.Part.from_function_response(
|
| 964 |
-
name=name,
|
| 965 |
-
response={"result": result}
|
| 966 |
-
)
|
| 967 |
)
|
| 968 |
-
|
| 969 |
|
| 970 |
-
|
| 971 |
response = chat.send_message(tool_responses)
|
| 972 |
-
|
| 973 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
|
| 975 |
-
|
| 976 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
|
|
|
| 981 |
state.add_log('🏁 **ANALYSIS COMPLETE**')
|
| 982 |
-
state.add_analysis("✅ Planner finished. Final verdict generated.")
|
| 983 |
-
state.final_answer = response.text
|
| 984 |
state.done = True
|
|
|
|
| 985 |
|
| 986 |
def run_agentic_workflow(user_question):
|
| 987 |
state.done = False
|
|
|
|
| 25 |
self.analysis_messages = []
|
| 26 |
self.current_chapter = ""
|
| 27 |
self.current_images = []
|
| 28 |
+
self.staged_audit_images = []
|
| 29 |
self.final_answer = ""
|
| 30 |
self.done = False
|
| 31 |
self.lock = threading.Lock()
|
|
|
|
| 51 |
with self.lock:
|
| 52 |
self.current_images.append(img_pil)
|
| 53 |
return self.current_images.copy()
|
| 54 |
+
|
| 55 |
+
def add_staged_image_part(self, image_part):
    """Queue one image part for the later visual-audit turn.

    The part is appended to ``self.staged_audit_images`` while ``self.lock``
    is held, and the running total is printed to stdout as a debug trace.

    Args:
        image_part: The object to stage; it is stored as-is.
    """
    with self.lock:
        staged = self.staged_audit_images
        staged.append(image_part)
        # Console trace so staging can be confirmed while debugging.
        print(f"DEBUG: Staged image part. Total staged: {len(self.staged_audit_images)}")
|
| 61 |
+
|
| 62 |
+
def get_staged_images(self):
    """Return a snapshot of the image parts staged for the audit turn.

    The snapshot is taken while ``self.lock`` is held. It is a new list, so
    callers can mutate it without touching ``self.staged_audit_images``.
    """
    with self.lock:
        # Hand back a shallow copy rather than the live list.
        snapshot = [*self.staged_audit_images]
    return snapshot
|
| 66 |
|
| 67 |
def clear(self):
|
| 68 |
with self.lock:
|
|
|
|
| 72 |
self.current_images.clear()
|
| 73 |
self.final_answer = ""
|
| 74 |
self.done = False
|
| 75 |
+
|
| 76 |
+
|
| 77 |
|
| 78 |
state = InterfaceState()
|
| 79 |
|
|
|
|
| 142 |
embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
|
| 143 |
collection = chroma_client.get_collection(name="nyc_building_codes", embedding_function=embedding_model)
|
| 144 |
|
| 145 |
+
all_pending_images = []
|
| 146 |
+
|
| 147 |
# Modified tool functions with Gradio updates
|
| 148 |
def search_page_text(page_number: int, research_goal: str):
|
| 149 |
state.add_log(f'🔍 Searching page **{page_metadata[page_number]["sheet_title"]}** for details')
|
|
|
|
| 556 |
json_str = s[start:end+1]
|
| 557 |
return json.loads(json_str)
|
| 558 |
|
| 559 |
+
def sanitize_tile_indices(data):
    """Coerce a loosely-typed LLM value into a clean list of integers.

    Accepted shapes:
      * list/tuple of ints, integral floats, or numeric strings:
        ``[1, 2]``, ``["1", " 2 "]``, ``[1.0, 2]``
      * a string containing digits: ``"1, 2, 3"`` or ``"[1, 2, 3]"``
      * a bare number: ``3`` or ``3.0`` (treated as a one-element list)
      * ``None``, booleans, empty values, or anything else -> ``[]``

    Items that cannot be read as an integer are skipped rather than raising,
    so a partially malformed answer still yields the usable indices.

    Args:
        data: The raw value to sanitize (any type).

    Returns:
        list[int]: The extracted integers, in their original order.
    """
    # bool is a subclass of int; True/False are noise here, not tiles 1/0.
    if data is None or isinstance(data, bool):
        return []

    # A bare scalar such as ``3`` is a common LLM answer; wrap it so it goes
    # through the same per-item validation as a list would.
    if isinstance(data, (int, float)):
        data = [data]

    if isinstance(data, str):
        # Pull out every run of digits, ignoring brackets, commas and words.
        return [int(n) for n in re.findall(r'\d+', data)]

    if isinstance(data, (list, tuple)):
        clean_list = []
        for item in data:
            if isinstance(item, bool):
                continue
            if isinstance(item, float):
                # JSON numbers may arrive as 1.0; keep only whole values
                # (nan/inf and fractions report is_integer() == False).
                if item.is_integer():
                    clean_list.append(int(item))
                continue
            try:
                # Handles ints and numeric strings such as "1" or " 2 ".
                clean_list.append(int(str(item).strip()))
            except (ValueError, TypeError):
                continue
        return clean_list

    return []
|
| 585 |
+
|
| 586 |
def execute_page_expert(expert_instructions: str, page_num: int):
|
| 587 |
state.add_log(f'👁️ Spawning Page Expert for page **{page_num}**')
|
| 588 |
state.add_analysis(f"👁️ Page Expert searching for {expert_instructions}")
|
|
|
|
| 730 |
}
|
| 731 |
]
|
| 732 |
|
| 733 |
+
MAX_TURNS = 3
|
| 734 |
|
| 735 |
for turn in range(MAX_TURNS):
|
| 736 |
response = client.chat.completions.create(
|
|
|
|
| 746 |
if msg.content:
|
| 747 |
try:
|
| 748 |
res = extract_json(msg.content)
|
| 749 |
+
|
| 750 |
+
|
| 751 |
state.add_analysis(
|
| 752 |
f"🟨 Page Analyst\n{res.get('findings','')}"
|
| 753 |
)
|
| 754 |
+
raw_pointers = res.get("visual_pointers", [])
|
| 755 |
+
tile_idxs = sanitize_tile_indices(raw_pointers)
|
| 756 |
+
|
| 757 |
+
|
| 758 |
+
if tile_idxs and tile_idxs != '[]':
|
| 759 |
+
stitched_bytes = merge_tiles(
|
| 760 |
+
tile_indexes=tile_idxs,
|
| 761 |
+
page_num=page_num
|
| 762 |
+
)
|
| 763 |
+
|
| 764 |
+
state.add_log(f'📸 Staging {len(tile_idxs)} tiles for final audit...')
|
| 765 |
+
|
| 766 |
+
# Store these to use AFTER the chat finishes
|
| 767 |
+
state.add_staged_image_part(
|
| 768 |
+
types.Part.from_bytes(
|
| 769 |
+
data=stitched_bytes, # <-- 'data=' is required here
|
| 770 |
+
mime_type="image/png"
|
| 771 |
+
)
|
| 772 |
+
)
|
| 773 |
+
|
| 774 |
|
| 775 |
+
stitched_img = Image.open(
|
| 776 |
+
io.BytesIO(stitched_bytes)
|
| 777 |
+
)
|
| 778 |
+
state.add_image(stitched_img)
|
| 779 |
+
|
| 780 |
+
|
| 781 |
+
state.add_staged_image_part(
|
| 782 |
+
types.Part.from_bytes(
|
| 783 |
+
data=image_bytes_list[page_num], # <-- 'data=' is required here
|
| 784 |
+
mime_type="image/png"
|
| 785 |
+
)
|
| 786 |
)
|
| 787 |
|
| 788 |
+
return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 789 |
except:
|
| 790 |
pass
|
| 791 |
|
|
|
|
| 850 |
tools_list = [search_page_text, nyc_legal_sub_agent, execute_page_expert]
|
| 851 |
import time
|
| 852 |
planner = genai.Client()
|
| 853 |
+
planner_model = "gemini-3-flash-preview"
|
| 854 |
planner_prompt = f"""
|
| 855 |
You are the Lead Architectural Compliance Planner for NYC Building Code and Zoning review.
|
| 856 |
|
|
|
|
| 953 |
- NEVER issue a final verdict without calling `execute_page_expert`
|
| 954 |
- If no page contains sufficient proof, return **Unverified**
|
| 955 |
- Prefer false negatives over false positives
|
| 956 |
+
*** CRITICAL VISUAL PROTOCOL ***
|
| 957 |
+
- When `execute_page_expert` returns, it will explicitly state "VISUAL_PROOF_PENDING".
|
| 958 |
+
- When you see this, your ONLY response must be: "Awaiting visual proof."
|
| 959 |
+
- DO NOT attempt to guess the verdict.
|
| 960 |
+
- DO NOT complain about missing images.
|
| 961 |
+
- Simply wait. The user will immediately send the images in the next turn.
|
| 962 |
+
|
| 963 |
|
| 964 |
========================
|
| 965 |
QUALITY STANDARD
|
|
|
|
| 981 |
state.add_log(f'🚀 Starting analysis for: **{user_question}**')
|
| 982 |
state.add_analysis("🧠 Planner initialized. Awaiting tool calls...")
|
| 983 |
|
| 984 |
+
# 1. Initialize the Stateful Chat
|
| 985 |
+
chat = planner.chats.create(model=planner_model, config=config)
|
| 986 |
response = chat.send_message(user_question)
|
| 987 |
|
| 988 |
+
# 2. Track images throughout the conversation
|
| 989 |
+
|
| 990 |
+
# 3. Standard Tool Loop (Phases 1-3)
|
| 991 |
while response.candidates[0].content.parts[0].function_call:
|
| 992 |
tool_responses = []
|
|
|
|
| 993 |
|
| 994 |
for part in response.candidates[0].content.parts:
|
| 995 |
if part.function_call:
|
| 996 |
name = part.function_call.name
|
| 997 |
args = part.function_call.args
|
| 998 |
+
state.add_log(f'🛠️ Tool Call: **{name}**')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 999 |
|
| 1000 |
func = globals()[name]
|
| 1001 |
result = func(**args)
|
| 1002 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1003 |
tool_responses.append(
|
| 1004 |
+
types.Part.from_function_response(name=name, response={"result": result})
|
|
|
|
|
|
|
|
|
|
| 1005 |
)
|
|
|
|
| 1006 |
|
| 1007 |
+
# Send tool results back to the stateful chat
|
| 1008 |
response = chat.send_message(tool_responses)
|
| 1009 |
+
|
| 1010 |
+
# -----------------------------------------------------------------
|
| 1011 |
+
# PHASE 4: THE POST-CHAT HANDOFF (The "Visual Audit")
|
| 1012 |
+
# -----------------------------------------------------------------
|
| 1013 |
+
|
| 1014 |
+
# At this point, the while loop has ended.
|
| 1015 |
+
# 'response.text' contains the model's preliminary answer.
|
| 1016 |
+
|
| 1017 |
+
audit_images = state.get_staged_images()
|
| 1018 |
+
|
| 1019 |
+
if audit_images:
|
| 1020 |
+
state.add_log(f"👁️ Preliminary answer received. Performing audit with {len(audit_images)} images...")
|
| 1021 |
+
|
| 1022 |
+
# 1. Construct the audit parts
|
| 1023 |
+
# Ensure 'text=' is used for the Part constructor
|
| 1024 |
+
audit_parts = [
|
| 1025 |
+
types.Part.from_text(
|
| 1026 |
+
text="You have provided a preliminary verdict. Now, look at these images "
|
| 1027 |
+
"to verify your findings. If the visual evidence contradicts your "
|
| 1028 |
+
"text-based search, update your verdict now. "
|
| 1029 |
+
),
|
| 1030 |
+
*audit_images
|
| 1031 |
+
]
|
| 1032 |
+
|
| 1033 |
+
try:
|
| 1034 |
+
# 2. Send directly through the 'chat' session
|
| 1035 |
+
# This automatically appends to history and maintains the session state
|
| 1036 |
+
final_response = chat.send_message(audit_parts)
|
| 1037 |
|
| 1038 |
+
state.final_answer = final_response.text
|
| 1039 |
+
|
| 1040 |
+
except Exception as e:
|
| 1041 |
+
# If the above fails, try the explicit message keyword
|
| 1042 |
+
state.add_log("🔄 Retrying audit with explicit message keyword...")
|
| 1043 |
+
final_response = chat.send_message(message=audit_parts)
|
| 1044 |
+
state.final_answer = final_response.text
|
| 1045 |
|
| 1046 |
+
else:
|
| 1047 |
+
state.add_log("⚠️ No images found in state. Skipping visual audit.")
|
| 1048 |
+
state.final_answer = response.text
|
| 1049 |
+
|
| 1050 |
state.add_log('🏁 **ANALYSIS COMPLETE**')
|
|
|
|
|
|
|
| 1051 |
state.done = True
|
| 1052 |
+
|
| 1053 |
|
| 1054 |
def run_agentic_workflow(user_question):
|
| 1055 |
state.done = False
|