gabejavitt committed on
Commit
ddd60f9
·
verified ·
1 Parent(s): 1bcb5c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -66
app.py CHANGED
@@ -138,10 +138,39 @@ def find_file(path: str) -> Optional[Path]:
138
 
139
  return None
140
 
 
141
  # =============================================================================
142
  # PLANNING & REFLECTION TOOLS
143
  # =============================================================================
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  class PlanInput(BaseModel):
146
  question: str = Field(description="Brief summary of the task (keep under 100 chars)")
147
 
@@ -273,6 +302,7 @@ def validate_answer(proposed_answer: str, original_question: str) -> str:
273
  return "✅ VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
274
 
275
 
 
276
  # =============================================================================
277
  # CORE TOOLS
278
  # =============================================================================
@@ -822,41 +852,6 @@ def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
822
  return []
823
 
824
 
825
- # =============================================================================
826
- # CONDITIONAL EDGE FUNCTION
827
- # =============================================================================
828
- def should_continue(state: AgentState):
829
- """Decide whether to continue, call tools, or end."""
830
- last_message = state['messages'][-1]
831
- current_turn = state.get('turn', 0)
832
-
833
- # Check for final_answer_tool
834
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
835
- for tool_call in last_message.tool_calls:
836
- if tool_call.get("name") == "final_answer_tool":
837
- print("--- Condition: final_answer_tool called, ending. ---")
838
- return END
839
-
840
- # Check turn limit
841
- if current_turn >= MAX_TURNS:
842
- print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
843
- return END
844
-
845
- # Route to tools if tool calls exist
846
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
847
- print("--- Condition: Tools called, routing to tools node. ---")
848
- return "tools"
849
-
850
- # Loop prevention
851
- if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
852
- print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
853
- return END
854
-
855
- # Loop back to agent
856
- print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
857
- return "agent"
858
-
859
-
860
  # =============================================================================
861
  # ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
862
  # =============================================================================
@@ -896,31 +891,58 @@ class PlanningReflectionAgent:
896
  🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
897
 
898
  ═══════════════════════════════════════════════════════════════
899
- 📋 MANDATORY PROTOCOL - FOLLOW THIS RELIGIOUSLY:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900
  ═══════════════════════════════════════════════════════════════
901
 
902
- **PHASE 1: PLANNING (For complex/multi-step questions)**
903
- ├─ 1. Call create_plan() to think through your approach
904
- ├─ 2. Identify what information you need
905
- └─ 3. Determine the sequence of steps
906
-
907
- **PHASE 2: EXECUTION (One step at a time)**
908
- ├─ 1. Take ONE action per turn
909
- ├─ 2. Use the RIGHT tool for each task:
910
- │ • Simple math → calculator()
911
- │ • Complex data → code_interpreter()
912
- │ • Web info → search_tool()
913
- │ • Specific page → scrape_and_retrieve()
914
- │ • Files → read_file()
915
  ├─ 3. After EACH tool, evaluate the result
916
  └─ 4. Ask: "Do I have enough to answer now?"
917
 
918
- **PHASE 3: REFLECTION (If stuck)**
 
 
 
 
 
 
919
  ├─ If no progress after 3-5 turns → call reflect_on_progress()
920
  ├─ If tools keep failing → try different approach
921
  └─ If going in circles → step back and reconsider
922
 
923
- **PHASE 4: VALIDATION & SUBMISSION**
924
  ├─ 1. When you have the answer → call validate_answer()
925
  ├─ 2. If validation passes → call final_answer_tool()
926
  └─ 3. If validation fails → fix the issue first
@@ -929,21 +951,28 @@ class PlanningReflectionAgent:
929
  🎓 EXAMPLES - LEARN FROM THESE:
930
  ═══════════════════════════════════════════════════════════════
931
 
932
- **Example 1: Simple Math**
 
 
 
 
 
 
 
933
  Q: What is 127 × 83?
934
  Turn 1: calculator("127 * 83") → 10541
935
  Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
936
  Turn 3: final_answer_tool("10541")
937
 
938
- **Example 2: Multi-step Research**
939
  Q: What was the population of Einstein's birthplace in 1900?
940
- Turn 1: create_plan("What was the population of Einstein's birthplace in 1900?")
941
  Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
942
  Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
943
  Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
944
  Turn 5: final_answer_tool("50000")
945
 
946
- **Example 3: File + Calculation**
947
  Q: What's the average of the 'score' column in data.csv?
948
  Turn 1: list_directory(".") → [files shown]
949
  Turn 2: read_file("data.csv") → [content]
@@ -952,11 +981,11 @@ Turn 3: code_interpreter("import pandas as pd; df = pd.read_csv('data.csv'); pri
952
  Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
953
  Turn 5: final_answer_tool("78.5")
954
 
955
- **Example 4: Getting Unstuck**
956
  Q: What's the GDP of the 2016 Olympics host?
957
  Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
958
  Turn 2: search_tool("Olympics 2016 location") → [still unclear]
959
- Turn 3: reflect_on_progress("Tried searching but not getting clear host country")
960
  → Try: "2016 Summer Olympics host country"
961
  Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
962
  Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
@@ -967,13 +996,13 @@ Turn 7: final_answer_tool("1.796 trillion")
967
  ⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
968
  ═══════════════════════════════════════════════════════════════
969
 
970
- 1. **NO GUESSING**: Always use tools. Never use your own knowledge.
971
- 2. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn.
972
- 3. **EXACT FORMAT**: Answer must be EXACTLY what was asked for.
973
- 4. **NO FLUFF**: Never add "The answer is" or explanations in final answer.
974
- 5. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool().
975
- 6. **PLAN COMPLEX TASKS**: Multi-step questions need create_plan() first.
976
- 7. **REFLECT WHEN STUCK**: If no progress after 5 turns, call reflect_on_progress().
977
 
978
  ═══════════════════════════════════════════════════════════════
979
  📚 AVAILABLE TOOLS:
@@ -982,7 +1011,10 @@ Turn 7: final_answer_tool("1.796 trillion")
982
  {tool_descriptions}
983
 
984
  ═══════════════════════════════════════════════════════════════
985
- 🎯 REMEMBER: Quality over speed. Think carefully, plan ahead, execute methodically.
 
 
 
986
  ═══════════════════════════════════════════════════════════════
987
  """
988
 
@@ -1145,7 +1177,7 @@ Turn 7: final_answer_tool("1.796 trillion")
1145
 
1146
  self.graph = graph_builder.compile()
1147
  print("✅ Planning & Reflection Agent graph compiled successfully.")
1148
-
1149
 
1150
  def __call__(self, question: str) -> str:
1151
  print(f"\n--- Starting Agent Run for Question ---")
 
138
 
139
  return None
140
 
141
+
142
  # =============================================================================
143
  # PLANNING & REFLECTION TOOLS
144
  # =============================================================================
145
 
146
+ class ThinkInput(BaseModel):
147
+ reasoning: str = Field(description="Your step-by-step reasoning for a logic puzzle (keep under 200 chars)")
148
+
149
+ @tool(args_schema=ThinkInput)
150
+ def think_through_logic(reasoning: str) -> str:
151
+ """
152
+ Use this to work through logic puzzles, riddles, or reasoning problems.
153
+
154
+ Call this when:
155
+ - The question is a riddle or brain teaser
156
+ - You need to reason through a logical problem
157
+ - No external information is needed, just thinking
158
+
159
+ After thinking through the logic, use calculator if math is involved,
160
+ then validate_answer and final_answer_tool.
161
+
162
+ NOTE: Keep reasoning summary brief (under 200 chars).
163
+ """
164
+ print(f"🧠 Thinking through logic: {reasoning[:100]}...")
165
+
166
+ return f"""✅ Logic reasoning recorded: {reasoning}
167
+
168
+ Now:
169
+ 1. If there's any math to calculate, use calculator()
170
+ 2. Once you have the answer, call validate_answer()
171
+ 3. Then call final_answer_tool() with just the answer"""
172
+
173
+
174
  class PlanInput(BaseModel):
175
  question: str = Field(description="Brief summary of the task (keep under 100 chars)")
176
 
 
302
  return "✅ VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
303
 
304
 
305
+ # =============================================================================
306
  # =============================================================================
307
  # CORE TOOLS
308
  # =============================================================================
 
852
  return []
853
 
854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
  # =============================================================================
856
  # ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
857
  # =============================================================================
 
891
  🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
892
 
893
  ═══════════════════════════════════════════════════════════════
894
+ 📋 QUESTION TYPES & STRATEGIES:
895
+ ═══════════════════════════════════════════════════════════════
896
+
897
+ **TYPE 1: LOGIC PUZZLES / RIDDLES** (No tools needed)
898
+ - Riddles, brain teasers, logical reasoning problems
899
+ - Strategy: Think through the logic, use calculator for any math
900
+ - Example: "If all but 30 of 200 coins are face-up, make equal face-down piles"
901
+ → This is pure logic. Think it through, then use final_answer_tool
902
+
903
+ **TYPE 2: FACTUAL QUESTIONS** (Need web search)
904
+ - Who, what, when, where questions about real world
905
+ - Strategy: search_tool → scrape_and_retrieve if needed
906
+ - Example: "What was Einstein's birthplace population in 1900?"
907
+
908
+ **TYPE 3: DATA ANALYSIS** (Need files + code)
909
+ - Questions about CSV, Excel, or other data files
910
+ - Strategy: list_directory → read_file → code_interpreter
911
+ - Example: "What's the average of column X in data.csv?"
912
+
913
+ **TYPE 4: CALCULATIONS** (Need calculator/code)
914
+ - Math problems, computations
915
+ - Strategy: calculator for simple math, code_interpreter for complex
916
+ - Example: "What is 127 × 83 + sqrt(144)?"
917
+
918
+ ═══════════════════════════════════════════════════════════════
919
+ 📋 MANDATORY PROTOCOL:
920
  ═══════════════════════════════════════════════════════════════
921
 
922
+ **PHASE 1: IDENTIFY QUESTION TYPE**
923
+ ├─ Is this a logic puzzle? → Think through it, use calculator if needed
924
+ ├─ Need real-world facts? → Use search/scrape tools
925
+ ├─ Need to analyze files? → Use file/code tools
926
+ └─ Just math? → Use calculator
927
+
928
+ **PHASE 2: FOR TOOL-BASED QUESTIONS**
929
+ ├─ 1. Call create_plan() for multi-step questions
930
+ ├─ 2. Execute ONE step at a time
 
 
 
 
931
  ├─ 3. After EACH tool, evaluate the result
932
  └─ 4. Ask: "Do I have enough to answer now?"
933
 
934
+ **PHASE 3: FOR LOGIC PUZZLES**
935
+ ├─ 1. Think through the logic step-by-step
936
+ ├─ 2. Use calculator ONLY if there's arithmetic
937
+ ├─ 3. Once you've solved it, call validate_answer()
938
+ └─ 4. Then call final_answer_tool()
939
+
940
+ **PHASE 4: REFLECTION (If stuck)**
941
  ├─ If no progress after 3-5 turns → call reflect_on_progress()
942
  ├─ If tools keep failing → try different approach
943
  └─ If going in circles → step back and reconsider
944
 
945
+ **PHASE 5: VALIDATION & SUBMISSION**
946
  ├─ 1. When you have the answer → call validate_answer()
947
  ├─ 2. If validation passes → call final_answer_tool()
948
  └─ 3. If validation fails → fix the issue first
 
951
  🎓 EXAMPLES - LEARN FROM THESE:
952
  ═══════════════════════════════════════════════════════════════
953
 
954
+ **Example 1: Logic Puzzle (NO TOOLS EXCEPT CALCULATOR/FINAL)**
955
+ Q: If you have 200 coins with 30 face-down, and divide into 2 piles with equal face-down...
956
+ Turn 1: Think through: If I take 30 coins and flip them all, one pile has X face-down...
957
+ Turn 2: calculator("30") → 30
958
+ Turn 3: validate_answer("30", original_q) → ✅ Pass
959
+ Turn 4: final_answer_tool("30")
960
+
961
+ **Example 2: Simple Math**
962
  Q: What is 127 × 83?
963
  Turn 1: calculator("127 * 83") → 10541
964
  Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
965
  Turn 3: final_answer_tool("10541")
966
 
967
+ **Example 3: Multi-step Research**
968
  Q: What was the population of Einstein's birthplace in 1900?
969
+ Turn 1: create_plan("Brief: Einstein birthplace pop 1900")
970
  Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
971
  Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
972
  Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
973
  Turn 5: final_answer_tool("50000")
974
 
975
+ **Example 4: File + Calculation**
976
  Q: What's the average of the 'score' column in data.csv?
977
  Turn 1: list_directory(".") → [files shown]
978
  Turn 2: read_file("data.csv") → [content]
 
981
  Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
982
  Turn 5: final_answer_tool("78.5")
983
 
984
+ **Example 5: Getting Unstuck**
985
  Q: What's the GDP of the 2016 Olympics host?
986
  Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
987
  Turn 2: search_tool("Olympics 2016 location") → [still unclear]
988
+ Turn 3: reflect_on_progress("Searching but not getting host country")
989
  → Try: "2016 Summer Olympics host country"
990
  Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
991
  Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
 
996
  ⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
997
  ═══════════════════════════════════════════════════════════════
998
 
999
+ 1. **IDENTIFY QUESTION TYPE FIRST**: Logic puzzle vs. factual vs. data vs. math
1000
+ 2. **LOGIC PUZZLES**: Don't use search/file tools. Just think + validate + final_answer
1001
+ 3. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn
1002
+ 4. **EXACT FORMAT**: Answer must be EXACTLY what was asked for
1003
+ 5. **NO FLUFF**: Never add "The answer is" or explanations in final answer
1004
+ 6. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
1005
+ 7. **DON'T LOOP**: If 2 consecutive turns produce no tool calls, you're stuck - call a tool!
1006
 
1007
  ═══════════════════════════════════════════════════════════════
1008
  📚 AVAILABLE TOOLS:
 
1011
  {tool_descriptions}
1012
 
1013
  ═══════════════════════════════════════════════════════════════
1014
+ 🎯 REMEMBER:
1015
+ - Logic puzzles: Think β†’ Calculator (if needed) β†’ Validate β†’ Final Answer
1016
+ - Factual questions: Plan β†’ Search β†’ Validate β†’ Final Answer
1017
+ - Always call a tool - never just output reasoning text!
1018
  ═══════════════════════════════════════════════════════════════
1019
  """
1020
 
 
1177
 
1178
  self.graph = graph_builder.compile()
1179
  print("✅ Planning & Reflection Agent graph compiled successfully.")
1180
+
1181
 
1182
  def __call__(self, question: str) -> str:
1183
  print(f"\n--- Starting Agent Run for Question ---")