Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -138,10 +138,39 @@ def find_file(path: str) -> Optional[Path]:
|
|
| 138 |
|
| 139 |
return None
|
| 140 |
|
|
|
|
| 141 |
# =============================================================================
|
| 142 |
# PLANNING & REFLECTION TOOLS
|
| 143 |
# =============================================================================
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
class PlanInput(BaseModel):
|
| 146 |
question: str = Field(description="Brief summary of the task (keep under 100 chars)")
|
| 147 |
|
|
@@ -273,6 +302,7 @@ def validate_answer(proposed_answer: str, original_question: str) -> str:
|
|
| 273 |
return "β
VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
|
| 274 |
|
| 275 |
|
|
|
|
| 276 |
# =============================================================================
|
| 277 |
# CORE TOOLS
|
| 278 |
# =============================================================================
|
|
@@ -822,41 +852,6 @@ def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
|
|
| 822 |
return []
|
| 823 |
|
| 824 |
|
| 825 |
-
# =============================================================================
|
| 826 |
-
# CONDITIONAL EDGE FUNCTION
|
| 827 |
-
# =============================================================================
|
| 828 |
-
def should_continue(state: AgentState):
    """Route the graph after an agent step: finish, run tools, or loop back.

    Rules are applied in this order:

    1. The newest AI message requests ``final_answer_tool`` -> ``END``.
    2. The turn counter has reached ``MAX_TURNS`` -> ``END``.
    3. The newest AI message carries any tool call -> ``"tools"``.
    4. More than two messages exist and the last two are both AI
       messages -> ``END`` (loop guard).
    5. Anything else -> ``"agent"``.
    """
    history = state['messages']
    newest = history[-1]
    turn_no = state.get('turn', 0)
    wants_tools = isinstance(newest, AIMessage) and bool(newest.tool_calls)

    # A request for the final answer takes priority over every other rule.
    if wants_tools and any(
        call.get("name") == "final_answer_tool" for call in newest.tool_calls
    ):
        print("--- Condition: final_answer_tool called, ending. ---")
        return END

    # The turn budget is checked before tool routing, so pending tool calls
    # are not executed once the budget is spent.
    if turn_no >= MAX_TURNS:
        print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
        return END

    if wants_tools:
        print("--- Condition: Tools called, routing to tools node. ---")
        return "tools"

    # Two AI messages in a row without a tool call means the model is
    # talking to itself; stop rather than spin.
    back_to_back_ai = (
        len(history) > 2
        and isinstance(newest, AIMessage)
        and isinstance(history[-2], AIMessage)
    )
    if back_to_back_ai:
        print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {turn_no}). Ending to prevent loop. ---")
        return END

    print(f"--- Condition: No tool call (Turn {turn_no}). Continuing to agent. ---")
    return "agent"
|
| 858 |
-
|
| 859 |
-
|
| 860 |
# =============================================================================
|
| 861 |
# ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
|
| 862 |
# =============================================================================
|
|
@@ -896,31 +891,58 @@ class PlanningReflectionAgent:
|
|
| 896 |
π― YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
|
| 897 |
|
| 898 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 899 |
-
π
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 900 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 901 |
|
| 902 |
-
**PHASE 1:
|
| 903 |
-
ββ
|
| 904 |
-
ββ
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
ββ
|
| 910 |
-
|
| 911 |
-
β β’ Complex data β code_interpreter()
|
| 912 |
-
β β’ Web info β search_tool()
|
| 913 |
-
β β’ Specific page β scrape_and_retrieve()
|
| 914 |
-
β β’ Files β read_file()
|
| 915 |
ββ 3. After EACH tool, evaluate the result
|
| 916 |
ββ 4. Ask: "Do I have enough to answer now?"
|
| 917 |
|
| 918 |
-
**PHASE 3:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 919 |
ββ If no progress after 3-5 turns β call reflect_on_progress()
|
| 920 |
ββ If tools keep failing β try different approach
|
| 921 |
ββ If going in circles β step back and reconsider
|
| 922 |
|
| 923 |
-
**PHASE
|
| 924 |
ββ 1. When you have the answer β call validate_answer()
|
| 925 |
ββ 2. If validation passes β call final_answer_tool()
|
| 926 |
ββ 3. If validation fails β fix the issue first
|
|
@@ -929,21 +951,28 @@ class PlanningReflectionAgent:
|
|
| 929 |
π EXAMPLES - LEARN FROM THESE:
|
| 930 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 931 |
|
| 932 |
-
**Example 1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
Q: What is 127 Γ 83?
|
| 934 |
Turn 1: calculator("127 * 83") β 10541
|
| 935 |
Turn 2: validate_answer("10541", "What is 127 Γ 83?") β β
Pass
|
| 936 |
Turn 3: final_answer_tool("10541")
|
| 937 |
|
| 938 |
-
**Example
|
| 939 |
Q: What was the population of Einstein's birthplace in 1900?
|
| 940 |
-
Turn 1: create_plan("
|
| 941 |
Turn 2: search_tool("Albert Einstein birthplace") β Ulm, Germany
|
| 942 |
Turn 3: search_tool("Ulm Germany population 1900") β approximately 50,000
|
| 943 |
Turn 4: validate_answer("50000", "What was the population...") β β
Pass
|
| 944 |
Turn 5: final_answer_tool("50000")
|
| 945 |
|
| 946 |
-
**Example
|
| 947 |
Q: What's the average of the 'score' column in data.csv?
|
| 948 |
Turn 1: list_directory(".") β [files shown]
|
| 949 |
Turn 2: read_file("data.csv") β [content]
|
|
@@ -952,11 +981,11 @@ Turn 3: code_interpreter("import pandas as pd; df = pd.read_csv('data.csv'); pri
|
|
| 952 |
Turn 4: validate_answer("78.5", "What's the average...") β β
Pass
|
| 953 |
Turn 5: final_answer_tool("78.5")
|
| 954 |
|
| 955 |
-
**Example
|
| 956 |
Q: What's the GDP of the 2016 Olympics host?
|
| 957 |
Turn 1: search_tool("2016 Olympics") β [general info, no clear answer]
|
| 958 |
Turn 2: search_tool("Olympics 2016 location") β [still unclear]
|
| 959 |
-
Turn 3: reflect_on_progress("
|
| 960 |
β Try: "2016 Summer Olympics host country"
|
| 961 |
Turn 4: search_tool("2016 Summer Olympics host country") β Brazil
|
| 962 |
Turn 5: search_tool("Brazil GDP 2016") β $1.796 trillion
|
|
@@ -967,13 +996,13 @@ Turn 7: final_answer_tool("1.796 trillion")
|
|
| 967 |
β οΈ CRITICAL RULES - NEVER VIOLATE THESE:
|
| 968 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 969 |
|
| 970 |
-
1. **
|
| 971 |
-
2. **
|
| 972 |
-
3. **
|
| 973 |
-
4. **
|
| 974 |
-
5. **
|
| 975 |
-
6. **
|
| 976 |
-
7. **
|
| 977 |
|
| 978 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 979 |
π AVAILABLE TOOLS:
|
|
@@ -982,7 +1011,10 @@ Turn 7: final_answer_tool("1.796 trillion")
|
|
| 982 |
{tool_descriptions}
|
| 983 |
|
| 984 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 985 |
-
π― REMEMBER:
|
|
|
|
|
|
|
|
|
|
| 986 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 987 |
"""
|
| 988 |
|
|
@@ -1145,7 +1177,7 @@ Turn 7: final_answer_tool("1.796 trillion")
|
|
| 1145 |
|
| 1146 |
self.graph = graph_builder.compile()
|
| 1147 |
print("β
Planning & Reflection Agent graph compiled successfully.")
|
| 1148 |
-
|
| 1149 |
|
| 1150 |
def __call__(self, question: str) -> str:
|
| 1151 |
print(f"\n--- Starting Agent Run for Question ---")
|
|
|
|
| 138 |
|
| 139 |
return None
|
| 140 |
|
| 141 |
+
|
| 142 |
# =============================================================================
|
| 143 |
# PLANNING & REFLECTION TOOLS
|
| 144 |
# =============================================================================
|
| 145 |
|
| 146 |
+
class ThinkInput(BaseModel):
    """Argument schema for the ``think_through_logic`` tool."""

    reasoning: str = Field(description="Your step-by-step reasoning for a logic puzzle (keep under 200 chars)")


# NOTE(review): `tool` is presumably LangChain's decorator, which exposes the
# function docstring to the model as the tool description - confirm before
# rewording the docstring, since that would change what the model sees.
@tool(args_schema=ThinkInput)
def think_through_logic(reasoning: str) -> str:
    """
    Use this to work through logic puzzles, riddles, or reasoning problems.

    Call this when:
    - The question is a riddle or brain teaser
    - You need to reason through a logical problem
    - No external information is needed, just thinking

    After thinking through the logic, use calculator if math is involved,
    then validate_answer and final_answer_tool.

    NOTE: Keep reasoning summary brief (under 200 chars).
    """
    # Log only the first 100 characters so long reasoning does not flood stdout.
    print(f"🧠 Thinking through logic: {reasoning[:100]}...")

    # Echo the reasoning back and spell out the next steps so the model
    # follows up with calculator / validate_answer / final_answer_tool.
    return (
        f"✅ Logic reasoning recorded: {reasoning}\n"
        "\n"
        "Now:\n"
        "1. If there's any math to calculate, use calculator()\n"
        "2. Once you have the answer, call validate_answer()\n"
        "3. Then call final_answer_tool() with just the answer"
    )
|
| 172 |
+
|
| 173 |
+
|
| 174 |
class PlanInput(BaseModel):
|
| 175 |
question: str = Field(description="Brief summary of the task (keep under 100 chars)")
|
| 176 |
|
|
|
|
| 302 |
return "β
VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
|
| 303 |
|
| 304 |
|
| 305 |
+
# =============================================================================
|
| 306 |
# =============================================================================
|
| 307 |
# CORE TOOLS
|
| 308 |
# =============================================================================
|
|
|
|
| 852 |
return []
|
| 853 |
|
| 854 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 855 |
# =============================================================================
|
| 856 |
# ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
|
| 857 |
# =============================================================================
|
|
|
|
| 891 |
🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
|
| 892 |
|
| 893 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 894 |
+
π QUESTION TYPES & STRATEGIES:
|
| 895 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 896 |
+
|
| 897 |
+
**TYPE 1: LOGIC PUZZLES / RIDDLES** (No tools needed)
|
| 898 |
+
- Riddles, brain teasers, logical reasoning problems
|
| 899 |
+
- Strategy: Think through the logic, use calculator for any math
|
| 900 |
+
- Example: "If all but 30 of 200 coins are face-up, make equal face-down piles"
|
| 901 |
+
→ This is pure logic. Think it through, then use final_answer_tool
|
| 902 |
+
|
| 903 |
+
**TYPE 2: FACTUAL QUESTIONS** (Need web search)
|
| 904 |
+
- Who, what, when, where questions about real world
|
| 905 |
+
- Strategy: search_tool → scrape_and_retrieve if needed
|
| 906 |
+
- Example: "What was Einstein's birthplace population in 1900?"
|
| 907 |
+
|
| 908 |
+
**TYPE 3: DATA ANALYSIS** (Need files + code)
|
| 909 |
+
- Questions about CSV, Excel, or other data files
|
| 910 |
+
- Strategy: list_directory → read_file → code_interpreter
|
| 911 |
+
- Example: "What's the average of column X in data.csv?"
|
| 912 |
+
|
| 913 |
+
**TYPE 4: CALCULATIONS** (Need calculator/code)
|
| 914 |
+
- Math problems, computations
|
| 915 |
+
- Strategy: calculator for simple math, code_interpreter for complex
|
| 916 |
+
- Example: "What is 127 × 83 + sqrt(144)?"
|
| 917 |
+
|
| 918 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 919 |
+
π MANDATORY PROTOCOL:
|
| 920 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 921 |
|
| 922 |
+
**PHASE 1: IDENTIFY QUESTION TYPE**
|
| 923 |
+
ββ Is this a logic puzzle? β Think through it, use calculator if needed
|
| 924 |
+
ββ Need real-world facts? β Use search/scrape tools
|
| 925 |
+
ββ Need to analyze files? β Use file/code tools
|
| 926 |
+
ββ Just math? β Use calculator
|
| 927 |
+
|
| 928 |
+
**PHASE 2: FOR TOOL-BASED QUESTIONS**
|
| 929 |
+
ββ 1. Call create_plan() for multi-step questions
|
| 930 |
+
ββ 2. Execute ONE step at a time
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
ββ 3. After EACH tool, evaluate the result
|
| 932 |
ββ 4. Ask: "Do I have enough to answer now?"
|
| 933 |
|
| 934 |
+
**PHASE 3: FOR LOGIC PUZZLES**
|
| 935 |
+
ββ 1. Think through the logic step-by-step
|
| 936 |
+
ββ 2. Use calculator ONLY if there's arithmetic
|
| 937 |
+
ββ 3. Once you've solved it, call validate_answer()
|
| 938 |
+
ββ 4. Then call final_answer_tool()
|
| 939 |
+
|
| 940 |
+
**PHASE 4: REFLECTION (If stuck)**
|
| 941 |
ββ If no progress after 3-5 turns β call reflect_on_progress()
|
| 942 |
ββ If tools keep failing β try different approach
|
| 943 |
ββ If going in circles β step back and reconsider
|
| 944 |
|
| 945 |
+
**PHASE 5: VALIDATION & SUBMISSION**
|
| 946 |
ββ 1. When you have the answer β call validate_answer()
|
| 947 |
ββ 2. If validation passes β call final_answer_tool()
|
| 948 |
ββ 3. If validation fails β fix the issue first
|
|
|
|
| 951 |
π EXAMPLES - LEARN FROM THESE:
|
| 952 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 953 |
|
| 954 |
+
**Example 1: Logic Puzzle (NO TOOLS EXCEPT CALCULATOR/FINAL)**
|
| 955 |
+
Q: If you have 200 coins with 30 face-down, and divide into 2 piles with equal face-down...
|
| 956 |
+
Turn 1: Think through: If I take 30 coins and flip them all, one pile has X face-down...
|
| 957 |
+
Turn 2: calculator("30") → 30
|
| 958 |
+
Turn 3: validate_answer("30", original_q) → ✅ Pass
|
| 959 |
+
Turn 4: final_answer_tool("30")
|
| 960 |
+
|
| 961 |
+
**Example 2: Simple Math**
|
| 962 |
Q: What is 127 × 83?
|
| 963 |
Turn 1: calculator("127 * 83") → 10541
|
| 964 |
Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
|
| 965 |
Turn 3: final_answer_tool("10541")
|
| 966 |
|
| 967 |
+
**Example 3: Multi-step Research**
|
| 968 |
Q: What was the population of Einstein's birthplace in 1900?
|
| 969 |
+
Turn 1: create_plan("Brief: Einstein birthplace pop 1900")
|
| 970 |
Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
|
| 971 |
Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
|
| 972 |
Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
|
| 973 |
Turn 5: final_answer_tool("50000")
|
| 974 |
|
| 975 |
+
**Example 4: File + Calculation**
|
| 976 |
Q: What's the average of the 'score' column in data.csv?
|
| 977 |
Turn 1: list_directory(".") → [files shown]
|
| 978 |
Turn 2: read_file("data.csv") → [content]
|
|
|
|
| 981 |
Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
|
| 982 |
Turn 5: final_answer_tool("78.5")
|
| 983 |
|
| 984 |
+
**Example 5: Getting Unstuck**
|
| 985 |
Q: What's the GDP of the 2016 Olympics host?
|
| 986 |
Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
|
| 987 |
Turn 2: search_tool("Olympics 2016 location") → [still unclear]
|
| 988 |
+
Turn 3: reflect_on_progress("Searching but not getting host country")
|
| 989 |
→ Try: "2016 Summer Olympics host country"
|
| 990 |
Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
|
| 991 |
Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
|
|
|
|
| 996 |
⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
|
| 997 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 998 |
|
| 999 |
+
1. **IDENTIFY QUESTION TYPE FIRST**: Logic puzzle vs. factual vs. data vs. math
|
| 1000 |
+
2. **LOGIC PUZZLES**: Don't use search/file tools. Just think + validate + final_answer
|
| 1001 |
+
3. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn
|
| 1002 |
+
4. **EXACT FORMAT**: Answer must be EXACTLY what was asked for
|
| 1003 |
+
5. **NO FLUFF**: Never add "The answer is" or explanations in final answer
|
| 1004 |
+
6. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
|
| 1005 |
+
7. **DON'T LOOP**: If 2 consecutive turns produce no tool calls, you're stuck - call a tool!
|
| 1006 |
|
| 1007 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1008 |
π AVAILABLE TOOLS:
|
|
|
|
| 1011 |
{tool_descriptions}
|
| 1012 |
|
| 1013 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1014 |
+
🎯 REMEMBER:
|
| 1015 |
+
- Logic puzzles: Think → Calculator (if needed) → Validate → Final Answer
|
| 1016 |
+
- Factual questions: Plan → Search → Validate → Final Answer
|
| 1017 |
+
- Always call a tool - never just output reasoning text!
|
| 1018 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1019 |
"""
|
| 1020 |
|
|
|
|
| 1177 |
|
| 1178 |
self.graph = graph_builder.compile()
|
| 1179 |
print("β
Planning & Reflection Agent graph compiled successfully.")
|
| 1180 |
+
|
| 1181 |
|
| 1182 |
def __call__(self, question: str) -> str:
|
| 1183 |
print(f"\n--- Starting Agent Run for Question ---")
|