File size: 11,557 Bytes
da9a42e b40e9bd da9a42e b40e9bd da9a42e 3e43a4b da9a42e b40e9bd da9a42e b40e9bd da9a42e b40e9bd da9a42e 3e43a4b bc8cde1 3e43a4b da9a42e bc8cde1 3e43a4b bc8cde1 da9a42e 3e43a4b da9a42e bc8cde1 da9a42e 3e43a4b da9a42e bc8cde1 b40e9bd da9a42e 3e43a4b da9a42e b40e9bd bc8cde1 b40e9bd 3e43a4b b40e9bd bc8cde1 3e43a4b bc8cde1 3e43a4b b40e9bd bc8cde1 b40e9bd bc8cde1 b40e9bd 3e43a4b b40e9bd 3e43a4b bc8cde1 3e43a4b da9a42e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\ankelodon_multiagent_system\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from agent import build_workflow\n",
"from config import config\n",
"from tools.code_interpreter import safe_code_run"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"graph = build_workflow()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"💡 ════════════════════\n",
"💡 USER QUERY \n",
"💡 ════════════════════\n",
" • files: none provided\n",
"=== COMPLEXITY ASSESSMENT ===\n",
"Complexity: simple\n",
"Needs planning: False\n",
"Reasoning: This is a single-step arithmetic question (2+2). Although calculations technically require a tool per the special considerations, this is trivial and requires only one immediate operation, so it is SIMPLE.\n",
"=== SIMPLE EXECUTION ===\n",
"Response generated for simple query.\n",
"=== GENERATING EXECUTION REPORT ===\n",
"Report generated - Confidence: high\n",
"Key findings: 3\n",
"Data sources: 2\n",
"query_summary=\"User asked for the numeric result of the arithmetic expression '2+2'.\" approach_used=\"Direct evaluation using basic arithmetic: interpreted '+' as standard integer addition and computed the sum mentally without invoking external tools or files.\" tools_executed=[] key_findings=[\"The expression '2+2' was interpreted as standard integer addition.\", 'Computed result is 4.', 'No external tools or data were required to compute the result.'] data_sources=['Basic arithmetic rules (internal knowledge)', 'Conversation history confirming the query and an earlier direct answer'] assumptions_made=[\"The '+' operator denotes standard arithmetic addition on integers.\", 'Numbers are in the usual base-10 system and no special context (e.g., modular arithmetic or symbolic manipulation) was intended.'] confidence_level='high' limitations=['If the user intended a nonstandard context (modulo arithmetic, different base, or overloaded operator semantics), the answer could differ.', 'Extremely simple query; few realistic limitations beyond contextual ambiguity.'] final_answer='4'\n",
"=== ENHANCED ANSWER CRITIQUE ===\n",
"Quality Score: 8/10\n",
"Complete: True\n",
"Accurate: True\n",
"Issues found: [\"Performed the calculation mentally rather than using an external computational tool (triggers the evaluation framework's manual-calculation penalty).\"]\n",
"=== REPLAN DECISION ===\n",
"Iteration: 1/10\n",
"Quality score: 8\n",
"Needs replanning: False\n",
"Quality acceptable, ending execution\n"
]
}
],
"source": [
"query = \"What is 2+2\"\n",
"result = graph.invoke({\"query\" : query, \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FINAL ANSWER: 4\n"
]
}
],
"source": [
"print(result[\"final_answer\"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [SystemMessage(content='You are a COMPLEXITY ASSESSOR for a multi-tool agent system.\\nYour job is to analyze user queries and determine their complexity level and processing requirements.\\n\\nCOMPLEXITY LEVELS:\\n1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use\\n - Examples: \"What is photosynthesis?\", \"Define machine learning\", \"What\\'s the capital of France?\"\\n - NOTE: Simple math like \"2+2\" still requires calculator tool but counts as SIMPLE\\n\\n !ALSO: It can be a logical reasoning or explanation task that does not require tools.\\n \\n2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis\\n - Examples: \"Search for recent news about AI\", \"Analyze this CSV file for trends\", \"Calculate ROI from this data\"\\n - \"Compare two datasets\", \"Summarize multiple documents\"\\n \\n3. COMPLEX: Multi-step problems requiring planning, multiple tools, and sophisticated reasoning\\n - Examples: \"Research market trends and create investment strategy\", \"Analyze multiple data sources and predict outcomes\"\\n - \"Build comprehensive report from various inputs\", \"Multi-stage data processing with validation\"\\n\\nMOST OF THE LOGICAL TASKS ARE SIMPLE, UNLESS THEY REQUIRE TOOLS.\\n\\nASSESSMENT CRITERIA:\\n- Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)\\n- Tool complexity and dependencies between steps\\n- Data processing requirements and validation needs\\n- Need for intermediate reasoning and synthesis\\n- Risk of failure without proper step-by-step planning\\n- Presence of calculations (automatically requires tool usage)\\n\\nSPECIAL CONSIDERATIONS:\\n- Any calculation/counting task requires tools (affects complexity assessment)\\n- File analysis tasks usually need multiple steps (load + analyze + calculate)\\n- Research tasks typically need search + fetch + synthesis steps\\n- Comparison tasks need separate analysis steps for each item being compared\\n\\nRULES:\\n- SIMPLE queries may bypass planning for non-calculation tasks\\n- MODERATE queries benefit from lightweight planning\\n- COMPLEX queries require full planning with fallbacks\\n- When in doubt, err toward higher complexity\\n- Calculation tasks are never truly \"simple\" due to mandatory tool usage\\n\\nAnalyze the query and respond with your assessment.', additional_kwargs={}, response_metadata={}, id='db109164-6e6e-4c1f-82bb-93d6d9b64e6a'),\n",
" HumanMessage(content='Query: What is 2+2', additional_kwargs={}, response_metadata={}, id='6b9afadb-3463-40a2-989b-19f8a237f7fc'),\n",
" AIMessage(content='2 + 2 = 4', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 80, 'prompt_tokens': 1638, 'total_tokens': 1718, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 64, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CId3zSwgGIoDxYMuwG2xJfCLDiVuM', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--210d298d-a542-4458-8933-93ebf4c7bac0-0', usage_metadata={'input_tokens': 1638, 'output_tokens': 80, 'total_tokens': 1718, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 64}})],\n",
" 'query': 'What is 2+2',\n",
" 'final_answer': 'FINAL ANSWER: 4',\n",
" 'plan': None,\n",
" 'complexity_assessment': ComplexityLevel(level='simple', reasoning='This is a single-step arithmetic question (2+2). Although calculations technically require a tool per the special considerations, this is trivial and requires only one immediate operation, so it is SIMPLE.', needs_planning=False, suggested_approach='Perform the basic arithmetic (2+2) and return the result (4). No detailed planning or multi-step processing needed.'),\n",
" 'current_step': 0,\n",
" 'reasoning_done': False,\n",
" 'files': [],\n",
" 'critique_feedback': CritiqueFeedback(quality_score=8, is_complete=True, is_accurate=True, missing_elements=[], errors_found=[\"Performed the calculation mentally rather than using an external computational tool (triggers the evaluation framework's manual-calculation penalty).\"], suggested_improvements=['Use a computational tool or explicitly show the calculation steps even for trivial arithmetic to avoid the manual-calculation policy violation (e.g., evaluate with a calculator tool or print the operation and result).', \"Explicitly state assumptions up front (that '+' is standard integer addition in base 10) and, when relevant, ask a clarifying question if the user might have meant a nonstandard interpretation (modular arithmetic, different base, operator overloading).\", 'For transparency, include a short note citing the arithmetic rule used (e.g., basic integer addition) when delivering the result, even though the operation is trivial.'], needs_replanning=False, replan_instructions=None),\n",
" 'iteration_count': 1,\n",
" 'max_iterations': 10,\n",
" 'execution_report': ExecutionReport(query_summary=\"User asked for the numeric result of the arithmetic expression '2+2'.\", approach_used=\"Direct evaluation using basic arithmetic: interpreted '+' as standard integer addition and computed the sum mentally without invoking external tools or files.\", tools_executed=[], key_findings=[\"The expression '2+2' was interpreted as standard integer addition.\", 'Computed result is 4.', 'No external tools or data were required to compute the result.'], data_sources=['Basic arithmetic rules (internal knowledge)', 'Conversation history confirming the query and an earlier direct answer'], assumptions_made=[\"The '+' operator denotes standard arithmetic addition on integers.\", 'Numbers are in the usual base-10 system and no special context (e.g., modular arithmetic or symbolic manipulation) was intended.'], confidence_level='high', limitations=['If the user intended a nonstandard context (modulo arithmetic, different base, or overloaded operator semantics), the answer could differ.', 'Extremely simple query; few realistic limitations beyond contextual ambiguity.'], final_answer='4')}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#TO-DO\n",
"#1. Check routing with REPLANNER -> может придумывать несуществующие инструменты -> PARTIALLY COMPLETED\n",
"#2. Add crawling tool \n",
"#3. Enhance description of coder tool and прописать более четко в промпте важность вывода через print() или return или result/_ -> COMPLETED?\n",
"#4. Смягчить критика COMPLETED"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|