Spaces:

YongdongWang
/

DART-LLM_Task_Decomposer

No application file

File size: 6,136 Bytes

92ef79b

#!/usr/bin/env python3
"""
Test script for persistent editing functionality
Tests multiple edit cycles to ensure task plans persist correctly
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from gradio_llm_interface import GradioLlmInterface
import json

def test_edit_cycle():
    """Walk one state dict through generate → edit → update → deploy → re-edit.

    The same ``state`` dict is passed to every interface call. The test passes
    only if the editor, when opened a second time after deployment, still
    contains the edited task name ("move_soil_1_edited").

    Returns:
        bool: True when all five steps succeed; False at the first failed step
        (a diagnostic line is printed before returning).
    """
    print("Testing Complete Edit Cycle...")
    print("=" * 50)

    ui = GradioLlmInterface()

    # Step 1: seed the state with a hand-written plan standing in for LLM output.
    seed_task = {
        "task": "move_soil_1",
        "instruction_function": {
            "name": "move_soil",
            "robot_ids": ["robot_excavator_01"],
            "dependencies": [],
            "object_keywords": ["soil_pile"]
        }
    }
    state = {'pending_task_plan': {"tasks": [seed_task]}}
    print("✓ Step 1: Initial task plan created")

    # Step 2: the editor must open (a 4-item result) and show the seeded plan.
    first_open = ui.show_task_plan_editor(state)
    if not (first_open and len(first_open) == 4):
        print("✗ Step 2: Failed to open editor")
        return False

    first_view, _dag_btn, _validate_btn, _status = first_open
    if "move_soil" not in first_view.get('value', ''):
        print("✗ Step 2: Editor does not contain initial plan")
        return False
    print("✓ Step 2: Editor opened with correct initial plan")

    # Step 3: submit an edited plan (renamed task plus a second, dependent task).
    edited_plan_json = """{
  "tasks": [
    {
      "task": "move_soil_1_edited",
      "instruction_function": {
        "name": "move_soil_edited",
        "robot_ids": ["robot_excavator_01", "robot_dump_truck_01"],
        "dependencies": [],
        "object_keywords": ["soil_pile", "edited_keyword"]
      }
    },
    {
      "task": "transport_soil_1",
      "instruction_function": {
        "name": "transport_soil",
        "robot_ids": ["robot_dump_truck_01"],
        "dependencies": ["move_soil_1_edited"],
        "object_keywords": ["destination"]
      }
    }
  ]
}"""

    dag_update = ui.update_dag_from_editor(edited_plan_json, state)
    if not (dag_update and len(dag_update) == 6):
        print("✗ Step 3: Failed to update DAG from editor")
        return False
    print("✓ Step 3: DAG updated with edited plan")

    # Step 4: deploy; any falsy result counts as a failed deployment.
    if not ui.validate_and_deploy_task_plan(state):
        print("✗ Step 4: Failed to deploy plan")
        return False
    print("✓ Step 4: Plan deployed successfully")

    # Step 5: reopening the editor should show the deployed (edited) plan.
    second_open = ui.show_task_plan_editor(state)
    if not (second_open and len(second_open) == 4):
        print("✗ Step 5: Failed to open editor second time")
        return False

    second_view, _, _, _second_status = second_open
    if "move_soil_1_edited" not in second_view.get('value', ''):
        print("✗ Step 5: Editor lost the deployed plan content")
        # Show only the first 100 characters to keep the diagnostic short.
        preview = second_view.get('value', 'No content')[:100]
        print(f"  Editor content: {preview}...")
        return False

    print("✓ Step 5: Editor opened with deployed plan (persistent editing working)")
    return True

def test_empty_state_handling():
    """Open the editor with an empty state dict and expect the example template.

    The check passes when the editor result has four items and its value
    contains the marker "example_task_1".

    Returns:
        bool: True if the marker is present, False otherwise.
    """
    print("\nTesting Empty State Handling...")
    print("=" * 40)

    ui = GradioLlmInterface()
    outcome = ui.show_task_plan_editor({})

    # Guard: anything other than a truthy 4-item result is a hard failure.
    if not (outcome and len(outcome) == 4):
        print("✗ Failed to handle empty state")
        return False

    view, _, _, _status = outcome
    if "example_task_1" not in view.get('value', ''):
        print("✗ Empty state does not show proper template")
        return False

    print("✓ Empty state shows example template")
    return True

def test_malformed_state_handling():
    """Open the editor with a pending plan whose task list is empty.

    "Malformed" here means a ``pending_task_plan`` that exists but holds no
    tasks. The check passes when the editor result has four items and its
    value contains the marker "example_task_1".

    Returns:
        bool: True if the marker is present, False otherwise.
    """
    print("\nTesting Malformed State Handling...")
    print("=" * 40)

    ui = GradioLlmInterface()

    # A plan is present in the state, but it has an empty tasks array.
    state_without_tasks = {'pending_task_plan': {'tasks': []}}
    outcome = ui.show_task_plan_editor(state_without_tasks)

    # Guard: anything other than a truthy 4-item result is a hard failure.
    if not (outcome and len(outcome) == 4):
        print("✗ Failed to handle malformed state")
        return False

    view, _, _, _status = outcome
    if "example_task_1" not in view.get('value', ''):
        print("✗ Malformed state not handled properly")
        return False

    print("✓ Malformed state (empty tasks) handled correctly")
    return True

def main():
    """Run all persistent editing tests and print a summary.

    Each test is called in turn and counts as passed only when it returns a
    truthy value. An exception raised by a test is reported with the test's
    name and full traceback and counted as a failure, so one crashing test
    does not stop the remaining tests from running.

    Returns:
        bool: True when every test passed, False otherwise.
    """
    # Local import: only needed on the failure path of this runner.
    import traceback

    print("🔄 Persistent Editing Tests")
    print("=" * 50)

    tests = [
        test_edit_cycle,
        test_empty_state_handling,
        test_malformed_state_handling,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            # Top-level boundary: keep going, but say which test crashed and
            # where, instead of reducing the error to its message alone.
            print(f"✗ {test.__name__} failed with exception: {e}")
            print(traceback.format_exc())

    print("\n" + "=" * 50)
    print(f"Persistent Editing Tests passed: {passed}/{total}")

    if passed != total:
        print("❌ Some persistent editing tests failed!")
        return False

    print("🎉 All persistent editing tests passed!")
    print("\n🔄 Persistent Editing Features:")
    print("  ✓ Task plans persist through edit cycles")
    print("  ✓ Deployed plans can be re-edited")
    print("  ✓ State management handles edge cases")
    print("  ✓ Proper fallback to templates when needed")
    return True

if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when all tests passed, else 1.
    sys.exit(0 if main() else 1)