File size: 7,698 Bytes
46074e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x=0.8):
    """Evaluate the simulation outcome based on PyBullet feedback"""
    
    # Extract robot position
    robot_x_pos = pybullet_feedback['robot_position'][0]
    
    # Check if robot crossed the obstacle
    robot_crossed_obstacle = robot_x_pos > obstacle_far_edge_x
    
    # Check if robot remains upright
    robot_remains_upright = pybullet_feedback['is_robot_upright']
    
    # Check for significant collision during pass
    # Simplified logic: If robot crossed, minor contacts are okay
    # If robot didn't cross and had contacts, that's a failure
    obstacle_contacts_exist = pybullet_feedback['obstacle_contacts_exist']
    
    if robot_crossed_obstacle:
        # If robot crossed, we consider it successful regardless of minor contacts
        no_significant_collision = True
    else:
        # If robot didn't cross and had contacts, it's likely stuck
        no_significant_collision = not obstacle_contacts_exist
    
    # Overall success requires all criteria to be met
    overall_success = (robot_crossed_obstacle and 
                      robot_remains_upright and 
                      no_significant_collision)
    
    # Determine specific failure point
    specific_failure_point = "none"
    if not robot_remains_upright:
        specific_failure_point = "fell_over"
    elif obstacle_contacts_exist and not robot_crossed_obstacle:
        specific_failure_point = "collided_and_stuck"
    elif not robot_crossed_obstacle:
        specific_failure_point = "failed_to_reach_or_cross"
    
    evaluation_results = {
        "robot_crossed_obstacle": robot_crossed_obstacle,
        "no_significant_collision_with_obstacle_during_pass": no_significant_collision,
        "robot_remains_upright": robot_remains_upright,
        "overall_success": overall_success,
        "specific_failure_point": specific_failure_point,
        "final_robot_x_position": robot_x_pos
    }
    
    return evaluation_results

def evaluate_simulation_outcome_with_criteria(pybullet_feedback, obstacle_far_edge_x=0.8, llm_success_conditions=None):
    """Enhanced evaluation that maps simulation results to LLM-defined success criteria"""
    
    # First get the standard evaluation
    standard_eval = evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x)
    
    # Add criteria-specific evaluation
    if llm_success_conditions:
        criteria_evaluation = {}
        
        for condition in llm_success_conditions:
            condition_lower = condition.lower()
            
            # Map LLM conditions to simulation results
            if "cross" in condition_lower and "obstacle" in condition_lower:
                criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_crossed_obstacle'] else "FAILED"
            
            elif "stability" in condition_lower or "stable" in condition_lower or "upright" in condition_lower:
                criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_remains_upright'] else "FAILED"
            
            elif "collision" in condition_lower or "stuck" in condition_lower:
                criteria_evaluation[condition] = "ACHIEVED" if standard_eval['no_significant_collision_with_obstacle_during_pass'] else "FAILED"
            
            elif "reach" in condition_lower and "position" in condition_lower:
                criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_crossed_obstacle'] else "FAILED"
            
            elif "quick" in condition_lower or "fast" in condition_lower or "efficient" in condition_lower:
                # For speed criteria, consider time and distance
                distance_ratio = standard_eval['final_robot_x_position'] / obstacle_far_edge_x if obstacle_far_edge_x > 0 else 0
                criteria_evaluation[condition] = "ACHIEVED" if distance_ratio > 0.9 else "PARTIALLY_ACHIEVED" if distance_ratio > 0.5 else "FAILED"
            
            elif "stop" in condition_lower or "halt" in condition_lower:
                # For stopping criteria, assume achieved if robot is stable and crossed
                criteria_evaluation[condition] = "ACHIEVED" if (standard_eval['robot_crossed_obstacle'] and standard_eval['robot_remains_upright']) else "FAILED"
            
            elif "land" in condition_lower:
                # For landing criteria (drones), assume achieved if stable and crossed
                criteria_evaluation[condition] = "ACHIEVED" if (standard_eval['robot_crossed_obstacle'] and standard_eval['robot_remains_upright']) else "FAILED"
            
            else:
                # Generic condition - map to overall success
                criteria_evaluation[condition] = "ACHIEVED" if standard_eval['overall_success'] else "UNDETERMINED"
        
        standard_eval["criteria_evaluation"] = criteria_evaluation
    
    return standard_eval

def format_feedback_for_llm(evaluation_results):
    """Format evaluation results into human-readable feedback for LLM"""
    
    success_status = "Succeeded" if evaluation_results['overall_success'] else "Failed"
    
    # Build collision note
    if evaluation_results['no_significant_collision_with_obstacle_during_pass']:
        collision_note = "No impeding contacts"
    else:
        collision_note = "Contacts detected with obstacle"
    
    # Build failure reason
    failure_reason = evaluation_results['specific_failure_point'] if not evaluation_results['overall_success'] else 'N/A'
    
    feedback = (
        f"Feedback: Obstacle passage attempt: {success_status}. "
        f"Crossed Obstacle: {evaluation_results['robot_crossed_obstacle']}. "
        f"Remained Upright: {evaluation_results['robot_remains_upright']}. "
        f"Collision Note: {collision_note}. "
        f"Failure reason: {failure_reason}. "
        f"Final X position: {evaluation_results['final_robot_x_position']:.2f}m."
    )
    
    return feedback

def format_feedback_for_llm_with_criteria(evaluation_results, llm_success_conditions=None):
    """Enhanced feedback format that includes LLM criteria evaluation"""
    
    # Start with standard feedback
    feedback = format_feedback_for_llm(evaluation_results)
    
    # Add criteria-specific feedback if available
    if llm_success_conditions and 'criteria_evaluation' in evaluation_results:
        feedback += "\n\nAssessment against your stated success conditions:"
        
        criteria_eval = evaluation_results['criteria_evaluation']
        for condition in llm_success_conditions:
            status = criteria_eval.get(condition, "UNDETERMINED")
            
            if status == "ACHIEVED":
                feedback += f"\n✅ '{condition}': ACHIEVED"
            elif status == "PARTIALLY_ACHIEVED":
                feedback += f"\n⚠️ '{condition}': PARTIALLY_ACHIEVED"
            elif status == "FAILED":
                feedback += f"\n❌ '{condition}': FAILED"
            else:
                feedback += f"\n❓ '{condition}': UNDETERMINED (based on core simulation checks)"
        
        # Add interpretation guidance
        feedback += f"\n\nCore simulation metrics:"
        feedback += f"\n- Distance traveled: {evaluation_results['final_robot_x_position']:.3f}m (target: >0.8m)"
        feedback += f"\n- Obstacle crossed: {'Yes' if evaluation_results['robot_crossed_obstacle'] else 'No'}"
        feedback += f"\n- Remained stable: {'Yes' if evaluation_results['robot_remains_upright'] else 'No'}"
        feedback += f"\n- Clean pass: {'Yes' if evaluation_results['no_significant_collision_with_obstacle_during_pass'] else 'No'}"
    
    return feedback