Spaces:
No application file
No application file
File size: 7,698 Bytes
46074e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
def evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x=0.8):
"""Evaluate the simulation outcome based on PyBullet feedback"""
# Extract robot position
robot_x_pos = pybullet_feedback['robot_position'][0]
# Check if robot crossed the obstacle
robot_crossed_obstacle = robot_x_pos > obstacle_far_edge_x
# Check if robot remains upright
robot_remains_upright = pybullet_feedback['is_robot_upright']
# Check for significant collision during pass
# Simplified logic: If robot crossed, minor contacts are okay
# If robot didn't cross and had contacts, that's a failure
obstacle_contacts_exist = pybullet_feedback['obstacle_contacts_exist']
if robot_crossed_obstacle:
# If robot crossed, we consider it successful regardless of minor contacts
no_significant_collision = True
else:
# If robot didn't cross and had contacts, it's likely stuck
no_significant_collision = not obstacle_contacts_exist
# Overall success requires all criteria to be met
overall_success = (robot_crossed_obstacle and
robot_remains_upright and
no_significant_collision)
# Determine specific failure point
specific_failure_point = "none"
if not robot_remains_upright:
specific_failure_point = "fell_over"
elif obstacle_contacts_exist and not robot_crossed_obstacle:
specific_failure_point = "collided_and_stuck"
elif not robot_crossed_obstacle:
specific_failure_point = "failed_to_reach_or_cross"
evaluation_results = {
"robot_crossed_obstacle": robot_crossed_obstacle,
"no_significant_collision_with_obstacle_during_pass": no_significant_collision,
"robot_remains_upright": robot_remains_upright,
"overall_success": overall_success,
"specific_failure_point": specific_failure_point,
"final_robot_x_position": robot_x_pos
}
return evaluation_results
def evaluate_simulation_outcome_with_criteria(pybullet_feedback, obstacle_far_edge_x=0.8, llm_success_conditions=None):
"""Enhanced evaluation that maps simulation results to LLM-defined success criteria"""
# First get the standard evaluation
standard_eval = evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x)
# Add criteria-specific evaluation
if llm_success_conditions:
criteria_evaluation = {}
for condition in llm_success_conditions:
condition_lower = condition.lower()
# Map LLM conditions to simulation results
if "cross" in condition_lower and "obstacle" in condition_lower:
criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_crossed_obstacle'] else "FAILED"
elif "stability" in condition_lower or "stable" in condition_lower or "upright" in condition_lower:
criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_remains_upright'] else "FAILED"
elif "collision" in condition_lower or "stuck" in condition_lower:
criteria_evaluation[condition] = "ACHIEVED" if standard_eval['no_significant_collision_with_obstacle_during_pass'] else "FAILED"
elif "reach" in condition_lower and "position" in condition_lower:
criteria_evaluation[condition] = "ACHIEVED" if standard_eval['robot_crossed_obstacle'] else "FAILED"
elif "quick" in condition_lower or "fast" in condition_lower or "efficient" in condition_lower:
# For speed criteria, consider time and distance
distance_ratio = standard_eval['final_robot_x_position'] / obstacle_far_edge_x if obstacle_far_edge_x > 0 else 0
criteria_evaluation[condition] = "ACHIEVED" if distance_ratio > 0.9 else "PARTIALLY_ACHIEVED" if distance_ratio > 0.5 else "FAILED"
elif "stop" in condition_lower or "halt" in condition_lower:
# For stopping criteria, assume achieved if robot is stable and crossed
criteria_evaluation[condition] = "ACHIEVED" if (standard_eval['robot_crossed_obstacle'] and standard_eval['robot_remains_upright']) else "FAILED"
elif "land" in condition_lower:
# For landing criteria (drones), assume achieved if stable and crossed
criteria_evaluation[condition] = "ACHIEVED" if (standard_eval['robot_crossed_obstacle'] and standard_eval['robot_remains_upright']) else "FAILED"
else:
# Generic condition - map to overall success
criteria_evaluation[condition] = "ACHIEVED" if standard_eval['overall_success'] else "UNDETERMINED"
standard_eval["criteria_evaluation"] = criteria_evaluation
return standard_eval
def format_feedback_for_llm(evaluation_results):
"""Format evaluation results into human-readable feedback for LLM"""
success_status = "Succeeded" if evaluation_results['overall_success'] else "Failed"
# Build collision note
if evaluation_results['no_significant_collision_with_obstacle_during_pass']:
collision_note = "No impeding contacts"
else:
collision_note = "Contacts detected with obstacle"
# Build failure reason
failure_reason = evaluation_results['specific_failure_point'] if not evaluation_results['overall_success'] else 'N/A'
feedback = (
f"Feedback: Obstacle passage attempt: {success_status}. "
f"Crossed Obstacle: {evaluation_results['robot_crossed_obstacle']}. "
f"Remained Upright: {evaluation_results['robot_remains_upright']}. "
f"Collision Note: {collision_note}. "
f"Failure reason: {failure_reason}. "
f"Final X position: {evaluation_results['final_robot_x_position']:.2f}m."
)
return feedback
def format_feedback_for_llm_with_criteria(evaluation_results, llm_success_conditions=None):
"""Enhanced feedback format that includes LLM criteria evaluation"""
# Start with standard feedback
feedback = format_feedback_for_llm(evaluation_results)
# Add criteria-specific feedback if available
if llm_success_conditions and 'criteria_evaluation' in evaluation_results:
feedback += "\n\nAssessment against your stated success conditions:"
criteria_eval = evaluation_results['criteria_evaluation']
for condition in llm_success_conditions:
status = criteria_eval.get(condition, "UNDETERMINED")
if status == "ACHIEVED":
feedback += f"\n✅ '{condition}': ACHIEVED"
elif status == "PARTIALLY_ACHIEVED":
feedback += f"\n⚠️ '{condition}': PARTIALLY_ACHIEVED"
elif status == "FAILED":
feedback += f"\n❌ '{condition}': FAILED"
else:
feedback += f"\n❓ '{condition}': UNDETERMINED (based on core simulation checks)"
# Add interpretation guidance
feedback += f"\n\nCore simulation metrics:"
feedback += f"\n- Distance traveled: {evaluation_results['final_robot_x_position']:.3f}m (target: >0.8m)"
feedback += f"\n- Obstacle crossed: {'Yes' if evaluation_results['robot_crossed_obstacle'] else 'No'}"
feedback += f"\n- Remained stable: {'Yes' if evaluation_results['robot_remains_upright'] else 'No'}"
feedback += f"\n- Clean pass: {'Yes' if evaluation_results['no_significant_collision_with_obstacle_during_pass'] else 'No'}"
return feedback |