# Agent2Robot / evaluation.py
# Author: sam133 (commit 46074e2)
# Part of the full Gradio UI and backend logic for agentic vehicle design:
# Agent2Robot interface with real-time updates, LLM-driven iterative design
# optimization, PyBullet physics simulation integration, and comprehensive
# evaluation and feedback systems.
def evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x=0.8):
    """Score a PyBullet run against the core obstacle-crossing criteria.

    Parameters:
        pybullet_feedback: dict with 'robot_position' (sequence, x first),
            'is_robot_upright' (bool) and 'obstacle_contacts_exist' (bool).
        obstacle_far_edge_x: world x coordinate of the obstacle's far edge;
            the robot's final x must exceed it to count as having crossed.

    Returns:
        dict of boolean criteria flags plus 'overall_success',
        'specific_failure_point' (string label) and 'final_robot_x_position'.
    """
    final_x = pybullet_feedback['robot_position'][0]
    crossed = final_x > obstacle_far_edge_x
    upright = pybullet_feedback['is_robot_upright']
    had_contacts = pybullet_feedback['obstacle_contacts_exist']

    # Contacts only count against the robot when it failed to get past the
    # obstacle: minor scrapes during a successful crossing are tolerated.
    clean_pass = True if crossed else not had_contacts

    success = crossed and upright and clean_pass

    # Pick the single most informative failure label (priority order:
    # falling over > being blocked by the obstacle > simply not arriving).
    if not upright:
        failure = "fell_over"
    elif had_contacts and not crossed:
        failure = "collided_and_stuck"
    elif not crossed:
        failure = "failed_to_reach_or_cross"
    else:
        failure = "none"

    return {
        "robot_crossed_obstacle": crossed,
        "no_significant_collision_with_obstacle_during_pass": clean_pass,
        "robot_remains_upright": upright,
        "overall_success": success,
        "specific_failure_point": failure,
        "final_robot_x_position": final_x,
    }
def evaluate_simulation_outcome_with_criteria(pybullet_feedback, obstacle_far_edge_x=0.8, llm_success_conditions=None):
    """Extend the standard evaluation with verdicts for LLM-stated criteria.

    Each condition string is keyword-matched (first matching rule wins) to a
    simulation metric and graded "ACHIEVED" / "PARTIALLY_ACHIEVED" / "FAILED",
    or "UNDETERMINED" when no keyword rule applies and the run did not succeed.

    Parameters:
        pybullet_feedback: same dict consumed by evaluate_simulation_outcome.
        obstacle_far_edge_x: far edge of the obstacle along x.
        llm_success_conditions: optional list of free-text success criteria.

    Returns:
        The standard evaluation dict, with a 'criteria_evaluation' mapping
        added when conditions were supplied.
    """
    results = evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x)
    if not llm_success_conditions:
        return results

    crossed = results['robot_crossed_obstacle']
    upright = results['robot_remains_upright']
    clean = results['no_significant_collision_with_obstacle_during_pass']

    verdicts = {}
    for condition in llm_success_conditions:
        text = condition.lower()
        if "cross" in text and "obstacle" in text:
            verdicts[condition] = "ACHIEVED" if crossed else "FAILED"
        elif any(word in text for word in ("stability", "stable", "upright")):
            verdicts[condition] = "ACHIEVED" if upright else "FAILED"
        elif "collision" in text or "stuck" in text:
            verdicts[condition] = "ACHIEVED" if clean else "FAILED"
        elif "reach" in text and "position" in text:
            verdicts[condition] = "ACHIEVED" if crossed else "FAILED"
        elif any(word in text for word in ("quick", "fast", "efficient")):
            # Grade speed/efficiency by how far along the course the robot got,
            # relative to the obstacle's far edge.
            ratio = results['final_robot_x_position'] / obstacle_far_edge_x if obstacle_far_edge_x > 0 else 0
            if ratio > 0.9:
                verdicts[condition] = "ACHIEVED"
            elif ratio > 0.5:
                verdicts[condition] = "PARTIALLY_ACHIEVED"
            else:
                verdicts[condition] = "FAILED"
        elif "stop" in text or "halt" in text:
            # Stopping criteria: treated as met when the run ended stable and past the obstacle.
            verdicts[condition] = "ACHIEVED" if (crossed and upright) else "FAILED"
        elif "land" in text:
            # Landing criteria (drones): same proxy as stopping.
            verdicts[condition] = "ACHIEVED" if (crossed and upright) else "FAILED"
        else:
            # No keyword rule matched — fall back to the overall outcome.
            verdicts[condition] = "ACHIEVED" if results['overall_success'] else "UNDETERMINED"

    results["criteria_evaluation"] = verdicts
    return results
def format_feedback_for_llm(evaluation_results):
    """Render an evaluation dict as a one-paragraph summary for the LLM.

    Parameters:
        evaluation_results: dict produced by evaluate_simulation_outcome.

    Returns:
        A single "Feedback: ..." string covering outcome, crossing, stability,
        contacts, failure reason and final x position (two decimal places).
    """
    succeeded = evaluation_results['overall_success']
    outcome = "Succeeded" if succeeded else "Failed"
    clean_pass = evaluation_results['no_significant_collision_with_obstacle_during_pass']
    collision_note = "No impeding contacts" if clean_pass else "Contacts detected with obstacle"
    # Only report a failure reason when the attempt actually failed.
    failure_reason = 'N/A' if succeeded else evaluation_results['specific_failure_point']

    segments = [
        f"Feedback: Obstacle passage attempt: {outcome}.",
        f"Crossed Obstacle: {evaluation_results['robot_crossed_obstacle']}.",
        f"Remained Upright: {evaluation_results['robot_remains_upright']}.",
        f"Collision Note: {collision_note}.",
        f"Failure reason: {failure_reason}.",
        f"Final X position: {evaluation_results['final_robot_x_position']:.2f}m.",
    ]
    return " ".join(segments)
def format_feedback_for_llm_with_criteria(evaluation_results, llm_success_conditions=None):
    """Build the standard feedback plus a per-criterion verdict section.

    Parameters:
        evaluation_results: dict from evaluate_simulation_outcome_with_criteria
            (its optional 'criteria_evaluation' mapping drives the extras).
        llm_success_conditions: the same condition strings used for grading.

    Returns:
        The base feedback string; when criteria verdicts are present it is
        followed by an emoji-annotated per-condition list and a short block of
        core simulation metrics.
    """
    pieces = [format_feedback_for_llm(evaluation_results)]

    # The criteria section only renders when conditions were both supplied
    # and graded by the evaluation step.
    if llm_success_conditions and 'criteria_evaluation' in evaluation_results:
        verdicts = evaluation_results['criteria_evaluation']
        icons = {"ACHIEVED": "✅", "PARTIALLY_ACHIEVED": "⚠️", "FAILED": "❌"}

        pieces.append("\n\nAssessment against your stated success conditions:")
        for condition in llm_success_conditions:
            status = verdicts.get(condition, "UNDETERMINED")
            if status in icons:
                pieces.append(f"\n{icons[status]} '{condition}': {status}")
            else:
                # Anything ungraded (or with an unrecognized status) is
                # reported as undetermined.
                pieces.append(f"\n❓ '{condition}': UNDETERMINED (based on core simulation checks)")

        crossed = 'Yes' if evaluation_results['robot_crossed_obstacle'] else 'No'
        stable = 'Yes' if evaluation_results['robot_remains_upright'] else 'No'
        clean = 'Yes' if evaluation_results['no_significant_collision_with_obstacle_during_pass'] else 'No'
        pieces.append("\n\nCore simulation metrics:")
        pieces.append(f"\n- Distance traveled: {evaluation_results['final_robot_x_position']:.3f}m (target: >0.8m)")
        pieces.append(f"\n- Obstacle crossed: {crossed}")
        pieces.append(f"\n- Remained stable: {stable}")
        pieces.append(f"\n- Clean pass: {clean}")

    return "".join(pieces)