Spaces:
No application file
No application file
sam133
Restructure repo following successful MCP Hackathon pattern: Clean modular architecture with mcp_client.py, design_tools.py, and organized app.py
fe37569
def evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x=0.8):
    """Score a PyBullet run against the obstacle-crossing task.

    Args:
        pybullet_feedback: dict with keys 'robot_position' (sequence, x
            coordinate first), 'is_robot_upright' (bool) and
            'obstacle_contacts_exist' (bool).
        obstacle_far_edge_x: x coordinate of the obstacle's far edge; the
            robot's final x must exceed it to count as having crossed.

    Returns:
        dict with the individual criteria flags, an 'overall_success' flag,
        a 'specific_failure_point' label and the final robot x position.
    """
    final_x = pybullet_feedback['robot_position'][0]
    crossed = final_x > obstacle_far_edge_x
    upright = pybullet_feedback['is_robot_upright']
    had_contacts = pybullet_feedback['obstacle_contacts_exist']

    # Contacts only count against the run when the robot failed to cross:
    # grazing the obstacle on a successful pass is tolerated, but contacts
    # without crossing suggest the robot got stuck against it.
    clean_pass = True if crossed else not had_contacts

    success = crossed and upright and clean_pass

    # Pick the single most informative failure label (priority order:
    # falling over beats being stuck, which beats simply falling short).
    if not upright:
        failure = "fell_over"
    elif had_contacts and not crossed:
        failure = "collided_and_stuck"
    elif not crossed:
        failure = "failed_to_reach_or_cross"
    else:
        failure = "none"

    return {
        "robot_crossed_obstacle": crossed,
        "no_significant_collision_with_obstacle_during_pass": clean_pass,
        "robot_remains_upright": upright,
        "overall_success": success,
        "specific_failure_point": failure,
        "final_robot_x_position": final_x,
    }
def evaluate_simulation_outcome_with_criteria(pybullet_feedback, obstacle_far_edge_x=0.8, llm_success_conditions=None):
    """Evaluate a run and additionally grade each LLM-stated success condition.

    Runs the standard evaluation, then maps every free-text condition in
    ``llm_success_conditions`` onto the simulation results via keyword
    matching, storing the grades under ``criteria_evaluation``.

    Args:
        pybullet_feedback: raw simulation feedback (see
            evaluate_simulation_outcome).
        obstacle_far_edge_x: x coordinate of the obstacle's far edge.
        llm_success_conditions: optional list of condition strings to grade.

    Returns:
        The standard evaluation dict, extended with 'criteria_evaluation'
        when conditions were supplied.
    """
    results = evaluate_simulation_outcome(pybullet_feedback, obstacle_far_edge_x)
    if not llm_success_conditions:
        return results

    crossed = results['robot_crossed_obstacle']
    upright = results['robot_remains_upright']
    clean = results['no_significant_collision_with_obstacle_during_pass']

    def grade(condition):
        # Keyword checks are ordered: the first matching rule wins.
        text = condition.lower()
        if "cross" in text and "obstacle" in text:
            return "ACHIEVED" if crossed else "FAILED"
        if "stability" in text or "stable" in text or "upright" in text:
            return "ACHIEVED" if upright else "FAILED"
        if "collision" in text or "stuck" in text:
            return "ACHIEVED" if clean else "FAILED"
        if "reach" in text and "position" in text:
            return "ACHIEVED" if crossed else "FAILED"
        if "quick" in text or "fast" in text or "efficient" in text:
            # Speed/efficiency graded by fraction of the course covered.
            ratio = results['final_robot_x_position'] / obstacle_far_edge_x if obstacle_far_edge_x > 0 else 0
            if ratio > 0.9:
                return "ACHIEVED"
            return "PARTIALLY_ACHIEVED" if ratio > 0.5 else "FAILED"
        if "stop" in text or "halt" in text:
            # Stopping criteria: treated as met once stable and across.
            return "ACHIEVED" if (crossed and upright) else "FAILED"
        if "land" in text:
            # Landing criteria (drones): treated as met once stable and across.
            return "ACHIEVED" if (crossed and upright) else "FAILED"
        # No recognized keywords: fall back to the overall verdict.
        return "ACHIEVED" if results['overall_success'] else "UNDETERMINED"

    results["criteria_evaluation"] = {c: grade(c) for c in llm_success_conditions}
    return results
def format_feedback_for_llm(evaluation_results):
    """Render an evaluation dict as a one-line natural-language summary.

    Args:
        evaluation_results: dict produced by evaluate_simulation_outcome.

    Returns:
        A single "Feedback: ..." sentence suitable for prompting an LLM.
    """
    succeeded = evaluation_results['overall_success']
    outcome = "Succeeded" if succeeded else "Failed"

    clean_pass = evaluation_results['no_significant_collision_with_obstacle_during_pass']
    collision_note = "No impeding contacts" if clean_pass else "Contacts detected with obstacle"

    # Only surface the specific failure label on unsuccessful runs.
    reason = 'N/A' if succeeded else evaluation_results['specific_failure_point']

    return (
        f"Feedback: Obstacle passage attempt: {outcome}. "
        f"Crossed Obstacle: {evaluation_results['robot_crossed_obstacle']}. "
        f"Remained Upright: {evaluation_results['robot_remains_upright']}. "
        f"Collision Note: {collision_note}. "
        f"Failure reason: {reason}. "
        f"Final X position: {evaluation_results['final_robot_x_position']:.2f}m."
    )
def format_feedback_for_llm_with_criteria(evaluation_results, llm_success_conditions=None, obstacle_far_edge_x=0.8):
    """Format evaluation results for the LLM, including per-criterion grades.

    Args:
        evaluation_results: dict from evaluate_simulation_outcome_with_criteria
            (may contain a 'criteria_evaluation' sub-dict).
        llm_success_conditions: the LLM-stated success conditions, in the
            order they should be reported; falsy skips that section.
        obstacle_far_edge_x: x coordinate the robot must pass. Previously the
            "target" annotation hard-coded 0.8m even though the rest of the
            pipeline parameterizes this value; now it is passed in (default
            0.8 keeps existing callers' output unchanged).

    Returns:
        Multi-line human-readable feedback string: one-line summary,
        optional per-condition grades, then raw simulation metrics.
    """
    # Start from the standard one-line summary.
    feedback = format_feedback_for_llm(evaluation_results)

    # Per-condition section only when both the conditions and their grades exist.
    if llm_success_conditions and 'criteria_evaluation' in evaluation_results:
        feedback += "\n\nAssessment against your stated success conditions:"
        criteria_eval = evaluation_results['criteria_evaluation']
        for condition in llm_success_conditions:
            # Conditions missing from the grade map default to UNDETERMINED.
            status = criteria_eval.get(condition, "UNDETERMINED")
            if status == "ACHIEVED":
                feedback += f"\n✅ '{condition}': ACHIEVED"
            elif status == "PARTIALLY_ACHIEVED":
                feedback += f"\n⚠️ '{condition}': PARTIALLY_ACHIEVED"
            elif status == "FAILED":
                feedback += f"\n❌ '{condition}': FAILED"
            else:
                feedback += f"\n❓ '{condition}': UNDETERMINED (based on core simulation checks)"

    # Raw metrics so the LLM can reason beyond the pass/fail labels.
    # (Plain strings here — the originals were f-strings with no placeholders.)
    feedback += "\n\nCore simulation metrics:"
    feedback += f"\n- Distance traveled: {evaluation_results['final_robot_x_position']:.3f}m (target: >{obstacle_far_edge_x}m)"
    feedback += f"\n- Obstacle crossed: {'Yes' if evaluation_results['robot_crossed_obstacle'] else 'No'}"
    feedback += f"\n- Remained stable: {'Yes' if evaluation_results['robot_remains_upright'] else 'No'}"
    feedback += f"\n- Clean pass: {'Yes' if evaluation_results['no_significant_collision_with_obstacle_during_pass'] else 'No'}"
    return feedback