| | |
"""
BuildwellAI Model V2 - Complete Dataset Preparation Script

Combines all data sources:
1. BSI Flex 8670 Building Safety Competence Benchmark (CSV)
2. UK Building Control Competency Benchmark 2025 (CSV)
3. qa-buildwell-ai.csv (Q&A pairs)
4. dataset_thinking_mode.jsonl
5. dataset_tool_calling.jsonl
6. Auto-generated MCP training data for all 42 MCPs

Output: Combined, validated, shuffled dataset ready for fine-tuning.
"""
| |
|
import csv
import hashlib
import json
import os
import random
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
| |
|
| | |
| | |
| | |
| |
|
# Filesystem roots used by this script. The defaults are the original
# deployment paths; each root can be redirected with an environment variable
# (an unset or empty variable falls back to the default) so the script is not
# tied to a single /opt checkout.
BASE_DIR = Path(
    os.environ.get("BUILDWELLAI_BASE_DIR")
    or "/opt/buildwellai/buildwellai-llm-models/buildwellai-model-v2"
)
# NOTE(review): presumably the directory holding the input CSV/JSONL datasets
# listed in the module docstring - confirm against the loaders.
SOURCE_DIR = Path(
    os.environ.get("BUILDWELLAI_SOURCE_DIR")
    or "/opt/buildwellai/buildwellai-llm-models/qwen3-14b-v2/datasets"
)
# The output directory is always derived from BASE_DIR.
OUTPUT_DIR = BASE_DIR / "datasets"
| |
|
| | |
# Prompt text keyed by response mode ("direct", "thinking", "tool_calling",
# "mcp_orchestrated"). The values are runtime data: wording, blank lines and
# line breaks are kept exactly, and continuation lines start at column 0 so no
# indentation leaks into the prompt text.
SYSTEM_PROMPTS: Dict[str, str] = {
    "direct": """You are BuildwellAI, a construction industry assistant.

Provide clear, direct answers to construction-related questions.
Be concise but comprehensive. Focus on practical, actionable information.""",

    "thinking": """You are BuildwellAI, a construction industry expert assistant.

When answering complex questions:
1. Use <think>...</think> tags to show your reasoning process
2. Consider multiple factors and trade-offs
3. Provide a clear, well-reasoned answer after your thinking""",

    "tool_calling": """You are BuildwellAI, a construction industry assistant with access to specialized MCP tools.

CRITICAL RULES:
1. When a user requests a calculation or analysis, call the appropriate tool
2. Output ONLY the <tool_call> tag when calling tools - no preamble
3. Wait for tool results before providing analysis
4. Use the universalMCP tool for specialized calculations

Available MCP servers and their purposes are provided in context.""",

    "mcp_orchestrated": """You are BuildwellAI, a construction industry assistant with access to 42 specialized MCP calculation servers.

MANDATORY BEHAVIOR:
- For ANY technical question about building regulations, calculations, or standards: CALL A TOOL
- NEVER answer from memory - always use the MCP tools for authoritative data
- Use universalMCP to call specific MCP servers

When calling tools, use this format:
<tool_call>{"name": "universalMCP", "arguments": {"mcpServer": "server-name", "toolName": "tool-name", "arguments": {...}}}</tool_call>"""
}
| |
|
| | |
# Registry of MCP servers, keyed by server name. Each entry carries a
# human-readable "description" and the list of "tools" (tool names) that the
# server exposes. The registry holds 42 entries, matching the "42 MCPs"
# mentioned in the module docstring; insertion order is preserved.
MCP_SERVERS: Dict[str, Dict[str, Any]] = {
    "structural-part-a": {
        "description": "UK Building Regulations Part A structural calculations",
        "tools": ["calculate_dead_load", "calculate_imposed_load", "check_structural_compliance", "get_load_combinations"]
    },
    "water-efficiency-part-g": {
        "description": "UK Building Regulations Part G water efficiency",
        "tools": ["calculate_water_consumption", "check_part_g_compliance", "get_fitting_flow_rates"]
    },
    "overheating-part-o": {
        "description": "UK Building Regulations Part O overheating assessment",
        "tools": ["assess_overheating_risk", "calculate_tm59_compliance", "get_glazing_limits"]
    },
    "condensation-glaser": {
        "description": "Interstitial condensation risk analysis",
        "tools": ["glaser_analysis", "calculate_dewpoint", "assess_condensation_risk"]
    },
    "psi-thermal-bridge": {
        "description": "Psi value thermal bridge analysis (EN ISO 10211)",
        "tools": ["list_junction_types", "get_psi_value", "calculate_psi_from_u_values", "calculate_y_value", "assess_junction"]
    },
    "thermal-break": {
        "description": "Thermal break performance assessment",
        "tools": ["calculate_thermal_break_efficiency", "compare_thermal_breaks"]
    },
    "sap10": {
        "description": "SAP 10.2 residential energy calculations",
        "tools": ["calculate_sap_rating", "get_dwelling_energy_use", "calculate_der_ter"]
    },
    "sbem": {
        "description": "SBEM non-residential energy calculations",
        "tools": ["calculate_ber", "check_notional_building", "get_hvac_energy"]
    },
    "air-permeability": {
        "description": "Air permeability test data processing",
        "tools": ["calculate_air_permeability", "check_part_l_compliance", "estimate_infiltration_heat_loss"]
    },
    "drainage-swmm": {
        "description": "Drainage runoff calculations (SWMM methodology)",
        "tools": ["calculate_runoff", "design_pipe_network", "simulate_storm_event"]
    },
    "suds": {
        "description": "SuDS flow and volume calculations (CIRIA C753)",
        "tools": ["design_suds", "calculate_attenuation", "check_discharge_rate"]
    },
    "hydraulic-drainage": {
        "description": "Hydraulic drainage network analysis",
        "tools": ["analyze_drainage_network", "calculate_pipe_capacity", "check_gradients"]
    },
    "embodied-carbon": {
        "description": "Embodied carbon assessment (ICE Database 2023)",
        "tools": ["calculate_embodied_carbon", "get_material_carbon_factors", "compare_materials"]
    },
    "smoke-ventilation": {
        "description": "Smoke ventilation design (BS 7346, BS EN 12101)",
        "tools": ["design_smoke_shaft", "calculate_aov_size", "check_smoke_clearance"]
    },
    "evacuation": {
        "description": "Evacuation time modelling (PD 7974-6)",
        "tools": ["calculate_evacuation_time", "model_egress_flow", "assess_phased_evacuation"]
    },
    "disproportionate-collapse": {
        "description": "Disproportionate collapse assessment (EN 1991-1-7)",
        "tools": ["check_robustness_class", "calculate_tie_forces", "assess_key_elements"]
    },
    "sound-insulation": {
        "description": "Sound insulation calculations (Approved Document E)",
        "tools": ["calculate_sound_reduction", "check_part_e_compliance", "estimate_flanking"]
    },
    "fire-safety": {
        "description": "Fire safety assessment (Approved Document B)",
        "tools": ["calculate_travel_distance", "check_compartmentation", "assess_means_of_escape", "get_fire_resistance_requirements"]
    },
    "flood-risk": {
        "description": "Flood risk assessment (NPPF, FEH)",
        "tools": ["calculate_flood_risk", "determine_flood_zone", "assess_sequential_test"]
    },
    "biodiversity-net-gain": {
        "description": "Biodiversity net gain calculations (Environment Act 2021)",
        "tools": ["calculate_bng_units", "assess_habitat_condition", "check_10_percent_gain"]
    },
    "daylight-factor": {
        "description": "Daylight factor calculations (BS 8206-2)",
        "tools": ["calculate_daylight_factor", "check_room_depth", "assess_no_sky_line"]
    },
    "adf-modelling": {
        "description": "Average daylight factor modelling (BS EN 17037)",
        "tools": ["calculate_adf", "check_target_illuminance", "model_daylight_distribution"]
    },
    "sunlight-overshadowing": {
        "description": "Sunlight/overshadowing analysis (BRE 209)",
        "tools": ["calculate_sunlight_hours", "assess_overshadowing", "check_bre_guidelines"]
    },
    "cwct-facade": {
        "description": "CWCT facade performance testing",
        "tools": ["check_weather_tightness", "calculate_structural_capacity", "assess_thermal_movement"]
    },
    "dynamic-thermal": {
        "description": "Dynamic thermal simulation",
        "tools": ["simulate_annual_energy", "calculate_peak_loads", "optimize_hvac_sizing"]
    },
    "breeam": {
        "description": "BREEAM sustainability assessment",
        "tools": ["calculate_breeam_credits", "get_category_requirements", "assess_rating_path"]
    },
    "well": {
        "description": "WELL Building Standard assessment",
        "tools": ["check_well_features", "calculate_optimization_credits", "assess_preconditions"]
    },
    "passivhaus": {
        "description": "Passivhaus/PHPP calculations",
        "tools": ["calculate_heating_demand", "check_airtightness", "assess_phpp_balance", "get_standard_requirements"]
    },
    "contaminated-land": {
        "description": "Contaminated land assessment (CLEA)",
        "tools": ["calculate_gac", "assess_risk_pathway", "determine_remediation"]
    },
    "cfd-fire-smoke": {
        "description": "CFD fire/smoke simulation",
        "tools": ["simulate_smoke_spread", "calculate_visibility", "assess_tenability"]
    },
    "wufi-hygrothermal": {
        "description": "WUFI hygrothermal analysis",
        "tools": ["simulate_moisture_transport", "assess_mould_risk", "check_interstitial_condensation"]
    },
    "lca": {
        "description": "Life Cycle Assessment (EN 15978, RICS 2023)",
        "tools": ["calculate_whole_life_carbon", "get_lca_benchmarks", "compare_design_options"]
    },
    "hydraulic-transport": {
        "description": "Hydraulic transport analysis",
        "tools": ["calculate_slurry_flow", "design_pneumatic_conveying", "assess_pipe_wear"]
    },
    "daylighting": {
        "description": "Daylighting design and analysis",
        "tools": ["optimize_glazing_ratio", "calculate_solar_gain", "model_light_distribution"]
    },
    "energy-modelling": {
        "description": "Building energy modelling",
        "tools": ["calculate_annual_energy", "optimize_fabric", "compare_hvac_options"]
    },
    "indoor-air-quality": {
        "description": "Indoor air quality assessment",
        "tools": ["calculate_ventilation_rate", "assess_co2_levels", "check_pollutant_concentrations"]
    },
    "pedestrian-comfort": {
        "description": "Pedestrian comfort analysis (Lawson criteria)",
        "tools": ["assess_wind_comfort", "check_lawson_criteria", "identify_mitigation"]
    },
    "wind-microclimate": {
        "description": "Wind microclimate assessment",
        "tools": ["model_wind_patterns", "calculate_amplification", "assess_safety"]
    },
    "vibration": {
        "description": "Ground-borne vibration assessment (BS 6472-1)",
        "tools": ["calculate_vdv", "assess_human_perception", "check_building_damage_threshold"]
    },
    "swept-path": {
        "description": "Vehicle swept path analysis",
        "tools": ["calculate_swept_path", "check_vehicle_clearance", "optimize_turning_radius"]
    },
    "carbon-offset": {
        "description": "Carbon offset calculations",
        "tools": ["calculate_offset_requirement", "estimate_offset_cost", "compare_offset_options"]
    },
    "ventilation-part-f": {
        "description": "UK Building Regulations Part F ventilation compliance",
        "tools": ["calculate_ventilation_rates", "check_part_f_compliance", "design_mev_system"]
    }
}
| |
|
| | |
| | |
| | |
| |
|
| | def generate_mcp_training_examples() -> List[Dict]: |
| | """Generate training examples for all 42 MCP servers with realistic prompts.""" |
| | examples = [] |
| |
|
| | |
| | MCP_PROMPTS = { |
| | "psi-thermal-bridge": [ |
| | "What are the SAP Appendix K junction types?", |
| | "List all the thermal bridge junction codes", |
| | "What's the PSI value for junction E5?", |
| | "I need the default PSI value for a ground floor junction", |
| | "Calculate the Y-value for my dwelling with these junctions", |
| | "What PSI value should I use for a steel lintel?", |
| | "Show me all the external wall junction types", |
| | "What's the temperature factor for junction E1?", |
| | "I have a balcony thermal bridge - what junction code is that?", |
| | "What's the difference between E5 and E19 junctions?", |
| | "Calculate total heat loss from thermal bridges", |
| | "What PSI values apply to party wall junctions?", |
| | "I need to assess thermal bridging at the roof-wall junction", |
| | "What's the enhanced construction detail PSI for E10?", |
| | "List roof junction types R1 to R11", |
| | ], |
| | "sap10": [ |
| | "Calculate the SAP rating for a 3-bed semi-detached house", |
| | "What's the target emission rate for my new dwelling?", |
| | "I need to run a SAP 10.2 calculation", |
| | "What's the dwelling emission rate for this property?", |
| | "Calculate primary energy consumption for the dwelling", |
| | "Will this house achieve EPC band B?", |
| | "What's the fabric energy efficiency for my design?", |
| | "I need to check Part L compliance for a new build", |
| | "Calculate space heating demand", |
| | "What's the carbon emission rate?", |
| | "Run SAP assessment for a 2-bed flat", |
| | "What hot water energy consumption should I expect?", |
| | "Calculate the notional dwelling values", |
| | "What improvements would get this to EPC A?", |
| | "Assess the dwelling against Part L 2021", |
| | ], |
| | "breeam": [ |
| | "What BREEAM credits can I achieve for Ene 01?", |
| | "Calculate BREEAM score for this office building", |
| | "What are the mandatory credits for BREEAM Excellent?", |
| | "How many Wat 01 credits for 40% water reduction?", |
| | "What's required for BREEAM Outstanding?", |
| | "List all credits in the Energy category", |
| | "What exemplary credits are available?", |
| | "Calculate minimum standards for BREEAM Very Good", |
| | "What Hea 02 credits for daylight compliance?", |
| | "I need to hit BREEAM Excellent - what's the pathway?", |
| | "What Mat 01 credits for responsible sourcing?", |
| | "Calculate Land Use and Ecology credits", |
| | "What innovation credits can we claim?", |
| | "Assess transport accessibility credits", |
| | "What Pol 03 credits for flood risk?", |
| | ], |
| | "fire-safety": [ |
| | "What's the maximum travel distance for an open plan office?", |
| | "Calculate escape route capacity for 500 occupants", |
| | "What fire resistance is required for a 12-storey building?", |
| | "Is a sprinkler system required for this building?", |
| | "What compartment sizes apply to this retail unit?", |
| | "Calculate the number of escape stairs needed", |
| | "What's the minimum door width for 200 people?", |
| | "Does this care home need a stay-put strategy?", |
| | "What fire stopping is required at service penetrations?", |
| | "Calculate fire engineering parameters for BS 9999", |
| | "What protected stairway width do I need?", |
| | "Is a firefighting shaft required?", |
| | "What external wall fire spread requirements apply?", |
| | "Calculate occupancy for this restaurant", |
| | "What emergency lighting is required?", |
| | ], |
| | "passivhaus": [ |
| | "What's the heating demand limit for Passivhaus Classic?", |
| | "Calculate airtightness requirement for certification", |
| | "What's the primary energy demand limit?", |
| | "Does this design meet Passivhaus Plus criteria?", |
| | "What window U-values are needed for Passivhaus?", |
| | "Calculate the thermal bridge free design requirement", |
| | "What MVHR efficiency is required?", |
| | "Check if overheating criteria are met", |
| | "What's the specific space heating demand?", |
| | "Calculate renewable energy generation requirements", |
| | "What's the maximum heating load?", |
| | "Does this meet PHI Low Energy Building standard?", |
| | "What treated floor area should I use?", |
| | "Calculate the frequency of overheating", |
| | "What Passivhaus windows are suitable for this climate?", |
| | ], |
| | "embodied-carbon": [ |
| | "Calculate embodied carbon for this concrete frame", |
| | "What's the carbon factor for structural steel?", |
| | "Compare embodied carbon of CLT vs concrete", |
| | "What's the A1-A3 carbon for this building?", |
| | "Calculate whole life carbon including B and C stages", |
| | "What carbon savings from using recycled aggregate?", |
| | "What's the embodied carbon benchmark for offices?", |
| | "Calculate carbon for foundation concrete", |
| | "Compare brick vs timber frame carbon impact", |
| | "What's the GWP for mineral wool insulation?", |
| | "Calculate carbon sequestration for timber structure", |
| | "What RICS benchmarks apply to residential?", |
| | "What's the upfront carbon for this design?", |
| | "Calculate carbon intensity per square metre", |
| | "What are the LETI 2030 targets?", |
| | ], |
| | "condensation-glaser": [ |
| | "Run a Glaser condensation risk assessment", |
| | "Is there interstitial condensation risk in this wall?", |
| | "Check dewpoint temperature through the construction", |
| | "Will moisture accumulate in the insulation layer?", |
| | "Calculate vapour pressure through the wall buildup", |
| | "Is a vapour barrier needed for this roof?", |
| | "What's the condensation risk for a cold bridged detail?", |
| | "Check internal surface condensation risk", |
| | "Calculate critical surface humidity", |
| | "What's the monthly moisture balance?", |
| | "Is there mould growth risk on internal surfaces?", |
| | "Check timber frame wall for condensation", |
| | "What ventilated cavity width prevents condensation?", |
| | "Calculate interstitial humidity levels", |
| | "Assess warm roof vs cold roof condensation risk", |
| | ], |
| | "overheating-part-o": [ |
| | "Check Part O compliance for this bedroom", |
| | "Calculate overheating risk using TM59", |
| | "Does the living room pass the overheating criteria?", |
| | "What glazing ratio is allowed facing south-west?", |
| | "Calculate hours above 26°C threshold", |
| | "Is cross-ventilation sufficient for Part O?", |
| | "What shading is needed to pass overheating?", |
| | "Run TM52 assessment for this office", |
| | "Calculate adaptive comfort temperature", |
| | "What percentage of occupied hours exceed threshold?", |
| | "Check if night-time ventilation is adequate", |
| | "What g-value glazing is needed?", |
| | "Calculate bedroom overheating for 32°C night", |
| | "Does this flat need mechanical cooling?", |
| | "Assess overheating for a top floor flat", |
| | ], |
| | "structural-part-a": [ |
| | "Calculate dead and imposed loads for this floor", |
| | "What load combination factors apply?", |
| | "Check beam capacity for this loading", |
| | "Calculate foundation bearing pressure", |
| | "What's the maximum span for 47x200 joists at 400 centres?", |
| | "Check deflection limits for this beam", |
| | "Calculate wind load on the gable wall", |
| | "What steel section is needed for 6m span?", |
| | "Check Part A compliance for this structure", |
| | "Calculate load transfer through the structure", |
| | "What's the shear capacity of this connection?", |
| | "Check lateral stability requirements", |
| | "Calculate roof load including snow", |
| | "What padstone size is needed under this beam?", |
| | "Check wall stability for this height", |
| | ], |
| | "lca": [ |
| | "Run whole life carbon assessment", |
| | "Calculate lifecycle carbon stages A-C", |
| | "What's the operational energy over 60 years?", |
| | "Compare design options for lifecycle impact", |
| | "What module D benefits apply?", |
| | "Calculate end of life carbon", |
| | "What's the use stage carbon for this building?", |
| | "Apply EN 15978 assessment methodology", |
| | "Calculate replacement cycles for cladding", |
| | "What's the reference study period?", |
| | "Compare lifecycle cost and carbon", |
| | "What RICS 2023 methodology should I use?", |
| | "Calculate maintenance phase carbon", |
| | "What system boundaries apply?", |
| | "Assess refurbishment vs new build LCA", |
| | ], |
| | "sound-insulation": [ |
| | "What Rw is needed between these flats?", |
| | "Calculate airborne sound insulation", |
| | "Does this wall meet Part E requirements?", |
| | "What's the DnT,w requirement for this dwelling?", |
| | "Calculate impact sound insulation for floor", |
| | "What L'nT,w should the floor achieve?", |
| | "Check flanking transmission paths", |
| | "What's the sound reduction index for this partition?", |
| | "Calculate pre-completion testing requirements", |
| | "Does this separating wall meet Robust Details?", |
| | "What acoustic performance for party floor?", |
| | "Calculate Ctr correction for traffic noise", |
| | "What internal wall sound insulation is needed?", |
| | "Check dwelling entrance door acoustic rating", |
| | "What ceiling treatment improves impact sound?", |
| | ], |
| | "daylight-factor": [ |
| | "Calculate average daylight factor for this room", |
| | "Does the room meet 2% daylight factor?", |
| | "What glazing ratio for adequate daylight?", |
| | "Check no-sky line position in the room", |
| | "Calculate daylight uniformity ratio", |
| | "What room depth is acceptable for daylight?", |
| | "Check BS EN 17037 target illuminance", |
| | "Calculate daylight factor at back of room", |
| | "What external obstruction angle affects daylight?", |
| | "Does this classroom meet BB101 daylight?", |
| | "Calculate minimum daylight to kitchen", |
| | "What ADF is required for living room?", |
| | "Check daylight distribution across floor plate", |
| | "What glazing transmittance for 2% DF?", |
| | "Calculate supplementary electric lighting need", |
| | ], |
| | "smoke-ventilation": [ |
| | "Design smoke shaft for this residential building", |
| | "Calculate AOV size for this lobby", |
| | "What smoke clearance rate is required?", |
| | "Size the natural smoke vent for this stair", |
| | "What mechanical extract rate for car park?", |
| | "Calculate smoke reservoir depth", |
| | "What inlet air is needed for balanced design?", |
| | "Check BS EN 12101-6 design parameters", |
| | "Size smoke extract fan capacity", |
| | "What free area is needed for smoke ventilation?", |
| | "Calculate pressure differential for lobby", |
| | "Design depressurisation system for firefighting stair", |
| | "What smoke control for single escape stair?", |
| | "Calculate make-up air velocity", |
| | "Size emergency ventilation shaft", |
| | ], |
| | "evacuation": [ |
| | "Calculate total evacuation time for this building", |
| | "What's the flow rate through this exit?", |
| | "Model phased evacuation for 20-storey building", |
| | "Calculate pre-movement time for office", |
| | "What's the merging flow at the final exit?", |
| | "Check stair capacity for simultaneous evacuation", |
| | "Calculate required safe egress time", |
| | "What's the available safe egress time?", |
| | "Model evacuation for care home residents", |
| | "Calculate queue formation at exit doors", |
| | "What egress time for mobility impaired?", |
| | "Check refuge area sizing", |
| | "Calculate evacuation with phased alarm", |
| | "What's the walking speed on stairs?", |
| | "Model evacuation with one exit blocked", |
| | ], |
| | "water-efficiency-part-g": [ |
| | "Calculate water consumption per person per day", |
| | "Does this dwelling meet 125 litres/person/day?", |
| | "What's the optional water target of 110 lpppd?", |
| | "Calculate water use for dual flush WC", |
| | "What flow rate for Part G compliant taps?", |
| | "Check fittings against water calculator", |
| | "What shower flow rate meets the target?", |
| | "Calculate dwelling water consumption", |
| | "What water fittings achieve 110 target?", |
| | "Run Part G water efficiency calculation", |
| | "What bath size for water compliance?", |
| | "Calculate greywater recycling benefit", |
| | "What rainwater harvesting credits apply?", |
| | "Check washing machine water consumption", |
| | "Calculate potable water demand", |
| | ], |
| | "ventilation-part-f": [ |
| | "Calculate whole dwelling ventilation rate", |
| | "What extract rates for kitchen and bathroom?", |
| | "Does this ventilation strategy meet Part F?", |
| | "Size the MVHR unit for this dwelling", |
| | "What background ventilator size is needed?", |
| | "Calculate minimum purge ventilation", |
| | "What continuous extract for System 3?", |
| | "Check trickle ventilator equivalent area", |
| | "What's the air supply rate per person?", |
| | "Size extract fan for utility room", |
| | "Calculate Part F for open plan kitchen", |
| | "What crossflow ventilation opening size?", |
| | "Check passive stack ventilation sizing", |
| | "What boost extract rate for cooker hood?", |
| | "Calculate MEV system airflow rates", |
| | ], |
| | "air-permeability": [ |
| | "What air permeability target for Part L?", |
| | "Calculate heat loss from infiltration", |
| | "What's the design air permeability?", |
| | "Process air test results at 50Pa", |
| | "What air tightness for Passivhaus?", |
| | "Calculate infiltration heat loss coefficient", |
| | "What backstop air permeability applies?", |
| | "Check pulse test results", |
| | "What's the equivalent leakage area?", |
| | "Calculate annual infiltration heat loss", |
| | "What improvements reduce air leakage?", |
| | "Process blower door test data", |
| | "What ACH50 is required?", |
| | "Calculate shelter factor effect", |
| | "What air barrier strategy is recommended?", |
| | ], |
| | "biodiversity-net-gain": [ |
| | "Calculate baseline biodiversity units", |
| | "What's the 10% BNG requirement?", |
| | "Calculate habitat condition score", |
| | "What strategic significance multiplier applies?", |
| | "Run biodiversity metric 4.0 calculation", |
| | "What hedgerow units are on site?", |
| | "Calculate off-site offset requirement", |
| | "What habitat creation achieves 10% gain?", |
| | "Check distinctiveness score for grassland", |
| | "Calculate river units for watercourse", |
| | "What temporal multiplier for new habitat?", |
| | "Assess difficulty of habitat creation", |
| | "What's the habitat trading rules?", |
| | "Calculate net change in biodiversity", |
| | "What management plan is required?", |
| | ], |
| | "flood-risk": [ |
| | "What flood zone is this site in?", |
| | "Is this development appropriate in Flood Zone 3?", |
| | "Calculate finished floor level for flood risk", |
| | "What sequential test applies?", |
| | "Run exception test for vulnerable development", |
| | "What flood resilience measures are needed?", |
| | "Calculate 1% AEP flood level", |
| | "What climate change allowance applies?", |
| | "Check EA flood map data", |
| | "What's the surface water flood risk?", |
| | "Calculate freeboard requirement", |
| | "What FRA scope for major development?", |
| | "Is flood compensation storage required?", |
| | "What groundwater flood risk exists?", |
| | "Calculate flood storage loss and compensation", |
| | ], |
| | "suds": [ |
| | "Design attenuation tank for 1 in 100 year storm", |
| | "Calculate greenfield runoff rate", |
| | "What SuDS features achieve 4 pillars?", |
| | "Size permeable paving for car park", |
| | "Calculate swale dimensions", |
| | "What detention basin volume is needed?", |
| | "Design rain garden for treatment", |
| | "Calculate long term storage requirement", |
| | "What interception storage is required?", |
| | "Size filter drain for access road", |
| | "What SuDS treatment train is needed?", |
| | "Calculate half drain time", |
| | "Design bioretention system", |
| | "What infiltration rate should I use?", |
| | "Calculate discharge rate to watercourse", |
| | ], |
| | "well": [ |
| | "What WELL features meet Air concept?", |
| | "Calculate WELL certification pathway", |
| | "What preconditions are mandatory?", |
| | "How many optimizations for WELL Gold?", |
| | "Check WELL v2 Light requirements", |
| | "What Nourishment features apply to office?", |
| | "Calculate WELL points for this project", |
| | "What's required for WELL Platinum?", |
| | "Check Movement concept requirements", |
| | "What Thermal Comfort features needed?", |
| | "Calculate Sound concept compliance", |
| | "What Materials preconditions apply?", |
| | "Check Mind concept features", |
| | "What Community features for workplace?", |
| | "Calculate Innovation points available", |
| | ], |
| | "disproportionate-collapse": [ |
| | "What robustness class is this building?", |
| | "Calculate horizontal tie forces", |
| | "Check notional element removal", |
| | "What key element design is required?", |
| | "Calculate vertical tie requirements", |
| | "Is the building Class 2B?", |
| | "What peripheral ties are needed?", |
| | "Check alternative load path", |
| | "Calculate internal tie forces", |
| | "What corner column ties required?", |
| | "Design for accidental loading", |
| | "Check Eurocode 1991-1-7 requirements", |
| | "What is the tie force for 7.5m grid?", |
| | "Calculate column removal scenario", |
| | "What enhanced design for Class 3?", |
| | ], |
| | "dynamic-thermal": [ |
| | "Run annual energy simulation", |
| | "Calculate peak heating load", |
| | "What cooling load for this office?", |
| | "Model fabric heat loss", |
| | "Calculate solar gains through glazing", |
| | "What HVAC sizing from simulation?", |
| | "Run IES model for energy", |
| | "Calculate thermal mass effect", |
| | "What's the annual heating demand?", |
| | "Model natural ventilation strategy", |
| | "Calculate comfort hours", |
| | "What plant capacity is needed?", |
| | "Run summer overheating simulation", |
| | "Calculate energy use intensity", |
| | "Model mixed-mode ventilation", |
| | ], |
| | "sbem": [ |
| | "Calculate BER for this office building", |
| | "What's the target emission rate for SBEM?", |
| | "Run Part L2A compliance check", |
| | "Calculate notional building values", |
| | "What's the primary energy from SBEM?", |
| | "Check modular building approach", |
| | "Calculate heating and cooling loads", |
| | "What EPC rating will this achieve?", |
| | "Run SBEM for shell and core", |
| | "What improvement factor is needed?", |
| | "Calculate actual building emission rate", |
| | "Check HVAC system efficiencies", |
| | "What lighting efficacy for compliance?", |
| | "Calculate renewable contribution", |
| | "Run Part L2B assessment for extension", |
| | ], |
| | "thermal-break": [ |
| | "Calculate thermal break efficiency for steel beam", |
| | "What thermal break is needed for a balcony connection?", |
| | "Compare Schock Isokorb vs generic thermal break", |
| | "What temperature factor does this thermal break achieve?", |
| | "Calculate heat loss reduction from thermal break", |
| | "What PSI value improvement from thermal break?", |
| | "Size thermal break for cantilevered balcony", |
| | "What thermal break for steel to concrete connection?", |
| | "Calculate fRsi at thermal break location", |
| | "What structural capacity for Isokorb KXT?", |
| | "Compare thermal break performance ratings", |
| | "What thermal break for parapet connection?", |
| | "Calculate linear thermal transmittance with break", |
| | "What thermal break depth is required?", |
| | "Assess condensation risk at thermal break", |
| | ], |
| | "drainage-swmm": [ |
| | "Calculate peak runoff for 1:100 year storm", |
| | "Model drainage network capacity", |
| | "What pipe size for this catchment?", |
| | "Simulate storm event in SWMM", |
| | "Calculate time of concentration", |
| | "Design pipe network for development", |
| | "What's the runoff coefficient for this site?", |
| | "Model combined sewer overflow", |
| | "Calculate hydraulic grade line", |
| | "What storage is needed for attenuation?", |
| | "Simulate 30-year storm event", |
| | "Check pipe capacity under climate change", |
| | "Calculate critical storm duration", |
| | "Model surcharge in network", |
| | "Design outfall to watercourse", |
| | ], |
| | "hydraulic-drainage": [ |
| | "Analyze drainage network hydraulics", |
| | "Check pipe gradient for self-cleansing", |
| | "Calculate pipe capacity for foul drainage", |
| | "What flow velocity in this pipe?", |
| | "Design gravity drainage system", |
| | "Check hydraulic capacity of existing drain", |
| | "Calculate Manning's equation for pipe flow", |
| | "What pipe diameter for 2 l/s flow?", |
| | "Assess gradient for 100mm pipe", |
| | "Calculate roughness coefficient", |
| | "Design pumped drainage system", |
| | "Check ventilation for drainage system", |
| | "Calculate design flow from dwellings", |
| | "What fall is needed for 150mm drain?", |
| | "Assess surcharge capacity", |
| | ], |
| | "adf-modelling": [ |
| | "Calculate average daylight factor for office", |
| | "What ADF for living room compliance?", |
| | "Model daylight distribution across floorplate", |
| | "Does the room meet BS EN 17037 targets?", |
| | "Calculate uniformity ratio for daylight", |
| | "What glazing area for 2% ADF?", |
| | "Model combined sky and sun exposure", |
| | "Calculate median daylight illuminance", |
| | "What target ADF for kitchen?", |
| | "Assess daylight with external obstruction", |
| | "Calculate supplementary lighting need", |
| | "Model daylight for open plan office", |
| | "What room depth for adequate daylight?", |
| | "Calculate ADF at work plane height", |
| | "Assess glare probability", |
| | ], |
| | "sunlight-overshadowing": [ |
| | "Calculate annual probable sunlight hours", |
| | "Does the garden get BRE 209 compliant sunlight?", |
| | "Assess overshadowing from proposed building", |
| | "What's the 25% APSH for south-facing window?", |
| | "Calculate winter sunlight (Dec-Feb)", |
| | "Model shadow path diagram", |
| | "Assess sunlight to neighbouring property", |
| | "Calculate sun on ground at spring equinox", |
| | "What overshadowing impact on amenity space?", |
| | "Check 2-hour sun on March 21", |
| | "Model transient overshadowing", |
| | "Calculate loss of sunlight to existing building", |
| | "What APSH reduction is acceptable?", |
| | "Assess sunlight access to playground", |
| | "Model sun hours for rooftop amenity", |
| | ], |
| | "cwct-facade": [ |
| | "Check CWCT weather tightness classification", |
| | "Calculate facade wind load capacity", |
| | "What mullion deflection under wind?", |
| | "Assess thermal movement in cladding", |
| | "Design glass size for wind pressure", |
| | "Check structural silicone joint capacity", |
| | "Calculate bracket spacing for facade", |
| | "What tolerance for facade installation?", |
| | "Assess impact resistance requirements", |
| | "Design facade for CWCT test sequence A", |
| | "Calculate air permeability for curtain wall", |
| | "What water penetration resistance?", |
| | "Check facade for fire spread requirements", |
| | "Calculate thermal bridge at transom", |
| | "Assess dynamic wind load on facade", |
| | ], |
| | "contaminated-land": [ |
| | "Calculate generic assessment criteria", |
| | "What soil guideline values apply?", |
| | "Assess risk pathway for residential use", |
| | "Determine CLEA screening level", |
| | "What remediation target for lead?", |
| | "Calculate human health risk assessment", |
| | "Check groundwater protection requirements", |
| | "What asbestos screening level?", |
| | "Assess vapour intrusion pathway", |
| | "Calculate averaging area for sampling", |
| | "What clean cover depth required?", |
| | "Determine Category 4 screening level", |
| | "Assess source-pathway-receptor linkage", |
| | "What remediation verification needed?", |
| | "Calculate bioaccessibility factor", |
| | ], |
| | "cfd-fire-smoke": [ |
| | "Model smoke spread in atrium", |
| | "Calculate visibility during fire scenario", |
| | "Simulate tenability conditions", |
| | "What smoke layer height over time?", |
| | "Model smoke extraction effectiveness", |
| | "Calculate temperature at head height", |
| | "Simulate fire growth rate impact", |
| | "What make-up air velocity needed?", |
| | "Model smoke flow through opening", |
| | "Calculate carbon monoxide concentration", |
| | "Assess safe escape conditions", |
| | "Simulate sprinkler activation effect", |
| | "What smoke density at 2m height?", |
| | "Model smoke movement in car park", |
| | "Calculate buoyancy-driven flow", |
| | ], |
| | "wufi-hygrothermal": [ |
| | "Simulate moisture transport in wall", |
| | "Assess mould growth risk with WUFI", |
| | "Check interstitial condensation over year", |
| | "What relative humidity in insulation?", |
| | "Model timber moisture content cycle", |
| | "Calculate drying potential of wall", |
| | "Simulate vapour barrier performance", |
| | "What moisture accumulation over 5 years?", |
| | "Assess warm side vapour control", |
| | "Model rain-driven moisture ingress", |
| | "Calculate hygrothermal performance", |
| | "What critical RH threshold for mould?", |
| | "Simulate renovation wall performance", |
| | "Assess breathable membrane effectiveness", |
| | "Model cold bridge moisture risk", |
| | ], |
| | "hydraulic-transport": [ |
| | "Calculate slurry flow in pipe", |
| | "Design pneumatic conveying system", |
| | "What settling velocity for particles?", |
| | "Assess pipe wear from abrasive material", |
| | "Calculate critical deposition velocity", |
| | "Design vacuum transport system", |
| | "What pressure drop in slurry pipe?", |
| | "Assess solids concentration limit", |
| | "Calculate hindered settling velocity", |
| | "Design dense phase conveying", |
| | "What pipe size for sand slurry?", |
| | "Assess erosion rate in bend", |
| | "Calculate transport velocity for coal", |
| | "Design plug flow conveying", |
| | "What air velocity for pneumatic transport?", |
| | ], |
| | "daylighting": [ |
| | "Optimize glazing ratio for daylight", |
| | "Calculate solar gain coefficient", |
| | "Design light shelf for deeper daylight", |
| | "What window size for adequate light?", |
| | "Model reflectance for daylight distribution", |
| | "Calculate daylight penetration depth", |
| | "Design skylight for top lighting", |
| | "What glazing VLT for daylight?", |
| | "Model combined side and top lighting", |
| | "Calculate view factor to sky", |
| | "Design clerestory for even light", |
| | "What external shading for solar control?", |
| | "Model light pipe for windowless room", |
| | "Calculate useful daylight illuminance", |
| | "Design atrium for daylight distribution", |
| | ], |
| | "energy-modelling": [ |
| | "Calculate annual heating energy demand", |
| | "Model building energy consumption", |
| | "What fabric improvements reduce energy?", |
| | "Compare HVAC system energy use", |
| | "Calculate energy use intensity", |
| | "Model renewable energy contribution", |
| | "What baseload energy for building?", |
| | "Calculate peak demand for sizing", |
| | "Model occupancy impact on energy", |
| | "What U-value changes affect energy most?", |
| | "Calculate operational carbon from energy", |
| | "Model heat pump COP impact", |
| | "What lighting energy for office?", |
| | "Calculate hot water energy demand", |
| | "Model plug load energy consumption", |
| | ], |
| | "indoor-air-quality": [ |
| | "Calculate ventilation rate for IAQ", |
| | "What CO2 level indicates good IAQ?", |
| | "Assess PM2.5 filtration requirement", |
| | "Check pollutant concentration limits", |
| | "Calculate air changes for fresh air", |
| | "What VOC level is acceptable?", |
| | "Design filtration for urban location", |
| | "Calculate formaldehyde from materials", |
| | "What outside air rate per person?", |
| | "Assess radon mitigation requirement", |
| | "Calculate pollutant source strength", |
| | "What HEPA filtration for healthcare?", |
| | "Design demand-controlled ventilation", |
| | "Calculate NOx ingress from traffic", |
| | "What air cleaning for school?", |
| | ], |
| | "pedestrian-comfort": [ |
| | "Assess wind comfort at ground level", |
| | "Check Lawson sitting comfort criteria", |
| | "What wind mitigation for entrance?", |
| | "Calculate pedestrian safety threshold", |
| | "Design wind screen for seating area", |
| | "Check comfort for outdoor dining", |
| | "What wind conditions for thoroughfare?", |
| | "Assess corner acceleration effect", |
| | "Calculate downdraft from tall building", |
| | "Design canopy for wind protection", |
| | "Check comfort for balcony space", |
| | "What wind criteria for playground?", |
| | "Assess venturi effect between buildings", |
| | "Calculate gust equivalent mean", |
| | "Design landscaping for wind mitigation", |
| | ], |
| | "wind-microclimate": [ |
| | "Model wind patterns around building", |
| | "Calculate amplification factor for wind", |
| | "Assess pedestrian safety for high rise", |
| | "What wind acceleration between towers?", |
| | "Model corner effect wind speed", |
| | "Calculate wind comfort probability", |
| | "Assess channelling effect in street", |
| | "What wind conditions at podium level?", |
| | "Model downwash from building", |
| | "Calculate reference wind speed", |
| | "Assess wind for rooftop amenity", |
| | "What direction dominates wind?", |
| | "Model mitigation with porous screen", |
| | "Calculate wind comfort hours", |
| | "Assess existing conditions baseline", |
| | ], |
| | "vibration": [ |
| | "Calculate vibration dose value", |
| | "Assess human perception of vibration", |
| | "Check building damage from vibration", |
| | "What VDV limit for residential?", |
| | "Calculate vibration from railway", |
| | "Design vibration isolation for plant", |
| | "What PPV threshold for listed building?", |
| | "Assess groundborne vibration path", |
| | "Calculate vibration transfer to structure", |
| | "Design floating floor for vibration", |
| | "What vibration criteria for hospital?", |
| | "Assess piling vibration impact", |
| | "Calculate vibration from road traffic", |
| | "Design resilient mounting for equipment", |
| | "What vibration isolation efficiency?", |
| | ], |
| | "swept-path": [ |
| | "Calculate swept path for fire tender", |
| | "Design turning circle for refuse vehicle", |
| | "What clearance for articulated lorry?", |
| | "Check swept path for car park", |
| | "Design entrance for large delivery vehicle", |
| | "Calculate tracking width on bend", |
| | "What corner radius for bus?", |
| | "Assess reversing manoeuvre space", |
| | "Design loading bay access", |
| | "Calculate tail swing clearance", |
| | "What geometry for coach drop-off?", |
| | "Assess fire appliance access route", |
| | "Calculate swept path for ambulance", |
| | "Design turning head for cul-de-sac", |
| | "What dimensions for HGV access?", |
| | ], |
| | "carbon-offset": [ |
| | "Calculate carbon offset requirement", |
| | "What offset cost per tonne CO2?", |
| | "Compare offset options for development", |
| | "Design carbon reduction before offset", |
| | "What residual carbon needs offsetting?", |
| | "Calculate offset fund contribution", |
| | "Check local authority offset rate", |
| | "What carbon payback period?", |
| | "Calculate net zero carbon pathway", |
| | "Design to minimize offset requirement", |
| | "What quality criteria for offsets?", |
| | "Calculate annual offset requirement", |
| | "Design renewable energy for offset credit", |
| | "What additionality requirement?", |
| | "Calculate lifetime carbon for offset", |
| | ], |
| | } |
| |
|
| | |
| | generic_prompts = [ |
| | "Calculate {metric} for {context}", |
| | "What's the {metric} for {context}?", |
| | "I need to determine {metric} for {context}", |
| | "Check {topic} compliance for {context}", |
| | "Assess {topic} for {context}", |
| | "Run {topic} analysis on {context}", |
| | "What are the requirements for {context}?", |
| | "Help me with {topic} calculation for {context}", |
| | "Can you analyze {topic} for {context}?", |
| | "I need a {topic} assessment for {context}", |
| | "What {topic} data do you have for {context}?", |
| | "Perform {topic} check on {context}", |
| | "Calculate the {metric} requirements", |
| | "What standards apply to {context}?", |
| | "Evaluate {context} for {topic}", |
| | ] |
| |
|
| | |
| | contexts = { |
| | "structural": [ |
| | "a 6m steel beam", "a residential floor slab", "a load-bearing wall", "a roof truss", |
| | "a concrete column", "a timber joist", "a steel portal frame", "a reinforced concrete beam", |
| | "a masonry wall", "a cantilever balcony", "a basement retaining wall", "a floor diaphragm", |
| | "a steel connection", "a precast panel", "a composite slab", "a foundation pad" |
| | ], |
| | "thermal": [ |
| | "a cavity wall", "a flat roof", "a ground floor slab", "a timber frame wall", |
| | "a solid wall with external insulation", "a warm roof construction", "a cold roof", |
| | "a party wall", "a window reveal", "a door threshold", "a floor-wall junction", |
| | "an insulated concrete form wall", "a curtain wall system", "a roof-wall junction", |
| | "a steel lintel detail", "a balcony thermal break" |
| | ], |
| | "energy": [ |
| | "a 3-bedroom dwelling", "a 500m2 office", "a school building", "a retail unit", |
| | "a 2-bed flat", "a 4-bed detached house", "a care home", "a student accommodation", |
| | "a warehouse", "a restaurant", "a leisure centre", "a hospital ward", |
| | "a hotel", "a data centre", "a laboratory", "a community centre" |
| | ], |
| | "fire": [ |
| | "an 8-storey residential building", "an open-plan office", "a shopping centre", "a hotel", |
| | "a care home", "a school", "a hospital", "a warehouse", "a mixed-use development", |
| | "a basement car park", "a high-rise apartment block", "a student accommodation", |
| | "a cinema", "a nightclub", "a sports hall", "an underground station" |
| | ], |
| | "sustainability": [ |
| | "a new office development", "a residential scheme", "a mixed-use building", |
| | "a school refurbishment", "a hospital extension", "a retail park", "a logistics hub", |
| | "a university campus", "an affordable housing project", "a commercial fit-out", |
| | "a heritage conversion", "a net-zero housing development", "a science park" |
| | ], |
| | "drainage": [ |
| | "a 2-hectare site", "a commercial car park", "a housing development", |
| | "a school playground", "an industrial estate", "a retail car park", |
| | "a highway drainage scheme", "a sports field", "a green roof system", |
| | "a permeable paving area", "a flood attenuation basin", "a soakaway" |
| | ], |
| | "daylight": [ |
| | "a deep plan office", "a residential living room", "a classroom", |
| | "a hospital ward", "an atrium space", "a basement flat", "a north-facing room", |
| | "an open plan kitchen-diner", "a bedroom", "a retail showroom" |
| | ], |
| | "acoustics": [ |
| | "a party wall between dwellings", "a floor separating flats", "an internal partition", |
| | "a classroom wall", "a hotel room", "a recording studio", "a conference room", |
| | "a residential development near a railway", "an office near a busy road" |
| | ] |
| | } |
| |
|
| | |
| | regulations = [ |
| | "Building Regulations Part A", "Building Regulations Part B", "Building Regulations Part E", |
| | "Building Regulations Part F", "Building Regulations Part G", "Building Regulations Part L", |
| | "Building Regulations Part M", "Building Regulations Part O", "Building Regulations Part S", |
| | "BS 9999", "BS 5950", "Eurocode 2", "Eurocode 3", "Approved Document B", |
| | "BREEAM requirements", "Passivhaus standards", "WELL certification", "Part L 2021", |
| | "SAP 10.2", "SBEM compliance", "TM59 overheating criteria", "BRE 209 guidelines" |
| | ] |
| |
|
| | |
| | metrics = { |
| | "structural": ["dead load", "imposed load", "ultimate load", "serviceability deflection", "shear capacity", "bending moment"], |
| | "thermal": ["U-value", "PSI value", "Y-value", "thermal resistance", "condensation risk", "temperature factor"], |
| | "energy": ["SAP rating", "EPC band", "primary energy", "CO2 emissions", "heating demand", "cooling load"], |
| | "fire": ["travel distance", "fire resistance period", "smoke clearance time", "evacuation time", "compartment size"], |
| | "sustainability": ["BREEAM score", "embodied carbon", "operational carbon", "whole life carbon", "biodiversity units"], |
| | "drainage": ["runoff rate", "attenuation volume", "discharge rate", "flood risk level", "pipe capacity"], |
| | } |
| |
|
| | |
| | topics = { |
| | "structural": ["structural capacity", "stability", "robustness", "load path", "connection strength"], |
| | "thermal": ["thermal performance", "condensation risk", "thermal bridging", "heat loss", "moisture transfer"], |
| | "energy": ["energy efficiency", "carbon performance", "compliance pathway", "fabric performance", "ventilation strategy"], |
| | "fire": ["fire safety", "means of escape", "compartmentation", "smoke control", "structural fire resistance"], |
| | "sustainability": ["sustainability performance", "environmental impact", "carbon footprint", "biodiversity impact"], |
| | "drainage": ["drainage capacity", "flood risk", "surface water management", "SuDS performance"], |
| | } |
| |
|
def get_domain(mcp_name: str) -> str:
    """Map an MCP server name to a domain label by keyword matching.

    Rules are tried in the order listed; the first rule with a keyword that
    occurs as a substring of ``mcp_name`` decides the result. When no rule
    matches, one of "structural", "thermal" or "energy" is picked at random.
    """
    # Order matters: earlier rules take precedence over later ones.
    keyword_rules = (
        ("structural", ("structural", "collapse", "vibration")),
        ("thermal", ("thermal", "psi", "condensation", "u-value", "wufi", "hygro")),
        ("energy", ("sap", "sbem", "energy", "passivhaus", "dynamic", "air-perm")),
        ("fire", ("fire", "smoke", "evacuation", "cfd")),
        ("sustainability", ("breeam", "well", "carbon", "lca", "biodiversity", "embodied")),
        ("drainage", ("drainage", "suds", "flood", "hydraulic", "water")),
        ("daylight", ("daylight", "adf", "sunlight")),
        ("acoustics", ("sound", "acoustic")),
    )

    for domain_label, keywords in keyword_rules:
        for keyword in keywords:
            if keyword in mcp_name:
                return domain_label

    # No keyword matched: fall back to a random general-purpose domain.
    return random.choice(["structural", "thermal", "energy"])
| |
|
| | |
| | for mcp_name, mcp_info in MCP_SERVERS.items(): |
| | description = mcp_info["description"] |
| | tools = mcp_info["tools"] |
| | domain = get_domain(mcp_name) |
| |
|
| | |
| | domain_contexts = contexts.get(domain, contexts["structural"]) |
| | domain_metrics = metrics.get(domain, metrics["structural"]) |
| | domain_topics = topics.get(domain, topics["structural"]) |
| |
|
| | |
| | specific_prompts = MCP_PROMPTS.get(mcp_name, []) |
| |
|
| | |
| | for prompt in specific_prompts: |
| | |
| | best_tool = tools[0] |
| | prompt_lower = prompt.lower() |
| |
|
| | for tool in tools: |
| | tool_words = tool.replace("_", " ").lower().split() |
| | if any(word in prompt_lower for word in tool_words): |
| | best_tool = tool |
| | break |
| |
|
| | tool_call = { |
| | "name": "universalMCP", |
| | "arguments": { |
| | "mcpServer": mcp_name, |
| | "toolName": best_tool, |
| | "arguments": {} |
| | } |
| | } |
| |
|
| | example = { |
| | "messages": [ |
| | {"role": "system", "content": SYSTEM_PROMPTS["mcp_orchestrated"]}, |
| | {"role": "user", "content": prompt}, |
| | {"role": "assistant", "content": f'<tool_call>{json.dumps(tool_call)}</tool_call>'} |
| | ] |
| | } |
| | examples.append(example) |
| |
|
| | |
| | variations = [ |
| | f"I need help with: {prompt}", |
| | f"Quick question - {prompt.lower()}", |
| | f"For my project, {prompt.lower()}", |
| | f"Can you help me? {prompt}", |
| | prompt.replace("?", ""), |
| | ] |
| |
|
| | for var in variations[:3]: |
| | var_example = { |
| | "messages": [ |
| | {"role": "system", "content": SYSTEM_PROMPTS["mcp_orchestrated"]}, |
| | {"role": "user", "content": var}, |
| | {"role": "assistant", "content": f'<tool_call>{json.dumps(tool_call)}</tool_call>'} |
| | ] |
| | } |
| | examples.append(var_example) |
| |
|
| | |
| | for tool in tools: |
| | tool_name_readable = tool.replace("_", " ") |
| |
|
| | |
| | for _ in range(8): |
| | context = random.choice(domain_contexts) |
| | metric = random.choice(domain_metrics) |
| | topic = random.choice(domain_topics) |
| | regulation = random.choice(regulations) |
| | template = random.choice(generic_prompts) |
| |
|
| | user_query = template.format( |
| | topic=topic, |
| | context=context, |
| | metric=metric, |
| | regulation=regulation |
| | ) |
| |
|
| | tool_call = { |
| | "name": "universalMCP", |
| | "arguments": { |
| | "mcpServer": mcp_name, |
| | "toolName": tool, |
| | "arguments": {} |
| | } |
| | } |
| |
|
| | example = { |
| | "messages": [ |
| | {"role": "system", "content": SYSTEM_PROMPTS["mcp_orchestrated"]}, |
| | {"role": "user", "content": user_query}, |
| | {"role": "assistant", "content": f'<tool_call>{json.dumps(tool_call)}</tool_call>'} |
| | ] |
| | } |
| | examples.append(example) |
| |
|
| | |
| | for i, tool in enumerate(tools): |
| | tool_name_readable = tool.replace("_", " ") |
| | context = random.choice(domain_contexts) |
| |
|
| | |
| | if specific_prompts and i < len(specific_prompts): |
| | user_query = specific_prompts[i] |
| | else: |
| | user_query = f"Can you {tool_name_readable} for {context}?" |
| |
|
| | tool_call = { |
| | "name": "universalMCP", |
| | "arguments": { |
| | "mcpServer": mcp_name, |
| | "toolName": tool, |
| | "arguments": {} |
| | } |
| | } |
| |
|
| | |
| | result_text = f"Analysis completed successfully. The {tool_name_readable} for {context} shows the following results: [detailed calculation output would appear here]. The values meet the applicable standards and regulations." |
| |
|
| | example_with_result = { |
| | "messages": [ |
| | {"role": "system", "content": SYSTEM_PROMPTS["mcp_orchestrated"]}, |
| | {"role": "user", "content": user_query}, |
| | {"role": "assistant", "content": f'<tool_call>{json.dumps(tool_call)}</tool_call>'}, |
| | {"role": "tool", "content": json.dumps({"success": True, "result": result_text})}, |
| | {"role": "assistant", "content": f"Based on the {tool_name_readable} analysis, here are the key findings:\n\n1. The calculation has been completed for {context}\n2. Results comply with the relevant {domain} standards\n3. The {mcp_name.replace('-', ' ')} assessment confirms acceptable performance\n\nWould you like me to provide more details on any specific aspect?"} |
| | ] |
| | } |
| | examples.append(example_with_result) |
| |
|
| | return examples |
| |
|
| |
|
def convert_csv_to_jsonl(csv_path: Path, mode: str = "direct") -> List[Dict]:
    """Build chat-format training examples from a CSV of question/answer pairs.

    The file is read as bytes, NUL bytes are removed, and the rest is decoded
    as UTF-8 (a leading BOM is stripped, undecodable bytes are dropped) before
    being parsed with ``csv.DictReader``.

    Args:
        csv_path: Location of the CSV file.
        mode: Accepted as part of the signature; the body does not consult it.

    Returns:
        One ``{"messages": [system, user, assistant]}`` dict per usable row.
        Rows with a missing question/answer, or where either is shorter than
        10 characters after stripping, are left out.
    """
    import io

    with open(csv_path, 'rb') as handle:
        raw_bytes = handle.read()
    text = raw_bytes.replace(b'\x00', b'').decode('utf-8-sig', errors='ignore')

    # Substrings in the question that route a row to the "thinking" prompt.
    thinking_cues = ('explain', 'how', 'why', 'compare')
    converted = []

    for record in csv.DictReader(io.StringIO(text)):
        # Column names differ between source files; take the first non-empty.
        question = record.get('Question') or record.get('question') or record.get('Q', '')
        answer = (
            record.get('Answer')
            or record.get('answer')
            or record.get('A')
            or record.get('Correct Answer', '')
        )
        if not (question and answer):
            continue

        question, answer = question.strip(), answer.strip()
        if min(len(question), len(answer)) < 10:
            continue

        lowered_question = question.lower()
        wants_thinking = len(answer) > 200 or any(
            cue in lowered_question for cue in thinking_cues
        )
        if wants_thinking:
            system_prompt = SYSTEM_PROMPTS["thinking"]
            # Prefix a templated reasoning stub ahead of the CSV answer.
            answer = f"<think>\nThis question about {question[:50]}... requires careful analysis.\nLet me consider the key factors.\n</think>\n\n{answer}"
        else:
            system_prompt = SYSTEM_PROMPTS["direct"]

        converted.append({
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer}
            ]
        })

    return converted
| |
|
| |
|
def load_jsonl(path: Path) -> List[Dict]:
    """Load a JSON Lines file, one parsed value per non-blank line.

    Blank lines are ignored. Lines that are not valid JSON are skipped
    (best-effort loading), but the number skipped is reported on stderr so
    that dropped data does not go unnoticed.

    Args:
        path: Location of the ``.jsonl`` file (opened as UTF-8 text).

    Returns:
        The parsed values in file order.
    """
    examples = []
    skipped = 0
    with open(path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            try:
                examples.append(json.loads(line))
            except json.JSONDecodeError:
                skipped += 1
    if skipped:
        print(
            f"  ! {path}: skipped {skipped:,} malformed JSON line(s)",
            file=sys.stderr,
        )
    return examples
| |
|
| |
|
def validate_example(example: Dict) -> bool:
    """Check that a training example has the expected chat structure.

    An example is valid when it is a dict whose ``"messages"`` value is a
    list of at least two dicts, each carrying ``"role"`` and ``"content"``
    keys with a role of system/user/assistant/tool, and the conversation
    contains at least one user and one assistant message.

    Any other input, including non-dict values such as a bare number or
    string parsed from a JSONL line, yields False rather than raising.
    """
    # Parsed JSONL lines can be any JSON value, not just objects.
    if not isinstance(example, dict):
        return False

    messages = example.get("messages")
    if not isinstance(messages, list) or len(messages) < 2:
        return False

    valid_roles = {"system", "user", "assistant", "tool"}
    for msg in messages:
        if not isinstance(msg, dict):
            return False
        if "role" not in msg or "content" not in msg:
            return False
        role = msg["role"]
        # The isinstance check keeps an unhashable role (e.g. a list) from raising.
        if not isinstance(role, str) or role not in valid_roles:
            return False

    has_user = any(m["role"] == "user" for m in messages)
    has_assistant = any(m["role"] == "assistant" for m in messages)

    return has_user and has_assistant
| |
|
| |
|
def deduplicate_examples(examples: List[Dict]) -> List[Dict]:
    """Drop examples whose user-side text has already been seen.

    The key for each example is the MD5 hex digest of all its user message
    contents concatenated in order; system, assistant and tool messages do
    not take part. The first example for each key is kept and input order
    is preserved.
    """
    fingerprints = set()
    kept = []

    for candidate in examples:
        user_parts = [
            turn["content"]
            for turn in candidate["messages"]
            if turn["role"] == "user"
        ]
        digest = hashlib.md5("".join(user_parts).encode()).hexdigest()

        if digest in fingerprints:
            continue
        fingerprints.add(digest)
        kept.append(candidate)

    return kept
| |
|
| |
|
def _classify_example(example: Dict) -> str:
    """Return the balancing category for one example.

    All assistant turns are inspected, not only the final message, so a
    multi-turn example (tool call, tool result, assistant summary) is still
    recognised by its tool call. Non-string contents are ignored.
    """
    assistant_text = "\n".join(
        msg["content"]
        for msg in example.get("messages", [])
        if isinstance(msg, dict)
        and msg.get("role") == "assistant"
        and isinstance(msg.get("content"), str)
    )

    # Precedence: MCP call > other tool call > thinking > direct.
    if "universalMCP" in assistant_text or "mcpServer" in assistant_text:
        return "mcp"
    if "<tool_call>" in assistant_text:
        return "tool_calling"
    if "<think>" in assistant_text:
        return "thinking"
    return "direct"


def balance_dataset(examples: List[Dict], max_per_type: int = 15000) -> List[Dict]:
    """Cap each example category at ``max_per_type`` items.

    Examples are grouped as direct / thinking / tool_calling / mcp based on
    markers in their assistant messages. A group larger than the cap is
    reduced with ``random.sample``; smaller groups are kept whole.

    Args:
        examples: Chat-format examples, each a dict with a "messages" list.
        max_per_type: Maximum number of examples retained per category.

    Returns:
        The retained examples, grouped by category in the order
        direct, thinking, tool_calling, mcp.
    """
    categorized = {
        "direct": [],
        "thinking": [],
        "tool_calling": [],
        "mcp": []
    }

    for ex in examples:
        categorized[_classify_example(ex)].append(ex)

    balanced = []
    for items in categorized.values():
        if len(items) > max_per_type:
            balanced.extend(random.sample(items, max_per_type))
        else:
            balanced.extend(items)

    return balanced
| |
|
| |
|
| | |
| | |
| | |
| |
|
def _write_jsonl(path: Path, examples: List[Dict]) -> None:
    """Write ``examples`` to ``path`` as UTF-8 JSON Lines, one object per line."""
    with open(path, 'w', encoding='utf-8') as f:
        for ex in examples:
            f.write(json.dumps(ex, ensure_ascii=False) + '\n')


def main():
    """Build the combined dataset and write it under ``OUTPUT_DIR``.

    Steps: load the existing JSONL datasets, convert the CSV sources,
    generate the MCP examples, then validate, deduplicate, balance, shuffle
    and split 95/5 into train/validation. Writes ``train.jsonl``,
    ``validation.jsonl``, ``combined.jsonl`` and ``dataset_stats.json``.
    Progress is printed to stdout.
    """
    print("=" * 70)
    print("BuildwellAI Model V2 - Dataset Preparation")
    print("=" * 70)
    print(f"Output directory: {OUTPUT_DIR}")
    print()

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Seed before the first random draw: MCP generation and balancing both use
    # the random module, so seeding only before the shuffle would leave the
    # dataset contents different on every run.
    random.seed(42)

    all_examples = []
    stats = {}

    print("Loading existing JSONL datasets...")

    thinking_path = SOURCE_DIR / "dataset_thinking_mode.jsonl"
    if thinking_path.exists():
        thinking = load_jsonl(thinking_path)
        all_examples.extend(thinking)
        stats["thinking_mode"] = len(thinking)
        print(f" - dataset_thinking_mode.jsonl: {len(thinking):,} examples")

    tool_calling_path = SOURCE_DIR / "dataset_tool_calling.jsonl"
    if tool_calling_path.exists():
        tool_examples = load_jsonl(tool_calling_path)
        all_examples.extend(tool_examples)
        stats["tool_calling"] = len(tool_examples)
        print(f" - dataset_tool_calling.jsonl: {len(tool_examples):,} examples")

    print("\nConverting CSV files...")

    # (file name under SOURCE_DIR, key used in the stats report)
    csv_files = [
        ("qa-buildwell-ai.csv", "qa_pairs"),
        ("UK Building Control Competency Benchmark 2025 - Benchmark.csv", "uk_benchmark"),
        ("BSI Flex 8670 Building Safety Competence Benchmark - generate a benchmark for all these questiosn.csv", "bsi_benchmark"),
    ]

    for csv_name, stat_key in csv_files:
        csv_path = SOURCE_DIR / csv_name
        if csv_path.exists():
            converted = convert_csv_to_jsonl(csv_path)
            all_examples.extend(converted)
            stats[stat_key] = len(converted)
            print(f" - {csv_name}: {len(converted):,} examples")
        else:
            print(f" - {csv_name}: NOT FOUND")

    print("\nGenerating MCP training data for 42 servers...")
    mcp_examples = generate_mcp_training_examples()
    all_examples.extend(mcp_examples)
    stats["mcp_generated"] = len(mcp_examples)
    print(f" - Generated: {len(mcp_examples):,} MCP examples")

    print("\nValidating examples...")
    valid_examples = [ex for ex in all_examples if validate_example(ex)]
    invalid_count = len(all_examples) - len(valid_examples)
    print(f" - Valid: {len(valid_examples):,}")
    print(f" - Invalid (removed): {invalid_count:,}")

    print("\nDeduplicating...")
    unique_examples = deduplicate_examples(valid_examples)
    dupes_removed = len(valid_examples) - len(unique_examples)
    print(f" - Unique: {len(unique_examples):,}")
    print(f" - Duplicates removed: {dupes_removed:,}")

    print("\nBalancing dataset...")
    balanced_examples = balance_dataset(unique_examples, max_per_type=15000)
    print(f" - Balanced: {len(balanced_examples):,}")

    print("\nShuffling...")
    random.shuffle(balanced_examples)

    # Hold out the first 5% of the shuffled data for validation.
    val_size = int(len(balanced_examples) * 0.05)
    train_examples = balanced_examples[val_size:]
    val_examples = balanced_examples[:val_size]

    print("\nFinal split:")
    print(f" - Training: {len(train_examples):,}")
    print(f" - Validation: {len(val_examples):,}")

    print("\nSaving datasets...")

    train_path = OUTPUT_DIR / "train.jsonl"
    _write_jsonl(train_path, train_examples)
    print(f" - {train_path}")

    val_path = OUTPUT_DIR / "validation.jsonl"
    _write_jsonl(val_path, val_examples)
    print(f" - {val_path}")

    combined_path = OUTPUT_DIR / "combined.jsonl"
    _write_jsonl(combined_path, balanced_examples)
    print(f" - {combined_path}")

    stats_path = OUTPUT_DIR / "dataset_stats.json"
    final_stats = {
        "generated_at": datetime.now().isoformat(),
        "source_counts": stats,
        "total_before_processing": len(all_examples),
        "valid_examples": len(valid_examples),
        "unique_examples": len(unique_examples),
        "balanced_examples": len(balanced_examples),
        "train_examples": len(train_examples),
        "val_examples": len(val_examples),
        "mcp_servers_covered": len(MCP_SERVERS),
    }

    with open(stats_path, 'w', encoding='utf-8') as f:
        json.dump(final_stats, f, indent=2)
    print(f" - {stats_path}")

    print("\n" + "=" * 70)
    print("DATASET PREPARATION COMPLETE")
    print("=" * 70)
    print(f"\nTotal training examples: {len(train_examples):,}")
    print("Ready for fine-tuning!")
| |
|
| |
|
# Run the dataset build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
| |
|