Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update schema_mapper.py
Browse files- schema_mapper.py +220 -71
schema_mapper.py
CHANGED
|
@@ -152,8 +152,127 @@ class MappingResult:
|
|
| 152 |
missing: List[str] = field(default_factory=list)
|
| 153 |
discovered: Dict[str, str] = field(default_factory=dict) # Discovered column types
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
def map_concepts(scenario_text: str, registry: DataRegistry) -> MappingResult:
|
| 156 |
-
"""
|
| 157 |
result = MappingResult()
|
| 158 |
|
| 159 |
if not registry.names():
|
|
@@ -163,9 +282,6 @@ def map_concepts(scenario_text: str, registry: DataRegistry) -> MappingResult:
|
|
| 163 |
# Extract key terms from scenario
|
| 164 |
scenario_terms = _extract_key_terms_from_scenario(scenario_text)
|
| 165 |
|
| 166 |
-
# Generate dynamic patterns based on scenario
|
| 167 |
-
concept_patterns = _generate_dynamic_patterns(scenario_terms, UNIVERSAL_CONCEPT_PATTERNS)
|
| 168 |
-
|
| 169 |
# Collect all available columns
|
| 170 |
all_columns = []
|
| 171 |
for table in registry.iter_tables():
|
|
@@ -176,87 +292,120 @@ def map_concepts(scenario_text: str, registry: DataRegistry) -> MappingResult:
|
|
| 176 |
for col in table.df.columns:
|
| 177 |
all_columns.append((table.name, str(col)))
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
]
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
high_scoring = [pair for pair, score in scores if score >= threshold]
|
| 197 |
|
| 198 |
-
|
| 199 |
-
tbl, col = high_scoring[0]
|
| 200 |
-
result.resolved[concept] = (tbl, col)
|
| 201 |
-
else:
|
| 202 |
-
# Multiple good matches - mark as ambiguous
|
| 203 |
-
result.ambiguous[concept] = high_scoring[:5] # Limit to top 5
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
def build_phase1_questions(scenario_text: str, registry: DataRegistry, mapping: MappingResult, max_questions: int = 6) -> str:
|
| 208 |
-
"""Build clarifying questions based on scenario and data gaps."""
|
| 209 |
questions = []
|
| 210 |
scenario_lower = scenario_text.lower() if scenario_text else ""
|
| 211 |
|
| 212 |
-
# Data structure questions
|
| 213 |
-
if not mapping.resolved and not mapping.ambiguous:
|
| 214 |
-
questions.append("**Data Structure**: I don't see clear patterns in your data. Could you describe what each column represents?")
|
| 215 |
-
return "\n".join(questions)
|
| 216 |
-
|
| 217 |
# Ambiguous mappings - ask for clarification
|
| 218 |
important_concepts = ['facility', 'organization', 'department', 'specialty', 'region']
|
| 219 |
for concept in important_concepts:
|
| 220 |
-
if concept in mapping.ambiguous:
|
| 221 |
-
options = [f"{tbl}.{col}" for tbl, col in mapping.ambiguous[concept][:4]]
|
| 222 |
-
questions.append(f"**Entity Identification**: Which column represents the main {concept.replace('_', ' ')}? Options: {', '.join(options)}")
|
| 223 |
-
if len(questions) >= max_questions:
|
| 224 |
-
break
|
| 225 |
-
|
| 226 |
-
# Metric clarification
|
| 227 |
-
metric_concepts = ['wait_time', 'cost', 'score', 'performance', 'quality']
|
| 228 |
-
for concept in metric_concepts:
|
| 229 |
-
if concept in mapping.ambiguous:
|
| 230 |
options = [f"{tbl}.{col}" for tbl, col in mapping.ambiguous[concept][:3]]
|
| 231 |
-
questions.append(f"**
|
| 232 |
-
if len(questions) >= max_questions:
|
| 233 |
-
break
|
| 234 |
|
| 235 |
# Missing critical data
|
| 236 |
-
if
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
# Scenario-specific questions
|
| 243 |
-
if any(term in scenario_lower for term in ['resource', 'allocation', 'priority']):
|
| 244 |
-
questions.append("**Resource Allocation**: What factors should guide resource prioritization? (e.g., volume, urgency, equity)")
|
| 245 |
-
|
| 246 |
-
if any(term in scenario_lower for term in ['comparison', 'benchmark', 'performance']):
|
| 247 |
-
questions.append("**Comparison Criteria**: How should different entities be compared? What constitutes good vs. poor performance?")
|
| 248 |
-
|
| 249 |
-
if any(term in scenario_lower for term in ['recommendation', 'decision', 'strategy']):
|
| 250 |
-
questions.append("**Decision Context**: What constraints or preferences should influence the recommendations? (e.g., budget limits, operational requirements)")
|
| 251 |
-
|
| 252 |
-
# Limit questions and format
|
| 253 |
-
questions = questions[:max_questions]
|
| 254 |
|
| 255 |
if not questions:
|
| 256 |
-
return "**
|
| 257 |
-
|
| 258 |
-
formatted_questions = ["**Clarification Questions**", ""]
|
| 259 |
-
for i, q in enumerate(questions, 1):
|
| 260 |
-
formatted_questions.append(f"{i}. {q}")
|
| 261 |
|
| 262 |
-
return "\n".join(
|
|
|
|
| 152 |
missing: List[str] = field(default_factory=list)
|
| 153 |
discovered: Dict[str, str] = field(default_factory=dict) # Discovered column types
|
| 154 |
|
| 155 |
+
def _extract_explicit_mappings_from_scenario(scenario_text: str, available_columns: List[Tuple[str, str]]) -> Dict[str, Tuple[str, str]]:
|
| 156 |
+
"""Extract explicit column mappings from scenario text."""
|
| 157 |
+
explicit_mappings = {}
|
| 158 |
+
|
| 159 |
+
if not scenario_text:
|
| 160 |
+
return explicit_mappings
|
| 161 |
+
|
| 162 |
+
scenario_lower = scenario_text.lower()
|
| 163 |
+
|
| 164 |
+
# Create a lookup of available columns (case-insensitive)
|
| 165 |
+
column_lookup = {}
|
| 166 |
+
for table_name, col_name in available_columns:
|
| 167 |
+
column_lookup[col_name.lower()] = (table_name, col_name)
|
| 168 |
+
|
| 169 |
+
# Pattern 1: Direct column descriptions like "Surgery_Median column contains..."
|
| 170 |
+
column_desc_patterns = [
|
| 171 |
+
r'(\w+)\s+column\s+(?:contains|reports|shows|includes|represents)',
|
| 172 |
+
r'column\s+(\w+)\s+(?:contains|reports|shows|includes|represents)',
|
| 173 |
+
r'(\w+)\s+(?:contains|reports|shows|includes|represents)'
|
| 174 |
+
]
|
| 175 |
+
|
| 176 |
+
for pattern in column_desc_patterns:
|
| 177 |
+
matches = re.findall(pattern, scenario_text, re.IGNORECASE)
|
| 178 |
+
for match in matches:
|
| 179 |
+
col_name = match.lower()
|
| 180 |
+
if col_name in column_lookup:
|
| 181 |
+
# Determine the concept based on context around the column name
|
| 182 |
+
context = scenario_text[max(0, scenario_text.lower().find(col_name)-50):scenario_text.lower().find(col_name)+100].lower()
|
| 183 |
+
|
| 184 |
+
if any(term in context for term in ['wait', 'time', 'delay', 'duration']):
|
| 185 |
+
if 'median' in col_name:
|
| 186 |
+
explicit_mappings['wait_median'] = column_lookup[col_name]
|
| 187 |
+
elif '90' in col_name or 'percentile' in col_name:
|
| 188 |
+
explicit_mappings['wait_p90'] = column_lookup[col_name]
|
| 189 |
+
else:
|
| 190 |
+
explicit_mappings['wait_time'] = column_lookup[col_name]
|
| 191 |
+
|
| 192 |
+
elif any(term in context for term in ['facility', 'hospital', 'clinic', 'site']):
|
| 193 |
+
explicit_mappings['facility'] = column_lookup[col_name]
|
| 194 |
+
|
| 195 |
+
elif any(term in context for term in ['specialty', 'service', 'department']):
|
| 196 |
+
explicit_mappings['specialty'] = column_lookup[col_name]
|
| 197 |
+
|
| 198 |
+
elif any(term in context for term in ['zone', 'region', 'area', 'district']):
|
| 199 |
+
explicit_mappings['region'] = column_lookup[col_name]
|
| 200 |
+
|
| 201 |
+
# Pattern 2: Task-based column identification like "calculate average for each facility"
|
| 202 |
+
task_patterns = [
|
| 203 |
+
(r'(?:for each|by)\s+(\w+)', ['facility', 'specialty', 'region']),
|
| 204 |
+
(r'(?:identify|rank|list)\s+(\w+)', ['facility', 'specialty', 'region']),
|
| 205 |
+
(r'average\s+(\w+)\s+(?:wait|time)', ['wait_median', 'wait_time']),
|
| 206 |
+
(r'median\s+(\w+)', ['wait_median']),
|
| 207 |
+
(r'90th\s+percentile\s+(\w+)', ['wait_p90'])
|
| 208 |
+
]
|
| 209 |
+
|
| 210 |
+
for pattern, concepts in task_patterns:
|
| 211 |
+
matches = re.findall(pattern, scenario_lower)
|
| 212 |
+
for match in matches:
|
| 213 |
+
match_lower = match.lower()
|
| 214 |
+
if match_lower in column_lookup:
|
| 215 |
+
for concept in concepts:
|
| 216 |
+
if concept not in explicit_mappings:
|
| 217 |
+
explicit_mappings[concept] = column_lookup[match_lower]
|
| 218 |
+
break
|
| 219 |
+
|
| 220 |
+
# Pattern 3: Direct column name matches from scenario
|
| 221 |
+
explicit_columns = re.findall(r'\b([A-Za-z_][A-Za-z0-9_]*)\b', scenario_text)
|
| 222 |
+
for col_candidate in explicit_columns:
|
| 223 |
+
col_lower = col_candidate.lower()
|
| 224 |
+
if col_lower in column_lookup:
|
| 225 |
+
# Smart concept assignment based on column name patterns
|
| 226 |
+
if not any(concept in explicit_mappings for concept in ['facility', 'organization', 'department']):
|
| 227 |
+
if re.search(r'facility|hospital|clinic|site|provider', col_lower):
|
| 228 |
+
explicit_mappings['facility'] = column_lookup[col_lower]
|
| 229 |
+
|
| 230 |
+
if not any(concept in explicit_mappings for concept in ['specialty', 'service']):
|
| 231 |
+
if re.search(r'specialty|service|department|type', col_lower):
|
| 232 |
+
explicit_mappings['specialty'] = column_lookup[col_lower]
|
| 233 |
+
|
| 234 |
+
if not any(concept in explicit_mappings for concept in ['region', 'zone']):
|
| 235 |
+
if re.search(r'zone|region|area|district', col_lower):
|
| 236 |
+
explicit_mappings['region'] = column_lookup[col_lower]
|
| 237 |
+
|
| 238 |
+
if not any(concept in explicit_mappings for concept in ['wait_median', 'wait_time']):
|
| 239 |
+
if re.search(r'.*median.*', col_lower) and re.search(r'wait|time|surgery|consult', col_lower):
|
| 240 |
+
explicit_mappings['wait_median'] = column_lookup[col_lower]
|
| 241 |
+
|
| 242 |
+
if not any(concept in explicit_mappings for concept in ['wait_p90']):
|
| 243 |
+
if re.search(r'.*(90|percentile).*', col_lower) and re.search(r'wait|time|surgery|consult', col_lower):
|
| 244 |
+
explicit_mappings['wait_p90'] = column_lookup[col_lower]
|
| 245 |
+
|
| 246 |
+
return explicit_mappings
|
| 247 |
+
|
| 248 |
+
def _extract_explicit_tasks_from_scenario(scenario_text: str) -> List[str]:
|
| 249 |
+
"""Extract explicit task requirements from scenario text."""
|
| 250 |
+
tasks = []
|
| 251 |
+
|
| 252 |
+
if not scenario_text:
|
| 253 |
+
return tasks
|
| 254 |
+
|
| 255 |
+
scenario_lower = scenario_text.lower()
|
| 256 |
+
|
| 257 |
+
# Task extraction patterns
|
| 258 |
+
task_patterns = [
|
| 259 |
+
r'(?:your tasks?(?:\s+are)?[:\s]+)([^.]*?)(?:\.|$)',
|
| 260 |
+
r'(?:you (?:should|need to|are to|must)[:\s]+)([^.]*?)(?:\.|$)',
|
| 261 |
+
r'(?:tasks?[:\s]+)([^.]*?)(?:\.|deliverables|$)',
|
| 262 |
+
r'(?:\d+\.?\s*)([^.]*?)(?:\.|$)' # Numbered tasks
|
| 263 |
+
]
|
| 264 |
+
|
| 265 |
+
for pattern in task_patterns:
|
| 266 |
+
matches = re.findall(pattern, scenario_text, re.IGNORECASE | re.DOTALL)
|
| 267 |
+
for match in matches:
|
| 268 |
+
task = match.strip()
|
| 269 |
+
if len(task) > 10 and any(verb in task.lower() for verb in ['identify', 'calculate', 'analyze', 'compare', 'assess', 'determine', 'rank', 'list']):
|
| 270 |
+
tasks.append(task)
|
| 271 |
+
|
| 272 |
+
return tasks
|
| 273 |
+
|
| 274 |
def map_concepts(scenario_text: str, registry: DataRegistry) -> MappingResult:
|
| 275 |
+
"""Enhanced mapping that extracts explicit information from scenario text."""
|
| 276 |
result = MappingResult()
|
| 277 |
|
| 278 |
if not registry.names():
|
|
|
|
| 282 |
# Extract key terms from scenario
|
| 283 |
scenario_terms = _extract_key_terms_from_scenario(scenario_text)
|
| 284 |
|
|
|
|
|
|
|
|
|
|
| 285 |
# Collect all available columns
|
| 286 |
all_columns = []
|
| 287 |
for table in registry.iter_tables():
|
|
|
|
| 292 |
for col in table.df.columns:
|
| 293 |
all_columns.append((table.name, str(col)))
|
| 294 |
|
| 295 |
+
# STEP 1: Extract explicit mappings from scenario text
|
| 296 |
+
explicit_mappings = _extract_explicit_mappings_from_scenario(scenario_text, all_columns)
|
| 297 |
+
|
| 298 |
+
# STEP 2: Use explicit mappings first
|
| 299 |
+
for concept, (table_name, col_name) in explicit_mappings.items():
|
| 300 |
+
result.resolved[concept] = (table_name, col_name)
|
| 301 |
+
|
| 302 |
+
# STEP 3: For unmapped concepts, use pattern matching with scenario context
|
| 303 |
+
remaining_patterns = {k: v for k, v in UNIVERSAL_CONCEPT_PATTERNS.items() if k not in result.resolved}
|
| 304 |
+
|
| 305 |
+
if remaining_patterns:
|
| 306 |
+
# Generate dynamic patterns based on scenario
|
| 307 |
+
concept_patterns = _generate_dynamic_patterns(scenario_terms, remaining_patterns)
|
| 308 |
|
| 309 |
+
# Map remaining concepts to columns
|
| 310 |
+
for concept, patterns in concept_patterns.items():
|
| 311 |
+
if concept in result.resolved:
|
| 312 |
+
continue # Skip already resolved
|
| 313 |
+
|
| 314 |
+
scores = [
|
| 315 |
+
((tbl, col), _score_column_match(col, patterns, scenario_terms))
|
| 316 |
+
for (tbl, col) in all_columns
|
| 317 |
+
]
|
| 318 |
+
|
| 319 |
+
scores.sort(key=lambda x: x[1], reverse=True)
|
| 320 |
+
|
| 321 |
+
if not scores or scores[0][1] == 0:
|
| 322 |
+
result.missing.append(concept)
|
| 323 |
+
continue
|
| 324 |
+
|
| 325 |
+
top_score = scores[0][1]
|
| 326 |
+
|
| 327 |
+
# Find all columns with similar high scores (potential ambiguity)
|
| 328 |
+
threshold = max(70, top_score - 15) # Higher threshold for explicit scenarios
|
| 329 |
+
high_scoring = [pair for pair, score in scores if score >= threshold]
|
| 330 |
+
|
| 331 |
+
if len(high_scoring) == 1:
|
| 332 |
+
tbl, col = high_scoring[0]
|
| 333 |
+
result.resolved[concept] = (tbl, col)
|
| 334 |
+
else:
|
| 335 |
+
# Check if scenario text makes disambiguation obvious
|
| 336 |
+
disambiguated = False
|
| 337 |
+
for (tbl, col), score in scores[:3]: # Check top 3
|
| 338 |
+
col_mentioned = col.lower() in scenario_text.lower()
|
| 339 |
+
if col_mentioned and score >= threshold:
|
| 340 |
+
result.resolved[concept] = (tbl, col)
|
| 341 |
+
disambiguated = True
|
| 342 |
+
break
|
| 343 |
+
|
| 344 |
+
if not disambiguated:
|
| 345 |
+
result.ambiguous[concept] = high_scoring[:3] # Limit to top 3
|
| 346 |
+
|
| 347 |
+
return result
|
| 348 |
+
|
| 349 |
+
def build_phase1_questions(scenario_text: str, registry: DataRegistry, mapping: MappingResult, max_questions: int = 4) -> str:
    """Build minimal clarifying questions, only when truly necessary.

    When the scenario looks comprehensive (at least three explicit tasks
    plus some data-description vocabulary), only ambiguous mappings that
    the scenario text does not settle are asked about. Otherwise a short
    standard set of questions is produced from the ambiguous and
    unresolved concepts.

    Args:
        scenario_text: Free-text scenario description; may be empty or
            ``None``.
        registry: Not read by this function; kept so the signature stays
            compatible with existing callers.
        mapping: Mapping result whose ``ambiguous`` and ``resolved``
            dictionaries are consulted.
        max_questions: Upper bound on the number of questions returned.

    Returns:
        A markdown string: either a "proceeding"/"ready" notice when no
        question is needed, or a headed list of questions.
    """
    # Normalise once so a missing scenario behaves like an empty one on
    # every path instead of raising on ``.lower()``.
    scenario_lower = scenario_text.lower() if scenario_text else ""

    # Extract explicit tasks from scenario
    explicit_tasks = _extract_explicit_tasks_from_scenario(scenario_text)

    # Check if scenario provides comprehensive instructions
    has_detailed_tasks = len(explicit_tasks) >= 3
    has_data_descriptions = any(
        term in scenario_lower
        for term in ('column', 'dataset', 'file', 'csv', 'records', 'contains', 'includes')
    )

    # If scenario is comprehensive, minimize questions
    if has_detailed_tasks and has_data_descriptions:
        critical_questions = []

        for concept, options in mapping.ambiguous.items():
            if len(options) <= 1 or len(critical_questions) >= max_questions:
                continue

            # A candidate column named in the scenario text is taken as a
            # clear preference, so no question is needed for this concept.
            if any(col_name.lower() in scenario_lower for _, col_name in options):
                continue

            option_strs = [f"{tbl}.{col}" for tbl, col in options[:3]]
            critical_questions.append(f"**Column Clarification**: For {concept.replace('_', ' ')}, use: {', '.join(option_strs)}?")

        if not critical_questions:
            return "**Proceeding with Analysis**: Scenario and data mappings are clear. Analyzing now..."

        return "**Quick Clarification**\n\n" + "\n".join(critical_questions)

    # Fallback to standard question generation for less comprehensive scenarios
    questions = []

    # Ambiguous mappings - ask for clarification
    for concept in ('facility', 'organization', 'department', 'specialty', 'region'):
        if concept in mapping.ambiguous and len(questions) < max_questions:
            options = [f"{tbl}.{col}" for tbl, col in mapping.ambiguous[concept][:3]]
            questions.append(f"**Entity**: Which column represents {concept.replace('_', ' ')}? Options: {', '.join(options)}")

    # Missing critical data
    if len(questions) < max_questions:
        if not any(concept in mapping.resolved for concept in ('facility', 'organization', 'department')):
            questions.append("**Grouping**: What entities should be analyzed? (facilities, departments, regions, etc.)")

        if not any(concept in mapping.resolved for concept in ('wait_time', 'wait_median', 'score', 'performance')):
            questions.append("**Metric**: What is the primary metric to analyze? (wait times, scores, costs, etc.)")

    # Both "missing data" questions can be appended after a single bound
    # check, so enforce the cap explicitly.
    questions = questions[:max_questions]

    if not questions:
        return "**Analysis Ready**: Data structure understood. Proceeding with analysis..."

    return "**Clarification Questions**\n\n" + "\n".join(f"{i+1}. {q}" for i, q in enumerate(questions))
|