# Spaces:
# Runtime error
# Runtime error
# File size: 6,309 Bytes
# 02ff91f
"""
Task Decomposer — handles task ambiguity before episode starts.
Two modes: INTERACTIVE (asks for clarification) and AUTONOMOUS (infers defaults).
For hackathon: uses AUTONOMOUS mode (95% of enterprise use cases).
"""
from __future__ import annotations

import os
import re
from dataclasses import dataclass
from enum import Enum

import yaml
class ComplexityClass(Enum):
    """Closed set of task complexity levels, ordered from smallest to largest.

    Each member's value is the lowercase string used for that level
    (e.g. ``ComplexityClass("moderate") is ComplexityClass.MODERATE``).
    """

    ATOMIC = "atomic"
    SIMPLE = "simple"
    MODERATE = "moderate"
    COMPLEX = "complex"
    ENTERPRISE = "enterprise"
def _load_complexity_keywords(
keywords_path: str = "configs/complexity_keywords.yaml",
) -> dict[str, list[str]]:
try:
with open(keywords_path) as f:
return yaml.safe_load(f)
except FileNotFoundError:
raise FileNotFoundError(
f"complexity_keywords.yaml not found at {keywords_path}. "
"This file is required — do not delete it."
)
@dataclass
class EnrichedTask:
    """Task with inferred metadata for episode setup."""

    # The task text exactly as it was passed in.
    original_description: str
    # The task text after default assumptions were appended (may be unchanged).
    enriched_description: str
    # Complexity level name, e.g. "atomic" or "enterprise".
    complexity_class: str
    # Number of specialists looked up from the complexity level.
    expected_specialists: int
    # Detected domain names; ["general"] when nothing specific matched.
    domain_hints: list[str]
    # True when the description was judged too short or too vague.
    is_ambiguous: bool
    # Set by TaskDecomposer.decompose to the same value as is_ambiguous.
    autonomously_enriched: bool
class TaskDecomposer:
    """
    Analyzes task descriptions and enriches them with inferred metadata.

    For each description it infers a complexity level, a list of domain
    hints and an ambiguity flag, and, for ambiguous descriptions, appends
    the technology assumptions configured under ``default_assumptions``.
    """

    DOMAIN_KEYWORDS = {
        "frontend": ["react", "vue", "angular", "ui", "css", "frontend", "component"],
        "backend": ["api", "server", "endpoint", "rest", "backend", "node", "express"],
        "database": ["database", "schema", "sql", "mongodb", "postgresql", "redis"],
        "devops": ["deploy", "docker", "kubernetes", "ci/cd", "pipeline", "cloud"],
        "security": ["auth", "security", "encryption", "oauth", "jwt", "compliance"],
        "product": ["requirement", "feature", "user story", "roadmap", "mvp"],
    }

    COMPLEXITY_SPECIALIST_MAP = {
        "atomic": 1,
        "simple": 2,
        "moderate": 3,
        "complex": 4,
        "enterprise": 5,
    }

    # Checked from most to least complex so the strongest keyword hit wins.
    _COMPLEXITY_ORDER = ("enterprise", "complex", "moderate", "simple", "atomic")

    # Placeholder words; two or more distinct ones mark a task as ambiguous.
    _VAGUE_WORDS = frozenset({"it", "this", "that", "something", "stuff", "thing"})

    # Domains whose configured stack may be appended to an ambiguous task.
    _STACK_DOMAINS = ("frontend", "backend", "database")

    def __init__(
        self,
        sector_cfg: dict | None = None,
        keywords_path: str = "configs/complexity_keywords.yaml",
    ):
        """Store the sector assumptions and load the complexity keywords.

        Args:
            sector_cfg: Sector configuration; must contain a
                ``default_assumptions`` entry.
            keywords_path: Path of the complexity keyword YAML file.

        Raises:
            ValueError: If ``default_assumptions`` is absent (or ``None``).
        """
        # sector.default_assumptions is required — no silent React/Node fallback
        assumptions = (sector_cfg or {}).get("default_assumptions")
        if assumptions is None:
            raise ValueError(
                "sector.default_assumptions is missing from training_config.yaml. "
                "Add frontend/backend/database/team_size keys under sector.default_assumptions."
            )
        self._assumptions = assumptions
        self._complexity_keywords = _load_complexity_keywords(keywords_path)

    def decompose(self, task_description: str) -> EnrichedTask:
        """Main entry point. Returns an EnrichedTask for ``task_description``."""
        complexity = self._classify_complexity(task_description)
        domains = self._detect_domains(task_description)
        is_ambiguous = self._is_ambiguous(task_description)
        enriched_desc = self.enrich_with_defaults(
            task_description, complexity, domains, is_ambiguous
        )
        return EnrichedTask(
            original_description=task_description,
            enriched_description=enriched_desc,
            complexity_class=complexity,
            expected_specialists=self.COMPLEXITY_SPECIALIST_MAP[complexity],
            domain_hints=domains,
            is_ambiguous=is_ambiguous,
            autonomously_enriched=is_ambiguous,
        )

    @staticmethod
    def _mentions_keyword(text_lower: str, keyword: str) -> bool:
        """Return True if ``keyword`` starts or ends a word in ``text_lower``.

        Anchoring to one word edge keeps inflections and compounds
        ("deploying", "mysql") while rejecting keywords buried inside an
        unrelated word ("ui" in "build", "api" in "rapid").
        """
        escaped = re.escape(keyword.lower())
        pattern = rf"(?<!\w){escaped}|{escaped}(?!\w)"
        return re.search(pattern, text_lower) is not None

    def _classify_complexity(self, description: str) -> str:
        """Return the complexity level name for ``description``.

        Configured keywords are tried first, from "enterprise" down to
        "atomic"; if none match, the word count decides between
        "moderate" (>15), "simple" (>8) and "atomic".
        """
        desc_lower = description.lower()
        for complexity in self._COMPLEXITY_ORDER:
            # A YAML key with a null value yields None rather than a list.
            keywords = self._complexity_keywords.get(complexity) or []
            # NOTE(review): plain substring match is kept here because the
            # keyword lists live in external config — confirm whether they
            # should use the same word-edge rule as domain detection.
            # Keywords are lowercased so capitalised config entries still hit;
            # empty entries are skipped because "" is a substring of anything.
            if any(kw and str(kw).lower() in desc_lower for kw in keywords):
                return complexity

        word_count = len(description.split())
        if word_count > 15:
            return "moderate"
        if word_count > 8:
            return "simple"
        return "atomic"

    def _detect_domains(self, description: str) -> list[str]:
        """Return the domains whose keywords appear in ``description``.

        Domains are reported in ``DOMAIN_KEYWORDS`` order; ``["general"]`` is
        returned when no keyword matches.
        """
        desc_lower = description.lower()
        detected = [
            domain
            for domain, keywords in self.DOMAIN_KEYWORDS.items()
            if any(self._mentions_keyword(desc_lower, kw) for kw in keywords)
        ]
        return detected or ["general"]

    def _is_ambiguous(self, description: str) -> bool:
        """Return True for very short or vaguely worded descriptions.

        A description is ambiguous when it has fewer than four
        whitespace-separated words, or when it uses at least two distinct
        vague words.
        """
        if len(description.split()) < 4:
            return True
        # Tokenise on word characters so "this," and "it." still count.
        tokens = set(re.findall(r"[\w']+", description.lower()))
        return len(self._VAGUE_WORDS & tokens) >= 2

    def enrich_with_defaults(
        self,
        description: str,
        complexity: str,
        domains: list[str],
        is_ambiguous: bool,
    ) -> str:
        """
        Enrich ambiguous tasks with sector-configured technology assumptions.
        Reads from self._assumptions (sector.default_assumptions in config).

        Args:
            description: Original task text.
            complexity: Complexity level name for the task.
            domains: Domain hints detected for the task.
            is_ambiguous: Whether the task was judged ambiguous.

        Returns:
            ``description`` unchanged when ``is_ambiguous`` is false;
            otherwise the description with " (assume <stack> <domain>)"
            appended for each detected frontend/backend/database domain whose
            configured stack is not already mentioned, plus
            " for a team of <team_size>" for moderate/complex tasks that do
            not mention "scale".
        """
        if not is_ambiguous:
            return description

        enriched = description
        desc_lower = description.lower()

        for domain in self._STACK_DOMAINS:
            stack = self._assumptions.get(domain, "")
            if domain not in domains or not stack:
                continue
            # A stack such as "React/Next.js" lists alternatives. Empty parts
            # (e.g. from a trailing "/") are dropped: "" is a substring of
            # every description and would wrongly suppress the assumption.
            parts = [p.strip() for p in str(stack).lower().split("/") if p.strip()]
            if not any(part in desc_lower for part in parts):
                enriched += f" (assume {stack} {domain})"

        team_size = self._assumptions.get("team_size", "")
        if complexity in ("moderate", "complex") and team_size and "scale" not in desc_lower:
            enriched += f" for a team of {team_size}"
        return enriched
|