Spaces:
Sleeping
Sleeping
| """ | |
| Domain Knowledge Base - Dynamic domain-specific aspect prototypes and skill mapping | |
| """ | |
| import os | |
| import json | |
| import logging | |
| from typing import Dict, List, Optional, Tuple | |
| from pathlib import Path | |
| logger = logging.getLogger(__name__) | |
class DomainConfig:
    """Configuration of a single domain, built from one decoded JSON document.

    Missing keys fall back to neutral defaults. Container fields that are
    present but falsy (for example an explicit JSON ``null``) are normalised
    to empty containers so the accessors never have to handle ``None``.
    """

    def __init__(self, config_data: Dict):
        """Populate the config from a decoded JSON mapping.

        Args:
            config_data: Mapping with the optional keys ``domain_id``,
                ``display_name``, ``description``, ``core_skills``,
                ``aspect_prototypes``, ``industry_benchmarks``,
                ``skill_gaps_mapping`` and ``detection_keywords``.
        """
        self.domain_id = config_data.get('domain_id', 'unknown')
        self.display_name = config_data.get('display_name', 'Unknown Domain')
        self.description = config_data.get('description', '')
        # `or` instead of a .get() default: a key that is present with a
        # null value would otherwise bypass the default and leave None here.
        self.core_skills = config_data.get('core_skills') or []
        self.aspect_prototypes = config_data.get('aspect_prototypes') or {}
        self.industry_benchmarks = config_data.get('industry_benchmarks') or {}
        self.skill_gaps_mapping = config_data.get('skill_gaps_mapping') or {}
        self.detection_keywords = config_data.get('detection_keywords') or []

    def get_aspect_seeds(self, aspect: str) -> List[str]:
        """Return a copy of the seed phrases for ``aspect`` (empty if unknown)."""
        return list(self.aspect_prototypes.get(aspect) or [])

    def get_all_aspect_seeds(self) -> Dict[str, List[str]]:
        """Return all aspect prototypes as ``{aspect: [seed, ...]}``.

        Both the dict and every seed list are fresh copies, so callers may
        extend or reorder the result without altering this configuration.
        """
        return {
            aspect: list(seeds or [])
            for aspect, seeds in self.aspect_prototypes.items()
        }

    def get_skill_gap_info(self, skill: str) -> Optional[Dict]:
        """Return the skill-gap entry stored for ``skill``, or ``None``."""
        return self.skill_gaps_mapping.get(skill)

    def get_benchmark(self, key: str, default=None):
        """Return the industry benchmark stored under ``key``, else ``default``."""
        return self.industry_benchmarks.get(key, default)
class DomainKnowledgeBase:
    """Load domain configurations from JSON files and answer queries on them.

    Each ``*.json`` file in the domains directory is turned into one
    ``DomainConfig``. The knowledge base then offers domain detection,
    aspect-prototype merging and skill-gap analysis on top of those configs.
    """

    def __init__(self, domains_dir: Optional[str] = None):
        """Initialize the knowledge base from a directory of domain configs.

        Args:
            domains_dir: Path to the directory containing domain JSON files.
                Defaults to ``./domains/`` relative to this file.
        """
        if domains_dir is None:
            domains_dir = os.path.join(os.path.dirname(__file__), 'domains')
        self.domains_dir = Path(domains_dir)
        self.domains: Dict[str, "DomainConfig"] = {}
        self._keyword_index: Dict[str, str] = {}  # keyword -> domain_id
        self._load_all_domains()
        self._build_keyword_index()
        logger.info(
            "DomainKnowledgeBase initialized with %d domains", len(self.domains)
        )

    def _load_all_domains(self):
        """Load every ``*.json`` file in ``self.domains_dir`` into ``self.domains``.

        Loading is best-effort: a file that cannot be read or parsed is
        logged and skipped so one bad config does not block the others.
        """
        if not self.domains_dir.exists():
            logger.warning("Domains directory not found: %s", self.domains_dir)
            return
        # glob() yields files in filesystem order; sorting makes the outcome
        # of id/keyword collisions and score ties identical on every machine.
        for json_file in sorted(self.domains_dir.glob('*.json')):
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    config_data = json.load(f)
                domain_config = DomainConfig(config_data)
                if domain_config.domain_id in self.domains:
                    # The later file still wins; the collision is just no
                    # longer silent.
                    logger.warning(
                        "Duplicate domain_id '%s' in %s; replacing earlier config",
                        domain_config.domain_id, json_file,
                    )
                self.domains[domain_config.domain_id] = domain_config
                logger.info("Loaded domain config: %s", domain_config.display_name)
            except Exception as e:
                # Deliberately broad: any failure in one file is reported
                # and the remaining files are still loaded.
                logger.error("Failed to load domain config %s: %s", json_file, e)

    def _build_keyword_index(self):
        """Build the lower-cased keyword -> domain_id lookup.

        When two domains declare the same keyword, the domain that comes
        later in ``self.domains`` overwrites the earlier one.
        """
        for domain_id, config in self.domains.items():
            for keyword in config.detection_keywords:
                self._keyword_index[keyword.lower()] = domain_id

    def get_domain(self, domain_id: str) -> Optional["DomainConfig"]:
        """Return the config for ``domain_id``, or ``None`` if it is unknown."""
        return self.domains.get(domain_id)

    def list_domains(self) -> List[str]:
        """Return the IDs of all loaded domains."""
        return list(self.domains.keys())

    def detect_domain(self, text: str,
                      skills: Optional[List[str]] = None) -> Tuple[str, float]:
        """Detect the most likely domain from text and/or skills.

        Every detection keyword found as a substring of ``text`` adds 0.1
        and every entry of ``skills`` that equals a core skill adds 0.15
        (both case-insensitive). Each domain's score is capped at 1.0.

        Args:
            text: Free text (career goals, descriptions, etc.).
            skills: List of skill keywords.

        Returns:
            ``(domain_id, confidence)``. The domain is ``'general'`` when
            there is no input, no domain is loaded, or the best score is
            below 0.2. On a tie the domain stored first wins.
        """
        if not text and not skills:
            return ('general', 0.0)
        text_lower = (text or '').lower()
        skills_lower = [s.lower() for s in (skills or [])]
        domain_scores = {}
        for domain_id, config in self.domains.items():
            score = 0.0
            # Keyword matching from text
            for keyword in config.detection_keywords:
                if keyword.lower() in text_lower:
                    score += 0.1
            # Skill matching; a set keeps each membership test O(1)
            core_skills_lower = {s.lower() for s in config.core_skills}
            skill_matches = sum(1 for s in skills_lower if s in core_skills_lower)
            score += skill_matches * 0.15
            domain_scores[domain_id] = min(score, 1.0)
        if not domain_scores:
            return ('general', 0.0)
        # Domain with the highest score
        best_domain = max(domain_scores, key=domain_scores.get)
        confidence = domain_scores[best_domain]
        # Minimum confidence threshold
        if confidence < 0.2:
            return ('general', confidence)
        return (best_domain, confidence)

    def get_aspect_prototypes_for_domain(self, domain_id: str) -> Dict[str, List[str]]:
        """Return all aspect prototypes of a domain (empty dict if unknown)."""
        config = self.domains.get(domain_id)
        if config:
            return config.get_all_aspect_seeds()
        return {}

    def get_merged_prototypes(self, detected_domain: str,
                              base_aspects: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Merge domain-specific prototypes with base aspects.

        Domain seeds are placed before the base seeds of the same aspect.
        Aspects that exist only in the domain are added as-is.

        Args:
            detected_domain: Domain ID from detection.
            base_aspects: Base aspect seeds (from TextModuleV2 defaults).

        Returns:
            A new dict of new lists; neither ``base_aspects`` nor the
            domain configuration is aliased by the result.
        """
        merged = {k: list(v) for k, v in (base_aspects or {}).items()}
        domain_config = self.domains.get(detected_domain)
        if not domain_config:
            return merged
        for aspect, seeds in domain_config.aspect_prototypes.items():
            # list(seeds) copies the config's list, so mutating the merged
            # result can never write back into the stored configuration.
            merged[aspect] = list(seeds) + merged.get(aspect, [])
        return merged

    def analyze_skill_gaps(self, student_skills: List[str],
                           domain_id: str) -> List[Dict]:
        """Analyze skill gaps for a student in a given domain.

        A domain skill counts as covered when it and one of the student's
        skills contain each other as a substring (case-insensitive).

        Args:
            student_skills: List of skills the student has.
            domain_id: Target domain.

        Returns:
            One dict per missing skill with the keys ``skill``,
            ``demand_score``, ``recommended_courses``, ``certifications``
            and ``priority``, sorted by ``demand_score`` descending.
            Empty when the domain is unknown.
        """
        config = self.domains.get(domain_id)
        if not config:
            return []
        # Blank entries are dropped: '' is a substring of every string, so a
        # single empty skill would otherwise mark every skill as covered.
        student_skills_lower = [
            s.strip().lower() for s in (student_skills or []) if s and s.strip()
        ]
        gaps = []
        for skill, gap_info in (config.skill_gaps_mapping or {}).items():
            skill_lower = skill.lower()
            has_skill = any(skill_lower in s or s in skill_lower
                            for s in student_skills_lower)
            if has_skill:
                continue
            demand_score = gap_info.get('demand_score', 0.5)
            gaps.append({
                'skill': skill,
                'demand_score': demand_score,
                'recommended_courses': gap_info.get('courses', []),
                'certifications': gap_info.get('certifications', []),
                'priority': 'high' if demand_score > 0.7 else 'medium'
            })
        # Highest demand first
        gaps.sort(key=lambda x: x['demand_score'], reverse=True)
        return gaps

    def get_domain_summary(self, domain_id: str) -> Optional[Dict]:
        """Return a reporting summary of a domain, or ``None`` if unknown."""
        config = self.domains.get(domain_id)
        if not config:
            return None
        return {
            'domain_id': config.domain_id,
            'display_name': config.display_name,
            'description': config.description,
            'core_skills_count': len(config.core_skills),
            'aspects_count': len(config.aspect_prototypes),
            'benchmarks': config.industry_benchmarks
        }
# Singleton instance, created on the first get_domain_knowledge_base() call
_dkb_instance: Optional[DomainKnowledgeBase] = None


def get_domain_knowledge_base(domains_dir: str = None) -> DomainKnowledgeBase:
    """Get or create the singleton DomainKnowledgeBase instance.

    The first call constructs the instance and therefore decides which
    directory is loaded. Later calls return that same instance; a different
    ``domains_dir`` passed later is not loaded, and a warning is logged so
    the mismatch does not go unnoticed. No locking is performed.

    Args:
        domains_dir: Directory of domain JSON files, used only when the
            instance does not exist yet. ``None`` selects the default
            directory of ``DomainKnowledgeBase``.

    Returns:
        The shared ``DomainKnowledgeBase`` instance.
    """
    global _dkb_instance
    if _dkb_instance is None:
        _dkb_instance = DomainKnowledgeBase(domains_dir)
    elif domains_dir is not None and Path(domains_dir) != _dkb_instance.domains_dir:
        # Keep returning the existing instance, but report that the
        # requested directory was not used.
        logger.warning(
            "DomainKnowledgeBase already initialized from %s; ignoring domains_dir=%s",
            _dkb_instance.domains_dir, domains_dir,
        )
    return _dkb_instance