"""Configuration for the Coding Expert model.

Defines three module-level constant dicts (domains, tasks, datasets) plus a
small helper that prints a human-readable summary of the configuration.
"""

# Core programming domains: languages, frameworks, and tooling the expert
# is expected to cover, with a proficiency level per entry.
CODING_DOMAINS = {
    "programming_languages": {
        "python": {
            "level": "expert",
            "focus": ["data structures", "algorithms", "web development", "machine learning"],
        },
        "javascript": {
            "level": "expert",
            "focus": ["frontend", "backend", "frameworks", "performance"],
        },
        "java": {
            "level": "expert",
            "focus": ["enterprise", "concurrency", "frameworks", "design patterns"],
        },
        "c++": {
            "level": "expert",
            "focus": ["systems", "performance", "templates", "memory management"],
        },
        "go": {
            "level": "expert",
            "focus": ["concurrency", "networking", "performance", "cloud"],
        },
    },
    "frameworks": {
        "web": {
            "django": "expert",
            "flask": "expert",
            "fastapi": "expert",
            "react": "expert",
            "vue": "expert",
            "angular": "expert",
        },
        "mobile": {
            "flutter": "expert",
            "react_native": "expert",
            "swift": "expert",
            "kotlin": "expert",
        },
        "cloud": {
            "aws": "expert",
            "gcp": "expert",
            "azure": "expert",
            "kubernetes": "expert",
        },
    },
    "tools": {
        "ci_cd": ["github_actions", "jenkins", "circleci", "gitlab_ci"],
        "version_control": ["git", "mercurial"],
        "package_management": ["pip", "npm", "maven", "gradle", "cargo"],
        "ide": ["vscode", "pycharm", "intellij", "vim", "emacs"],
    },
}

# Core coding tasks: each task carries a proficiency level and the
# sub-capabilities it decomposes into.
CODING_TASKS = {
    "problem_solving": {
        "level": "expert",
        "subtasks": [
            "algorithm_design",
            "data_structure_selection",
            "complexity_analysis",
            "optimization",
        ],
    },
    "code_review": {
        "level": "expert",
        "subtasks": [
            "architecture_review",
            "security_review",
            "performance_review",
            "code_style_review",
        ],
    },
    "debugging": {
        "level": "expert",
        "subtasks": [
            "memory_leaks",
            "race_conditions",
            "performance_bottlenecks",
            "concurrency_issues",
        ],
    },
    "testing": {
        "level": "expert",
        "subtasks": [
            "unit_testing",
            "integration_testing",
            "performance_testing",
            "security_testing",
        ],
    },
    "architecture_design": {
        "level": "expert",
        "subtasks": [
            "microservices",
            "distributed_systems",
            "scalability",
            "fault_tolerance",
        ],
    },
}

# Core datasets: per-dataset metadata (hub source path, split, field names,
# description, and the tasks the dataset supports).
# NOTE(review): several `source` values look unlike the canonical Hugging Face
# hub ids (e.g. CodeSearchNet is published as `code_search_net`, HumanEval as
# `openai_humaneval`) — verify against the hub before loading. Values are kept
# as-is here because downstream code may match on these exact strings.
CODING_DATASETS = {
    "CodeSearchNet": {
        "source": "codeium/codeium",
        "split": "train",
        "fields": ["code", "docstring", "language", "function_name"],
        "description": "HuggingFace - multi-language code corpus",
        "tasks": ["code_search", "code_completion", "documentation"],
    },
    "HumanEval": {
        "source": "openai/human_eval",
        "split": "test",
        "fields": ["task_id", "prompt", "canonical_solution", "test", "entry_point"],
        "description": "OpenAI's functional code evaluation dataset",
        "tasks": ["code_generation", "function_implementation", "unit_testing"],
    },
    "MBPP": {
        "source": "mbpp/mbpp",
        "split": "train",
        "fields": ["task_id", "text", "code", "test_list", "challenge_test_list"],
        "description": "Mostly Basic Python Problems",
        "tasks": ["problem_solving", "code_generation", "unit_testing"],
    },
    "Spider": {
        "source": "yale-lily/spider",
        "split": "train",
        "fields": ["query", "question", "db_id", "sql"],
        "description": "Text-to-SQL mapping",
        "tasks": ["sql_generation", "text_to_sql", "database_queries"],
    },
    "DeepFix": {
        "source": "deepfix/deepfix",
        "split": "train",
        "fields": ["code", "fixed_code", "error_type"],
        "description": "Bug fixing dataset",
        "tasks": ["bug_fixing", "error_detection", "code_correction"],
    },
    "CodeXGLUE": {
        "source": "microsoft/CodeXGLUE",
        "split": "train",
        "fields": ["code", "docstring", "task", "language"],
        "description": "Multitask code understanding/generation benchmark",
        "tasks": ["code_translation", "code_summarization", "code_generation"],
    },
}


def print_config_summary():
    """Print a human-readable summary of the configuration to stdout.

    Reports the counts of domains, languages, tasks, and datasets, then the
    description, tasks, and fields of each dataset. Returns None.
    """
    print("\nCoding Expert Configuration Summary:")
    print(f"Number of domains: {len(CODING_DOMAINS)}")
    print(f"Number of languages: {len(CODING_DOMAINS['programming_languages'])}")
    print(f"Number of tasks: {len(CODING_TASKS)}")
    print(f"Number of datasets: {len(CODING_DATASETS)}")
    print("\nDataset Details:")
    for name, config in CODING_DATASETS.items():
        print(f"\n{name}:")
        print(f"Description: {config['description']}")
        print(f"Tasks: {', '.join(config['tasks'])}")
        # Original had a raw newline inside this f-string's join separator,
        # which is a syntax error; the intended separator is ", ".
        print(f"Fields: {', '.join(config['fields'])}")


if __name__ == "__main__":
    print_config_summary()