"""
Configuration for the Coding Expert model
"""


# Knowledge areas configured for the coding expert. Three top-level groups:
#   "programming_languages" - per-language "level" plus a list of "focus" topics
#   "frameworks"            - framework/platform name -> level, grouped by area
#   "tools"                 - tool names grouped by category
CODING_DOMAINS = {
    "programming_languages": {
        "python": {
            "level": "expert",
            "focus": ["data structures", "algorithms", "web development", "machine learning"],
        },
        "javascript": {
            "level": "expert",
            "focus": ["frontend", "backend", "frameworks", "performance"],
        },
        "java": {
            "level": "expert",
            "focus": ["enterprise", "concurrency", "frameworks", "design patterns"],
        },
        "c++": {
            "level": "expert",
            "focus": ["systems", "performance", "templates", "memory management"],
        },
        "go": {
            "level": "expert",
            "focus": ["concurrency", "networking", "performance", "cloud"],
        },
    },
    "frameworks": {
        "web": {
            "django": "expert",
            "flask": "expert",
            "fastapi": "expert",
            "react": "expert",
            "vue": "expert",
            "angular": "expert",
        },
        "mobile": {
            "flutter": "expert",
            "react_native": "expert",
            "swift": "expert",
            "kotlin": "expert",
        },
        "cloud": {
            "aws": "expert",
            "gcp": "expert",
            "azure": "expert",
            "kubernetes": "expert",
        },
    },
    "tools": {
        "ci_cd": ["github_actions", "jenkins", "circleci", "gitlab_ci"],
        "version_control": ["git", "mercurial"],
        "package_management": ["pip", "npm", "maven", "gradle", "cargo"],
        "ide": ["vscode", "pycharm", "intellij", "vim", "emacs"],
    },
}


# Task categories configured for the coding expert. Every entry has the same
# shape: a "level" string and a list of "subtasks" names.
CODING_TASKS = {
    "problem_solving": {
        "level": "expert",
        "subtasks": [
            "algorithm_design",
            "data_structure_selection",
            "complexity_analysis",
            "optimization",
        ],
    },
    "code_review": {
        "level": "expert",
        "subtasks": [
            "architecture_review",
            "security_review",
            "performance_review",
            "code_style_review",
        ],
    },
    "debugging": {
        "level": "expert",
        "subtasks": [
            "memory_leaks",
            "race_conditions",
            "performance_bottlenecks",
            "concurrency_issues",
        ],
    },
    "testing": {
        "level": "expert",
        "subtasks": [
            "unit_testing",
            "integration_testing",
            "performance_testing",
            "security_testing",
        ],
    },
    "architecture_design": {
        "level": "expert",
        "subtasks": [
            "microservices",
            "distributed_systems",
            "scalability",
            "fault_tolerance",
        ],
    },
}


# Datasets configured for the coding expert, keyed by display name. Every
# entry carries the same five keys:
#   "source"      - dataset identifier (how it is resolved is not visible in
#                   this module; presumably by a loader elsewhere - confirm)
#   "split"       - name of the split to use
#   "fields"      - names of the record fields of interest
#   "description" - human-readable summary, printed by print_config_summary()
#   "tasks"       - task names associated with the dataset
CODING_DATASETS = {
    "CodeSearchNet": {
        # NOTE(review): "codeium/codeium" does not look like a CodeSearchNet
        # identifier - verify against whatever consumes "source".
        "source": "codeium/codeium",
        "split": "train",
        "fields": ["code", "docstring", "language", "function_name"],
        "description": "HuggingFace - multi-language code corpus",
        "tasks": ["code_search", "code_completion", "documentation"],
    },
    "HumanEval": {
        "source": "openai/human_eval",
        "split": "test",
        "fields": ["task_id", "prompt", "canonical_solution", "test", "entry_point"],
        "description": "OpenAI's functional code evaluation dataset",
        "tasks": ["code_generation", "function_implementation", "unit_testing"],
    },
    "MBPP": {
        "source": "mbpp/mbpp",
        "split": "train",
        "fields": ["task_id", "text", "code", "test_list", "challenge_test_list"],
        "description": "Mostly Basic Python Problems",
        "tasks": ["problem_solving", "code_generation", "unit_testing"],
    },
    "Spider": {
        "source": "yale-lily/spider",
        "split": "train",
        "fields": ["query", "question", "db_id", "sql"],
        "description": "Text-to-SQL mapping",
        "tasks": ["sql_generation", "text_to_sql", "database_queries"],
    },
    "DeepFix": {
        "source": "deepfix/deepfix",
        "split": "train",
        "fields": ["code", "fixed_code", "error_type"],
        "description": "Bug fixing dataset",
        "tasks": ["bug_fixing", "error_detection", "code_correction"],
    },
    "CodeXGLUE": {
        "source": "microsoft/CodeXGLUE",
        "split": "train",
        "fields": ["code", "docstring", "task", "language"],
        "description": "Multitask code understanding/generation benchmark",
        "tasks": ["code_translation", "code_summarization", "code_generation"],
    },
}


def print_config_summary() -> None:
    """Print a human-readable overview of the module's configuration.

    Writes to stdout the number of entries in ``CODING_DOMAINS``, in its
    ``"programming_languages"`` group, in ``CODING_TASKS`` and in
    ``CODING_DATASETS``, followed by the description, tasks and fields of
    every dataset in ``CODING_DATASETS``.
    """
    print("\nCoding Expert Configuration Summary:")
    print(f"Number of domains: {len(CODING_DOMAINS)}")
    print(f"Number of languages: {len(CODING_DOMAINS['programming_languages'])}")
    print(f"Number of tasks: {len(CODING_TASKS)}")
    print(f"Number of datasets: {len(CODING_DATASETS)}")

    print("\nDataset Details:")
    for name, config in CODING_DATASETS.items():
        # One stanza per dataset, separated from the previous one by a blank line.
        print(f"\n{name}:")
        print(f"Description: {config['description']}")
        print(f"Tasks: {', '.join(config['tasks'])}")
        print(f"Fields: {', '.join(config['fields'])}")


# Print the summary only when this file is executed as a script, not on import.
if __name__ == "__main__":
    print_config_summary()