hollywoodfrancis's picture
Upload 6 files
6061012 verified
"""
Configuration for the Coding Expert model
"""
# Knowledge domains covered by the Coding Expert model: programming
# languages, frameworks and developer tooling.
CODING_DOMAINS = {
    # Every language carries the same "expert" level; only the focus areas
    # differ, so the entries are generated from (language, focus) pairs.
    "programming_languages": {
        language: {"level": "expert", "focus": focus_areas}
        for language, focus_areas in (
            (
                "python",
                ["data structures", "algorithms", "web development", "machine learning"],
            ),
            ("javascript", ["frontend", "backend", "frameworks", "performance"]),
            ("java", ["enterprise", "concurrency", "frameworks", "design patterns"]),
            ("c++", ["systems", "performance", "templates", "memory management"]),
            ("go", ["concurrency", "networking", "performance", "cloud"]),
        )
    },
    # Framework name -> proficiency level, grouped by platform.
    "frameworks": {
        "web": dict.fromkeys(
            ("django", "flask", "fastapi", "react", "vue", "angular"),
            "expert",
        ),
        "mobile": dict.fromkeys(
            ("flutter", "react_native", "swift", "kotlin"),
            "expert",
        ),
        "cloud": dict.fromkeys(
            ("aws", "gcp", "azure", "kubernetes"),
            "expert",
        ),
    },
    # Tool names grouped by category.
    "tools": {
        "ci_cd": ["github_actions", "jenkins", "circleci", "gitlab_ci"],
        "version_control": ["git", "mercurial"],
        "package_management": ["pip", "npm", "maven", "gradle", "cargo"],
        "ide": ["vscode", "pycharm", "intellij", "vim", "emacs"],
    },
}
# Task families the model handles. Each family is rated "expert" and is
# broken down into four named subtasks.
CODING_TASKS = {
    task_name: {"level": "expert", "subtasks": subtask_names}
    for task_name, subtask_names in {
        "problem_solving": [
            "algorithm_design",
            "data_structure_selection",
            "complexity_analysis",
            "optimization",
        ],
        "code_review": [
            "architecture_review",
            "security_review",
            "performance_review",
            "code_style_review",
        ],
        "debugging": [
            "memory_leaks",
            "race_conditions",
            "performance_bottlenecks",
            "concurrency_issues",
        ],
        "testing": [
            "unit_testing",
            "integration_testing",
            "performance_testing",
            "security_testing",
        ],
        "architecture_design": [
            "microservices",
            "distributed_systems",
            "scalability",
            "fault_tolerance",
        ],
    }.items()
}
# Datasets associated with the Coding Expert model. Every entry records a
# source identifier, the split to use, the expected fields, a short
# description and the tasks the dataset supports.
# NOTE(review): the `source` values are reproduced unchanged and have not been
# verified here; "codeium/codeium" in particular does not look like a
# CodeSearchNet identifier -- confirm against whatever loader consumes this.
CODING_DATASETS = {
    "CodeSearchNet": dict(
        source="codeium/codeium",
        split="train",
        fields=["code", "docstring", "language", "function_name"],
        description="HuggingFace - multi-language code corpus",
        tasks=["code_search", "code_completion", "documentation"],
    ),
    "HumanEval": dict(
        source="openai/human_eval",
        split="test",
        fields=["task_id", "prompt", "canonical_solution", "test", "entry_point"],
        description="OpenAI's functional code evaluation dataset",
        tasks=["code_generation", "function_implementation", "unit_testing"],
    ),
    "MBPP": dict(
        source="mbpp/mbpp",
        split="train",
        fields=["task_id", "text", "code", "test_list", "challenge_test_list"],
        description="Mostly Basic Python Problems",
        tasks=["problem_solving", "code_generation", "unit_testing"],
    ),
    "Spider": dict(
        source="yale-lily/spider",
        split="train",
        fields=["query", "question", "db_id", "sql"],
        description="Text-to-SQL mapping",
        tasks=["sql_generation", "text_to_sql", "database_queries"],
    ),
    "DeepFix": dict(
        source="deepfix/deepfix",
        split="train",
        fields=["code", "fixed_code", "error_type"],
        description="Bug fixing dataset",
        tasks=["bug_fixing", "error_detection", "code_correction"],
    ),
    "CodeXGLUE": dict(
        source="microsoft/CodeXGLUE",
        split="train",
        fields=["code", "docstring", "task", "language"],
        description="Multitask code understanding/generation benchmark",
        tasks=["code_translation", "code_summarization", "code_generation"],
    ),
}
# Print configuration summary
def print_config_summary() -> None:
    """Print a human-readable summary of the Coding Expert configuration.

    Writes to standard output the number of top-level domains, programming
    languages, tasks and datasets, followed by the description, tasks and
    fields of every entry in ``CODING_DATASETS``.

    Reads the module-level ``CODING_DOMAINS``, ``CODING_TASKS`` and
    ``CODING_DATASETS`` tables; takes no arguments and returns nothing.
    """
    print("\nCoding Expert Configuration Summary:")
    print(f"Number of domains: {len(CODING_DOMAINS)}")
    print(f"Number of languages: {len(CODING_DOMAINS['programming_languages'])}")
    print(f"Number of tasks: {len(CODING_TASKS)}")
    print(f"Number of datasets: {len(CODING_DATASETS)}")

    print("\nDataset Details:")
    # One paragraph per dataset, in the table's insertion order.
    for name, config in CODING_DATASETS.items():
        print(f"\n{name}:")
        print(f"Description: {config['description']}")
        print(f"Tasks: {', '.join(config['tasks'])}")
        print(f"Fields: {', '.join(config['fields'])}")
# Print the summary only when this module is executed as a script.
if __name__ == "__main__":
    print_config_summary()