| """ |
| GitHub Task Manager for MCPMark Evaluation Pipeline |
| ==================================================== |
| |
| This module provides utilities for discovering, filtering, and managing |
| GitHub-based evaluation tasks. |
| |
| The task manager is responsible for: |
| - Task discovery and filtering |
| - Task verification and result processing |
| - Task-specific logic (NOT LLM execution) |
| """ |
|
|
| import sys |
| from dataclasses import dataclass |
| from pathlib import Path |
| from typing import Any, Dict, List, Optional |
|
|
| from src.base.task_manager import BaseTask, BaseTaskManager |
| from src.logger import get_logger |
|
|
| logger = get_logger(__name__) |
|
|
|
|
@dataclass
class GitHubTask(BaseTask):
    """A single GitHub evaluation task.

    Extends ``BaseTask`` with optional GitHub-specific context. Every extra
    field has a default, so a task can be built from the base fields alone.

    Attributes:
        repository_url: URL of the repository the task operates on.
            ``GitHubTaskManager.get_task_instruction`` derives the
            ``owner/repo`` shown to the agent from it; ``None`` until set.
        branch_name: Presumably the branch associated with the task
            (not read in this module -- confirm against callers).
        pr_number: Presumably the pull request number tied to the task
            (not read in this module -- confirm against callers).
        issue_number: Presumably the issue number tied to the task
            (not read in this module -- confirm against callers).
        expected_actions: Presumably the actions a verifier expects
            (not read in this module -- confirm against callers).
        task_name: Task identifier as discovered on disk; set by
            ``GitHubTaskManager._create_task_from_files``.
    """

    # GitHub-specific context; all optional.
    repository_url: Optional[str] = None
    branch_name: Optional[str] = None
    pr_number: Optional[int] = None
    issue_number: Optional[int] = None
    expected_actions: Optional[List[str]] = None

    # Task identifier taken from the discovered task files.
    task_name: str = ""
|
|
| |
|
|
|
|
class GitHubTaskManager(BaseTaskManager):
    """Manages task discovery, filtering, and verification for GitHub-based MCPMark evaluation."""

    def __init__(self, tasks_root: Optional[Path] = None, task_suite: str = "standard"):
        """Initialize GitHub task manager.

        Args:
            tasks_root: Path to the tasks directory. When ``None``, the
                ``tasks`` directory under ``parents[3]`` of this file's
                resolved path is used.
            task_suite: Name of the task suite; forwarded unchanged to
                ``BaseTaskManager``.
        """
        if tasks_root is None:
            tasks_root = Path(__file__).resolve().parents[3] / "tasks"

        super().__init__(
            tasks_root,
            mcp_service="github",
            task_class=GitHubTask,
            task_organization="file",
            task_suite=task_suite,
        )

    def _create_task_from_files(
        self, category_id: str, task_files_info: Dict[str, Any]
    ) -> Optional[GitHubTask]:
        """Instantiate a GitHubTask from the dictionary yielded by _find_task_files.

        If a ``meta.json`` file sits next to the instruction file, its
        ``category_id`` and ``task_id`` entries (when present) override the
        values derived from the directory layout. A missing, unreadable or
        malformed ``meta.json`` is not fatal: a warning is logged and the
        directory-derived values are kept.

        Args:
            category_id: Category identifier derived from the task location.
            task_files_info: Mapping that provides the ``instruction_path``,
                ``verification_path`` and ``task_id`` keys.

        Returns:
            The constructed ``GitHubTask``.
        """
        import json

        final_category_id = category_id
        task_id = task_files_info["task_id"]

        meta_path = task_files_info["instruction_path"].parent / "meta.json"
        if meta_path.exists():
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # platform's default encoding.
                with open(meta_path, "r", encoding="utf-8") as f:
                    meta_data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                logger.warning(f"Failed to load meta.json from {meta_path}: {e}")
            else:
                if isinstance(meta_data, dict):
                    final_category_id = meta_data.get("category_id", category_id)
                    task_id = meta_data.get("task_id", task_id)
                else:
                    logger.warning(
                        f"Failed to load meta.json from {meta_path}: "
                        f"expected a JSON object, got {type(meta_data).__name__}"
                    )

        return GitHubTask(
            task_instruction_path=task_files_info["instruction_path"],
            task_verification_path=task_files_info["verification_path"],
            service="github",
            category_id=final_category_id,
            task_id=task_id,
            # task_name keeps the discovered id even when meta.json overrides task_id.
            task_name=task_files_info["task_id"],
        )

    def _get_verification_command(self, task: GitHubTask) -> List[str]:
        """Get the verification command for GitHub tasks.

        Returns:
            An argv list that runs the task's verification script with the
            current Python interpreter.
        """
        return [sys.executable, str(task.task_verification_path)]

    @staticmethod
    def _repo_slug(repository_url: str) -> Optional[str]:
        """Return ``owner/repo`` parsed from *repository_url*, or ``None``.

        Only a trailing ``.git`` is removed, so names that merely contain
        ``.git`` (for example ``owner.github.io``) are left intact.
        """
        cleaned = repository_url.strip().rstrip("/")
        if cleaned.endswith(".git"):
            cleaned = cleaned[: -len(".git")]

        parts = cleaned.split("/")
        if len(parts) < 2:
            return None

        owner, repo_name = parts[-2], parts[-1]
        # scp-style SSH remotes look like "git@host:owner/repo"; keep only
        # the part after the colon as the owner.
        owner = owner.rsplit(":", 1)[-1]
        if not owner or not repo_name:
            return None
        return f"{owner}/{repo_name}"

    def get_task_instruction(self, task: GitHubTask) -> str:
        """Return task instruction prefixed with repository context.

        Adds an English prefix to every GitHub task instruction so that the
        agent knows **exactly** which repository to operate on:

            Please execute the following task in my repository {owner}/{repo_name}:

        If the repository URL has not yet been injected into the ``task`` (for
        example when the state manager has not run), or no owner/repository
        pair can be parsed from it, a generic prefix without the owner/repo
        placeholder is used instead.

        Args:
            task: Task whose instruction text should be rendered.

        Returns:
            The prefixed instruction after it has been passed through
            ``_format_task_instruction``.
        """
        base_instruction = task.get_task_instruction()

        slug = self._repo_slug(task.repository_url) if task.repository_url else None
        if slug:
            prefix = f"Please execute the following task in my repository {slug}:"
        else:
            prefix = "Please execute the following task:"

        instruction_with_prefix = f"{prefix}\n\n{base_instruction.strip()}"
        return self._format_task_instruction(instruction_with_prefix)
|
|