"""Verify the multi-branch commit aggregation task against a GitHub repository.

The script reads three report files from the ``history-report-2025`` branch of
the ``harmony`` repository in the configured organization and compares them
with hard-coded expected data. It exits with status 0 when every check passes
and 1 otherwise.
"""

import base64
import json
import os
import sys
from typing import Callable, Dict, Optional, Tuple

import requests
from dotenv import load_dotenv

# Upper bound (seconds) for each GitHub API call, so a stalled connection
# fails the verification instead of hanging it forever.
_REQUEST_TIMEOUT_SECONDS = 30

# Branch that must contain the generated report files.
_REPORT_BRANCH = "history-report-2025"

# Expected content of BRANCH_COMMITS.json: branch name -> ordered commits.
_EXPECTED_BRANCH_COMMITS = {
    "pr/45-googlefan256-main": [
        {
            "sha": "9fa3f54cf2a2501c7dcbf554d5fbdd0de619fdda",
            "author": "googlefan256",
            "message": "Update format.md",
            "files_changed": 1,
        },
        {
            "sha": "3efbf742533a375fc148d75513597e139329578b",
            "author": "scott-oai",
            "message": "Merge pull request #29 from axion66/improve-readme-and-checks",
            "files_changed": 1,
        },
        {
            "sha": "9d653a4c7382abc42d115014d195d9354e7ad357",
            "author": "scott-oai",
            "message": "Merge pull request #30 from Yuan-ManX/harmony-format",
            "files_changed": 1,
        },
    ],
    "pr/25-neuralsorcerer-patch-1": [
        {
            "sha": "c505a03e9c9a388a511b6125756097eee523742a",
            "author": "neuralsorcerer",
            "message": "fix: `meta_sep` token and add to registry",
            "files_changed": 1,
        },
        {
            "sha": "c044bf33f7e835ca6a723ccc97848de25dba5164",
            "author": "neuralsorcerer",
            "message": "fix: `meta_sep` token in `encoding.rs`",
            "files_changed": 1,
        },
        {
            "sha": "b255cbeb6274adbea774f26fd9590922ce8874ed",
            "author": "scott-oai",
            "message": "Merge pull request #18 from openai/dev/scl/better-ci",
            "files_changed": 6,
        },
    ],
    "pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api": [
        {
            "sha": "1dca6392934bf4e3c403b2ecc2104e8ff3f67f45",
            "author": "amirhosseinghanipour",
            "message": "fix race conditions and add offline tokenizer loading api",
            "files_changed": 8,
        },
        {
            "sha": "9528c7b4a00a3307fd9685fc1328aee11c3d9c90",
            "author": "scott-oai",
            "message": "version bump",
            "files_changed": 2,
        },
        {
            "sha": "82b3afb9eb043343f322c937262cc50405e892c3",
            "author": "scott-oai",
            "message": "Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool",
            "files_changed": 6,
        },
    ],
}

# Contributor entries that must appear verbatim in CROSS_BRANCH_ANALYSIS.md.
_EXPECTED_TOP_CONTRIBUTORS = [
    "scott-oai: 35 commits",
    "egorsmkv: 4 commits",
    "axion66: 2 commits",
]

# Lines that must appear verbatim in MERGE_TIMELINE.txt.
_EXPECTED_MERGE_TIMELINE = [
    "2025-08-06 | Merge pull request #29 from axion66/improve-readme-and-checks | 3efbf742533a375fc148d75513597e139329578b",
    "2025-08-06 | Merge pull request #30 from Yuan-ManX/harmony-format | 9d653a4c7382abc42d115014d195d9354e7ad357",
    "2025-08-06 | Merge pull request #28 from dkqjrm/fix-typo-format-md | 161e5fe2a57c63e9f8353c4c5b8faa3c3854bb5f",
    "2025-08-05 | Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool | 82b3afb9eb043343f322c937262cc50405e892c3",
    "2025-08-05 | Merge pull request #18 from openai/dev/scl/better-ci | b255cbeb6274adbea774f26fd9590922ce8874ed",
    "2025-08-05 | Merge pull request #21 from Tialo/main | 058ef3257c24fb099aac7960c10ce51c8e55d9fe",
    "2025-08-05 | Merge branch 'main' into dev/scl/better-ci | 6375a15ea1b0a486cbb1468964cf8f5800ff5a5c",
    "2025-08-05 | Merge pull request #8 from RustedBytes/main | f6179119ca894eda4124c86d408c01fdbf5281f0",
    "2025-08-05 | Merge branch 'main' into main | eb86106b6980790b94f5702dc510483c66027277",
    "2025-08-05 | Merge pull request #17 from openai/dev/scl/add-docs-to-cargo | 64bca4cf327ebeafa0bbd0345650d86e2d02142f",
]


def _get_github_api(
    endpoint: str, headers: Dict[str, str], org: str
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return (success, response).

    Args:
        endpoint: Path below ``/repos/{org}/harmony/``.
        headers: HTTP headers sent with the request.
        org: Organization (or user) that owns the ``harmony`` repository.

    Returns:
        ``(True, parsed_json)`` on HTTP 200, otherwise ``(False, None)``.
        A 404 is reported silently; any other status, a transport error or an
        undecodable body is logged to stderr.
    """
    url = f"https://api.github.com/repos/{org}/harmony/{endpoint}"
    try:
        response = requests.get(
            url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        if response.status_code == 200:
            return True, response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout problems.
        # ValueError: the 200 response body was not valid JSON.
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None

    # 404 is an expected "does not exist" answer, so it is not logged.
    if response.status_code != 404:
        print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
    return False, None


def _check_branch_exists(branch_name: str, headers: Dict[str, str], org: str) -> bool:
    """Return True when the branches endpoint answers 200 for *branch_name*."""
    success, _ = _get_github_api(f"branches/{branch_name}", headers, org)
    return success


def _get_file_content(
    branch: str, file_path: str, headers: Dict[str, str], org: str
) -> Optional[str]:
    """Get the content of a file from a specific branch.

    Returns:
        The base64-decoded, UTF-8 text of the file, or ``None`` when the
        request fails, the response is empty, or the content cannot be
        decoded.
    """
    success, result = _get_github_api(f"contents/{file_path}?ref={branch}", headers, org)
    if not success or not result:
        return None

    try:
        return base64.b64decode(result.get("content", "")).decode("utf-8")
    except (ValueError, TypeError, AttributeError) as e:
        # ValueError covers invalid base64 and invalid UTF-8; TypeError and
        # AttributeError cover a response that is not the expected JSON object.
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None


def _check_branch_commits_json(content: str) -> bool:
    """Verify BRANCH_COMMITS.json has correct structure and expected data.

    Every expected branch must be present with exactly as many commits as
    expected. For each commit, ``sha``, ``author`` and ``files_changed`` must
    be equal to the expected values, while the expected ``message`` only has
    to be a substring of the actual one. Problems are reported on stderr.
    """
    try:
        data = json.loads(content)
        if not isinstance(data, dict):
            print(
                "BRANCH_COMMITS.json must contain a JSON object at the top level",
                file=sys.stderr,
            )
            return False

        # Check that all required branches are present before comparing details.
        for branch in _EXPECTED_BRANCH_COMMITS:
            if branch not in data:
                print(
                    f"Missing branch {branch} in BRANCH_COMMITS.json", file=sys.stderr
                )
                return False

        for branch, expected_commits in _EXPECTED_BRANCH_COMMITS.items():
            actual_commits = data[branch]
            if not isinstance(actual_commits, list):
                print(
                    f"Branch {branch} should map to a list of commits",
                    file=sys.stderr,
                )
                return False
            if len(actual_commits) != len(expected_commits):
                print(
                    f"Branch {branch} should have exactly {len(expected_commits)} commits, found {len(actual_commits)}",
                    file=sys.stderr,
                )
                return False

            # Lengths are equal here, so zip pairs every commit positionally.
            for position, (expected_commit, actual_commit) in enumerate(
                zip(expected_commits, actual_commits), start=1
            ):
                for field in ("sha", "author", "files_changed"):
                    if actual_commit.get(field) != expected_commit.get(field):
                        print(
                            f"Mismatch in {field} for commit {position} in branch {branch}",
                            file=sys.stderr,
                        )
                        print(
                            f"Expected: {expected_commit.get(field)}, Got: {actual_commit.get(field)}",
                            file=sys.stderr,
                        )
                        return False

                # For message field, use substring matching to be more flexible
                expected_message = expected_commit.get("message", "")
                actual_message = actual_commit.get("message", "")
                if expected_message not in actual_message:
                    print(
                        f"Mismatch in message for commit {position} in branch {branch}",
                        file=sys.stderr,
                    )
                    print(
                        f"Expected: {expected_message}, Got: {actual_message}",
                        file=sys.stderr,
                    )
                    return False

        return True

    except json.JSONDecodeError as e:
        print(f"Invalid JSON in BRANCH_COMMITS.json: {e}", file=sys.stderr)
        return False
    except Exception as e:
        # The file is produced by the task under test; a commit entry of the
        # wrong shape (e.g. not an object, or a non-string message) must fail
        # the check rather than crash the verifier.
        print(f"Error checking BRANCH_COMMITS.json: {e}", file=sys.stderr)
        return False


def _check_cross_branch_analysis(content: str) -> bool:
    """Verify CROSS_BRANCH_ANALYSIS.md contains required sections and data.

    The content must include the ``## Top Contributors`` header and each
    expected ``name: N commits`` entry as a substring. The relative order of
    the entries is not checked.
    """
    if "## Top Contributors" not in content:
        print(
            "Missing section '## Top Contributors' in CROSS_BRANCH_ANALYSIS.md",
            file=sys.stderr,
        )
        return False

    # Already implied by the header check above; kept as an explicit guard.
    if "contributors" not in content.lower():
        print(
            "Missing keyword 'contributors' in CROSS_BRANCH_ANALYSIS.md",
            file=sys.stderr,
        )
        return False

    for contributor in _EXPECTED_TOP_CONTRIBUTORS:
        if contributor not in content:
            print(
                f"Missing or incorrect contributor entry: {contributor}",
                file=sys.stderr,
            )
            return False

    return True


def _check_merge_timeline(content: str) -> bool:
    """Verify MERGE_TIMELINE.txt contains every expected merge commit line.

    Each expected ``date | message | sha`` line must occur as a substring of
    *content*; ordering and extra lines are not checked.
    """
    for index, expected_line in enumerate(_EXPECTED_MERGE_TIMELINE, start=1):
        if expected_line not in content:
            print(
                f"Missing expected timeline entry {index} in MERGE_TIMELINE.txt",
                file=sys.stderr,
            )
            print(f"Expected: {expected_line}", file=sys.stderr)
            return False

    return True


def _verify_report_file(
    file_name: str,
    checker: Callable[[str], bool],
    success_message: str,
    headers: Dict[str, str],
    org: str,
) -> bool:
    """Fetch one report file from the report branch and run *checker* on it."""
    content = _get_file_content(_REPORT_BRANCH, file_name, headers, org)
    if not content:
        print(
            f"File '{file_name}' not found in '{_REPORT_BRANCH}' branch",
            file=sys.stderr,
        )
        return False

    if not checker(content):
        return False

    print(success_message)
    return True


def verify_task() -> bool:
    """Verify the multi-branch commit aggregation task.

    Reads ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG`` from the environment
    (after loading ``.mcp_env``), then checks that the report branch exists
    and that each report file passes its checker.

    Returns:
        True when every check passes, False on the first failure.
    """
    load_dotenv(".mcp_env")

    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False

    github_org = os.environ.get("GITHUB_EVAL_ORG")
    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    # 1. The report branch must exist.
    if not _check_branch_exists(_REPORT_BRANCH, headers, github_org):
        print(f"Branch '{_REPORT_BRANCH}' does not exist", file=sys.stderr)
        return False
    print(f"✓ Branch '{_REPORT_BRANCH}' exists")

    # 2-4. Each report file must exist and pass its checker, in this order.
    report_checks = (
        (
            "BRANCH_COMMITS.json",
            _check_branch_commits_json,
            "✓ BRANCH_COMMITS.json has correct structure and data",
        ),
        (
            "CROSS_BRANCH_ANALYSIS.md",
            _check_cross_branch_analysis,
            "✓ CROSS_BRANCH_ANALYSIS.md contains required sections and data",
        ),
        (
            "MERGE_TIMELINE.txt",
            _check_merge_timeline,
            "✓ MERGE_TIMELINE.txt has correct format and data",
        ),
    )
    for file_name, checker, success_message in report_checks:
        if not _verify_report_file(
            file_name, checker, success_message, headers, github_org
        ):
            return False

    print("\nAll verification checks passed! ✅")
    return True


if __name__ == "__main__":
    success = verify_task()
    sys.exit(0 if success else 1)