Spaces:
Runtime error
Runtime error
| import json | |
| import logging | |
| import os | |
| import subprocess | |
| import sys | |
| import tempfile | |
| import time | |
| from pathlib import Path | |
| from typing import Dict | |
| import requests | |
# GitHub configuration, read once from the environment at import time.
# The os.getenv() values are None when the variable is unset.
GITHUB_USERNAME = os.getenv("GITHUB_USERNAME")
GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_EMAIL = os.getenv("GITHUB_EMAIL")
GITHUB_ORG = os.getenv("MAI_GITHUB_ORG")
# Required: os.environ[...] raises KeyError at import time if this is unset.
MAI_TM_PUBLISH_TODO_REPO = os.environ["MAI_TMS_PUBLISH_TODO_REPO"]
# Organisation-level endpoint used to create new repositories.
GITHUB_API_ENDPOINT = f"https://api.github.com/orgs/{GITHUB_ORG}/repos"
# os.getenv() returns a string, so values such as "0" or "false" used to be
# truthy. Parse the flag explicitly into a real bool instead.
DEBUG = os.getenv("DEBUG", "").strip().lower() not in ("", "0", "false", "no", "off")
# Extra flag spliced into the git commands below.
# NOTE(review): "-q" is passed when DEBUG is ON, which looks inverted (quiet
# output while debugging) -- kept as-is; confirm the intended behaviour.
quiet = "-q" if DEBUG else ""
def create_github_repo(repo_path: Path, repo_name: str):
    """Commit the contents of ``repo_path`` and push them to a new GitHub repo.

    Steps, in order: set the global git identity from GITHUB_USERNAME and
    GITHUB_EMAIL, ``git init`` inside ``repo_path``, commit everything found
    there, create a private repository called ``repo_name`` through
    GITHUB_API_ENDPOINT, then push the commit to its ``main`` branch.

    Args:
        repo_path: Directory whose contents become the initial commit.
        repo_name: Name of the repository to create.

    Returns:
        The ``html_url`` field of the repository-creation response.

    Raises:
        subprocess.CalledProcessError: If a git command exits non-zero.
            Previously such failures were ignored and a URL was returned
            even when nothing had been pushed.
        requests.HTTPError: If the repository-creation request is rejected.
        requests.Timeout: If GitHub does not answer within the timeout.
    """
    request_timeout = 30  # seconds; without it requests may block forever

    def run_git(*args: str, cwd=None) -> None:
        # Arguments go to git as a list: no shell is involved and values
        # containing spaces (e.g. a user name) stay a single argument.
        completed = subprocess.run(["git", *args], cwd=cwd)
        if completed.returncode != 0:
            # Report only the sub-command so the access token embedded in
            # the remote URL never ends up in a traceback or log.
            raise subprocess.CalledProcessError(
                completed.returncode, ["git", args[0]]
            )

    quiet_args = quiet.split()  # [] or ["-q"]
    workdir = str(repo_path)

    logging.info("[INFO] Creating GitHub repo...")

    # configure git users (f-strings keep the original str() coercion)
    run_git("config", "--global", "user.name", f"{GITHUB_USERNAME}")
    run_git("config", "--global", "user.email", f"{GITHUB_EMAIL}")

    # Initialize a Git repository and commit everything in it
    run_git("init", *quiet_args, cwd=workdir)
    run_git("add", ".", cwd=workdir)
    run_git("commit", *quiet_args, "-m", "Initial commit", cwd=workdir)

    # Create a new repository on GitHub
    response = requests.post(
        GITHUB_API_ENDPOINT,
        json={
            "name": repo_name,
            "private": True,
        },
        auth=(GITHUB_USERNAME, GITHUB_ACCESS_TOKEN),
        timeout=request_timeout,
    )
    response.raise_for_status()
    # Pause carried over from the original; presumably gives GitHub time to
    # finish provisioning the repository before the push -- TODO confirm.
    time.sleep(3)

    # Add the GitHub remote to the local Git repository and push the changes
    remote_url = f"https://{GITHUB_ORG}:{GITHUB_ACCESS_TOKEN}@github.com/{GITHUB_ORG}/{repo_name}.git"
    run_git("remote", "add", "origin", remote_url, cwd=workdir)

    # rename default branch to main
    run_git("branch", "-M", "main", cwd=workdir)
    run_git("push", *quiet_args, "-u", "origin", "main", cwd=workdir)

    return response.json()["html_url"]
def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
    """Split a tab-separated alignment file into two line-aligned text files.

    Every non-empty line of ``align_fn`` is split at its first tab into a
    BO segment and an EN segment. The segments are written, one per line, to
    ``<tm_path>/<tm_path.name>-bo.txt`` and ``<tm_path>/<tm_path.name>-en.txt``.
    A line without a tab is treated as a BO segment with an empty EN segment.

    Args:
        align_fn: Path of the raw alignment file (read as UTF-8).
        tm_path: Existing directory that receives the two output files.

    Returns:
        ``tm_path``, unchanged.
    """
    if DEBUG:
        logging.debug("[INFO] Conerting raw alignment to TM repo...")

    def load_alignment(fn: Path):
        # Read as UTF-8 explicitly: the outputs are written as UTF-8, and the
        # locale default encoding is not guaranteed to match.
        for seg_pair in fn.read_text(encoding="utf-8").splitlines():
            if not seg_pair:
                continue
            # partition() splits at the first tab only. When there is no
            # tab, the EN side is "" so that exactly one (empty) line is
            # written; the previous "\n" placeholder produced two lines and
            # shifted every following EN segment out of alignment.
            bo_seg, _, en_seg = seg_pair.partition("\t")
            yield bo_seg, en_seg

    text_bo_fn = tm_path / f"{tm_path.name}-bo.txt"
    text_en_fn = tm_path / f"{tm_path.name}-en.txt"

    with open(text_bo_fn, "w", encoding="utf-8") as bo_file, open(
        text_en_fn, "w", encoding="utf-8"
    ) as en_file:
        for bo_seg, en_seg in load_alignment(align_fn):
            bo_file.write(bo_seg + "\n")
            en_file.write(en_seg + "\n")

    return tm_path
def get_github_dev_url(raw_github_url: str) -> str:
    """Turn a GitHub file URL into the matching ``https://github.dev`` URL.

    Everything up to and including the first ``.com`` is replaced by
    ``https://github.dev``, and a ``blob`` path segment is inserted in front
    of the ``main`` branch segment.

    Args:
        raw_github_url: URL of a file on the ``main`` branch.
            Assumes the path is laid out as ``/<owner>/<repo>/.../main/<file>``
            -- confirm against the callers' actual URLs.

    Returns:
        The github.dev URL. The path is left unchanged when it has no
        ``main`` segment after the owner and repository.

    Raises:
        ValueError: If ``raw_github_url`` contains no ``.com``.
    """
    base_url = "https://github.dev"

    # Split at the FIRST ".com" only, so a file name such as "foo.com.txt"
    # no longer breaks the conversion.
    _, separator, file_path = raw_github_url.partition(".com")
    if not separator:
        raise ValueError(f"expected a '.com' URL, got {raw_github_url!r}")

    # Work on whole path segments: ["", owner, repo, ...]. Only a segment
    # that is exactly "main" and comes after owner/repo is the branch; names
    # that merely contain "main" (e.g. "domain.txt") must stay untouched.
    segments = file_path.split("/")
    try:
        branch_index = segments.index("main", 3)
    except ValueError:
        return base_url + file_path
    segments.insert(branch_index, "blob")
    return base_url + "/".join(segments)
def add_input_in_readme(input_dict: Dict[str, str], path: Path) -> Path:
    """Write a README.md in ``path`` that links to the BO and EN input files.

    Args:
        input_dict: Mapping providing ``text_id``, ``bo_file_url`` and
            ``en_file_url``; both URLs are passed through
            ``get_github_dev_url`` before being written.
        path: Directory in which ``README.md`` is created or overwritten.

    Returns:
        ``path``, unchanged.
    """
    text_id = input_dict["text_id"]
    bo_link = get_github_dev_url(input_dict["bo_file_url"])
    en_link = get_github_dev_url(input_dict["en_file_url"])

    # One heading followed by one bullet per language.
    readme_lines = [
        "## Input",
        f"- [BO{text_id}]({bo_link})",
        f"- [EN{text_id}]({en_link})",
    ]
    (path / "README.md").write_text("\n".join(readme_lines))
    return path
def add_to_publish_todo_repo(org, repo_name, file_path, access_token, timeout=30):
    """Create an empty file ``file_path`` in ``org/repo_name`` unless it exists.

    The file is first looked up through the GitHub contents API. If the
    lookup answers 200 nothing is changed; otherwise a PUT with empty content
    is sent. The outcome is reported with ``print`` and HTTP error statuses
    are not raised.

    Args:
        org: Owner of the target repository.
        repo_name: Name of the target repository.
        file_path: Path of the file inside the repository.
        access_token: Token sent as a Bearer ``Authorization`` header.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Raises:
        requests.Timeout: If GitHub does not answer within ``timeout``.
    """
    url = f"https://api.github.com/repos/{org}/{repo_name}/contents/" + file_path
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    # A 200 on the lookup means the entry is already there: nothing to do.
    lookup = requests.get(url, headers=headers, timeout=timeout)
    if lookup.status_code == 200:
        print(f"[INFO] '{file_path}' already added.")
        return

    # Empty content: the file is created with no content of its own.
    payload = {"message": f"Add {file_path}", "content": ""}
    response = requests.put(url, headers=headers, json=payload, timeout=timeout)

    if response.status_code == 201:
        print(f"[INFO] '{file_path}' added to publish todo")
    else:
        print(f"[ERROR] Failed to add '{file_path}'.")
        print(f"[ERROR] Response: {response.text}")
def create_tm(align_fn: Path, text_pair: Dict[str, str]):
    """Build a TM repository from an alignment file and publish it on GitHub.

    Inside a temporary directory a ``TM<text_id>`` folder is created and
    filled by ``convert_raw_align_to_tm`` and ``add_input_in_readme``. It is
    then pushed with ``create_github_repo`` and its name is recorded through
    ``add_to_publish_todo_repo``.

    Args:
        align_fn: Path of the raw alignment file.
        text_pair: Mapping providing at least ``text_id``; it is also handed
            to ``add_input_in_readme``.

    Returns:
        The URL returned by ``create_github_repo``.
    """
    source = Path(align_fn)
    repo_name = f"TM{text_pair['text_id']}"

    with tempfile.TemporaryDirectory() as scratch:
        tm_dir = Path(scratch) / repo_name
        tm_dir.mkdir(parents=True, exist_ok=True)

        # Fill the working directory: segment files first, then the README.
        convert_raw_align_to_tm(source, tm_dir)
        repo_dir = add_input_in_readme(text_pair, tm_dir)

        repo_url = create_github_repo(repo_dir, repo_name)
        logging.info(f"TM repo created: {repo_url}")

        add_to_publish_todo_repo(
            GITHUB_ORG,
            MAI_TM_PUBLISH_TODO_REPO,
            repo_name,
            GITHUB_ACCESS_TOKEN,
        )

    return repo_url
if __name__ == "__main__":
    # Usage: <script> ALIGN_FILE TEXT_PAIR_JSON
    #
    # create_tm() requires the text-pair metadata as well as the alignment
    # file; the previous one-argument call always failed with TypeError.
    # NOTE(review): reading the metadata from a JSON file with the keys
    # "text_id", "bo_file_url" and "en_file_url" is an assumption about the
    # intended command-line interface -- confirm.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} ALIGN_FILE TEXT_PAIR_JSON")
    align_fn = Path(sys.argv[1])
    text_pair = json.loads(Path(sys.argv[2]).read_text(encoding="utf-8"))
    create_tm(align_fn, text_pair)