import hashlib
import os
import shutil
import tempfile
from typing import List, Tuple
from urllib.parse import urlparse

from git import Repo


class RepositoryCloneError(Exception):
    """Raised when a repository cannot be cloned.

    Subclasses ``Exception`` so callers that catch ``Exception`` around
    ``GitHubService.clone_repository`` keep working.
    """


class GitHubService:
    """Helpers for identifying, cloning and cleaning up Git repositories.

    Attributes are plain instance state:
        github_token: token embedded into HTTPS clone URLs when non-empty.
    """

    def __init__(self, github_token: str = ""):
        """Store the optional token used to authenticate HTTPS clones."""
        self.github_token = github_token

    def generate_repo_id(self, repo_url: str) -> str:
        """Return the hexadecimal MD5 digest of ``repo_url``.

        The digest is computed over the URL exactly as given, so two
        spellings of the same repository yield different IDs.
        """
        return hashlib.md5(repo_url.encode()).hexdigest()

    def parse_github_url(self, url: str) -> Tuple[str, str]:
        """Extract ``(owner, repo)`` from the path of a repository URL.

        Empty path segments are ignored and a trailing ``.git`` on the
        repository segment is dropped, so ``.../owner/repo.git`` and
        ``.../owner/repo/`` both yield ``("owner", "repo")``.

        Raises:
            ValueError: if the path has fewer than two non-empty segments
                or the repository name is empty.
        """
        segments = [part for part in urlparse(url).path.split("/") if part]
        if len(segments) < 2:
            raise ValueError("Invalid GitHub URL format")

        owner, repo = segments[0], segments[1]
        if repo.endswith(".git"):
            repo = repo[: -len(".git")]
        if not repo:
            raise ValueError("Invalid GitHub URL format")
        return owner, repo

    def _authenticated_url(self, repo_url: str) -> str:
        """Return ``repo_url`` with the token inserted as HTTPS userinfo.

        The URL is returned unchanged when no token is configured, when the
        scheme is not ``https``, or when the URL already carries userinfo
        (adding a second ``user@`` would produce a malformed authority).
        """
        if not self.github_token:
            return repo_url
        parsed = urlparse(repo_url)
        if parsed.scheme != "https" or "@" in parsed.netloc:
            return repo_url
        authed = parsed._replace(netloc=f"{self.github_token}@{parsed.netloc}")
        return authed.geturl()

    async def clone_repository(self, repo_url: str, branch: str = "main") -> str:
        """Clone ``branch`` of ``repo_url`` into a new temporary directory.

        The clone is requested with ``depth=1``. On success the path of the
        temporary directory is returned; the caller is responsible for
        removing it (see ``cleanup_repo``). On failure the temporary
        directory is removed before the error is raised.

        Raises:
            RepositoryCloneError: if anything goes wrong while cloning. The
                configured token is redacted from the message and the
                original exception is attached as ``__cause__``.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            # NOTE(review): this call is synchronous and there is no ``await``
            # in this coroutine, so the clone runs to completion before
            # control returns to the event loop. Consider an executor if
            # that becomes a problem.
            Repo.clone_from(
                self._authenticated_url(repo_url),
                temp_dir,
                branch=branch,
                depth=1,
            )
        except Exception as e:
            shutil.rmtree(temp_dir, ignore_errors=True)
            detail = str(e)
            if self.github_token:
                # The failing command/URL may be echoed in the error text;
                # never surface the token in our own message.
                detail = detail.replace(self.github_token, "***")
            raise RepositoryCloneError(
                f"Failed to clone repository: {detail}"
            ) from e
        return temp_dir

    def cleanup_repo(self, repo_path: str) -> None:
        """Remove the directory at ``repo_path`` if it exists.

        Removal is best-effort: errors during deletion are ignored, and an
        empty or ``None`` path is a no-op.
        """
        if repo_path and os.path.exists(repo_path):
            shutil.rmtree(repo_path, ignore_errors=True)