# git-chat/services/github_service.py
# lakkiroy's picture
# Upload folder using huggingface_hub
# 200bf6d verified
import os
import tempfile
import shutil
from git import Repo
from typing import List, Tuple
import hashlib
from urllib.parse import urlparse
class GitHubService:
    """Identify, clone and clean up GitHub repositories.

    An optional access token may be supplied; it is only used when cloning
    over HTTPS (see ``clone_repository``).
    """

    def __init__(self, github_token: str = ""):
        """Store the access token; an empty string means no authentication."""
        self.github_token = github_token

    def generate_repo_id(self, repo_url: str) -> str:
        """Return a deterministic hex ID for *repo_url*.

        The ID is the MD5 hex digest of the URL string exactly as given, so
        different spellings of the same repository (for example with and
        without a trailing ``.git``) produce different IDs.
        """
        # MD5 serves as a stable fingerprint here; changing the hash would
        # change every previously generated ID.
        return hashlib.md5(repo_url.encode()).hexdigest()

    def parse_github_url(self, url: str) -> Tuple[str, str]:
        """Extract ``(owner, repo)`` from a GitHub URL.

        Only the URL path is inspected; the host is not validated. Path
        segments after the repository name (``/tree/main`` etc.) are ignored
        and a trailing ``.git`` on the repository name is removed.

        Raises:
            ValueError: If the path does not contain an owner and a
                repository name.
        """
        # Skip empty segments so stray or doubled slashes cannot produce an
        # empty owner or repository name.
        segments = [part for part in urlparse(url).path.split("/") if part]
        if len(segments) >= 2:
            owner, repo = segments[0], segments[1]
            # Clone URLs end in ".git", which is not part of the repo name.
            if repo.endswith(".git"):
                repo = repo[: -len(".git")]
            if repo:
                return owner, repo
        raise ValueError("Invalid GitHub URL format")

    def _authenticated_url(self, repo_url: str) -> str:
        """Return *repo_url* with the token embedded as HTTPS userinfo.

        The URL is returned unchanged when no token is configured or when it
        is not an ``https`` URL with a host, so the token is never attached
        to another transport.
        """
        if not self.github_token:
            return repo_url
        parsed = urlparse(repo_url)
        if parsed.scheme != "https" or not parsed.netloc:
            return repo_url
        # Drop any userinfo already present so the result has exactly one
        # "@" separating credentials from the host.
        host = parsed.netloc.rpartition("@")[2]
        return parsed._replace(netloc=f"{self.github_token}@{host}").geturl()

    async def clone_repository(self, repo_url: str, branch: str = "main") -> str:
        """Shallow-clone *branch* of *repo_url* into a new temporary directory.

        Returns:
            The path of the temporary directory holding the clone. The
            caller owns it and should pass it to ``cleanup_repo`` when done.

        Raises:
            Exception: If the clone fails for any reason. The temporary
                directory is removed first and the configured token is
                redacted from the message.

        NOTE(review): ``Repo.clone_from`` is called synchronously (nothing is
        awaited here), so the calling event loop is blocked while the clone
        runs. Consider offloading it to an executor.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            # depth=1: only the tip commit of the requested branch is fetched.
            Repo.clone_from(
                self._authenticated_url(repo_url),
                temp_dir,
                branch=branch,
                depth=1,
            )
            return temp_dir
        except Exception as e:
            shutil.rmtree(temp_dir, ignore_errors=True)
            detail = str(e)
            if self.github_token:
                # The underlying error may quote the credentialed clone URL.
                detail = detail.replace(self.github_token, "***")
            # When a token is in use, do not chain the original exception:
            # its text would reappear (unredacted) in tracebacks.
            cause = None if self.github_token else e
            raise Exception(f"Failed to clone repository: {detail}") from cause

    def cleanup_repo(self, repo_path: str):
        """Delete the cloned repository at *repo_path*, if it exists.

        Deletion is best-effort: errors raised while removing the tree are
        ignored.
        """
        if os.path.exists(repo_path):
            shutil.rmtree(repo_path, ignore_errors=True)