"""
GitHub utilities for repository operations.
"""

import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional, Dict, Any, List


def clone_repository(
    repo_url: str,
    target_dir: Optional[str] = None,
    depth: int = 1,
    timeout: int = 60
) -> Optional[str]:
    """
    Clone a git repository with ``git clone``.

    If no target directory is given, a fresh temporary directory is created.
    That temporary directory is removed again when the clone does not
    succeed, so failed attempts do not accumulate on disk. A directory
    supplied by the caller is never deleted.

    Args:
        repo_url: Repository URL (or any source accepted by ``git clone``)
        target_dir: Target directory (optional, creates temp if None)
        depth: Clone depth passed to ``--depth`` (1 for shallow clone)
        timeout: Timeout in seconds for the git process

    Returns:
        Path to cloned repository, or None if failed
    """
    # Local import: only needed to clean up after a failed clone.
    import shutil

    # Only directories created here may be deleted on failure.
    owns_target = target_dir is None
    cloned = False

    try:
        # Create target directory
        if owns_target:
            target_dir = tempfile.mkdtemp(prefix="reproagent_repo_")
        else:
            Path(target_dir).mkdir(parents=True, exist_ok=True)

        print(f"📥 Cloning {repo_url} to {target_dir}...")

        # "--" ends option parsing, so a repo_url beginning with "-" is
        # treated as a repository and not as a git command-line option.
        cmd = ['git', 'clone', '--depth', str(depth), '--', repo_url, target_dir]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        if result.returncode == 0:
            print("✅ Repository cloned successfully")
            cloned = True
            return target_dir

        print(f"❌ Clone failed: {result.stderr}")
        return None

    except subprocess.TimeoutExpired:
        print(f"❌ Clone timeout after {timeout}s")
        return None

    except Exception as e:
        # Best-effort API: report the problem and signal failure with None.
        print(f"❌ Clone error: {e}")
        return None

    finally:
        # target_dir is still None if mkdtemp itself failed.
        if owns_target and not cloned and target_dir is not None:
            shutil.rmtree(target_dir, ignore_errors=True)


def _git_output(repo_path: str, *args: str, timeout: int = 5) -> Optional[str]:
    """Run ``git -C repo_path <args>``; return stripped stdout, or None on any failure."""
    try:
        result = subprocess.run(
            ['git', '-C', str(repo_path), *args],
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # git missing, bad path, undecodable output or timeout: no value.
        return None

    if result.returncode != 0:
        return None
    return result.stdout.strip()


def get_repo_info(repo_path: str) -> Dict[str, Any]:
    """
    Get information about a git repository.

    Every lookup is best-effort: a failing git command leaves the
    corresponding field at None instead of raising.

    Args:
        repo_path: Path to repository

    Returns:
        Dictionary with the keys ``path``, ``exists``, ``is_git_repo``,
        ``remote_url``, ``branch``, ``last_commit`` (``"<hash> <subject>"``),
        ``file_count`` and ``size_mb``. Fields after ``exists`` keep their
        defaults when the path is missing; fields after ``is_git_repo`` keep
        their defaults when the path has no ``.git`` entry.
    """
    info = {
        'path': repo_path,
        'exists': False,
        'is_git_repo': False,
        'remote_url': None,
        'branch': None,
        'last_commit': None,
        'file_count': 0,
        'size_mb': 0
    }

    repo_dir = Path(repo_path)

    if not repo_dir.exists():
        return info

    info['exists'] = True

    # Check if git repo
    if not (repo_dir / '.git').exists():
        return info

    info['is_git_repo'] = True

    # Query git metadata
    info['remote_url'] = _git_output(repo_path, 'config', '--get', 'remote.origin.url')
    info['branch'] = _git_output(repo_path, 'rev-parse', '--abbrev-ref', 'HEAD')
    info['last_commit'] = _git_output(repo_path, 'log', '-1', '--pretty=format:%H %s')

    # Count files and sum their sizes in a single walk of the tree
    # (the contents of .git are included).
    file_count = 0
    total_size = 0
    try:
        for entry in repo_dir.rglob('*'):
            try:
                if not entry.is_file():
                    continue
                size = entry.stat().st_size
            except OSError:
                # Unreadable or vanished entry: skip it, keep the rest.
                continue
            file_count += 1
            total_size += size
    except OSError:
        # Traversal aborted; report what was gathered so far.
        pass

    info['file_count'] = file_count
    info['size_mb'] = total_size / (1024 * 1024)

    return info


def extract_github_urls(text: str) -> List[str]:
    """
    Extract GitHub repository URLs from text using regex.

    Each result has the form ``http(s)://github.com/<owner>/<repo>``; any
    deeper path (``/issues/1`` ...) is cut off. Trailing sentence
    punctuation is not part of the result, and duplicates are removed while
    keeping the order of first appearance.

    Args:
        text: Text to search (empty or None yields an empty list)

    Returns:
        List of unique GitHub URLs in order of first occurrence
    """
    if not text:
        return []

    # The repo segment must END on a word character or hyphen. This drops
    # a trailing "." from prose ("see https://github.com/a/b.") and rejects
    # a segment made only of dots, which would otherwise collapse to the
    # meaningless "https://github.com/owner/".
    pattern = r'https?://github\.com/[\w\-]+/[\w\-.]*[\w\-]'

    # dict.fromkeys de-duplicates in linear time and preserves order.
    return list(dict.fromkeys(re.findall(pattern, text)))


def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL into components.

    The URL must start with ``http(s)://github.com/<owner>/<repo>``; anything
    after the repository segment is ignored. The repository name is
    normalised: trailing dots (sentence punctuation) and a clone-style
    ``.git`` suffix are removed.

    Args:
        url: GitHub URL

    Returns:
        Dict with ``owner``, ``repo`` and the original ``url``, or None if
        the input is not a string, is not a GitHub repository URL, or has
        no repository name left after normalisation
    """
    if not isinstance(url, str):
        return None

    pattern = r'https?://github\.com/(?P<owner>[\w\-]+)/(?P<repo>[\w\-\.]+)'
    match = re.match(pattern, url)
    if match is None:
        return None

    # "repo." (end of sentence) and "repo.git" (clone URL) both name "repo".
    repo = match.group('repo').rstrip('.')
    if repo.endswith('.git'):
        repo = repo[:-len('.git')].rstrip('.')

    if not repo:
        return None

    return {
        'owner': match.group('owner'),
        'repo': repo,
        'url': url
    }


def find_python_files(repo_path: str) -> List[str]:
    """
    Find all Python files in repository.

    Files are skipped when any component of their path *relative to the
    repository root* is hidden (starts with ".") or is one of the common
    non-code directories ``__pycache__``, ``venv``, ``env``, ``build``,
    ``dist``. The location of the repository itself never affects the
    result, so a checkout under e.g. ``~/.cache`` or ``build/`` is still
    scanned.

    Args:
        repo_path: Path to repository

    Returns:
        List of Python file paths (relative to ``repo_path``); empty if the
        path does not exist
    """
    repo_dir = Path(repo_path)

    if not repo_dir.exists():
        return []

    skip_names = {'__pycache__', 'venv', 'env', 'build', 'dist'}
    python_files = []

    for py_file in repo_dir.rglob('*.py'):
        rel_path = py_file.relative_to(repo_dir)

        # Judge only the components inside the repository; the parent
        # directories of repo_path say nothing about the repo contents.
        if any(part.startswith('.') or part in skip_names for part in rel_path.parts):
            continue

        # rglob matches names, so a directory called "x.py" would match too.
        if not py_file.is_file():
            continue

        python_files.append(str(rel_path))

    return python_files


def find_config_files(repo_path: str) -> Dict[str, Optional[str]]:
    """
    Locate well-known project files in the top level of a repository.

    Args:
        repo_path: Path to repository

    Returns:
        Dict mapping each config type (``requirements``, ``setup``,
        ``pyproject``, ``dockerfile``, ``readme``, ``license``) to the name
        of the first matching file found, or None when none exists
    """
    # Config type -> candidate file names, in order of preference.
    candidates = (
        ('requirements', ('requirements.txt',)),
        ('setup', ('setup.py',)),
        ('pyproject', ('pyproject.toml',)),
        ('dockerfile', ('Dockerfile',)),
        ('readme', ('README.md', 'README.rst', 'README.txt', 'README')),
        ('license', ('LICENSE', 'LICENSE.md', 'LICENSE.txt')),
    )

    root = Path(repo_path)
    found: Dict[str, Optional[str]] = {kind: None for kind, _ in candidates}

    if not root.exists():
        return found

    for kind, names in candidates:
        # First candidate present on disk wins; None if there is no match.
        found[kind] = next(
            (name for name in names if (root / name).exists()),
            None
        )

    return found


# Manual smoke test: run this module directly to see URL extraction and
# parsing applied to a small sample text.
if __name__ == "__main__":
    sample = """
    Check out our code at https://github.com/example/awesome-repo
    Also see https://github.com/another/project.
    """

    found = extract_github_urls(sample)
    print("Found URLs:", found)

    # Show the parsed components of every extracted URL
    for components in map(parse_github_url, found):
        print(f"Parsed: {components}")