import base64
import os
from urllib.parse import quote, urlparse

import google.generativeai as genai
import gradio as gr
import requests


# ── Helpers ────────────────────────────────────────────────────────────────────

def parse_github_url(url: str) -> tuple[str, str]:
    """Extract ``(owner, repo)`` from a GitHub repository URL.

    Accepts URLs with or without a scheme (``https://github.com/o/r`` or
    ``github.com/o/r``), with trailing slashes, with extra path segments
    after the repository (``/tree/main/...``) and with a ``.git`` suffix.

    Args:
        url: The repository URL as typed by the user.

    Returns:
        A ``(owner, repo)`` tuple.

    Raises:
        ValueError: If the host is not github.com or the path does not
            contain both an owner and a repository name.
    """
    invalid_url_msg = "Please enter a valid GitHub URL (e.g. https://github.com/owner/repo)"

    url = url.strip().rstrip("/")
    if "github.com" not in url:
        raise ValueError(invalid_url_msg)

    # urlparse() only recognises the host when a scheme is present; without
    # one, "github.com" would land in the path and be mistaken for the owner.
    if "://" not in url:
        url = "https://" + url.lstrip("/")

    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    # Check the real host rather than a substring, so that
    # "https://example.com/github.com/x" is rejected.
    if host not in {"github.com", "www.github.com"}:
        raise ValueError(invalid_url_msg)

    parts = [segment for segment in parsed.path.split("/") if segment]
    if len(parts) < 2:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")

    owner = parts[0]
    # Clone URLs end in ".git", which is not part of the repository name.
    repo = parts[1].removesuffix(".git")
    if not repo:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")
    return owner, repo


def fetch_repo_files(
    owner: str,
    repo: str,
    github_token: str | None = None,
    *,
    max_files: int = 18,
    max_file_bytes: int = 60_000,
    max_chars_per_file: int = 3_500,
) -> dict[str, str]:
    """Download a small, prioritised sample of text files from a GitHub repo.

    The recursive file tree is fetched once, filtered (vendored/build
    directories, binary-looking extensions and oversized files are dropped),
    ranked so that READMEs and shallow paths come first, and then the top
    ``max_files`` files are downloaded through the contents API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        github_token: Optional token sent as a Bearer ``Authorization`` header.
        max_files: Maximum number of files to download.
        max_file_bytes: Files whose reported size exceeds this are skipped.
        max_chars_per_file: Each file's decoded text is truncated to this length.

    Returns:
        Mapping of repository path to (possibly truncated) file text. Files
        that fail to download or decode are omitted.

    Raises:
        ValueError: For 404 (missing/private), 403/429 (rate limit) and
            409 (no tree available) responses to the tree request.
        requests.HTTPError: For any other non-success tree response.
        requests.RequestException: If the tree request itself fails
            (e.g. timeout or connection error).
    """
    headers = {"Accept": "application/vnd.github+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    skip_dirs = {"node_modules", ".git", "__pycache__", "venv", "env",
                 "dist", "build", ".next", "vendor", ".venv", "coverage"}
    skip_exts = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".pdf",
                 ".zip", ".woff", ".ttf", ".eot", ".mp4", ".mp3", ".lock",
                 ".bin", ".exe", ".so", ".dylib"}

    # Prioritise: README first, then root-level, then shallow paths
    def priority(p: str) -> int:
        depth = p.count("/")
        # Look at the file name only, so a directory that merely contains
        # "readme" in its name does not promote everything beneath it.
        if "readme" in p.rsplit("/", 1)[-1].lower():
            return 0
        if depth == 0:
            return 1
        if depth == 1:
            return 2
        return 3 + depth

    repo_api = f"https://api.github.com/repos/{quote(owner, safe='')}/{quote(repo, safe='')}"

    # One session for the tree request plus every file download, so the
    # connection to api.github.com is reused instead of reopened each time.
    with requests.Session() as session:
        session.headers.update(headers)

        # Get recursive file tree
        resp = session.get(f"{repo_api}/git/trees/HEAD?recursive=1", timeout=15)

        if resp.status_code == 404:
            raise ValueError("Repository not found or is private. For private repos, add a GitHub token.")
        if resp.status_code in (403, 429):
            raise ValueError("GitHub API rate limit exceeded. Add a GitHub Personal Access Token to continue.")
        if resp.status_code == 409:
            raise ValueError("GitHub could not return a file tree for this repository (it may be empty).")
        resp.raise_for_status()

        tree = resp.json()

        candidates = []
        for item in tree.get("tree", []):
            if item["type"] != "blob":
                continue
            path = item["path"]
            if any(seg in skip_dirs for seg in path.split("/")):
                continue
            if os.path.splitext(path)[1].lower() in skip_exts:
                continue
            if item.get("size", 0) > max_file_bytes:
                continue
            candidates.append(path)

        selected = sorted(candidates, key=priority)[:max_files]

        file_contents: dict[str, str] = {}
        for path in selected:
            # quote() keeps "/" but escapes spaces, "#", "?" and similar
            # characters that would otherwise corrupt the request URL.
            url = f"{repo_api}/contents/{quote(path)}"
            try:
                r = session.get(url, timeout=10)
                if r.status_code != 200:
                    continue
                data = r.json()
                if not isinstance(data, dict) or data.get("encoding") != "base64":
                    continue
                raw = base64.b64decode(data["content"]).decode("utf-8", errors="replace")
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Best effort: one unreadable file (network error, malformed
                # JSON or base64, missing content) must not abort the sample.
                continue
            file_contents[path] = raw[:max_chars_per_file]   # cap per-file

    return file_contents


# ── Analysis ────────────────────────────────────────────────────────────────────

SYSTEM_PROMPT = """You are a senior software engineer conducting a professional code review.
Be specific, constructive, and reference actual file names and patterns you observed.
Structure your response exactly as requested."""


def build_analysis_prompt(owner: str, repo: str, file_contents: dict[str, str]) -> str:
    """Assemble the review prompt for one repository.

    The prompt consists of a short header (repository name and number of
    sampled files), every sampled file framed by a 60-character rule and a
    ``FILE:`` banner, and a fixed set of instructions describing the four
    report sections the model must produce.

    Args:
        owner: Repository owner, used in the header line.
        repo: Repository name, used in the header line.
        file_contents: Mapping of file path to file text, rendered in
            iteration order.

    Returns:
        The complete prompt text.
    """
    rule = "─" * 60
    rendered_files = "".join(
        f"\n\n{rule}\n📄 FILE: {file_path}\n{rule}\n{body}"
        for file_path, body in file_contents.items()
    )

    preamble = (
        f"Repository under review: github.com/{owner}/{repo}\n"
        f"Total files sampled: {len(file_contents)}\n"
        "\n"
        f"{rendered_files}\n"
        "\n"
    )

    # Static part of the prompt: nothing below is interpolated.
    instructions = """────────────────────────────────────────────────────────────
Please provide a structured analysis with the following sections:

## 🏗️ Code Quality & Structure

Evaluate:
- Overall architecture and folder/file organisation
- Naming conventions (variables, functions, classes, files)
- Function/class design — are they focused and well-sized?
- Error handling — is it present and robust?
- Code duplication or DRY violations
- Highlight 2–3 specific files as examples (good or bad)

## 📖 Documentation & README

Evaluate:
- README completeness: does it cover purpose, setup, usage, examples?
- Inline comments — are they useful or absent?
- Function/API documentation (docstrings, JSDoc, etc.)
- What's missing that a new contributor would need?

## 🏆 Scores

Rate each area out of 10:
- Code Quality & Structure: ?/10
- Documentation & README: ?/10
- Overall Repository Health: ?/10

Include one sentence justifying each score.

## ✅ Top 5 Actionable Recommendations

Numbered list. Be specific — mention file names or patterns where possible.
Order from most to least impactful.
"""
    return preamble + instructions


def analyze_repo(repo_url: str, github_token: str, gemini_api_key: str, progress=gr.Progress()):
    """Run the full analysis for one repository and return a Markdown report.

    Steps: validate the inputs, parse the URL, download a sample of files,
    send them to Gemini and prepend a short header to the model's answer.
    Failures are never raised to the caller; they are returned as a
    Markdown error string so the UI can display them.

    Args:
        repo_url: GitHub repository URL from the form (may be ``None``).
        github_token: Optional GitHub token from the form (may be ``None``).
        gemini_api_key: Optional Gemini key from the form (may be ``None``).
            When blank, the ``GEMINI_API_KEY`` environment variable is used.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        The Markdown report, or a Markdown-formatted error message.
    """
    repo_url = (repo_url or "").strip()
    github_token = (github_token or "").strip()
    gemini_api_key = (gemini_api_key or "").strip()

    if not repo_url:
        return "❌ **Error:** Please enter a GitHub repository URL."

    # A key typed into the form is an explicit choice and must not be silently
    # ignored; the environment variable is only the fallback for a blank field.
    api_key = gemini_api_key or (os.environ.get("GEMINI_API_KEY") or "").strip()
    if not api_key:
        return "❌ **Error:** Please enter your Gemini API key (free at [aistudio.google.com](https://aistudio.google.com))."

    try:
        progress(0.10, desc="Parsing repository URL…")
        owner, repo = parse_github_url(repo_url)

        progress(0.30, desc=f"Fetching files from {owner}/{repo}…")
        file_contents = fetch_repo_files(owner, repo, github_token or None)

        if not file_contents:
            return "❌ **Error:** No readable source files found. The repo may be empty or contain only binary files."

        progress(0.65, desc="Running AI analysis with Gemini 2.0 Flash…")

        # Configure Gemini
        # NOTE(review): genai.configure() looks like process-wide state; with
        # concurrent users supplying different keys this may interfere — confirm.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name="gemini-2.0-flash",
            system_instruction=SYSTEM_PROMPT
        )

        response = model.generate_content(
            build_analysis_prompt(owner, repo, file_contents),
            generation_config=genai.GenerationConfig(
                max_output_tokens=2048,
                temperature=0.3,
            )
        )

        # Read the text before reporting completion: accessing it can still
        # raise, and that should surface as an error rather than after "Done!".
        report = response.text

        progress(1.0, desc="Done!")

        header = (
            f"## 🔍 Analysis Report — `{owner}/{repo}`\n"
            f"*{len(file_contents)} files sampled · Powered by Gemini 2.0 Flash*\n\n---\n\n"
        )
        return header + report

    except ValueError as exc:
        return f"❌ **Error:** {exc}"
    except requests.exceptions.Timeout:
        return "❌ **Error:** GitHub API timed out. Try again in a moment."
    except Exception as exc:
        # Top-level UI boundary: show the failure instead of crashing the handler.
        return f"❌ **Unexpected error:** {exc}"


# ── UI ──────────────────────────────────────────────────────────────────────────

CSS = """
#title  { text-align: center; margin-bottom: 4px; }
#sub    { text-align: center; color: #6b7280; margin-bottom: 24px; }
#footer { text-align: center; color: #9ca3af; font-size: 0.85rem; margin-top: 16px; }
"""

# Page layout. Components appear in the order they are created below, so the
# statement order here is the visual order of the page.
with gr.Blocks(title="GitHub Repo Analyzer") as demo:

    # Title and subtitle; the elem_id values are the hooks used by the CSS rules.
    gr.Markdown("# 🔍 GitHub Repo Analyzer", elem_id="title")
    gr.Markdown(
        "AI-powered **Code Quality & Documentation** analysis — paste any public repo and get a full report in seconds.\n\n"
        "_Powered by **Gemini 2.0 Flash** — blazing fast & free._",
        elem_id="sub",
    )

    # Main input row: URL box (scale 3) next to the Analyze button (scale 1).
    with gr.Row():
        with gr.Column(scale=3):
            repo_url_input = gr.Textbox(
                label="GitHub Repository URL",
                placeholder="https://github.com/owner/repository",
                lines=1,
            )
        with gr.Column(scale=1):
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

    # Collapsed-by-default section holding the two optional credentials.
    with gr.Accordion("⚙️ API Keys", open=False):
        gr.Markdown(
            "💡 _If the Space owner has set `GEMINI_API_KEY` as a HF Secret, you don't need to fill this in._\n\n"
            "Get a **free** Gemini API key at [aistudio.google.com](https://aistudio.google.com) — no credit card needed."
        )
        with gr.Row():
            # Both fields use type="password" so the secrets are masked on screen.
            gemini_key_input = gr.Textbox(
                label="Gemini API Key (free)",
                placeholder="AIza_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )
            github_token_input = gr.Textbox(
                label="GitHub Token (optional — raises rate limit to 5,000 req/hr)",
                placeholder="ghp_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )

    # Report area; replaced by whatever analyze_repo returns.
    output_md = gr.Markdown(value="*Your report will appear here after analysis.*")

    # The inputs list is positional: it must stay in the same order as
    # analyze_repo's parameters (repo_url, github_token, gemini_api_key).
    analyze_btn.click(
        fn=analyze_repo,
        inputs=[repo_url_input, github_token_input, gemini_key_input],
        outputs=output_md,
    )

    # Example URLs offered for the repository URL box.
    gr.Examples(
        examples=[
            ["https://github.com/tiangolo/fastapi"],
            ["https://github.com/gradio-app/gradio"],
            ["https://github.com/psf/requests"],
        ],
        inputs=repo_url_input,
        label="Try an example repo",
    )

    # Footer with attribution links.
    gr.Markdown(
        "---\nBuilt with **Gemini 2.0 Flash** · [Get your free key](https://aistudio.google.com) · "
        "Made by [Worply](https://worply.tech)",
        elem_id="footer",
    )


if __name__ == "__main__":
    # Start the app only when this file is run as a script. The Soft theme
    # (violet primary hue) and the custom stylesheet are supplied here, at
    # launch time, with ssr_mode switched off.
    demo.launch(
        theme=gr.themes.Soft(primary_hue="violet"),
        css=CSS,
        ssr_mode=False,
    )