"""Gradio app that samples files from a GitHub repository and asks Gemini to review them."""

import base64
import os
from urllib.parse import quote, urlparse

import google.generativeai as genai
import gradio as gr
import requests

# ── Helpers ────────────────────────────────────────────────────────────────────

_GITHUB_API = "https://api.github.com"
_GITHUB_HOSTS = frozenset({"github.com", "www.github.com"})

# Path segments and extensions that are never sampled (dependencies, build
# output, binary assets, lock files).
_SKIP_DIRS = frozenset({
    "node_modules", ".git", "__pycache__", "venv", "env", "dist",
    "build", ".next", "vendor", ".venv", "coverage",
})
_SKIP_EXTS = frozenset({
    ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".pdf", ".zip",
    ".woff", ".ttf", ".eot", ".mp4", ".mp3", ".lock", ".bin", ".exe",
    ".so", ".dylib",
})
_MAX_FILE_BYTES = 60_000      # skip files > 60 KB
_MAX_FILES = 18               # number of files fetched per analysis
_MAX_CHARS_PER_FILE = 3_500   # cap per-file text sent to the model


def parse_github_url(url: str) -> tuple[str, str]:
    """Extract ``(owner, repo)`` from a GitHub repository URL.

    Accepts URLs with or without a scheme (``github.com/owner/repo``), with a
    trailing slash, with extra path segments after the repository name, and
    with a ``.git`` clone suffix.

    Args:
        url: The repository URL as typed by the user.

    Returns:
        The owner and repository name.

    Raises:
        ValueError: If the host is not github.com or the path does not
            contain both an owner and a repository name.
    """
    url = url.strip().rstrip("/")
    # urlparse only recognises the host when a scheme is present; without one
    # the host would land in ``path`` and be mistaken for the owner.
    if "://" not in url:
        url = f"https://{url}"
    parsed = urlparse(url)
    if (parsed.hostname or "").lower() not in _GITHUB_HOSTS:
        raise ValueError("Please enter a valid GitHub URL (e.g. https://github.com/owner/repo)")

    parts = [segment for segment in parsed.path.split("/") if segment]
    if len(parts) < 2:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")

    owner = parts[0]
    # Clone URLs end in ".git", which is not part of the repository name.
    repo = parts[1].removesuffix(".git")
    if not repo:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")
    return owner, repo


def _is_candidate(item: dict) -> bool:
    """Return True if a git-tree entry is a small, text-like file worth sampling."""
    if item.get("type") != "blob":
        return False
    path = item["path"]
    if any(segment in _SKIP_DIRS for segment in path.split("/")):
        return False
    if os.path.splitext(path)[1].lower() in _SKIP_EXTS:
        return False
    return item.get("size", 0) <= _MAX_FILE_BYTES


def _sample_priority(path: str) -> int:
    """Sort key: README first, then root-level files, then shallower paths."""
    depth = path.count("/")
    if "readme" in path.lower():
        return 0
    if depth == 0:
        return 1
    if depth == 1:
        return 2
    return 3 + depth


def fetch_repo_files(owner: str, repo: str, github_token: str | None = None) -> dict[str, str]:
    """Download a prioritised sample of text files from a GitHub repository.

    Lists the repository tree at ``HEAD``, filters out skipped directories,
    skipped extensions and files larger than ``_MAX_FILE_BYTES``, then fetches
    up to ``_MAX_FILES`` files through the contents API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        github_token: Optional token sent as a ``Bearer`` authorization header.

    Returns:
        Mapping of file path to decoded text, each truncated to
        ``_MAX_CHARS_PER_FILE`` characters. Files that fail to download or
        decode are omitted.

    Raises:
        ValueError: If the tree request returns 404 or 403.
        requests.HTTPError: For any other unsuccessful tree response.
        requests.RequestException: If the tree request itself fails
            (for example on timeout).
    """
    headers = {"Accept": "application/vnd.github+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    # Get recursive file tree
    tree_url = f"{_GITHUB_API}/repos/{owner}/{repo}/git/trees/HEAD?recursive=1"
    resp = requests.get(tree_url, headers=headers, timeout=15)
    if resp.status_code == 404:
        raise ValueError("Repository not found or is private. For private repos, add a GitHub token.")
    if resp.status_code == 403:
        raise ValueError("GitHub API rate limit exceeded. Add a GitHub Personal Access Token to continue.")
    resp.raise_for_status()
    tree = resp.json()

    candidates = [item["path"] for item in tree.get("tree", []) if _is_candidate(item)]
    selected = sorted(candidates, key=_sample_priority)[:_MAX_FILES]

    file_contents: dict[str, str] = {}
    for path in selected:
        # Percent-encode the path so characters such as '#', '?' or spaces in
        # a file name cannot truncate or corrupt the request URL.
        url = f"{_GITHUB_API}/repos/{owner}/{repo}/contents/{quote(path)}"
        try:
            r = requests.get(url, headers=headers, timeout=10)
            if r.status_code != 200:
                continue
            data = r.json()
            if not isinstance(data, dict) or data.get("encoding") != "base64":
                continue
            content = data.get("content")
            if not isinstance(content, str):
                continue
            raw = base64.b64decode(content).decode("utf-8", errors="replace")
        except (requests.RequestException, ValueError):
            # Best effort: one unreachable or malformed file should not abort
            # the whole sample.
            continue
        file_contents[path] = raw[:_MAX_CHARS_PER_FILE]
    return file_contents


# ── Analysis ────────────────────────────────────────────────────────────────────

SYSTEM_PROMPT = """You are a senior software engineer conducting a professional code review.
Be specific, constructive, and reference actual file names and patterns you observed.
Structure your response exactly as requested."""


def build_analysis_prompt(owner: str, repo: str, file_contents: dict[str, str]) -> str:
    """Build the user prompt containing the sampled files and the report outline.

    Args:
        owner: Repository owner, used in the prompt header.
        repo: Repository name, used in the prompt header.
        file_contents: Mapping of file path to file text.

    Returns:
        The full prompt text.
    """
    rule = "─" * 60
    files_block = "".join(
        f"\n\n{rule}\n📄 FILE: {path}\n{rule}\n{content}"
        for path, content in file_contents.items()
    )
    return f"""Repository under review: github.com/{owner}/{repo}
Total files sampled: {len(file_contents)}
{files_block}

────────────────────────────────────────────────────────────
Please provide a structured analysis with the following sections:

## 🏗️ Code Quality & Structure
Evaluate:
- Overall architecture and folder/file organisation
- Naming conventions (variables, functions, classes, files)
- Function/class design — are they focused and well-sized?
- Error handling — is it present and robust?
- Code duplication or DRY violations
- Highlight 2–3 specific files as examples (good or bad)

## 📖 Documentation & README
Evaluate:
- README completeness: does it cover purpose, setup, usage, examples?
- Inline comments — are they useful or absent?
- Function/API documentation (docstrings, JSDoc, etc.)
- What's missing that a new contributor would need?

## 🏆 Scores
Rate each area out of 10:
- Code Quality & Structure: ?/10
- Documentation & README: ?/10
- Overall Repository Health: ?/10

Include one sentence justifying each score.

## ✅ Top 5 Actionable Recommendations
Numbered list. Be specific — mention file names or patterns where possible.
Order from most to least impactful.
"""


def analyze_repo(repo_url: str, github_token: str, gemini_api_key: str, progress=gr.Progress()):
    """Run the full pipeline for the UI and return a Markdown report.

    Args:
        repo_url: GitHub repository URL from the text box.
        github_token: Optional GitHub token from the UI.
        gemini_api_key: Gemini key from the UI; used only when the
            ``GEMINI_API_KEY`` environment variable is not set.
        progress: Gradio progress tracker.

    Returns:
        Markdown text: the report on success, or a message starting with
        "❌" on failure. Errors are returned as text rather than raised.
    """
    repo_url = repo_url or ""
    github_token = github_token or ""
    gemini_api_key = gemini_api_key or ""

    if not repo_url.strip():
        return "❌ **Error:** Please enter a GitHub repository URL."

    # A key configured in the environment takes precedence over the UI field.
    api_key = os.environ.get("GEMINI_API_KEY") or gemini_api_key.strip()
    if not api_key:
        return "❌ **Error:** Please enter your Gemini API key (free at [aistudio.google.com](https://aistudio.google.com))."

    try:
        progress(0.10, desc="Parsing repository URL…")
        owner, repo = parse_github_url(repo_url)

        progress(0.30, desc=f"Fetching files from {owner}/{repo}…")
        gh_token = github_token.strip() or None
        file_contents = fetch_repo_files(owner, repo, gh_token)
        if not file_contents:
            return "❌ **Error:** No readable source files found. The repo may be empty or contain only binary files."

        progress(0.65, desc="Running AI analysis with Gemini 2.0 Flash…")
        # Configure Gemini
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name="gemini-2.0-flash",
            system_instruction=SYSTEM_PROMPT,
        )
        response = model.generate_content(
            build_analysis_prompt(owner, repo, file_contents),
            generation_config=genai.GenerationConfig(
                max_output_tokens=2048,
                temperature=0.3,
            ),
        )

        progress(1.0, desc="Done!")
        report = response.text
        header = (
            f"## 🔍 Analysis Report — `{owner}/{repo}`\n"
            f"*{len(file_contents)} files sampled · Powered by Gemini 2.0 Flash*\n\n---\n\n"
        )
        return header + report
    except ValueError as exc:
        return f"❌ **Error:** {exc}"
    except requests.exceptions.Timeout:
        return "❌ **Error:** GitHub API timed out. Try again in a moment."
    except Exception as exc:
        # UI boundary: show any other failure to the user as text.
        return f"❌ **Unexpected error:** {exc}"


# ── UI ──────────────────────────────────────────────────────────────────────────

CSS = """
#title { text-align: center; margin-bottom: 4px; }
#sub { text-align: center; color: #6b7280; margin-bottom: 24px; }
#footer { text-align: center; color: #9ca3af; font-size: 0.85rem; margin-top: 16px; }
"""

with gr.Blocks(title="GitHub Repo Analyzer") as demo:
    gr.Markdown("# 🔍 GitHub Repo Analyzer", elem_id="title")
    gr.Markdown(
        "AI-powered **Code Quality & Documentation** analysis — paste any public repo and get a full report in seconds.\n\n"
        "_Powered by **Gemini 2.0 Flash** — blazing fast & free._",
        elem_id="sub",
    )

    with gr.Row():
        with gr.Column(scale=3):
            repo_url_input = gr.Textbox(
                label="GitHub Repository URL",
                placeholder="https://github.com/owner/repository",
                lines=1,
            )
        with gr.Column(scale=1):
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

    with gr.Accordion("⚙️ API Keys", open=False):
        gr.Markdown(
            "💡 _If the Space owner has set `GEMINI_API_KEY` as a HF Secret, you don't need to fill this in._\n\n"
            "Get a **free** Gemini API key at [aistudio.google.com](https://aistudio.google.com) — no credit card needed."
        )
        with gr.Row():
            gemini_key_input = gr.Textbox(
                label="Gemini API Key (free)",
                placeholder="AIza_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )
            github_token_input = gr.Textbox(
                label="GitHub Token (optional — raises rate limit to 5,000 req/hr)",
                placeholder="ghp_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )

    output_md = gr.Markdown(value="*Your report will appear here after analysis.*")

    # Input order must match analyze_repo's positional parameters.
    analyze_btn.click(
        fn=analyze_repo,
        inputs=[repo_url_input, github_token_input, gemini_key_input],
        outputs=output_md,
    )

    gr.Examples(
        examples=[
            ["https://github.com/tiangolo/fastapi"],
            ["https://github.com/gradio-app/gradio"],
            ["https://github.com/psf/requests"],
        ],
        inputs=repo_url_input,
        label="Try an example repo",
    )

    gr.Markdown(
        "---\nBuilt with **Gemini 2.0 Flash** · [Get your free key](https://aistudio.google.com) · "
        "Made by [Worply](https://worply.tech)",
        elem_id="footer",
    )

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft(primary_hue="violet"), css=CSS, ssr_mode=False)