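# NOTE(review): the three lines below appear to be page residue from the file viewer this
# source was copied from (not code); kept as comments so the module parses.
# Wall06's picture
# Update app.py
# 83ed75e verified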
import gradio as gr
import requests
import os
import base64
from urllib.parse import urlparse
import google.generativeai as genai
# ── Helpers ──────────────────────────────────────────────────────────────────
def parse_github_url(url: str) -> tuple[str, str]:
    """Extract ``(owner, repo)`` from a GitHub repository URL.

    Accepts URLs with or without a scheme (``https://github.com/o/r`` or
    ``github.com/o/r``), ignores trailing slashes, extra path segments
    (``/tree/main/...``), query strings and fragments, and strips a trailing
    ``.git`` from the repository name.

    Args:
        url: The URL as typed by the user.

    Returns:
        A ``(owner, repo)`` tuple.

    Raises:
        ValueError: If the host is not github.com or the path does not
            contain both an owner and a repository name.
    """
    url = url.strip().rstrip("/")

    # urlparse only recognises the host when a scheme is present; without one,
    # "github.com/owner/repo" is parsed entirely as a path and the host name
    # would be mistaken for the owner.
    if "://" not in url:
        url = "https://" + url.lstrip("/")

    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    if host not in {"github.com", "www.github.com"}:
        raise ValueError("Please enter a valid GitHub URL (e.g. https://github.com/owner/repo)")

    parts = [segment for segment in parsed.path.split("/") if segment]
    if len(parts) < 2:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")

    owner = parts[0]
    repo = parts[1].removesuffix(".git")  # clone URLs end in ".git"
    if not repo:
        raise ValueError("Could not extract owner/repo — make sure the URL includes both.")
    return owner, repo
# Directory names whose contents are never sampled (dependencies, build output, VCS data).
_SKIP_DIRS = frozenset({
    "node_modules", ".git", "__pycache__", "venv", "env",
    "dist", "build", ".next", "vendor", ".venv", "coverage",
})
# File extensions that are skipped (images, archives, fonts, media, lock files, binaries).
_SKIP_EXTS = frozenset({
    ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".pdf",
    ".zip", ".woff", ".ttf", ".eot", ".mp4", ".mp3", ".lock",
    ".bin", ".exe", ".so", ".dylib",
})
_MAX_FILE_BYTES = 60_000      # skip files larger than this (size as reported by the tree API)
_MAX_FILES = 18               # number of files downloaded per analysis
_MAX_CHARS_PER_FILE = 3_500   # per-file cap on the text kept for the prompt


def fetch_repo_files(owner: str, repo: str, github_token: str | None = None) -> dict[str, str]:
    """Download a prioritised sample of text files from a GitHub repository.

    The recursive tree at ``HEAD`` is listed once, filtered by directory name,
    extension and size, sorted so that READMEs and shallow paths come first,
    and the first ``_MAX_FILES`` entries are fetched through the contents API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        github_token: Optional token sent as a ``Bearer`` authorization header.

    Returns:
        Mapping of repository-relative path to the first
        ``_MAX_CHARS_PER_FILE`` characters of the file, decoded as UTF-8 with
        undecodable bytes replaced. Files that fail to download or decode are
        omitted.

    Raises:
        ValueError: If the tree request returns 404 (missing/private repo) or
            403/429 (rate limited).
        requests.exceptions.RequestException: For any other failure of the
            tree request (including timeouts).
    """
    # Local import so this block does not depend on a change to the file's import section.
    from urllib.parse import quote

    headers = {"Accept": "application/vnd.github+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    # Get recursive file tree
    tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/HEAD?recursive=1"
    resp = requests.get(tree_url, headers=headers, timeout=15)
    if resp.status_code == 404:
        raise ValueError("Repository not found or is private. For private repos, add a GitHub token.")
    if resp.status_code in (403, 429):  # GitHub signals rate limiting with either status
        raise ValueError("GitHub API rate limit exceeded. Add a GitHub Personal Access Token to continue.")
    resp.raise_for_status()
    tree = resp.json()

    candidates = []
    for item in tree.get("tree", []):
        if item.get("type") != "blob":
            continue
        path = item["path"]
        if any(seg in _SKIP_DIRS for seg in path.split("/")):
            continue
        if os.path.splitext(path)[1].lower() in _SKIP_EXTS:
            continue
        if item.get("size", 0) > _MAX_FILE_BYTES:
            continue
        candidates.append(path)

    # Prioritise: README first, then root-level, then shallow paths
    def priority(p: str) -> int:
        depth = p.count("/")
        # Look at the file name only, so a directory that merely contains
        # "readme" in its name does not promote every file inside it.
        if "readme" in p.rsplit("/", 1)[-1].lower():
            return 0
        if depth == 0:
            return 1
        if depth == 1:
            return 2
        return 3 + depth

    selected = sorted(candidates, key=priority)[:_MAX_FILES]

    file_contents: dict[str, str] = {}
    for path in selected:
        # Percent-encode the path (slashes kept) so names containing spaces,
        # '#', '?' or non-ASCII characters form a valid request URL.
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{quote(path)}"
        try:
            r = requests.get(url, headers=headers, timeout=10)
            if r.status_code != 200:
                continue
            data = r.json()
            if not isinstance(data, dict) or data.get("encoding") != "base64":
                continue
            raw = base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
            # Best effort: one unreadable file must not abort the whole sample.
            continue
        file_contents[path] = raw[:_MAX_CHARS_PER_FILE]  # cap per-file
    return file_contents
# ── Analysis ─────────────────────────────────────────────────────────────────
# System instruction handed to the Gemini model (passed as `system_instruction`
# when the model is constructed in analyze_repo).
SYSTEM_PROMPT = """You are a senior software engineer conducting a professional code review.
Be specific, constructive, and reference actual file names and patterns you observed.
Structure your response exactly as requested."""
def build_analysis_prompt(owner: str, repo: str, file_contents: dict[str, str]) -> str:
    """Build the user prompt that asks the model for a structured repo review.

    Args:
        owner: Repository owner, used in the prompt header.
        repo: Repository name, used in the prompt header.
        file_contents: Mapping of file path to (already truncated) file text;
            files appear in the prompt in the mapping's iteration order.

    Returns:
        The prompt text: a header, one delimited section per sampled file,
        and the fixed list of report sections the model must produce.
    """
    rule = "─" * 60
    # Join once instead of growing a string with += inside the loop.
    files_block = "".join(
        f"\n\n{rule}\n📄 FILE: {path}\n{rule}\n{content}"
        for path, content in file_contents.items()
    )
    return f"""Repository under review: github.com/{owner}/{repo}
Total files sampled: {len(file_contents)}
{files_block}
{rule}
Please provide a structured analysis with the following sections:
## 🏗️ Code Quality & Structure
Evaluate:
- Overall architecture and folder/file organisation
- Naming conventions (variables, functions, classes, files)
- Function/class design — are they focused and well-sized?
- Error handling — is it present and robust?
- Code duplication or DRY violations
- Highlight 2–3 specific files as examples (good or bad)
## 📖 Documentation & README
Evaluate:
- README completeness: does it cover purpose, setup, usage, examples?
- Inline comments — are they useful or absent?
- Function/API documentation (docstrings, JSDoc, etc.)
- What's missing that a new contributor would need?
## 🏆 Scores
Rate each area out of 10:
- Code Quality & Structure: ?/10
- Documentation & README: ?/10
- Overall Repository Health: ?/10
Include one sentence justifying each score.
## ✅ Top 5 Actionable Recommendations
Numbered list. Be specific — mention file names or patterns where possible.
Order from most to least impactful.
"""
def analyze_repo(repo_url: str, github_token: str, gemini_api_key: str, progress=gr.Progress()):
    """Fetch a GitHub repository sample and return a Gemini-written review.

    This is the click handler for the Analyze button. Every failure is turned
    into a Markdown error message rather than raised, so the UI always has
    something to render.

    Args:
        repo_url: GitHub repository URL typed by the user (``None`` is treated as empty).
        github_token: Optional GitHub token; blank means unauthenticated requests.
        gemini_api_key: Gemini API key from the form. The ``GEMINI_API_KEY``
            environment variable, when set, takes precedence over this value.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        Markdown text: the analysis report preceded by a header, or an error message.
    """
    repo_url = repo_url or ""
    github_token = github_token or ""
    gemini_api_key = gemini_api_key or ""

    if not repo_url.strip():
        return "❌ **Error:** Please enter a GitHub repository URL."

    api_key = os.environ.get("GEMINI_API_KEY") or gemini_api_key.strip()
    if not api_key:
        return "❌ **Error:** Please enter your Gemini API key (free at [aistudio.google.com](https://aistudio.google.com))."

    try:
        progress(0.10, desc="Parsing repository URL…")
        owner, repo = parse_github_url(repo_url)

        progress(0.30, desc=f"Fetching files from {owner}/{repo}…")
        gh_token = github_token.strip() or None
        file_contents = fetch_repo_files(owner, repo, gh_token)
        if not file_contents:
            return "❌ **Error:** No readable source files found. The repo may be empty or contain only binary files."

        progress(0.65, desc="Running AI analysis with Gemini 2.0 Flash…")
        # Configure Gemini
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name="gemini-2.0-flash",
            system_instruction=SYSTEM_PROMPT,
        )
        response = model.generate_content(
            build_analysis_prompt(owner, repo, file_contents),
            generation_config=genai.GenerationConfig(
                max_output_tokens=2048,
                temperature=0.3,
            ),
        )
        progress(1.0, desc="Done!")

        report = response.text
        header = (
            f"## 🔍 Analysis Report — `{owner}/{repo}`\n"
            f"*{len(file_contents)} files sampled · Powered by Gemini 2.0 Flash*\n\n---\n\n"
        )
        return header + report
    except ValueError as exc:
        # Raised by the helpers for bad URLs, missing repos and rate limits.
        return f"❌ **Error:** {exc}"
    except requests.exceptions.Timeout:
        return "❌ **Error:** GitHub API timed out. Try again in a moment."
    except Exception as exc:
        # Top-level UI boundary: show the failure instead of crashing the handler.
        return f"❌ **Unexpected error:** {exc}"
# ── UI ───────────────────────────────────────────────────────────────────────
# Custom stylesheet handed to demo.launch(css=...). The selectors match the
# elem_id values given to the title, subtitle and footer Markdown components.
CSS = """
#title { text-align: center; margin-bottom: 4px; }
#sub { text-align: center; color: #6b7280; margin-bottom: 24px; }
#footer { text-align: center; color: #9ca3af; font-size: 0.85rem; margin-top: 16px; }
"""
# Page layout: title and subtitle, URL box with Analyze button, a collapsed
# accordion for API keys, the report area, example repos, and a footer.
with gr.Blocks(title="GitHub Repo Analyzer") as demo:
    gr.Markdown("# 🔍 GitHub Repo Analyzer", elem_id="title")
    gr.Markdown(
        "AI-powered **Code Quality & Documentation** analysis — paste any public repo and get a full report in seconds.\n\n"
        "_Powered by **Gemini 2.0 Flash** — blazing fast & free._",
        elem_id="sub",
    )

    with gr.Row():
        with gr.Column(scale=3):
            repo_url_input = gr.Textbox(
                label="GitHub Repository URL",
                placeholder="https://github.com/owner/repository",
                lines=1,
            )
        with gr.Column(scale=1):
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

    with gr.Accordion("⚙️ API Keys", open=False):
        gr.Markdown(
            "💡 _If the Space owner has set `GEMINI_API_KEY` as a HF Secret, you don't need to fill this in._\n\n"
            "Get a **free** Gemini API key at [aistudio.google.com](https://aistudio.google.com) — no credit card needed."
        )
        with gr.Row():
            gemini_key_input = gr.Textbox(
                label="Gemini API Key (free)",
                placeholder="AIza_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )
            github_token_input = gr.Textbox(
                label="GitHub Token (optional — raises rate limit to 5,000 req/hr)",
                placeholder="ghp_xxxxxxxxxxxx",
                type="password",
                lines=1,
            )

    output_md = gr.Markdown(value="*Your report will appear here after analysis.*")

    # Input order must match analyze_repo(repo_url, github_token, gemini_api_key).
    analyze_btn.click(
        fn=analyze_repo,
        inputs=[repo_url_input, github_token_input, gemini_key_input],
        outputs=output_md,
    )

    gr.Examples(
        examples=[
            ["https://github.com/tiangolo/fastapi"],
            ["https://github.com/gradio-app/gradio"],
            ["https://github.com/psf/requests"],
        ],
        inputs=repo_url_input,
        label="Try an example repo",
    )

    gr.Markdown(
        "---\nBuilt with **Gemini 2.0 Flash** · [Get your free key](https://aistudio.google.com) · "
        "Made by [Worply](https://worply.tech)",
        elem_id="footer",
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        theme=gr.themes.Soft(primary_hue="violet"),
        css=CSS,
        ssr_mode=False,
    )