"""Git history narrator — turns raw commits into editorial prose.""" import os import re import requests from app.core.ai import call_ai_json _GITHUB_API = "https://api.github.com" _SYSTEM = """You are a senior engineering writer and technical storyteller. You transform dry git commit logs into compelling, editorial-quality engineering narratives. Write like a thoughtful tech lead preparing a sprint retrospective for the whole company. Be specific about what was built, avoid jargon where plain language works, and surface the human story behind the code. Return ONLY valid JSON — no markdown fences.""" _PROMPT_TMPL = """Narrate the following git history into an editorial engineering report. GIT HISTORY: --- {log} --- Return a JSON object with EXACTLY these keys: {{ "period_label": "", "highlights": [ {{ "title": "", "narrative": "<2-3 paragraph editorial story — what changed, why it matters, what comes next>", "key_commit": "", "impact": "" }} ], "tech_debt": [ {{ "icon": "", "title": "", "description": "<2-3 sentences on what was fixed/refactored and the measurable benefit>" }} ], "milestones": [ {{ "status": "", "title": "", "narrative": "", "contributors": [""] }} ], "commits": [ {{ "hash": "<7-char hash or empty string>", "message": "", "author": "", "time": "", "type": "" }} ], "summary_stats": {{ "total_commits": , "contributors": , "features": , "fixes": }} }} Aim for 2-3 highlights, 2-3 tech_debt items, 2-3 milestones. If the log is sparse, extrapolate intelligently from what's there. commits should list ALL commits from the log (max 20).""" def _fetch_github_commits(owner: str, repo: str, limit: int = 30) -> str: """Fetch recent commits from GitHub and format as git log text.""" token = os.environ.get("GITHUB_TOKEN", "") headers = {"Accept": "application/vnd.github.v3+json"} if token: headers["Authorization"] = f"Bearer {token}" r = requests.get(f"{_GITHUB_API}/repos/{owner}/{repo}/commits?per_page={limit}", headers=headers, timeout=20) r.raise_for_status() lines = [] for c in r.json(): sha = c["sha"][:7] msg = c["commit"]["message"].split("\n")[0] name = c["commit"]["author"]["name"] date = c["commit"]["author"]["date"] lines.append(f"commit {sha}\nAuthor: {name}\nDate: {date}\n\n {msg}\n") return "\n".join(lines) def _parse_github_url(text: str): """Return (owner, repo) if text looks like a GitHub URL, else None.""" m = re.search(r"github\.com/([^/\s]+)/([^/\s]+?)(?:\.git)?(?:\s|$)", text) if m: return m.group(1), m.group(2) parts = text.strip().split("/") if len(parts) == 2 and " " not in text: return parts[0], parts[1] return None def narrate(raw_input: str) -> dict: """Narrate a git history. raw_input can be a GitHub URL or raw git log text.""" parsed = _parse_github_url(raw_input.strip()) if parsed: try: owner, repo = parsed log = _fetch_github_commits(owner, repo) except Exception as e: log = raw_input # fallback to treating as raw log else: log = raw_input prompt = _PROMPT_TMPL.format(log=log[:8000]) return call_ai_json([{"role": "user", "content": prompt}], system=_SYSTEM) or {}