devkit / app /tools /git_narrator /narrator.py
Mohammed AL Sarraj
initial deploy
950dcd2
"""Git history narrator — turns raw commits into editorial prose."""
import os
import re
import requests
from app.core.ai import call_ai_json
# Base URL of the GitHub REST API; _fetch_github_commits appends
# "/repos/<owner>/<repo>/commits" to it.
_GITHUB_API = "https://api.github.com"
# System prompt passed to call_ai_json via `system=` in narrate(). It sets the
# writer persona and tells the model to reply with bare JSON (no markdown
# fences).
_SYSTEM = """You are a senior engineering writer and technical storyteller.
You transform dry git commit logs into compelling, editorial-quality engineering narratives.
Write like a thoughtful tech lead preparing a sprint retrospective for the whole company.
Be specific about what was built, avoid jargon where plain language works, and surface the human story behind the code.
Return ONLY valid JSON — no markdown fences."""
# User-prompt template. narrate() fills the single `{log}` placeholder with
# str.format(), which is why every literal JSON brace below is doubled
# (`{{` / `}}`). The template spells out the exact JSON keys the model is asked
# to return: period_label, highlights, tech_debt, milestones, commits and
# summary_stats.
_PROMPT_TMPL = """Narrate the following git history into an editorial engineering report.
GIT HISTORY:
---
{log}
---
Return a JSON object with EXACTLY these keys:
{{
"period_label": "<inferred time period, e.g. 'Sprint 42' or 'Week of Jan 15, 2025'>",
"highlights": [
{{
"title": "<punchy title for the biggest story>",
"narrative": "<2-3 paragraph editorial story — what changed, why it matters, what comes next>",
"key_commit": "<most important commit message verbatim>",
"impact": "<one sentence business impact>"
}}
],
"tech_debt": [
{{
"icon": "<material symbol name, e.g. cleaning_services|speed|bug_report|tune>",
"title": "<short title>",
"description": "<2-3 sentences on what was fixed/refactored and the measurable benefit>"
}}
],
"milestones": [
{{
"status": "<shipped|in_progress|planned>",
"title": "<milestone name>",
"narrative": "<editorial paragraph on this milestone>",
"contributors": ["<name or handle>"]
}}
],
"commits": [
{{
"hash": "<7-char hash or empty string>",
"message": "<commit message>",
"author": "<author name or handle>",
"time": "<relative time e.g. '2 hours ago'>",
"type": "<feat|fix|docs|refactor|chore|test|perf>"
}}
],
"summary_stats": {{
"total_commits": <integer>,
"contributors": <integer>,
"features": <integer>,
"fixes": <integer>
}}
}}
Aim for 2-3 highlights, 2-3 tech_debt items, 2-3 milestones.
If the log is sparse, extrapolate intelligently from what's there.
commits should list ALL commits from the log (max 20)."""
def _fetch_github_commits(owner: str, repo: str, limit: int = 30) -> str:
    """Fetch recent commits from GitHub and format them as git-log-style text.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        limit: Maximum number of commits to request. Clamped to 1..100, the
            range GitHub accepts for ``per_page``.

    Returns:
        One "commit / Author / Date / subject" stanza per commit, joined with
        blank lines. Only the first line of each commit message is kept.

    Raises:
        requests.HTTPError: If GitHub answers with a non-2xx status.
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not a JSON list.
    """
    from urllib.parse import quote

    headers = {"Accept": "application/vnd.github.v3+json"}
    token = os.environ.get("GITHUB_TOKEN", "")
    if token:
        # Authenticated requests are optional; the token is only sent when set.
        headers["Authorization"] = f"Bearer {token}"

    # owner/repo come from user input: escape them so characters such as
    # "?", "#" or "/" cannot alter the request path or inject query params.
    safe_owner = quote(owner, safe="")
    safe_repo = quote(repo, safe="")
    per_page = max(1, min(limit, 100))

    r = requests.get(
        f"{_GITHUB_API}/repos/{safe_owner}/{safe_repo}/commits",
        headers=headers,
        params={"per_page": per_page},
        timeout=20,
    )
    r.raise_for_status()

    payload = r.json()
    if not isinstance(payload, list):
        raise ValueError("unexpected GitHub response: expected a list of commits")

    lines = []
    for c in payload:
        # Any of these objects may be missing or null; degrade to placeholders
        # instead of aborting the whole fetch on one odd commit.
        commit = c.get("commit") or {}
        author = commit.get("author") or {}
        sha = (c.get("sha") or "")[:7]
        msg = (commit.get("message") or "").split("\n", 1)[0]
        name = author.get("name") or "unknown"
        date = author.get("date") or ""
        lines.append(f"commit {sha}\nAuthor: {name}\nDate: {date}\n\n {msg}\n")
    return "\n".join(lines)
def _parse_github_url(text: str):
"""Return (owner, repo) if text looks like a GitHub URL, else None."""
m = re.search(r"github\.com/([^/\s]+)/([^/\s]+?)(?:\.git)?(?:\s|$)", text)
if m:
return m.group(1), m.group(2)
parts = text.strip().split("/")
if len(parts) == 2 and " " not in text:
return parts[0], parts[1]
return None
def narrate(raw_input: str) -> dict:
    """Narrate a git history as an editorial engineering report.

    Args:
        raw_input: Either a GitHub repository reference (URL or ``owner/repo``)
            or raw git log text.

    Returns:
        Whatever ``call_ai_json`` returns for the filled-in prompt, or ``{}``
        when that result is falsy or when ``raw_input`` is blank.

    If the input is a repository reference, its recent commits are fetched
    from GitHub and used as the log. A failed fetch is logged and the original
    input is narrated instead (best effort), never raised to the caller.
    """
    import logging

    text = (raw_input or "").strip()
    if not text:
        # Nothing to narrate: skip the AI call rather than asking the model to
        # invent a report from an empty log.
        return {}

    log = raw_input
    parsed = _parse_github_url(text)
    if parsed:
        owner, repo = parsed
        try:
            log = _fetch_github_commits(owner, repo)
        except Exception:
            # Deliberate best-effort fallback, but leave a trace so a bad
            # token, rate limit or typo in the repo name can be diagnosed.
            logging.getLogger(__name__).warning(
                "GitHub fetch failed for %s/%s; narrating raw input instead",
                owner,
                repo,
                exc_info=True,
            )

    # Cap the log so the prompt stays within a predictable size.
    prompt = _PROMPT_TMPL.format(log=log[:8000])
    return call_ai_json([{"role": "user", "content": prompt}], system=_SYSTEM) or {}