Spaces:

Moealsarraj
/

devkit

Sleeping

devkit / app /tools /git_narrator /narrator.py

Mohammed AL Sarraj

initial deploy

950dcd2 about 1 month ago

3.98 kB

	"""Git history narrator — turns raw commits into editorial prose."""
	import os
	import re
	import requests
	from app.core.ai import call_ai_json

	_GITHUB_API = "https://api.github.com"
	_SYSTEM = """You are a senior engineering writer and technical storyteller.
	You transform dry git commit logs into compelling, editorial-quality engineering narratives.
	Write like a thoughtful tech lead preparing a sprint retrospective for the whole company.
	Be specific about what was built, avoid jargon where plain language works, and surface the human story behind the code.
	Return ONLY valid JSON — no markdown fences."""

	_PROMPT_TMPL = """Narrate the following git history into an editorial engineering report.

	GIT HISTORY:
	---
	{log}
	---

	Return a JSON object with EXACTLY these keys:
	{{
	"period_label": "<inferred time period, e.g. 'Sprint 42' or 'Week of Jan 15, 2025'>",
	"highlights": [
	{{
	"title": "<punchy title for the biggest story>",
	"narrative": "<2-3 paragraph editorial story — what changed, why it matters, what comes next>",
	"key_commit": "<most important commit message verbatim>",
	"impact": "<one sentence business impact>"
	}}
	],
	"tech_debt": [
	{{
	"icon": "<material symbol name, e.g. cleaning_services\|speed\|bug_report\|tune>",
	"title": "<short title>",
	"description": "<2-3 sentences on what was fixed/refactored and the measurable benefit>"
	}}
	],
	"milestones": [
	{{
	"status": "<shipped\|in_progress\|planned>",
	"title": "<milestone name>",
	"narrative": "<editorial paragraph on this milestone>",
	"contributors": ["<name or handle>"]
	}}
	],
	"commits": [
	{{
	"hash": "<7-char hash or empty string>",
	"message": "<commit message>",
	"author": "<author name or handle>",
	"time": "<relative time e.g. '2 hours ago'>",
	"type": "<feat\|fix\|docs\|refactor\|chore\|test\|perf>"
	}}
	],
	"summary_stats": {{
	"total_commits": <integer>,
	"contributors": <integer>,
	"features": <integer>,
	"fixes": <integer>
	}}
	}}

	Aim for 2-3 highlights, 2-3 tech_debt items, 2-3 milestones.
	If the log is sparse, extrapolate intelligently from what's there.
	commits should list ALL commits from the log (max 20)."""


	def _fetch_github_commits(owner: str, repo: str, limit: int = 30) -> str:
	    """Fetch recent commits from GitHub and format them as git-log-style text.

	    Args:
	        owner: Account or organisation that owns the repository.
	        repo: Repository name.
	        limit: Maximum number of commits to request. Clamped to 1-100
	            before being sent as ``per_page``.

	    Returns:
	        One ``commit / Author / Date / subject`` stanza per commit, separated
	        by blank lines. Empty string when the response lists no commits.

	    Raises:
	        requests.HTTPError: If GitHub answers with an error status
	            (via ``raise_for_status``).
	        requests.RequestException: On connection failure or timeout.
	    """
	    from urllib.parse import quote  # local import: only this function needs it

	    headers = {"Accept": "application/vnd.github.v3+json"}
	    token = os.environ.get("GITHUB_TOKEN", "")
	    if token:
	        # Authenticated requests are optional; the token is only sent when set.
	        headers["Authorization"] = f"Bearer {token}"

	    # Escape the path segments so characters such as "?" or "#" coming from a
	    # loosely parsed URL cannot alter the request path or query string.
	    safe_owner = quote(owner, safe="")
	    safe_repo = quote(repo, safe="")
	    per_page = max(1, min(int(limit), 100))

	    r = requests.get(
	        f"{_GITHUB_API}/repos/{safe_owner}/{safe_repo}/commits",
	        params={"per_page": per_page},
	        headers=headers,
	        timeout=20,
	    )
	    r.raise_for_status()

	    lines = []
	    for c in r.json():
	        # Any of these fields may be missing or null in the payload; degrade
	        # to placeholders instead of aborting the whole listing.
	        commit = c.get("commit") or {}
	        author = commit.get("author") or {}
	        sha = (c.get("sha") or "")[:7]
	        msg = (commit.get("message") or "").split("\n")[0]  # subject line only
	        name = author.get("name") or "unknown"
	        date = author.get("date") or ""
	        lines.append(f"commit {sha}\nAuthor: {name}\nDate: {date}\n\n {msg}\n")
	    return "\n".join(lines)


	def _parse_github_url(text: str):
	"""Return (owner, repo) if text looks like a GitHub URL, else None."""
	m = re.search(r"github\.com/([^/\s]+)/([^/\s]+?)(?:\.git)?(?:\s\|$)", text)
	if m:
	return m.group(1), m.group(2)
	parts = text.strip().split("/")
	if len(parts) == 2 and " " not in text:
	return parts[0], parts[1]
	return None


	def narrate(raw_input: str) -> dict:
	    """Narrate a git history as an editorial report.

	    Args:
	        raw_input: Either a GitHub repository reference (URL or
	            ``owner/repo``) or raw ``git log`` text.

	    Returns:
	        Whatever ``call_ai_json`` produces for the rendered prompt, or an
	        empty dict when that result is falsy or when *raw_input* is blank.
	        (Presumably a dict shaped like the JSON schema in the prompt
	        template — the model's output is not validated here.)
	    """
	    import logging  # local import: only the failure path below needs it

	    # Nothing to narrate: skip the AI call instead of asking the model to
	    # invent a report from an empty log.
	    if not raw_input or not raw_input.strip():
	        return {}

	    log = raw_input
	    parsed = _parse_github_url(raw_input.strip())
	    if parsed:
	        owner, repo = parsed
	        try:
	            log = _fetch_github_commits(owner, repo)
	        except Exception:
	            # Best effort: if GitHub cannot be reached or the payload is
	            # unexpected, narrate the raw input as given — but leave a trace
	            # rather than hiding the failure.
	            logging.getLogger(__name__).warning(
	                "Could not fetch commits for %s/%s; using raw input instead",
	                owner,
	                repo,
	                exc_info=True,
	            )

	    # Cap the log at 8000 characters to bound the prompt size.
	    prompt = _PROMPT_TMPL.format(log=log[:8000])
	    return call_ai_json([{"role": "user", "content": prompt}], system=_SYSTEM) or {}