Spaces:

Moealsarraj
/

devkit

Sleeping

devkit / app /tools /doc_forge /doc_generator.py

Mohammed AL Sarraj

initial deploy

950dcd2 about 1 month ago

5.5 kB

	"""AI-powered documentation generator for DocForge."""
	import json
	import logging

	from app.core.ai import call_ai, call_ai_json

	# System prompt for the README text call in generate_readme: asks the model
	# for raw Markdown only, and lists the sections the README must contain.
	_README_SYSTEM = """You are a senior technical writer and developer advocate.
	Generate a comprehensive, beautiful GitHub README.md for the given repository.
	Output ONLY the raw Markdown content — no JSON, no preamble, no code fences.
	README must include: title with badges, description, features list, installation,
	usage with examples, configuration, contributing guide, and license section.
	Use real emoji sparingly. Make it genuinely useful, not generic."""

	_README_META_SYSTEM = """You are a technical analyst. Given a repository description, return ONLY valid JSON.
	Return a JSON object with EXACTLY these keys:
	{"summary": "2-3 sentence plain English summary", "tech_stack": ["tech1", "tech2"], "key_features": ["feature 1", "feature 2"], "complexity": "beginner\|intermediate\|advanced"}
	No markdown fences, no preamble."""

	# System prompt for the architecture document call in generate_architecture:
	# asks for raw Markdown with four fixed sections, the last one holding a
	# fenced mermaid diagram.
	_ARCH_SYSTEM = """You are a software architect. Analyze the repository and write a clear architecture document.
	Output ONLY raw Markdown — no JSON, no preamble, no code fences around the whole document.
	Structure your response with these sections:
	## Architecture Overview
	(2-3 paragraphs explaining the overall design)
	## Key Components
	(bullet list: component name — file path — what it does)
	## Data Flow
	(numbered steps describing how data moves through the system)
	## Mermaid Diagram
	(a ```mermaid code block with a graph LR or flowchart diagram)"""

	# System prompt for the metadata call in generate_architecture: asks for a
	# bare JSON object with the "components" and "mermaid" keys that function
	# reads back. The doubled backslash is intentional: the prompt shows the
	# model a JSON-escaped newline (backslash + n) inside the example string.
	_ARCH_META_SYSTEM = """You are a technical analyst. Return ONLY valid JSON — no markdown, no preamble.
	{"components": [{"name": "X", "role": "Y", "file": "path/to/file"}], "mermaid": "graph LR\\n A --> B"}"""

	_API_SYSTEM = """You are a technical writer. Extract and document all API endpoints,
	functions, and classes from the code files provided.
	Output ONLY raw Markdown — no JSON, no preamble.
	Structure with these sections:
	## API Endpoints
	(table: Method \| Path \| Description \| Returns)
	## Functions
	(### FunctionName signature, then description and params as a bullet list)
	## Classes
	(### ClassName, then description and method list)"""


	def _build_context(repo_info: dict, tree: list[str], files: dict[str, str]) -> str:
	ctx = f"Repository: {repo_info['full_name']}\n"
	ctx += f"Description: {repo_info.get('description', 'No description')}\n"
	ctx += f"Primary language: {repo_info.get('language', 'Unknown')}\n"
	ctx += f"Stars: {repo_info.get('stars', 0)} Forks: {repo_info.get('forks', 0)}\n"
	if repo_info.get("topics"):
	ctx += f"Topics: {', '.join(repo_info['topics'])}\n"
	ctx += f"\nFile tree ({len(tree)} files, showing first 30):\n"
	ctx += "\n".join(f" {p}" for p in tree[:30])
	ctx += "\n\nKey file contents:\n"
	for path, content in list(files.items())[:5]:
	ctx += f"\n--- {path} ---\n{content[:1500]}\n"
	# Hard cap: Groq llama-3.1-8b has ~8k token context; keep prompt under ~12k chars
	return ctx[:12000]


	def generate_readme(repo_info: dict, tree: list[str],
	                    files: dict[str, str], api_key_row=None) -> dict:
	    """Generate a README and summary metadata for a repository.

	    Two model calls are made: one for the README text and one, best-effort,
	    for a small JSON metadata object. A failure of the metadata call does not
	    fail the request; default values are returned instead.

	    Args:
	        repo_info: Repository metadata, passed to ``_build_context``.
	        tree: File paths in the repository, passed to ``_build_context``.
	        files: Mapping of file path to content, passed to ``_build_context``.
	        api_key_row: Passed through unchanged to ``call_ai`` and
	            ``call_ai_json``.

	    Returns:
	        A dict with the keys ``readme`` (the ``call_ai`` result),
	        ``summary``, ``tech_stack``, ``key_features`` and ``complexity``.
	        The last four default to ``""``, ``[]``, ``[]`` and
	        ``"intermediate"`` when the metadata is missing.
	    """
	    ctx = _build_context(repo_info, tree, files)
	    # Generate README as plain text (more reliable than embedding in JSON)
	    readme_text = call_ai(
	        [{"role": "user", "content": f"Generate a README.md for this repository:\n\n{ctx}"}],
	        system=_README_SYSTEM,
	        max_tokens=2048,
	        api_key_row=api_key_row,
	    )
	    # Generate metadata as simple JSON. Only the first 3000 characters of the
	    # context are sent for this smaller call.
	    try:
	        meta = call_ai_json(
	            [{"role": "user", "content": f"Analyze this repository and return metadata JSON:\n{ctx[:3000]}"}],
	            system=_README_META_SYSTEM,
	            max_tokens=512,
	            api_key_row=api_key_row,
	        )
	    except Exception:
	        # Metadata is best-effort: keep the README, but leave a trace of the
	        # failure instead of discarding it silently.
	        logging.getLogger(__name__).warning(
	            "README metadata generation failed; using defaults", exc_info=True
	        )
	        meta = {}
	    # The model may return valid JSON that is not an object (e.g. a list).
	    if not isinstance(meta, dict):
	        meta = {}
	    return {
	        "readme": readme_text,
	        "summary": meta.get("summary", ""),
	        "tech_stack": meta.get("tech_stack", []),
	        "key_features": meta.get("key_features", []),
	        "complexity": meta.get("complexity", "intermediate"),
	    }


	def generate_architecture(repo_info: dict, tree: list[str],
	                          files: dict[str, str], api_key_row=None) -> dict:
	    """Generate an architecture document and component metadata.

	    Two model calls are made: one for the Markdown architecture document and
	    one, best-effort, for a JSON object listing components and a Mermaid
	    diagram. A failure of the metadata call does not fail the request; empty
	    defaults are returned instead.

	    Args:
	        repo_info: Repository metadata, passed to ``_build_context``.
	        tree: File paths in the repository, passed to ``_build_context``.
	        files: Mapping of file path to content, passed to ``_build_context``.
	        api_key_row: Passed through unchanged to ``call_ai`` and
	            ``call_ai_json``.

	    Returns:
	        A dict with the keys ``overview`` (the ``call_ai`` result),
	        ``components`` (default ``[]``) and ``mermaid`` (default ``""``).
	    """
	    ctx = _build_context(repo_info, tree, files)
	    overview_md = call_ai(
	        [{"role": "user", "content": f"Write an architecture document for this repository:\n\n{ctx}"}],
	        system=_ARCH_SYSTEM,
	        max_tokens=2048,
	        api_key_row=api_key_row,
	    )
	    # Only the first 3000 characters of the context are sent for the
	    # metadata call.
	    try:
	        meta = call_ai_json(
	            [{"role": "user", "content": f"List the key components and a Mermaid diagram for this repo:\n{ctx[:3000]}"}],
	            system=_ARCH_META_SYSTEM,
	            max_tokens=1024,
	            api_key_row=api_key_row,
	        )
	    except Exception:
	        # Metadata is best-effort: keep the overview, but leave a trace of the
	        # failure instead of discarding it silently.
	        logging.getLogger(__name__).warning(
	            "Architecture metadata generation failed; using defaults", exc_info=True
	        )
	        meta = {}
	    # The model may return valid JSON that is not an object (e.g. a list).
	    if not isinstance(meta, dict):
	        meta = {}
	    return {
	        "overview": overview_md,
	        "components": meta.get("components", []),
	        "mermaid": meta.get("mermaid", ""),
	    }


	def generate_api_docs(repo_info: dict, tree: list[str],
	                      files: dict[str, str], api_key_row=None) -> dict:
	    """Generate Markdown API documentation for a repository.

	    Builds the repository context with ``_build_context`` and makes a single
	    ``call_ai`` request using the ``_API_SYSTEM`` prompt.

	    Args:
	        repo_info: Repository metadata, passed to ``_build_context``.
	        tree: File paths in the repository, passed to ``_build_context``.
	        files: Mapping of file path to content, passed to ``_build_context``.
	        api_key_row: Passed through unchanged to ``call_ai``.

	    Returns:
	        A dict with the single key ``content`` holding the ``call_ai`` result.
	    """
	    repo_context = _build_context(repo_info, tree, files)
	    prompt = f"Document the API, functions, and classes from this codebase:\n\n{repo_context}"
	    messages = [{"role": "user", "content": prompt}]
	    result = {
	        "content": call_ai(
	            messages,
	            system=_API_SYSTEM,
	            max_tokens=2048,
	            api_key_row=api_key_row,
	        ),
	    }
	    return result