Spaces:
Sleeping
Sleeping
| """AI-powered documentation generator for DocForge.""" | |
| import json | |
| from app.core.ai import call_ai, call_ai_json | |
# System prompt passed as `system=` by generate_readme() for the README call.
# It instructs the model to return raw Markdown only (no JSON, preamble, or
# code fences) and lists the sections the README must contain.
_README_SYSTEM = """You are a senior technical writer and developer advocate.
Generate a comprehensive, beautiful GitHub README.md for the given repository.
Output ONLY the raw Markdown content — no JSON, no preamble, no code fences.
README must include: title with badges, description, features list, installation,
usage with examples, configuration, contributing guide, and license section.
Use real emoji sparingly. Make it genuinely useful, not generic."""
# System prompt passed as `system=` by generate_readme() for the metadata call.
# It requests a JSON object with the keys "summary", "tech_stack",
# "key_features" and "complexity" — the same keys generate_readme() reads back.
_README_META_SYSTEM = """You are a technical analyst. Given a repository description, return ONLY valid JSON.
Return a JSON object with EXACTLY these keys:
{"summary": "2-3 sentence plain English summary", "tech_stack": ["tech1", "tech2"], "key_features": ["feature 1", "feature 2"], "complexity": "beginner|intermediate|advanced"}
No markdown fences, no preamble."""
# System prompt passed as `system=` by generate_architecture() for the
# architecture document. It requests raw Markdown with four fixed sections:
# overview, key components, data flow, and a Mermaid diagram.
_ARCH_SYSTEM = """You are a software architect. Analyze the repository and write a clear architecture document.
Output ONLY raw Markdown — no JSON, no preamble, no code fences around the whole document.
Structure your response with these sections:
## Architecture Overview
(2-3 paragraphs explaining the overall design)
## Key Components
(bullet list: component name — file path — what it does)
## Data Flow
(numbered steps describing how data moves through the system)
## Mermaid Diagram
(a ```mermaid code block with a graph LR or flowchart diagram)"""
# System prompt passed as `system=` by generate_architecture() for the
# structured call. It requests JSON with "components" and "mermaid" keys.
# The doubled backslash keeps a literal `\n` escape inside the example JSON
# string rather than embedding a real newline in the prompt.
_ARCH_META_SYSTEM = """You are a technical analyst. Return ONLY valid JSON — no markdown, no preamble.
{"components": [{"name": "X", "role": "Y", "file": "path/to/file"}], "mermaid": "graph LR\\n A --> B"}"""
# System prompt passed as `system=` by generate_api_docs(). It requests raw
# Markdown split into three sections: API endpoints (as a table), functions,
# and classes.
_API_SYSTEM = """You are a technical writer. Extract and document all API endpoints,
functions, and classes from the code files provided.
Output ONLY raw Markdown — no JSON, no preamble.
Structure with these sections:
## API Endpoints
(table: Method | Path | Description | Returns)
## Functions
(### FunctionName signature, then description and params as a bullet list)
## Classes
(### ClassName, then description and method list)"""
| def _build_context(repo_info: dict, tree: list[str], files: dict[str, str]) -> str: | |
| ctx = f"Repository: {repo_info['full_name']}\n" | |
| ctx += f"Description: {repo_info.get('description', 'No description')}\n" | |
| ctx += f"Primary language: {repo_info.get('language', 'Unknown')}\n" | |
| ctx += f"Stars: {repo_info.get('stars', 0)} Forks: {repo_info.get('forks', 0)}\n" | |
| if repo_info.get("topics"): | |
| ctx += f"Topics: {', '.join(repo_info['topics'])}\n" | |
| ctx += f"\nFile tree ({len(tree)} files, showing first 30):\n" | |
| ctx += "\n".join(f" {p}" for p in tree[:30]) | |
| ctx += "\n\nKey file contents:\n" | |
| for path, content in list(files.items())[:5]: | |
| ctx += f"\n--- {path} ---\n{content[:1500]}\n" | |
| # Hard cap: Groq llama-3.1-8b has ~8k token context; keep prompt under ~12k chars | |
| return ctx[:12000] | |
def generate_readme(repo_info: dict, tree: list[str],
                    files: dict[str, str], api_key_row=None) -> dict:
    """Generate a README plus lightweight metadata for a repository.

    Two AI calls are made: one for the README body as plain text, and a
    second, best-effort call for structured metadata. A failure of the
    metadata call never fails the whole operation; defaults are used instead.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: Repository file paths, forwarded to ``_build_context``.
        files: Mapping of path to file content, forwarded to
            ``_build_context``.
        api_key_row: Forwarded unchanged to both AI calls.

    Returns:
        A dict with keys ``readme``, ``summary``, ``tech_stack``,
        ``key_features`` and ``complexity``. Metadata values that are missing
        or of an unexpected type fall back to ``""``, ``[]``, ``[]`` and
        ``"intermediate"`` respectively.
    """
    # Local import keeps this block self-contained.
    import logging

    ctx = _build_context(repo_info, tree, files)

    # Generate README as plain text (more reliable than embedding in JSON).
    readme_text = call_ai(
        [{"role": "user", "content": f"Generate a README.md for this repository:\n\n{ctx}"}],
        system=_README_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )

    # Metadata is best-effort: record the failure rather than hiding it, then
    # continue with defaults so the README itself is still returned.
    try:
        meta = call_ai_json(
            [{"role": "user", "content": f"Analyze this repository and return metadata JSON:\n{ctx[:3000]}"}],
            system=_README_META_SYSTEM,
            max_tokens=512,
            api_key_row=api_key_row,
        )
    except Exception:
        logging.getLogger(__name__).warning(
            "README metadata generation failed; using defaults", exc_info=True
        )
        meta = {}
    if not isinstance(meta, dict):
        meta = {}

    # The model may return null or a wrong-typed value for any key, and
    # dict.get()'s default does not cover a key that is present.
    summary = meta.get("summary")
    tech_stack = meta.get("tech_stack")
    key_features = meta.get("key_features")
    complexity = meta.get("complexity")

    return {
        "readme": readme_text,
        "summary": summary if isinstance(summary, str) else "",
        "tech_stack": tech_stack if isinstance(tech_stack, list) else [],
        "key_features": key_features if isinstance(key_features, list) else [],
        "complexity": complexity if isinstance(complexity, str) and complexity else "intermediate",
    }
def generate_architecture(repo_info: dict, tree: list[str],
                          files: dict[str, str], api_key_row=None) -> dict:
    """Generate an architecture document plus structured component data.

    Two AI calls are made: one for the Markdown overview, and a second,
    best-effort call for a component list and Mermaid diagram source. A
    failure of the structured call never fails the whole operation.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: Repository file paths, forwarded to ``_build_context``.
        files: Mapping of path to file content, forwarded to
            ``_build_context``.
        api_key_row: Forwarded unchanged to both AI calls.

    Returns:
        A dict with keys ``overview``, ``components`` and ``mermaid``.
        ``components`` falls back to ``[]`` and ``mermaid`` to ``""`` when
        the structured call fails or yields a missing or wrong-typed value.
    """
    # Local import keeps this block self-contained.
    import logging

    ctx = _build_context(repo_info, tree, files)

    overview_md = call_ai(
        [{"role": "user", "content": f"Write an architecture document for this repository:\n\n{ctx}"}],
        system=_ARCH_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )

    # Structured data is best-effort: record the failure rather than hiding
    # it, then continue with defaults so the overview is still returned.
    try:
        meta = call_ai_json(
            [{"role": "user", "content": f"List the key components and a Mermaid diagram for this repo:\n{ctx[:3000]}"}],
            system=_ARCH_META_SYSTEM,
            max_tokens=1024,
            api_key_row=api_key_row,
        )
    except Exception:
        logging.getLogger(__name__).warning(
            "Architecture metadata generation failed; using defaults", exc_info=True
        )
        meta = {}
    if not isinstance(meta, dict):
        meta = {}

    # Guard against null or wrong-typed values, which dict.get()'s default
    # does not cover when the key is present.
    components = meta.get("components")
    mermaid = meta.get("mermaid")

    return {
        "overview": overview_md,
        "components": components if isinstance(components, list) else [],
        "mermaid": mermaid if isinstance(mermaid, str) else "",
    }
def generate_api_docs(repo_info: dict, tree: list[str],
                      files: dict[str, str], api_key_row=None) -> dict:
    """Produce Markdown API documentation for a repository.

    The repository context is built with ``_build_context`` and sent to the
    model in a single call that uses the ``_API_SYSTEM`` prompt.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: Repository file paths, forwarded to ``_build_context``.
        files: Mapping of path to file content, forwarded to
            ``_build_context``.
        api_key_row: Forwarded unchanged to the AI call.

    Returns:
        A dict with the single key ``content`` holding the result of the AI
        call.
    """
    repo_context = _build_context(repo_info, tree, files)
    user_message = {
        "role": "user",
        "content": f"Document the API, functions, and classes from this codebase:\n\n{repo_context}",
    }
    markdown = call_ai(
        [user_message],
        system=_API_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )
    return {"content": markdown}