File size: 5,496 Bytes
950dcd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""AI-powered documentation generator for DocForge."""
import json
import logging

from app.core.ai import call_ai, call_ai_json

# System prompt for the README generation request: passed as `system=` to
# call_ai() in generate_readme(). It asks for raw Markdown only, so the reply
# can be used directly as the README text.
_README_SYSTEM = """You are a senior technical writer and developer advocate.
Generate a comprehensive, beautiful GitHub README.md for the given repository.
Output ONLY the raw Markdown content — no JSON, no preamble, no code fences.
README must include: title with badges, description, features list, installation,
usage with examples, configuration, contributing guide, and license section.
Use real emoji sparingly. Make it genuinely useful, not generic."""

# System prompt for the README metadata request: passed as `system=` to
# call_ai_json() in generate_readme(). The keys requested here ("summary",
# "tech_stack", "key_features", "complexity") are the ones generate_readme()
# reads back from the parsed reply, so keep the two in sync.
_README_META_SYSTEM = """You are a technical analyst. Given a repository description, return ONLY valid JSON.
Return a JSON object with EXACTLY these keys:
{"summary": "2-3 sentence plain English summary", "tech_stack": ["tech1", "tech2"], "key_features": ["feature 1", "feature 2"], "complexity": "beginner|intermediate|advanced"}
No markdown fences, no preamble."""

# System prompt for the architecture document request: passed as `system=` to
# call_ai() in generate_architecture(). It asks for raw Markdown with four
# fixed sections, the last of which is a fenced mermaid diagram.
_ARCH_SYSTEM = """You are a software architect. Analyze the repository and write a clear architecture document.
Output ONLY raw Markdown — no JSON, no preamble, no code fences around the whole document.
Structure your response with these sections:
## Architecture Overview
(2-3 paragraphs explaining the overall design)
## Key Components
(bullet list: component name — file path — what it does)
## Data Flow
(numbered steps describing how data moves through the system)
## Mermaid Diagram
(a ```mermaid code block with a graph LR or flowchart diagram)"""

# System prompt for the architecture metadata request: passed as `system=` to
# call_ai_json() in generate_architecture(), which reads the "components" and
# "mermaid" keys from the parsed reply.
# NOTE: the `\\n` below is an escaped backslash, so the prompt shows the model
# a literal backslash-n (a JSON-escaped newline) rather than a real line break.
_ARCH_META_SYSTEM = """You are a technical analyst. Return ONLY valid JSON — no markdown, no preamble.
{"components": [{"name": "X", "role": "Y", "file": "path/to/file"}], "mermaid": "graph LR\\n  A --> B"}"""

# System prompt for the API reference request: passed as `system=` to
# call_ai() in generate_api_docs(). It asks for raw Markdown with three fixed
# sections (endpoints table, functions, classes).
_API_SYSTEM = """You are a technical writer. Extract and document all API endpoints,
functions, and classes from the code files provided.
Output ONLY raw Markdown — no JSON, no preamble.
Structure with these sections:
## API Endpoints
(table: Method | Path | Description | Returns)
## Functions
(### FunctionName signature, then description and params as a bullet list)
## Classes
(### ClassName, then description and method list)"""


def _build_context(repo_info: dict, tree: list[str], files: dict[str, str]) -> str:
    ctx = f"Repository: {repo_info['full_name']}\n"
    ctx += f"Description: {repo_info.get('description', 'No description')}\n"
    ctx += f"Primary language: {repo_info.get('language', 'Unknown')}\n"
    ctx += f"Stars: {repo_info.get('stars', 0)}  Forks: {repo_info.get('forks', 0)}\n"
    if repo_info.get("topics"):
        ctx += f"Topics: {', '.join(repo_info['topics'])}\n"
    ctx += f"\nFile tree ({len(tree)} files, showing first 30):\n"
    ctx += "\n".join(f"  {p}" for p in tree[:30])
    ctx += "\n\nKey file contents:\n"
    for path, content in list(files.items())[:5]:
        ctx += f"\n--- {path} ---\n{content[:1500]}\n"
    # Hard cap: Groq llama-3.1-8b has ~8k token context; keep prompt under ~12k chars
    return ctx[:12000]


def generate_readme(repo_info: dict, tree: list[str],
                    files: dict[str, str], api_key_row=None) -> dict:
    """Generate a README and summary metadata for a repository.

    Two model requests are made: one for the README as plain Markdown and one
    for a small JSON metadata object. The metadata request is best effort: if
    it fails or does not yield a dict, the failure is logged and defaults are
    returned so the README itself is not lost.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: File paths of the repository.
        files: Mapping of file path to file content.
        api_key_row: Forwarded unchanged to ``call_ai`` / ``call_ai_json``.

    Returns:
        A dict with the keys ``readme``, ``summary``, ``tech_stack``,
        ``key_features`` and ``complexity``.

    Raises:
        Any exception raised by the README request itself propagates to the
        caller; only the metadata request is guarded.
    """
    log = logging.getLogger(__name__)
    ctx = _build_context(repo_info, tree, files)

    # Generate README as plain text (more reliable than embedding in JSON)
    readme_text = call_ai(
        [{"role": "user", "content": f"Generate a README.md for this repository:\n\n{ctx}"}],
        system=_README_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )

    # Generate metadata as simple JSON, from a shorter slice of the context.
    try:
        meta = call_ai_json(
            [{"role": "user", "content": f"Analyze this repository and return metadata JSON:\n{ctx[:3000]}"}],
            system=_README_META_SYSTEM,
            max_tokens=512,
            api_key_row=api_key_row,
        )
    except Exception:
        # Deliberate best-effort boundary: keep the README, but leave a trace
        # instead of discarding the failure silently.
        log.warning("README metadata request failed; using defaults", exc_info=True)
        meta = {}
    if not isinstance(meta, dict):
        log.warning(
            "README metadata reply was %s, expected a JSON object; using defaults",
            type(meta).__name__,
        )
        meta = {}

    return {
        "readme": readme_text,
        "summary": meta.get("summary", ""),
        "tech_stack": meta.get("tech_stack", []),
        "key_features": meta.get("key_features", []),
        "complexity": meta.get("complexity", "intermediate"),
    }


def generate_architecture(repo_info: dict, tree: list[str],
                           files: dict[str, str], api_key_row=None) -> dict:
    """Generate an architecture document and component metadata for a repository.

    Two model requests are made: one for the architecture overview as plain
    Markdown and one for a JSON object listing components and a Mermaid
    diagram. The metadata request is best effort: if it fails or does not
    yield a dict, the failure is logged and empty defaults are returned so the
    overview itself is not lost.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: File paths of the repository.
        files: Mapping of file path to file content.
        api_key_row: Forwarded unchanged to ``call_ai`` / ``call_ai_json``.

    Returns:
        A dict with the keys ``overview``, ``components`` and ``mermaid``.

    Raises:
        Any exception raised by the overview request itself propagates to the
        caller; only the metadata request is guarded.
    """
    log = logging.getLogger(__name__)
    ctx = _build_context(repo_info, tree, files)

    overview_md = call_ai(
        [{"role": "user", "content": f"Write an architecture document for this repository:\n\n{ctx}"}],
        system=_ARCH_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )

    # Metadata is requested separately, from a shorter slice of the context.
    try:
        meta = call_ai_json(
            [{"role": "user", "content": f"List the key components and a Mermaid diagram for this repo:\n{ctx[:3000]}"}],
            system=_ARCH_META_SYSTEM,
            max_tokens=1024,
            api_key_row=api_key_row,
        )
    except Exception:
        # Deliberate best-effort boundary: keep the overview, but leave a
        # trace instead of discarding the failure silently.
        log.warning("Architecture metadata request failed; using defaults", exc_info=True)
        meta = {}
    if not isinstance(meta, dict):
        log.warning(
            "Architecture metadata reply was %s, expected a JSON object; using defaults",
            type(meta).__name__,
        )
        meta = {}

    return {
        "overview": overview_md,
        "components": meta.get("components", []),
        "mermaid": meta.get("mermaid", ""),
    }


def generate_api_docs(repo_info: dict, tree: list[str],
                      files: dict[str, str], api_key_row=None) -> dict:
    """Generate Markdown API documentation for a repository.

    Builds the repository context, sends it to the model with the API
    documentation system prompt and wraps the reply.

    Args:
        repo_info: Repository metadata, forwarded to ``_build_context``.
        tree: File paths of the repository.
        files: Mapping of file path to file content.
        api_key_row: Forwarded unchanged to ``call_ai``.

    Returns:
        A dict with the single key ``content`` holding the model's reply.
    """
    repo_context = _build_context(repo_info, tree, files)
    prompt = f"Document the API, functions, and classes from this codebase:\n\n{repo_context}"
    messages = [{"role": "user", "content": prompt}]
    markdown = call_ai(
        messages,
        system=_API_SYSTEM,
        max_tokens=2048,
        api_key_row=api_key_row,
    )
    return {"content": markdown}