diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..f348aec2ae46fd47876f833eb93896b07af6d1dc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +__pycache__ +*.pyc +.env +*.db diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..72e218f9341906588d43118a0deec74ab9116e1e --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +GROQ_API_KEY= +CEREBRAS_API_KEY= +OPENROUTER_API_KEY= +MISTRAL_API_KEY= +SECRET_KEY=change-me diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..e6a9136fb72eb119c710d969f332e3ba32263fde --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.12-slim +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY . . +EXPOSE 7860 +CMD ["gunicorn", "wsgi:app", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120", "--access-logfile", "-"] diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d317a689c9373beeec3bb77f80b5de60951fc3ee --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,42 @@ +"""DevKit — Flask app factory.""" +import os +from flask import Flask +from flask_wtf.csrf import CSRFProtect + +_csrf = CSRFProtect() + + +def create_app(): + app = Flask(__name__, template_folder="templates") + app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "dev-devkit-2026") + app.config["MAX_CONTENT_LENGTH"] = 30 * 1024 * 1024 + _csrf.init_app(app) + + from app.home.routes import bp as home_bp + app.register_blueprint(home_bp) + + from app.tools.schema_detective.routes import bp as schema_detective_bp + app.register_blueprint(schema_detective_bp, url_prefix='/schema-detective') + + from app.tools.test_forge.routes import bp as test_forge_bp + app.register_blueprint(test_forge_bp, url_prefix='/test-forge') + + from app.tools.sql_whisperer.routes import bp as sql_whisperer_bp + 
app.register_blueprint(sql_whisperer_bp, url_prefix='/sql-whisperer') + + from app.tools.doc_forge.routes import bp as doc_forge_bp + app.register_blueprint(doc_forge_bp, url_prefix='/doc-forge') + + from app.tools.changelog_ai.routes import bp as changelog_ai_bp + app.register_blueprint(changelog_ai_bp, url_prefix='/changelog-ai') + + from app.tools.git_narrator.routes import bp as git_narrator_bp + app.register_blueprint(git_narrator_bp, url_prefix='/git-narrator') + + from flask import jsonify + @app.errorhandler(Exception) + def _handle_exc(e): + code = getattr(e, "code", 500) + return jsonify({"error": str(e)}), code + + return app \ No newline at end of file diff --git a/app/__pycache__/__init__.cpython-314.pyc b/app/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad087cefaa777f87ecce333cf34dc42bbd1474fa Binary files /dev/null and b/app/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/core/__pycache__/__init__.cpython-314.pyc b/app/core/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da8eb4b0fe1f9095c6f73af499313d5c64d9b7d5 Binary files /dev/null and b/app/core/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/core/__pycache__/ai.cpython-314.pyc b/app/core/__pycache__/ai.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..286e1adbe1a028342c5e49ece06471d10e4f1356 Binary files /dev/null and b/app/core/__pycache__/ai.cpython-314.pyc differ diff --git a/app/core/ai.py b/app/core/ai.py new file mode 100644 index 0000000000000000000000000000000000000000..2b6805aa3fb7ad58c671019bc0836295ad8b33b2 --- /dev/null +++ b/app/core/ai.py @@ -0,0 +1,219 @@ +"""Multi-provider AI engine. 
Runtime chain: Groq -> Cerebras -> OpenRouter -> Mistral -> Ollama.""" +import json, logging, os, re, requests + +logger = logging.getLogger(__name__) +_OLLAMA_BASE = "http://localhost:11434" + +_PROVIDER_URLS = { + "groq": "https://api.groq.com/openai/v1/chat/completions", + "cerebras": "https://api.cerebras.ai/v1/chat/completions", + "openrouter": "https://openrouter.ai/api/v1/chat/completions", + "mistral": "https://api.mistral.ai/v1/chat/completions", + "openai": "https://api.openai.com/v1/chat/completions", +} +_FREE_MODELS = { + "groq": "llama-3.1-8b-instant", + "cerebras": "llama3.1-8b", + "openrouter": "google/gemma-3-12b-it:free", + "mistral": "mistral-small-latest", +} +_PREMIUM_MODELS = { + "groq": "llama-3.3-70b-versatile", + "cerebras": "qwen-3-235b-a22b-instruct-2507", + "openrouter": "google/gemma-3-27b-it:free", + "mistral": "mistral-medium-latest", + "openai": "gpt-4o-mini", +} +_CHAIN_CFG = [ + {"name": "groq", "key_env": "GROQ_API_KEY", "timeout": 30, "extra": {}}, + {"name": "cerebras", "key_env": "CEREBRAS_API_KEY", "timeout": 30, "extra": {}}, + {"name": "openrouter", "key_env": "OPENROUTER_API_KEY", "timeout": 45, + "extra": {"HTTP-Referer": "https://github.com/Moealsarraj", "X-Title": "AI Tools"}}, + {"name": "mistral", "key_env": "MISTRAL_API_KEY", "timeout": 40, "extra": {}}, +] + +# Build the runtime provider list — all providers with valid keys +_PROVIDERS = [] +for _p in _CHAIN_CFG: + _k = os.environ.get(_p["key_env"], "") + if _k: + _PROVIDERS.append({ + "name": _p["name"], + "url": _PROVIDER_URLS[_p["name"]], + "model": _FREE_MODELS[_p["name"]], + "key": _k, + "timeout": _p["timeout"], + "extra": _p["extra"], + }) + +# Ollama fallback +_OLLAMA_PROVIDER = None +try: + _r = requests.get(f"{_OLLAMA_BASE}/api/tags", timeout=3) + if _r.status_code == 200: + _installed = [m["name"] for m in _r.json().get("models", [])] + if _installed: + _OLLAMA_PROVIDER = {"name": "ollama", "model": _installed[0]} +except Exception: + pass + +_AI_AVAILABLE 
= bool(_PROVIDERS or _OLLAMA_PROVIDER) + +_RE_THINK = re.compile(r".*?", re.DOTALL) +_RE_OPEN = re.compile(r"^```[a-z]*\n?", re.MULTILINE) +_RE_CLOSE = re.compile(r"\n?```$", re.MULTILINE) + +def _clean(raw: str) -> str: + raw = _RE_THINK.sub("", raw).strip() + raw = _RE_OPEN.sub("", raw) + return _RE_CLOSE.sub("", raw).strip() + +def _post_openai(url, key, model, messages, max_tokens, extra_headers, timeout=60): + headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"} + headers.update(extra_headers) + r = requests.post(url, headers=headers, + json={"model": model, "messages": messages, "max_tokens": max_tokens}, + timeout=timeout) + r.raise_for_status() + return _clean(r.json()["choices"][0]["message"]["content"]) + +def call_ai(messages: list, system: str = "", max_tokens: int = 2048, + api_key_row: dict | None = None) -> str: + if system: + messages = [{"role": "system", "content": system}] + messages + # Custom API key path (used by e.g. Wasit/Amin integrations) + if api_key_row: + provider = api_key_row.get("provider", "openai") + key = api_key_row["key"] + url = api_key_row.get("url") or _PROVIDER_URLS.get(provider, "") + model = api_key_row.get("model") or _PREMIUM_MODELS.get(provider, "gpt-4o-mini") + if not url: + raise ValueError(f"No endpoint known for provider {provider!r}") + if provider == "claude": + r = requests.post("https://api.anthropic.com/v1/messages", + headers={"x-api-key": key, "anthropic-version": "2023-06-01", + "content-type": "application/json"}, + json={"model": "claude-sonnet-4-6", "max_tokens": max_tokens, "messages": messages}, + timeout=60) + r.raise_for_status() + return _clean(r.json()["content"][0]["text"]) + return _post_openai(url, key, model, messages, max_tokens, {}) + if not _AI_AVAILABLE: + raise RuntimeError("No AI provider. 
Set GROQ_API_KEY or similar in .env") + # Ollama-only path + if not _PROVIDERS and _OLLAMA_PROVIDER: + r = requests.post(f"{_OLLAMA_BASE}/api/chat", + json={"model": _OLLAMA_PROVIDER["model"], "messages": messages, "stream": False}, + timeout=120) + r.raise_for_status() + return _clean(r.json()["message"]["content"]) + # Runtime chain: try each provider, fall back on 429 or transient errors + last_exc = None + for prov in _PROVIDERS: + try: + return _post_openai( + prov["url"], prov["key"], prov["model"], + messages, max_tokens, prov["extra"], prov["timeout"] + ) + except requests.exceptions.HTTPError as e: + status = e.response.status_code if e.response is not None else 0 + if status in (429, 503, 502): + logger.debug("Provider %s returned %s, trying next", prov["name"], status) + last_exc = e + continue + raise + except (requests.exceptions.ConnectionError, + requests.exceptions.Timeout) as e: + last_exc = e + continue + # Try Ollama as last resort + if _OLLAMA_PROVIDER: + r = requests.post(f"{_OLLAMA_BASE}/api/chat", + json={"model": _OLLAMA_PROVIDER["model"], "messages": messages, "stream": False}, + timeout=120) + r.raise_for_status() + return _clean(r.json()["message"]["content"]) + raise last_exc or RuntimeError("All AI providers failed or rate-limited") + +def _repair_json(text: str) -> str: + """Escape literal control characters inside JSON string values.""" + result = [] + in_str = False + esc = False + for c in text: + if esc: + result.append(c) + esc = False + continue + if c == '\\' and in_str: + result.append(c) + esc = True + continue + if c == '"': + in_str = not in_str + result.append(c) + continue + if in_str and c == '\n': + result.append('\\n') + continue + if in_str and c == '\r': + result.append('\\r') + continue + if in_str and c == '\t': + result.append('\\t') + continue + result.append(c) + return ''.join(result) + +def _extract_json(raw: str): + """Try progressively harder to extract valid JSON from raw text.""" + raw = raw.strip() + # 
Direct parse + try: + return json.loads(raw) + except json.JSONDecodeError: + pass + # Repair literal newlines inside strings then retry + repaired = _repair_json(raw) + try: + return json.loads(repaired) + except json.JSONDecodeError: + pass + # Find first { or [ then walk to find matching closer + for source in (repaired, raw): + for start_ch, end_ch in [('{', '}'), ('[', ']')]: + idx = source.find(start_ch) + if idx == -1: + continue + depth = 0 + in_str = False + esc = False + for i in range(idx, len(source)): + c = source[i] + if esc: + esc = False + continue + if c == '\\' and in_str: + esc = True + continue + if c == '"': + in_str = not in_str + continue + if in_str: + continue + if c == start_ch: + depth += 1 + elif c == end_ch: + depth -= 1 + if depth == 0: + candidate = source[idx:i+1] + try: + return json.loads(candidate) + except json.JSONDecodeError: + break + raise ValueError(f"AI returned non-JSON: {raw[:200]}") + +def call_ai_json(messages: list, system: str = "", max_tokens: int = 2048, + api_key_row: dict | None = None) -> dict | list: + raw = call_ai(messages, system=system, max_tokens=max_tokens, api_key_row=api_key_row) + return _extract_json(raw) diff --git a/app/core/file_reader.py b/app/core/file_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..a9a4a422450a233b1cbd2a9a977a8b4db6c4d4db --- /dev/null +++ b/app/core/file_reader.py @@ -0,0 +1,99 @@ +"""File text extractor — supports .docx, .pdf, .txt. + +Reusable: copy this file to any Flask project's app/core/ directory. +Dependencies: pypdf>=4.0 (for PDF support — add to requirements.txt) +DOCX and TXT use Python built-ins only (no extra packages needed). 
+""" +import io +import zipfile +import xml.etree.ElementTree as ET +from pathlib import Path + +ALLOWED_EXTENSIONS = {".pdf", ".docx", ".txt"} +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB + +_WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + + +def extract_text(file_storage) -> str: + """Extract plain text from a Werkzeug FileStorage object. + + Supports .pdf, .docx, .txt files up to 10 MB. + Returns extracted text as a string. + Raises ValueError for unsupported types, oversized files, or parse errors. + """ + filename = file_storage.filename or "" + ext = Path(filename).suffix.lower() + + if ext not in ALLOWED_EXTENSIONS: + raise ValueError( + f"Unsupported file type '{ext or '(none)'}'. Allowed: PDF, DOCX, TXT" + ) + + data = file_storage.read() + if len(data) > MAX_FILE_SIZE: + raise ValueError("File too large (max 10 MB)") + if not data: + raise ValueError("File is empty") + + if ext == ".txt": + return data.decode("utf-8", errors="replace").strip() + if ext == ".docx": + return _read_docx(io.BytesIO(data)) + if ext == ".pdf": + return _read_pdf(io.BytesIO(data)) + + raise ValueError(f"Unhandled extension: {ext}") + + +def _read_docx(stream: io.BytesIO) -> str: + """Extract text from a .docx file using built-in zipfile + xml.etree (no deps).""" + try: + with zipfile.ZipFile(stream) as z: + with z.open("word/document.xml") as f: + tree = ET.parse(f) + except (zipfile.BadZipFile, KeyError) as exc: + raise ValueError(f"Could not read Word document: {exc}") + + root = tree.getroot() + paragraphs = [] + for para in root.iter(f"{{{_WORD_NS}}}p"): + # Collect all text runs, preserving spaces + parts = [] + for node in para.iter(): + if node.tag == f"{{{_WORD_NS}}}t" and node.text: + parts.append(node.text) + elif node.tag == f"{{{_WORD_NS}}}br": + parts.append("\n") + text = "".join(parts).strip() + if text: + paragraphs.append(text) + + text = "\n\n".join(paragraphs) + if not text.strip(): + raise ValueError("No readable text found in the Word 
document") + return text + + +def _read_pdf(stream: io.BytesIO) -> str: + """Extract text from a PDF using pypdf.""" + try: + from pypdf import PdfReader + except ImportError: + raise ValueError("pypdf not installed — run: pip install pypdf") + + try: + reader = PdfReader(stream) + except Exception as exc: + raise ValueError(f"Could not read PDF: {exc}") + + pages = [] + for page in reader.pages: + text = page.extract_text() or "" + if text.strip(): + pages.append(text.strip()) + + text = "\n\n".join(pages) + if not text.strip(): + raise ValueError("No readable text found in the PDF (may be image-based)") + return text diff --git a/app/home/__init__.py b/app/home/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/home/__pycache__/__init__.cpython-314.pyc b/app/home/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e07ddf4e4ba08a43da81f65f79dc4a5f6c85e96f Binary files /dev/null and b/app/home/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/home/__pycache__/routes.cpython-314.pyc b/app/home/__pycache__/routes.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0486d60fb4b6d9d29c34bcc81a8fe3460fd5d4d1 Binary files /dev/null and b/app/home/__pycache__/routes.cpython-314.pyc differ diff --git a/app/home/routes.py b/app/home/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..a6dd4d0b28fda514b4bd7c008e2b2f3eede68b65 --- /dev/null +++ b/app/home/routes.py @@ -0,0 +1,8 @@ +"""DevKit landing page.""" +from flask import Blueprint, render_template + +bp = Blueprint("home", __name__, template_folder="templates") + +@bp.route("/") +def index(): + return render_template("home/index.html") diff --git a/app/home/templates/home/index.html b/app/home/templates/home/index.html new file mode 100644 index 
0000000000000000000000000000000000000000..2e0e3f564461ee5017efafbab5de229105c77161 --- /dev/null +++ b/app/home/templates/home/index.html @@ -0,0 +1,152 @@ +{% extends "base.html" %} +{% block title %}DevKit — Developer Toolkit{% endblock %} + +{% block content %} +
+ + + + +
+ + + + + +
+ + +
+ Workspace + No signup required · Free +
+ + +
+
+ + + + +
+{% endblock %} diff --git a/app/templates/base.html b/app/templates/base.html new file mode 100644 index 0000000000000000000000000000000000000000..513db0bdd4a3940c02f44232bdea48014f897ea3 --- /dev/null +++ b/app/templates/base.html @@ -0,0 +1,121 @@ + + + + + +{% block title %}Competitive Intel{% endblock %} + + + + + +{% block extra_head %}{% endblock %} + + + +{% block content %}{% endblock %} +{% block extra_scripts %}{% endblock %} + + diff --git a/app/tools/__init__.py b/app/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/tools/__pycache__/__init__.cpython-314.pyc b/app/tools/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e2b23424c564210411ad2dbc191e23fa88b43c4 Binary files /dev/null and b/app/tools/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/tools/changelog_ai/__init__.py b/app/tools/changelog_ai/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/tools/changelog_ai/__pycache__/__init__.cpython-314.pyc b/app/tools/changelog_ai/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8a874c7a0b08e692f26d2fc1d1d0e7023c170ca Binary files /dev/null and b/app/tools/changelog_ai/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/tools/changelog_ai/__pycache__/changelog.cpython-314.pyc b/app/tools/changelog_ai/__pycache__/changelog.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..767fa056d6ce8ba27aad0b4470410f7c9acccb3a Binary files /dev/null and b/app/tools/changelog_ai/__pycache__/changelog.cpython-314.pyc differ diff --git a/app/tools/changelog_ai/__pycache__/routes.cpython-314.pyc b/app/tools/changelog_ai/__pycache__/routes.cpython-314.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c1127b5076a6daf2e621665a86d2b3f7382f59c2 Binary files /dev/null and b/app/tools/changelog_ai/__pycache__/routes.cpython-314.pyc differ diff --git a/app/tools/changelog_ai/changelog.py b/app/tools/changelog_ai/changelog.py new file mode 100644 index 0000000000000000000000000000000000000000..101ee487b416d10a41730edccb35893a39bc11f5 --- /dev/null +++ b/app/tools/changelog_ai/changelog.py @@ -0,0 +1,94 @@ +"""Changelog.ai — transforms raw git commits into polished release notes.""" +from app.core.ai import call_ai_json + +_LANG_NAMES = { + "en": "English", "ar": "Arabic", "fr": "French", "es": "Spanish", + "de": "German", "zh": "Chinese (Simplified)", "ja": "Japanese", + "pt": "Portuguese", "ru": "Russian", "tr": "Turkish", + "ko": "Korean", "nl": "Dutch", "it": "Italian", "pl": "Polish", +} + +_SYSTEM = """You are an expert technical writer who specializes in software release communications. +You transform raw git commit history into clear, well-structured changelogs. +You adapt your tone precisely to the target audience without over-explaining. +Return ONLY valid JSON — no markdown fences, no preamble.""" + +_AUDIENCE_NOTES = { + "Developer": ( + "Write for software engineers. Use precise technical language. " + "Reference module names, APIs, and implementation details. " + "Include all commit types: feat, fix, perf, refactor. " + "Descriptions should be concise and technically accurate. " + "Skip chore/docs commits unless they have real developer impact." + ), + "User": ( + "Write for end users with no technical background. " + "Translate technical changes into plain English benefits. " + "Focus on what users can NOW DO or what problems are SOLVED. " + "Only include feat and fix sections — no internal refactors, no chores. " + "Use active voice, 'You can now...', 'We fixed...' language." + ), + "Executive": ( + "Write for C-suite and business stakeholders. 
" + "Focus exclusively on business impact, risk reduction, and strategic value. " + "Be brief: one punchy sentence per item maximum. " + "Skip low-level fixes unless they had business/availability impact. " + "Frame everything in terms of outcome, not implementation." + ), +} + +_PROMPT_TMPL = """Transform the following git commits into a polished changelog. + +TARGET AUDIENCE: {audience} +AUDIENCE INSTRUCTIONS: {audience_notes} + +GIT COMMITS / RAW CHANGES: +--- +{commits} +--- + +Return a JSON object with EXACTLY these keys: +{{ + "sections": [ + {{ + "type": "", + "items": [ + {{ + "title": "", + "description": "<1-2 sentence description adapted to the target audience>" + }} + ] + }} + ] +}} + +Rules: +- sections: only include types that have actual items; omit empty sections +- type ordering: breaking first, then features, then improvements, then fixes +- breaking: only if a commit explicitly breaks backward compatibility +- improvements: performance, refactors, dependency bumps with user impact +- Filter chore/doc-only commits unless they carry meaningful impact for this audience +- Each item title must be sentence-case and not end with a period +- Minimum 1 item per section; group related commits into one item when appropriate""" + + +def generate_changelog(commits: str, audience: str, language: str = "en") -> dict: + """Generate a polished changelog from raw git commits.""" + audience_notes = _AUDIENCE_NOTES.get(audience, _AUDIENCE_NOTES["Developer"]) + lang_name = _LANG_NAMES.get(language, "English") + lang_instruction = ( + f"\n\nIMPORTANT: Write ALL text values in the JSON response in {lang_name}. " + "This includes all item titles and descriptions. " + "The JSON keys (type, sections, items, title, description) must remain in English." 
+ ) if language != "en" else "" + prompt = _PROMPT_TMPL.format( + audience=audience, + audience_notes=audience_notes, + commits=commits[:6000], + ) + lang_instruction + result = call_ai_json( + [{"role": "user", "content": prompt}], + system=_SYSTEM, + max_tokens=2048, + ) + return result or {} diff --git a/app/tools/changelog_ai/routes.py b/app/tools/changelog_ai/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..ad3ebb47c82ecf2fa73a16c7ec035d9d1b01e2e1 --- /dev/null +++ b/app/tools/changelog_ai/routes.py @@ -0,0 +1,32 @@ +"""Changelog.ai routes.""" +from flask import Blueprint, render_template, request, jsonify +from .changelog import generate_changelog + +bp = Blueprint("changelog_ai", __name__, template_folder="templates") + +SUPPORTED_AUDIENCES = {"Developer", "User", "Executive"} + + +@bp.route("/") +def index(): + return render_template("changelog_ai/index.html") + + +@bp.route("/api/generate", methods=["POST"]) +def api_generate(): + body = request.get_json(silent=True) or {} + commits = (body.get("commits") or "").strip() + audience = (body.get("audience") or "Developer").strip() + language = (body.get("language") or "en").strip() + + if not commits: + return jsonify({"error": "Paste some commits or change notes first"}), 400 + if len(commits) < 20: + return jsonify({"error": "Too short — paste at least a few commit messages"}), 400 + if audience not in SUPPORTED_AUDIENCES: + return jsonify({"error": f"Unsupported audience: {audience}"}), 400 + + result = generate_changelog(commits, audience, language=language) + if not result: + return jsonify({"error": "AI failed to generate changelog — please try again"}), 502 + return jsonify(result) diff --git a/app/tools/changelog_ai/templates/changelog_ai/index.html b/app/tools/changelog_ai/templates/changelog_ai/index.html new file mode 100644 index 0000000000000000000000000000000000000000..6be57c77a9e974ca1788186c36636444f3cfd7b9 --- /dev/null +++ 
b/app/tools/changelog_ai/templates/changelog_ai/index.html @@ -0,0 +1,726 @@ +{% extends "base.html" %} +{% block title %}Changelog.ai — AI Release Notes Generator{% endblock %} + +{% block content %} + +
+
+ Changelog.ai + + +
+
+ + +
+
+ +
+ + + + + +
+ + +
+
+ Source Input +

Raw Git History

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+ +
+ +
+ + +
+
+ + +
+ + +
+
+ + + +
+
+ + +
+ auto_awesome +

Paste your commits and click Generate

+

Set version, audience, and language, then generate

+
+ + + + + + + + +
+
+
+{% endblock %} + +{% block extra_scripts %} + + +{% endblock %} diff --git a/app/tools/doc_forge/__init__.py b/app/tools/doc_forge/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/tools/doc_forge/__pycache__/__init__.cpython-314.pyc b/app/tools/doc_forge/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22e37f5576d9c5d03b54b3f2542b92486c96e83e Binary files /dev/null and b/app/tools/doc_forge/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/tools/doc_forge/__pycache__/db.cpython-314.pyc b/app/tools/doc_forge/__pycache__/db.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea2e90c3b5bc4c04f9e06351b97fc552b6353850 Binary files /dev/null and b/app/tools/doc_forge/__pycache__/db.cpython-314.pyc differ diff --git a/app/tools/doc_forge/__pycache__/doc_generator.cpython-314.pyc b/app/tools/doc_forge/__pycache__/doc_generator.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fbee1d4c5df7e8903e3e79a9345cc9e6ae891ea Binary files /dev/null and b/app/tools/doc_forge/__pycache__/doc_generator.cpython-314.pyc differ diff --git a/app/tools/doc_forge/__pycache__/github_fetcher.cpython-314.pyc b/app/tools/doc_forge/__pycache__/github_fetcher.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48c1a2055af3e241d6029084e9c042959b8edeb0 Binary files /dev/null and b/app/tools/doc_forge/__pycache__/github_fetcher.cpython-314.pyc differ diff --git a/app/tools/doc_forge/__pycache__/routes.cpython-314.pyc b/app/tools/doc_forge/__pycache__/routes.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5bd252c25b45d397690ff35e5390b80f83ebb7b0 Binary files /dev/null and b/app/tools/doc_forge/__pycache__/routes.cpython-314.pyc differ diff --git a/app/tools/doc_forge/db.py b/app/tools/doc_forge/db.py new file 
mode 100644 index 0000000000000000000000000000000000000000..e3facb69184b5b46ff893966f8a502c083a7e6c4 --- /dev/null +++ b/app/tools/doc_forge/db.py @@ -0,0 +1,88 @@ +"""SQLite cache for generated docs.""" +import json +import sqlite3 +import os + +_SCHEMA = """ +PRAGMA journal_mode=WAL; +PRAGMA foreign_keys=ON; + +CREATE TABLE IF NOT EXISTS repos ( + id INTEGER PRIMARY KEY, + owner TEXT NOT NULL, + repo TEXT NOT NULL, + info TEXT NOT NULL, -- JSON + tree TEXT NOT NULL, -- JSON array + created_at TEXT NOT NULL DEFAULT (datetime('now')), + UNIQUE(owner, repo) +); + +CREATE TABLE IF NOT EXISTS docs ( + id INTEGER PRIMARY KEY, + repo_id INTEGER NOT NULL REFERENCES repos(id) ON DELETE CASCADE, + doc_type TEXT NOT NULL, -- readme | architecture | api + content TEXT NOT NULL, -- JSON + generated_at TEXT NOT NULL DEFAULT (datetime('now')), + UNIQUE(repo_id, doc_type) +); + +CREATE INDEX IF NOT EXISTS idx_docs_repo ON docs(repo_id); +""" + +def get_db(path: str) -> sqlite3.Connection: + con = sqlite3.connect(path) + con.row_factory = sqlite3.Row + con.executescript(_SCHEMA) + con.commit() + return con + + +def upsert_repo(db, owner: str, repo: str, info: dict, tree: list) -> int: + db.execute(""" + INSERT INTO repos (owner, repo, info, tree) + VALUES (?, ?, ?, ?) + ON CONFLICT(owner, repo) DO UPDATE SET + info=excluded.info, tree=excluded.tree, created_at=datetime('now') + """, (owner, repo, json.dumps(info), json.dumps(tree))) + db.commit() + row = db.execute("SELECT id FROM repos WHERE owner=? AND repo=?", + (owner, repo)).fetchone() + return row["id"] + + +def upsert_doc(db, repo_id: int, doc_type: str, content: dict): + db.execute(""" + INSERT INTO docs (repo_id, doc_type, content) + VALUES (?, ?, ?) 
+ ON CONFLICT(repo_id, doc_type) DO UPDATE SET + content=excluded.content, generated_at=datetime('now') + """, (repo_id, doc_type, json.dumps(content))) + db.commit() + + +def get_docs(db, owner: str, repo: str) -> dict | None: + row = db.execute("SELECT id FROM repos WHERE owner=? AND repo=?", + (owner, repo)).fetchone() + if not row: + return None + docs = db.execute("SELECT doc_type, content FROM docs WHERE repo_id=?", + (row["id"],)).fetchall() + if not docs: + return None + result = {} + for d in docs: + result[d["doc_type"]] = json.loads(d["content"]) + return result + + +def list_recent(db, limit: int = 10) -> list: + rows = db.execute(""" + SELECT r.owner, r.repo, r.info, r.created_at, + COUNT(d.id) as doc_count + FROM repos r LEFT JOIN docs d ON d.repo_id = r.id + GROUP BY r.id ORDER BY r.created_at DESC LIMIT ? + """, (limit,)).fetchall() + return [{"owner": r["owner"], "repo": r["repo"], + "info": json.loads(r["info"]), + "created_at": r["created_at"], "doc_count": r["doc_count"]} + for r in rows] diff --git a/app/tools/doc_forge/doc_generator.py b/app/tools/doc_forge/doc_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..cac96226c414dce84adf2fe4cfd368e78d724701 --- /dev/null +++ b/app/tools/doc_forge/doc_generator.py @@ -0,0 +1,127 @@ +"""AI-powered documentation generator for DocForge.""" +import json +from app.core.ai import call_ai, call_ai_json + +_README_SYSTEM = """You are a senior technical writer and developer advocate. +Generate a comprehensive, beautiful GitHub README.md for the given repository. +Output ONLY the raw Markdown content — no JSON, no preamble, no code fences. +README must include: title with badges, description, features list, installation, +usage with examples, configuration, contributing guide, and license section. +Use real emoji sparingly. Make it genuinely useful, not generic.""" + +_README_META_SYSTEM = """You are a technical analyst. Given a repository description, return ONLY valid JSON. 
+Return a JSON object with EXACTLY these keys: +{"summary": "2-3 sentence plain English summary", "tech_stack": ["tech1", "tech2"], "key_features": ["feature 1", "feature 2"], "complexity": "beginner|intermediate|advanced"} +No markdown fences, no preamble.""" + +_ARCH_SYSTEM = """You are a software architect. Analyze the repository and write a clear architecture document. +Output ONLY raw Markdown — no JSON, no preamble, no code fences around the whole document. +Structure your response with these sections: +## Architecture Overview +(2-3 paragraphs explaining the overall design) +## Key Components +(bullet list: component name — file path — what it does) +## Data Flow +(numbered steps describing how data moves through the system) +## Mermaid Diagram +(a ```mermaid code block with a graph LR or flowchart diagram)""" + +_ARCH_META_SYSTEM = """You are a technical analyst. Return ONLY valid JSON — no markdown, no preamble. +{"components": [{"name": "X", "role": "Y", "file": "path/to/file"}], "mermaid": "graph LR\\n A --> B"}""" + +_API_SYSTEM = """You are a technical writer. Extract and document all API endpoints, +functions, and classes from the code files provided. +Output ONLY raw Markdown — no JSON, no preamble. 
+Structure with these sections: +## API Endpoints +(table: Method | Path | Description | Returns) +## Functions +(### FunctionName signature, then description and params as a bullet list) +## Classes +(### ClassName, then description and method list)""" + + +def _build_context(repo_info: dict, tree: list[str], files: dict[str, str]) -> str: + ctx = f"Repository: {repo_info['full_name']}\n" + ctx += f"Description: {repo_info.get('description', 'No description')}\n" + ctx += f"Primary language: {repo_info.get('language', 'Unknown')}\n" + ctx += f"Stars: {repo_info.get('stars', 0)} Forks: {repo_info.get('forks', 0)}\n" + if repo_info.get("topics"): + ctx += f"Topics: {', '.join(repo_info['topics'])}\n" + ctx += f"\nFile tree ({len(tree)} files, showing first 30):\n" + ctx += "\n".join(f" {p}" for p in tree[:30]) + ctx += "\n\nKey file contents:\n" + for path, content in list(files.items())[:5]: + ctx += f"\n--- {path} ---\n{content[:1500]}\n" + # Hard cap: Groq llama-3.1-8b has ~8k token context; keep prompt under ~12k chars + return ctx[:12000] + + +def generate_readme(repo_info: dict, tree: list[str], + files: dict[str, str], api_key_row=None) -> dict: + ctx = _build_context(repo_info, tree, files) + # Generate README as plain text (more reliable than embedding in JSON) + readme_text = call_ai( + [{"role": "user", "content": f"Generate a README.md for this repository:\n\n{ctx}"}], + system=_README_SYSTEM, + max_tokens=2048, + api_key_row=api_key_row, + ) + # Generate metadata as simple JSON + try: + meta = call_ai_json( + [{"role": "user", "content": f"Analyze this repository and return metadata JSON:\n{ctx[:3000]}"}], + system=_README_META_SYSTEM, + max_tokens=512, + api_key_row=api_key_row, + ) + if not isinstance(meta, dict): + meta = {} + except Exception: + meta = {} + return { + "readme": readme_text, + "summary": meta.get("summary", ""), + "tech_stack": meta.get("tech_stack", []), + "key_features": meta.get("key_features", []), + "complexity": 
meta.get("complexity", "intermediate"), + } + + +def generate_architecture(repo_info: dict, tree: list[str], + files: dict[str, str], api_key_row=None) -> dict: + ctx = _build_context(repo_info, tree, files) + overview_md = call_ai( + [{"role": "user", "content": f"Write an architecture document for this repository:\n\n{ctx}"}], + system=_ARCH_SYSTEM, + max_tokens=2048, + api_key_row=api_key_row, + ) + try: + meta = call_ai_json( + [{"role": "user", "content": f"List the key components and a Mermaid diagram for this repo:\n{ctx[:3000]}"}], + system=_ARCH_META_SYSTEM, + max_tokens=1024, + api_key_row=api_key_row, + ) + if not isinstance(meta, dict): + meta = {} + except Exception: + meta = {} + return { + "overview": overview_md, + "components": meta.get("components", []), + "mermaid": meta.get("mermaid", ""), + } + + +def generate_api_docs(repo_info: dict, tree: list[str], + files: dict[str, str], api_key_row=None) -> dict: + ctx = _build_context(repo_info, tree, files) + api_md = call_ai( + [{"role": "user", "content": f"Document the API, functions, and classes from this codebase:\n\n{ctx}"}], + system=_API_SYSTEM, + max_tokens=2048, + api_key_row=api_key_row, + ) + return {"content": api_md} diff --git a/app/tools/doc_forge/github_fetcher.py b/app/tools/doc_forge/github_fetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..b9fcc76d26431928ed8136373b53bcfe16be2c4b --- /dev/null +++ b/app/tools/doc_forge/github_fetcher.py @@ -0,0 +1,116 @@ +"""Fetches repository structure and key file contents from GitHub API.""" +import os +import re +import requests + +_GITHUB_API = "https://api.github.com" +_SKIP_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", + "dist", "build", ".next", ".nuxt", "coverage", "htmlcov"} +_CODE_EXTS = {".py", ".js", ".ts", ".jsx", ".tsx", ".go", ".rs", ".java", + ".rb", ".php", ".cs", ".cpp", ".c", ".h", ".swift", ".kt"} +_DOC_EXTS = {".md", ".rst", ".txt", ".yaml", ".yml", ".toml", ".json"} +_PRIORITY = 
["README.md", "readme.md", "README.rst", "main.py", "app.py", + "index.js", "index.ts", "main.go", "src/main.rs", "setup.py", + "pyproject.toml", "package.json", "go.mod", "Cargo.toml"] + + +def _headers(): + token = os.environ.get("GITHUB_TOKEN", "") + h = {"Accept": "application/vnd.github.v3+json"} + if token: + h["Authorization"] = f"Bearer {token}" + return h + + +def parse_repo_url(url: str) -> tuple[str, str]: + """Return (owner, repo) from a GitHub URL or owner/repo string.""" + url = url.strip().rstrip("/") + # Match github.com/owner/repo — ignore /tree/, /blob/, /issues/ etc. + m = re.search(r"github\.com/([^/]+)/([^/?#\s]+)", url) + if m: + repo = m.group(2) + if repo.endswith(".git"): + repo = repo[:-4] + return m.group(1), repo + # Plain "owner/repo" shorthand + parts = url.split("/") + if len(parts) == 2 and parts[0] and parts[1]: + return parts[0], parts[1] + raise ValueError(f"Cannot parse GitHub URL: {url!r}") + + +def get_repo_info(owner: str, repo: str) -> dict: + r = requests.get(f"{_GITHUB_API}/repos/{owner}/{repo}", + headers=_headers(), timeout=15) + r.raise_for_status() + d = r.json() + return { + "full_name": d.get("full_name", ""), + "description": d.get("description", ""), + "language": d.get("language", ""), + "stars": d.get("stargazers_count", 0), + "forks": d.get("forks_count", 0), + "topics": d.get("topics", []), + "default_branch": d.get("default_branch", "main"), + "url": d.get("html_url", ""), + } + + +def get_file_tree(owner: str, repo: str, branch: str = "main", + max_files: int = 150) -> list[str]: + """Return flat list of file paths, priority files first.""" + r = requests.get( + f"{_GITHUB_API}/repos/{owner}/{repo}/git/trees/{branch}?recursive=1", + headers=_headers(), timeout=20) + if r.status_code == 404: + # Try main vs master + alt = "master" if branch == "main" else "main" + r = requests.get( + f"{_GITHUB_API}/repos/{owner}/{repo}/git/trees/{alt}?recursive=1", + headers=_headers(), timeout=20) + r.raise_for_status() + 
blobs = [item["path"] for item in r.json().get("tree", []) + if item["type"] == "blob" + and not any(seg in _SKIP_DIRS for seg in item["path"].split("/"))] + + # Sort: priority first, then code, then docs, then rest + def rank(p): + name = p.split("/")[-1] + if p in _PRIORITY or name in _PRIORITY: + return 0 + ext = os.path.splitext(p)[1].lower() + if ext in _CODE_EXTS: + return 1 + if ext in _DOC_EXTS: + return 2 + return 3 + + return sorted(blobs, key=rank)[:max_files] + + +def fetch_file(owner: str, repo: str, path: str, branch: str = "main") -> str: + """Fetch raw content of a single file (max 50KB).""" + r = requests.get( + f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}", + headers=_headers(), timeout=15) + if r.status_code != 200: + return "" + text = r.text + return text[:50_000] # cap at 50KB per file + + +def fetch_key_files(owner: str, repo: str, tree: list[str], + branch: str = "main", max_chars: int = 60_000) -> dict[str, str]: + """Fetch the most important files up to max_chars total.""" + results: dict[str, str] = {} + total = 0 + # Always try priority files first + ordered = [p for p in _PRIORITY if p in tree] + [p for p in tree if p not in _PRIORITY] + for path in ordered: + if total >= max_chars: + break + content = fetch_file(owner, repo, path, branch) + if content: + results[path] = content + total += len(content) + return results diff --git a/app/tools/doc_forge/routes.py b/app/tools/doc_forge/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..e6b1b63f628f34b7d0f1d7e48e10deca2f2ff718 --- /dev/null +++ b/app/tools/doc_forge/routes.py @@ -0,0 +1,121 @@ +"""DocForge routes — generate full documentation for any GitHub repo.""" +import json +import os +from flask import (Blueprint, render_template, request, jsonify, + current_app, Response) +from .github_fetcher import (parse_repo_url, get_repo_info, + get_file_tree, fetch_key_files) +from .doc_generator import (generate_readme, generate_architecture, + 
generate_api_docs) +from .db import get_db, upsert_repo, upsert_doc, get_docs, list_recent + +bp = Blueprint("doc_forge", __name__, template_folder="templates") + + +def _db(): + db_path = os.path.join(os.path.dirname(current_app.root_path), "docforge.db") + return get_db(db_path) + + +# ── Pages ────────────────────────────────────────────────────────────────────── + +@bp.route("/") +def index(): + db = _db() + recent = list_recent(db) + return render_template("doc_forge/index.html", recent=recent) + + +@bp.route("/docs//") +def docs_view(owner, repo): + db = _db() + data = get_docs(db, owner, repo) + if not data: + return render_template("doc_forge/index.html", recent=list_recent(db), + error=f"No docs found for {owner}/{repo}. Generate them first.") + return render_template("doc_forge/docs.html", owner=owner, repo=repo, data=data) + + +# ── API ──────────────────────────────────────────────────────────────────────── + +@bp.route("/api/analyze", methods=["POST"]) +def analyze(): + """Step 1: fetch repo metadata + file tree. 
Returns info without generating docs.""" + body = request.get_json(silent=True) or {} + url = (body.get("url") or "").strip() + if not url: + return jsonify({"error": "repo_url is required"}), 400 + try: + owner, repo = parse_repo_url(url) + info = get_repo_info(owner, repo) + tree = get_file_tree(owner, repo, info["default_branch"]) + return jsonify({ + "owner": owner, "repo": repo, + "info": info, "file_count": len(tree), + "tree_preview": tree[:20], + }) + except Exception as exc: + return jsonify({"error": str(exc)}), 400 + + +@bp.route("/api/generate", methods=["POST"]) +def generate(): + """Step 2: generate all docs for a repo.""" + body = request.get_json(silent=True) or {} + url = (body.get("url") or "").strip() + types = body.get("types", ["readme", "architecture", "api"]) + if not url: + return jsonify({"error": "url is required"}), 400 + try: + owner, repo = parse_repo_url(url) + info = get_repo_info(owner, repo) + tree = get_file_tree(owner, repo, info["default_branch"]) + files = fetch_key_files(owner, repo, tree, info["default_branch"]) + db = _db() + repo_id = upsert_repo(db, owner, repo, info, tree) + results = {} + + if "readme" in types: + readme = generate_readme(info, tree, files) + upsert_doc(db, repo_id, "readme", readme) + results["readme"] = readme + + if "architecture" in types: + arch = generate_architecture(info, tree, files) + upsert_doc(db, repo_id, "architecture", arch) + results["architecture"] = arch + + if "api" in types: + api_docs = generate_api_docs(info, tree, files) + upsert_doc(db, repo_id, "api", api_docs) + results["api"] = api_docs + + return jsonify({"owner": owner, "repo": repo, "info": info, "docs": results}) + except Exception as exc: + return jsonify({"error": str(exc)}), 500 + + +@bp.route("/api/download///") +def download(owner, repo, doc_type): + """Download a generated doc as a file.""" + db = _db() + data = get_docs(db, owner, repo) + if not data or doc_type not in data: + return jsonify({"error": "not found"}), 
404 + content = data[doc_type] + if doc_type == "readme": + text = content.get("readme", "") + filename = "README.md" + mime = "text/markdown" + else: + text = json.dumps(content, indent=2, ensure_ascii=False) + filename = f"{doc_type}.json" + mime = "application/json" + return Response(text, mimetype=mime, + headers={"Content-Disposition": f'attachment; filename="{filename}"'}) + + +@bp.route("/api/recent") +def recent(): + db = _db() + return jsonify(list_recent(db)) diff --git a/app/tools/doc_forge/templates/doc_forge/docs.html b/app/tools/doc_forge/templates/doc_forge/docs.html new file mode 100644 index 0000000000000000000000000000000000000000..c955f1366df4349aaa26d6ec42753ed487db45f5 --- /dev/null +++ b/app/tools/doc_forge/templates/doc_forge/docs.html @@ -0,0 +1,417 @@ +{% extends "base.html" %} + +{% block title %}{{ owner }}/{{ repo }} — DocForge{% endblock %} + +{% block extra_head %} + + + + +{% endblock %} + +{% block content %} +
+ + {# ── Sidebar ───────────────────────────────────────────────────────────────── #} + + + {# ── Main Content ──────────────────────────────────────────────────────────── #} +
+
+ + {# Breadcrumb #} + + + {# Page header #} +
+
+

{{ owner }}/{{ repo }}

+ {% if data.readme and data.readme.summary %} +

{{ data.readme.summary }}

+ {% endif %} +
+ {% if data.readme and data.readme.tech_stack %} + {% for tech in data.readme.tech_stack[:6] %} + {{ tech }} + {% endfor %} + {% endif %} + {% if data.readme and data.readme.complexity %} + {{ data.readme.complexity }} + {% endif %} +
+
+
+ + +
+
+ + {# Tabs #} +
+ {% if data.readme %} + + {% endif %} + {% if data.architecture %} + + {% endif %} + {% if data.api %} + + {% endif %} +
+ + {# ── README Panel ──────────────────────────────────────────────────────── #} + {% if data.readme %} +
+
+
+
+ {% if data.readme.key_features %} +
+ {% for feat in data.readme.key_features[:4] %} +
+ check_circle +

{{ feat | e }}

+
+ {% endfor %} +
+ {% endif %} +
+ {% endif %} + + {# ── Architecture Panel ────────────────────────────────────────────────── #} + {% if data.architecture %} +
+ {% if data.architecture.overview %} +
+

+ + Overview +

+
+ +
+ {% endif %} + + {% if data.architecture.mermaid %} +
+

+ + System Diagram +

+
{{ data.architecture.mermaid | e }}
+
+ {% endif %} + + {% if data.architecture.components %} +
+

+ + Components +

+
+ {% for comp in data.architecture.components %} +
+

+ {{ (comp.name if comp is mapping else comp) | e }} +

+ {% if comp is mapping and comp.description %} +

{{ comp.description | e }}

+ {% endif %} + {% if comp is mapping and comp.responsibilities %} +
    + {% for r in comp.responsibilities %} +
  • + arrow_right + {{ r | e }} +
  • + {% endfor %} +
+ {% endif %} +
+ {% endfor %} +
+
+ {% endif %} + + {% if data.architecture.data_flow %} +
+

+ + Data Flow +

+

{{ data.architecture.data_flow | e }}

+
+ {% endif %} +
+ {% endif %} + + {# ── API Panel ────────────────────────────────────────────────────────── #} + {% if data.api %} +
+ + {% if data.api.endpoints %} +
+

+ + Endpoints +

+
+ + + + + + + + + + {% for ep in data.api.endpoints %} + {% set method = ep.method if ep is mapping else 'GET' %} + + + + + + {% endfor %} + +
MethodPathDescription
+ + {{ method | e }} + + + {{ (ep.path if ep is mapping else ep) | e }} + + {{ (ep.description if ep is mapping else '') | e }} +
+
+
+ {% endif %} + + {% if data.api.functions %} +
+

+ + Functions +

+
+ {% for fn in data.api.functions %} +
+

+ {{ (fn.name if fn is mapping else fn) | e }}{% if fn is mapping and fn.signature %}({{ fn.signature | e }}){% endif %} +

+ {% if fn is mapping and fn.description %} +

{{ fn.description | e }}

+ {% endif %} + {% if fn is mapping and fn.returns %} +

→ {{ fn.returns | e }}

+ {% endif %} +
+ {% endfor %} +
+
+ {% endif %} + + {% if data.api.classes %} +
+

+ + Classes +

+
+ {% for cls in data.api.classes %} +
+

+ class {{ (cls.name if cls is mapping else cls) | e }} +

+ {% if cls is mapping and cls.description %} +

{{ cls.description | e }}

+ {% endif %} +
+ {% endfor %} +
+
+ {% endif %} + + {% if not data.api.endpoints and not data.api.functions and not data.api.classes %} + {% if data.api.content %} +
+
+
+ + {% else %} +
+ api +

No API elements detected in this repository.

+
+ {% endif %} + {% endif %} +
+ {% endif %} + + +
+
+
+{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/app/tools/doc_forge/templates/doc_forge/index.html b/app/tools/doc_forge/templates/doc_forge/index.html new file mode 100644 index 0000000000000000000000000000000000000000..183b93bb81376411695d04697e948335f7c3715f --- /dev/null +++ b/app/tools/doc_forge/templates/doc_forge/index.html @@ -0,0 +1,430 @@ +{% extends "base.html" %} + +{% block title %}DocForge — AI Documentation Engine{% endblock %} + +{% block extra_head %} + +{% endblock %} + +{% block content %} +
+ + {# ── Error Banner ──────────────────────────────────────────────────────────── #} + {% if error %} +
+
+ error + {{ error }} +
+
+ {% endif %} + + {# ── Hero ──────────────────────────────────────────────────────────────────── #} +
+
+ auto_awesome + AI-POWERED DOCUMENTATION ENGINE +
+

+ Document any GitHub repo + instantly. +

+

+ Transform complex codebases into human-readable READMEs, architecture maps, and API references in seconds. Just paste a URL. +

+ + {# ── URL Input ─────────────────────────────────────────────────────────────── #} +
+
+ link + +
+ +
+
+
check_circle No Login Required
+
check_circle Instant Results
+
check_circle Free & Open Source
+
+ + {# ── Repo Preview Card (hidden until analyze) ──────────────────────────────── #} + + + {# ── Progress indicator (hidden until generating) ─────────────────────────── #} + +
+ + {# ── Bento: Features ──────────────────────────────────────────────────────── #} +
+
+ + {# Main card #} +
+
+
+ psychology +
+

Understand any codebase in seconds.

+

+ DocForge parses file trees, function relationships, and logic flows to explain + why code works — not just what it does. +

+
+
+ token + Multi-provider AI backbone +
+
+
+
+
+
+
+
PARSING TREE... [DONE]
+
MAPPING DEPS... [DONE]
+
GENERATING DOCS... [OK]
+
+
+ + {# Architecture card #} +
+
+
+ account_tree +
+

Visual Architecture Maps.

+

+ Auto-generated Mermaid diagrams show how data flows through the system. +

+
+
+ schema + Mermaid · Flowchart · ER +
+
+ + {# Export card #} +
+
+
+ data_object +
+

Export Ready.

+

+ Download README.md, architecture.json, or api.json directly to your project. +

+
+
+
+
+
+ MD · JSON +
+
+
+
+ + {# ── Feature Spotlight ────────────────────────────────────────────────────── #} +
+
+
+

The complete technical ledger for any project.

+
+
+
+ description +
+
+
AI-Written README
+

A complete, professional README with setup instructions, features, and badges — generated from your code.

+
+
+
+
+ account_tree +
+
+
Architecture Blueprint
+

Component maps, data flow diagrams, and a Mermaid chart you can drop straight into documentation.

+
+
+
+
+ api +
+
+
API Reference
+

Auto-detected endpoints, functions, and classes with parameters and return types documented.

+
+
+
+
+ + {# Mock doc preview #} +
+
+
+
+
+
+
+
+ docforge.ai/docs/owner/repo +
+
+
+
+ auto_stories +
+
+
Documentation Preview
+

README.md

+
+
+
+
+
+
+
+
Python
+
Flask
+
MIT
+
+
+
+
+
+
+
+
+ + {# ── Recent Repos ─────────────────────────────────────────────────────────── #} + {% if recent %} +
+

Recent Repositories

+ +
+ {% endif %} + + {# ── CTA ──────────────────────────────────────────────────────────────────── #} +
+
+
+

Ready to document your code?

+

Paste any public GitHub URL above and get a full documentation suite in under a minute.

+ +
+
+ +
+ +{# ── Footer ───────────────────────────────────────────────────────────────── #} +
+
+
+ DocForge +

AI-powered documentation for every GitHub repository.

+
+
+ GitHub +
+
+
+{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/app/tools/git_narrator/__init__.py b/app/tools/git_narrator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/tools/git_narrator/__pycache__/__init__.cpython-314.pyc b/app/tools/git_narrator/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52b21debb36cfcb86c72facabe6dbb9c99e8e782 Binary files /dev/null and b/app/tools/git_narrator/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/tools/git_narrator/__pycache__/narrator.cpython-314.pyc b/app/tools/git_narrator/__pycache__/narrator.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7643d34683d85ad8abc454c8a23aed9792838166 Binary files /dev/null and b/app/tools/git_narrator/__pycache__/narrator.cpython-314.pyc differ diff --git a/app/tools/git_narrator/__pycache__/routes.cpython-314.pyc b/app/tools/git_narrator/__pycache__/routes.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e41897c188bad83a4fabb0fbdd024595324a182 Binary files /dev/null and b/app/tools/git_narrator/__pycache__/routes.cpython-314.pyc differ diff --git a/app/tools/git_narrator/narrator.py b/app/tools/git_narrator/narrator.py new file mode 100644 index 0000000000000000000000000000000000000000..a80e9f55d003a327a80af3300abeb31714746683 --- /dev/null +++ b/app/tools/git_narrator/narrator.py @@ -0,0 +1,112 @@ +"""Git history narrator — turns raw commits into editorial prose.""" +import os +import re +import requests +from app.core.ai import call_ai_json + +_GITHUB_API = "https://api.github.com" +_SYSTEM = """You are a senior engineering writer and technical storyteller. +You transform dry git commit logs into compelling, editorial-quality engineering narratives. +Write like a thoughtful tech lead preparing a sprint retrospective for the whole company. 
+Be specific about what was built, avoid jargon where plain language works, and surface the human story behind the code. +Return ONLY valid JSON — no markdown fences.""" + +_PROMPT_TMPL = """Narrate the following git history into an editorial engineering report. + +GIT HISTORY: +--- +{log} +--- + +Return a JSON object with EXACTLY these keys: +{{ + "period_label": "", + "highlights": [ + {{ + "title": "", + "narrative": "<2-3 paragraph editorial story — what changed, why it matters, what comes next>", + "key_commit": "", + "impact": "" + }} + ], + "tech_debt": [ + {{ + "icon": "", + "title": "", + "description": "<2-3 sentences on what was fixed/refactored and the measurable benefit>" + }} + ], + "milestones": [ + {{ + "status": "", + "title": "", + "narrative": "", + "contributors": [""] + }} + ], + "commits": [ + {{ + "hash": "<7-char hash or empty string>", + "message": "", + "author": "", + "time": "", + "type": "" + }} + ], + "summary_stats": {{ + "total_commits": , + "contributors": , + "features": , + "fixes": + }} +}} + +Aim for 2-3 highlights, 2-3 tech_debt items, 2-3 milestones. +If the log is sparse, extrapolate intelligently from what's there. 
+commits should list ALL commits from the log (max 20).""" + + +def _fetch_github_commits(owner: str, repo: str, limit: int = 30) -> str: + """Fetch recent commits from GitHub and format as git log text.""" + token = os.environ.get("GITHUB_TOKEN", "") + headers = {"Accept": "application/vnd.github.v3+json"} + if token: + headers["Authorization"] = f"Bearer {token}" + r = requests.get(f"{_GITHUB_API}/repos/{owner}/{repo}/commits?per_page={limit}", + headers=headers, timeout=20) + r.raise_for_status() + lines = [] + for c in r.json(): + sha = c["sha"][:7] + msg = c["commit"]["message"].split("\n")[0] + name = c["commit"]["author"]["name"] + date = c["commit"]["author"]["date"] + lines.append(f"commit {sha}\nAuthor: {name}\nDate: {date}\n\n {msg}\n") + return "\n".join(lines) + + +def _parse_github_url(text: str): + """Return (owner, repo) if text looks like a GitHub URL, else None.""" + m = re.search(r"github\.com/([^/\s]+)/([^/\s]+?)(?:\.git)?(?:\s|$)", text) + if m: + return m.group(1), m.group(2) + parts = text.strip().split("/") + if len(parts) == 2 and " " not in text: + return parts[0], parts[1] + return None + + +def narrate(raw_input: str) -> dict: + """Narrate a git history. 
raw_input can be a GitHub URL or raw git log text.""" + parsed = _parse_github_url(raw_input.strip()) + if parsed: + try: + owner, repo = parsed + log = _fetch_github_commits(owner, repo) + except Exception as e: + log = raw_input # fallback to treating as raw log + else: + log = raw_input + + prompt = _PROMPT_TMPL.format(log=log[:8000]) + return call_ai_json([{"role": "user", "content": prompt}], system=_SYSTEM) or {} diff --git a/app/tools/git_narrator/routes.py b/app/tools/git_narrator/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..6d955c5bc7e4a648ad3707c19f0061909cce7896 --- /dev/null +++ b/app/tools/git_narrator/routes.py @@ -0,0 +1,22 @@ +"""Git Narrator routes.""" +from flask import Blueprint, render_template, request, jsonify +from .narrator import narrate + +bp = Blueprint("git_narrator", __name__, template_folder="templates") + + +@bp.route("/") +def index(): + return render_template("git_narrator/index.html") + + +@bp.route("/api/narrate", methods=["POST"]) +def api_narrate(): + body = request.get_json(silent=True) or {} + raw = (body.get("content") or "").strip() + if not raw: + return jsonify({"error": "content is required (GitHub URL or git log text)"}), 400 + if len(raw) < 10: + return jsonify({"error": "Input too short — paste a URL or git log"}), 400 + result = narrate(raw) + return jsonify(result) diff --git a/app/tools/git_narrator/templates/git_narrator/index.html b/app/tools/git_narrator/templates/git_narrator/index.html new file mode 100644 index 0000000000000000000000000000000000000000..bfac1540eeefe8ae437542a061521045e8e3dae9 --- /dev/null +++ b/app/tools/git_narrator/templates/git_narrator/index.html @@ -0,0 +1,509 @@ +{% extends "base.html" %} +{% block title %}Git Narrator — Engineering Stories{% endblock %} + +{% block content %} + +{# ── Toolbar ─────────────────────────────────────────────────────────────────── #} +
+
+
+ auto_stories + Git Narrator +
+ | + +
+
+ + + +
+
+ +{# ── Body ─────────────────────────────────────────────────────────────────────── #} +
+ + {# ── Left: Terminal input zone ───────────────────────────────────────────────── #} + + + {# ── Right: Narrative output ─────────────────────────────────────────────────── #} +
+ + {# Empty state #} +
+
+ auto_stories +
+

The Narrative

+

+ Paste a GitHub URL or git log on the left. The AI writes your commit history as an editorial engineering report. +

+
+ {% for tag in ['Highlights', 'Tech Debt', 'Milestones', 'Contributors', 'Impact'] %} + {{ tag }} + {% endfor %} +
+
+ + {# Loading state #} + + + {# Narrative output #} + +
+
+ +{# ── Footer ─────────────────────────────────────────────────────────────────── #} +
+ +
+ + +
+
+ + + +{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/app/tools/schema_detective/__init__.py b/app/tools/schema_detective/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/tools/schema_detective/__pycache__/__init__.cpython-314.pyc b/app/tools/schema_detective/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63a48dfa98488f837ea70cf095f3aaa7fb95d327 Binary files /dev/null and b/app/tools/schema_detective/__pycache__/__init__.cpython-314.pyc differ diff --git a/app/tools/schema_detective/__pycache__/detective.cpython-314.pyc b/app/tools/schema_detective/__pycache__/detective.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb8f83ed084228a01c3dcb25f65ff4094051c7b5 Binary files /dev/null and b/app/tools/schema_detective/__pycache__/detective.cpython-314.pyc differ diff --git a/app/tools/schema_detective/__pycache__/routes.cpython-314.pyc b/app/tools/schema_detective/__pycache__/routes.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5cc904bdd46bc3f5c16e45f626e96736a96e606 Binary files /dev/null and b/app/tools/schema_detective/__pycache__/routes.cpython-314.pyc differ diff --git a/app/tools/schema_detective/detective.py b/app/tools/schema_detective/detective.py new file mode 100644 index 0000000000000000000000000000000000000000..79031890cedf979ab87906fb43484f52c9d1cbcd --- /dev/null +++ b/app/tools/schema_detective/detective.py @@ -0,0 +1,252 @@ +"""Schema Detective — audits database schemas for design issues.""" +import re +from app.core.ai import call_ai_json, call_ai + +_SYSTEM = """You are a senior database architect and security engineer with deep expertise in SQL, +normalization, indexing strategies, database security, and production reliability. 
+You audit schemas with a critical eye — catching design flaws, security vulnerabilities, +and data integrity risks that cause real production incidents. +Return ONLY valid JSON — no markdown fences, no preamble.""" + + +_PROMPT_TMPL = """Audit the following database schema (and any embedded SQL) for ALL design issues, security vulnerabilities, and integrity risks. +{pre_context} +SCHEMA / SQL: +--- +{schema} +--- + +Return a JSON object with EXACTLY these keys: +{{ + "health_score": , + "summary": "<1-2 sentence plain-language verdict — what's the biggest risk and overall state>", + "findings": [ + {{ + "severity": "", + "title": "", + "description": "<1-2 sentence explanation of the problem and why it matters in production>", + "location": "", + "suggestion": "" + }} + ] +}} + +Severity definitions: +- critical: causes data loss, security breach, or catastrophic data corruption. Examples: + * SQL injection via dynamic query string concatenation with user input + * DELETE or UPDATE statements with no WHERE clause (wipes entire table) + * Passwords or secrets stored in plaintext (no hashing) + * Missing PRIMARY KEY on a business entity table + * Circular or missing FOREIGN KEY constraints breaking referential integrity + * NULL allowed on columns that are business-critical (email, user_id, amount) +- warning: degrades performance, risks integrity under load, or creates operational risk. Examples: + * Unindexed foreign key columns (causes full table scans on joins) + * TEXT type where VARCHAR(n) is appropriate (no length constraint) + * Missing NOT NULL on required business fields + * Missing updated_at / created_at audit columns + * Redundant or overlapping indexes (e.g. index on (a), (a,b), (a,b,c) — first is redundant) + * Missing transactions around multi-step operations +- advice: maintainability and future-proofing. 
Examples: + * Vague column or table names (t1, col1, data, misc, info, temp — not what they represent) + * Single-letter or numeric table aliases in stored procedures + * Missing soft-delete pattern (no is_deleted / deleted_at column) + * No CHECK constraints on enum-like columns + +FK direction rule: the child table (many side) holds the FK column that references the parent (one side). + Correct: products.category_id REFERENCES categories(id) — products is the child, categories is the parent. + Wrong: reversed direction or FK defined on the parent side. + +Naming convention rule: the real problem is meaningless names (t1, t2, col1, data, misc). + NEVER suggest appending _table, _tbl, _record — that is an anti-pattern. Suggest descriptive real names instead. + +Rules: +- Prioritize: list critical findings first, then warnings, then advice +- The CONFIRMED CRITICAL ISSUES above MUST appear in findings as severity=critical — expand on each with real-world impact +- Be specific: reference exact table/column names from the schema — do not generalise +- suggestion must be actionable: if it is a SQL change, show the corrected SQL +- health_score: start at 100, deduct 20-25 per critical, 5-10 per warning, 1-3 per advice; floor at 0 +- Minimum 3 findings if any issues exist; maximum 15 +- Do NOT fabricate issues that are not present in the schema""" + + +def _pre_scan(sql: str) -> list[dict]: + """Deterministic regex scan for the most dangerous SQL patterns. + Returns findings that are guaranteed regardless of what the AI notices.""" + findings = [] + + # 1. SQL Injection — dynamic exec of a variable built by string concatenation + # Covers: EXEC(@sql), EXECUTE(@sql), sp_executesql @sql + exec_pattern = re.compile( + r'(?i)(?:EXEC(?:UTE)?\s*\(\s*@\w+|sp_executesql\s+@\w+)', re.IGNORECASE + ) + # Also catch: SET @var = '...' 
+ @user_input (dynamic SQL building) + concat_pattern = re.compile( + r"(?i)SET\s+@\w+\s*=\s*(?:'[^']*'\s*\+|@\w+\s*\+)", re.IGNORECASE + ) + lines = sql.splitlines() + injection_lines = [] + for i, line in enumerate(lines, 1): + if exec_pattern.search(line) or concat_pattern.search(line): + injection_lines.append(f"line {i}: {line.strip()[:80]}") + if injection_lines: + findings.append({ + "severity": "critical", + "title": "SQL Injection Vulnerability", + "description": ( + "Dynamic SQL is constructed by concatenating user-supplied variables into a " + "query string and then executed with EXEC/EXECUTE. An attacker can inject " + "arbitrary SQL, bypassing all access controls and exfiltrating or destroying data." + ), + "location": "; ".join(injection_lines[:3]), + "suggestion": ( + "Replace string concatenation with parameterized queries. " + "Use sp_executesql with typed parameters:\n" + " EXEC sp_executesql N'SELECT * FROM users WHERE id = @id', " + "N'@id INT', @id = @user_input;" + ), + }) + + # 2. DELETE without WHERE — bare DELETE FROM ; + for m in re.finditer(r'(?im)^\s*DELETE\s+FROM\s+(\w+)\s*;', sql): + table = m.group(1) + findings.append({ + "severity": "critical", + "title": "DELETE Without WHERE Clause", + "description": ( + f"DELETE FROM {table} has no WHERE clause — this permanently deletes " + f"every row in {table} on execution. One accidental or malicious call " + "destroys the entire table's data with no automatic rollback." + ), + "location": f"DELETE FROM {table}", + "suggestion": ( + f"Always scope deletes: DELETE FROM {table} WHERE ;\n" + f"If a full wipe is intentional, use TRUNCATE TABLE {table} explicitly " + "to make the intent obvious." + ), + }) + + # 3. UPDATE without WHERE — UPDATE
SET ... ; (no WHERE before semicolon) + for m in re.finditer( + r'(?is)UPDATE\s+(\w+)\s+SET\s+(?:(?!WHERE).)*?;', sql + ): + table = m.group(1) + stmt_preview = m.group(0).strip()[:80] + findings.append({ + "severity": "critical", + "title": "UPDATE Without WHERE Clause", + "description": ( + f"UPDATE {table} SET ... has no WHERE clause — this overwrites every " + f"row in {table}. Any typo or accidental execution corrupts the entire table." + ), + "location": stmt_preview, + "suggestion": ( + f"Add a WHERE clause: UPDATE {table} SET ... WHERE ;" + ), + }) + + # 4. Plaintext passwords in INSERT statements + # Look for INSERT ... (... password ...) VALUES (... 'literal' ...) + for m in re.finditer( + r"(?is)INSERT\s+INTO\s+\w+[^;]*?(?:password|passwd|pwd)[^;]*?VALUES[^;]*?'([^']{1,100})'[^;]*?;", + sql, + ): + findings.append({ + "severity": "critical", + "title": "Plaintext Password Stored", + "description": ( + "Passwords are inserted as plaintext string literals. Any database breach " + "or log exposure immediately reveals all user credentials with zero effort." + ), + "location": m.group(0).strip()[:80], + "suggestion": ( + "Hash passwords before storage using bcrypt, argon2id, or scrypt. 
" + "Never store or log plaintext passwords:\n" + " password_hash = bcrypt.hashpw(password.encode(), bcrypt.gensalt())\n" + " INSERT INTO users (password) VALUES (:password_hash)" + ), + }) + + return findings + + +def analyze_schema(schema: str) -> dict: + """Audit a database schema and return structured findings.""" + pre_findings = _pre_scan(schema) + + # Inject pre-detected issues into the prompt so the AI elaborates on them + if pre_findings: + titles = "\n".join(f" - {f['title']} (at: {f['location'][:60]})" for f in pre_findings) + pre_context = ( + f"\n\nCONFIRMED CRITICAL ISSUES (detected by static analysis — " + f"you MUST include ALL of these in findings as severity=critical " + f"and explain their full real-world impact):\n{titles}\n" + ) + else: + pre_context = "" + + prompt = _PROMPT_TMPL.format(schema=schema[:8000], pre_context=pre_context) + result = call_ai_json( + [{"role": "user", "content": prompt}], + system=_SYSTEM, + max_tokens=3072, + ) + + if not result or not isinstance(result, dict): + # AI failed — return pre-findings only + return { + "health_score": max(0, 100 - 22 * len(pre_findings)), + "summary": "Static analysis detected critical security issues. AI elaboration unavailable.", + "findings": pre_findings, + } + + if pre_findings: + # Deduplicate: remove any AI findings whose title overlaps with pre-detected ones + pre_titles = {f["title"].lower() for f in pre_findings} + ai_only = [ + f for f in result.get("findings", []) + if f.get("title", "").lower() not in pre_titles + ] + # Pre-findings lead the list (guaranteed, most critical) + result["findings"] = pre_findings + ai_only + # Re-anchor health_score downward for guaranteed criticals + result["health_score"] = max(0, result.get("health_score", 100) - 22 * len(pre_findings)) + + return result + + +_FIX_SYSTEM = ( + "You are a senior database architect. Given a SQL schema with known issues, " + "produce a complete corrected version that fixes all listed problems. 
" + "Return ONLY valid SQL — no markdown fences, no preamble, no trailing commentary." +) + +_FIX_PROMPT = """Fix the following SQL schema by addressing every listed issue. + +ORIGINAL SCHEMA: +--- +{schema} +--- + +ISSUES TO FIX: +{issues} + +Rules: +- Return the COMPLETE corrected schema — all original tables must be present +- Fix every issue listed: add missing PKs, hash passwords, add WHERE clauses, remove SQL injection, add indexes, etc. +- Add brief inline comments only where a fix is non-obvious (e.g. -- hashed via bcrypt) +- Do NOT add features that weren't in the original schema +- Output valid SQL only""" + + +def fix_schema(schema: str, findings: list) -> str: + """Return a corrected SQL schema that addresses all audit findings.""" + issues = "\n".join( + f"- [{f.get('severity','').upper()}] {f.get('title','')}: {f.get('suggestion','')[:300]}" + for f in findings + if f.get("suggestion") + ) + if not issues: + issues = "General cleanup — apply best practices for normalization, naming, and security." 
+ + prompt = _FIX_PROMPT.format(schema=schema[:6000], issues=issues) + return call_ai([{"role": "user", "content": prompt}], system=_FIX_SYSTEM, max_tokens=3000).strip() diff --git a/app/tools/schema_detective/routes.py b/app/tools/schema_detective/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..c65dc09f5c303e44d365487a120dda24c527d0ef --- /dev/null +++ b/app/tools/schema_detective/routes.py @@ -0,0 +1,40 @@ +"""Schema Detective routes.""" +from flask import Blueprint, render_template, request, jsonify +from .detective import analyze_schema, fix_schema + +bp = Blueprint("schema_detective", __name__, template_folder="templates") + + +@bp.route("/") +def index(): + return render_template("schema_detective/index.html") + + +@bp.route("/api/analyze", methods=["POST"]) +def api_analyze(): + body = request.get_json(silent=True) or {} + schema = (body.get("schema") or "").strip() + + if not schema: + return jsonify({"error": "Schema is required — paste your SQL or table definitions"}), 400 + if len(schema) < 30: + return jsonify({"error": "Schema too short — paste at least one CREATE TABLE statement"}), 400 + + result = analyze_schema(schema) + if not result: + return jsonify({"error": "AI failed to analyze schema — please try again"}), 502 + return jsonify(result) + + +@bp.route("/api/fix", methods=["POST"]) +def api_fix(): + body = request.get_json(silent=True) or {} + schema = (body.get("schema") or "").strip() + findings = body.get("findings") or [] + if not schema: + return jsonify({"error": "Schema is required"}), 400 + try: + fixed = fix_schema(schema, findings) + return jsonify({"fixed_schema": fixed}) + except Exception as e: + return jsonify({"error": f"Fix generation failed: {str(e)}"}), 502 diff --git a/app/tools/schema_detective/templates/schema_detective/index.html b/app/tools/schema_detective/templates/schema_detective/index.html new file mode 100644 index 
0000000000000000000000000000000000000000..8582492347ceda460da927b4a0b5451728d5aca3 --- /dev/null +++ b/app/tools/schema_detective/templates/schema_detective/index.html @@ -0,0 +1,528 @@ +{% extends "base.html" %} +{% block title %}Schema Detective — Database Schema Auditor{% endblock %} + +{% block content %} + +
+
+ + search + + Schema Detective +
+
+ + + +
+
+ +
+ + + + +
+ + +
+
+ Schema Editor +
+ + Ctrl+Enter to run audit +
+
+ +
+ + +
+ + +
+ policy +

Paste your schema and click Run Audit

+

Detects missing PKs, bad indexes, naming issues, and more

+
+ + + + + + + + + +
+
+
+ + +
+
+
+
+ AI Engine: Ready +
+
+ Schema Detective +
@bp.route("/api/whisper", methods=["POST"])
def api_whisper():
    """Convert a natural-language question into SQL.

    Expects a JSON object with ``question`` (required string), ``schema``
    (optional string) and ``dialect`` (optional; anything not in
    ``_DIALECTS`` falls back to PostgreSQL).

    Returns:
        200 with the generated result, 400 when ``question`` is missing or
        not a string, 502 when the AI produced nothing usable.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        # get_json() may yield a list/str/number for valid non-object JSON.
        body = {}

    question = body.get("question")
    schema = body.get("schema")
    dialect = body.get("dialect", "PostgreSQL")

    question = question.strip() if isinstance(question, str) else ""
    schema = schema.strip() if isinstance(schema, str) else ""

    if not question:
        return jsonify({"error": "question is required"}), 400
    # Type check first: a list/dict dialect is unhashable and would raise
    # TypeError on the set membership test.
    if not isinstance(dialect, str) or dialect not in _DIALECTS:
        dialect = "PostgreSQL"

    try:
        result = whisper(question, schema, dialect)
    except Exception:
        return jsonify({"error": "AI failed to convert query — please try again"}), 502
    if not result or not isinstance(result, dict):
        return jsonify({"error": "AI failed to convert query — please try again"}), 502
    return jsonify(result)
+
+
+ psychology + SQL Whisperer +
+ +
+ +
+ + +
+
+ +{# ── Body: split panel ───────────────────────────────────────────────────────── #} +
+ + {# Left — inputs #} +
+ + {# NL question #} +
+
+
+ auto_awesome + Ask in plain English +
+ +
+ +
+ + {# Schema #} +
+
+
+ table_chart + Database Schema + (optional) +
+ +
+ +
+ + {# Actions #} +
+ + +
+
+ + {# Right — results #} +
+ + {# Empty state #} +
+
+ terminal +
+

Output will appear here

+

Describe what you need in plain English, paste your schema, then hit Generate.

+
+ {% for tag in ['SELECT', 'JOIN', 'GROUP BY', 'CTE', 'Window Fn', 'Subquery'] %} + {{ tag }} + {% endfor %} +
+
+ + {# Loading state #} + + + {# Results #} + +
+ +
+ +{# ── Status bar ──────────────────────────────────────────────────────────────── #} +
+ +
+ + +
+
+ + + +{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/app/tools/sql_whisperer/whisperer.py b/app/tools/sql_whisperer/whisperer.py new file mode 100644 index 0000000000000000000000000000000000000000..bb8ed9103a9a99a3ec01cc1f55172633c2aa20a7 --- /dev/null +++ b/app/tools/sql_whisperer/whisperer.py @@ -0,0 +1,44 @@ +"""Natural language to SQL engine.""" +from app.core.ai import call_ai_json + +_SYSTEM = """You are an expert SQL engineer and database architect. +Convert natural language questions into precise, optimized SQL queries. +Always produce correct, runnable SQL. Explain your query so a junior dev can learn. +Return ONLY valid JSON — no markdown fences, no preamble. +CRITICAL: All JSON string values must be properly escaped. Use \\n for newlines inside strings.""" + +_PROMPT_TMPL = """Convert this natural language question into SQL. + +QUESTION: {question} + +DATABASE SCHEMA: +{schema} + +DIALECT: {dialect} + +Return JSON with EXACTLY these keys: +{{ + "sql": "", + "explanation": "", + "warnings": [""], + "alternatives": [ + {{"label": "", "sql": "", "trade_off": ""}} + ], + "sample_result_shape": "" +}} + +If the question is ambiguous, make the most reasonable assumption and note it in warnings. 
def whisper(question: str, schema: str, dialect: str = "PostgreSQL") -> dict:
    """Translate a natural-language question into SQL via the AI engine.

    Args:
        question: The user's question; truncated to 2000 characters.
        schema: Optional schema text; truncated to 4000 characters. When
            empty, the prompt tells the model to infer a table structure.
        dialect: Target SQL dialect name inserted into the prompt.

    Returns:
        The parsed JSON object from the model, or ``{}`` when the call
        fails or the reply is not a JSON object. This function never raises
        for AI failures; they are logged instead.
    """
    import logging  # function-scope import keeps this block self-contained

    prompt = _PROMPT_TMPL.format(
        question=question[:2000],
        schema=schema[:4000] if schema else "(no schema provided — infer reasonable table structure)",
        dialect=dialect,
    )
    try:
        result = call_ai_json([{"role": "user", "content": prompt}], system=_SYSTEM)
    except Exception:
        # Best-effort contract is kept (callers get {}), but the failure is
        # recorded so provider outages are visible in the logs.
        logging.getLogger(__name__).exception("whisper: AI call failed")
        return {}
    return result if isinstance(result, dict) else {}
def generate_tests(source_code: str, framework: str, instructions: str = "") -> dict:
    """Generate a test suite for the given source code.

    Args:
        source_code: Code to test; only the first 8000 characters are sent.
        framework: Display name of the target framework, inserted into the
            prompt verbatim.
        instructions: Optional extra guidance for the model.

    Returns:
        A dict with ``test_code`` plus the metadata keys ``test_count``,
        ``coverage_estimate``, ``edge_cases_count`` and ``notes`` (defaults
        are kept for any key the model omitted). Returns ``{}`` when the
        model produced no usable code.
    """
    # NOTE: the prompt template in this module has no {comment} placeholder
    # (it hard-codes "#"), so str.format ignores this value today; it is
    # still passed so a template that adds the placeholder picks it up.
    comment = _COMMENT_PREFIX.get(framework, "//")
    prompt = _PROMPT_TMPL.format(
        framework=framework,
        source_code=source_code[:8000],
        instructions=instructions.strip() or "None — generate comprehensive coverage",
        comment=comment,
    )
    raw = call_ai(
        [{"role": "user", "content": prompt}],
        system=_SYSTEM,
        max_tokens=4096,
    )
    if not raw:
        return {}

    # Strip any accidental markdown fences. The info string may contain
    # upper-case letters or symbols ("C#", "c++", "TypeScript"), which the
    # previous [a-z]* class left behind as a stray first line.
    code = re.sub(r'^```[\w#+.-]*[ \t]*\n?', '', raw.strip(), flags=re.MULTILINE)
    code = re.sub(r'\n?```[ \t]*$', '', code, flags=re.MULTILINE).strip()

    # Extract the metadata comment line. The prompt asks for it at the very
    # end of the file, so prefer the last match over an earlier look-alike.
    meta = {"test_count": 0, "coverage_estimate": 0, "edge_cases_count": 0, "notes": ""}
    matches = list(_META_RE.finditer(code))
    if matches:
        m = matches[-1]
        try:
            parsed = json.loads(m.group(1))
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            # Merge instead of replace so keys the model omitted keep their
            # defaults and the response shape stays stable.
            meta.update(parsed)
        # Remove the meta comment line from the code.
        code = (code[:m.start()].rstrip() + code[m.end():]).strip()

    if not code:
        # Only fences/metadata came back — report failure to the caller.
        return {}

    meta["test_code"] = code
    return meta
failed to generate tests — please try again"}), 502 + return jsonify(result) diff --git a/app/tools/test_forge/templates/test_forge/index.html b/app/tools/test_forge/templates/test_forge/index.html new file mode 100644 index 0000000000000000000000000000000000000000..d6b00e962fc275a92f491f257f20f7a8377b0068 --- /dev/null +++ b/app/tools/test_forge/templates/test_forge/index.html @@ -0,0 +1,464 @@ +{% extends "base.html" %} + +{% block title %}Test Forge — AI Test Suite Generator{% endblock %} + +{% block content %} + + + +
+ + + + +
+ +
+ Source Code + + Generated Suite +
+ + +
+ + +
+
+ Paste Your Code +
+ + +
+
+ + +
+
+ edit_note + +
+
+
+ + +
+
+ Generated Test Suite + Python / pytest +
+ + +
+ science +

Paste your code and click Generate Tests

+

Ctrl+Enter to generate · 8 frameworks supported

+
+ + + + + + + + + +
+
+
+
+ + +
+
+
+
+ AI Engine: Ready +
+
+
+ Python / pytest + Test Forge +
+
+{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/docforge.db b/docforge.db new file mode 100644 index 0000000000000000000000000000000000000000..4410bda55bad27954e72ab65919e742875d57998 Binary files /dev/null and b/docforge.db differ diff --git a/docforge.db-shm b/docforge.db-shm new file mode 100644 index 0000000000000000000000000000000000000000..b1b5b4536489f9bce89ab8b5b8b58a78f8338f77 Binary files /dev/null and b/docforge.db-shm differ diff --git a/docforge.db-wal b/docforge.db-wal new file mode 100644 index 0000000000000000000000000000000000000000..ed406883cf0f4d478385a4cabf7e394c6fc55b0f Binary files /dev/null and b/docforge.db-wal differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..82a2e95bdb7f79c9ab45959b1dd99a3ff2e7e8df --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +flask>=3.0 +flask-wtf>=1.2 +python-dotenv>=1.0 +requests>=2.31 +gunicorn>=21.2 +pypdf>=4.0 diff --git a/wsgi.py b/wsgi.py new file mode 100644 index 0000000000000000000000000000000000000000..0532a68da88aa774f669aaec95180699b7e0d6dc --- /dev/null +++ b/wsgi.py @@ -0,0 +1,10 @@ +import os +from pathlib import Path +_env = Path(__file__).parent / ".env" +if _env.exists(): + from dotenv import load_dotenv + load_dotenv(_env, override=True) +from app import create_app +app = create_app() +if __name__ == "__main__": + app.run(host="0.0.0.0", port=7861, debug=False)