# promptforge/backend/prompt_logic.py
# Upload PromptForge v1.0 β€” Structured prompt generator for Google AI Studio
# (commit ea65915, verified — the avatar caption and commit metadata above the
# module docstring were file-viewer page residue; kept here as comments so the
# file parses as valid Python.)
"""
PromptForge v4.0 β€” Core prompt generation engine.
Upgrades: target model formatting, extended personas, heuristics, word count, auto-tagging.
"""
from __future__ import annotations
import re
import uuid
import textwrap
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from schemas import (
PromptManifest,
StructuredPrompt,
PersonaType,
StyleType,
InstructionSettings,
TargetModel,
)
# ── Persona role strings ──────────────────────────────────────────────────────
# Maps each built-in persona to the expert role line injected into "## ROLE".
# PersonaType.custom maps to "" because the caller-supplied custom_persona
# string is used instead (see _resolve_role).
_PERSONA_ROLES: Dict[PersonaType, str] = {
    PersonaType.default: "General AI Assistant",
    PersonaType.senior_dev: "Senior Software Engineer with 10+ years of full-stack experience",
    PersonaType.data_scientist: "Senior Data Scientist specialising in ML/AI pipelines and statistical analysis",
    PersonaType.tech_writer: "Technical Writer producing clear, precise developer documentation and tutorials",
    PersonaType.product_mgr: "Product Manager focused on user-centric, data-driven decision making",
    PersonaType.security_eng: "Security Engineer with expertise in threat modelling, OWASP, and secure-by-design systems",
    PersonaType.devops_eng: "DevOps / Platform Engineer specialising in CI/CD, Kubernetes, and cloud-native infrastructure",
    PersonaType.ml_engineer: "Machine Learning Engineer experienced in LLM fine-tuning, model deployment, and MLOps",
    PersonaType.custom: "",
}
# Human-readable style guidance injected into the "## STYLE & TONE" section.
_STYLE_DESCRIPTIONS: Dict[StyleType, str] = {
    StyleType.professional: "Professional and precise β€” balance technical accuracy with readability. Use exact terminology.",
    StyleType.concise: "Ultra-concise β€” bullet points preferred, zero filler. Every word must earn its place.",
    StyleType.detailed: "Thoroughly detailed β€” explain every decision, include rationale, alternatives, and trade-offs.",
    StyleType.beginner: "Beginner-friendly β€” avoid jargon, explain every acronym, use analogies and step-by-step breakdowns.",
    StyleType.formal: "Formal prose β€” structured with headings, professional tone suitable for specifications or reports.",
    StyleType.creative: "Engaging and vivid β€” use narrative techniques to make even dry content memorable without sacrificing accuracy.",
}
# (keywords, role) pairs scanned IN ORDER by _resolve_role; the first entry
# whose keywords match the lowercased instruction wins, so more specific
# domains are listed before broader ones.
_HEURISTIC_ROLES: List[Tuple[List[str], str]] = [
    (["react", "vue", "angular", "svelte", "nextjs", "remix", "component", "frontend", "ui", "ux", "tailwind", "css", "html"], "Senior Frontend Engineer"),
    (["api", "rest", "restful", "fastapi", "flask", "django", "express", "graphql", "grpc", "backend", "server", "endpoint", "microservice"], "Senior Backend Engineer"),
    (["sql", "database", "postgres", "postgresql", "mysql", "mongo", "mongodb", "redis", "dynamodb", "query", "schema", "orm", "migration"], "Database Architect"),
    (["test", "unittest", "pytest", "jest", "cypress", "playwright", "coverage", "tdd", "bdd", "e2e", "integration test", "mock"], "QA / Test Automation Engineer"),
    (["docker", "kubernetes", "k8s", "helm", "terraform", "ansible", "ci/cd", "github actions", "jenkins", "deploy", "cloud", "aws", "gcp", "azure", "infra"], "DevOps / Cloud Engineer"),
    (["machine learning", "ml", "deep learning", "neural", "train", "fine-tune", "dataset", "pytorch", "tensorflow", "hugging face", "llm", "transformer", "embedding"], "Machine Learning Engineer"),
    (["data analysis", "pandas", "numpy", "visualization", "matplotlib", "seaborn", "plotly", "chart", "plot", "etl", "pipeline", "spark", "dbt"], "Data Scientist"),
    (["security", "pentest", "vulnerability", "cve", "owasp", "auth", "oauth", "jwt", "encrypt", "decrypt", "ssl", "tls", "xss", "csrf"], "Security Engineer"),
    (["write", "blog", "article", "essay", "copy", "content", "documentation", "readme", "wiki", "tutorial", "guide", "how-to"], "Technical Writer"),
    (["summarize", "summary", "tldr", "abstract", "extract", "distill", "recap"], "Technical Summarizer"),
    (["translate", "localize", "i18n", "l10n", "language", "multilingual"], "Multilingual Specialist"),
    (["product", "roadmap", "user story", "backlog", "sprint", "okr", "kpi", "stakeholder", "feature", "discovery"], "Product Manager"),
    (["sql", "bi", "dashboard", "report", "analytics", "metrics", "tableau", "looker", "powerbi"], "Business Intelligence Analyst"),
    (["mobile", "ios", "android", "swift", "kotlin", "react native", "flutter", "expo"], "Mobile Engineer"),
    (["blockchain", "smart contract", "solidity", "ethereum", "web3", "defi", "nft", "dao"], "Blockchain / Web3 Engineer"),
    (["game", "unity", "unreal", "godot", "shader", "physics", "rendering", "game design"], "Game Developer"),
]
# (regex, constraint) pairs: every pattern that matches the lowercased
# instruction contributes its constraint (see _build_constraints). Note the
# "no comment[s]" pattern precedes the generic "comment[s]" pattern.
_CONSTRAINT_PATTERNS: List[Tuple[str, str]] = [
    (r"\btypescript\b", "Use TypeScript with strict mode enabled (`strict: true` in tsconfig)."),
    (r"\bpython\b", "Use Python 3.11+; follow PEP-8 style guide; include type hints everywhere."),
    (r"\btailwind(?:css)?\b", "Use TailwindCSS utility classes exclusively; avoid custom CSS unless unavoidable."),
    (r"\bunit test[s]?\b|\bjest\b|\bpytest\b|\bvitest\b", "Include comprehensive unit tests with β‰₯80% line coverage."),
    (r"\bjson\b", "All structured data must be valid, parseable JSON; validate with a schema."),
    (r"\baccessib\w+\b|\bwcag\b|\ba11y\b", "Ensure WCAG 2.1 AA accessibility compliance (ARIA labels, keyboard nav, contrast β‰₯4.5:1)."),
    (r"\bresponsive\b", "Design must be fully responsive across mobile (320px+), tablet, and desktop."),
    (r"\bdocker\b", "Provide a multi-stage `Dockerfile` and a `docker-compose.yml`."),
    (r"\bno comment[s]?\b", "Do not include inline code comments."),
    (r"\bcomment[s]?\b", "Include clear, concise inline comments explaining every non-obvious logic block."),
    (r"\berror handling\b|\bexception\b", "Include comprehensive error/exception handling with user-friendly messages and structured logging."),
    (r"\blogg?ing\b", "Add structured logging (JSON format preferred) at appropriate severity levels."),
    (r"\bpagination\b", "Implement cursor- or offset-based pagination with configurable page size."),
    (r"\bcach(e|ing)\b", "Implement caching with appropriate TTL, cache-key strategy, and invalidation logic."),
    (r"\bsecurity\b|\bauth(?:entication|orization)?\b", "Follow OWASP Top-10 guidelines; validate and sanitize all inputs; never trust client data."),
    (r"\bdark ?mode\b", "Support both light and dark colour schemes via CSS custom properties or Tailwind dark:."),
    (r"\bi18n\b|\binternat\w+\b", "Internationalise all user-facing strings; use i18n library (e.g., react-i18next, Fluent)."),
    (r"\bperformance\b|\boptimiz\w+\b", "Profile and optimise for performance; include Big-O analysis where relevant."),
    (r"\bgit\b|\bversion control\b", "Include `.gitignore`, conventional commit messages, and branch-naming guidance."),
    (r"\bwebsocket\b|\breal.?time\b|\bsse\b", "Use WebSockets or Server-Sent Events for real-time communication; handle reconnection."),
    (r"\bci/?cd\b|\bgithub actions\b|\bpipeline\b", "Define a CI/CD pipeline (GitHub Actions preferred); include lint, test, and build stages."),
    (r"\bmigration\b|\bschema change\b", "Provide reversible database migrations with rollback scripts."),
    (r"\benv(?:ironment)? var\b|\.env\b", "Document all environment variables in `.env.example`; never hard-code secrets."),
]
# Safety rules appended unconditionally to every generated prompt.
_SAFETY_DEFAULTS: List[str] = [
    "Do not produce harmful, misleading, or unethical content.",
    "Respect intellectual property; never reproduce copyrighted material verbatim.",
    "If the request is ambiguous or potentially harmful, ask for clarification before proceeding.",
    "Adhere to Google AI Studio usage policies and Responsible AI guidelines.",
    "Do not expose sensitive data, API keys, passwords, or PII in any output.",
    "Prefer established, well-maintained libraries over custom implementations for security-critical code.",
]
# ── Public API ────────────────────────────────────────────────────────────────
def build_manifest(
    instruction: str,
    extra_context: Optional[str] = None,
    version: int = 1,
    existing_id: Optional[str] = None,
    persona: PersonaType = PersonaType.default,
    custom_persona: Optional[str] = None,
    style: StyleType = StyleType.professional,
    user_constraints: Optional[List[str]] = None,
    settings_id: Optional[str] = None,
) -> PromptManifest:
    """Assemble a complete PromptManifest from a raw instruction.

    The instruction is lower-cased once and pushed through the heuristic
    pipeline (role, task, input/output formats, constraints, examples,
    tags), rendered to raw prompt text, and wrapped in a manifest whose
    status starts as "pending".
    """
    lowered = instruction.lower()
    timestamp = datetime.utcnow()

    # Heuristic inference pipeline — every helper reads the lowercased text.
    resolved_role = _resolve_role(persona, custom_persona, lowered)
    formatted_task = _format_task(instruction)
    inferred_input = _infer_input_format(lowered)
    inferred_output = _infer_output_format(lowered)
    merged_constraints = _build_constraints(lowered, user_constraints or [])
    style_text = _STYLE_DESCRIPTIONS.get(style, _STYLE_DESCRIPTIONS[StyleType.professional])
    safety_rules = list(_SAFETY_DEFAULTS)  # copy: callers may mutate the prompt
    few_shot = _build_examples(lowered, resolved_role)
    tags = _auto_tag(lowered)

    rendered = _render_raw_prompt(
        role=resolved_role,
        task=formatted_task,
        input_fmt=inferred_input,
        output_fmt=inferred_output,
        constraints=merged_constraints,
        style=style_text,
        safety=safety_rules,
        examples=few_shot,
        extra_context=extra_context,
    )
    structured = StructuredPrompt(
        role=resolved_role,
        task=formatted_task,
        input_format=inferred_input,
        output_format=inferred_output,
        constraints=merged_constraints,
        style=style_text,
        safety=safety_rules,
        examples=few_shot,
        raw_prompt_text=rendered,
        word_count=len(rendered.split()),
    )
    return PromptManifest(
        prompt_id=existing_id or str(uuid.uuid4()),
        version=version,
        created_at=timestamp,
        updated_at=timestamp,
        instruction=instruction,
        status="pending",
        structured_prompt=structured,
        explanation=_generate_explanation(
            role=resolved_role,
            instruction=instruction,
            constraints=merged_constraints,
            persona=persona,
            style=style,
        ),
        settings_id=settings_id,
        persona_used=persona,
        style_used=style,
        tags=tags,
    )
def build_manifest_from_settings(settings: InstructionSettings) -> PromptManifest:
    """Build a manifest directly from a saved InstructionSettings record."""
    kwargs = {
        "instruction": settings.instruction,
        "extra_context": settings.extra_context,
        "persona": settings.persona,
        "custom_persona": settings.custom_persona,
        "style": settings.style,
        "user_constraints": settings.constraints,
        "settings_id": settings.settings_id,
    }
    return build_manifest(**kwargs)
def apply_edits(manifest: PromptManifest, edits: Dict[str, Any]) -> PromptManifest:
    """Apply field-level edits to the structured prompt and re-render.

    Returns a new manifest copy with status "approved" and a fresh
    updated_at timestamp; the input manifest is not mutated.

    NOTE(review): the re-render omits extra_context, so any
    "## ADDITIONAL CONTEXT" section from the original render is lost
    after an edit — confirm this is intended.
    """
    edited = manifest.structured_prompt.model_copy(update=edits)
    rerendered = _render_raw_prompt(
        role=edited.role,
        task=edited.task,
        input_fmt=edited.input_format,
        output_fmt=edited.output_format,
        constraints=edited.constraints,
        style=edited.style,
        safety=edited.safety,
        examples=edited.examples,
    )
    edited = edited.model_copy(
        update={
            "raw_prompt_text": rerendered,
            "word_count": len(rerendered.split()),
        }
    )
    return manifest.model_copy(
        update={
            "structured_prompt": edited,
            "status": "approved",
            "updated_at": datetime.utcnow(),
        }
    )
def refine_with_feedback(manifest: PromptManifest, feedback: str) -> PromptManifest:
    """Create the next version of a manifest with user feedback folded in.

    The feedback is appended to the original instruction as a
    "REFINEMENT REQUEST" and the whole pipeline is re-run under the same
    prompt_id with version incremented.

    Bug fix: a manifest built with PersonaType.custom previously lost its
    custom role on refinement — custom_persona was never forwarded and
    _PERSONA_ROLES maps PersonaType.custom to "", so the refined prompt
    rendered "You are a .". The custom role string is preserved verbatim
    in structured_prompt.role, so we recover it from there.
    """
    custom_role: Optional[str] = None
    if manifest.persona_used == PersonaType.custom:
        custom_role = manifest.structured_prompt.role
    return build_manifest(
        instruction=manifest.instruction + "\n\nREFINEMENT REQUEST: " + feedback,
        version=manifest.version + 1,
        existing_id=manifest.prompt_id,
        persona=manifest.persona_used,
        custom_persona=custom_role,
        style=manifest.style_used,
        settings_id=manifest.settings_id,
    )
def generate_explanation(manifest: PromptManifest) -> Tuple[str, List[str]]:
    """Return (explanation text, key decision bullets) for a manifest.

    Reuses the explanation stored on the manifest when present; otherwise
    regenerates it from the manifest's structured fields.
    """
    if manifest.explanation:
        explanation = manifest.explanation
    else:
        explanation = _generate_explanation(
            role=manifest.structured_prompt.role,
            instruction=manifest.instruction,
            constraints=manifest.structured_prompt.constraints,
            persona=manifest.persona_used,
            style=manifest.style_used,
        )
    return explanation, _extract_key_decisions(manifest)
def apply_target_formatting(structured: StructuredPrompt, target: TargetModel) -> StructuredPrompt:
    """
    Adjust the raw prompt text based on target model conventions.
    This does NOT change the underlying structured data, only the final text representation.
    """
    # NOTE(review): for the openai/anthropic targets only the bare task is
    # kept — role, constraints, safety, and examples are dropped from the
    # text. Confirm this lossy behaviour is intended.
    if target == TargetModel.openai:
        # OpenAI chat convention: system / user / assistant sections.
        formatted = (
            f"system:\nYou are a helpful assistant.\n\n"
            f"user:\n{structured.task}\n\n"
            f"assistant:\n"
        )
    elif target == TargetModel.anthropic:
        # Legacy Claude text-completion convention.
        formatted = f"\n\nHuman: {structured.task}\n\nAssistant:"
    elif target == TargetModel.generic:
        # Generic targets: demote the two leading section headers one level.
        formatted = structured.raw_prompt_text.replace("## ROLE", "### Role").replace("## TASK", "### Task")
    else:
        # google_ai_studio (default): keep the rendered text untouched.
        formatted = structured.raw_prompt_text
    return structured.model_copy(update={"raw_prompt_text": formatted})
# ── Private helpers ───────────────────────────────────────────────────────────
def _resolve_role(
    persona: PersonaType, custom_persona: Optional[str], lower: str
) -> str:
    """Resolve the expert role string for the "## ROLE" section.

    Precedence: explicit custom persona > any non-default persona >
    keyword heuristics on the lowercased instruction > generic fallback.

    Bug fix: keyword detection now requires the keyword to start at a word
    boundary instead of matching any substring. Previously "ui" fired
    inside "build" (tagging almost any instruction as frontend), "ml"
    inside "xml"/"html", and "bi" inside "mobile". Matching is
    prefix-based (boundary on the left only) so intentional stems such as
    "train" -> "training" and "auth" -> "authentication" still match.
    """
    if persona == PersonaType.custom and custom_persona:
        return custom_persona
    if persona != PersonaType.default:
        return _PERSONA_ROLES.get(persona, "General AI Assistant")
    for keywords, role in _HEURISTIC_ROLES:
        # re.escape keeps literal phrases like "ci/cd" and "how-to" intact.
        if any(re.search(rf"\b{re.escape(kw)}", lower) for kw in keywords):
            return role
    return "General AI Assistant"
def _format_task(instruction: str) -> str:
task = instruction.strip()
if not task.endswith((".", "!", "?")):
task += "."
return task
def _infer_input_format(lower: str) -> str:
if any(k in lower for k in ["json", "object", "dict", "payload", "request body"]):
return "A JSON object containing the relevant fields described in the task. Validate the schema before processing."
if any(k in lower for k in ["file", "upload", "csv", "pdf", "spreadsheet", "xlsx"]):
return "A file provided as a path, URL, or base64-encoded string. Include MIME type where relevant."
if any(k in lower for k in ["image", "photo", "screenshot", "diagram", "figure", "svg"]):
return "An image as a URL or base64-encoded string. Specify width, height, and format metadata."
if any(k in lower for k in ["url", "link", "website", "webpage", "endpoint"]):
return "A URL or list of URLs. Validate reachability and parse with appropriate scraping/HTTP tools."
if any(k in lower for k in ["sql", "query", "database"]):
return "A database schema definition plus a natural-language query or set of requirements."
return "A plain-text string describing the user's request, requirements, or content to process."
def _infer_output_format(lower: str) -> str:
if any(k in lower for k in ["json", "structured", "object", "dict"]):
return "A well-formatted JSON object with clearly named, camelCase keys. No prose outside the JSON block. Include a JSON schema definition."
if any(k in lower for k in ["markdown", "md", "readme", "documentation", "doc"]):
return "A Markdown-formatted document with H1/H2/H3 hierarchy, fenced code blocks, and a table of contents."
if any(k in lower for k in ["code", "script", "function", "class", "component", "snippet"]):
return "Source code in a properly labelled fenced code block. Add a brief explanation before AND a usage example after."
if any(k in lower for k in ["list", "bullet", "steps", "enumerat", "checklist"]):
return "A numbered or bulleted list with concise, actionable items. Group related items under subheadings."
if any(k in lower for k in ["report", "analysis", "summary", "audit"]):
return "A structured report: Executive Summary β†’ Findings β†’ Recommendations β†’ Appendix."
if any(k in lower for k in ["table", "comparison", "matrix", "grid"]):
return "A Markdown table with descriptive column headers, aligned cells, and a summary row."
if any(k in lower for k in ["email", "letter", "message", "memo"]):
return "Formatted email/letter with Subject, Greeting, Body, Sign-off. Formal register unless instructed otherwise."
return "A clear, well-structured plain-text response with logical section breaks."
def _build_constraints(lower: str, user_constraints: List[str]) -> List[str]:
    """Collect keyword-inferred constraints plus de-duplicated user rules.

    Pattern-derived constraints come first (in _CONSTRAINT_PATTERNS order),
    then user constraints that are not already present; a generic fallback
    is used when nothing matched at all.

    Bug fix: an instruction containing "no comments" previously matched
    BOTH the "no comment[s]" pattern and the generic "comment[s]" pattern,
    injecting contradictory rules ("do not include comments" and "include
    comments"). The explicit negative rule now wins.
    """
    found: List[str] = []
    for pattern, constraint in _CONSTRAINT_PATTERNS:
        # `lower` is already lowercased; IGNORECASE kept for safety.
        if re.search(pattern, lower, re.IGNORECASE):
            found.append(constraint)
    # Resolve the contradictory comment pair — these literals must stay in
    # sync with the constraint strings in _CONSTRAINT_PATTERNS.
    no_comments = "Do not include inline code comments."
    want_comments = "Include clear, concise inline comments explaining every non-obvious logic block."
    if no_comments in found and want_comments in found:
        found.remove(want_comments)
    seen = set(found)
    for uc in user_constraints:
        clean = uc.strip()
        if clean and clean not in seen:
            found.append(clean)
            seen.add(clean)
    if not found:
        found.append("Keep the response concise and directly relevant to the stated task.")
    return found
def _auto_tag(lower: str) -> List[str]:
"""Infer tags from instruction text for display in history."""
candidates = {
"react": ["react", "jsx", "tsx"],
"typescript": ["typescript", "tsx"],
"python": ["python", "fastapi", "flask", "django"],
"testing": ["test", "pytest", "jest", "coverage"],
"devops": ["docker", "kubernetes", "ci/cd", "terraform"],
"security": ["security", "auth", "jwt", "owasp"],
"ml": ["machine learning", "llm", "pytorch", "tensorflow"],
"frontend": ["css", "html", "tailwind", "ui", "component"],
"backend": ["api", "backend", "server", "endpoint"],
"database": ["sql", "database", "postgres", "mongo"],
"mobile": ["ios", "android", "react native", "flutter"],
"writing": ["blog", "article", "documentation", "readme"],
}
found = []
for tag, keywords in candidates.items():
if any(kw in lower for kw in keywords):
found.append(tag)
return found[:6]
def _build_examples(lower: str, role: str) -> Optional[List[Dict[str, str]]]:
    """Return few-shot input/output example pairs for detected domains.

    Scans the lowercased instruction for domain markers (React/components,
    summarisation, FastAPI/API endpoints, SQL) and returns a list of
    {"input": ..., "output": ...} dicts, or None when nothing matched.

    NOTE(review): the ``role`` parameter is accepted but never read in
    this body — confirm whether examples were meant to vary by role.
    """
    examples = []
    # React / component work -> TypeScript <Button> example.
    if "react" in lower or "component" in lower:
        examples.append(
            {
                "input": "Create a reusable `<Button>` component.",
                "output": (
                    "```tsx\ninterface ButtonProps {\n label: string;\n variant?: 'primary' | 'secondary' | 'danger';\n"
                    " onClick: () => void;\n disabled?: boolean;\n}\n\nexport const Button = ({\n label, variant = 'primary', onClick, disabled = false\n}: ButtonProps) => (\n"
                    " <button\n onClick={onClick}\n disabled={disabled}\n aria-disabled={disabled}\n"
                    " className={`px-4 py-2 rounded font-semibold transition ${\n primary: 'bg-indigo-600 text-white hover:bg-indigo-700',\n"
                    " secondary: 'bg-gray-100 text-gray-800 hover:bg-gray-200',\n danger: 'bg-red-600 text-white hover:bg-red-700',\n }[variant]}`}\n >\n {label}\n </button>\n);\n```"
                ),
            }
        )
    # Summarisation requests -> one-sentence summary example.
    if "summarize" in lower or "summary" in lower:
        examples.append(
            {
                "input": "Summarise the following paragraph in one sentence.",
                "output": "**Original**: 'The Apollo 11 mission…' β†’ **Summary**: 'Apollo 11 was the first crewed mission to land on the Moon, on 20 July 1969.'",
            }
        )
    # FastAPI (or generic API endpoint) -> GET /users endpoint example.
    if "fastapi" in lower or ("api" in lower and "endpoint" in lower):
        examples.append(
            {
                "input": "Create a `GET /users` endpoint.",
                "output": (
                    "```python\nfrom fastapi import APIRouter, Depends, HTTPException, status\n"
                    "from sqlalchemy.orm import Session\nfrom .schemas import UserOut\nfrom .models import User\nfrom .database import get_db\n\n"
                    "router = APIRouter(prefix='/users', tags=['Users'])\n\n"
                    "@router.get('/', response_model=list[UserOut], summary='List all users')\nasync def list_users(\n"
                    " skip: int = 0, limit: int = 100,\n db: Session = Depends(get_db),\n) -> list[User]:\n"
                    " return db.query(User).offset(skip).limit(limit).all()\n```"
                ),
            }
        )
    # SQL example is a fallback: only added when no other domain matched.
    if ("sql" in lower or "query" in lower) and not examples:
        examples.append(
            {
                "input": "Get all users registered in the last 30 days.",
                "output": "```sql\nSELECT id, email, created_at\nFROM users\nWHERE created_at >= NOW() - INTERVAL '30 days'\nORDER BY created_at DESC;\n```",
            }
        )
    return examples if examples else None
def _generate_explanation(
    role: str,
    instruction: str,
    constraints: List[str],
    persona: PersonaType,
    style: StyleType,
) -> str:
    """Build the human-readable rationale shown alongside a generated prompt.

    NOTE(review): the ``persona`` parameter is accepted but never read in
    this body — confirm whether it should influence the explanation text.
    """
    # Preview is capped at 90 characters; an ellipsis marks truncation.
    instr_preview = instruction[:90].rstrip() + ("…" if len(instruction) > 90 else "")
    style_desc = _STYLE_DESCRIPTIONS.get(style, "")[:80]
    # Only the first five constraints are listed; the rest are summarised.
    constraint_bullets = "\n".join(f" β€’ {c}" for c in constraints[:5])
    if len(constraints) > 5:
        constraint_bullets += f"\n β€’ … and {len(constraints) - 5} more"
    # Trailing backslashes join the wrapped sentences into single lines;
    # dedent strips the common indentation of the template.
    return textwrap.dedent(f"""\
        **Role Assignment β€” why "{role}"?**
        The instruction "{instr_preview}" contains domain signals that map most precisely to a {role}. \
        Assigning the correct expert role primes the model to adopt the right vocabulary, \
        depth conventions, and problem-solving heuristics for this task.
        **Style β€” "{style.value}"**
        {style_desc}
        This style was applied to calibrate verbosity, formality, and technical depth to \
        the apparent audience and intent of the instruction.
        **Constraints applied ({len(constraints)} total)**
        These were inferred from keywords in the instruction and merged with any user-defined rules:
        {constraint_bullets}
        **Safety guardrails**
        Six default safety rules are always injected, aligned with Google AI Studio's Responsible AI \
        policies. They prevent harmful content, IP violations, and accidental data exposure.
        **Few-shot examples**
        Where domain patterns were detected (React, FastAPI, SQL, etc.), concrete input→output \
        examples are injected to anchor the model's output format and quality expectations.
        """).strip()
def _extract_key_decisions(manifest: PromptManifest) -> List[str]:
    """Summarise the headline generation decisions as short display strings."""
    prompt = manifest.structured_prompt
    style_blurb = _STYLE_DESCRIPTIONS[manifest.style_used][:55]
    decisions = [
        f"Role: {prompt.role}",
        f"Style: {manifest.style_used.value} β€” {style_blurb}…",
        f"Output type: {prompt.output_format[:60]}…",
        f"{len(prompt.constraints)} constraint(s) applied",
        f"{len(prompt.safety)} safety guardrail(s) injected",
        f"~{prompt.word_count} words in generated prompt",
    ]
    if prompt.examples:
        decisions.append(f"{len(prompt.examples)} few-shot example(s) included")
    if manifest.tags:
        decisions.append(f"Auto-tagged: {', '.join(manifest.tags)}")
    return decisions
def _render_raw_prompt(
role: str,
task: str,
input_fmt: str,
output_fmt: str,
constraints: List[str],
style: str,
safety: List[str],
examples: Optional[List[Dict[str, str]]] = None,
extra_context: Optional[str] = None,
) -> str:
lines = [
f"## ROLE",
f"You are a {role}.",
"",
f"## TASK",
task,
"",
f"## INPUT FORMAT",
input_fmt,
"",
f"## OUTPUT FORMAT",
output_fmt,
"",
"## CONSTRAINTS",
]
for i, c in enumerate(constraints, 1):
lines.append(f"{i}. {c}")
lines += [
"",
"## STYLE & TONE",
style,
"",
"## SAFETY GUIDELINES",
]
for i, s in enumerate(safety, 1):
lines.append(f"{i}. {s}")
if extra_context:
lines += ["", "## ADDITIONAL CONTEXT", extra_context]
if examples:
lines += ["", "## FEW-SHOT EXAMPLES"]
for ex in examples:
lines += [f"**Input:** {ex['input']}", f"**Expected Output:**\n{ex['output']}", ""]
lines += [
"",
"---",
"*Generated by PromptForge v4.0 β€” optimised for Google AI Studio.*",
]
return "\n".join(lines)