Spaces:

build-small-hackathon
/

post-audit

Running

App Files Files Community

post-audit / audit_client.py

pasternake

feat(ui): light/dark theme toggle (PR #7)

b90345a verified 21 days ago

Raw

History Blame Contribute Delete

7.27 kB

	"""HTTP client for Modal audit endpoint with mock fallback."""

	from __future__ import annotations

	import json
	import os
	import urllib.error
	import urllib.request
	from typing import Any

	from merge import parse_llm_json
	from prompts import FEW_SHOT_ASSISTANT, build_messages


	def get_modal_url() -> str | None:
	    """Return the configured Modal audit endpoint URL, or ``None`` if there is none.

	    ``MODAL_AUDIT_URL`` takes precedence; ``MODAL_AUDIT_ENDPOINT`` is consulted
	    only when the former is unset or empty. A variable that is set to the empty
	    string counts as unset, so the result is either a non-empty string or ``None``.
	    """
	    return (
	        os.environ.get("MODAL_AUDIT_URL")
	        or os.environ.get("MODAL_AUDIT_ENDPOINT")
	        or None  # normalise "" to None so the return value matches the annotation
	    )


	def get_modal_timeout() -> float:
	    """Return the Modal request timeout in seconds.

	    Reads ``MODAL_AUDIT_TIMEOUT``. An unset, empty, or whitespace-only value
	    falls back to 300 seconds.

	    Raises:
	        ValueError: if the variable is set to something that is not a number.
	    """
	    # A cold Modal container loads the quantized model (and on a fresh deploy
	    # pulls the GGUF), which can push the first request past a minute. Generous default.
	    raw = os.environ.get("MODAL_AUDIT_TIMEOUT", "").strip()
	    if not raw:
	        # Set-but-empty env vars are common in hosted configs; treat them as unset
	        # instead of failing on float("").
	        return 300.0
	    try:
	        return float(raw)
	    except ValueError as exc:
	        raise ValueError(
	            f"MODAL_AUDIT_TIMEOUT must be a number of seconds, got {raw!r}"
	        ) from exc


	def get_ollama_model() -> str | None:
	    """Return the local Ollama model tag (e.g. 'gemma4:e4b'), or ``None`` if unset.

	    Reads ``OLLAMA_MODEL``. A non-empty value enables the local-LLM path; an
	    empty string is treated the same as an unset variable.
	    """
	    return os.environ.get("OLLAMA_MODEL") or None


	def get_ollama_url() -> str:
	    """Return the base URL of the Ollama server.

	    Reads ``OLLAMA_URL``. An unset or empty variable falls back to
	    ``http://localhost:11434``; an empty base would otherwise produce a
	    scheme-less request URL once a path is appended to it.
	    """
	    return os.environ.get("OLLAMA_URL") or "http://localhost:11434"


	def get_ollama_timeout() -> float:
	    """Return the Ollama request timeout in seconds.

	    Reads ``OLLAMA_TIMEOUT``. An unset, empty, or whitespace-only value falls
	    back to 300 seconds.

	    Raises:
	        ValueError: if the variable is set to something that is not a number.
	    """
	    # Local CPU/Metal inference is slow: a full audit is ~2.5 min on an M1 with
	    # gemma3:4b, and the first (cold) call adds model-load time. Generous default.
	    raw = os.environ.get("OLLAMA_TIMEOUT", "").strip()
	    if not raw:
	        # Treat a set-but-empty variable as unset instead of failing on float("").
	        return 300.0
	    try:
	        return float(raw)
	    except ValueError as exc:
	        raise ValueError(
	            f"OLLAMA_TIMEOUT must be a number of seconds, got {raw!r}"
	        ) from exc


	def backend_label() -> str:
	    """Describe, for display, which inference backend is currently configured.

	    Checks the Modal URL first and the Ollama model tag second; when neither
	    is configured the label names the mock and the variables that enable a
	    real backend.
	    """
	    modal_url = get_modal_url()
	    if not modal_url:
	        ollama_tag = get_ollama_model()
	        if not ollama_tag:
	            return "mock LLM (set MODAL_AUDIT_URL or OLLAMA_MODEL)"
	        return f"local Ollama ({ollama_tag})"
	    return "live Gemma 4 E4B on Modal"


	def call_modal_audit(
	    platform: str,
	    goal: str,
	    audience: str,
	    post: str,
	    *,
	    timeout: float | None = None,
	) -> dict[str, Any]:
	    """Dispatch to a backend: Modal endpoint, local Ollama, or deterministic mock.

	    Selection order: the Modal endpoint when a Modal URL is configured, else
	    the local Ollama model when one is configured, else the built-in mock.

	    Args:
	        platform, goal, audience, post: the audit inputs, sent verbatim as the
	            JSON body on the Modal path and passed through on the other paths.
	        timeout: request timeout in seconds. ``None`` means "use the selected
	            backend's own default" (Modal or Ollama timeout getter).

	    Returns:
	        The decoded audit. On the Modal path, a response carrying a ``"raw"``
	        key is run through ``parse_llm_json``; any other response is returned
	        as decoded.

	    Raises:
	        RuntimeError: the Modal endpoint answered with an HTTP error status.
	        Other errors from ``urlopen`` and from JSON decoding are not caught here.
	    """
	    url = get_modal_url()
	    if not url:
	        if get_ollama_model():
	            # Forward the caller's timeout instead of dropping it; None lets
	            # _call_ollama fall back to its own default.
	            return _call_ollama(platform, goal, audience, post, timeout=timeout)
	        return _mock_llm_response(platform, goal, audience, post)

	    # Resolve the Modal default only once the Modal path is actually taken, so
	    # a bad MODAL_AUDIT_TIMEOUT cannot break the Ollama or mock paths.
	    if timeout is None:
	        timeout = get_modal_timeout()

	    payload = json.dumps(
	        {
	            "platform": platform,
	            "goal": goal,
	            "audience": audience,
	            "post": post,
	        }
	    ).encode("utf-8")
	    # Modal serves a single fastapi_endpoint at the root of its URL — no path suffix.
	    headers = {"Content-Type": "application/json"}
	    token = os.environ.get("MODAL_AUDIT_TOKEN")
	    if token:
	        # Optional shared-secret header; only sent when a token is configured.
	        headers["X-Audit-Token"] = token
	    req = urllib.request.Request(
	        url.rstrip("/"),
	        data=payload,
	        headers=headers,
	        method="POST",
	    )
	    try:
	        with urllib.request.urlopen(req, timeout=timeout) as resp:
	            data = json.loads(resp.read().decode("utf-8"))
	    except urllib.error.HTTPError as exc:
	        # Surface the response body: it usually carries the server-side reason.
	        body = exc.read().decode("utf-8", errors="replace")
	        raise RuntimeError(f"Modal HTTP {exc.code}: {body}") from exc

	    if "raw" in data:
	        return parse_llm_json(data["raw"])
	    return data


	def _ollama_chat(model: str, messages: list[dict[str, str]], timeout: float) -> str:
	    """Send one non-streaming chat request to Ollama and return the reply text.

	    The request asks for JSON-formatted output at temperature 0 with at most
	    2048 predicted tokens. HTTP error statuses and connection failures are
	    re-raised as ``RuntimeError``; a reply without message content yields "".
	    """
	    base_url = get_ollama_url()
	    request_fields = {
	        "model": model,
	        "messages": messages,
	        "stream": False,
	        "format": "json",  # constrain output to valid JSON
	        "options": {"temperature": 0, "num_predict": 2048},
	    }
	    chat_request = urllib.request.Request(
	        base_url.rstrip("/") + "/api/chat",
	        data=json.dumps(request_fields).encode("utf-8"),
	        headers={"Content-Type": "application/json"},
	        method="POST",
	    )
	    try:
	        with urllib.request.urlopen(chat_request, timeout=timeout) as response:
	            reply = json.loads(response.read().decode("utf-8"))
	    except urllib.error.HTTPError as http_err:
	        # HTTPError must be handled before URLError, which is its base class.
	        detail = http_err.read().decode("utf-8", errors="replace")
	        raise RuntimeError(f"Ollama HTTP {http_err.code}: {detail}") from http_err
	    except urllib.error.URLError as url_err:
	        raise RuntimeError(
	            f"Cannot reach Ollama at {base_url} — is it running? ({url_err.reason})"
	        ) from url_err
	    message = reply.get("message", {})
	    return message.get("content", "")


	def _call_ollama(
	    platform: str,
	    goal: str,
	    audience: str,
	    post: str,
	    *,
	    timeout: float | None = None,
	) -> dict[str, Any]:
	    """Run the audit against a local Ollama model. First call may be slow (model load).

	    Args:
	        platform, goal, audience, post: the audit inputs handed to
	            ``build_messages`` to produce the chat prompt.
	        timeout: per-request timeout in seconds; ``None`` uses
	            ``get_ollama_timeout()``. The same value applies to the retry.

	    Returns:
	        The result of ``parse_llm_json`` on the model's reply.

	    Raises:
	        Whatever ``_ollama_chat`` raises, and any parse error from the second
	        attempt — only the first parse failure is retried.
	    """
	    if timeout is None:
	        timeout = get_ollama_timeout()
	    model = get_ollama_model()
	    messages = build_messages(platform, goal, audience, post)
	    raw = _ollama_chat(model, messages, timeout)
	    try:
	        return parse_llm_json(raw)
	    except (json.JSONDecodeError, ValueError):
	        # One retry with an explicit instruction, mirroring the Modal path.
	        retry = messages + [
	            {"role": "user", "content": "Return ONLY valid JSON matching the schema. No other text."}
	        ]
	        return parse_llm_json(_ollama_chat(model, retry, timeout))


	def _mock_llm_response(
	    platform: str,
	    goal: str,
	    audience: str,
	    post: str,
	) -> dict[str, Any]:
	    """Return a canned audit without calling any model.

	    Posts mentioning "link in bio" or "webinar" (case-insensitive) get the
	    decoded few-shot example; every other post gets one fixed low-scoring
	    report. Only ``post`` influences the result.
	    """
	    # The brief fields are not used by the mock.
	    del platform, goal, audience

	    lowered = post.lower()
	    if any(marker in lowered for marker in ("link in bio", "webinar")):
	        return json.loads(FEW_SHOT_ASSISTANT)

	    # (key, score, rationale) rows for the fixed scorecard.
	    scorecard = [
	        ("hook", 2, "Opening lacks a clear stake or benefit."),
	        ("clarity", 2, "Multiple topics mixed in one dump."),
	        ("audienceFit", 3, "Jargon may fit insiders but structure is rough."),
	        ("goalService", 2, "Does not clearly drive the stated goal actions."),
	        ("cta", 2, "Call to action is logistics-only or missing deadline."),
	    ]
	    # (code, message) rows; every mock warning shares severity and source.
	    flagged = [
	        ("MIXED_MESSAGES", "Artifact, task, and logistics appear mixed."),
	        ("NO_CLEAR_CTA", "No explicit personal action with a deadline."),
	    ]

	    brief_check = {
	        "status": "ok",
	        "inferred": {
	            "goal": "Unclear from post — needs editor review",
	            "audience": "In-context colleagues",
	        },
	        "gaps": [],
	    }
	    goal_alignment = {
	        "overall": 40,
	        "cappedBy": [],
	        "dimensions": [
	            {"key": key, "score": score, "rationale": rationale}
	            for key, score, rationale in scorecard
	        ],
	        "summary": "Mock audit (set MODAL_AUDIT_URL for live Gemma 4 E4B). Post needs structure and a clearer CTA.",
	    }
	    warnings = [
	        {"code": code, "severity": "warning", "source": "llm", "message": message}
	        for code, message in flagged
	    ]
	    rewrite_hints = [
	        "Lead with why the reader should act now.",
	        "Separate artifact, task, and logistics into sections.",
	        "Add one explicit CTA with a deadline.",
	    ]
	    return {
	        "briefCheck": brief_check,
	        "auditReport": {
	            "goalAlignment": goal_alignment,
	            "warnings": warnings,
	            "rewriteHints": rewrite_hints,
	        },
	    }