"""ZEN Web Insight Brief Builder.

A small Gradio app that optionally scrapes a URL with Firecrawl, merges the
result with user-pasted text, asks an OpenAI-compatible chat model for a
structured JSON analysis, and renders that analysis as Markdown, raw JSON,
and a bar chart.
"""

import json
import textwrap
from typing import Dict, Any, List, Tuple, Optional

import gradio as gr
import requests
import matplotlib.pyplot as plt
from matplotlib.figure import Figure


# ============================================================
#  SHARED DEFAULTS
# ============================================================

DEFAULT_BASE_URL = "https://api.openai.com"
DEFAULT_MODEL = "gpt-4.1"
REQUEST_TIMEOUT_SECONDS = 60
MAX_CONTENT_CHARS = 6000


# ============================================================
#  LLM CALLER (OPENAI-COMPATIBLE, GPT-4.1 BY DEFAULT)
# ============================================================

def _chat_completions_url(base_url: str) -> str:
    """Return the chat-completions endpoint for *base_url*.

    Accepts a base with or without a trailing slash and with or without a
    trailing ``/v1`` so that ``https://host`` and ``https://host/v1`` both
    resolve to ``https://host/v1/chat/completions``.
    """
    base = (base_url or "").strip().rstrip("/") or DEFAULT_BASE_URL
    if not base.endswith("/v1"):
        base += "/v1"
    return base + "/chat/completions"


def call_chat_completion(
    api_key: str,
    base_url: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    max_completion_tokens: int = 1800,
) -> str:
    """
    OpenAI-compatible /v1/chat/completions helper.

    - Uses new-style `max_completion_tokens` (for GPT-4.1, GPT-4o, etc.)
    - Falls back to legacy `max_tokens` if needed.
    - Does NOT send temperature/top_p so it's safe with strict models.

    Args:
        api_key: Bearer token sent in the Authorization header.
        base_url: Provider base URL; empty means the OpenAI default. A
            trailing ``/v1`` is accepted and not duplicated.
        model: Model identifier placed in the request payload.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        max_completion_tokens: Completion token budget.

    Returns:
        The text content of the first choice.

    Raises:
        ValueError: If ``api_key`` is empty.
        RuntimeError: On a non-200 response, a non-JSON body, or a body that
            does not carry text at ``choices[0].message.content``.
    """
    if not api_key:
        raise ValueError("LLM API key is required.")

    url = _chat_completions_url(base_url)
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_completion_tokens": max_completion_tokens,
    }

    resp = requests.post(
        url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
    )

    # Fallback for providers that still expect `max_tokens`
    if resp.status_code == 400 and "max_completion_tokens" in resp.text:
        payload.pop("max_completion_tokens", None)
        payload["max_tokens"] = max_completion_tokens
        resp = requests.post(
            url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )

    if resp.status_code != 200:
        raise RuntimeError(
            f"LLM API error {resp.status_code}: {resp.text[:500]}"
        )

    try:
        data = resp.json()
    except ValueError as e:
        # A 200 with an HTML/plain-text body (proxy pages, etc.) should surface
        # as the same error type as every other API failure.
        raise RuntimeError(
            f"LLM API returned a non-JSON body: {resp.text[:500]}"
        ) from e

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise RuntimeError(
            f"Unexpected LLM response format: {e}\n\n{json.dumps(data, indent=2)}"
        ) from e

    if not isinstance(content, str):
        # e.g. `"content": null`; fail here rather than inside the JSON parser.
        raise RuntimeError(
            "Unexpected LLM response format: message content is not text"
            f"\n\n{json.dumps(data, indent=2)}"
        )
    return content


# ============================================================
#  FIRECRAWL SCRAPER (OPTIONAL)
# ============================================================

def call_firecrawl_scrape(
    firecrawl_key: str,
    url: str,
    formats: Optional[List[str]] = None,
) -> str:
    """
    Calls Firecrawl's /v0/scrape endpoint to get cleaned markdown/HTML for a single URL.
    Docs: https://docs.firecrawl.dev/api-reference/endpoint/scrape

    Args:
        firecrawl_key: Bearer token for the Firecrawl API.
        url: Page to scrape.
        formats: Optional list forwarded as the ``formats`` payload field.

    Returns:
        ``data.markdown`` if present, else ``data.html``, else the whole
        response re-serialized as a JSON string.

    Raises:
        ValueError: If the key or URL is empty.
        RuntimeError: On a non-200 response or a non-JSON body.
    """
    if not firecrawl_key:
        raise ValueError("Firecrawl API key is missing.")
    if not url:
        raise ValueError("URL is required to use Firecrawl.")

    api_url = "https://api.firecrawl.dev/v0/scrape"
    headers = {
        "Authorization": f"Bearer {firecrawl_key}",
        "Content-Type": "application/json",
    }
    payload: Dict[str, Any] = {"url": url}
    # NOTE(review): `formats` looks like a field of Firecrawl's newer scrape
    # API; confirm the v0 endpoint accepts it (or migrate the endpoint).
    if formats:
        payload["formats"] = formats

    resp = requests.post(
        api_url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
    )
    if resp.status_code != 200:
        raise RuntimeError(
            f"Firecrawl error {resp.status_code}: {resp.text[:400]}"
        )

    try:
        data = resp.json()
    except ValueError as e:
        raise RuntimeError(
            f"Firecrawl returned a non-JSON body: {resp.text[:400]}"
        ) from e

    # Default: try markdown first, fall back to raw HTML if structure differs.
    # Common shape: { "data": { "markdown": "..." } }
    if isinstance(data, dict):
        inner = data.get("data", {})
        if isinstance(inner, dict):
            for key in ("markdown", "html"):
                value = inner.get(key)
                if isinstance(value, str):
                    return value

    # If the service changes shape, last fallback: stringify
    return json.dumps(data)


# ============================================================
#  ANALYSIS PROMPT + PARSING
# ============================================================

ANALYSIS_SYSTEM_PROMPT = """
You are an expert strategy analyst.
Given some web content (or pasted text) plus a short user description,
you will produce a concise, executive-ready analysis in JSON.

Return ONLY JSON using this schema:
{
  "executive_summary": "string",
  "key_points": ["string", ...],
  "opportunities": ["string", ...],
  "risks": ["string", ...],
  "recommended_actions": [
    {
      "title": "string",
      "area": "string",
      "description": "string"
    }
  ]
}
"""


def build_analysis_user_prompt(
    url: str,
    content_preview: str,
    user_notes: str,
    focus: str,
    max_chars: int = MAX_CONTENT_CHARS,
) -> str:
    """Assemble the user message sent alongside ANALYSIS_SYSTEM_PROMPT.

    Args:
        url: Source URL; rendered as "N/A" when empty.
        content_preview: Scraped and/or pasted text to analyze.
        user_notes: Free-form context; rendered as "N/A" when empty.
        focus: Focus lens label.
        max_chars: Maximum number of content characters included, to keep
            the context reasonable.

    Returns:
        The prompt text with the content wrapped in triple double quotes.
    """
    truncated = (content_preview or "")[:max_chars]
    sections = [
        f"Source URL: {url or 'N/A'}",
        f"Focus area: {focus}",
        f"User notes / context:\n{user_notes or 'N/A'}",
        f'Scraped or pasted content (truncated if long):\n"""{truncated}"""',
    ]
    return "\n\n".join(sections)


def parse_analysis_json(raw_text: str) -> Dict[str, Any]:
    """Strip fences and extract JSON payload.

    Args:
        raw_text: Raw model output, optionally wrapped in ``` fences and/or
            surrounded by extra prose.

    Returns:
        The decoded JSON object spanning the first "{" to the last "}".

    Raises:
        ValueError: If no brace-delimited span exists, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if that span
            is not valid JSON.
    """
    txt = (raw_text or "").strip()
    if txt.startswith("```"):
        parts = txt.split("```")
        # Prefer the first fenced segment that looks like it holds an object.
        txt = next((p for p in parts if "{" in p and "}" in p), parts[-1])

    first = txt.find("{")
    last = txt.rfind("}")
    # `last < first` also covers a missing "}" (-1) and a "}" that only
    # appears before the first "{".
    if first == -1 or last < first:
        raise ValueError("No JSON detected in model output.")
    return json.loads(txt[first:last + 1])


def _as_list(value: Any) -> List[Any]:
    """Coerce a loosely-typed model field into a list.

    ``None`` and ``""`` become ``[]``; lists/tuples are copied; any other
    value (e.g. a bare string where a list was requested) becomes a
    one-element list so it is not iterated character by character.
    """
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def analysis_to_markdown(analysis: Dict[str, Any]) -> str:
    """Render the JSON analysis as a short executive brief in Markdown.

    Tolerates loosely-shaped model output: missing or null fields fall back
    to placeholders, a bare string where a list was expected is treated as a
    single bullet, and non-dict action entries are rendered as a description.
    """

    def bullet(items: Any) -> str:
        entries = _as_list(items)
        if not entries:
            return "_None identified._"
        return "\n".join(f"- {entry}" for entry in entries)

    md: List[str] = [
        "## Executive Summary",
        str(analysis.get("executive_summary") or "N/A"),
    ]

    for heading, key in (
        ("Key Points", "key_points"),
        ("Opportunities", "opportunities"),
        ("Risks", "risks"),
    ):
        md.append(f"\n## {heading}")
        md.append(bullet(analysis.get(key)))

    md.append("\n## Recommended Actions")
    actions = _as_list(analysis.get("recommended_actions"))
    if not actions:
        md.append("_None suggested yet — refine your prompt or focus._")

    for idx, act in enumerate(actions, start=1):
        if not isinstance(act, dict):
            # The model sometimes returns plain strings instead of objects.
            act = {"description": str(act)}
        title = act.get("title") or f"Action {idx}"
        area = act.get("area") or "General"
        desc = act.get("description") or ""
        md.append(f"### {idx}. {title}")
        md.append(f"**Area:** {area}")
        md.append(str(desc) if desc else "_No description provided._")

    return "\n\n".join(md)


# ============================================================
#  SIMPLE DATA VISUAL — COUNTS BY CATEGORY
# ============================================================

def analysis_to_figure(analysis: Dict[str, Any]) -> Figure:
    """
    Basic bar chart: how many items per category (points, opportunities, risks, actions).
    Visualizes "density" of insights.

    The figure is built directly from ``matplotlib.figure.Figure`` rather
    than through pyplot, so it is not registered in pyplot's global figure
    list and can be garbage-collected once the UI is done with it.
    """
    labels = ["Key Points", "Opportunities", "Risks", "Actions"]
    keys = ("key_points", "opportunities", "risks", "recommended_actions")
    values = [len(_as_list(analysis.get(key))) for key in keys]

    fig = Figure(figsize=(5, 3))
    ax = fig.subplots()
    ax.bar(labels, values)
    ax.set_ylabel("Count")
    ax.set_title("Insight Density by Category")
    fig.tight_layout()
    return fig


def _empty_figure() -> Figure:
    """Return the all-zero chart shown alongside warnings and errors."""
    return analysis_to_figure({})


# ============================================================
#  SAMPLE PRESETS
# ============================================================

FOCUS_CHOICES: List[str] = [
    "Policy / Regulation",
    "Product / Market",
    "Industry / Strategy",
    "Risk & Compliance",
    "Custom / Other",
]
DEFAULT_FOCUS = "Industry / Strategy"

SAMPLE_CONFIGS: Dict[str, Dict[str, str]] = {
    "AI / Tech Policy Article": {
        "url": "https://www.whitehouse.gov/briefing-room/",
        "notes": "Focus on AI policy, workforce impact, and org-readiness.",
        "focus": "Policy / Regulation",
    },
    "Competitor Product Page": {
        "url": "https://example.com/",
        "notes": "Assume this is a competitor's SaaS pricing page.",
        "focus": "Product / Market",
    },
    "Industry Research Report": {
        "url": "https://example.org/report",
        "notes": "Treat as a long-form industry trend report.",
        "focus": "Industry / Strategy",
    },
}


def load_sample(name: str) -> Tuple[str, str, str]:
    """Return ``(url, notes, focus)`` for the named sample preset.

    An empty or unknown name yields empty url/notes and DEFAULT_FOCUS, which
    is one of the focus dropdown's choices (the dropdown receives this value).
    """
    cfg = SAMPLE_CONFIGS.get(name) if name else None
    if cfg is None:
        return "", "", DEFAULT_FOCUS
    return cfg["url"], cfg["notes"], cfg["focus"]


# ============================================================
#  MAIN HANDLER FOR GRADIO
# ============================================================

def generate_brief_ui(
    llm_key_state: str,
    llm_key_input: str,
    base_url: str,
    model_name: str,
    firecrawl_key: str,
    url: str,
    pasted_text: str,
    user_notes: str,
    focus: str,
) -> Tuple[str, str, Figure, str]:
    """
    Master UI handler:
    - decides whether to call Firecrawl (if key + URL)
    - merges scraped content with pasted text
    - calls LLM and renders outputs

    Returns:
        ``(brief_markdown, raw_json, figure, llm_key_state)``. On a warning
        or error the markdown carries the message, the JSON is empty, the
        figure is the all-zero chart, and the stored key is left unchanged.
    """
    llm_key = (llm_key_input or llm_key_state or "").strip()
    if not llm_key:
        return (
            "⚠️ Please enter your LLM API key in the left panel.",
            "",
            _empty_figure(),
            llm_key_state,
        )

    url = (url or "").strip()
    pasted_text = (pasted_text or "").strip()
    firecrawl_key = (firecrawl_key or "").strip()

    if not url and not pasted_text:
        return (
            "⚠️ Provide at least a URL or some pasted text.",
            "",
            _empty_figure(),
            llm_key_state,
        )

    # 1. Scrape via Firecrawl if URL + key are set
    scraped_content = ""
    scrape_error = ""
    if url and firecrawl_key:
        try:
            scraped_content = call_firecrawl_scrape(
                firecrawl_key, url, formats=["markdown"]
            )
        except Exception as e:  # best-effort: scraping is optional
            scrape_error = str(e)
            # Still tell the model that the page content is unavailable.
            scraped_content = f"(Firecrawl error: {e})"

    # 2. Compose content preview (scraped + pasted)
    # NOTE: build_analysis_user_prompt truncates the combined text, so a long
    # scraped page can crowd out the pasted text that follows it.
    content_preview_parts = []
    if scraped_content:
        content_preview_parts.append(scraped_content)
    if pasted_text:
        content_preview_parts.append("\n\nUser-pasted text:\n" + pasted_text)
    content_preview = "\n\n".join(content_preview_parts)

    # 3. Build prompt and call LLM
    user_prompt = build_analysis_user_prompt(
        url, content_preview, user_notes, focus or DEFAULT_FOCUS
    )
    model = (model_name or "").strip() or DEFAULT_MODEL

    try:
        raw = call_chat_completion(
            api_key=llm_key,
            base_url=base_url,
            model=model,
            system_prompt=ANALYSIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_completion_tokens=1800,
        )
        analysis = parse_analysis_json(raw)
        md = analysis_to_markdown(analysis)
        fig = analysis_to_figure(analysis)
        json_out = json.dumps(analysis, indent=2, ensure_ascii=False)
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return (
            f"❌ Error generating brief:\n\n{e}",
            "",
            _empty_figure(),
            llm_key_state,
        )

    if scrape_error:
        # Make the scrape failure visible to the user, not just to the model.
        md = f"> ⚠️ Firecrawl scrape failed: {scrape_error}\n\n{md}"

    return md, json_out, fig, llm_key


# ============================================================
#  GRADIO UI
# ============================================================

with gr.Blocks(title="ZEN Web Insight Brief Builder") as demo:
    gr.Markdown(
        # Dedent so the heading is not rendered as an indented code block.
        textwrap.dedent(
            """
            # 🌐 ZEN Web Insight Brief Builder

            Turn any URL (plus optional Firecrawl scrape) into a structured, actionable executive brief:

            1. **Configure API keys** (LLM + optional Firecrawl)
            2. **Paste a URL and/or text**
            3. **Get an executive summary, risks, opportunities, and actions**
            """
        )
    )

    # Remembers the last key that produced a successful brief.
    llm_key_state = gr.State("")

    with gr.Row():
        # LEFT: API + samples
        with gr.Column(scale=1):
            gr.Markdown("### 1 — API & Model Settings")

            llm_key_input = gr.Textbox(
                label="LLM API Key",
                placeholder="OpenAI or compatible key",
                type="password",
            )
            base_url = gr.Textbox(
                label="LLM Base URL",
                value=DEFAULT_BASE_URL,
                placeholder="e.g. https://api.openai.com",
            )
            model_name = gr.Textbox(
                label="Model Name",
                value=DEFAULT_MODEL,
                placeholder="e.g. gpt-4.1, gpt-4o, etc.",
            )

            gr.Markdown("#### Optional — Firecrawl (URL Scraper)")
            firecrawl_key = gr.Textbox(
                label="Firecrawl API Key (optional)",
                placeholder="Only needed if you want automatic URL scraping",
                type="password",
            )

            gr.Markdown("#### Sample Config")
            sample_dropdown = gr.Dropdown(
                label="Load a sample scenario",
                choices=list(SAMPLE_CONFIGS.keys()),
                value=None,
            )
            load_sample_btn = gr.Button("Load Sample")

        # RIGHT: content + config
        with gr.Column(scale=2):
            gr.Markdown("### 2 — Content & Focus")

            url_input = gr.Textbox(
                label="Source URL",
                placeholder="Paste a URL to analyze (works best with Firecrawl key, but optional)",
            )
            pasted_text = gr.Textbox(
                label="Or paste content manually",
                placeholder="Paste article text, notes, or report sections here.",
                lines=8,
            )
            user_notes = gr.Textbox(
                label="Your context / what you care about",
                placeholder="Example: Focus on youth workforce impacts and funding opportunities.",
                lines=3,
            )
            focus = gr.Dropdown(
                label="Focus lens",
                choices=FOCUS_CHOICES,
                value=DEFAULT_FOCUS,
            )

            generate_btn = gr.Button("🚀 Generate Insight Brief", variant="primary")

    gr.Markdown("### 3 — Executive Brief")

    with gr.Row():
        with gr.Column(scale=3):
            brief_md = gr.Markdown(
                label="Brief",
                value="Your executive brief will appear here after generation.",
            )
        with gr.Column(scale=2):
            brief_json = gr.Code(
                label="Raw JSON (for automation / export)",
                language="json",
            )

    gr.Markdown("### 4 — Insight Density Visual")
    brief_fig = gr.Plot(label="Insight Density by Category")

    # Wiring
    load_sample_btn.click(
        load_sample,
        inputs=[sample_dropdown],
        outputs=[url_input, user_notes, focus],
    )

    generate_btn.click(
        generate_brief_ui,
        inputs=[
            llm_key_state,
            llm_key_input,
            base_url,
            model_name,
            firecrawl_key,
            url_input,
            pasted_text,
            user_notes,
            focus,
        ],
        outputs=[brief_md, brief_json, brief_fig, llm_key_state],
    )


if __name__ == "__main__":
    demo.launch()