# Hugging Face Space: ZENLLC/mod4sec2.12test (status: Sleeping)
# File size: 15,315 Bytes
# Revision: fb827e1

import json
import textwrap
from typing import Dict, Any, List, Tuple, Optional

import gradio as gr
import requests
import matplotlib.pyplot as plt
from matplotlib.figure import Figure


# ============================================================
#  LLM CALLER (OPENAI-COMPATIBLE, GPT-4.1 BY DEFAULT)
# ============================================================

def call_chat_completion(
    api_key: str,
    base_url: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    max_completion_tokens: int = 1800,
) -> str:
    """
    Send one system + user exchange to an OpenAI-compatible
    /v1/chat/completions endpoint and return the assistant's text.

    - Uses new-style `max_completion_tokens` (for GPT-4.1, GPT-4o, etc.)
    - Falls back to legacy `max_tokens` if the provider rejects the new name.
    - Does NOT send temperature/top_p so it's safe with strict models.

    Args:
        api_key: Bearer token sent in the Authorization header.
        base_url: Provider root. Empty falls back to https://api.openai.com.
            A trailing "/" or "/v1" is tolerated.
        model: Model identifier placed in the request payload.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        max_completion_tokens: Upper bound on generated tokens.

    Returns:
        The `content` string of the first choice's message.

    Raises:
        ValueError: If `api_key` is empty.
        RuntimeError: On a non-200 status, a body that is not JSON, or a
            JSON body that does not carry a text answer in the expected place.
    """
    if not api_key:
        raise ValueError("LLM API key is required.")

    root = (base_url or "").strip().rstrip("/") or "https://api.openai.com"
    # SDK-style base URLs already end in /v1; drop it so the path is not
    # doubled into /v1/v1/chat/completions.
    if root.endswith("/v1"):
        root = root[: -len("/v1")]
    url = root + "/v1/chat/completions"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_completion_tokens": max_completion_tokens,
    }

    resp = requests.post(url, headers=headers, json=payload, timeout=60)

    # Fallback for providers that still expect `max_tokens`
    if resp.status_code == 400 and "max_completion_tokens" in resp.text:
        payload.pop("max_completion_tokens", None)
        payload["max_tokens"] = max_completion_tokens
        resp = requests.post(url, headers=headers, json=payload, timeout=60)

    if resp.status_code != 200:
        raise RuntimeError(
            f"LLM API error {resp.status_code}: {resp.text[:500]}"
        )

    try:
        data = resp.json()
    except ValueError as err:
        # A 200 with an HTML/plain-text body (proxy page, wrong base URL).
        raise RuntimeError(
            f"LLM API returned a non-JSON body: {resp.text[:500]}"
        ) from err

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Unexpected LLM response format: {err}\n\n{json.dumps(data, indent=2)}"
        ) from err

    if not isinstance(content, str):
        # e.g. a null content field; fail here rather than hand a non-string
        # to callers that expect text.
        raise RuntimeError(
            "Unexpected LLM response format: message content is not text"
            f"\n\n{json.dumps(data, indent=2)}"
        )
    return content


# ============================================================
#  FIRECRAWL SCRAPER (OPTIONAL)
# ============================================================

def call_firecrawl_scrape(
    firecrawl_key: str,
    url: str,
    formats: Optional[List[str]] = None,
) -> str:
    """
    Scrape a single URL through Firecrawl's /v0/scrape endpoint.

    The response's "data" object is searched for a string "markdown" field
    first, then a string "html" field. If neither is found, the whole decoded
    response is returned as a JSON string.

    Docs: https://docs.firecrawl.dev/api-reference/endpoint/scrape

    Raises:
        ValueError: If the API key or the URL is empty.
        RuntimeError: If Firecrawl answers with a non-200 status.
    """
    if not firecrawl_key:
        raise ValueError("Firecrawl API key is missing.")
    if not url:
        raise ValueError("URL is required to use Firecrawl.")

    # "formats" is only sent when the caller asked for specific ones.
    request_body: Dict[str, Any] = (
        {"url": url, "formats": formats} if formats else {"url": url}
    )

    response = requests.post(
        "https://api.firecrawl.dev/v0/scrape",
        headers={
            "Authorization": f"Bearer {firecrawl_key}",
            "Content-Type": "application/json",
        },
        json=request_body,
        timeout=60,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Firecrawl error {response.status_code}: {response.text[:400]}"
        )

    body = response.json()

    # Common shape: { "data": { "markdown": "..." } }
    page = body.get("data", {}) if isinstance(body, dict) else None
    if isinstance(page, dict):
        for field in ("markdown", "html"):
            text = page.get(field)
            if isinstance(text, str):
                return text

    # Unknown shape: hand back the raw payload as text.
    return json.dumps(body)


# ============================================================
#  ANALYSIS PROMPT + PARSING
# ============================================================

# System prompt passed to the LLM by generate_brief_ui(). It asks for a JSON-only
# reply; the keys named in the schema below are the ones analysis_to_markdown()
# and analysis_to_figure() read, so keep the three in sync when editing.
ANALYSIS_SYSTEM_PROMPT = """
You are an expert strategy analyst.

Given some web content (or pasted text) plus a short user description,
you will produce a concise, executive-ready analysis in JSON.

Return ONLY JSON using this schema:

{
  "executive_summary": "string",
  "key_points": ["string", ...],
  "opportunities": ["string", ...],
  "risks": ["string", ...],
  "recommended_actions": [
    {
      "title": "string",
      "area": "string",
      "description": "string"
    }
  ]
}
"""

def build_analysis_user_prompt(
    url: str,
    content_preview: str,
    user_notes: str,
    focus: str,
    max_content_chars: int = 6000,
) -> str:
    """
    Assemble the user message that accompanies ANALYSIS_SYSTEM_PROMPT.

    Args:
        url: Source URL; rendered as "N/A" when empty.
        content_preview: Scraped and/or pasted text. None is treated as empty.
        user_notes: Free-form context from the user; "N/A" when empty.
        focus: Focus lens label, inserted verbatim.
        max_content_chars: Maximum number of characters of `content_preview`
            to include (default 6000, the previous fixed limit). Negative
            values are treated as 0.

    Returns:
        The prompt text, with no leading or trailing whitespace.
    """
    # Keep context reasonable; clamp so a negative limit cannot turn the
    # slice into "drop the last N characters".
    excerpt = (content_preview or "")[: max(0, max_content_chars)]

    lines = [
        f"Source URL: {url or 'N/A'}",
        "",
        f"Focus area: {focus}",
        "",
        "User notes / context:",
        f"{user_notes or 'N/A'}",
        "",
        "Scraped or pasted content (truncated if long):",
        f'"""{excerpt}"""',
    ]
    return "\n".join(lines)


def parse_analysis_json(raw_text: str) -> Dict[str, Any]:
    """
    Extract the first JSON object embedded in the model's reply.

    The reply may wrap the JSON in Markdown code fences or surround it with
    prose. Each "{" is tried in order as the start of an object, and the first
    one that decodes cleanly wins. Text after the object is ignored, so a
    stray brace later in the reply does not break parsing.

    Args:
        raw_text: Raw model output. None or empty is treated as "no JSON".

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no "{" at all, or no "{" starts a
            valid JSON object.
    """
    text = (raw_text or "").strip()
    decoder = json.JSONDecoder()
    first_error: Optional[json.JSONDecodeError] = None

    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one document beginning at `start` and stops at
            # its end, so trailing prose or closing fences are harmless.
            payload, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as err:
            if first_error is None:
                first_error = err
            start = text.find("{", start + 1)
        else:
            return payload

    if first_error is None:
        raise ValueError("No JSON detected in model output.")
    raise ValueError(
        f"Model output contains no valid JSON object: {first_error}"
    ) from first_error


def analysis_to_markdown(analysis: Dict[str, Any]) -> str:
    """
    Render the JSON analysis as a short executive brief in Markdown.

    The input comes from an LLM, so it may not follow the requested schema
    exactly. Missing or null fields fall back to placeholders, a scalar given
    where a list was expected is treated as a one-item list, and an action
    that is not an object is rendered as an untitled action whose description
    is that value.

    Args:
        analysis: Decoded analysis object. Keys read: "executive_summary",
            "key_points", "opportunities", "risks", "recommended_actions"
            (each action: "title", "area", "description").

    Returns:
        Markdown text with one section per key, sections separated by blank
        lines.
    """

    def as_list(value: Any) -> List[Any]:
        # Null/empty -> no items; a lone scalar or object -> one item.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def bullet(value: Any) -> str:
        items = as_list(value)
        if not items:
            return "_None identified._"
        return "\n".join(f"- {item}" for item in items)

    md: List[str] = [
        "## Executive Summary",
        # str.join below needs strings, so a null summary must not leak through.
        str(analysis.get("executive_summary") or "N/A"),
    ]

    for heading, key in (
        ("Key Points", "key_points"),
        ("Opportunities", "opportunities"),
        ("Risks", "risks"),
    ):
        md.append(f"\n## {heading}")
        md.append(bullet(analysis.get(key)))

    md.append("\n## Recommended Actions")
    actions = as_list(analysis.get("recommended_actions"))
    if not actions:
        md.append("_None suggested yet — refine your prompt or focus._")

    for idx, act in enumerate(actions, start=1):
        if not isinstance(act, dict):
            # Model returned e.g. a plain string instead of an action object.
            act = {"description": str(act)}
        title = act.get("title") or f"Action {idx}"
        area = act.get("area") or "General"
        desc = act.get("description") or ""
        md.append(f"### {idx}. {title}")
        md.append(f"**Area:** {area}")
        md.append(str(desc) if desc else "_No description provided._")

    return "\n\n".join(md)


# ============================================================
#  SIMPLE DATA VISUAL — COUNTS BY CATEGORY
# ============================================================

def analysis_to_figure(analysis: Dict[str, Any]) -> Figure:
    """
    Basic bar chart: how many items per category (points, opportunities, risks, actions).
    Visualizes "density" of insights.

    The figure is built with the `Figure` class directly instead of
    `plt.subplots()`. pyplot keeps every figure it creates in a global
    registry until it is explicitly closed, and this function runs on every
    button click, so going through pyplot would accumulate figures for the
    life of the server process.

    Args:
        analysis: Decoded analysis object. Keys read: "key_points",
            "opportunities", "risks", "recommended_actions".

    Returns:
        A standalone matplotlib Figure with one bar per category.
    """

    def count(value: Any) -> int:
        # Null/empty -> 0; a lone scalar or object -> 1 (avoids len() on
        # numbers and per-character counts on strings).
        if not value:
            return 0
        if isinstance(value, (list, tuple)):
            return len(value)
        return 1

    labels = ["Key Points", "Opportunities", "Risks", "Actions"]
    values = [
        count(analysis.get("key_points")),
        count(analysis.get("opportunities")),
        count(analysis.get("risks")),
        count(analysis.get("recommended_actions")),
    ]

    fig = Figure(figsize=(5, 3))
    ax = fig.subplots()
    ax.bar(labels, values)
    ax.set_ylabel("Count")
    ax.set_title("Insight Density by Category")
    fig.tight_layout()
    return fig


# ============================================================
#  SAMPLE PRESETS
# ============================================================

# Preset scenarios offered in the "Load a sample scenario" dropdown.
# Keys are the labels shown to the user; each value supplies the "url",
# "notes" and "focus" strings that load_sample() copies into the form.
SAMPLE_CONFIGS: Dict[str, Dict[str, str]] = {
    "AI / Tech Policy Article": {
        "url": "https://www.whitehouse.gov/briefing-room/",
        "notes": "Focus on AI policy, workforce impact, and org-readiness.",
        "focus": "Policy / Regulation",
    },
    "Competitor Product Page": {
        "url": "https://example.com/",
        "notes": "Assume this is a competitor's SaaS pricing page.",
        "focus": "Product / Market",
    },
    "Industry Research Report": {
        "url": "https://example.org/report",
        "notes": "Treat as a long-form industry trend report.",
        "focus": "Industry / Strategy",
    },
}

def load_sample(name: str) -> Tuple[str, str, str]:
    """
    Look up a sample scenario and return the values for the form fields.

    Args:
        name: Label chosen in the sample dropdown. May be empty or None when
            nothing is selected.

    Returns:
        `(url, notes, focus)` for the chosen sample. When `name` is empty or
        unknown, returns empty URL and notes with the focus "Custom / Other".
        The fallback must be one of the "Focus lens" dropdown's choices,
        because this value is written straight into that dropdown.
    """
    if not name or name not in SAMPLE_CONFIGS:
        return "", "", "Custom / Other"
    cfg = SAMPLE_CONFIGS[name]
    return cfg["url"], cfg["notes"], cfg["focus"]


# ============================================================
#  MAIN HANDLER FOR GRADIO
# ============================================================

def _blank_brief_outputs(message: str, key_state: str) -> Tuple[str, str, Any, str]:
    """Build the UI outputs for a run that produced no brief: message, no JSON, empty chart."""
    blank_fig = analysis_to_figure(
        {"key_points": [], "opportunities": [], "risks": [], "recommended_actions": []}
    )
    return message, "", blank_fig, key_state


def generate_brief_ui(
    llm_key_state: str,
    llm_key_input: str,
    base_url: str,
    model_name: str,
    firecrawl_key: str,
    url: str,
    pasted_text: str,
    user_notes: str,
    focus: str,
):
    """
    Master UI handler:
    - decides whether to call Firecrawl (if key + URL)
    - merges scraped content with pasted text
    - calls LLM and renders outputs

    Text inputs are stripped before validation, so a field that contains only
    whitespace counts as empty. None is treated as an empty string.

    Args:
        llm_key_state: API key remembered from an earlier successful run.
        llm_key_input: API key typed into the form; wins over the state.
        base_url: LLM provider base URL.
        model_name: Model identifier; defaults to "gpt-4.1" when empty.
        firecrawl_key: Optional Firecrawl key; scraping is skipped without it.
        url: Source URL to analyze.
        pasted_text: Manually pasted content.
        user_notes: User's context for the analysis.
        focus: Focus lens label.

    Returns:
        `(markdown, json_text, figure, key_state)`. On validation failure or
        any error, `markdown` carries the message, `json_text` is empty, the
        figure is an empty chart and the key state is returned unchanged.
    """
    llm_key = (llm_key_input or "").strip() or (llm_key_state or "").strip()
    if not llm_key:
        return _blank_brief_outputs(
            "⚠️ Please enter your LLM API key in the left panel.",
            llm_key_state,
        )

    url = (url or "").strip()
    pasted_text = (pasted_text or "").strip()
    firecrawl_key = (firecrawl_key or "").strip()

    if not url and not pasted_text:
        return _blank_brief_outputs(
            "⚠️ Provide at least a URL or some pasted text.",
            llm_key_state,
        )

    # 1. Scrape via Firecrawl if URL + key are set
    scraped_content = ""
    if url and firecrawl_key:
        try:
            scraped_content = call_firecrawl_scrape(firecrawl_key, url, formats=["markdown"])
        except Exception as e:
            # Best effort: a failed scrape must not block the brief. The note
            # travels with the content so the failure is visible downstream.
            scraped_content = f"(Firecrawl error: {e})"

    # 2. Compose content preview (scraped + pasted)
    content_preview_parts = []
    if scraped_content:
        content_preview_parts.append(scraped_content)
    if pasted_text:
        content_preview_parts.append("\n\nUser-pasted text:\n" + pasted_text)

    content_preview = "\n\n".join(content_preview_parts)

    # 3. Build prompt and call LLM
    user_prompt = build_analysis_user_prompt(url, content_preview, user_notes, focus)
    model = (model_name or "").strip() or "gpt-4.1"

    try:
        raw = call_chat_completion(
            api_key=llm_key,
            base_url=(base_url or "").strip(),
            model=model,
            system_prompt=ANALYSIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_completion_tokens=1800,
        )

        analysis = parse_analysis_json(raw)
        md = analysis_to_markdown(analysis)
        fig = analysis_to_figure(analysis)
        json_out = json.dumps(analysis, indent=2, ensure_ascii=False)

        # Remember the key only after it has worked once.
        return md, json_out, fig, llm_key

    except Exception as e:
        # UI boundary: surface any failure as a message instead of a traceback.
        return _blank_brief_outputs(
            f"❌ Error generating brief:\n\n{e}",
            llm_key_state,
        )


# ============================================================
#  GRADIO UI
# ============================================================

# Declarative layout: components are created in the order they appear on the
# page, and the event wiring at the bottom refers to them by variable name.
with gr.Blocks(title="ZEN Web Insight Brief Builder") as demo:
    gr.Markdown(
        """
# 🌐 ZEN Web Insight Brief Builder

Turn any URL (plus optional Firecrawl scrape) into a structured,
actionable executive brief:

1. **Configure API keys** (LLM + optional Firecrawl)  
2. **Paste a URL and/or text**  
3. **Get an executive summary, risks, opportunities, and actions**
"""
    )

    # State slot that generate_brief_ui reads as a fallback API key and
    # writes back to after a successful run.
    llm_key_state = gr.State("")

    with gr.Row():
        # LEFT: API + samples
        with gr.Column(scale=1):
            gr.Markdown("### 1 — API & Model Settings")

            llm_key_input = gr.Textbox(
                label="LLM API Key",
                placeholder="OpenAI or compatible key",
                type="password",
            )

            base_url = gr.Textbox(
                label="LLM Base URL",
                value="https://api.openai.com",
                placeholder="e.g. https://api.openai.com",
            )

            model_name = gr.Textbox(
                label="Model Name",
                value="gpt-4.1",
                placeholder="e.g. gpt-4.1, gpt-4o, etc.",
            )

            gr.Markdown("#### Optional — Firecrawl (URL Scraper)")
            firecrawl_key = gr.Textbox(
                label="Firecrawl API Key (optional)",
                placeholder="Only needed if you want automatic URL scraping",
                type="password",
            )

            gr.Markdown("#### Sample Config")
            # Choices are the SAMPLE_CONFIGS keys; nothing is preselected.
            sample_dropdown = gr.Dropdown(
                label="Load a sample scenario",
                choices=list(SAMPLE_CONFIGS.keys()),
                value=None,
            )
            load_sample_btn = gr.Button("Load Sample")

        # RIGHT: content + config
        with gr.Column(scale=2):
            gr.Markdown("### 2 — Content & Focus")

            url_input = gr.Textbox(
                label="Source URL",
                placeholder="Paste a URL to analyze (works best with Firecrawl key, but optional)",
            )

            pasted_text = gr.Textbox(
                label="Or paste content manually",
                placeholder="Paste article text, notes, or report sections here.",
                lines=8,
            )

            user_notes = gr.Textbox(
                label="Your context / what you care about",
                placeholder="Example: Focus on youth workforce impacts and funding opportunities.",
                lines=3,
            )

            focus = gr.Dropdown(
                label="Focus lens",
                choices=[
                    "Policy / Regulation",
                    "Product / Market",
                    "Industry / Strategy",
                    "Risk & Compliance",
                    "Custom / Other",
                ],
                value="Industry / Strategy",
            )

            generate_btn = gr.Button("🚀 Generate Insight Brief", variant="primary")

    gr.Markdown("### 3 — Executive Brief")

    # Output area: rendered brief on the left, raw JSON on the right.
    with gr.Row():
        with gr.Column(scale=3):
            brief_md = gr.Markdown(
                label="Brief",
                value="Your executive brief will appear here after generation.",
            )
        with gr.Column(scale=2):
            brief_json = gr.Code(
                label="Raw JSON (for automation / export)",
                language="json",
            )

    gr.Markdown("### 4 — Insight Density Visual")
    brief_fig = gr.Plot(label="Insight Density by Category")

    # Wiring
    # load_sample returns (url, notes, focus), matching the three outputs.
    load_sample_btn.click(
        load_sample,
        inputs=[sample_dropdown],
        outputs=[url_input, user_notes, focus],
    )

    # The inputs list is in the same order as generate_brief_ui's parameters,
    # and the outputs list matches its 4-tuple return value.
    generate_btn.click(
        generate_brief_ui,
        inputs=[
            llm_key_state,
            llm_key_input,
            base_url,
            model_name,
            firecrawl_key,
            url_input,
            pasted_text,
            user_notes,
            focus,
        ],
        outputs=[brief_md, brief_json, brief_fig, llm_key_state],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()