ZENLLC committed on
Commit
fb827e1
·
verified ·
1 Parent(s): 6c71173

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +497 -0
app.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import textwrap
3
+ from typing import Dict, Any, List, Tuple, Optional
4
+
5
+ import gradio as gr
6
+ import requests
7
+ import matplotlib.pyplot as plt
8
+ from matplotlib.figure import Figure
9
+
10
+
11
+ # ============================================================
12
+ # LLM CALLER (OPENAI-COMPATIBLE, GPT-4.1 BY DEFAULT)
13
+ # ============================================================
14
+
15
def call_chat_completion(
    api_key: str,
    base_url: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    max_completion_tokens: int = 1800,
) -> str:
    """
    Send one system + user exchange to an OpenAI-compatible
    /v1/chat/completions endpoint and return the assistant's text.

    - Uses new-style `max_completion_tokens` (for GPT-4.1, GPT-4o, etc.)
    - Falls back to legacy `max_tokens` if the provider answers 400 and
      mentions `max_completion_tokens` in the error body.
    - Does NOT send temperature/top_p so it's safe with strict models.

    Args:
        api_key: Bearer token sent in the Authorization header.
        base_url: Provider root URL; empty falls back to
            "https://api.openai.com". A trailing "/" or "/v1" is tolerated.
        model: Model identifier placed in the request payload.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        max_completion_tokens: Upper bound on generated tokens.

    Returns:
        The `content` string of the first choice's message.

    Raises:
        ValueError: If `api_key` is empty.
        RuntimeError: On a non-200 response, a body that is not JSON, or a
            JSON body without a string at choices[0].message.content.
    """
    if not api_key:
        raise ValueError("LLM API key is required.")

    root = ((base_url or "").strip() or "https://api.openai.com").rstrip("/")
    # Providers often document their base URL with the version suffix already
    # attached; drop it so we never request ".../v1/v1/chat/completions".
    if root.endswith("/v1"):
        root = root[: -len("/v1")]
    url = root + "/v1/chat/completions"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_completion_tokens": max_completion_tokens,
    }

    resp = requests.post(url, headers=headers, json=payload, timeout=60)

    # Fallback for providers that still expect `max_tokens`
    if resp.status_code == 400 and "max_completion_tokens" in resp.text:
        payload["max_tokens"] = payload.pop("max_completion_tokens")
        resp = requests.post(url, headers=headers, json=payload, timeout=60)

    if resp.status_code != 200:
        raise RuntimeError(
            f"LLM API error {resp.status_code}: {resp.text[:500]}"
        )

    # A proxy or gateway can answer 200 with an HTML page; report that the
    # same way as every other API failure instead of leaking a decode error.
    try:
        data = resp.json()
    except ValueError as e:
        raise RuntimeError(
            f"LLM API returned a non-JSON body: {resp.text[:500]}"
        ) from e

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise RuntimeError(
            f"Unexpected LLM response format: {e}\n\n{json.dumps(data, indent=2)}"
        ) from e

    # `content` can be null (e.g. refusals or tool calls); the declared return
    # type is str, so fail loudly here rather than downstream.
    if not isinstance(content, str):
        raise RuntimeError(
            f"LLM response contained no text content:\n\n{json.dumps(data, indent=2)}"
        )
    return content
72
+
73
+
74
+ # ============================================================
75
+ # FIRECRAWL SCRAPER (OPTIONAL)
76
+ # ============================================================
77
+
78
def call_firecrawl_scrape(
    firecrawl_key: str,
    url: str,
    formats: Optional[List[str]] = None,
) -> str:
    """
    Calls Firecrawl's /v0/scrape endpoint to get cleaned markdown/HTML
    for a single URL.

    Docs: https://docs.firecrawl.dev/api-reference/endpoint/scrape

    Args:
        firecrawl_key: Bearer token for the Firecrawl API.
        url: Page to scrape.
        formats: Optional list sent as the request's "formats" field;
            omitted from the payload when empty or None.

    Returns:
        `data.markdown` if it is a string, else `data.html` if it is a
        string, else the whole JSON response serialized as a string.

    Raises:
        ValueError: If the key or the URL is empty.
        RuntimeError: On a non-200 response or a body that is not JSON.
    """
    if not firecrawl_key:
        raise ValueError("Firecrawl API key is missing.")

    if not url:
        raise ValueError("URL is required to use Firecrawl.")

    api_url = "https://api.firecrawl.dev/v0/scrape"
    headers = {
        "Authorization": f"Bearer {firecrawl_key}",
        "Content-Type": "application/json",
    }

    payload: Dict[str, Any] = {"url": url}
    if formats:
        # NOTE(review): "formats" looks like a parameter of Firecrawl's newer
        # scrape API; confirm the v0 endpoint used here honors it.
        payload["formats"] = formats

    resp = requests.post(api_url, headers=headers, json=payload, timeout=60)

    if resp.status_code != 200:
        raise RuntimeError(
            f"Firecrawl error {resp.status_code}: {resp.text[:400]}"
        )

    # Keep the error type consistent: a 200 with a non-JSON body is still a
    # Firecrawl failure from the caller's point of view.
    try:
        data = resp.json()
    except ValueError as e:
        raise RuntimeError(
            f"Firecrawl returned a non-JSON body: {resp.text[:400]}"
        ) from e

    # Default: try markdown first, fall back to raw HTML if structure differs.
    # Common shape: { "data": { "markdown": "..." } }
    inner = data.get("data") if isinstance(data, dict) else None
    if isinstance(inner, dict):
        for field in ("markdown", "html"):
            text = inner.get(field)
            if isinstance(text, str):
                return text

    # If the service changes shape, last fallback: stringify
    return json.dumps(data)
125
+
126
+
127
+ # ============================================================
128
+ # ANALYSIS PROMPT + PARSING
129
+ # ============================================================
130
+
131
# System message for the analysis call: fixes the analyst persona and the
# JSON schema (executive_summary, key_points, opportunities, risks,
# recommended_actions) that the model is asked to return.
ANALYSIS_SYSTEM_PROMPT = """
You are an expert strategy analyst.

Given some web content (or pasted text) plus a short user description,
you will produce a concise, executive-ready analysis in JSON.

Return ONLY JSON using this schema:

{
"executive_summary": "string",
"key_points": ["string", ...],
"opportunities": ["string", ...],
"risks": ["string", ...],
"recommended_actions": [
{
"title": "string",
"area": "string",
"description": "string"
}
]
}
"""
153
+
154
def build_analysis_user_prompt(
    url: str,
    content_preview: str,
    user_notes: str,
    focus: str,
) -> str:
    """
    Assemble the user message for the analysis call.

    The message lists the source URL ("N/A" when empty), the focus area,
    the user's notes ("N/A" when empty) and the first 6000 characters of
    the content wrapped in triple double quotes, separated by blank lines.
    """
    clipped = content_preview[:6000]  # keep context reasonable
    sections = [
        f"Source URL: {url or 'N/A'}",
        f"Focus area: {focus}",
        f"User notes / context:\n{user_notes or 'N/A'}",
        f'Scraped or pasted content (truncated if long):\n"""{clipped}"""',
    ]
    return "\n\n".join(sections)
172
+
173
+
174
def parse_analysis_json(raw_text: str) -> Dict[str, Any]:
    """
    Strip Markdown fences and extract the JSON object from model output.

    The first complete JSON object found is decoded; any prose the model
    adds before or after it (even prose containing braces) is ignored.

    Args:
        raw_text: Raw completion text returned by the model.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If no brace-delimited payload is present, or if the
            payload starting at the first "{" is not valid JSON.
    """
    txt = raw_text.strip()

    # Fenced reply: keep the first fence segment that looks like an object.
    if txt.startswith("```"):
        parts = txt.split("```")
        txt = next((p for p in parts if "{" in p and "}" in p), parts[-1])

    first = txt.find("{")
    if first == -1 or txt.find("}", first) == -1:
        raise ValueError("No JSON detected in model output.")

    # raw_decode stops at the end of the first complete value, so trailing
    # commentary after the object cannot break parsing the way slicing up to
    # the last "}" in the text would.
    try:
        parsed, _end = json.JSONDecoder().raw_decode(txt, first)
    except json.JSONDecodeError as err:
        raise ValueError(f"Model output contained malformed JSON: {err}") from err
    return parsed
188
+
189
+
190
def analysis_to_markdown(analysis: Dict[str, Any]) -> str:
    """
    Render the JSON analysis as a short executive brief in Markdown.

    Model output does not always follow the requested schema, so list
    fields may arrive as null or as a bare string, and action entries may
    be plain strings instead of objects; all of these are rendered rather
    than raising.

    Args:
        analysis: Parsed analysis with the keys executive_summary,
            key_points, opportunities, risks and recommended_actions
            (any of which may be missing).

    Returns:
        Markdown text with one section per key, separated by blank lines.
    """

    def as_list(value: Any) -> List[Any]:
        # Lists pass through; null/empty becomes []; a lone value is wrapped
        # so a bare string is not bulleted character by character.
        if isinstance(value, (list, tuple)):
            return list(value)
        if not value:
            return []
        return [value]

    def bullet(items: Any) -> str:
        entries = as_list(items)
        if not entries:
            return "_None identified._"
        return "\n".join(f"- {entry}" for entry in entries)

    md: List[str] = ["## Executive Summary"]
    # A null summary must not reach "\n\n".join, which only accepts strings.
    md.append(str(analysis.get("executive_summary") or "N/A"))

    for heading, key in (
        ("Key Points", "key_points"),
        ("Opportunities", "opportunities"),
        ("Risks", "risks"),
    ):
        md.append(f"\n## {heading}")
        md.append(bullet(analysis.get(key)))

    md.append("\n## Recommended Actions")
    actions = as_list(analysis.get("recommended_actions"))
    if not actions:
        md.append("_None suggested yet — refine your prompt or focus._")
    for idx, act in enumerate(actions, start=1):
        if isinstance(act, dict):
            title = act.get("title") or f"Action {idx}"
            area = act.get("area") or "General"
            desc = act.get("description") or ""
        else:
            # Schema drift: the model returned a plain value for the action.
            title, area, desc = f"Action {idx}", "General", str(act)
        md.append(f"### {idx}. {title}")
        md.append(f"**Area:** {area}")
        md.append(str(desc) or "_No description provided._")

    return "\n\n".join(md)
226
+
227
+
228
+ # ============================================================
229
+ # SIMPLE DATA VISUAL — COUNTS BY CATEGORY
230
+ # ============================================================
231
+
232
def analysis_to_figure(analysis: Dict[str, Any]) -> Figure:
    """
    Basic bar chart: how many items per category (points, opportunities,
    risks, actions). Visualizes "density" of insights.

    Args:
        analysis: Parsed analysis; missing or null categories count as 0.

    Returns:
        A 5x3 inch Matplotlib Figure with one bar per category.
    """
    labels = ["Key Points", "Opportunities", "Risks", "Actions"]
    keys = ("key_points", "opportunities", "risks", "recommended_actions")
    values = [len(analysis.get(key) or []) for key in keys]

    # Build the Figure directly instead of through pyplot: pyplot keeps every
    # figure it creates in a global registry until plt.close() is called,
    # which accumulates in a long-running server that draws one per request.
    fig = Figure(figsize=(5, 3))
    ax = fig.subplots()
    ax.bar(labels, values)
    ax.set_ylabel("Count")
    ax.set_title("Insight Density by Category")
    fig.tight_layout()
    return fig
251
+
252
+
253
+ # ============================================================
254
+ # SAMPLE PRESETS
255
+ # ============================================================
256
+
257
# Preset scenarios offered in the "Load a sample scenario" dropdown. Each
# "focus" value is one of the Focus-lens dropdown choices.
SAMPLE_CONFIGS: Dict[str, Dict[str, str]] = {
    "AI / Tech Policy Article": {
        "url": "https://www.whitehouse.gov/briefing-room/",
        "notes": "Focus on AI policy, workforce impact, and org-readiness.",
        "focus": "Policy / Regulation",
    },
    "Competitor Product Page": {
        "url": "https://example.com/",
        "notes": "Assume this is a competitor's SaaS pricing page.",
        "focus": "Product / Market",
    },
    "Industry Research Report": {
        "url": "https://example.org/report",
        "notes": "Treat as a long-form industry trend report.",
        "focus": "Industry / Strategy",
    },
}


def load_sample(name: str) -> Tuple[str, str, str]:
    """
    Return the (url, notes, focus) triple for a sample scenario.

    Args:
        name: Key of SAMPLE_CONFIGS; may be empty or None when nothing is
            selected in the dropdown.

    Returns:
        The preset's url, notes and focus. For an unknown or empty name,
        empty url and notes plus the "Custom / Other" focus.
    """
    cfg = SAMPLE_CONFIGS.get(name) if name else None
    if cfg is None:
        # The fallback focus is written into the Focus-lens dropdown, so it
        # has to be one of that dropdown's choices.
        return "", "", "Custom / Other"
    return cfg["url"], cfg["notes"], cfg["focus"]
280
+
281
+
282
+ # ============================================================
283
+ # MAIN HANDLER FOR GRADIO
284
+ # ============================================================
285
+
286
def generate_brief_ui(
    llm_key_state: str,
    llm_key_input: str,
    base_url: str,
    model_name: str,
    firecrawl_key: str,
    url: str,
    pasted_text: str,
    user_notes: str,
    focus: str,
):
    """
    Master UI handler:
    - decides whether to call Firecrawl (if key + URL)
    - merges scraped content with pasted text
    - calls LLM and renders outputs

    Args:
        llm_key_state: LLM key remembered from a previous successful run.
        llm_key_input: LLM key currently typed in the UI; wins over state.
        base_url: LLM provider base URL.
        model_name: Model identifier; empty falls back to "gpt-4.1".
        firecrawl_key: Optional Firecrawl key; scraping is skipped without it.
        url: Source URL.
        pasted_text: Manually pasted content.
        user_notes: Free-form context from the user.
        focus: Selected focus lens.

    Returns:
        A 4-tuple (markdown brief or status message, pretty-printed JSON,
        bar-chart figure, LLM key to store in state). On any failure the
        JSON is empty, the chart is empty and the stored key is unchanged.
    """

    def failure(message: str):
        # Same tuple shape as the success path so Gradio can always unpack it.
        return message, "", analysis_to_figure({}), llm_key_state

    # Pasted keys/URLs frequently carry stray whitespace or a newline, which
    # would otherwise end up inside the HTTP Authorization header or the URL.
    llm_key = (llm_key_input or "").strip() or llm_key_state
    firecrawl_key = (firecrawl_key or "").strip()
    url = (url or "").strip()
    pasted_text = pasted_text or ""
    has_pasted_text = bool(pasted_text.strip())

    if not llm_key:
        return failure("⚠️ Please enter your LLM API key in the left panel.")

    if not url and not has_pasted_text:
        return failure("⚠️ Provide at least a URL or some pasted text.")

    # 1. Scrape via Firecrawl if URL + key are set
    scraped_content = ""
    scrape_note = ""
    if url and firecrawl_key:
        try:
            scraped_content = call_firecrawl_scrape(firecrawl_key, url, formats=["markdown"])
        except Exception as e:  # UI boundary: report, never crash the handler
            # Never hand the error string to the LLM as if it were page content.
            if not has_pasted_text:
                return failure(
                    f"❌ Firecrawl could not scrape the URL and no pasted text was provided:\n\n{e}"
                )
            scrape_note = (
                f"> ⚠️ Firecrawl scrape failed ({e}); "
                "this brief is based on the pasted text only.\n\n"
            )

    # 2. Compose content preview (scraped + pasted)
    content_preview_parts = []
    if scraped_content:
        content_preview_parts.append(scraped_content)
    if has_pasted_text:
        content_preview_parts.append("\n\nUser-pasted text:\n" + pasted_text)
    content_preview = "\n\n".join(content_preview_parts)

    # 3. Build prompt and call LLM
    user_prompt = build_analysis_user_prompt(url, content_preview, user_notes, focus)
    model = model_name or "gpt-4.1"

    try:
        raw = call_chat_completion(
            api_key=llm_key,
            base_url=base_url,
            model=model,
            system_prompt=ANALYSIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_completion_tokens=1800,
        )
        analysis = parse_analysis_json(raw)
        md = analysis_to_markdown(analysis)
        fig = analysis_to_figure(analysis)
        json_out = json.dumps(analysis, indent=2, ensure_ascii=False)
    except Exception as e:  # UI boundary: show the error in the brief panel
        return failure(f"❌ Error generating brief:\n\n{e}")

    # Only a key that produced a successful run is remembered in state.
    return scrape_note + md, json_out, fig, llm_key
361
+
362
+
363
+ # ============================================================
364
+ # GRADIO UI
365
+ # ============================================================
366
+
367
# Layout: settings column (left) and content column (right) in one row,
# followed by the brief, its raw JSON and the density chart.
with gr.Blocks(title="ZEN Web Insight Brief Builder") as demo:
    # dedent keeps the Markdown flush-left so indented source lines are not
    # rendered as a code block.
    gr.Markdown(
        textwrap.dedent(
            """
            # 🌐 ZEN Web Insight Brief Builder

            Turn any URL (plus optional Firecrawl scrape) into a structured,
            actionable executive brief:

            1. **Configure API keys** (LLM + optional Firecrawl)
            2. **Paste a URL and/or text**
            3. **Get an executive summary, risks, opportunities, and actions**
            """
        )
    )

    # Remembers the last LLM key that produced a successful brief.
    llm_key_state = gr.State("")

    with gr.Row():
        # LEFT: API + samples
        with gr.Column(scale=1):
            gr.Markdown("### 1 — API & Model Settings")

            llm_key_input = gr.Textbox(
                label="LLM API Key",
                placeholder="OpenAI or compatible key",
                type="password",
            )

            base_url = gr.Textbox(
                label="LLM Base URL",
                value="https://api.openai.com",
                placeholder="e.g. https://api.openai.com",
            )

            model_name = gr.Textbox(
                label="Model Name",
                value="gpt-4.1",
                placeholder="e.g. gpt-4.1, gpt-4o, etc.",
            )

            gr.Markdown("#### Optional — Firecrawl (URL Scraper)")
            firecrawl_key = gr.Textbox(
                label="Firecrawl API Key (optional)",
                placeholder="Only needed if you want automatic URL scraping",
                type="password",
            )

            gr.Markdown("#### Sample Config")
            sample_dropdown = gr.Dropdown(
                label="Load a sample scenario",
                choices=list(SAMPLE_CONFIGS.keys()),
                value=None,
            )
            load_sample_btn = gr.Button("Load Sample")

        # RIGHT: content + config
        with gr.Column(scale=2):
            gr.Markdown("### 2 — Content & Focus")

            url_input = gr.Textbox(
                label="Source URL",
                placeholder="Paste a URL to analyze (works best with Firecrawl key, but optional)",
            )

            pasted_text = gr.Textbox(
                label="Or paste content manually",
                placeholder="Paste article text, notes, or report sections here.",
                lines=8,
            )

            user_notes = gr.Textbox(
                label="Your context / what you care about",
                placeholder="Example: Focus on youth workforce impacts and funding opportunities.",
                lines=3,
            )

            focus = gr.Dropdown(
                label="Focus lens",
                choices=[
                    "Policy / Regulation",
                    "Product / Market",
                    "Industry / Strategy",
                    "Risk & Compliance",
                    "Custom / Other",
                ],
                value="Industry / Strategy",
            )

            generate_btn = gr.Button("🚀 Generate Insight Brief", variant="primary")

    gr.Markdown("### 3 — Executive Brief")

    with gr.Row():
        with gr.Column(scale=3):
            brief_md = gr.Markdown(
                label="Brief",
                value="Your executive brief will appear here after generation.",
            )
        with gr.Column(scale=2):
            brief_json = gr.Code(
                label="Raw JSON (for automation / export)",
                language="json",
            )

    gr.Markdown("### 4 — Insight Density Visual")
    brief_fig = gr.Plot(label="Insight Density by Category")

    # Wiring
    load_sample_btn.click(
        load_sample,
        inputs=[sample_dropdown],
        outputs=[url_input, user_notes, focus],
    )

    # Input order must match generate_brief_ui's positional parameters.
    generate_btn.click(
        generate_brief_ui,
        inputs=[
            llm_key_state,
            llm_key_input,
            base_url,
            model_name,
            firecrawl_key,
            url_input,
            pasted_text,
            user_notes,
            focus,
        ],
        outputs=[brief_md, brief_json, brief_fig, llm_key_state],
    )

if __name__ == "__main__":
    demo.launch()