chuckfinca Claude Opus 4.6 (1M context) committed on
Commit
39d86a4
·
1 Parent(s): 6968ab9

Add persona prompt and server-side citation processing

Browse files

Add BASE_PROMPT for Charles Feinn / AppSimple persona with
off-topic gating. Add process_citations() to parse inline
[filename: "quote"] citations, verify against workspace files,
replace with Unicode superscripts, and include sources array
in done event and trace upload.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +108 -7
app.py CHANGED
@@ -7,6 +7,7 @@ from __future__ import annotations
7
 
8
  import json
9
  import os
 
10
  import tempfile
11
  import time
12
  from collections.abc import Generator
@@ -46,11 +47,31 @@ hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
46
 
47
  SOURCE = "website"
48
 
 
 
 
 
 
 
 
49
  # ---------------------------------------------------------------------------
50
- # Global daily counter
51
  # ---------------------------------------------------------------------------
52
 
53
- _daily_count = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  _daily_date = date.today()
55
 
56
 
@@ -136,6 +157,71 @@ def upload_trace(result: dict) -> None:
136
  print(f"WARNING: trace upload failed: {exc}")
137
 
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # ---------------------------------------------------------------------------
140
  # Stats formatting
141
  # ---------------------------------------------------------------------------
@@ -188,7 +274,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
188
  scratch_path = tempfile.mkdtemp(prefix="lh-scratch-")
189
  scratch_dir = Path(scratch_path)
190
 
191
- system_prompt = build_system_prompt(base_prompt="", workspace=WORKSPACE_DIR)
192
  messages: list[Message] = [
193
  {"role": "system", "content": system_prompt},
194
  {"role": "user", "content": message},
@@ -229,7 +315,16 @@ def chat(message: str, scratch_path: str, session_cost: float):
229
 
230
  trace = agent_run.trace
231
  trace.wall_time_s = round(time.monotonic() - start, 2)
232
- answer = trace.answer or accumulated_answer or "(no answer)"
 
 
 
 
 
 
 
 
 
233
  stats = format_stats(trace)
234
  result = {
235
  "question": message,
@@ -237,6 +332,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
237
  "passed": True,
238
  "assertions": {},
239
  "trace": asdict(trace),
 
240
  }
241
  upload_trace(result)
242
  trace_html = render_trace(result, max_chars=2000)
@@ -245,7 +341,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
245
  remaining_msg = f"\n\n---\n{stats}\n\n*{remaining} question{'s' if remaining != 1 else ''} remaining today*"
246
 
247
  yield (
248
- f"{answer}{remaining_msg}",
249
  trace_html,
250
  scratch_path,
251
  session_cost,
@@ -488,7 +584,7 @@ def stream_question(question: str) -> Generator[str, None, None]:
488
  return
489
 
490
  scratch_dir = Path(tempfile.mkdtemp(prefix="lh-scratch-"))
491
- system_prompt = build_system_prompt(base_prompt="", workspace=WORKSPACE_DIR)
492
  messages: list[Message] = [
493
  {"role": "system", "content": system_prompt},
494
  {"role": "user", "content": question},
@@ -520,19 +616,24 @@ def stream_question(question: str) -> Generator[str, None, None]:
520
 
521
  trace = agent_run.trace
522
  trace.wall_time_s = round(time.monotonic() - start, 2)
 
 
 
523
  result = {
524
  "question": question,
525
  "source": SOURCE,
526
  "passed": True,
527
  "assertions": {},
528
  "trace": asdict(trace),
 
529
  }
530
  upload_trace(result)
531
  trace_html = render_trace(result, max_chars=2000)
532
 
533
  yield json.dumps({
534
  "type": "done",
535
- "answer": trace.answer or "",
 
536
  "stats": format_stats(trace),
537
  "trace_html": trace_html,
538
  "remaining": _remaining(),
 
7
 
8
  import json
9
  import os
10
+ import re
11
  import tempfile
12
  import time
13
  from collections.abc import Generator
 
47
 
48
  SOURCE = "website"
49
 
50
# Persona text passed as ``base_prompt`` to build_system_prompt(): first
# paragraph sets who the assistant represents and the voice to use, second
# paragraph is the off-topic / no-speculation gate.
BASE_PROMPT = "\n\n".join(
    (
        "You represent Charles Feinn and AppSimple. You have documents about his "
        "professional background, services, projects, and capabilities. Use third person.",
        "Do not speculate, manufacture connections to make a question fit, or answer "
        "off-topic questions.",
    )
)
56
+
57
  # ---------------------------------------------------------------------------
58
+ # Global daily counter (initialized from trace repo on startup)
59
  # ---------------------------------------------------------------------------
60
 
61
+ def _count_todays_traces() -> int:
62
+ """Count trace files uploaded today (UTC) from the HF dataset repo."""
63
+ if not hf_api or not HF_TRACES_REPO:
64
+ return 0
65
+ today_prefix = datetime.now(timezone.utc).strftime("%Y%m%d")
66
+ try:
67
+ files = hf_api.list_repo_files(repo_id=HF_TRACES_REPO, repo_type="dataset")
68
+ return sum(1 for f in files if f.startswith(today_prefix))
69
+ except Exception as exc:
70
+ print(f"WARNING: could not read trace count: {exc}")
71
+ return 0
72
+
73
+
74
+ _daily_count = _count_todays_traces()
75
  _daily_date = date.today()
76
 
77
 
 
157
  print(f"WARNING: trace upload failed: {exc}")
158
 
159
 
160
+ # ---------------------------------------------------------------------------
161
+ # Citation processing
162
+ # ---------------------------------------------------------------------------
163
+
164
+ _CITATION_RE = re.compile(r'\[([^:\[\]]+):\s*"([^"]+)"\]')
165
+ _SUPERSCRIPT_DIGITS = str.maketrans(
166
+ "0123456789", "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079"
167
+ )
168
+
169
+
170
+ def _superscript(n: int) -> str:
171
+ return str(n).translate(_SUPERSCRIPT_DIGITS)
172
+
173
+
174
+ def process_citations(
175
+ answer: str, workspace: Path | None
176
+ ) -> tuple[str, list[dict]]:
177
+ """Parse [filename: "quote"] citations, verify against workspace files."""
178
+ if not answer or not workspace:
179
+ return answer or "", []
180
+
181
+ sources: list[dict] = []
182
+ seen: dict[tuple[str, str], int] = {}
183
+
184
+ def _replace(match: re.Match) -> str:
185
+ filename = match.group(1).strip()
186
+ quote = match.group(2).strip()
187
+ key = (filename, quote)
188
+
189
+ if key in seen:
190
+ return _superscript(seen[key])
191
+
192
+ idx = len(sources) + 1
193
+ seen[key] = idx
194
+
195
+ matched = False
196
+ line = None
197
+ for candidate in [filename, f"{filename}.md"]:
198
+ filepath = workspace / candidate
199
+ if filepath.is_file():
200
+ try:
201
+ text = filepath.read_text(errors="replace")
202
+ pos = text.find(quote)
203
+ if pos == -1:
204
+ pos = text.lower().find(quote.lower())
205
+ if pos >= 0:
206
+ matched = True
207
+ line = text[:pos].count("\n") + 1
208
+ break
209
+ except OSError:
210
+ pass
211
+
212
+ sources.append({
213
+ "id": idx,
214
+ "doc": filename.replace(".md", "").replace("_", " "),
215
+ "quote": quote,
216
+ "line": line,
217
+ "matched": matched,
218
+ })
219
+ return _superscript(idx)
220
+
221
+ clean_answer = _CITATION_RE.sub(_replace, answer)
222
+ return clean_answer, sources
223
+
224
+
225
  # ---------------------------------------------------------------------------
226
  # Stats formatting
227
  # ---------------------------------------------------------------------------
 
274
  scratch_path = tempfile.mkdtemp(prefix="lh-scratch-")
275
  scratch_dir = Path(scratch_path)
276
 
277
+ system_prompt = build_system_prompt(base_prompt=BASE_PROMPT, workspace=WORKSPACE_DIR)
278
  messages: list[Message] = [
279
  {"role": "system", "content": system_prompt},
280
  {"role": "user", "content": message},
 
315
 
316
  trace = agent_run.trace
317
  trace.wall_time_s = round(time.monotonic() - start, 2)
318
+ raw_answer = trace.answer or accumulated_answer or "(no answer)"
319
+ clean_answer, sources = process_citations(raw_answer, WORKSPACE_DIR)
320
+
321
+ if sources:
322
+ source_lines = "\n".join(
323
+ f"{_superscript(s['id'])} {s['doc']}: \"{s['quote']}\""
324
+ for s in sources
325
+ )
326
+ clean_answer += f"\n\n---\n{source_lines}"
327
+
328
  stats = format_stats(trace)
329
  result = {
330
  "question": message,
 
332
  "passed": True,
333
  "assertions": {},
334
  "trace": asdict(trace),
335
+ "citations": sources,
336
  }
337
  upload_trace(result)
338
  trace_html = render_trace(result, max_chars=2000)
 
341
  remaining_msg = f"\n\n---\n{stats}\n\n*{remaining} question{'s' if remaining != 1 else ''} remaining today*"
342
 
343
  yield (
344
+ f"{clean_answer}{remaining_msg}",
345
  trace_html,
346
  scratch_path,
347
  session_cost,
 
584
  return
585
 
586
  scratch_dir = Path(tempfile.mkdtemp(prefix="lh-scratch-"))
587
+ system_prompt = build_system_prompt(base_prompt=BASE_PROMPT, workspace=WORKSPACE_DIR)
588
  messages: list[Message] = [
589
  {"role": "system", "content": system_prompt},
590
  {"role": "user", "content": question},
 
616
 
617
  trace = agent_run.trace
618
  trace.wall_time_s = round(time.monotonic() - start, 2)
619
+
620
+ clean_answer, sources = process_citations(trace.answer or "", WORKSPACE_DIR)
621
+
622
  result = {
623
  "question": question,
624
  "source": SOURCE,
625
  "passed": True,
626
  "assertions": {},
627
  "trace": asdict(trace),
628
+ "citations": sources,
629
  }
630
  upload_trace(result)
631
  trace_html = render_trace(result, max_chars=2000)
632
 
633
  yield json.dumps({
634
  "type": "done",
635
+ "answer": clean_answer,
636
+ "sources": sources,
637
  "stats": format_stats(trace),
638
  "trace_html": trace_html,
639
  "remaining": _remaining(),