Spaces:

chuckfinca
/

appsimple-assistant

Running

chuckfinca Claude Opus 4.6 (1M context) committed on Mar 29

Commit

39d86a4

1 Parent(s): 6968ab9

Add persona prompt and server-side citation processing

Add BASE_PROMPT for Charles Feinn / AppSimple persona with
off-topic gating. Add process_citations() to parse inline
[filename: "quote"] citations, verify against workspace files,
replace with Unicode superscripts, and include sources array
in done event and trace upload.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

app.py +108 -7

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from __future__ import annotations
 import json
 import os
 import tempfile
 import time
 from collections.abc import Generator
@@ -46,11 +47,31 @@ hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
 SOURCE = "website"
 # ---------------------------------------------------------------------------
-# Global daily counter
 # ---------------------------------------------------------------------------
-_daily_count = 0
 _daily_date = date.today()
@@ -136,6 +157,71 @@ def upload_trace(result: dict) -> None:
         print(f"WARNING: trace upload failed: {exc}")
 # ---------------------------------------------------------------------------
 # Stats formatting
 # ---------------------------------------------------------------------------
@@ -188,7 +274,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
         scratch_path = tempfile.mkdtemp(prefix="lh-scratch-")
     scratch_dir = Path(scratch_path)
-    system_prompt = build_system_prompt(base_prompt="", workspace=WORKSPACE_DIR)
     messages: list[Message] = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": message},
@@ -229,7 +315,16 @@ def chat(message: str, scratch_path: str, session_cost: float):
     trace = agent_run.trace
     trace.wall_time_s = round(time.monotonic() - start, 2)
-    answer = trace.answer or accumulated_answer or "(no answer)"
     stats = format_stats(trace)
     result = {
         "question": message,
@@ -237,6 +332,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
         "passed": True,
         "assertions": {},
         "trace": asdict(trace),
     }
     upload_trace(result)
     trace_html = render_trace(result, max_chars=2000)
@@ -245,7 +341,7 @@ def chat(message: str, scratch_path: str, session_cost: float):
     remaining_msg = f"\n\n---\n{stats}\n\n*{remaining} question{'s' if remaining != 1 else ''} remaining today*"
     yield (
-        f"{answer}{remaining_msg}",
         trace_html,
         scratch_path,
         session_cost,
@@ -488,7 +584,7 @@ def stream_question(question: str) -> Generator[str, None, None]:
         return
     scratch_dir = Path(tempfile.mkdtemp(prefix="lh-scratch-"))
-    system_prompt = build_system_prompt(base_prompt="", workspace=WORKSPACE_DIR)
     messages: list[Message] = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": question},
@@ -520,19 +616,24 @@ def stream_question(question: str) -> Generator[str, None, None]:
     trace = agent_run.trace
     trace.wall_time_s = round(time.monotonic() - start, 2)
     result = {
         "question": question,
         "source": SOURCE,
         "passed": True,
         "assertions": {},
         "trace": asdict(trace),
     }
     upload_trace(result)
     trace_html = render_trace(result, max_chars=2000)
     yield json.dumps({
         "type": "done",
-        "answer": trace.answer or "",
         "stats": format_stats(trace),
         "trace_html": trace_html,
         "remaining": _remaining(),

 import json
 import os
+import re
 import tempfile
 import time
 from collections.abc import Generator
 SOURCE = "website"
+BASE_PROMPT = (  # persona instructions passed as base_prompt= to build_system_prompt()
+    "You represent Charles Feinn and AppSimple. You have documents about his "
+    "professional background, services, projects, and capabilities. Use third person.\n\n"
+    "Do not speculate, manufacture connections to make a question fit, or answer "
+    "off-topic questions."
+)
 # ---------------------------------------------------------------------------
+# Global daily counter (initialized from trace repo on startup)
 # ---------------------------------------------------------------------------
+def _count_todays_traces() -> int:
+    """Count trace files uploaded today (UTC) from the HF dataset repo."""
+    if not hf_api or not HF_TRACES_REPO:
+        return 0
+    today_prefix = datetime.now(timezone.utc).strftime("%Y%m%d")
+    try:
+        files = hf_api.list_repo_files(repo_id=HF_TRACES_REPO, repo_type="dataset")
+        return sum(1 for f in files if f.startswith(today_prefix))
+    except Exception as exc:
+        print(f"WARNING: could not read trace count: {exc}")
+        return 0
+_daily_count = _count_todays_traces()  # NOTE(review): seeded by UTC date, while _daily_date below uses local date.today() — confirm they agree around midnight
 _daily_date = date.today()
         print(f"WARNING: trace upload failed: {exc}")
+# ---------------------------------------------------------------------------
+# Citation processing
+# ---------------------------------------------------------------------------
+_CITATION_RE = re.compile(r'\[([^:\[\]]+):\s*"([^"]+)"\]')  # [filename: "quote"] -> group 1 = filename, group 2 = quote
+_SUPERSCRIPT_DIGITS = str.maketrans(  # translation table: ASCII digits 0-9 -> Unicode superscript digits
+    "0123456789", "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079"
+)
+def _superscript(n: int) -> str:
+    return str(n).translate(_SUPERSCRIPT_DIGITS)
+def process_citations(
+    answer: str, workspace: Path | None
+) -> tuple[str, list[dict]]:
+    """Parse [filename: "quote"] citations, verify against workspace files."""
+    if not answer or not workspace:
+        return answer or "", []
+    sources: list[dict] = []
+    seen: dict[tuple[str, str], int] = {}
+    def _replace(match: re.Match) -> str:
+        filename = match.group(1).strip()
+        quote = match.group(2).strip()
+        key = (filename, quote)
+        if key in seen:
+            return _superscript(seen[key])
+        idx = len(sources) + 1
+        seen[key] = idx
+        matched = False
+        line = None
+        for candidate in [filename, f"{filename}.md"]:
+            filepath = workspace / candidate
+            if filepath.is_file():
+                try:
+                    text = filepath.read_text(errors="replace")
+                    pos = text.find(quote)
+                    if pos == -1:
+                        pos = text.lower().find(quote.lower())
+                    if pos >= 0:
+                        matched = True
+                        line = text[:pos].count("\n") + 1
+                        break
+                except OSError:
+                    pass
+        sources.append({
+            "id": idx,
+            "doc": filename.replace(".md", "").replace("_", " "),
+            "quote": quote,
+            "line": line,
+            "matched": matched,
+        })
+        return _superscript(idx)
+    clean_answer = _CITATION_RE.sub(_replace, answer)
+    return clean_answer, sources
 # ---------------------------------------------------------------------------
 # Stats formatting
 # ---------------------------------------------------------------------------
         scratch_path = tempfile.mkdtemp(prefix="lh-scratch-")
     scratch_dir = Path(scratch_path)
+    system_prompt = build_system_prompt(base_prompt=BASE_PROMPT, workspace=WORKSPACE_DIR)
     messages: list[Message] = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": message},
     trace = agent_run.trace
     trace.wall_time_s = round(time.monotonic() - start, 2)
+    raw_answer = trace.answer or accumulated_answer or "(no answer)"
+    clean_answer, sources = process_citations(raw_answer, WORKSPACE_DIR)
+    if sources:
+        source_lines = "\n".join(
+            f"{_superscript(s['id'])} {s['doc']}: \"{s['quote']}\""
+            for s in sources
+        )
+        clean_answer += f"\n\n---\n{source_lines}"
     stats = format_stats(trace)
     result = {
         "question": message,
         "passed": True,
         "assertions": {},
         "trace": asdict(trace),
+        "citations": sources,
     }
     upload_trace(result)
     trace_html = render_trace(result, max_chars=2000)
     remaining_msg = f"\n\n---\n{stats}\n\n*{remaining} question{'s' if remaining != 1 else ''} remaining today*"
     yield (
+        f"{clean_answer}{remaining_msg}",
         trace_html,
         scratch_path,
         session_cost,
         return
     scratch_dir = Path(tempfile.mkdtemp(prefix="lh-scratch-"))
+    system_prompt = build_system_prompt(base_prompt=BASE_PROMPT, workspace=WORKSPACE_DIR)
     messages: list[Message] = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": question},
     trace = agent_run.trace
     trace.wall_time_s = round(time.monotonic() - start, 2)
+    clean_answer, sources = process_citations(trace.answer or "", WORKSPACE_DIR)
     result = {
         "question": question,
         "source": SOURCE,
         "passed": True,
         "assertions": {},
         "trace": asdict(trace),
+        "citations": sources,
     }
     upload_trace(result)
     trace_html = render_trace(result, max_chars=2000)
     yield json.dumps({
         "type": "done",
+        "answer": clean_answer,
+        "sources": sources,
         "stats": format_stats(trace),
         "trace_html": trace_html,
         "remaining": _remaining(),