"""Wiki compiler — uses Claude to integrate raw sources into structured wiki articles."""
import json
import datetime
import anthropic
# System prompt sent with every compilation call. It defines the curator
# role, the integration workflow, the exact JSON response schema the model
# must return, the closed set of category names, and the clinical-content
# requirements (NMC/NICE/NHS/BNF references, UK spellings, disclaimer).
# NOTE: compile_source() parses the response with json.loads, so the schema
# described here is a hard contract, not just guidance.
COMPILE_SYSTEM_PROMPT = """You are a clinical knowledge wiki curator for the Nursing Citizen Development Organisation.
Your job is to integrate new source material into an existing nursing knowledge base (wiki).
The wiki is a collection of markdown articles organised by category. Each article has:
- A title, category, tags, and backlinks to other articles
- Substantive clinical content aligned with NMC Standards of Proficiency (2018), UK law, and NHS frameworks
When given new source material, you must:
1. Identify key nursing concepts, frameworks, guidelines, or clinical information in the source
2. Decide which existing articles should be UPDATED with new information
3. Identify any new articles that should be CREATED for concepts not yet covered
4. Integrate the information accurately and clinically appropriately
5. Add/update backlinks between related articles
6. Always cite the source in any updated/created articles
Return a JSON object with this structure:
{
"summary": "Brief summary of what was integrated and why",
"articles_updated": [
{
"slug": "article_slug",
"title": "Article Title",
"category": "category_name",
"tags": ["tag1", "tag2"],
"content": "Full markdown content of the updated article"
}
],
"articles_created": [
{
"slug": "new_slug",
"title": "New Article Title",
"category": "category_name",
"tags": ["tag1", "tag2"],
"content": "Full markdown content of the new article"
}
],
"index_updates": "Updated one-line entries for the index (markdown format)",
"log_entry": "Log entry text for this compilation"
}
Categories to use: standards, clinical, pharmacology, evidence, frameworks, safety, law, mental_health, research, ethics
Clinical content must:
- Be accurate and evidence-based
- Include NMC proficiency mappings where relevant
- Include UK-specific references (NICE, NMC, NHS, BNF)
- Include the disclaimer: "This tool supports but does not replace clinical judgment."
- Use UK spellings (organisation, anaesthesia, etc.)
"""
CHUNK_SIZE = 7000 # chars per chunk for large documents
def _chunk_text(text: str, chunk_size: int = CHUNK_SIZE) -> list[str]:
"""Split text into chunks at paragraph boundaries."""
if len(text) <= chunk_size:
return [text]
chunks = []
paragraphs = text.split("\n\n")
current = []
current_len = 0
for para in paragraphs:
if current_len + len(para) > chunk_size and current:
chunks.append("\n\n".join(current))
current = [para]
current_len = len(para)
else:
current.append(para)
current_len += len(para)
if current:
chunks.append("\n\n".join(current))
return chunks
def compile_source(client: "anthropic.Anthropic", source_title: str, source_content: str,
                   existing_index: str, existing_articles: dict, model: str = "claude-sonnet-4-6") -> dict:
    """Integrate a new source document into the wiki.

    Large documents are split at paragraph boundaries and compiled chunk by
    chunk; ``existing_articles`` is updated in place between chunks so each
    pass builds on the state produced by the previous one.

    Args:
        client: Anthropic API client used to run the compilation prompt.
        source_title: Human-readable title of the source document.
        source_content: Raw text of the source.
        existing_index: Current wiki index (markdown), shown to the model.
        existing_articles: Mapping of slug -> article dict. MUTATED in place
            as each chunk's updated/created articles are applied.
        model: Model identifier to use for the API calls.

    Returns:
        Dict with keys ``summary``, ``articles_updated``, ``articles_created``
        (each deduplicated by slug, keeping the last version seen),
        ``index_updates`` and ``log_entry``.

    Raises:
        json.JSONDecodeError: If a model response is not valid JSON.
    """
    chunks = _chunk_text(source_content)
    total_chunks = len(chunks)
    merged: dict = {"articles_updated": [], "articles_created": [],
                    "summary": "", "index_updates": "", "log_entry": ""}
    for chunk_num, chunk in enumerate(chunks, 1):
        chunk_label = (f"{source_title} (part {chunk_num}/{total_chunks})"
                       if total_chunks > 1 else source_title)
        # Build context from the current article state (reflects updates made
        # by earlier chunks). Only the first 8 articles are previewed to keep
        # the prompt small.
        articles_context = ""
        if existing_articles:
            for slug, art in list(existing_articles.items())[:8]:
                preview = art["content"][:400].replace("\n", " ")
                articles_context += f"\n- **{art['title']}** ({art['category']}): {preview}...\n"
        user_prompt = f"""## Existing Wiki Index
{existing_index}
## Sample of Existing Articles (previews)
{articles_context}
## New Source to Integrate
**Title**: {chunk_label}
{"**(Large document — this is chunk " + str(chunk_num) + " of " + str(total_chunks) + ")**" if total_chunks > 1 else ""}
**Content**:
{chunk}
Please integrate this source into the wiki. Return valid JSON only, no markdown code fences."""
        response = client.messages.create(
            model=model,
            max_tokens=4096,
            system=COMPILE_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_prompt}],
        )
        raw = response.content[0].text.strip()
        # Defensive: strip markdown code fences the model may emit anyway.
        # (The original indexed split(...)[1] unconditionally, which raises
        # IndexError on a bare "```" response.)
        if raw.startswith("```"):
            raw = raw.split("\n", 1)[1] if "\n" in raw else ""
        if raw.endswith("```"):
            raw = raw.rsplit("```", 1)[0]
        result = json.loads(raw)
        # Merge this chunk's results into the running totals.
        today = datetime.date.today().isoformat()
        for art in result.get("articles_updated", []) + result.get("articles_created", []):
            art["last_updated"] = today
            art["sources"] = art.get("sources", []) + [source_title]
            # Apply to existing_articles so the next chunk sees current state.
            existing_articles[art["slug"]] = art
        merged["articles_updated"].extend(result.get("articles_updated", []))
        merged["articles_created"].extend(result.get("articles_created", []))
        if result.get("summary"):
            merged["summary"] += f"[Part {chunk_num}] {result['summary']} "
        if result.get("index_updates"):
            # BUGFIX: index_updates was initialised but never merged, so it
            # was always "". Keep the most recent chunk's entries, mirroring
            # the log_entry handling below.
            merged["index_updates"] = result["index_updates"]
        if result.get("log_entry"):
            merged["log_entry"] = result["log_entry"]
    # Deduplicate by slug, keeping the last version of each article.
    # BUGFIX: the original dumped every deduplicated article into
    # articles_updated and emptied articles_created, so newly created
    # articles were misreported as updates. An article created in any chunk
    # is reported as created, even if a later chunk revised it.
    created_slugs = {art["slug"] for art in merged["articles_created"]}
    seen: dict = {}
    for art in merged["articles_updated"] + merged["articles_created"]:
        seen[art["slug"]] = art
    merged["articles_updated"] = [a for a in seen.values() if a["slug"] not in created_slugs]
    merged["articles_created"] = [a for a in seen.values() if a["slug"] in created_slugs]
    return merged
def rebuild_index(client: "anthropic.Anthropic", articles: dict, model: str = "claude-sonnet-4-6") -> str:
    """Regenerate the wiki index (markdown) from all articles.

    Args:
        client: Anthropic API client.
        articles: Mapping of slug -> article dict; ``title``, ``category``
            and (optional) ``tags`` are read from each article.
        model: Model identifier to use for the API call.

    Returns:
        The regenerated markdown index, stripped of surrounding whitespace.
    """
    # One summary line per article; only the article dicts are needed here,
    # not the slugs.
    article_list = [
        f"- **{art['title']}** ({art['category']}): {', '.join(art.get('tags', []))}"
        for art in articles.values()
    ]
    articles_block = "\n".join(article_list)
    prompt = f"""Regenerate a well-organised wiki index for these nursing knowledge articles.
Group them by category. Each entry should be a one-line summary.
Format as markdown with category headers (##).
Articles:
{articles_block}
Return only the markdown index content."""
    response = client.messages.create(
        model=model,
        max_tokens=2048,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text.strip()