Kheem Dharmani
Initial commit: Agentic Multiwriter
32f259e
raw
history blame contribute delete
868 Bytes
from __future__ import annotations
from typing import List, Dict
from ..state import ResearchSnippet
def normalize_snippets(
    snippets: List[ResearchSnippet],
    *,
    max_chars: int = 600,
) -> List[ResearchSnippet]:
    """
    Deduplicate snippets by URL + snippet text and trim their length.

    Each entry's ``title``, ``url`` and ``snippet`` are stripped of
    surrounding whitespace. Entries are considered duplicates when their
    stripped URL and stripped (untruncated) snippet text match; only the
    first occurrence is kept, and input order is preserved. The input list
    and its entries are not modified.

    Args:
        snippets: Mappings with string values under the keys ``"title"``,
            ``"url"`` and ``"snippet"``.
        max_chars: Maximum number of snippet characters to keep. Longer
            snippets are cut to this length and suffixed with ``"..."``
            (so a truncated snippet is ``max_chars + 3`` characters long).

    Returns:
        A new list of dicts containing only the ``"title"``, ``"url"`` and
        ``"snippet"`` keys.

    Raises:
        ValueError: If ``max_chars`` is negative.
        KeyError: If an entry lacks one of the three required keys.
    """
    if max_chars < 0:
        raise ValueError("max_chars must be non-negative")

    seen: set[tuple[str, str]] = set()
    normalized: List[ResearchSnippet] = []
    for s in snippets:
        url = s["url"].strip()
        snippet_text = s["snippet"].strip()

        # Key on the *stripped* values: the output is stripped, so keying on
        # the raw strings would let whitespace-only variants through as
        # identical-looking duplicates.
        key = (url, snippet_text)
        if key in seen:
            continue
        seen.add(key)

        # Hard limit length for prompts
        if len(snippet_text) > max_chars:
            snippet_text = snippet_text[:max_chars] + "..."

        normalized.append(
            {
                "title": s["title"].strip(),
                "url": url,
                "snippet": snippet_text,
            }
        )
    return normalized