from __future__ import annotations

from typing import TYPE_CHECKING, Dict, List

if TYPE_CHECKING:
    # Only referenced in annotations, which are lazy under
    # `from __future__ import annotations`, so no runtime import is needed.
    from ..state import ResearchSnippet

# Maximum number of snippet characters kept before "..." is appended.
_MAX_SNIPPET_CHARS = 600


def normalize_snippets(snippets: List[ResearchSnippet]) -> List[ResearchSnippet]:
    """Deduplicate snippets by URL + snippet text and trim their length.

    Each input item is read through its ``"title"``, ``"url"`` and
    ``"snippet"`` keys, all of which must be strings. Surrounding whitespace
    is stripped from the three fields, and snippet text longer than
    ``_MAX_SNIPPET_CHARS`` characters is cut to that length with ``"..."``
    appended.

    Duplicates are detected on the *stripped* URL and the *stripped,
    untruncated* snippet text, so entries that differ only in surrounding
    whitespace collapse into one. The first occurrence wins and input order
    is preserved. The input items are not mutated; new dicts are returned.

    Args:
        snippets: Items to normalize.

    Returns:
        A new list of normalized, de-duplicated items.

    Raises:
        KeyError: If an item lacks the ``"url"`` or ``"snippet"`` key, or a
            retained (non-duplicate) item lacks ``"title"``.
    """
    seen: set[tuple[str, str]] = set()
    normalized: List[ResearchSnippet] = []

    for item in snippets:
        url = item["url"].strip()
        text = item["snippet"].strip()

        # Key on the cleaned values so whitespace-only variants of the same
        # snippet are recognised as duplicates of each other.
        key = (url, text)
        if key in seen:
            continue
        seen.add(key)

        # Hard limit on length for prompts.
        if len(text) > _MAX_SNIPPET_CHARS:
            text = text[:_MAX_SNIPPET_CHARS] + "..."

        normalized.append(
            {
                "title": item["title"].strip(),
                "url": url,
                "snippet": text,
            }
        )

    return normalized