Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import SeleniumURLLoader | |
| from shared import ResearchState | |
# Upper bound on the number of characters of page text kept by load_node:
# ~100,000 characters (roughly 32,000 tokens max).
MAX_CHARS = 100_000
def load_node(state: ResearchState) -> dict:
    """Load the page at ``state.url`` and return its text, capped in length.

    Args:
        state: Object exposing a ``url`` attribute. A falsy ``url`` skips
            loading entirely.

    Returns:
        A dict with a single ``"content"`` key holding one of:
        - ``"No URL to load"`` when ``state.url`` is falsy;
        - ``"No content"`` when the loader returns no documents;
        - otherwise the first document's ``page_content``, truncated to
          ``MAX_CHARS`` characters.
    """
    if not state.url:
        return {"content": "No URL to load"}

    # The URL is passed through str() before being handed to the loader.
    loader = SeleniumURLLoader(urls=[str(state.url)])
    docs = loader.load()

    # Only one URL is requested, so only the first document is used.
    content = docs[0].page_content if docs else "No content"

    # Truncate early to prevent overload later.
    return {"content": content[:MAX_CHARS]}