Spaces:

DevodG
/

Janus-backend

Running

App Files Files Community

Janus-backend / backend /app /services /external_sources.py

DevodG

deploy: Janus full system stabilization

24f95f0 about 1 month ago

raw

history blame contribute delete

26.7 kB

	from __future__ import annotations
	import re
	import time
	import logging
	import os
	from typing import List, Dict, Any, Optional
	import html
	from urllib.parse import quote_plus, urlparse
	from urllib.parse import parse_qs, unquote

	import httpx

	from app.config import (
	TAVILY_API_KEY,
	NEWSAPI_KEY,
	ALPHAVANTAGE_API_KEY,
	JINA_READER_BASE,
	)

	# Module logger used by the search/fetch helpers below for warnings,
	# errors and debug output.
	logger = logging.getLogger(__name__)

	# Module-level connection pool
	# One shared synchronous httpx client: 30 s timeout, at most 10 open
	# connections of which up to 5 are kept alive between requests.
	_http_pool = httpx.Client(
	timeout=30,
	limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
	)

	# Simple TTL cache for market quotes (5 min)
	_quote_cache: Dict[str, Dict[str, Any]] = {} # {symbol: {"data": ..., "ts": ...}}
	_QUOTE_TTL = 300 # seconds
	# Cache of deep_read_url() results. Entries have the same
	# {"data": ..., "ts": ...} shape and are treated as fresh for 900 s.
	_deep_read_cache: Dict[str, Dict[str, Any]] = {}
	_DEEP_READ_TTL = 900
	# Cache of deep_web_search() result lists, treated as fresh for 600 s.
	_deep_search_cache: Dict[str, Dict[str, Any]] = {}
	_DEEP_SEARCH_TTL = 600

	# Matches "http://" or "https://" followed by a run of non-whitespace
	# characters, so trailing punctuation glued to a URL is part of the match.
	URL_PATTERN = re.compile(r"https?://\S+")
	# Matches a "$" followed by 1-5 uppercase ASCII letters; group 1 is the
	# symbol without the dollar sign.
	TICKER_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")

	# Host name -> credibility score consulted by score_source_credibility().
	# Both the full host and its last-two-label root are looked up there.
	_GENERIC_TRUSTED_DOMAINS = {
	"reuters.com": 0.95,
	"apnews.com": 0.92,
	"bloomberg.com": 0.95,
	"ft.com": 0.92,
	"wsj.com": 0.92,
	"cnbc.com": 0.86,
	"marketwatch.com": 0.84,
	"investopedia.com": 0.8,
	"wikipedia.org": 0.72,
	"sec.gov": 1.0,
	"federalreserve.gov": 1.0,
	"treasury.gov": 1.0,
	"ecb.europa.eu": 1.0,
	"imf.org": 0.98,
	"worldbank.org": 0.98,
	"nvidia.com": 0.9,
	"investor.nvidia.com": 0.97,
	}

	# Host name -> score for domains reported by score_source_credibility()
	# with the reason "opinion_or_blog_domain".
	_LOWER_CONFIDENCE_DOMAINS = {
	"substack.com": 0.58,
	"blogspot.com": 0.45,
	"medium.com": 0.55,
	"dev.to": 0.5,
	}

	# Hosts for which _should_prefer_browser() returns True, i.e. the crawler
	# is tried before the Jina reader in deep_read_url().
	_BROWSER_FAVOR_DOMAINS = {
	"cnbc.com",
	"finance.yahoo.com",
	"seekingalpha.com",
	"futurumgroup.com",
	"marketwatch.com",
	}


	def extract_urls(text: str) -> List[str]:
	    """Return every http(s) URL found in ``text``, in order of appearance.

	    Falsy input (``None`` or an empty string) yields an empty list.
	    """
	    haystack = text if text else ""
	    return URL_PATTERN.findall(haystack)


	def extract_ticker(text: str) -> Optional[str]:
	    """Return the first ``$SYMBOL`` ticker in ``text`` without the ``$``.

	    Returns ``None`` when ``text`` is falsy or contains no ticker.
	    """
	    found = TICKER_PATTERN.search(text or "")
	    return found.group(1) if found else None


	def jina_read(url: str) -> str:
	    """Fetch a text rendering of ``url`` through the Jina Reader endpoint.

	    The URL's leading scheme is removed and the remainder is appended to
	    ``JINA_READER_BASE``. The request goes through the shared module-level
	    HTTP pool, like the other fetch helpers in this file.

	    Args:
	        url: Absolute http(s) URL of the page to read.

	    Returns:
	        Up to the first 4000 characters of the response body, or ``""`` when
	        the reader answers with a status >= 400 or anything raises.
	    """
	    try:
	        target = url
	        # Strip only the *leading* scheme. A blanket str.replace would also
	        # rewrite URLs embedded in the path or query string.
	        for scheme in ("https://", "http://"):
	            if target.startswith(scheme):
	                target = target[len(scheme):]
	                break
	        response = _http_pool.get(f"{JINA_READER_BASE}{target}")
	        if response.status_code >= 400:
	            return ""
	        return response.text[:4000]
	    except Exception as exc:
	        # Best-effort source: callers fall back to other readers, so record
	        # the failure for diagnosis instead of raising.
	        logger.debug("Jina reader failed for %s: %s", url, exc)
	        return ""


	def duckduckgo_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
	    """Scrape DuckDuckGo's HTML endpoint for organic results.

	    Args:
	        query: Search text; it is URL-encoded before being sent.
	        max_results: Maximum number of hits to return. Values <= 0 return
	            an empty list.

	    Returns:
	        A list of ``{"title", "url", "source": "duckduckgo"}`` dicts with
	        unique http(s) URLs, in page order. Any HTTP status >= 400 yields
	        ``[]``; an exception is logged as a warning and whatever was
	        collected so far is returned.
	    """
	    results: List[Dict[str, Any]] = []
	    if max_results <= 0:
	        return results
	    try:
	        search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
	        headers = {
	            "User-Agent": "Mozilla/5.0 (compatible; Janus/1.0; +https://janus.local)"
	        }
	        response = _http_pool.get(search_url, headers=headers, follow_redirects=True)
	        if response.status_code >= 400:
	            return []

	        # Anchor tags carrying class="result__a"; attributes may sit between
	        # the class and href, and between href and the closing ">".
	        pattern = re.compile(
	            r'class="result__a"[^>]*href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE
	        )
	        seen = set()
	        for match in pattern.finditer(response.text):
	            raw_url = html.unescape(match.group(1) or "")
	            url = raw_url
	            if raw_url.startswith("//duckduckgo.com/l/?"):
	                # Redirect wrapper: the real target is in the "uddg" query
	                # parameter. parse_qs already percent-decodes it, so it must
	                # not be unquoted a second time.
	                parsed = urlparse(f"https:{raw_url}")
	                url = parse_qs(parsed.query).get("uddg", [""])[0]
	            title = re.sub(r"<[^>]+>", "", match.group(2)).strip()
	            if not url or not url.startswith("http") or url in seen:
	                continue
	            seen.add(url)
	            results.append({"title": title or url, "url": url, "source": "duckduckgo"})
	            if len(results) >= max_results:
	                break
	    except Exception as e:
	        logger.warning("DuckDuckGo search error: %s", e)
	    return results


	def _direct_page_extract(url: str) -> str:
	    """Download ``url`` directly and reduce its HTML to plain text.

	    Script and style blocks are dropped, remaining tags are replaced by
	    spaces, HTML entities are decoded and whitespace is collapsed.

	    Args:
	        url: Absolute URL to fetch; redirects are followed.

	    Returns:
	        Up to the first 8000 characters of the extracted text, or ``""``
	        when the status is >= 400 or anything raises.
	    """
	    try:
	        headers = {"User-Agent": "Mozilla/5.0 (compatible; Janus/1.0)"}
	        response = _http_pool.get(url, headers=headers, follow_redirects=True)
	        if response.status_code >= 400:
	            return ""
	        block_flags = re.DOTALL | re.IGNORECASE
	        # Non-greedy ".*?" with DOTALL removes each block, including
	        # multi-line bodies, without swallowing the content between blocks.
	        text = re.sub(r"<script[^>]*>.*?</script>", "", response.text, flags=block_flags)
	        text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=block_flags)
	        text = re.sub(r"<[^>]+>", " ", text)
	        # Decode entities only after tag removal so an escaped "&lt;...&gt;"
	        # in the page text is not mistaken for markup.
	        text = html.unescape(text)
	        text = re.sub(r"\s+", " ", text).strip()
	        return text[:8000]
	    except Exception as exc:
	        # Last-resort reader: report and let the caller treat it as empty.
	        logger.debug("Direct fetch failed for %s: %s", url, exc)
	        return ""


	def _sanitize_extracted_text(text: str, limit: int = 8000) -> str:
	    """Strip reader boilerplate and markdown markup from extracted page text.

	    Processing order:
	      1. Markdown images ``![alt](url)`` are removed; links ``[label](url)``
	         are reduced to their label.
	      2. Reader headers (``Title:``, ``URL Source: ...``) and common
	         navigation words are blanked out, case-insensitively.
	      3. Runs of ``# * _ ` > -`` are replaced by a space.
	      4. All whitespace is collapsed to single spaces.

	    Args:
	        text: Raw text or markdown; falsy values are treated as ``""``.
	        limit: Maximum number of characters returned.

	    Returns:
	        The cleaned text truncated to ``limit`` characters.
	    """
	    cleaned = str(text or "")

	    # Handle image/link syntax before the noise patterns: the "[Image ...]"
	    # pattern below would otherwise remove the bracket part of
	    # "![Image 1: x](url)" and leave a stray "! (url)" behind.
	    cleaned = re.sub(r"!\[[^\]]*\]\([^)]+\)", " ", cleaned)
	    cleaned = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", cleaned)

	    noise_patterns = (
	        r"Title:\s*",
	        r"URL Source:\s*[^\n]+",
	        r"Published Time:\s*[^\n]+",
	        r"Markdown Content:\s*",
	        r"\bSkip Navigation\b",
	        r"\bLivestream\b",
	        r"\bMenu\b",
	        r"\bSearch\b",
	        r"\bSign In\b",
	        r"\bSubscribe\b",
	        r"\[Image [^\]]+\]",
	    )
	    for pattern in noise_patterns:
	        cleaned = re.sub(pattern, " ", cleaned, flags=re.IGNORECASE)

	    cleaned = re.sub(r"[#*_`>-]+", " ", cleaned)
	    # Collapsing every whitespace run (newlines included) makes a separate
	    # blank-line normalisation step unnecessary.
	    cleaned = re.sub(r"\s+", " ", cleaned).strip()
	    return cleaned[:limit]


	def _best_excerpt(text: str, limit: int = 280) -> str:
	    """Pick the first substantive sentence of ``text`` as a short excerpt.

	    The text is sanitised (capped at 3000 characters) and split after
	    ``.``, ``!`` or ``?``. The first sentence that is at least 60
	    characters long and contains none of the boilerplate markers is
	    returned. When no sentence qualifies, the start of the cleaned text is
	    used instead.

	    Args:
	        text: Raw page text or markdown.
	        limit: Maximum length of the returned excerpt.

	    Returns:
	        The excerpt truncated to ``limit`` characters, or ``""`` when the
	        sanitised text is empty.
	    """
	    cleaned = _sanitize_extracted_text(text, limit=3000)
	    if not cleaned:
	        return ""

	    boilerplate_markers = (
	        "cookie",
	        "subscribe",
	        "sign in",
	        "javascript",
	        "skip to main content",
	        "accessibility",
	        "stock advisor",
	        "join the motley fool",
	    )
	    for sentence in re.split(r"(?<=[.!?])\s+", cleaned):
	        # Drop leading table/list residue such as "| ", ": " or "- ".
	        candidate = sentence.strip().lstrip("|:- ")
	        if len(candidate) < 60:
	            continue
	        lowered = candidate.lower()
	        if any(marker in lowered for marker in boilerplate_markers):
	            continue
	        return candidate[:limit]

	    return cleaned[:limit]


	def crawler_read(url: str) -> Dict[str, Any]:
	    """Read ``url`` with the project's ``JanusCrawler``.

	    The crawler is imported lazily inside the ``try`` so an import failure
	    is handled like any other crawl error.

	    Returns:
	        A dict with ``title``, ``content`` (sanitised markdown, at most
	        12000 characters), ``links`` (first 20), ``metadata`` and
	        ``source="janus_crawler"`` when the crawl reports success.
	        Otherwise ``{}``; exceptions are logged at debug level.
	    """
	    try:
	        from app.services.crawler import JanusCrawler

	        outcome = JanusCrawler().crawl_sync(url)
	        if not outcome.success:
	            return {}
	        page: Dict[str, Any] = {
	            "title": outcome.title or url,
	            "content": _sanitize_extracted_text(outcome.markdown, limit=12000),
	            "links": outcome.links[:20],
	            "metadata": outcome.metadata,
	            "source": "janus_crawler",
	        }
	        return page
	    except Exception as e:
	        logger.debug(f"Janus crawler unavailable for {url}: {e}")
	    return {}


	def _base_domain(domain: str) -> str:
	    """Return the last two dot-separated labels of ``domain``, lowercased.

	    This is a plain label count, not a public-suffix lookup, so
	    ``"a.b.co.uk"`` yields ``"co.uk"``.

	    Args:
	        domain: Host name; ``None`` or ``""`` is accepted.

	    Returns:
	        ``"label.tld"`` when at least two non-empty labels exist, otherwise
	        the lowercased input (``""`` for falsy input).
	    """
	    normalized = (domain or "").lower()
	    labels = [label for label in normalized.split(".") if label]
	    if len(labels) >= 2:
	        return ".".join(labels[-2:])
	    # Fall back to the normalised value rather than the raw argument so a
	    # None input does not raise here.
	    return normalized


	def score_source_credibility(url: str, metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
	    """Estimate how trustworthy the site behind ``url`` is.

	    Checks, in order:
	      1. The finance domain pack's ``check_source_credibility``, used only
	         when it reports ``known_trusted_source``.
	      2. The generic trusted-domain table (full host, then its root).
	      3. ``.gov`` / ``.edu`` hosts.
	      4. The lower-confidence (blog/opinion) table.
	      5. A 0.58 baseline with small bonuses for news/press/investor style
	         paths and for ``published_at`` / ``author`` metadata, capped at 0.9.

	    Args:
	        url: Source URL.
	        metadata: Optional page metadata; only ``published_at`` and
	            ``author`` are read.

	    Returns:
	        ``{"score": float, "reason": str, "domain": str}``.
	    """
	    metadata = metadata or {}
	    parsed = urlparse(url or "")
	    # hostname drops any port and userinfo, so "reuters.com:443" still
	    # matches the tables. Only a leading "www." is removed.
	    domain = (parsed.hostname or "").lower()
	    if domain.startswith("www."):
	        domain = domain[len("www."):]
	    root = _base_domain(domain)

	    try:
	        from app.domain_packs.finance.source_checker import check_source_credibility

	        finance_assessment = check_source_credibility(url)
	        if finance_assessment.get("reason") == "known_trusted_source":
	            return {
	                "score": finance_assessment.get("credibility_score", 0.5),
	                "reason": finance_assessment.get("reason", "known_trusted_source"),
	                "domain": finance_assessment.get("domain", domain),
	            }
	    except Exception as exc:
	        # The finance pack is optional; fall through to the generic rules.
	        logger.debug("Finance source checker unavailable for %s: %s", url, exc)

	    if domain in _GENERIC_TRUSTED_DOMAINS or root in _GENERIC_TRUSTED_DOMAINS:
	        return {
	            "score": _GENERIC_TRUSTED_DOMAINS.get(domain, _GENERIC_TRUSTED_DOMAINS.get(root, 0.9)),
	            "reason": "trusted_domain",
	            "domain": domain,
	        }

	    if domain.endswith((".gov", ".edu")):
	        return {"score": 0.96, "reason": "government_or_education", "domain": domain}

	    if domain in _LOWER_CONFIDENCE_DOMAINS or root in _LOWER_CONFIDENCE_DOMAINS:
	        return {
	            "score": _LOWER_CONFIDENCE_DOMAINS.get(domain, _LOWER_CONFIDENCE_DOMAINS.get(root, 0.5)),
	            "reason": "opinion_or_blog_domain",
	            "domain": domain,
	        }

	    score = 0.58
	    reason = "unknown_domain"
	    path = parsed.path.lower()
	    if any(token in path for token in ("/news", "/press", "/press-release", "/investor", "/earnings")):
	        score += 0.08
	        reason = "official_or_news_path"
	    if metadata.get("published_at"):
	        score += 0.04
	    if metadata.get("author"):
	        score += 0.02
	    return {"score": min(score, 0.9), "reason": reason, "domain": domain}


	def plan_deep_research(query: str, max_results: int = 5, follow_links: int = 1) -> Dict[str, Any]:
	    """Derive result and link-hop budgets from how research-heavy a query looks.

	    One point is scored per research keyword contained in the lowercased
	    query, plus one when the query has more than ten word tokens. A score
	    of 3 or more raises the budgets to at least 6 results / 2 hops; a
	    score of 1 or 2 raises them to at least 4 results / 1 hop. When the
	    ``JANUS_PREFER_BROWSER_CRAWL`` environment variable is ``"true"`` the
	    budgets are capped at 4 results / 1 hop.

	    Returns:
	        ``{"max_results": int, "follow_links": int}`` with hard ceilings of
	        8 results and 2 hops.
	    """
	    normalized = (query or "").lower()
	    signal_terms = (
	        "across the web",
	        "deep web",
	        "research",
	        "investigate",
	        "compare",
	        "earnings",
	        "guidance",
	        "supply chain",
	        "data center",
	        "competition",
	        "regulation",
	        "policy",
	    )
	    complexity = len([term for term in signal_terms if term in normalized])
	    if len(re.findall(r"[a-z0-9_]+", normalized)) > 10:
	        complexity += 1

	    if complexity >= 3:
	        result_budget, hop_budget = max(max_results, 6), max(follow_links, 2)
	    elif complexity >= 1:
	        result_budget, hop_budget = max(max_results, 4), max(follow_links, 1)
	    else:
	        result_budget, hop_budget = max_results, follow_links

	    browser_mode = os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false").lower() == "true"
	    if browser_mode:
	        result_budget = min(result_budget, 4)
	        hop_budget = min(hop_budget, 1)

	    return {
	        "max_results": min(result_budget, 8),
	        "follow_links": min(hop_budget, 2),
	    }


	def expand_research_queries(query: str, max_variants: int = 4) -> List[str]:
	    """Build a short list of search phrasings for ``query``.

	    The stripped query always comes first. The suffix set is chosen by
	    keyword: earnings-style queries, then stock/company/market-style
	    queries, then a generic fallback. Variants are de-duplicated
	    case-insensitively and the list is cut at ``max_variants`` (capped at 2
	    when ``JANUS_PREFER_BROWSER_CRAWL`` is ``"true"``).

	    Returns:
	        The variants in priority order, or ``[]`` for an empty query.
	    """
	    base = (query or "").strip()
	    if not base:
	        return []

	    if os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false").lower() == "true":
	        max_variants = min(max_variants, 2)

	    lowered = base.lower()
	    earnings_terms = ("earnings", "guidance", "revenue", "margin")
	    market_terms = ("stock", "company", "market", "demand", "supply", "ai")

	    if any(term in lowered for term in earnings_terms):
	        extras = [
	            f"{base} investor relations",
	            f"{base} official results",
	            f"{base} analyst reaction",
	        ]
	    elif any(term in lowered for term in market_terms):
	        extras = [
	            f"{base} official source",
	            f"{base} latest analysis",
	            f"{base} news and filings",
	        ]
	    else:
	        extras = [
	            f"{base} official source",
	            f"{base} latest developments",
	        ]

	    unique: List[str] = []
	    seen_keys = set()
	    for candidate in [base] + extras:
	        phrase = candidate.strip()
	        key = phrase.lower()
	        if not key or key in seen_keys:
	            continue
	        seen_keys.add(key)
	        unique.append(phrase)
	        if len(unique) >= max_variants:
	            break
	    return unique


	def _should_prefer_browser(url: str, query: str = "") -> bool:
	    """Decide whether the crawler should be tried before the Jina reader.

	    Returns ``True`` when any of the following holds:
	      * ``JANUS_PREFER_BROWSER_CRAWL`` is ``"true"`` in the environment;
	      * the URL's host, or its last-two-label root, is listed in
	        ``_BROWSER_FAVOR_DOMAINS``;
	      * the query mentions headline/latest/live/filing/press release.

	    Args:
	        url: Page URL under consideration.
	        query: Optional user query; ``None`` is treated as empty.
	    """
	    if os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false").lower() == "true":
	        return True
	    # hostname ignores any port/userinfo; strip only a leading "www." so a
	    # "www." occurring elsewhere in the host is left alone.
	    domain = (urlparse(url or "").hostname or "").lower()
	    if domain.startswith("www."):
	        domain = domain[len("www."):]
	    root = _base_domain(domain)
	    if domain in _BROWSER_FAVOR_DOMAINS or root in _BROWSER_FAVOR_DOMAINS:
	        return True
	    query_lower = (query or "").lower()
	    return any(term in query_lower for term in ("headline", "latest", "live", "filing", "press release"))


	def _choose_follow_links(
	    links: List[str], base_url: str, query: str, limit: int = 1
	) -> List[str]:
	    """Rank same-host links by relevance to ``query`` and return the best.

	    Only http(s) links whose network location equals that of ``base_url``
	    are considered, and each link is considered once. A link scores one
	    point per query word (four or more characters) found in it, plus two
	    points when it contains a finance keyword. Ties keep the order in
	    which the links were given.

	    Args:
	        links: Candidate absolute URLs.
	        base_url: URL of the page the links were found on.
	        query: User query; ``None`` is treated as empty.
	        limit: Maximum number of links to return.

	    Returns:
	        Up to ``limit`` links, highest score first.
	    """
	    query_words = {
	        token for token in re.findall(r"[a-z0-9_]+", (query or "").lower()) if len(token) >= 4
	    }
	    finance_tokens = (
	        "earnings", "results", "revenue", "guidance",
	        "ai", "data-center", "investor", "quarter",
	    )
	    base_netloc = urlparse(base_url).netloc
	    candidates = []
	    seen = set()
	    for link in links:
	        parsed = urlparse(link)
	        if parsed.scheme not in {"http", "https"}:
	            continue
	        if not parsed.netloc or parsed.netloc != base_netloc:
	            continue
	        if link in seen:
	            continue
	        seen.add(link)
	        lowered = link.lower()  # computed once per link, reused for every token test
	        score = sum(1 for word in query_words if word in lowered)
	        if any(token in lowered for token in finance_tokens):
	            score += 2
	        candidates.append((score, link))
	    # list.sort is stable, so equal scores preserve discovery order.
	    candidates.sort(key=lambda item: item[0], reverse=True)
	    return [link for _, link in candidates[:limit]]


	def _follow_relevant_links(
	    seed_url: str,
	    links: List[str],
	    query: str,
	    remaining_hops: int,
	) -> List[Dict[str, Any]]:
	    """Read the most relevant same-host link found on ``seed_url``.

	    The best link chosen by ``_choose_follow_links`` is read with
	    ``deep_read_url``. The nested read gets ``remaining_hops - 1`` hops, so
	    a budget of 2 also brings back one related read from the followed page.
	    Requesting zero nested hops here would leave that second-hop branch
	    unreachable.

	    Args:
	        seed_url: Page the links were collected from.
	        links: Outgoing links of that page.
	        query: User query used for ranking.
	        remaining_hops: Hop budget; ``<= 0`` disables following.

	    Returns:
	        A list of ``{"url", "title", "content", "credibility_score",
	        "credibility_reason"}`` dicts, with content capped at 2500 characters.
	    """
	    if remaining_hops <= 0 or not links:
	        return []

	    reads = []
	    for link in _choose_follow_links(links, seed_url, query, limit=1):
	        nested = deep_read_url(link, follow_links=remaining_hops - 1, query=query)
	        if not nested:
	            continue
	        # deep_read_url may return the very dict stored in its cache, so the
	        # result is only read here and never modified in place.
	        credibility = nested.get("credibility") or score_source_credibility(
	            link, nested.get("metadata", {})
	        )
	        reads.append(
	            {
	                "url": link,
	                "title": nested.get("title") or link,
	                "content": nested.get("content", "")[:2500],
	                "credibility_score": credibility.get("score", 0.5),
	                "credibility_reason": credibility.get("reason", "unknown_domain"),
	            }
	        )
	        if remaining_hops > 1:
	            reads.extend((nested.get("related_reads") or [])[:1])
	    return reads


	def deep_read_url(url: str, follow_links: int = 0, query: str = "") -> Dict[str, Any]:
	    """Read one page through the available readers and score its source.

	    Reader order is crawler, Jina reader, direct fetch when
	    ``_should_prefer_browser`` says so; otherwise Jina reader, crawler,
	    direct fetch. Each reader is tried at most once and the first one that
	    yields content wins. Only the crawler supplies links and metadata.
	    Successful results are cached for ``_DEEP_READ_TTL`` seconds.

	    Args:
	        url: Page to read.
	        follow_links: Hop budget for reading related same-host links.
	        query: User query; ``None`` is treated as empty.

	    Returns:
	        ``{}`` when no reader produced content, otherwise a dict with
	        ``title``, ``url``, ``content`` (sanitised, at most 8000 characters),
	        ``source``, ``metadata``, ``domain``, ``credibility`` and
	        ``related_reads``. A cache hit returns the stored dict itself, so
	        callers should not modify it.
	    """
	    query = query or ""
	    browser_flag = os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false")
	    cache_key = f"{url}|{follow_links}|{query[:120]}|{browser_flag}"
	    cached = _deep_read_cache.get(cache_key)
	    if cached and (time.time() - cached["ts"]) < _DEEP_READ_TTL:
	        return cached["data"]

	    content = ""
	    source = ""
	    links: List[str] = []
	    metadata: Dict[str, Any] = {}
	    title = url

	    if _should_prefer_browser(url, query):
	        reader_order = ("crawler", "jina", "direct")
	    else:
	        reader_order = ("jina", "crawler", "direct")

	    for reader in reader_order:
	        if content:
	            break
	        if reader == "crawler":
	            crawled = crawler_read(url)
	            if crawled:
	                content = crawled.get("content", "")
	                # Guard against None so the slicing and .get() calls below
	                # cannot fail on a sparse crawl result.
	                links = crawled.get("links") or []
	                metadata = crawled.get("metadata") or {}
	                title = crawled.get("title") or title
	                source = crawled.get("source", "crawler")
	        elif reader == "jina":
	            content = jina_read(url)
	            if content:
	                source = "jina_reader"
	        else:
	            content = _direct_page_extract(url)
	            if content:
	                source = "direct_fetch"

	    if not content:
	        return {}

	    related_reads = []
	    if follow_links > 0 and links:
	        related_reads = _follow_relevant_links(url, links, query, remaining_hops=follow_links)

	    credibility = score_source_credibility(url, metadata)

	    result = {
	        "title": metadata.get("title") or title,
	        "url": url,
	        "content": _sanitize_extracted_text(content, limit=8000),
	        "source": source,
	        "metadata": metadata,
	        "domain": credibility.get("domain"),
	        "credibility": credibility,
	        "related_reads": related_reads,
	    }
	    _deep_read_cache[cache_key] = {"data": result, "ts": time.time()}
	    return result


	def deep_web_search(
	    query: str, max_results: int = 5, follow_links: int = 1
	) -> List[Dict[str, Any]]:
	    """Search Tavily and DuckDuckGo, deep-read the hits and rank them.

	    Steps:
	      1. Return a cached result when one younger than
	         ``_DEEP_SEARCH_TTL`` exists for the same arguments.
	      2. Let ``plan_deep_research`` adjust the result and hop budgets.
	      3. Collect Tavily hits, then DuckDuckGo hits, de-duplicated by URL
	         and limited to the planned result count.
	      4. Deep-read each hit. Reading stops once 20 seconds have elapsed,
	         checked between hits.
	      5. Sort by credibility score, then content length, both descending.

	    Args:
	        query: Search text.
	        max_results: Requested number of results (may be raised by the plan).
	        follow_links: Requested link-hop budget (may be raised by the plan).

	    Returns:
	        A list of dicts with ``title``, ``url``, ``content``, ``source``,
	        ``domain``, ``credibility_score``, ``credibility_reason``,
	        ``metadata`` and ``related_reads``. The list is also stored in the
	        cache under the *requested* arguments.
	    """
	    browser_flag = os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false")
	    cache_key = f"{query}|{max_results}|{follow_links}|{browser_flag}"
	    cached = _deep_search_cache.get(cache_key)
	    if cached and (time.time() - cached["ts"]) < _DEEP_SEARCH_TTL:
	        return cached["data"]

	    plan = plan_deep_research(query, max_results=max_results, follow_links=follow_links)
	    max_results = plan["max_results"]
	    follow_links = plan["follow_links"]
	    started_at = time.time()

	    candidates: List[Dict[str, Any]] = [
	        {
	            "title": item.get("title", item.get("url", "")),
	            "url": item.get("url", ""),
	            "snippet": item.get("content", ""),
	            "source": "tavily",
	        }
	        for item in tavily_search(query, max_results=max_results)
	    ]
	    candidates.extend(duckduckgo_search(query, max_results=max_results))

	    seen_urls = set()
	    merged = []
	    for item in candidates:
	        url = item.get("url", "")
	        if not url or url in seen_urls:
	            continue
	        seen_urls.add(url)
	        merged.append(item)
	        if len(merged) >= max_results:
	            break

	    enriched = []
	    for item in merged:
	        # Wall-clock budget for the read phase.
	        if time.time() - started_at > 20:
	            break
	        deep = deep_read_url(item.get("url", ""), follow_links=follow_links, query=query)
	        if not deep:
	            continue
	        credibility = deep.get("credibility") or {}
	        enriched.append(
	            {
	                "title": deep.get("title") or item.get("title", "Untitled"),
	                "url": item.get("url", ""),
	                "content": deep.get("content", "") or item.get("snippet", ""),
	                "source": item.get("source", deep.get("source", "web")),
	                "domain": deep.get("domain"),
	                "credibility_score": credibility.get("score", 0.5),
	                "credibility_reason": credibility.get("reason", "unknown_domain"),
	                "metadata": deep.get("metadata", {}),
	                "related_reads": deep.get("related_reads", []),
	            }
	        )
	    enriched.sort(
	        key=lambda item: (
	            item.get("credibility_score", 0.0),
	            len(item.get("content", "")),
	        ),
	        reverse=True,
	    )
	    result = enriched[:max_results]
	    _deep_search_cache[cache_key] = {"data": result, "ts": time.time()}
	    return result


	def deep_web_research_bundle(
	    query: str,
	    max_results: int = 6,
	    follow_links: int = 1,
	    max_variants: int = 4,
	) -> Dict[str, Any]:
	    """Run ``deep_web_search`` over several query phrasings and merge the hits.

	    For every URL the hit with the highest credibility score is kept and
	    tagged with the variant that produced it. Variants stop being issued
	    after 30 seconds. In browser-crawl mode
	    (``JANUS_PREFER_BROWSER_CRAWL`` is ``"true"``) they also stop as soon as
	    the best hit so far scores at least 0.85.

	    Returns:
	        ``{"query", "query_variants", "results", "synthesis"}`` where
	        ``results`` holds at most ``max_results`` hits ordered by
	        credibility score, then content length, both descending.
	    """

	    def _rank_key(entry: Dict[str, Any]):
	        return (
	            entry.get("credibility_score", 0.0),
	            len(entry.get("content", "")),
	        )

	    plan = plan_deep_research(query, max_results=max_results, follow_links=follow_links)
	    variants = expand_research_queries(query, max_variants=max_variants)
	    per_variant_cap = max(3, min(plan["max_results"], max_results))

	    best_by_url: Dict[str, Dict[str, Any]] = {}
	    started_at = time.time()
	    for variant in variants:
	        if time.time() - started_at > 30:
	            break
	        hits = deep_web_search(
	            variant,
	            max_results=per_variant_cap,
	            follow_links=plan["follow_links"],
	        )
	        for hit in hits:
	            url = hit.get("url", "")
	            if not url:
	                continue
	            incumbent = best_by_url.get(url)
	            challenger = {**hit, "query_variant": variant}
	            if incumbent is None or challenger.get("credibility_score", 0.0) > incumbent.get(
	                "credibility_score", 0.0
	            ):
	                best_by_url[url] = challenger

	        leaders = sorted(best_by_url.values(), key=_rank_key, reverse=True)
	        if leaders and leaders[0].get("credibility_score", 0.0) >= 0.85:
	            if os.getenv("JANUS_PREFER_BROWSER_CRAWL", "false").lower() == "true":
	                break

	    results = sorted(best_by_url.values(), key=_rank_key, reverse=True)[:max_results]
	    synthesis = synthesize_deep_web_results(query, results)
	    return {
	        "query": query,
	        "query_variants": variants,
	        "results": results,
	        "synthesis": synthesis,
	    }


	def synthesize_deep_web_results(query: str, results: List[Dict[str, Any]]) -> Dict[str, Any]:
	    """Condense ranked deep-web results into a short brief.

	    Args:
	        query: The user query, quoted in the summary sentence.
	        results: Result dicts as produced by ``deep_web_search``, best first.

	    Returns:
	        A dict with:
	          * ``summary``: one paragraph naming up to three sources;
	          * ``top_sources``: title/url/domain/credibility of the first four;
	          * ``key_points``: an excerpt from each of the first three results
	            that yields one;
	          * ``avg_credibility``: mean score of the first five results,
	            rounded to three decimals.
	    """
	    if not results:
	        return {
	            "summary": "No credible deep-web results retrieved.",
	            "top_sources": [],
	            "key_points": [],
	            "avg_credibility": 0.0,
	        }

	    top_sources = [
	        {
	            "title": item.get("title"),
	            "url": item.get("url"),
	            "domain": item.get("domain"),
	            "credibility_score": item.get("credibility_score", 0.0),
	            "credibility_reason": item.get("credibility_reason", "unknown"),
	        }
	        for item in results[:4]
	    ]

	    scored = results[:5]  # non-empty here, so the division below is safe
	    avg_credibility = sum(item.get("credibility_score", 0.0) for item in scored) / len(scored)

	    key_points = []
	    for item in results[:3]:
	        point = _best_excerpt(item.get("content", ""), limit=280)
	        if point:
	            key_points.append(
	                {
	                    "point": point,
	                    "source": item.get("title") or item.get("domain"),
	                    "credibility_score": item.get("credibility_score", 0.0),
	                }
	            )

	    # Every top_sources entry has a "title" key (possibly None), so a
	    # dict.get default would never apply; chain with "or" to fall back.
	    source_lines = "; ".join(
	        f"{item.get('title') or item.get('domain') or 'source'} [{item.get('credibility_score', 0.0):.2f}]"
	        for item in top_sources[:3]
	    )
	    summary = (
	        f"Janus reviewed {len(results)} deep public-web results for '{query}'. "
	        f"Highest-ranked sources: {source_lines}. "
	        f"Average credibility of the top results: {avg_credibility:.2f}."
	    )

	    return {
	        "summary": summary,
	        "top_sources": top_sources,
	        "key_points": key_points,
	        "avg_credibility": round(avg_credibility, 3),
	    }


	def tavily_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
	    """Query the Tavily search API with a "basic" depth search.

	    Returns:
	        The ``results`` list from the JSON response. ``[]`` is returned when
	        no API key is configured, when the status is >= 400 (logged as a
	        warning) or when anything raises (logged as an error).
	    """
	    if not TAVILY_API_KEY:
	        return []

	    request_body = {
	        "api_key": TAVILY_API_KEY,
	        "query": query,
	        "search_depth": "basic",
	        "max_results": max_results,
	    }
	    try:
	        response = _http_pool.post("https://api.tavily.com/search", json=request_body)
	        if response.status_code < 400:
	            return response.json().get("results", [])
	        logger.warning(f"Tavily search returned {response.status_code}")
	    except Exception as e:
	        logger.error(f"Tavily search error: {e}")
	    return []


	def news_search(query: str, page_size: int = 5) -> List[Dict[str, Any]]:
	    """Fetch English articles for ``query`` from NewsAPI, sorted by publish time.

	    Returns:
	        The ``articles`` list from the JSON response. ``[]`` is returned
	        when no API key is configured, when the status is >= 400 (logged as
	        a warning) or when anything raises (logged as an error).
	    """
	    if not NEWSAPI_KEY:
	        return []

	    request_params = {
	        "q": query,
	        "pageSize": page_size,
	        "language": "en",
	        "sortBy": "publishedAt",
	        "apiKey": NEWSAPI_KEY,
	    }
	    try:
	        response = _http_pool.get("https://newsapi.org/v2/everything", params=request_params)
	        if response.status_code < 400:
	            return response.json().get("articles", [])
	        logger.warning(f"NewsAPI returned {response.status_code}")
	    except Exception as e:
	        logger.error(f"NewsAPI error: {e}")
	    return []


	def market_quote(symbol: str) -> Dict[str, Any]:
	    """Return the Alpha Vantage ``GLOBAL_QUOTE`` payload for ``symbol``.

	    Quotes are cached per symbol for ``_QUOTE_TTL`` seconds. Only non-empty
	    quotes are cached: Alpha Vantage reports rate limits and errors as
	    HTTP 200 bodies without a "Global Quote" entry, and caching those would
	    hide the real quote for the whole TTL.

	    Args:
	        symbol: Ticker symbol, e.g. ``"NVDA"``.

	    Returns:
	        The "Global Quote" dict, or ``{}`` when no API key or symbol is
	        given, the request fails, or the response carries no quote.
	    """
	    if not ALPHAVANTAGE_API_KEY or not symbol:
	        return {}

	    # Check cache first
	    cached = _quote_cache.get(symbol)
	    if cached and (time.time() - cached["ts"]) < _QUOTE_TTL:
	        logger.debug("Market quote cache hit: %s", symbol)
	        return cached["data"]

	    try:
	        params = {
	            "function": "GLOBAL_QUOTE",
	            "symbol": symbol,
	            "apikey": ALPHAVANTAGE_API_KEY,
	        }
	        response = _http_pool.get("https://www.alphavantage.co/query", params=params)
	        if response.status_code >= 400:
	            logger.warning("Alpha Vantage returned %s", response.status_code)
	            return {}
	        data = response.json()
	        quote = data.get("Global Quote") or {}
	        if not quote:
	            notice = data.get("Note") or data.get("Information") or data.get("Error Message")
	            logger.warning(
	                "Alpha Vantage returned no quote for %s: %s",
	                symbol,
	                notice or "empty payload",
	            )
	            return {}

	        # Cache the result
	        _quote_cache[symbol] = {"data": quote, "ts": time.time()}
	        return quote
	    except Exception as e:
	        logger.error("Alpha Vantage error: %s", e)
	        return {}


	def build_external_context(user_input: str) -> str:
	    """Assemble a plain-text context block from every external source.

	    Sections, in order, each included only when it produced data:
	      * Jina Reader text for the first two URLs in the input;
	      * up to four Tavily search hits;
	      * a deep-web brief plus up to three deep-read results;
	      * up to four NewsAPI articles;
	      * an Alpha Vantage quote for the first ``$TICKER`` in the input.

	    Missing or ``None`` text fields are rendered as empty strings.

	    Args:
	        user_input: Raw user message.

	    Returns:
	        The sections joined by blank lines, or
	        ``"No external API context available."`` when none produced data.
	    """
	    chunks: List[str] = []

	    for url in extract_urls(user_input)[:2]:
	        content = jina_read(url)
	        if content:
	            chunks.append(f"[Jina Reader for {url}]\n{content}")

	    search_results = tavily_search(user_input, max_results=4)
	    if search_results:
	        formatted = [
	            f"- {item.get('title', 'Untitled')}\n {item.get('url', '')}\n {str(item.get('content') or '')[:300]}"
	            for item in search_results[:4]
	        ]
	        chunks.append("[Tavily Search]\n" + "\n".join(formatted))

	    deep_results = deep_web_search(user_input, max_results=3, follow_links=1)
	    if deep_results:
	        synthesized = synthesize_deep_web_results(user_input, deep_results)
	        chunks.append(f"[Deep Web Brief]\n{synthesized.get('summary', '')}")
	        formatted = []
	        for item in deep_results[:3]:
	            related = item.get("related_reads", [])
	            related_text = ""
	            if related:
	                related_text = "\n Related: " + " | ".join(
	                    f"{r.get('url', '')} [{r.get('credibility_score', 0.0):.2f}]: {str(r.get('content') or '')[:140]}"
	                    for r in related[:1]
	                )
	            formatted.append(
	                f"- {item.get('title', 'Untitled')}\n {item.get('url', '')}\n"
	                f" Credibility: {item.get('credibility_score', 0.0):.2f} ({item.get('credibility_reason', 'unknown')})\n"
	                f" {str(item.get('content') or '')[:500]}{related_text}"
	            )
	        chunks.append("[Deep Web Search]\n" + "\n".join(formatted))

	    articles = news_search(user_input, page_size=4)
	    if articles:
	        # "or ''" keeps a null description from being printed as "None".
	        formatted = [
	            f"- {item.get('title', 'Untitled')}\n {item.get('url', '')}\n {str(item.get('description') or '')[:300]}"
	            for item in articles[:4]
	        ]
	        chunks.append("[NewsAPI]\n" + "\n".join(formatted))

	    ticker = extract_ticker(user_input)
	    if ticker:
	        quote = market_quote(ticker)
	        if quote:
	            chunks.append(f"[Alpha Vantage Quote for {ticker}]\n{quote}")

	    if not chunks:
	        return "No external API context available."

	    return "\n\n".join(chunks)