""" URL Validator Tool โ€” Extracts and validates URLs found in text. Assigned To: Safety Guardian agent ONLY Reference: system_design.md โ€” Tool 5 (Lines 543-577) Reference: engineering_guardrails.md โ€” ยง2 Tool-Call Argument Validation Key guardrails: - Extracts all URLs, checks against known malicious domain patterns - Flags shortened URLs (bit.ly, tinyurl) and data: URIs - Caps at 50 URLs - Returns error STRINGS, never raises exceptions """ import re import json from crewai.tools import tool @tool def url_validator_tool(text: str = "") -> str: """Extract and validate URLs found in text. Checks against blocklist of suspicious URL patterns. Returns JSON with URL analysis. Pass the text to scan as the 'text' argument.""" # === INPUT VALIDATION === if not text or not isinstance(text, str): return json.dumps({ "total_urls": 0, "checked": 0, "malicious_urls": [], "is_safe": True, "error": "Empty or invalid input" }) if len(text.strip()) == 0: return json.dumps({ "total_urls": 0, "checked": 0, "malicious_urls": [], "is_safe": True, "error": "Empty text provided" }) # === URL EXTRACTION AND VALIDATION === try: url_pattern = r'https?://[^\s<>"\')\]]+' urls = re.findall(url_pattern, text) suspicious_indicators = [ "bit.ly", "tinyurl", "t.co", "goo.gl", "data:", "javascript:", "file://", "malware", "phishing", ] results = [] malicious = [] for url in urls[:50]: # Cap at 50 URLs is_suspicious = any(ind in url.lower() for ind in suspicious_indicators) results.append({"url": url, "suspicious": is_suspicious}) if is_suspicious: malicious.append(url) return json.dumps({ "total_urls": len(urls), "checked": len(results), "malicious_urls": malicious, "is_safe": len(malicious) == 0, }) except Exception as e: return json.dumps({ "total_urls": 0, "checked": 0, "malicious_urls": [], "is_safe": False, "error": f"URL validation failed: {type(e).__name__}: {str(e)}" })