| | """ |
| | URL Validator Tool — Extracts and validates URLs found in text. |
| | |
| | Assigned To: Safety Guardian agent ONLY |
| | Reference: system_design.md — Tool 5 (Lines 543-577) |
| | Reference: engineering_guardrails.md — §2 Tool-Call Argument Validation |
| | |
| | Key guardrails: |
| | - Extracts all URLs, checks against known malicious domain patterns |
| | - Flags shortened URLs (bit.ly, tinyurl) and data: URIs |
| | - Caps at 50 URLs |
| | - Returns error STRINGS, never raises exceptions |
| | """ |
| |
|
| | import re |
| | import json |
| | from crewai.tools import tool |
| |
|
| |
|
# Upper bound on how many extracted URLs are inspected per call.
_MAX_URLS = 50

# URL/URI extraction.
# - http(s) keeps the original unanchored match and character class.
# - file://, javascript: and data: URIs are extracted too, so that the
#   indicators for them can fire on standalone URIs and not only when they
#   happen to be embedded inside an http(s) URL.
# - \b keeps words such as "profile://" or "metadata:" from matching.
# - data: URIs must contain the comma that separates the media type from the
#   payload, so plain "data:value" text is not treated as a URI.
# - Schemes are case-insensitive, hence re.IGNORECASE.
_URL_PATTERN = re.compile(
    r"""(?:https?://|\bfile://|\bjavascript:)[^\s<>"')\]]+"""
    r"""|\bdata:[^\s<>"',]*,[^\s<>"')\]]*""",
    re.IGNORECASE,
)

# Shortener domains must match on a domain-label boundary. A plain substring
# test for "t.co" also hits every host ending in "t.com" (microsoft.com,
# reddit.com, ...), which made ordinary links look malicious.
_SHORTENER_PATTERN = re.compile(
    r"(?<![a-z0-9-])(?:bit\.ly|t\.co|goo\.gl)(?![a-z0-9-]|\.[a-z0-9])"
)

# Indicators that are still matched anywhere in the lower-cased URL.
_SUSPICIOUS_SUBSTRINGS = (
    "tinyurl",
    "data:", "javascript:", "file://",
    "malware", "phishing",
)


def _is_suspicious_url(url):
    """Return True when *url* matches a shortener domain or a suspicious substring."""
    lowered = url.lower()
    if _SHORTENER_PATTERN.search(lowered):
        return True
    return any(marker in lowered for marker in _SUSPICIOUS_SUBSTRINGS)


def _url_report(total, checked, malicious, is_safe, error=None):
    """Serialize the analysis result; the "error" key is present only on failure."""
    report = {
        "total_urls": total, "checked": checked,
        "malicious_urls": malicious, "is_safe": is_safe,
    }
    if error is not None:
        report["error"] = error
    return json.dumps(report)


@tool
def url_validator_tool(text: str = "") -> str:
    """Extract and validate URLs found in text. Checks against blocklist of
    suspicious URL patterns. Returns JSON with URL analysis. Pass the text to scan as the 'text' argument.

    The returned JSON object has the keys "total_urls" (all URLs found),
    "checked" (URLs inspected, at most 50), "malicious_urls" (the inspected
    URLs that matched an indicator) and "is_safe". An "error" key is added
    for empty/invalid input or when validation itself fails; this function
    returns error strings instead of raising.
    """
    # Type check first so a non-str argument is never truth-tested outside
    # the try block below.
    if not isinstance(text, str) or not text:
        return _url_report(0, 0, [], True, "Empty or invalid input")

    if not text.strip():
        return _url_report(0, 0, [], True, "Empty text provided")

    try:
        urls = _URL_PATTERN.findall(text)
        inspected = urls[:_MAX_URLS]
        malicious = [url for url in inspected if _is_suspicious_url(url)]
        return _url_report(len(urls), len(inspected), malicious, not malicious)
    except Exception as e:
        # Deliberate catch-all at the tool boundary: fail closed
        # (is_safe=False) and report the problem as a string.
        return _url_report(
            0, 0, [], False,
            f"URL validation failed: {type(e).__name__}: {str(e)}",
        )
| |
|