# Rwkv-xd / utils.py — helper utilities: think-tag parsing, background logging,
# lightweight tools (search, calc, file I/O) and tool-detection heuristics.
import re, os, threading, queue, requests, time
from typing import List, Optional, Union
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings
from api_types import ChatMessage
def parse_think_response(full_response: str):
    """Split a model response into (reasoning_content, content).

    Looks for a ``<think>...</think>`` section. Returns ``(None, text)`` when no
    think tag is present; when the tag is unclosed, everything from the opening
    tag onward is treated as reasoning and content is empty.
    """
    # Match the full opening tag: the old "<think" prefix search combined with
    # replace("<think", "") left a stray ">" in the extracted reasoning.
    think_start = full_response.find("<think>")
    if think_start == -1:
        return None, full_response.strip()
    think_end = full_response.find("</think>")
    if think_end == -1:  # unclosed tag: all trailing text is reasoning
        reasoning = full_response[think_start:].strip()
        content = ""
    else:
        # len("</think>") == 8, so +8 spans exactly through the closing tag
        # (the previous +9 swallowed the first character of the content).
        reasoning = full_response[think_start : think_end + 8].strip()
        content = full_response[think_end + 8 :].strip()
    # Strip the tags, keeping only the inner reasoning text.
    reasoning_content = reasoning.replace("<think>", "").replace("</think>", "").strip()
    return reasoning_content, content
def cleanMessages(messages: List[ChatMessage], removeThinkingContent: bool = False):
    """Render chat messages as a role-prefixed prompt string.

    Roles are normalized to capitalized form ("User", "Assistant", ...), runs
    of newlines are collapsed to one, and, when requested, <think> sections are
    stripped from assistant turns. Turns are joined with blank lines.
    """
    rendered = []
    for msg in messages:
        role = msg.role.strip().lower().capitalize()
        body = re.sub(r"\n+", "\n", msg.content.strip())
        # Thinking content is only ever removed from assistant turns.
        if removeThinkingContent and role == "Assistant":
            body = remove_nested_think_tags_stack(body)
        rendered.append(f"{role}: {body}")
    return "\n\n".join(rendered)
def remove_nested_think_tags_stack(text):
    """Remove <think>...</think> sections, including nested ones.

    A depth counter tracks open tags (equivalent to the original stack of
    identical tokens). Text inside any open tag is dropped; an unmatched
    closing tag outside a section is kept verbatim; an unclosed opening tag
    drops the rest of the string.
    """
    depth = 0
    kept = []
    pos = 0
    length = len(text)
    while pos < length:
        if text.startswith("<think>", pos):
            depth += 1
            pos += 7
        elif text.startswith("</think>", pos):
            if depth:
                depth -= 1
            else:
                # Closing tag with no matching opener: preserve it as-is.
                kept.append("</think>")
            pos += 8
        else:
            if depth == 0:
                kept.append(text[pos])
            pos += 1
    return "".join(kept)
def format_bytes(size):
    """Format a byte count as a human-readable string, e.g. 2048 -> '2.0000KB'.

    Exact unit boundaries roll over (1024 -> '1.0000KB'), and values of one
    tebibyte and above stay expressed in TB instead of raising a KeyError.
    """
    power = 2**10
    power_labels = {0: "", 1: "K", 2: "M", 3: "G", 4: "T"}
    n = 0
    # ">=" so exact powers advance to the next unit; cap n at the largest
    # known label so huge sizes don't index past the table.
    while size >= power and n < max(power_labels):
        size /= power
        n += 1
    return f"{size:.4f}{power_labels[n] + 'B'}"
# Bounded queue feeding the background logger thread; capacity comes from the
# LOGGER_QUEUE_SIZE env var (default 100) so a slow or unreachable log sink
# cannot grow memory without bound (log() drops items when full).
LOGGER_QUEUE = queue.Queue(int(os.environ.get('LOGGER_QUEUE_SIZE', 100)))
def logger():
    """Daemon loop: drain LOGGER_QUEUE and POST each item as JSON to LOG_PORT.

    Blocks on queue.get() so the thread sleeps while idle rather than spinning.
    Every failure — queue or network — is swallowed so logging can never crash
    the application.
    """
    print("enable")
    while True:
        try:
            entry = LOGGER_QUEUE.get()
        except Exception:
            # Unexpected queue failure: back off briefly and keep serving.
            time.sleep(0.1)
            continue
        try:
            # Re-read the env var each iteration (matches original behavior).
            endpoint = os.environ.get("LOG_PORT")
            if endpoint:
                # Best-effort delivery; any network error is ignored.
                requests.post(
                    endpoint,
                    headers={"Content-Type": "application/json"},
                    json=entry,
                    timeout=5,
                )
        except Exception:
            # Never let log failures escape to the main thread.
            pass
# Start the background logger thread only when a LOG_PORT sink is configured.
if os.environ.get("LOG_PORT"):
    # make the logger thread a daemon so it won't block process exit
    t = threading.Thread(target=logger, daemon=True)
    t.start()
def log(item):
    """Queue *item* for the background logger without ever blocking.

    When the queue is full the item is dropped (best-effort logging); a short
    console note is printed so the drop is at least visible.
    """
    try:
        LOGGER_QUEUE.put_nowait(item)
    except queue.Full:
        try:
            # Non-blocking fallback so at least something is recorded.
            print("LOG DROP: queue full, dropping log item")
        except Exception:
            pass
def web_search(query: str, top_k: int = 3) -> str:
    """Perform a simple web search via DuckDuckGo HTML and return top_k results as a combined string.

    Queries the public HTML endpoint, extracts title/snippet/url from each
    result container and returns them joined one per line. Returns "" for an
    empty query, a missing bs4 dependency, or any network/parse failure.
    """
    if not query or not query.strip():
        return ""
    try:
        from bs4 import BeautifulSoup
    except Exception:
        return ""
    try:
        resp = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query.strip()},
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
            timeout=10,
        )
        soup = BeautifulSoup(resp.text, "html.parser")
        # DuckDuckGo's html structure: results live in `div.result` containers.
        lines = []
        for hit in soup.find_all("div", class_="result", limit=top_k):
            anchor = hit.find("a", class_="result__a") or hit.find("a", href=True)
            title = anchor.get_text(strip=True) if anchor else ""
            href = anchor.get("href") if anchor else ""
            snip_node = hit.find("a", class_="result__snippet") or hit.find("div", class_="result__snippet")
            snippet = snip_node.get_text(strip=True) if snip_node else ""
            lines.append(f"{title} - {snippet} - {href}")
        return "\n".join(lines)
    except Exception:
        return ""
def calc(expr: str) -> str:
    """Safely evaluate a simple arithmetic expression and return the result as string.

    Parses via ast so only numeric literals, binary arithmetic and unary
    operators are permitted; anything else (names, calls, attributes, string
    literals) raises and is reported as an "ERROR: ..." string.
    """
    try:
        import ast, operator as op

        # supported operators (note: ^ is bitwise xor, not power)
        allowed_ops = {
            ast.Add: op.add,
            ast.Sub: op.sub,
            ast.Mult: op.mul,
            ast.Div: op.truediv,
            ast.Pow: op.pow,
            ast.BitXor: op.xor,
            ast.USub: op.neg,
            ast.Mod: op.mod,
            ast.FloorDiv: op.floordiv,
        }

        def _eval(node):
            # ast.Constant replaces the deprecated ast.Num (slated for removal
            # after 3.12); accept only real numbers, not strings/bools/None.
            if isinstance(node, ast.Constant):
                if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
                    return node.value
                raise ValueError("Unsupported constant")
            elif isinstance(node, ast.BinOp):
                left = _eval(node.left)
                right = _eval(node.right)
                op_type = type(node.op)
                if op_type in allowed_ops:
                    return allowed_ops[op_type](left, right)
                raise ValueError("Unsupported operator")
            elif isinstance(node, ast.UnaryOp):
                operand = _eval(node.operand)
                op_type = type(node.op)
                if op_type in allowed_ops:
                    return allowed_ops[op_type](operand)
                raise ValueError("Unsupported unary op")
            else:
                raise ValueError("Unsupported expression type")

        node = ast.parse(expr, mode='eval')
        result = _eval(node.body)
        return str(result)
    except Exception as e:
        return f"ERROR: {e}"
def detect_tools_and_reasoning(text_or_messages) -> dict:
    """Detects whether web_search, calc, or reasoning are likely needed based on heuristics.

    Accepts either a single string prompt or a list of ChatMessage (or dicts
    with a 'content' key). Returns a dict of need_* booleans, the list of
    detected tool invocations, and a per-tool confidence summary.
    """
    # Normalize the input into one text blob.
    if isinstance(text_or_messages, list):
        try:
            text = "\n\n".join([m.get('content', '') if isinstance(m, dict) else (getattr(m, 'content', '') or '') for m in text_or_messages if m])
        except Exception:
            text = ""
    else:
        text = str(text_or_messages or "")
    t = text.lower()
    # Simple heuristics
    need_calc = False
    need_web_search = False
    need_reasoning = False
    need_universal = False
    need_fetch_url = False
    need_summarize = False
    need_keywords = False
    need_sentiment = False
    need_translate = False
    need_spell_check = False
    need_format_code = False
    need_explain_code = False
    detected_tools = []
    # Heuristic for calc: operators AND numbers, OR 'calculate/compute'-style
    # keywords plus numeric tokens.
    if (re.search(r"\d+\s*[-+*/%]\s*\d+", t) or (re.search(r"\b(calculate|compute|solve|evaluate|sum|add|subtract|multiply|divide)\b", t) and re.search(r"\d", t))):
        need_calc = True
        # Try to extract a most-likely arithmetic expression from the text:
        # accept digits, parentheses and operators.
        m = re.search(r"([\d\(\)\s+\-*/%^.]+)", text)
        expr = m.group(0).strip() if m else None
        # only keep if it includes an operator
        if expr and not re.search(r"[-+*/%]", expr):
            expr = None
        detected_tools.append({"name": "calc", "args": {"expression": expr, "confidence": 0.95 if expr else 0.5}})
    # Heuristic for web search: question words + facts or 'current/latest'
    # signals; avoid math queries. NOTE: matched against the lowercased text,
    # so every token must be lowercase (the old "GDP of" could never match).
    if (
        re.search(r"\b(who is|who's|what is|what's|when is|where is|current|latest|news|is the president|president of|population of|capital of|how many|gdp of)\b", t)
        and not re.search(r"\d+\s*[-+*/%]\s*\d+", t)
    ):
        need_web_search = True
        detected_tools.append({"name": "web_search", "args": {"query": text, "confidence": 0.9}})
    # Heuristic for reasoning: words like 'explain', 'why', 'reason', 'prove', 'derive', 'compare'
    if re.search(r"\b(explain|why|because|reason|prove|derive|compare|analysis|analysis:|evaluate|argue|consequence|trade-offs)\b", t):
        need_reasoning = True
    # Heuristic for universal tool: requests to "use tool", "execute tool", or generic function-call language
    if re.search(r"\b(use (a )?tool|execute (a )?tool|call (a )?tool|function call|run tool|do this via a tool|invoke tool|call tool)\b", t):
        need_universal = True
    # detect fetch_url: a URL string or request to 'open' the link
    if re.search(r"https?://\S+", t) or re.search(r"\b(open|visit)\s+(https?://|www\.)", t):
        need_fetch_url = True
        m_url = re.search(r'https?://\S+', text)
        url_val = m_url.group(0) if m_url else text
        detected_tools.append({"name": "fetch_url", "args": {"url": url_val, "confidence": 0.85}})
    # detect translate requests: 'translate to es' or 'traducir a español'
    if re.search(r"\btranslate\b.*to\s+([a-z]{2,})|\btraducir\b.*a\s+([a-z]{2,})", t):
        need_translate = True
        m = re.search(r"\btranslate\b.*to\s+([a-z]{2,})|\btraducir\b.*a\s+([a-z]{2,})", t)
        tgt = (m.group(1) if m and m.group(1) else (m.group(2) if m and len(m.groups()) > 1 else 'en'))
        detected_tools.append({"name": "translate", "args": {"text": text, "target_lang": tgt, "confidence": 0.85}})
    # detect summarize requests ('summarize', 'tl;dr', 'summarise')
    if re.search(r"\b(summarize|summarise|tl;dr|tl;dr:)\b", t):
        need_summarize = True
        detected_tools.append({"name": "summarize", "args": {"text": text, "max_sentences": 3, "confidence": 0.8}})
    # detect keyword extraction requests
    if re.search(r"\b(keywords|key words|key terms|extract keywords)\b", t):
        need_keywords = True
        detected_tools.append({"name": "keywords", "args": {"text": text, "top_k": 5, "confidence": 0.78}})
    # detect sentiment analysis requests (single check: an identical duplicate
    # of this block used to append the sentiment tool twice)
    if re.search(r"\b(sentiment|tone|is this positive|is this negative|what is the sentiment)\b", t):
        need_sentiment = True
        detected_tools.append({"name": "sentiment", "args": {"text": text, "confidence": 0.8}})
    # detect code-format and explain: '```', 'explain code', 'what does this function do'
    if re.search(r"```[a-zA-Z]*|format code|format this code|pretty print code", t):
        need_format_code = True
        detected_tools.append({"name": "format_code", "args": {"code": text, "language": "python", "confidence": 0.8}})
    if re.search(r"\bexplain( this)? code\b|what does this (function|method|snippet) do", t):
        need_explain_code = True
        detected_tools.append({"name": "explain_code", "args": {"code": text, "language": "python", "confidence": 0.75}})
    # detect spellcheck requests
    if re.search(r"\b(spell check|spellcheck|check spelling|corregir ortografía|revisar ortografía)\b", t):
        need_spell_check = True
        detected_tools.append({"name": "spell_check", "args": {"text": text, "confidence": 0.6}})
    # compute confidence summary: fixed per-tool confidences when triggered
    confs = {
        "calc_confidence": 0.95 if need_calc else 0.0,
        "web_search_confidence": 0.9 if need_web_search else 0.0,
        "reasoning_confidence": 0.85 if need_reasoning else 0.0,
        "universal_confidence": 0.65 if need_universal else 0.0,
        "translate_confidence": 0.85 if need_translate else 0.0,
        "spell_check_confidence": 0.6 if need_spell_check else 0.0,
        "format_code_confidence": 0.7 if need_format_code else 0.0,
        "explain_code_confidence": 0.7 if need_explain_code else 0.0,
    }
    return {
        "need_calc": need_calc,
        "need_web_search": need_web_search,
        "need_reasoning": need_reasoning,
        "need_universal": need_universal,
        "need_fetch_url": need_fetch_url,
        "need_summarize": need_summarize,
        "need_keywords": need_keywords,
        "need_sentiment": need_sentiment,
        "need_translate": need_translate,
        "need_spell_check": need_spell_check,
        "need_format_code": need_format_code,
        "need_explain_code": need_explain_code,
        "detected_tools": detected_tools,
        "confidence": confs,
    }
def fetch_url(url: str, max_chars: int = 20000) -> str:
    """Fetch the content of a URL and return cleaned text (strip HTML tags).

    Prefers BeautifulSoup (dropping script/style nodes); falls back to a naive
    regex tag strip when bs4 is unavailable. Returns a plain-text string
    truncated to `max_chars`, or "" on any error / non-2xx response.
    """
    if not url:
        return ""
    try:
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
            timeout=10,
        )
        if not resp.ok:
            return ""
        raw = resp.text
        try:
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(raw, "html.parser")
            # Drop non-content nodes before extracting visible text.
            for node in soup(["script", "style"]):
                node.decompose()
            body_text = soup.get_text(separator=" \n ")
            return re.sub(r"\s+", " ", body_text).strip()[:max_chars]
        except Exception:
            # fallback: naive tag strip
            stripped = re.sub(r"<[^>]+>", "", raw)
            stripped = re.sub(r"\s+", " ", stripped)
            return stripped[:max_chars]
    except Exception:
        return ""
def summarize_text(text: str, max_sentences: int = 3) -> str:
    """Naive extractive summary: return the first `max_sentences` sentences.

    Sentences are split on whitespace that follows '.', '!' or '?'. Empty or
    non-string input yields "".
    """
    if not text or not isinstance(text, str):
        return ""
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    head = sentences if len(sentences) <= max_sentences else sentences[:max_sentences]
    return " ".join(head).strip()
def extract_keywords(text: str, top_k: int = 5) -> List[str]:
    """Return top_k frequent non-stopword tokens from text (naive extraction).

    Tokens shorter than 3 characters and common English stopwords are ignored;
    ties keep first-seen order. Returns [] on empty input or any error.
    """
    if not text:
        return []
    try:
        stopwords = {"the", "and", "is", "in", "to", "a", "an", "of", "for", "with", "on", "that", "this", "it", "as", "are"}
        counts = {}
        for token in re.findall(r"\w+", text.lower()):
            if len(token) > 2 and token not in stopwords:
                counts[token] = counts.get(token, 0) + 1
        # Stable sort by descending frequency preserves insertion (first-seen) order.
        ranked = sorted(counts.items(), key=lambda kv: -kv[1])
        return [word for word, _ in ranked[:top_k]]
    except Exception:
        return []
def sentiment_analysis(text: str) -> dict:
    """Very basic lexicon-based sentiment analysis.

    Returns {"sentiment": 'positive'/'neutral'/'negative', "score": float},
    where score is (+1 per positive word, -1 per negative word).
    """
    if not text:
        return {"sentiment": "neutral", "score": 0.0}
    positive = {"good", "great", "excellent", "positive", "success", "love", "like", "happy", "best"}
    negative = {"bad", "horrible", "poor", "negative", "hate", "dislike", "sad", "worst", "angry"}
    words = re.findall(r"\w+", text.lower())
    score = sum((w in positive) - (w in negative) for w in words)
    if score == 0:
        return {"sentiment": "neutral", "score": 0.0}
    label = "positive" if score > 0 else "negative"
    return {"sentiment": label, "score": float(score)}
# removed earlier naive duplicates in favor of featureful versions below
def translate_text(text: str, target_lang: str = 'en') -> dict:
    """Translate text to target language using `googletrans` if available; otherwise
    return an annotated fallback indicating translation was not performed.

    Returns {"action": "translate", "result": ..., "metadata": {"lang", "confidence"}}.
    Confidence is 0.9 when a real translation happened, 0.0 otherwise.
    """
    if not text:
        return {"action": "translate", "result": "", "metadata": {"lang": target_lang, "confidence": 0.0}}
    try:
        import importlib
        import importlib.util
        # Single availability check (the old code ran find_spec twice).
        if importlib.util.find_spec("googletrans") is not None:
            try:
                googletrans = importlib.import_module("googletrans")
                Translator = getattr(googletrans, 'Translator', None)
                if Translator:
                    res = Translator().translate(text, dest=target_lang)
                    return {"action": "translate", "result": res.text, "metadata": {"lang": target_lang, "confidence": 0.9}}
            except Exception:
                pass
        # Fallback: annotate that translation was requested but not performed.
        return {"action": "translate", "result": f"[translated to {target_lang}]: {text}", "metadata": {"lang": target_lang, "confidence": 0.0}}
    except Exception:
        return {"action": "translate", "result": f"[translated to {target_lang}]: {text}", "metadata": {"lang": target_lang, "confidence": 0.0}}
def spell_check_text(text: str) -> dict:
    """Naive spell check: uses TextBlob's corrector when installed, otherwise
    returns the input unchanged with no suggestions.

    Returns {"action": "spell_check", "result": ..., "metadata": {"suggestions", "confidence"}}.
    """
    if not text:
        return {"action": "spell_check", "result": text, "metadata": {"suggestions": [], "confidence": 0.0}}
    try:
        import importlib.util
        if importlib.util.find_spec("textblob") is not None:
            try:
                textblob = importlib.import_module("textblob")
                TextBlob = getattr(textblob, "TextBlob", None)
                if TextBlob is not None:
                    corrected = str(TextBlob(text).correct())
                    # Only report a correction when the text actually changed.
                    if corrected != text:
                        return {"action": "spell_check", "result": corrected, "metadata": {"suggestions": [corrected], "confidence": 0.9}}
            except Exception:
                pass
    except Exception:
        pass
    return {"action": "spell_check", "result": text, "metadata": {"suggestions": [], "confidence": 0.0}}
def format_code_text(code: str, lang: str = 'python') -> dict:
    """Simple code formatting: runs `black` when installed, otherwise strips
    trailing whitespace from each line as a naive fallback.

    Returns {"action": "format_code", "result": ..., "metadata": {"lang", "confidence"}}.
    The old implementation nested three try blocks with duplicated fallback
    code; this flattens it to one success path and one fallback path.
    """
    if not code:
        return {"action": "format_code", "result": code, "metadata": {"lang": lang, "confidence": 0.0}}
    try:
        import importlib
        import importlib.util
        black = None
        if importlib.util.find_spec("black") is not None:
            black = importlib.import_module("black")
        if black is not None:
            formatted = black.format_str(code, mode=black.Mode())
            return {"action": "format_code", "result": formatted, "metadata": {"lang": lang, "confidence": 0.95}}
        # fallback: naive trailing-whitespace strip per line
        cleaned = '\n'.join([ln.rstrip() for ln in code.splitlines()])
        return {"action": "format_code", "result": cleaned, "metadata": {"lang": lang, "confidence": 0.0}}
    except Exception:
        # black import or formatting failed: try the naive fallback, then give
        # the code back untouched as a last resort.
        try:
            cleaned = '\n'.join([ln.rstrip() for ln in code.splitlines()])
            return {"action": "format_code", "result": cleaned, "metadata": {"lang": lang, "confidence": 0.0}}
        except Exception:
            return {"action": "format_code", "result": code, "metadata": {"lang": lang, "confidence": 0.0}}
def explain_code_text(code: str, lang: str = 'python') -> dict:
    """Return a basic explanation by summarizing comments and high level function names.

    Intentionally naive; a future version could delegate to an LLM or a real
    parser. Returns {"action": "explain_code", "result": ..., "metadata": ...}.
    """
    if not code:
        return {"action": "explain_code", "result": "", "metadata": {"lang": lang}}
    try:
        parts = []
        # Top-level structure: function names and leading '#' comments.
        func_names = re.findall(r"def\s+(\w+)\s*\(", code)
        if func_names:
            parts.append(f"Functions: {', '.join(func_names)}")
        inline_comments = re.findall(r"#(.+)", code)
        if inline_comments:
            parts.append("Comments: " + "; ".join([c.strip() for c in inline_comments[:3]]))
        if not parts:
            # fallback: first non-empty line of the snippet
            nonblank = [ln.strip() for ln in code.splitlines() if ln.strip()]
            parts.append(nonblank[0] if nonblank else "No content")
        return {"action": "explain_code", "result": " | ".join(parts), "metadata": {"lang": lang, "confidence": 0.6}}
    except Exception:
        return {"action": "explain_code", "result": "", "metadata": {"lang": lang, "confidence": 0.0}}
def ensure_upload_dir():
    """Create CONFIG.UPLOAD_DIR if it does not exist (best-effort)."""
    from config import CONFIG
    try:
        os.makedirs(CONFIG.UPLOAD_DIR, exist_ok=True)
    except Exception:
        # Directory creation is best-effort; write failures surface at the caller.
        pass
from typing import Optional
def save_bytes_to_upload(filename: Optional[str], data: bytes) -> dict:
    """Persist raw bytes into CONFIG.UPLOAD_DIR under a uuid-prefixed name.

    Returns a metadata dict (file_id, filename, path, mime_type, size,
    uploaded_at) on success, or {'error': str} on failure.
    """
    from config import CONFIG
    import hashlib, time, uuid
    ensure_upload_dir()
    file_id = str(uuid.uuid4())
    # Prefix with the uuid and basename the input to avoid path traversal.
    stored_name = f"{file_id}_{os.path.basename(str(filename or 'uploaded_file'))}"
    dest = os.path.join(CONFIG.UPLOAD_DIR, stored_name)
    try:
        with open(dest, 'wb') as fh:
            fh.write(data)
        import mimetypes
        return {
            'file_id': file_id,
            'filename': filename,
            'path': dest,
            'mime_type': mimetypes.guess_type(dest)[0],
            'size': os.path.getsize(dest),
            'uploaded_at': int(time.time()),
        }
    except Exception as exc:
        return {'error': str(exc)}
def file_read_from_path(path: str, max_bytes: int = 100000) -> str:
    """Read up to max_bytes from a file and return it as UTF-8 text.

    Undecodable bytes are replaced; a missing path or any I/O error yields "".
    """
    try:
        if not path or not os.path.exists(path):
            return ""
        with open(path, 'rb') as fh:
            raw = fh.read(max_bytes)
        try:
            return raw.decode('utf-8', errors='replace')
        except Exception:
            # Extremely defensive: decode with errors='replace' shouldn't raise.
            return str(raw)
    except Exception:
        return ""
def universal_tool(args: dict, allow_web_search: bool = True, allow_tools: bool = True, allow_file_tool: bool = True) -> dict:
    """Universal tool: if 'action' is provided, call the corresponding tool; otherwise autodetect using heuristics.

    Supported explicit actions: calc, web_search, file_read, file_upload,
    fetch_url, summarize, keywords, sentiment, translate, spell_check,
    format_code, explain_code. The allow_* flags gate whole tool groups.
    Returns {"action", "result", "metadata"} or {"error": ...}.
    """
    if not isinstance(args, dict):
        return {"error": "ERROR: invalid args for universal tool"}
    action = args.get("action")
    query = args.get("query")
    # --- explicit actions -------------------------------------------------
    if action == "calc":
        if not allow_tools:
            return {"action": "calc", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        expr = args.get("expression") or query
        if not expr:
            return {"action": "calc", "result": None, "metadata": {"error": "no expression provided", "confidence": 0.0}}
        res = calc(str(expr))
        return {"action": "calc", "result": str(res), "metadata": {"expression": expr, "confidence": 0.98}}
    if action == "web_search":
        if not allow_web_search:
            return {"action": "web_search", "result": "", "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        q = args.get("query") or query
        if not q:
            return {"action": "web_search", "result": "", "metadata": {"confidence": 0.0}}
        res = web_search(str(q), int(args.get("top_k") or 3))
        return {"action": "web_search", "result": str(res), "metadata": {"query": q, "top_k": int(args.get("top_k") or 3), "confidence": 0.9}}
    if action == 'file_read':
        if not allow_file_tool:
            return {"action": "file_read", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        fpath = args.get('path') or args.get('file_path')
        if not fpath and args.get('file_id'):
            # Resolve a bare file_id against the configured upload directory.
            from config import CONFIG
            fid = args.get('file_id')
            if fid:
                candidate = os.path.join(CONFIG.UPLOAD_DIR, os.path.basename(str(fid)))
            else:
                candidate = None
            if candidate and os.path.exists(candidate):
                fpath = candidate
        if not fpath:
            return {"action": "file_read", "result": None, "metadata": {"error": "no_path_or_id", "confidence": 0.0}}
        content = file_read_from_path(fpath, int(args.get('max_bytes') or 100000))
        return {"action": "file_read", "result": str(content), "metadata": {"path": fpath, "confidence": 0.9}}
    if action == 'file_upload':
        if not allow_file_tool:
            return {"action": "file_upload", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        # Expect either base64 content or raw bytes/text in args.
        import base64
        fname = args.get('filename') or args.get('name') or 'uploaded_file'
        content_b64 = args.get('content_base64') or args.get('content')
        if not content_b64:
            return {"action": "file_upload", "result": None, "metadata": {"error": "no_content", "confidence": 0.0}}
        # Decode as strict base64 when possible; otherwise treat as plaintext.
        try:
            if isinstance(content_b64, str):
                b = None
                try:
                    b = base64.b64decode(content_b64, validate=True)
                except Exception:
                    b = str(content_b64).encode('utf-8')
            else:
                b = content_b64 if isinstance(content_b64, (bytes, bytearray)) else str(content_b64).encode('utf-8')
        except Exception:
            return {"action": "file_upload", "result": None, "metadata": {"error": "invalid_content", "confidence": 0.0}}
        # Check size against configuration.
        try:
            from config import CONFIG
            if len(b) > getattr(CONFIG, 'MAX_UPLOAD_SIZE_BYTES', 10 * 1024 * 1024):
                return {"action": "file_upload", "result": None, "metadata": {"error": "file_too_large", "confidence": 0.0}}
        except Exception:
            pass
        # Save file.
        meta = None
        try:
            # Prefer the app's internal upload API so validation happens in one place.
            import importlib
            app_module = importlib.import_module('app')
            if hasattr(app_module, 'upload_file_internal'):
                try:
                    meta = app_module.upload_file_internal(b, filename=fname)
                except Exception:
                    meta = save_bytes_to_upload(fname, b)
                # Best-effort registration in the app's upload registry.
                try:
                    if hasattr(app_module, 'UPLOADED_FILES') and isinstance(app_module.UPLOADED_FILES, dict):
                        app_module.UPLOADED_FILES[meta['file_id']] = meta
                except Exception:
                    pass
            else:
                meta = save_bytes_to_upload(fname, b)
                try:
                    if hasattr(app_module, 'UPLOADED_FILES') and isinstance(app_module.UPLOADED_FILES, dict):
                        app_module.UPLOADED_FILES[meta['file_id']] = meta
                except Exception:
                    pass
        except Exception:
            # fallback to local save and skip register
            meta = save_bytes_to_upload(fname, b)
        return {"action": "file_upload", "result": meta, "metadata": {"filename": fname, "file_id": meta.get('file_id'), "confidence": 0.9}}
    if action == 'fetch_url':
        if not allow_web_search:
            return {"action": "fetch_url", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        url = args.get('url') or query
        if not url:
            return {"action": "fetch_url", "result": None, "metadata": {"error": "no_url_provided", "confidence": 0.0}}
        content = fetch_url(str(url), int(args.get('max_chars') or 20000))
        return {"action": "fetch_url", "result": str(content), "metadata": {"url": url, "confidence": 0.9}}
    if action == 'summarize':
        if not allow_tools:
            return {"action": "summarize", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or ''
        if not txt and args.get('url'):
            try:
                txt = fetch_url(str(args.get('url')))
            except Exception:
                txt = ''
        if not txt and query:
            txt = query
        if not txt:
            return {"action": "summarize", "result": None, "metadata": {"error": "no_text_or_url_provided", "confidence": 0.0}}
        s = summarize_text(str(txt), int(args.get('max_sentences') or 3))
        return {"action": "summarize", "result": s, "metadata": {"confidence": 0.85}}
    if action == 'keywords' or action == 'keyword_extraction':
        if not allow_tools:
            return {"action": "keywords", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or ''
        if not txt and args.get('url'):
            try:
                txt = fetch_url(str(args.get('url')))
            except Exception:
                txt = ''
        if not txt and query:
            txt = query
        if not txt:
            return {"action": "keywords", "result": None, "metadata": {"error": "no_text_or_url_provided", "confidence": 0.0}}
        kws = extract_keywords(str(txt), int(args.get('top_k') or 5))
        return {"action": "keywords", "result": kws, "metadata": {"confidence": 0.85}}
    if action == 'sentiment':
        if not allow_tools:
            return {"action": "sentiment", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or ''
        if not txt and args.get('url'):
            try:
                txt = fetch_url(str(args.get('url')))
            except Exception:
                txt = ''
        if not txt and query:
            txt = query
        if not txt:
            return {"action": "sentiment", "result": None, "metadata": {"error": "no_text_or_url_provided", "confidence": 0.0}}
        res = sentiment_analysis(str(txt))
        return {"action": "sentiment", "result": res, "metadata": {"confidence": 0.85}}
    if action == 'translate':
        if not allow_tools:
            return {"action": "translate", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or query or ''
        target = args.get('target') or 'en'
        res = translate_text(str(txt), str(target))
        # translate_text nests lang under 'metadata'; the old res.get('lang')
        # always produced None.
        meta = res.get('metadata') or {}
        return {"action": "translate", "result": res.get('result'), "metadata": {"lang": meta.get('lang'), "note": meta.get('note'), "confidence": 0.5}}
    if action == 'spell_check' or action == 'spellcheck':
        if not allow_tools:
            return {"action": "spell_check", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or query or ''
        res = spell_check_text(str(txt))
        # spell_check_text reports corrections under metadata['suggestions'];
        # the old res.get('corrections') key never existed.
        meta = res.get('metadata') or {}
        return {"action": "spell_check", "result": res.get('result'), "metadata": {"corrections": meta.get('suggestions'), "confidence": 0.5}}
    if action == 'format_code' or action == 'format':
        if not allow_tools:
            return {"action": "format_code", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or query or ''
        lang = args.get('language') or args.get('lang') or 'python'
        res = format_code_text(txt, lang)
        # format_code_text has no top-level 'note'; read from its metadata.
        meta = res.get('metadata') or {}
        return {"action": "format_code", "result": res.get('result'), "metadata": {"note": meta.get('note'), "confidence": 0.6}}
    if action == 'explain_code' or action == 'explain':
        if not allow_tools:
            return {"action": "explain_code", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        txt = args.get('text') or query or ''
        lang = args.get('language') or args.get('lang') or 'python'
        res = explain_code_text(txt, lang)
        # explain_code_text returns its summary under 'result'; the old lookup
        # of 'docstrings'/'explanation' keys always produced ''.
        if isinstance(res, dict):
            expl = res.get('result') or ''
        else:
            expl = str(res)
        return {"action": "explain_code", "result": expl, "metadata": {"confidence": 0.6}}
    # --- auto-detect based on query content -------------------------------
    if query:
        # if expression - use calc
        if re.search(r"\d+\s*[-+*/%]\s*\d+", str(query)):
            if not allow_tools:
                return {"action": "calc", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            res = calc(str(query))
            return {"action": "calc", "result": str(res), "metadata": {"expression": str(query), "confidence": 0.95}}
        # fetch_url auto-detect when a URL present
        if re.search(r"https?://\S+", str(query)):
            if not allow_web_search:
                return {"action": "fetch_url", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            content = fetch_url(str(query), int(args.get('max_chars') or 20000))
            return {"action": "fetch_url", "result": str(content), "metadata": {"url": str(query), "confidence": 0.9}}
        # translate detect: e.g., 'translate to spanish: <text>'
        m = re.search(r"\btranslate\b.*to\s+([a-z]{2,})", str(query).lower())
        if m:
            tgt = m.group(1)
            if not allow_tools:
                return {"action": "translate", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            return translate_text(str(query), tgt)
        # format_code auto-detect: presence of ``` or 'format code' text
        if re.search(r"```[a-zA-Z]*|format code|format this code|pretty print code", str(query).lower()):
            if not allow_tools:
                return {"action": "format_code", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            return format_code_text(str(query))
        # summarize auto-detect
        if re.search(r"\b(summarize|summarise|tl;dr)\b", str(query).lower()):
            if not allow_tools:
                return {"action": "summarize", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            s = summarize_text(str(query))
            return {"action": "summarize", "result": s, "metadata": {"confidence": 0.85}}
        # keywords auto-detect
        if re.search(r"\b(keywords|key terms|extract keywords)\b", str(query).lower()):
            if not allow_tools:
                return {"action": "keywords", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            kws = extract_keywords(str(query))
            return {"action": "keywords", "result": kws, "metadata": {"confidence": 0.78}}
        # sentiment auto-detect
        if re.search(r"\b(sentiment|tone|is this positive|is this negative|what is the sentiment)\b", str(query).lower()):
            if not allow_tools:
                return {"action": "sentiment", "result": None, "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
            res = sentiment_analysis(str(query))
            return {"action": "sentiment", "result": res, "metadata": {"confidence": 0.8}}
        # default: web search
        if not allow_web_search:
            return {"action": "web_search", "result": "", "metadata": {"error": "disabled_by_policy", "confidence": 0.0}}
        res = web_search(str(query), int(args.get("top_k") or 3))
        return {"action": "web_search", "result": str(res), "metadata": {"query": str(query), "top_k": int(args.get("top_k") or 3), "confidence": 0.9}}
    return {"error": "ERROR: could not determine action for universal tool"}
def bias_mitigation(text: str) -> dict:
"""A light-weight bias mitigation helper.
The goal: detect and neutralize potentially biased, stereotyping, or discriminatory statements.
It's intentionally conservative (favoring suppression) and returns sanitized content and a flag.
"""
import re
if not text or not isinstance(text, str):
return {"sanitized": text, "suppressed": False, "reason": None}
t = text.strip()
# Simple checks for sweeping generalizations towards protected groups
# This is a naive approach and can be adapted with an ML classifier.
protected_terms = [
r"\b(race|religion|ethnicity|gender|sexual orientation|disability)\b",
r"\b(black|white|asian|hispanic|muslim|christian|jewish|gay|lesbian|transgender)\b",
]
sweeping_patterns = [
r"\b(all|always|never|every|none)\b[^.?!]{0,60}\b(is|are|will|should|must)\b",
r"\b(\w+)s?\b[^.?!]{0,60}\b(are|is)\b[^.?!]{0,80}\b(inferior|superior|stupid|lazy|criminal)\b",
]
# Simple profanity or slurs (non-exhaustive) - block
slurs = [r"\b(slur1|slur2)\b"] # placeholder; real app should use a curated list
for pattern in sweeping_patterns:
if re.search(pattern, t, flags=re.I):
# ensure it references a protected group before suppressing
for pt in protected_terms:
if re.search(pt, t, flags=re.I):
return {"sanitized": "[content suppressed due to potential bias]", "suppressed": True, "reason": "sweeping_generalization_protected_group"}
# If contains slurs -> suppress
for s in slurs:
if re.search(s, t, flags=re.I):
return {"sanitized": "[content suppressed due to policy]", "suppressed": True, "reason": "profanity_or_slur"}
# For political content with strong claims, favor neutralization
if re.search(r"\b(president|prime minister|dictator|election|vote|politician)\b", t, flags=re.I) and re.search(r"\b(is|are|will|should)\b[^.?!]{0,80}\b(incompetent|corrupt|traitor|criminal)\b", t, flags=re.I):
# return a neutral paraphrase where we avoid strong unfounded claims
sanitized = re.sub(r"\b(is|are|will|should)\b[^.?!]{0,80}\b(incompetent|corrupt|traitor|criminal)\b", "may have actions that deserve scrutiny", t, flags=re.I)
return {"sanitized": sanitized, "suppressed": False, "reason": "political_neutralization"}
return {"sanitized": text, "suppressed": False, "reason": None}