import os
import re
from typing import Any, List, Optional

from ai_agent.retriever.software_doc import SoftwareDoc

# Constants for affirmative detection
_MULTI_WORD_AFFIRMATIVES = ["go ahead", "do it", "run it", "sounds good", "looks good"]
_SINGLE_WORD_AFFIRMATIVES = [
    "yes",
    "y",
    "yeah",
    "yep",
    "yup",
    "sure",
    "ok",
    "okay",
    "fine",
    "alright",
    "right",
    "correct",
    "affirmative",
]
_EMOJI_AFFIRMATIVES = ["👍", "✅", "✓"]
_NEGATION_WORDS = {"no", "not", "don't", "dont", "never", "nothing"}

# Thresholds for context-based detection
_PHRASE_LENGTH_MULTIPLIER = 3  # Max text length = phrase length * multiplier
_SHORT_MESSAGE_THRESHOLD = 30  # Character threshold for "short message"

# Pre-compiled patterns. The negation alternation is built from a sorted copy
# of the set so the pattern text is the same on every run.
_NEGATION_PATTERN = re.compile(
    r"\b(" + "|".join(re.escape(w) for w in sorted(_NEGATION_WORDS)) + r")\b"
)
_SINGLE_WORD_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(w) for w in _SINGLE_WORD_AFFIRMATIVES) + r")\b"
)
_MULTI_WORD_PATTERNS = [
    (phrase, re.compile(r"\b" + re.escape(phrase) + r"\b"))
    for phrase in _MULTI_WORD_AFFIRMATIVES
]

# Matches the bare "hf.space" host as well as "<owner>-<space>.hf.space"
# sub-domains, with or without a scheme.
_HF_SPACE_HOST_PATTERN = re.compile(r"(?:^|[/.@])hf\.space(?:[/:?#]|$)")


def _best_runnable_link(doc: SoftwareDoc) -> Optional[str]:
    """Return the most user-friendly runnable link.

    Candidates are read from ``doc.runnable_example`` and
    ``doc.has_executable_notebook`` (missing or empty attributes are ignored).
    Each candidate is scored as ``priority + host bonus`` and the lowest score
    wins; on a tie the candidate seen first is returned.

    Preference order:
      1. Hugging Face Space (hf.space or huggingface.co/spaces)
      2. Other interactive demo hosts (gradio.live, replicate.run, etc.)
      3. Executable notebook links (.ipynb, colab)
      4. Fallback to first runnable example / notebook URL (GitHub last)

    Explicit `priority` values in catalog still respected (lower is better),
    but host preference can override large default values.

    Args:
        doc: Object exposing the two attributes above. Each attribute may be a
            collection of entries or a single entry; an entry is either a dict
            with a ``"url"`` key (string, or list whose first element is used)
            or a plain URL string.

    Returns:
        The selected URL with surrounding whitespace removed, or ``None`` when
        no entry yields a non-empty URL.
    """

    def base_priority(item) -> float:
        # Only dict entries can carry an explicit priority.
        if isinstance(item, dict) and "priority" in item:
            try:
                return float(item["priority"])
            except (TypeError, ValueError):
                pass  # unparsable priority: fall back to the neutral base
        return 100.0  # neutral base

    def extract_url(item) -> Optional[str]:
        raw = item.get("url") if isinstance(item, dict) else item
        if isinstance(raw, (list, tuple)) and raw:
            raw = raw[0]  # only the first URL of a list is considered
        if isinstance(raw, str):
            return raw.strip() or None
        return None

    def host_bonus(u: str) -> float:
        lu = u.lower()
        if "huggingface.co/spaces" in lu or _HF_SPACE_HOST_PATTERN.search(lu):
            return -60.0
        if "gradio.live" in lu:
            return -40.0
        if "replicate.run" in lu or "replicate.com" in lu:
            return -30.0
        if lu.endswith(".ipynb") or "colab.research.google.com" in lu:
            return -10.0
        if "github.com" in lu:
            return +10.0  # de-prioritize plain GitHub vs real demos
        return 0.0

    def as_items(value) -> list:
        if not value:
            return []
        if isinstance(value, (str, dict)):
            return [value]  # a single entry instead of a collection
        try:
            return list(value)
        except TypeError:
            return []  # e.g. a bare boolean flag: nothing to iterate

    scored = []
    for attr in ("runnable_example", "has_executable_notebook"):
        for entry in as_items(getattr(doc, attr, None)):
            url = extract_url(entry)
            if url:
                scored.append((base_priority(entry) + host_bonus(url), url))

    if not scored:
        return None
    # min() returns the first minimal element, matching a stable sort.
    return min(scored, key=lambda pair: pair[0])[1]


def _coerce_files_to_paths(files: List[Any]) -> List[str]:
    """Convert Gradio file objects to paths.

    Accepted element shapes:
      * ``str`` -- used as-is;
      * ``os.PathLike`` -- converted with ``os.fspath`` (full path, not just
        the basename that ``Path.name`` would give);
      * ``dict`` -- its ``"name"`` value, else its ``"path"`` value;
      * any object with a ``name`` attribute -- that attribute.

    A single element passed on its own (instead of a list) is treated as a
    one-element list. Empty or non-string results are skipped.

    Args:
        files: Uploaded file descriptors, or a falsy value for "no files".

    Returns:
        Unique paths in first-seen order.
    """
    if not files:
        return []
    if isinstance(files, (str, dict, os.PathLike)) or hasattr(files, "name"):
        files = [files]  # single upload rather than a list of uploads

    paths = []
    for f in files:
        if isinstance(f, str):
            candidate = f
        elif isinstance(f, os.PathLike):
            # Checked before the generic ``name`` branch: Path.name is only
            # the final path component.
            candidate = os.fspath(f)
        elif isinstance(f, dict):
            candidate = f.get("name") or f.get("path")
        else:
            candidate = getattr(f, "name", None)
        if isinstance(candidate, str) and candidate:
            paths.append(candidate)

    # De-duplicate while keeping first-seen order (dicts preserve insertion).
    return list(dict.fromkeys(paths))


def _is_affirmative(text: str) -> bool:
    """Check if user message is affirmative (yes, ok, sure, etc.).

    Uses word boundary matching and context checking to avoid false positives.

    Decision order:
      1. Empty / whitespace-only / non-string input -> ``False``.
      2. Any emoji from ``_EMOJI_AFFIRMATIVES`` present -> ``True``.
      3. Any word from ``_NEGATION_WORDS`` present -> ``False``.
      4. A multi-word phrase present and the message is at most
         ``_PHRASE_LENGTH_MULTIPLIER`` times the phrase length -> ``True``.
      5. A single affirmative word present and the message is at most
         ``_SHORT_MESSAGE_THRESHOLD`` characters -> ``True``.

    Args:
        text: Raw user message.

    Returns:
        ``True`` when the message is classified as an affirmative reply.
    """
    if not isinstance(text, str):
        return False

    # Normalise the typographic apostrophe so "don’t" is seen as "don't";
    # the replacement is one character for one, so lengths are unaffected.
    text_lower = text.lower().strip().replace("\u2019", "'")
    if not text_lower:
        return False

    # Check emojis
    if any(emoji in text for emoji in _EMOJI_AFFIRMATIVES):
        return True

    # A message containing a negation word can never be exactly one
    # affirmative word, so any negation rules the message out.
    if _NEGATION_PATTERN.search(text_lower):
        return False

    length = len(text_lower)

    # Check multi-word phrases (reject if text is much longer than phrase)
    for phrase, pattern in _MULTI_WORD_PATTERNS:
        if length <= len(phrase) * _PHRASE_LENGTH_MULTIPLIER and pattern.search(
            text_lower
        ):
            return True

    # Check single words (reject if message is long)
    if length > _SHORT_MESSAGE_THRESHOLD:
        return False
    return _SINGLE_WORD_PATTERN.search(text_lower) is not None


def _env_flag(name: str, default: bool = False) -> bool:
    """Parse boolean env vars robustly.

    Text after the first ``#`` is treated as an inline comment and dropped;
    the remainder is trimmed and lower-cased before comparison.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is unset or empty after
            comment stripping.

    Returns:
        ``True`` for ``1``, ``true``, ``yes`` or ``on``; ``False`` for any
        other non-empty value; ``default`` otherwise.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    value = raw.partition("#")[0].strip().lower()
    if not value:
        return default
    return value in {"1", "true", "yes", "on"}