Spaces:
Running
Running
Upload folder using huggingface_hub
#1
by chmielvu - opened
- .gitattributes +35 -35
- Modules/AI_Web_Search.py +158 -0
- Modules/Agent_Skills.py +788 -788
- Modules/Agent_Terminal.py +210 -248
- Modules/Code_Interpreter.py +39 -39
- Modules/Deep_Research.py +596 -596
- Modules/Generate_Image.py +132 -132
- Modules/Memory_Manager.py +275 -212
- Modules/ScrapeGraphAI.py +779 -0
- Modules/Shell_Command.py +194 -194
- Modules/Web_Search.py +517 -499
- Modules/_core.py +861 -861
- Modules/_docstrings.py +149 -149
- Modules/_pollinations_client.py +324 -0
- Modules/_query_optimizer.py +781 -0
- Modules/_searxng_client.py +460 -0
- README.md +266 -266
- app.py +191 -273
- memories.json +19 -19
- requirements.txt +14 -12
- styles.css +307 -307
.gitattributes
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Modules/AI_Web_Search.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Web Search Module.
|
| 3 |
+
|
| 4 |
+
AI-powered web search using Pollinations API with Perplexity and Gemini models.
|
| 5 |
+
Query optimization is ALWAYS enabled for best results.
|
| 6 |
+
|
| 7 |
+
Depth levels:
|
| 8 |
+
- fast: Gemini with Google Search - Quick, reliable answers
|
| 9 |
+
- normal: Perplexity Sonar - Balanced speed and quality
|
| 10 |
+
- deep: Perplexity Sonar Reasoning - Deep analysis with reasoning chain
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
from typing import Annotated, Literal
|
| 16 |
+
|
| 17 |
+
import gradio as gr
|
| 18 |
+
|
| 19 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 20 |
+
from ._docstrings import autodoc
|
| 21 |
+
from ._pollinations_client import PollinationsClient
|
| 22 |
+
from ._query_optimizer import get_optimizer
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Single source of truth for the LLM-facing tool description
|
| 26 |
+
TOOL_SUMMARY = (
|
| 27 |
+
"AI-powered web search using Perplexity or Gemini with built-in web search. "
|
| 28 |
+
"Returns synthesized answers with source citations. "
|
| 29 |
+
"Use for complex questions requiring current information and analysis. "
|
| 30 |
+
"Query optimization is automatically applied for best results."
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def AI_Web_Search(
    query: Annotated[str, "The search query or question."],
    depth: Annotated[
        Literal["fast", "normal", "deep"],
        "Search depth: 'fast' (Gemini + Google Search), 'normal' (Perplexity Sonar), 'deep' (Perplexity Sonar Reasoning).",
    ] = "normal",
    detailed: Annotated[bool, "Request a comprehensive answer with more detail."] = False,
) -> str:
    """
    AI-powered web search with automatic query optimization.

    Uses Pollinations API to access AI search models with built-in web search:
    - fast: Gemini with Google Search - Best for quick facts
    - normal: Perplexity Sonar - Balanced for general research
    - deep: Perplexity Sonar Reasoning - Best for complex analysis

    Query optimization is ALWAYS ON - queries are automatically optimized
    for AI search using SC-CoT (Mistral → HF fallback chain).

    Returns a synthesized answer with numbered citations and source URLs.
    """
    _log_call_start("AI_Web_Search", query=query, depth=depth, detailed=detailed)

    # Reject blank input up front; nothing below is useful without a query.
    if not query or not query.strip():
        result = "No search query provided. Please enter a question or search term."
        _log_call_end("AI_Web_Search", _truncate_for_log(result))
        return result

    # Optimization is best-effort: any failure falls back to the caller's query.
    original_query = query
    optimization_metadata = None
    try:
        optimized_query, optimization_metadata = get_optimizer().optimize_for_ai_search(query)
    except Exception as exc:
        print(f"[AI_Web_Search] Query optimization failed: {exc}", flush=True)
    else:
        if isinstance(optimized_query, str) and optimized_query.strip():
            query = optimized_query
        else:
            # A blank/invalid rewrite must not replace a valid user query, and
            # its metadata would produce a misleading "Optimized query" header.
            optimization_metadata = None

    try:
        result_data = PollinationsClient().web_search_sync(query, depth, detailed)

        lines = []

        # Only report the rewrite when the optimizer actually changed the query.
        if optimization_metadata and optimization_metadata.get("original_query") != optimization_metadata.get("optimized_query"):
            lines.append(f"Optimized query: {query}")
            lines.append(f"Original query: {original_query}")
            lines.append(f"Optimizer: {optimization_metadata.get('provider', 'unknown')}")
            lines.append("")

        lines.append(f"Query: {result_data['query']}")
        lines.append(f"Model: {result_data['model']}")
        lines.append(f"Depth: {depth}")
        lines.append("")
        lines.append("Answer:")
        lines.append(result_data["answer"] or "No answer generated.")

        lines.append("")
        if result_data["sources"]:
            lines.append("Sources:")
            for i, source in enumerate(result_data["sources"], 1):
                lines.append(f" {i}. {source}")
        else:
            lines.append("(No sources provided)")

        result = "\n".join(lines)
        _log_call_end("AI_Web_Search", _truncate_for_log(result))
        return result

    except Exception as exc:
        # Tool boundary: report the failure as text instead of raising.
        error_msg = f"Search failed: {exc}"
        # Truncate like the other exit paths so a huge exception text cannot
        # flood the call log.
        _log_call_end("AI_Web_Search", _truncate_for_log(error_msg))
        return error_msg
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio form that fronts ``AI_Web_Search``.

    Three inputs (query text, depth selector, detail toggle) feed the search
    function; its text result is shown in a read-only textbox.
    """
    query_input = gr.Textbox(
        label="Query",
        placeholder="Ask a question or enter a search topic...",
        max_lines=3,
        info="Your question or search query (will be optimized automatically)",
    )
    depth_input = gr.Radio(
        label="Search Depth",
        choices=["fast", "normal", "deep"],
        value="normal",
        info="fast: Gemini + Google Search | normal: Perplexity Sonar | deep: Perplexity Reasoning",
    )
    detailed_input = gr.Checkbox(
        label="Detailed Answer",
        value=False,
        info="Request a comprehensive answer with more detail",
    )
    results_output = gr.Textbox(
        label="AI Search Results",
        interactive=False,
        lines=20,
        max_lines=30,
    )
    # Adjacent literals concatenate into one centered HTML blurb.
    description_html = (
        "<div style='text-align:center'>"
        "AI-powered web search with automatic query optimization. "
        "Uses Perplexity Sonar or Gemini with built-in web search to provide "
        "direct answers with source citations. Query optimization is always enabled."
        "</div>"
    )
    return gr.Interface(
        fn=AI_Web_Search,
        inputs=[query_input, depth_input, detailed_input],
        outputs=results_output,
        title="AI Web Search",
        description=description_html,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Search",
    )
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
__all__ = ["AI_Web_Search", "build_interface"]
|
Modules/Agent_Skills.py
CHANGED
|
@@ -1,788 +1,788 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
"""
|
| 4 |
-
Agent Skills Module for Nymbo-Tools MCP Server.
|
| 5 |
-
|
| 6 |
-
Provides structured skill discovery, activation, validation, and resource access
|
| 7 |
-
following the Agent Skills specification (https://agentskills.io).
|
| 8 |
-
|
| 9 |
-
Skills are directories containing a SKILL.md file with YAML frontmatter (name, description)
|
| 10 |
-
and Markdown instructions. This tool enables agents to efficiently discover and use skills
|
| 11 |
-
through progressive disclosure: low-token metadata discovery, on-demand full activation,
|
| 12 |
-
and targeted resource access.
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
import json
|
| 16 |
-
import os
|
| 17 |
-
import re
|
| 18 |
-
import unicodedata
|
| 19 |
-
from pathlib import Path
|
| 20 |
-
from typing import Annotated, Optional
|
| 21 |
-
|
| 22 |
-
import gradio as gr
|
| 23 |
-
|
| 24 |
-
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 25 |
-
from ._docstrings import autodoc
|
| 26 |
-
from .File_System import ROOT_DIR, _display_path
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# ---------------------------------------------------------------------------
|
| 30 |
-
# Constants
|
| 31 |
-
# ---------------------------------------------------------------------------
|
| 32 |
-
|
| 33 |
-
SKILLS_SUBDIR = "Skills" # Subdirectory under ROOT_DIR containing skills
|
| 34 |
-
MAX_SKILL_NAME_LENGTH = 64
|
| 35 |
-
MAX_DESCRIPTION_LENGTH = 1024
|
| 36 |
-
MAX_COMPATIBILITY_LENGTH = 500
|
| 37 |
-
|
| 38 |
-
ALLOWED_FRONTMATTER_FIELDS = {
|
| 39 |
-
"name",
|
| 40 |
-
"description",
|
| 41 |
-
"license",
|
| 42 |
-
"allowed-tools",
|
| 43 |
-
"metadata",
|
| 44 |
-
"compatibility",
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
-
TOOL_SUMMARY = (
|
| 48 |
-
"Discover, inspect, validate, and access Agent Skills. "
|
| 49 |
-
"Actions: discover (list all skills), info (get SKILL.md contents), "
|
| 50 |
-
"resources (list/read bundled files), validate (check format), search (find by keyword). "
|
| 51 |
-
"Skills provide structured instructions for specialized tasks. "
|
| 52 |
-
"Use in combination with the `Shell_Command` and `File_System` tools."
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
-
HELP_TEXT = """\
|
| 56 |
-
Agent Skills — actions and usage
|
| 57 |
-
|
| 58 |
-
Skills are directories containing a SKILL.md file with YAML frontmatter (name, description)
|
| 59 |
-
and Markdown instructions. They live under /Skills/ in the filesystem root.
|
| 60 |
-
|
| 61 |
-
Actions:
|
| 62 |
-
- discover: List all available skills with their metadata (name, description, location)
|
| 63 |
-
- info: Get the full contents of a specific skill's SKILL.md file
|
| 64 |
-
- resources: List or read files within a skill's bundled directories (scripts/, references/, assets/)
|
| 65 |
-
- validate: Check if a skill conforms to the Agent Skills specification
|
| 66 |
-
- search: Find skills by keyword in name or description
|
| 67 |
-
- help: Show this guide
|
| 68 |
-
|
| 69 |
-
Examples:
|
| 70 |
-
- Discover all skills: action="discover"
|
| 71 |
-
- Get skill info: action="info", skill_name="pdf"
|
| 72 |
-
- List skill resources: action="resources", skill_name="mcp-builder"
|
| 73 |
-
- Read a resource: action="resources", skill_name="pdf", resource_path="references/forms.md"
|
| 74 |
-
- Validate a skill: action="validate", skill_name="pdf"
|
| 75 |
-
- Search for skills: action="search", query="MCP"
|
| 76 |
-
"""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
# ---------------------------------------------------------------------------
|
| 80 |
-
# Skills Root Resolution
|
| 81 |
-
# ---------------------------------------------------------------------------
|
| 82 |
-
|
| 83 |
-
def _get_skills_root() -> Path:
|
| 84 |
-
"""Get the absolute path to the skills directory."""
|
| 85 |
-
skills_root = os.getenv("NYMBO_SKILLS_ROOT")
|
| 86 |
-
if skills_root and skills_root.strip():
|
| 87 |
-
return Path(skills_root.strip()).resolve()
|
| 88 |
-
return Path(ROOT_DIR) / SKILLS_SUBDIR
|
| 89 |
-
|
| 90 |
-
# Import _fmt_size from shared utility instead of duplicating
|
| 91 |
-
from ._core import _fmt_size
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
# ---------------------------------------------------------------------------
|
| 95 |
-
# YAML Frontmatter Parsing (adapted from skills_ref/parser.py)
|
| 96 |
-
# ---------------------------------------------------------------------------
|
| 97 |
-
|
| 98 |
-
class ParseError(Exception):
    """Signals that a SKILL.md file could not be parsed."""
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
class ValidationError(Exception):
    """Signals that a skill failed validation.

    ``errors`` holds the individual problems; when the caller supplies none it
    defaults to a one-item list containing ``message``.
    """

    def __init__(self, message: str, errors: list[str] | None = None):
        super().__init__(message)
        if errors is None:
            errors = [message]
        self.errors = errors
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
def _parse_frontmatter(content: str) -> tuple[dict, str]:
|
| 111 |
-
"""
|
| 112 |
-
Parse YAML frontmatter from SKILL.md content.
|
| 113 |
-
|
| 114 |
-
Returns (metadata dict, markdown body).
|
| 115 |
-
Raises ParseError if frontmatter is missing or invalid.
|
| 116 |
-
"""
|
| 117 |
-
if not content.startswith("---"):
|
| 118 |
-
raise ParseError("SKILL.md must start with YAML frontmatter (---)")
|
| 119 |
-
|
| 120 |
-
parts = content.split("---", 2)
|
| 121 |
-
if len(parts) < 3:
|
| 122 |
-
raise ParseError("SKILL.md frontmatter not properly closed with ---")
|
| 123 |
-
|
| 124 |
-
frontmatter_str = parts[1]
|
| 125 |
-
body = parts[2].strip()
|
| 126 |
-
|
| 127 |
-
# Simple YAML parsing without external dependency
|
| 128 |
-
metadata: dict = {}
|
| 129 |
-
in_metadata_block = False
|
| 130 |
-
metadata_dict: dict = {}
|
| 131 |
-
|
| 132 |
-
for line in frontmatter_str.strip().split("\n"):
|
| 133 |
-
if not line.strip():
|
| 134 |
-
continue
|
| 135 |
-
|
| 136 |
-
if line.strip() == "metadata:":
|
| 137 |
-
in_metadata_block = True
|
| 138 |
-
continue
|
| 139 |
-
|
| 140 |
-
if in_metadata_block:
|
| 141 |
-
if line.startswith(" "):
|
| 142 |
-
match = re.match(r"^\s+(\w+):\s*(.*)$", line)
|
| 143 |
-
if match:
|
| 144 |
-
key = match.group(1).strip()
|
| 145 |
-
value = match.group(2).strip().strip('"').strip("'")
|
| 146 |
-
metadata_dict[key] = value
|
| 147 |
-
continue
|
| 148 |
-
else:
|
| 149 |
-
in_metadata_block = False
|
| 150 |
-
if metadata_dict:
|
| 151 |
-
metadata["metadata"] = metadata_dict
|
| 152 |
-
metadata_dict = {}
|
| 153 |
-
|
| 154 |
-
match = re.match(r"^(\S+):\s*(.*)$", line)
|
| 155 |
-
if match:
|
| 156 |
-
key = match.group(1).strip()
|
| 157 |
-
value = match.group(2).strip()
|
| 158 |
-
if (value.startswith('"') and value.endswith('"')) or \
|
| 159 |
-
(value.startswith("'") and value.endswith("'")):
|
| 160 |
-
value = value[1:-1]
|
| 161 |
-
metadata[key] = value if value else ""
|
| 162 |
-
|
| 163 |
-
if in_metadata_block and metadata_dict:
|
| 164 |
-
metadata["metadata"] = metadata_dict
|
| 165 |
-
|
| 166 |
-
return metadata, body
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
def _find_skill_md(skill_dir: Path) -> Optional[Path]:
|
| 170 |
-
"""Find the SKILL.md file in a skill directory (prefers uppercase)."""
|
| 171 |
-
for name in ("SKILL.md", "skill.md"):
|
| 172 |
-
path = skill_dir / name
|
| 173 |
-
if path.exists():
|
| 174 |
-
return path
|
| 175 |
-
return None
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
# ---------------------------------------------------------------------------
|
| 179 |
-
# Skill Validation (adapted from skills_ref/validator.py)
|
| 180 |
-
# ---------------------------------------------------------------------------
|
| 181 |
-
|
| 182 |
-
def _validate_name(name: str, skill_dir: Path) -> list[str]:
|
| 183 |
-
"""Validate skill name format and directory match."""
|
| 184 |
-
errors = []
|
| 185 |
-
|
| 186 |
-
if not name or not isinstance(name, str) or not name.strip():
|
| 187 |
-
errors.append("Field 'name' must be a non-empty string")
|
| 188 |
-
return errors
|
| 189 |
-
|
| 190 |
-
name = unicodedata.normalize("NFKC", name.strip())
|
| 191 |
-
|
| 192 |
-
if len(name) > MAX_SKILL_NAME_LENGTH:
|
| 193 |
-
errors.append(f"Skill name '{name}' exceeds {MAX_SKILL_NAME_LENGTH} character limit ({len(name)} chars)")
|
| 194 |
-
|
| 195 |
-
if name != name.lower():
|
| 196 |
-
errors.append(f"Skill name '{name}' must be lowercase")
|
| 197 |
-
|
| 198 |
-
if name.startswith("-") or name.endswith("-"):
|
| 199 |
-
errors.append("Skill name cannot start or end with a hyphen")
|
| 200 |
-
|
| 201 |
-
if "--" in name:
|
| 202 |
-
errors.append("Skill name cannot contain consecutive hyphens")
|
| 203 |
-
|
| 204 |
-
if not all(c.isalnum() or c == "-" for c in name):
|
| 205 |
-
errors.append(f"Skill name '{name}' contains invalid characters. Only letters, digits, and hyphens allowed.")
|
| 206 |
-
|
| 207 |
-
if skill_dir:
|
| 208 |
-
dir_name = unicodedata.normalize("NFKC", skill_dir.name)
|
| 209 |
-
if dir_name != name:
|
| 210 |
-
errors.append(f"Directory name '{skill_dir.name}' must match skill name '{name}'")
|
| 211 |
-
|
| 212 |
-
return errors
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
def _validate_description(description: str) -> list[str]:
|
| 216 |
-
"""Validate description format."""
|
| 217 |
-
errors = []
|
| 218 |
-
|
| 219 |
-
if not description or not isinstance(description, str) or not description.strip():
|
| 220 |
-
errors.append("Field 'description' must be a non-empty string")
|
| 221 |
-
return errors
|
| 222 |
-
|
| 223 |
-
if len(description) > MAX_DESCRIPTION_LENGTH:
|
| 224 |
-
errors.append(f"Description exceeds {MAX_DESCRIPTION_LENGTH} character limit ({len(description)} chars)")
|
| 225 |
-
|
| 226 |
-
return errors
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
def _validate_compatibility(compatibility: str) -> list[str]:
|
| 230 |
-
"""Validate compatibility format."""
|
| 231 |
-
errors = []
|
| 232 |
-
|
| 233 |
-
if not isinstance(compatibility, str):
|
| 234 |
-
errors.append("Field 'compatibility' must be a string")
|
| 235 |
-
return errors
|
| 236 |
-
|
| 237 |
-
if len(compatibility) > MAX_COMPATIBILITY_LENGTH:
|
| 238 |
-
errors.append(f"Compatibility exceeds {MAX_COMPATIBILITY_LENGTH} character limit ({len(compatibility)} chars)")
|
| 239 |
-
|
| 240 |
-
return errors
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
def _validate_skill(skill_dir: Path) -> list[str]:
|
| 244 |
-
"""Validate a skill directory. Returns list of error messages (empty = valid)."""
|
| 245 |
-
if not skill_dir.exists():
|
| 246 |
-
return [f"Path does not exist: {skill_dir}"]
|
| 247 |
-
|
| 248 |
-
if not skill_dir.is_dir():
|
| 249 |
-
return [f"Not a directory: {skill_dir}"]
|
| 250 |
-
|
| 251 |
-
skill_md = _find_skill_md(skill_dir)
|
| 252 |
-
if skill_md is None:
|
| 253 |
-
return ["Missing required file: SKILL.md"]
|
| 254 |
-
|
| 255 |
-
try:
|
| 256 |
-
content = skill_md.read_text(encoding="utf-8")
|
| 257 |
-
metadata, _ = _parse_frontmatter(content)
|
| 258 |
-
except ParseError as e:
|
| 259 |
-
return [str(e)]
|
| 260 |
-
except Exception as e:
|
| 261 |
-
return [f"Failed to read SKILL.md: {e}"]
|
| 262 |
-
|
| 263 |
-
errors = []
|
| 264 |
-
|
| 265 |
-
extra_fields = set(metadata.keys()) - ALLOWED_FRONTMATTER_FIELDS
|
| 266 |
-
if extra_fields:
|
| 267 |
-
errors.append(f"Unexpected fields in frontmatter: {', '.join(sorted(extra_fields))}")
|
| 268 |
-
|
| 269 |
-
if "name" not in metadata:
|
| 270 |
-
errors.append("Missing required field: name")
|
| 271 |
-
else:
|
| 272 |
-
errors.extend(_validate_name(metadata["name"], skill_dir))
|
| 273 |
-
|
| 274 |
-
if "description" not in metadata:
|
| 275 |
-
errors.append("Missing required field: description")
|
| 276 |
-
else:
|
| 277 |
-
errors.extend(_validate_description(metadata["description"]))
|
| 278 |
-
|
| 279 |
-
if "compatibility" in metadata:
|
| 280 |
-
errors.extend(_validate_compatibility(metadata["compatibility"]))
|
| 281 |
-
|
| 282 |
-
return errors
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
# ---------------------------------------------------------------------------
|
| 286 |
-
# Skill Discovery and Info
|
| 287 |
-
# ---------------------------------------------------------------------------
|
| 288 |
-
|
| 289 |
-
def _read_skill_properties(skill_dir: Path) -> dict:
    """Load a skill's frontmatter and body from its SKILL.md.

    Returns a dict with name, description, license, compatibility,
    allowed_tools, metadata, location (path of the SKILL.md) and body.
    Raises ParseError when SKILL.md is absent and ValidationError when the
    name or description field is missing.
    """
    skill_md = _find_skill_md(skill_dir)
    if skill_md is None:
        raise ParseError(f"SKILL.md not found in {skill_dir}")

    metadata, body = _parse_frontmatter(skill_md.read_text(encoding="utf-8"))

    # name is checked before description, so it is reported first.
    for required in ("name", "description"):
        if required not in metadata:
            raise ValidationError(f"Missing required field: {required}")

    properties = {
        "name": metadata["name"].strip(),
        "description": metadata["description"].strip(),
    }
    properties["license"] = metadata.get("license")
    properties["compatibility"] = metadata.get("compatibility")
    properties["allowed_tools"] = metadata.get("allowed-tools")
    properties["metadata"] = metadata.get("metadata", {})
    properties["location"] = str(skill_md)
    properties["body"] = body
    return properties
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
def _discover_skills() -> list[dict]:
    """List every readable skill directly under the skills root.

    Each entry carries name, description and a display location. Directories
    without a SKILL.md, and skills whose properties cannot be read, are
    skipped silently. Returns an empty list when the root does not exist.
    """
    skills_root = _get_skills_root()
    if not skills_root.exists():
        return []

    found = []
    for entry in sorted(skills_root.iterdir()):
        skill_md = _find_skill_md(entry) if entry.is_dir() else None
        if skill_md is None:
            continue

        try:
            props = _read_skill_properties(entry)
            summary = {
                "name": props["name"],
                "description": props["description"],
                "location": _display_path(str(skill_md)),
            }
            found.append(summary)
        except Exception:
            # Best-effort listing: one broken skill must not hide the rest.
            continue

    return found
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
def _get_skill_info(skill_name: str, offset: int = 0, max_chars: int = 0) -> dict:
|
| 345 |
-
"""Get full information for a specific skill."""
|
| 346 |
-
skills_root = _get_skills_root()
|
| 347 |
-
skill_dir = skills_root / skill_name
|
| 348 |
-
|
| 349 |
-
if not skill_dir.exists():
|
| 350 |
-
raise FileNotFoundError(f"Skill not found: {skill_name}")
|
| 351 |
-
|
| 352 |
-
skill_md = _find_skill_md(skill_dir)
|
| 353 |
-
if skill_md is None:
|
| 354 |
-
raise FileNotFoundError(f"SKILL.md not found in skill: {skill_name}")
|
| 355 |
-
|
| 356 |
-
content = skill_md.read_text(encoding="utf-8")
|
| 357 |
-
metadata, body = _parse_frontmatter(content)
|
| 358 |
-
|
| 359 |
-
total_chars = len(body)
|
| 360 |
-
start = max(0, min(offset, total_chars))
|
| 361 |
-
if max_chars > 0:
|
| 362 |
-
end = min(total_chars, start + max_chars)
|
| 363 |
-
else:
|
| 364 |
-
end = total_chars
|
| 365 |
-
|
| 366 |
-
body_chunk = body[start:end]
|
| 367 |
-
truncated = end < total_chars
|
| 368 |
-
next_cursor = end if truncated else None
|
| 369 |
-
|
| 370 |
-
return {
|
| 371 |
-
"name": metadata.get("name", "").strip(),
|
| 372 |
-
"description": metadata.get("description", "").strip(),
|
| 373 |
-
"license": metadata.get("license"),
|
| 374 |
-
"compatibility": metadata.get("compatibility"),
|
| 375 |
-
"allowed_tools": metadata.get("allowed-tools"),
|
| 376 |
-
"metadata": metadata.get("metadata", {}),
|
| 377 |
-
"location": _display_path(str(skill_md)),
|
| 378 |
-
"body": body_chunk,
|
| 379 |
-
"offset": start,
|
| 380 |
-
"total_chars": total_chars,
|
| 381 |
-
"truncated": truncated,
|
| 382 |
-
"next_cursor": next_cursor,
|
| 383 |
-
}
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
def _list_skill_resources(skill_name: str) -> dict:
|
| 387 |
-
"""List all resources within a skill directory.
|
| 388 |
-
|
| 389 |
-
Dynamically discovers all subdirectories, not just predefined ones.
|
| 390 |
-
"""
|
| 391 |
-
skills_root = _get_skills_root()
|
| 392 |
-
skill_dir = skills_root / skill_name
|
| 393 |
-
|
| 394 |
-
if not skill_dir.exists():
|
| 395 |
-
raise FileNotFoundError(f"Skill not found: {skill_name}")
|
| 396 |
-
|
| 397 |
-
resources = {
|
| 398 |
-
"skill": skill_name,
|
| 399 |
-
"directories": {}, # Dynamic: dirname -> file list
|
| 400 |
-
"other_files": [],
|
| 401 |
-
}
|
| 402 |
-
|
| 403 |
-
for item in sorted(skill_dir.iterdir()):
|
| 404 |
-
if item.name.lower() in ("skill.md",):
|
| 405 |
-
continue
|
| 406 |
-
|
| 407 |
-
if item.is_dir():
|
| 408 |
-
files = []
|
| 409 |
-
for f in sorted(item.rglob("*")):
|
| 410 |
-
if f.is_file():
|
| 411 |
-
files.append({
|
| 412 |
-
"path": f.relative_to(item).as_posix(),
|
| 413 |
-
"size": f.stat().st_size,
|
| 414 |
-
})
|
| 415 |
-
resources["directories"][item.name] = files
|
| 416 |
-
elif item.is_file():
|
| 417 |
-
resources["other_files"].append({
|
| 418 |
-
"path": item.name,
|
| 419 |
-
"size": item.stat().st_size,
|
| 420 |
-
})
|
| 421 |
-
|
| 422 |
-
return resources
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
def _read_skill_resource(skill_name: str, resource_path: str, offset: int = 0, max_chars: int = 3000) -> dict:
|
| 426 |
-
"""Read a specific resource file from a skill."""
|
| 427 |
-
skills_root = _get_skills_root()
|
| 428 |
-
skill_dir = skills_root / skill_name
|
| 429 |
-
|
| 430 |
-
if not skill_dir.exists():
|
| 431 |
-
raise FileNotFoundError(f"Skill not found: {skill_name}")
|
| 432 |
-
|
| 433 |
-
resource_file = skill_dir / resource_path
|
| 434 |
-
|
| 435 |
-
try:
|
| 436 |
-
resource_file.resolve().relative_to(skill_dir.resolve())
|
| 437 |
-
except ValueError:
|
| 438 |
-
raise PermissionError(f"Resource path escapes skill directory: {resource_path}")
|
| 439 |
-
|
| 440 |
-
if not resource_file.exists():
|
| 441 |
-
raise FileNotFoundError(f"Resource not found: {resource_path}")
|
| 442 |
-
|
| 443 |
-
if resource_file.is_dir():
|
| 444 |
-
raise IsADirectoryError(f"Path is a directory: {resource_path}")
|
| 445 |
-
|
| 446 |
-
content = resource_file.read_text(encoding="utf-8", errors="replace")
|
| 447 |
-
total_chars = len(content)
|
| 448 |
-
|
| 449 |
-
start = max(0, min(offset, total_chars))
|
| 450 |
-
if max_chars > 0:
|
| 451 |
-
end = min(total_chars, start + max_chars)
|
| 452 |
-
else:
|
| 453 |
-
end = total_chars
|
| 454 |
-
|
| 455 |
-
chunk = content[start:end]
|
| 456 |
-
truncated = end < total_chars
|
| 457 |
-
next_cursor = end if truncated else None
|
| 458 |
-
|
| 459 |
-
return {
|
| 460 |
-
"skill": skill_name,
|
| 461 |
-
"resource": resource_path,
|
| 462 |
-
"content": chunk,
|
| 463 |
-
"size": resource_file.stat().st_size,
|
| 464 |
-
"offset": start,
|
| 465 |
-
"total_chars": total_chars,
|
| 466 |
-
"truncated": truncated,
|
| 467 |
-
"next_cursor": next_cursor,
|
| 468 |
-
}
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
def _search_skills(query: str) -> list[dict]:
    """Return the discovered skills whose name or description contains ``query``.

    Matching is a case-insensitive substring test. Each hit is the skill's
    discovery entry plus ``match_in``, which is "name" when the name matched
    and "description" otherwise.
    """
    needle = query.lower()

    hits = []
    for skill in _discover_skills():
        if needle in skill["name"].lower():
            matched_field = "name"
        elif needle in skill["description"].lower():
            matched_field = "description"
        else:
            continue
        hits.append({**skill, "match_in": matched_field})

    return hits
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
# ---------------------------------------------------------------------------
|
| 491 |
-
# Human-Readable Output Formatters
|
| 492 |
-
# ---------------------------------------------------------------------------
|
| 493 |
-
|
| 494 |
-
def _format_discover(skills: list[dict]) -> str:
    """Render discovery results as a numbered plain-text listing.

    The header shows the skills root and the number of skills; each skill
    contributes its name and a description cut to 100 characters.
    """
    root_label = _display_path(str(_get_skills_root()))
    out = [
        "Available Skills",
        f"Root: {root_label}",
        f"Total: {len(skills)} skills",
        "",
    ]

    if skills:
        for number, entry in enumerate(skills, 1):
            title = entry["name"]
            summary = entry["description"]
            if len(summary) > 100:
                # 97 characters plus the ellipsis keeps the line at 100.
                summary = summary[:97] + "..."
            out.extend([f"{number}. {title}", f" {summary}", ""])
    else:
        out.append("No skills found.")

    return "\n".join(out).strip()
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
def _format_skill_info(info: dict) -> str:
    """Render one skill's metadata and SKILL.md body as plain text.

    Optional fields (license, compatibility, allowed tools, metadata) are
    printed only when present and truthy. When the body was truncated, a
    trailer reports the window shown and the cursor for the next read.
    """
    out = [
        f"Skill: {info['name']}",
        f"Location: {info['location']}",
        "",
        f"Description: {info['description']}",
    ]

    optional_fields = (
        ("License", "license"),
        ("Compatibility", "compatibility"),
        ("Allowed Tools", "allowed_tools"),
    )
    for label, key in optional_fields:
        value = info.get(key)
        if value:
            out.append(f"{label}: {value}")

    extra = info.get("metadata")
    if extra:
        pairs = ", ".join(f"{k}={v}" for k, v in extra.items())
        out.append(f"Metadata: {pairs}")

    out += ["", "--- SKILL.md Body ---"]
    if info.get("offset", 0) > 0:
        out.append(f"(Showing content from offset {info['offset']})")
    out += ["", info["body"]]

    if info.get("truncated"):
        out += [
            "",
            f"… Truncated. Showing {len(info['body'])} chars (offset {info['offset']}). Total: {info['total_chars']}.",
            f"Next cursor: {info['next_cursor']}",
        ]

    return "\n".join(out)
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
def _format_resources_list(resources: dict) -> str:
    """Render a skill's resource listing as a tree rooted at the skill name.

    ``resources`` carries ``skill``, an optional ``directories`` mapping of
    directory name to file records, and an optional ``other_files`` list of
    root-level file records. Each record has ``path`` and ``size``.
    """
    # NOTE(review): build_tree/render_tree live in _core; presumably they turn
    # (path, info) pairs into connector-drawn lines — confirm against _core.
    from ._core import build_tree, render_tree

    skill = resources["skill"]

    # Files inside sub-directories first, prefixed with their directory name.
    entries: list[tuple[str, dict]] = [
        (f"{dirname}/{record['path']}", {"size": record["size"]})
        for dirname, records in resources.get("directories", {}).items()
        for record in records
    ]
    # Then files that sit directly in the skill directory.
    entries.extend(
        (record["path"], {"size": record["size"]})
        for record in resources.get("other_files", [])
    )

    tree = build_tree(entries)

    out = [f"Resources for skill: {skill}", "", f"└── {skill}/"]
    out.extend(render_tree(tree, " "))
    out.append("")
    if entries:
        out.append(f"Total: {len(entries)} files")
    else:
        out.append("No resource files found.")

    return "\n".join(out).strip()
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
def _format_resource_content(data: dict) -> str:
    """Render a resource read result (header, content window, trailer) as text.

    ``data`` is the record produced when reading a resource: ``resource``,
    ``skill``, ``size``, ``content``, ``total_chars`` and, optionally,
    ``offset``, ``truncated`` and ``next_cursor``.
    """
    header = [
        f"Resource: {data['resource']}",
        f"Skill: {data['skill']}",
        f"Size: {_fmt_size(data['size'])}",
    ]

    shown_from = data.get("offset", 0)
    window = f"Showing: {len(data['content'])} of {data['total_chars']} chars (offset {shown_from})"

    out = [*header, window, "", "--- Content ---", "", data["content"]]

    if data.get("truncated"):
        # Tell the caller where to resume reading.
        out += ["", f"… Truncated. Next cursor: {data['next_cursor']}"]

    return "\n".join(out)
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
def _format_validation(skill_name: str, errors: list[str]) -> str:
    """Render validation results: a one-line success or a numbered error list."""
    if errors:
        report = [
            f"✗ Validation failed for skill '{skill_name}'",
            f"Errors: {len(errors)}",
            "",
        ]
        report.extend(f" {number}. {problem}" for number, problem in enumerate(errors, 1))
        return "\n".join(report)

    return f"✓ Skill '{skill_name}' is valid."
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
def _format_search(query: str, matches: list[dict]) -> str:
    """Render search hits as a numbered list with where each one matched.

    Descriptions longer than 80 characters are cut to 77 plus an ellipsis.
    """
    out = [
        f"Search results for: {query}",
        f"Matches: {len(matches)}",
        "",
    ]

    if matches:
        for number, hit in enumerate(matches, 1):
            title = hit["name"]
            summary = hit["description"]
            where = hit.get("match_in", "")
            if len(summary) > 80:
                summary = summary[:77] + "..."
            out.extend([f"{number}. {title} (matched in {where})", f" {summary}", ""])
    else:
        out.append("No matching skills found.")

    return "\n".join(out).strip()
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
def _format_error(message: str, hint: str = "") -> str:
    """Render an error line, followed by a hint line when *hint* is non-empty."""
    text = f"Error: {message}"
    if not hint:
        return text
    return "\n".join([text, f"Hint: {hint}"])
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
# ---------------------------------------------------------------------------
|
| 671 |
-
# Main Tool Function
|
| 672 |
-
# ---------------------------------------------------------------------------
|
| 673 |
-
|
| 674 |
-
# Tool entry point: normalizes the action, dispatches to the matching helper,
# and always returns human-readable text (errors are formatted, never raised).
# NOTE(review): no docstring is written here because the function is wrapped
# by ``autodoc``; presumably that decorator builds the docstring from
# TOOL_SUMMARY and the Annotated hints — confirm before adding one by hand.
@autodoc(summary=TOOL_SUMMARY)
def Agent_Skills(
    action: Annotated[str, "Operation: 'discover', 'info', 'resources', 'validate', 'search', 'help'."],
    skill_name: Annotated[Optional[str], "Name of skill (required for info/resources/validate)."] = None,
    resource_path: Annotated[Optional[str], "Path to resource file within skill (for resources action)."] = None,
    query: Annotated[Optional[str], "Search query (for search action)."] = None,
    max_chars: Annotated[int, "Max characters to return for skill body or resource content (0 = no limit)."] = 3000,
    offset: Annotated[int, "Start offset for reading content (for info/resources)."] = 0,
) -> str:
    _log_call_start("Agent_Skills", action=action, skill_name=skill_name, resource_path=resource_path, query=query, max_chars=max_chars, offset=offset)

    action = (action or "").strip().lower()

    if action not in {"discover", "info", "resources", "validate", "search", "help"}:
        result = _format_error(
            f"Invalid action: {action}",
            "Choose from: discover, info, resources, validate, search, help."
        )
        _log_call_end("Agent_Skills", _truncate_for_log(result))
        return result

    try:
        if action == "help":
            result = HELP_TEXT

        elif action == "discover":
            skills = _discover_skills()
            result = _format_discover(skills)

        elif action == "info":
            if not skill_name:
                result = _format_error("skill_name is required for 'info' action.")
            else:
                info = _get_skill_info(skill_name.strip(), offset=offset, max_chars=max_chars)
                result = _format_skill_info(info)

        elif action == "resources":
            if not skill_name:
                result = _format_error("skill_name is required for 'resources' action.")
            elif resource_path:
                resource_data = _read_skill_resource(skill_name.strip(), resource_path.strip(), offset=offset, max_chars=max_chars)
                result = _format_resource_content(resource_data)
            else:
                resources = _list_skill_resources(skill_name.strip())
                result = _format_resources_list(resources)

        elif action == "validate":
            if not skill_name:
                result = _format_error("skill_name is required for 'validate' action.")
            else:
                name = skill_name.strip()
                # Skills are direct children of the skills root, so a valid
                # name is a single path component. Without this check "../x"
                # or an absolute path would be joined onto the root, probed on
                # disk, and its real location echoed back in the error text.
                if not name or name in {".", ".."} or Path(name).name != name:
                    raise PermissionError(f"Invalid skill name: {skill_name}")
                skill_dir = _get_skills_root() / name
                errors = _validate_skill(skill_dir)
                result = _format_validation(skill_name, errors)

        elif action == "search":
            if not query:
                result = _format_error("query is required for 'search' action.")
            else:
                matches = _search_skills(query.strip())
                result = _format_search(query, matches)

        else:
            # Defensive: unreachable while the membership check above and this
            # chain list the same actions.
            result = _format_error(f"Action '{action}' not implemented.")

    except (FileNotFoundError, IsADirectoryError, PermissionError, ParseError, ValidationError) as e:
        # Expected, user-facing failures: show the message as-is.
        # IsADirectoryError is raised by the resource reader when the path
        # names a directory and should not be labelled "unexpected".
        result = _format_error(str(e))
    except Exception as e:
        # Boundary handler: the tool must always return text to the caller.
        result = _format_error(f"Unexpected error: {e}")

    _log_call_end("Agent_Skills", _truncate_for_log(result))
    return result
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
# ---------------------------------------------------------------------------
|
| 755 |
-
# Gradio Interface
|
| 756 |
-
# ---------------------------------------------------------------------------
|
| 757 |
-
|
| 758 |
-
def build_interface() -> gr.Interface:
    """Build the Gradio interface that exposes ``Agent_Skills``.

    Inputs are created in the same order as the tool's parameters: action,
    skill name, resource path, search query, max chars, offset.
    """
    action_input = gr.Radio(
        label="Action",
        choices=["discover", "info", "resources", "validate", "search", "help"],
        value="help",
        info="Operation to perform",
    )
    skill_input = gr.Textbox(label="Skill Name", placeholder="pdf", max_lines=1, info="Name of the skill")
    resource_input = gr.Textbox(label="Resource Path", placeholder="references/forms.md", max_lines=1, info="Path to resource within skill")
    query_input = gr.Textbox(label="Search Query", placeholder="MCP", max_lines=1, info="Keyword to search for")
    max_chars_input = gr.Slider(minimum=0, maximum=100000, step=500, value=3000, label="Max Chars", info="Max characters for content (0 = no limit)")
    offset_input = gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Info/Resources)")

    banner = (
        "<div style=\"text-align:center; overflow:hidden;\">"
        "Discover, inspect, and access Agent Skills. "
        "Skills provide structured instructions and resources for specialized tasks."
        "</div>"
    )

    return gr.Interface(
        fn=Agent_Skills,
        inputs=[
            action_input,
            skill_input,
            resource_input,
            query_input,
            max_chars_input,
            offset_input,
        ],
        outputs=gr.Textbox(label="Result", lines=20),
        title="Agent Skills",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
__all__ = ["Agent_Skills", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Agent Skills Module for Nymbo-Tools MCP Server.
|
| 5 |
+
|
| 6 |
+
Provides structured skill discovery, activation, validation, and resource access
|
| 7 |
+
following the Agent Skills specification (https://agentskills.io).
|
| 8 |
+
|
| 9 |
+
Skills are directories containing a SKILL.md file with YAML frontmatter (name, description)
|
| 10 |
+
and Markdown instructions. This tool enables agents to efficiently discover and use skills
|
| 11 |
+
through progressive disclosure: low-token metadata discovery, on-demand full activation,
|
| 12 |
+
and targeted resource access.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
import os
|
| 17 |
+
import re
|
| 18 |
+
import unicodedata
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Annotated, Optional
|
| 21 |
+
|
| 22 |
+
import gradio as gr
|
| 23 |
+
|
| 24 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 25 |
+
from ._docstrings import autodoc
|
| 26 |
+
from .File_System import ROOT_DIR, _display_path
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# Constants
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
|
| 33 |
+
SKILLS_SUBDIR = "Skills" # Subdirectory under ROOT_DIR containing skills
|
| 34 |
+
MAX_SKILL_NAME_LENGTH = 64
|
| 35 |
+
MAX_DESCRIPTION_LENGTH = 1024
|
| 36 |
+
MAX_COMPATIBILITY_LENGTH = 500
|
| 37 |
+
|
| 38 |
+
ALLOWED_FRONTMATTER_FIELDS = {
|
| 39 |
+
"name",
|
| 40 |
+
"description",
|
| 41 |
+
"license",
|
| 42 |
+
"allowed-tools",
|
| 43 |
+
"metadata",
|
| 44 |
+
"compatibility",
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
TOOL_SUMMARY = (
|
| 48 |
+
"Discover, inspect, validate, and access Agent Skills. "
|
| 49 |
+
"Actions: discover (list all skills), info (get SKILL.md contents), "
|
| 50 |
+
"resources (list/read bundled files), validate (check format), search (find by keyword). "
|
| 51 |
+
"Skills provide structured instructions for specialized tasks. "
|
| 52 |
+
"Use in combination with the `Shell_Command` and `File_System` tools."
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
HELP_TEXT = """\
|
| 56 |
+
Agent Skills — actions and usage
|
| 57 |
+
|
| 58 |
+
Skills are directories containing a SKILL.md file with YAML frontmatter (name, description)
|
| 59 |
+
and Markdown instructions. They live under /Skills/ in the filesystem root.
|
| 60 |
+
|
| 61 |
+
Actions:
|
| 62 |
+
- discover: List all available skills with their metadata (name, description, location)
|
| 63 |
+
- info: Get the full contents of a specific skill's SKILL.md file
|
| 64 |
+
- resources: List or read files within a skill's bundled directories (scripts/, references/, assets/)
|
| 65 |
+
- validate: Check if a skill conforms to the Agent Skills specification
|
| 66 |
+
- search: Find skills by keyword in name or description
|
| 67 |
+
- help: Show this guide
|
| 68 |
+
|
| 69 |
+
Examples:
|
| 70 |
+
- Discover all skills: action="discover"
|
| 71 |
+
- Get skill info: action="info", skill_name="pdf"
|
| 72 |
+
- List skill resources: action="resources", skill_name="mcp-builder"
|
| 73 |
+
- Read a resource: action="resources", skill_name="pdf", resource_path="references/forms.md"
|
| 74 |
+
- Validate a skill: action="validate", skill_name="pdf"
|
| 75 |
+
- Search for skills: action="search", query="MCP"
|
| 76 |
+
"""
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ---------------------------------------------------------------------------
|
| 80 |
+
# Skills Root Resolution
|
| 81 |
+
# ---------------------------------------------------------------------------
|
| 82 |
+
|
| 83 |
+
def _get_skills_root() -> Path:
    """Return the directory that holds the skills.

    A non-blank ``NYMBO_SKILLS_ROOT`` environment variable wins and is
    resolved to an absolute path; otherwise the ``SKILLS_SUBDIR`` folder under
    ``ROOT_DIR`` is used.
    """
    override = (os.getenv("NYMBO_SKILLS_ROOT") or "").strip()
    if override:
        return Path(override).resolve()
    return Path(ROOT_DIR) / SKILLS_SUBDIR
|
| 89 |
+
|
| 90 |
+
# Import _fmt_size from shared utility instead of duplicating
|
| 91 |
+
from ._core import _fmt_size
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# ---------------------------------------------------------------------------
|
| 95 |
+
# YAML Frontmatter Parsing (adapted from skills_ref/parser.py)
|
| 96 |
+
# ---------------------------------------------------------------------------
|
| 97 |
+
|
| 98 |
+
class ParseError(Exception):
    """Signals that a SKILL.md file could not be parsed."""
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class ValidationError(Exception):
    """Signals that a skill failed validation.

    ``errors`` holds the individual problems; when none are supplied it
    defaults to a one-item list containing the message itself.
    """

    def __init__(self, message: str, errors: list[str] | None = None):
        super().__init__(message)
        if errors is None:
            errors = [message]
        self.errors = errors
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _parse_frontmatter(content: str) -> tuple[dict, str]:
    """Parse the YAML frontmatter of a SKILL.md document.

    Only the small YAML subset used by skills is understood: top-level
    ``key: value`` pairs (values optionally quoted) and one nested
    ``metadata:`` block of indented ``key: value`` pairs. No YAML library is
    required.

    Args:
        content: Full text of the SKILL.md file. A leading UTF-8 byte-order
            mark is tolerated.

    Returns:
        ``(metadata, body)``: the parsed frontmatter dict and the Markdown
        body with surrounding whitespace stripped.

    Raises:
        ParseError: If the text does not start with ``---`` or the
            frontmatter is never closed by a second ``---``.
    """
    # read_text(encoding="utf-8") keeps a BOM as "\ufeff"; without removing it
    # a file saved with a BOM would fail the delimiter check below.
    text = content.lstrip("\ufeff")
    if not text.startswith("---"):
        raise ParseError("SKILL.md must start with YAML frontmatter (---)")

    remainder = text[3:]
    # Prefer a closing delimiter that begins a line, so a "---" inside a value
    # (e.g. "description: split --- merge") does not end the frontmatter early.
    closing = re.search(r"^---", remainder, flags=re.MULTILINE)
    if closing is not None:
        frontmatter_str = remainder[:closing.start()]
        body = remainder[closing.end():].strip()
    else:
        # Backward-compatible fallback: accept the first "---" anywhere.
        frontmatter_str, delimiter, tail = remainder.partition("---")
        if not delimiter:
            raise ParseError("SKILL.md frontmatter not properly closed with ---")
        body = tail.strip()

    # Simple YAML parsing without external dependency
    metadata: dict = {}
    in_metadata_block = False
    metadata_dict: dict = {}

    for line in frontmatter_str.strip().split("\n"):
        if not line.strip():
            continue

        if line.strip() == "metadata:":
            in_metadata_block = True
            continue

        if in_metadata_block:
            if line.startswith(" "):
                # Indented line: an entry of the nested metadata mapping.
                match = re.match(r"^\s+(\w+):\s*(.*)$", line)
                if match:
                    key = match.group(1).strip()
                    value = match.group(2).strip().strip('"').strip("'")
                    metadata_dict[key] = value
                continue
            # First non-indented line closes the metadata block; flush what
            # was collected and fall through to parse this line as top-level.
            in_metadata_block = False
            if metadata_dict:
                metadata["metadata"] = metadata_dict
                metadata_dict = {}

        match = re.match(r"^(\S+):\s*(.*)$", line)
        if match:
            key = match.group(1).strip()
            value = match.group(2).strip()
            # Drop one pair of matching surrounding quotes.
            if (value.startswith('"') and value.endswith('"')) or \
               (value.startswith("'") and value.endswith("'")):
                value = value[1:-1]
            metadata[key] = value if value else ""

    # The metadata block may run to the end of the frontmatter.
    if in_metadata_block and metadata_dict:
        metadata["metadata"] = metadata_dict

    return metadata, body
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _find_skill_md(skill_dir: Path) -> Optional[Path]:
    """Locate the skill manifest inside *skill_dir*.

    ``SKILL.md`` is tried before ``skill.md``; returns ``None`` when neither
    exists.
    """
    candidates = (skill_dir / filename for filename in ("SKILL.md", "skill.md"))
    return next((candidate for candidate in candidates if candidate.exists()), None)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ---------------------------------------------------------------------------
|
| 179 |
+
# Skill Validation (adapted from skills_ref/validator.py)
|
| 180 |
+
# ---------------------------------------------------------------------------
|
| 181 |
+
|
| 182 |
+
def _validate_name(name: str, skill_dir: Path) -> list[str]:
    """Check a skill name against the naming rules and its directory name.

    The name is NFKC-normalized, then checked for length, lowercase, hyphen
    placement and allowed characters. When *skill_dir* is given, its
    (normalized) directory name must equal the skill name.

    Returns:
        A list of error messages; empty when the name is acceptable.
    """
    if not (name and isinstance(name, str) and name.strip()):
        return ["Field 'name' must be a non-empty string"]

    normalized = unicodedata.normalize("NFKC", name.strip())

    # (rule violated?, message) pairs, in the order they are reported.
    checks = (
        (
            len(normalized) > MAX_SKILL_NAME_LENGTH,
            f"Skill name '{normalized}' exceeds {MAX_SKILL_NAME_LENGTH} character limit ({len(normalized)} chars)",
        ),
        (
            normalized != normalized.lower(),
            f"Skill name '{normalized}' must be lowercase",
        ),
        (
            normalized.startswith("-") or normalized.endswith("-"),
            "Skill name cannot start or end with a hyphen",
        ),
        (
            "--" in normalized,
            "Skill name cannot contain consecutive hyphens",
        ),
        (
            not all(ch.isalnum() or ch == "-" for ch in normalized),
            f"Skill name '{normalized}' contains invalid characters. Only letters, digits, and hyphens allowed.",
        ),
    )
    problems = [message for violated, message in checks if violated]

    if skill_dir:
        folder = unicodedata.normalize("NFKC", skill_dir.name)
        if folder != normalized:
            problems.append(f"Directory name '{skill_dir.name}' must match skill name '{normalized}'")

    return problems
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def _validate_description(description: str) -> list[str]:
    """Check that the description is a non-blank string within the length limit.

    Returns:
        A list of error messages; empty when the description is acceptable.
    """
    if not (description and isinstance(description, str) and description.strip()):
        return ["Field 'description' must be a non-empty string"]

    size = len(description)
    if size > MAX_DESCRIPTION_LENGTH:
        return [f"Description exceeds {MAX_DESCRIPTION_LENGTH} character limit ({size} chars)"]

    return []
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def _validate_compatibility(compatibility: str) -> list[str]:
    """Check that the compatibility field is a string within the length limit.

    Returns:
        A list of error messages; empty when the value is acceptable.
    """
    if not isinstance(compatibility, str):
        return ["Field 'compatibility' must be a string"]

    size = len(compatibility)
    if size > MAX_COMPATIBILITY_LENGTH:
        return [f"Compatibility exceeds {MAX_COMPATIBILITY_LENGTH} character limit ({size} chars)"]

    return []
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def _validate_skill(skill_dir: Path) -> list[str]:
    """Validate a skill directory and report every problem found.

    Structural problems (missing path, not a directory, no SKILL.md,
    unreadable or unparsable frontmatter) are returned immediately as a
    single-item list. Otherwise the frontmatter fields are checked and all
    findings are collected.

    Returns:
        A list of error messages; an empty list means the skill is valid.
    """
    if not skill_dir.exists():
        return [f"Path does not exist: {skill_dir}"]
    if not skill_dir.is_dir():
        return [f"Not a directory: {skill_dir}"]

    manifest = _find_skill_md(skill_dir)
    if manifest is None:
        return ["Missing required file: SKILL.md"]

    try:
        metadata, _ = _parse_frontmatter(manifest.read_text(encoding="utf-8"))
    except ParseError as exc:
        return [str(exc)]
    except Exception as exc:
        # Any other failure (I/O, decoding) is reported, not raised.
        return [f"Failed to read SKILL.md: {exc}"]

    problems = []

    unexpected = sorted(set(metadata.keys()) - ALLOWED_FRONTMATTER_FIELDS)
    if unexpected:
        problems.append(f"Unexpected fields in frontmatter: {', '.join(unexpected)}")

    if "name" in metadata:
        problems.extend(_validate_name(metadata["name"], skill_dir))
    else:
        problems.append("Missing required field: name")

    if "description" in metadata:
        problems.extend(_validate_description(metadata["description"]))
    else:
        problems.append("Missing required field: description")

    if "compatibility" in metadata:
        problems.extend(_validate_compatibility(metadata["compatibility"]))

    return problems
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
# ---------------------------------------------------------------------------
|
| 286 |
+
# Skill Discovery and Info
|
| 287 |
+
# ---------------------------------------------------------------------------
|
| 288 |
+
|
| 289 |
+
def _read_skill_properties(skill_dir: Path) -> dict:
    """Load a skill's frontmatter and body from its SKILL.md.

    Returns:
        A dict with ``name``, ``description``, ``license``, ``compatibility``,
        ``allowed_tools``, ``metadata``, ``location`` (path of the manifest as
        a string) and ``body``.

    Raises:
        ParseError: If no SKILL.md exists or its frontmatter cannot be parsed.
        ValidationError: If ``name`` or ``description`` is missing.
    """
    manifest = _find_skill_md(skill_dir)
    if manifest is None:
        raise ParseError(f"SKILL.md not found in {skill_dir}")

    metadata, body = _parse_frontmatter(manifest.read_text(encoding="utf-8"))

    for required in ("name", "description"):
        if required not in metadata:
            raise ValidationError(f"Missing required field: {required}")

    properties = {
        "name": metadata.get("name", "").strip(),
        "description": metadata.get("description", "").strip(),
    }
    properties["license"] = metadata.get("license")
    properties["compatibility"] = metadata.get("compatibility")
    # The frontmatter key is hyphenated; the returned key uses an underscore.
    properties["allowed_tools"] = metadata.get("allowed-tools")
    properties["metadata"] = metadata.get("metadata", {})
    properties["location"] = str(manifest)
    properties["body"] = body
    return properties
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def _discover_skills() -> list[dict]:
    """List every readable skill directly under the skills root.

    Entries are visited in sorted order. Non-directories, directories without
    a SKILL.md, and skills whose manifest fails to load are skipped silently,
    so one broken skill never hides the others.

    Returns:
        A list of ``{"name", "description", "location"}`` dicts; empty when
        the skills root does not exist.
    """
    root = _get_skills_root()
    if not root.exists():
        return []

    found = []
    for candidate in sorted(root.iterdir()):
        if not candidate.is_dir():
            continue

        manifest = _find_skill_md(candidate)
        if manifest is None:
            continue

        try:
            props = _read_skill_properties(candidate)
            record = {
                "name": props["name"],
                "description": props["description"],
                "location": _display_path(str(manifest)),
            }
            found.append(record)
        except Exception:
            # Best-effort discovery: ignore skills that cannot be loaded.
            continue

    return found
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def _get_skill_info(skill_name: str, offset: int = 0, max_chars: int = 0) -> dict:
    """Return a skill's metadata plus a window of its SKILL.md body.

    Args:
        skill_name: Directory name of the skill, a single path component
            directly under the skills root.
        offset: Character offset into the body at which to start; clamped to
            ``[0, len(body)]``.
        max_chars: Maximum number of characters to return; ``0`` or a
            negative value means no limit.

    Returns:
        A dict with the frontmatter fields, the display ``location``, the
        ``body`` window, and the paging fields ``offset``, ``total_chars``,
        ``truncated`` and ``next_cursor`` (``None`` when nothing remains).

    Raises:
        PermissionError: If ``skill_name`` is not a single path component.
        FileNotFoundError: If the skill or its SKILL.md does not exist.
        ParseError: If the frontmatter cannot be parsed.
    """
    # Skills are direct children of the skills root, so a legitimate name is a
    # single path component. Reject separators, "..", and absolute paths before
    # touching the filesystem; otherwise "../x" or "/abs" would escape the root.
    if not skill_name or skill_name in {".", ".."} or Path(skill_name).name != skill_name:
        raise PermissionError(f"Invalid skill name: {skill_name}")

    skills_root = _get_skills_root()
    skill_dir = skills_root / skill_name

    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill not found: {skill_name}")

    skill_md = _find_skill_md(skill_dir)
    if skill_md is None:
        raise FileNotFoundError(f"SKILL.md not found in skill: {skill_name}")

    content = skill_md.read_text(encoding="utf-8")
    metadata, body = _parse_frontmatter(content)

    # Compute the [start, end) window, clamped to the body length.
    total_chars = len(body)
    start = max(0, min(offset, total_chars))
    if max_chars > 0:
        end = min(total_chars, start + max_chars)
    else:
        end = total_chars

    body_chunk = body[start:end]
    truncated = end < total_chars
    next_cursor = end if truncated else None

    return {
        "name": metadata.get("name", "").strip(),
        "description": metadata.get("description", "").strip(),
        "license": metadata.get("license"),
        "compatibility": metadata.get("compatibility"),
        "allowed_tools": metadata.get("allowed-tools"),
        "metadata": metadata.get("metadata", {}),
        "location": _display_path(str(skill_md)),
        "body": body_chunk,
        "offset": start,
        "total_chars": total_chars,
        "truncated": truncated,
        "next_cursor": next_cursor,
    }
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def _list_skill_resources(skill_name: str) -> dict:
    """List all resources within a skill directory.

    Every sub-directory is discovered dynamically (not just predefined ones)
    and walked recursively. The SKILL.md manifest itself is excluded.

    Args:
        skill_name: Directory name of the skill, a single path component
            directly under the skills root.

    Returns:
        ``{"skill": name, "directories": {dirname: [file, ...]},
        "other_files": [file, ...]}`` where each file is
        ``{"path": posix_relative_path, "size": bytes}``.

    Raises:
        PermissionError: If ``skill_name`` is not a single path component.
        FileNotFoundError: If the skill directory does not exist.
    """
    # Skills are direct children of the skills root, so a legitimate name is a
    # single path component. Without this check a name such as "../.." would
    # make this function recursively list files outside the skills root.
    if not skill_name or skill_name in {".", ".."} or Path(skill_name).name != skill_name:
        raise PermissionError(f"Invalid skill name: {skill_name}")

    skills_root = _get_skills_root()
    skill_dir = skills_root / skill_name

    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill not found: {skill_name}")

    directories: dict[str, list[dict]] = {}  # dirname -> file list
    other_files: list[dict] = []

    for item in sorted(skill_dir.iterdir()):
        # The manifest is served by the info action, not listed as a resource.
        if item.name.lower() == "skill.md":
            continue

        if item.is_dir():
            directories[item.name] = [
                {
                    "path": f.relative_to(item).as_posix(),
                    "size": f.stat().st_size,
                }
                for f in sorted(item.rglob("*"))
                if f.is_file()
            ]
        elif item.is_file():
            other_files.append({
                "path": item.name,
                "size": item.stat().st_size,
            })

    return {
        "skill": skill_name,
        "directories": directories,
        "other_files": other_files,
    }
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def _read_skill_resource(skill_name: str, resource_path: str, offset: int = 0, max_chars: int = 3000) -> dict:
    """Read a window of a resource file that belongs to a skill.

    Args:
        skill_name: Directory name of the skill, a single path component
            directly under the skills root.
        resource_path: Path of the file relative to the skill directory.
        offset: Character offset at which to start; clamped to
            ``[0, len(content)]``.
        max_chars: Maximum number of characters to return; ``0`` or a
            negative value means no limit.

    Returns:
        A dict with ``skill``, ``resource``, the ``content`` window, the
        on-disk ``size`` in bytes, and the paging fields ``offset``,
        ``total_chars``, ``truncated`` and ``next_cursor`` (``None`` when
        nothing remains).

    Raises:
        PermissionError: If ``skill_name`` is not a single path component or
            ``resource_path`` resolves outside the skill directory.
        FileNotFoundError: If the skill or the resource does not exist.
        IsADirectoryError: If the resource path names a directory.
    """
    # The check further down only keeps resource_path inside skill_dir. It is
    # worthless if skill_dir itself can be moved with "..", or replaced by an
    # absolute skill_name, so require a single path component first.
    if not skill_name or skill_name in {".", ".."} or Path(skill_name).name != skill_name:
        raise PermissionError(f"Invalid skill name: {skill_name}")

    skills_root = _get_skills_root()
    skill_dir = skills_root / skill_name

    if not skill_dir.exists():
        raise FileNotFoundError(f"Skill not found: {skill_name}")

    resource_file = skill_dir / resource_path

    # Resolve symlinks and ".." before comparing, then refuse anything that
    # lands outside the skill directory.
    try:
        resource_file.resolve().relative_to(skill_dir.resolve())
    except ValueError:
        raise PermissionError(f"Resource path escapes skill directory: {resource_path}")

    if not resource_file.exists():
        raise FileNotFoundError(f"Resource not found: {resource_path}")

    if resource_file.is_dir():
        raise IsADirectoryError(f"Path is a directory: {resource_path}")

    # Undecodable bytes are replaced rather than raising, so any file can be shown.
    content = resource_file.read_text(encoding="utf-8", errors="replace")
    total_chars = len(content)

    # Compute the [start, end) window, clamped to the content length.
    start = max(0, min(offset, total_chars))
    if max_chars > 0:
        end = min(total_chars, start + max_chars)
    else:
        end = total_chars

    chunk = content[start:end]
    truncated = end < total_chars
    next_cursor = end if truncated else None

    return {
        "skill": skill_name,
        "resource": resource_path,
        "content": chunk,
        "size": resource_file.stat().st_size,
        "offset": start,
        "total_chars": total_chars,
        "truncated": truncated,
        "next_cursor": next_cursor,
    }
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def _search_skills(query: str) -> list[dict]:
    """Return discovered skills whose name or description contains *query*.

    Matching is a case-insensitive substring test. Every hit is the skill's
    discovery record plus a ``match_in`` key: ``"name"`` when the name matched,
    otherwise ``"description"``.
    """
    needle = query.lower()
    hits = []
    for entry in _discover_skills():
        in_name = needle in entry["name"].lower()
        in_description = needle in entry["description"].lower()
        if not (in_name or in_description):
            continue
        # A name hit takes precedence when both fields match.
        hits.append({**entry, "match_in": "name" if in_name else "description"})
    return hits
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
# ---------------------------------------------------------------------------
|
| 491 |
+
# Human-Readable Output Formatters
|
| 492 |
+
# ---------------------------------------------------------------------------
|
| 493 |
+
|
| 494 |
+
def _format_discover(skills: list[dict]) -> str:
    """Render the discovered skills as a numbered plain-text listing.

    The header shows the skills root (as shortened by ``_display_path``) and
    the number of skills. Each skill contributes its name and a description
    capped at 100 characters.
    """
    root_label = _display_path(str(_get_skills_root()))
    out = [
        "Available Skills",
        f"Root: {root_label}",
        f"Total: {len(skills)} skills",
        "",
    ]

    if not skills:
        out.append("No skills found.")
        return "\n".join(out).strip()

    for position, entry in enumerate(skills, 1):
        title = entry["name"]
        blurb = entry["description"]
        # Keep long descriptions to 100 characters including the ellipsis.
        if len(blurb) > 100:
            blurb = f"{blurb[:97]}..."
        out.extend((f"{position}. {title}", f" {blurb}", ""))

    return "\n".join(out).strip()
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
def _format_skill_info(info: dict) -> str:
|
| 521 |
+
"""Format skill info as human-readable text."""
|
| 522 |
+
lines = [
|
| 523 |
+
f"Skill: {info['name']}",
|
| 524 |
+
f"Location: {info['location']}",
|
| 525 |
+
"",
|
| 526 |
+
f"Description: {info['description']}",
|
| 527 |
+
]
|
| 528 |
+
|
| 529 |
+
if info.get("license"):
|
| 530 |
+
lines.append(f"License: {info['license']}")
|
| 531 |
+
if info.get("compatibility"):
|
| 532 |
+
lines.append(f"Compatibility: {info['compatibility']}")
|
| 533 |
+
if info.get("allowed_tools"):
|
| 534 |
+
lines.append(f"Allowed Tools: {info['allowed_tools']}")
|
| 535 |
+
if info.get("metadata"):
|
| 536 |
+
meta_str = ", ".join(f"{k}={v}" for k, v in info["metadata"].items())
|
| 537 |
+
lines.append(f"Metadata: {meta_str}")
|
| 538 |
+
|
| 539 |
+
lines.append("")
|
| 540 |
+
lines.append("--- SKILL.md Body ---")
|
| 541 |
+
if info.get("offset", 0) > 0:
|
| 542 |
+
lines.append(f"(Showing content from offset {info['offset']})")
|
| 543 |
+
lines.append("")
|
| 544 |
+
lines.append(info["body"])
|
| 545 |
+
|
| 546 |
+
if info.get("truncated"):
|
| 547 |
+
lines.append("")
|
| 548 |
+
lines.append(f"… Truncated. Showing {len(info['body'])} chars (offset {info['offset']}). Total: {info['total_chars']}.")
|
| 549 |
+
lines.append(f"Next cursor: {info['next_cursor']}")
|
| 550 |
+
|
| 551 |
+
return "\n".join(lines)
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
def _format_resources_list(resources: dict) -> str:
    """Render a skill's resource files as a tree with a file-count footer.

    Reads ``resources["skill"]``, the optional ``"directories"`` mapping
    (directory name -> list of file records) and the optional
    ``"other_files"`` list. Every file record must carry ``path`` and
    ``size``. Tree construction and rendering are delegated to
    ``build_tree`` / ``render_tree`` from ``._core``.
    """
    from ._core import build_tree, render_tree

    skill_label = resources["skill"]

    # Files inside named directories get the directory name as a path prefix.
    nested = [
        (f"{folder}/{item['path']}", {"size": item["size"]})
        for folder, members in resources.get("directories", {}).items()
        for item in members
    ]
    # Files that sit directly in the skill root keep their path as-is.
    top_level = [
        (item["path"], {"size": item["size"]})
        for item in resources.get("other_files", [])
    ]
    entries: list[tuple[str, dict]] = nested + top_level

    tree = build_tree(entries)
    file_count = len(entries)
    footer = f"Total: {file_count} files" if file_count else "No resource files found."

    rendered = [
        f"Resources for skill: {skill_label}",
        "",
        f"└── {skill_label}/",
        *render_tree(tree, " "),
        "",
        footer,
    ]
    return "\n".join(rendered).strip()
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
def _format_resource_content(data: dict) -> str:
    """Render one resource file's content with a short header.

    Required keys: ``resource``, ``skill``, ``size``, ``content`` and
    ``total_chars``. ``offset`` defaults to 0. When ``truncated`` is truthy a
    footer with ``next_cursor`` is appended.
    """
    report = [
        f"Resource: {data['resource']}",
        f"Skill: {data['skill']}",
        f"Size: {_fmt_size(data['size'])}",
    ]

    start = data.get("offset", 0)
    text = data["content"]
    report += [
        f"Showing: {len(text)} of {data['total_chars']} chars (offset {start})",
        "",
        "--- Content ---",
        "",
        text,
    ]

    if data.get("truncated"):
        report += ["", f"… Truncated. Next cursor: {data['next_cursor']}"]

    return "\n".join(report)
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
def _format_validation(skill_name: str, errors: list[str]) -> str:
|
| 622 |
+
"""Format validation results as human-readable text."""
|
| 623 |
+
if not errors:
|
| 624 |
+
return f"✓ Skill '{skill_name}' is valid."
|
| 625 |
+
|
| 626 |
+
lines = [
|
| 627 |
+
f"✗ Validation failed for skill '{skill_name}'",
|
| 628 |
+
f"Errors: {len(errors)}",
|
| 629 |
+
"",
|
| 630 |
+
]
|
| 631 |
+
|
| 632 |
+
for i, err in enumerate(errors, 1):
|
| 633 |
+
lines.append(f" {i}. {err}")
|
| 634 |
+
|
| 635 |
+
return "\n".join(lines)
|
| 636 |
+
|
| 637 |
+
|
| 638 |
+
def _format_search(query: str, matches: list[dict]) -> str:
|
| 639 |
+
"""Format search results as human-readable text."""
|
| 640 |
+
lines = [
|
| 641 |
+
f"Search results for: {query}",
|
| 642 |
+
f"Matches: {len(matches)}",
|
| 643 |
+
"",
|
| 644 |
+
]
|
| 645 |
+
|
| 646 |
+
if not matches:
|
| 647 |
+
lines.append("No matching skills found.")
|
| 648 |
+
else:
|
| 649 |
+
for i, m in enumerate(matches, 1):
|
| 650 |
+
name = m["name"]
|
| 651 |
+
desc = m["description"]
|
| 652 |
+
match_in = m.get("match_in", "")
|
| 653 |
+
if len(desc) > 80:
|
| 654 |
+
desc = desc[:77] + "..."
|
| 655 |
+
lines.append(f"{i}. {name} (matched in {match_in})")
|
| 656 |
+
lines.append(f" {desc}")
|
| 657 |
+
lines.append("")
|
| 658 |
+
|
| 659 |
+
return "\n".join(lines).strip()
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
def _format_error(message: str, hint: str = "") -> str:
|
| 663 |
+
"""Format error as human-readable text."""
|
| 664 |
+
lines = [f"Error: {message}"]
|
| 665 |
+
if hint:
|
| 666 |
+
lines.append(f"Hint: {hint}")
|
| 667 |
+
return "\n".join(lines)
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
# ---------------------------------------------------------------------------
|
| 671 |
+
# Main Tool Function
|
| 672 |
+
# ---------------------------------------------------------------------------
|
| 673 |
+
|
| 674 |
+
# Entry point of the Agent Skills tool: validates `action`, dispatches to the
# matching helper, and always returns human-readable text (errors included).
# Documented with comments rather than a docstring because @autodoc is given
# the summary and presumably generates __doc__ itself (TODO confirm).
@autodoc(summary=TOOL_SUMMARY)
def Agent_Skills(
    action: Annotated[str, "Operation: 'discover', 'info', 'resources', 'validate', 'search', 'help'."],
    skill_name: Annotated[Optional[str], "Name of skill (required for info/resources/validate)."] = None,
    resource_path: Annotated[Optional[str], "Path to resource file within skill (for resources action)."] = None,
    query: Annotated[Optional[str], "Search query (for search action)."] = None,
    max_chars: Annotated[int, "Max characters to return for skill body or resource content (0 = no limit)."] = 3000,
    offset: Annotated[int, "Start offset for reading content (for info/resources)."] = 0,
) -> str:
    _log_call_start("Agent_Skills", action=action, skill_name=skill_name, resource_path=resource_path, query=query, max_chars=max_chars, offset=offset)

    action = (action or "").strip().lower()

    if action not in {"discover", "info", "resources", "validate", "search", "help"}:
        result = _format_error(
            f"Invalid action: {action}",
            "Choose from: discover, info, resources, validate, search, help."
        )
        _log_call_end("Agent_Skills", _truncate_for_log(result))
        return result

    try:
        # Normalise the text arguments once, BEFORE the required-argument
        # checks. Otherwise a whitespace-only value passes `if not value`,
        # is stripped to "" afterwards, and then validates the skills root
        # itself or matches every skill in a search.
        skill = (skill_name or "").strip()
        resource = (resource_path or "").strip()
        needle = (query or "").strip()

        if action == "help":
            result = HELP_TEXT

        elif action == "discover":
            result = _format_discover(_discover_skills())

        elif action == "info":
            if not skill:
                result = _format_error("skill_name is required for 'info' action.")
            else:
                info = _get_skill_info(skill, offset=offset, max_chars=max_chars)
                result = _format_skill_info(info)

        elif action == "resources":
            if not skill:
                result = _format_error("skill_name is required for 'resources' action.")
            elif resource:
                # A resource path selects "read one file"...
                resource_data = _read_skill_resource(skill, resource, offset=offset, max_chars=max_chars)
                result = _format_resource_content(resource_data)
            else:
                # ...and its absence selects "list all files".
                result = _format_resources_list(_list_skill_resources(skill))

        elif action == "validate":
            if not skill:
                result = _format_error("skill_name is required for 'validate' action.")
            else:
                errors = _validate_skill(_get_skills_root() / skill)
                # Report the same (stripped) name that was actually validated.
                result = _format_validation(skill, errors)

        else:
            # Only "search" remains: the action set was checked above.
            if not needle:
                result = _format_error("query is required for 'search' action.")
            else:
                result = _format_search(needle, _search_skills(needle))

    except (FileNotFoundError, IsADirectoryError, PermissionError, ParseError, ValidationError) as e:
        # Expected failures: surface the message as-is. IsADirectoryError is
        # raised by the resource reader for directory paths and used to be
        # mislabelled as "Unexpected error".
        result = _format_error(str(e))
    except Exception as e:
        # Tool boundary: never propagate, always answer with text.
        result = _format_error(f"Unexpected error: {e}")

    _log_call_end("Agent_Skills", _truncate_for_log(result))
    return result
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
# ---------------------------------------------------------------------------
|
| 755 |
+
# Gradio Interface
|
| 756 |
+
# ---------------------------------------------------------------------------
|
| 757 |
+
|
| 758 |
+
def build_interface() -> gr.Interface:
    """Build the Gradio interface that exposes ``Agent_Skills``.

    The input widgets are listed in the same order as the positional
    parameters of ``Agent_Skills``: action, skill name, resource path,
    search query, max chars, offset.
    """
    input_widgets = [
        gr.Radio(
            label="Action",
            choices=["discover", "info", "resources", "validate", "search", "help"],
            value="help",
            info="Operation to perform",
        ),
        gr.Textbox(label="Skill Name", placeholder="pdf", max_lines=1, info="Name of the skill"),
        gr.Textbox(label="Resource Path", placeholder="references/forms.md", max_lines=1, info="Path to resource within skill"),
        gr.Textbox(label="Search Query", placeholder="MCP", max_lines=1, info="Keyword to search for"),
        gr.Slider(minimum=0, maximum=100000, step=500, value=3000, label="Max Chars", info="Max characters for content (0 = no limit)"),
        gr.Slider(minimum=0, maximum=1_000_000, step=100, value=0, label="Offset", info="Start offset (Info/Resources)"),
    ]
    result_box = gr.Textbox(label="Result", lines=20)

    blurb = "".join((
        '<div style="text-align:center; overflow:hidden;">',
        "Discover, inspect, and access Agent Skills. ",
        "Skills provide structured instructions and resources for specialized tasks.",
        "</div>",
    ))

    return gr.Interface(
        fn=Agent_Skills,
        inputs=input_widgets,
        outputs=result_box,
        title="Agent Skills",
        description=blurb,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
__all__ = ["Agent_Skills", "build_interface"]
|
Modules/Agent_Terminal.py
CHANGED
|
@@ -1,159 +1,135 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
import sys
|
| 4 |
import types
|
| 5 |
import inspect
|
| 6 |
import functools
|
| 7 |
from typing import Annotated, get_type_hints, get_origin, get_args
|
| 8 |
-
|
| 9 |
-
import gradio as gr
|
| 10 |
-
from ._docstrings import autodoc
|
| 11 |
-
from ._core import sandboxed_exec
|
| 12 |
-
|
| 13 |
-
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 14 |
-
|
| 15 |
# NOTE: Tool imports are deferred to _get_tools_map() to avoid circular imports
|
| 16 |
-
#
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
# Example usages for each tool - simple and advanced
|
| 20 |
_TOOL_EXAMPLES = {
|
| 21 |
-
"Web_Fetch": (
|
| 22 |
-
'Web_Fetch(url="https://example.com")',
|
| 23 |
-
'Web_Fetch(url="https://example.com", max_chars=5000, mode="url_scraper")',
|
| 24 |
-
),
|
| 25 |
"Web_Search": (
|
| 26 |
'Web_Search(query="Python tutorials")',
|
| 27 |
'Web_Search(query="AI news", max_results=10, search_type="news", date_filter="week")',
|
| 28 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"Code_Interpreter": (
|
| 30 |
'Code_Interpreter(code="print(2 + 2)")',
|
| 31 |
'Code_Interpreter(code="import math; print(math.pi)", timeout=60)',
|
| 32 |
-
),
|
| 33 |
"Shell_Command": (
|
| 34 |
'Shell_Command(command="echo Hello")',
|
| 35 |
'Shell_Command(command="ls -la", timeout=30)',
|
| 36 |
),
|
| 37 |
-
"File_System": (
|
| 38 |
-
'File_System(action="list", path="/")',
|
| 39 |
-
'File_System(action="edit", path="/script.py", content="<<<<<<< SEARCH\\nold_text\\n=======\\nnew_text\\n>>>>>>> REPLACE")',
|
| 40 |
-
),
|
| 41 |
-
"Obsidian_Vault": (
|
| 42 |
-
'Obsidian_Vault(action="list", path="/")',
|
| 43 |
-
'Obsidian_Vault(action="search", query="meeting notes", recursive=True)',
|
| 44 |
-
),
|
| 45 |
"Memory_Manager": (
|
| 46 |
'Memory_Manager(action="list")',
|
| 47 |
'Memory_Manager(action="save", text="Remember this fact", tags="important, facts")',
|
| 48 |
),
|
| 49 |
-
"Generate_Speech": (
|
| 50 |
-
'Generate_Speech(text="Hello, world!")',
|
| 51 |
-
'Generate_Speech(text="Welcome to the demo", voice="af_heart", speed=1.2)',
|
| 52 |
-
),
|
| 53 |
"Generate_Image": (
|
| 54 |
'Generate_Image(prompt="A sunset over mountains")',
|
| 55 |
'Generate_Image(prompt="A cyberpunk city", steps=50, cfg_scale=9.0, width=1024, height=768)',
|
| 56 |
),
|
| 57 |
-
"Generate_Video": (
|
| 58 |
-
'Generate_Video(prompt="A cat playing piano")',
|
| 59 |
-
'Generate_Video(prompt="Ocean waves", duration=5, aspect_ratio="16:9")',
|
| 60 |
-
),
|
| 61 |
-
"Deep_Research": (
|
| 62 |
-
'Deep_Research(query="Climate change effects")',
|
| 63 |
-
'Deep_Research(query="Quantum computing advances", max_sources=10, search_type="news")',
|
| 64 |
-
),
|
| 65 |
-
"Agent_Skills": (
|
| 66 |
-
'Agent_Skills(action="discover")',
|
| 67 |
-
'Agent_Skills(action="info", skill_name="pdf")',
|
| 68 |
-
),
|
| 69 |
}
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
def _format_tool_usage(func) -> str:
|
| 73 |
-
"""Generate detailed usage information for a tool function."""
|
| 74 |
-
name = func.__name__
|
| 75 |
-
doc = func.__doc__ or "No description available."
|
| 76 |
-
|
| 77 |
-
# Extract just the summary (first paragraph) - skip Args/Returns sections
|
| 78 |
-
# since we generate our own detailed parameter list
|
| 79 |
-
doc_lines = doc.strip().split('\n')
|
| 80 |
-
summary_lines = []
|
| 81 |
-
for line in doc_lines:
|
| 82 |
-
stripped = line.strip().lower()
|
| 83 |
-
# Stop at Args:, Returns:, Parameters:, etc.
|
| 84 |
-
if stripped.startswith(('args:', 'returns:', 'parameters:', 'raises:', 'example:', 'note:', 'notes:')):
|
| 85 |
-
break
|
| 86 |
-
summary_lines.append(line)
|
| 87 |
-
summary = '\n'.join(summary_lines).strip()
|
| 88 |
-
|
| 89 |
-
# Get the signature
|
| 90 |
-
sig = inspect.signature(func)
|
| 91 |
-
|
| 92 |
-
# Try to get type hints
|
| 93 |
-
try:
|
| 94 |
-
hints = get_type_hints(func, include_extras=True)
|
| 95 |
-
except Exception:
|
| 96 |
-
hints = {}
|
| 97 |
-
|
| 98 |
-
lines = [f"=== {name} ===", "", summary, "", "Parameters:"]
|
| 99 |
-
|
| 100 |
-
for param_name, param in sig.parameters.items():
|
| 101 |
-
if param_name in ("self", "cls"):
|
| 102 |
-
continue
|
| 103 |
-
|
| 104 |
-
# Get type and description from Annotated if available
|
| 105 |
-
hint = hints.get(param_name)
|
| 106 |
-
type_str = "any"
|
| 107 |
-
desc = ""
|
| 108 |
-
|
| 109 |
-
if hint is not None:
|
| 110 |
-
if get_origin(hint) is Annotated:
|
| 111 |
-
args = get_args(hint)
|
| 112 |
-
if args:
|
| 113 |
-
type_str = getattr(args[0], "__name__", str(args[0]))
|
| 114 |
-
if len(args) > 1 and isinstance(args[1], str):
|
| 115 |
-
desc = args[1]
|
| 116 |
-
else:
|
| 117 |
-
type_str = getattr(hint, "__name__", str(hint))
|
| 118 |
-
|
| 119 |
-
# Check for default
|
| 120 |
-
if param.default is not inspect.Parameter.empty:
|
| 121 |
-
default_repr = repr(param.default)
|
| 122 |
-
if len(default_repr) > 50:
|
| 123 |
-
default_repr = default_repr[:47] + "..."
|
| 124 |
-
default_str = f" = {default_repr}"
|
| 125 |
-
else:
|
| 126 |
-
default_str = " (required)"
|
| 127 |
-
|
| 128 |
-
lines.append(f" - {param_name}: {type_str}{default_str}")
|
| 129 |
-
if desc:
|
| 130 |
-
lines.append(f" {desc}")
|
| 131 |
-
|
| 132 |
-
# Add examples
|
| 133 |
-
lines.append("")
|
| 134 |
-
lines.append("Examples:")
|
| 135 |
-
if name in _TOOL_EXAMPLES:
|
| 136 |
-
simple, advanced = _TOOL_EXAMPLES[name]
|
| 137 |
-
lines.append(f" {simple}")
|
| 138 |
-
lines.append(f" {advanced}")
|
| 139 |
-
else:
|
| 140 |
-
lines.append(f" {name}(...)")
|
| 141 |
-
|
| 142 |
-
return "\n".join(lines)
|
| 143 |
-
|
| 144 |
-
|
| 145 |
def _wrap_tool_for_no_arg_usage(func):
|
| 146 |
-
"""
|
| 147 |
-
Wrap a tool function so that calling it with no arguments
|
| 148 |
-
returns usage information instead of raising an error.
|
| 149 |
-
"""
|
| 150 |
-
@functools.wraps(func)
|
| 151 |
-
def wrapper(*args, **kwargs):
|
| 152 |
-
# If called with no arguments, return usage info
|
| 153 |
-
if not args and not kwargs:
|
| 154 |
-
return _format_tool_usage(func)
|
| 155 |
-
return func(*args, **kwargs)
|
| 156 |
-
|
| 157 |
# Preserve the original function for introspection
|
| 158 |
wrapper._original_func = func
|
| 159 |
return wrapper
|
|
@@ -164,140 +140,126 @@ def _get_tools_map():
|
|
| 164 |
Imports are done here (lazily) to avoid circular imports when app.py loads Agent_Terminal.
|
| 165 |
"""
|
| 166 |
# Lazy imports to avoid circular import during app startup
|
| 167 |
-
from .File_System import File_System
|
| 168 |
-
from .Web_Fetch import Web_Fetch
|
| 169 |
from .Web_Search import Web_Search
|
|
|
|
| 170 |
from .Memory_Manager import Memory_Manager
|
| 171 |
-
from .Generate_Speech import Generate_Speech, List_Kokoro_Voices
|
| 172 |
from .Generate_Image import Generate_Image
|
| 173 |
-
from .Generate_Video import Generate_Video
|
| 174 |
-
from .Deep_Research import Deep_Research
|
| 175 |
-
from .Obsidian_Vault import Obsidian_Vault
|
| 176 |
from .Shell_Command import Shell_Command
|
| 177 |
from .Code_Interpreter import Code_Interpreter
|
| 178 |
-
from .Agent_Skills import Agent_Skills
|
| 179 |
-
|
| 180 |
raw_tools = {
|
| 181 |
-
"Web_Fetch": Web_Fetch,
|
| 182 |
"Web_Search": Web_Search,
|
|
|
|
| 183 |
"Memory_Manager": Memory_Manager,
|
| 184 |
-
"Generate_Speech": Generate_Speech,
|
| 185 |
-
"List_Kokoro_Voices": List_Kokoro_Voices,
|
| 186 |
"Generate_Image": Generate_Image,
|
| 187 |
-
"Generate_Video": Generate_Video,
|
| 188 |
-
"Deep_Research": Deep_Research,
|
| 189 |
-
"File_System": File_System,
|
| 190 |
-
"Obsidian_Vault": Obsidian_Vault,
|
| 191 |
"Shell_Command": Shell_Command,
|
| 192 |
"Code_Interpreter": Code_Interpreter,
|
| 193 |
-
"Agent_Skills": Agent_Skills,
|
| 194 |
}
|
| 195 |
return {name: _wrap_tool_for_no_arg_usage(func) for name, func in raw_tools.items()}
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
def search_tools(query: str) -> str:
|
| 200 |
-
"""Search for tools by name or description. Returns usage info for matches."""
|
| 201 |
-
query = query.lower()
|
| 202 |
-
matches = []
|
| 203 |
-
tools = _get_tools_map()
|
| 204 |
-
for name, func in tools.items():
|
| 205 |
-
# Get original function for docstring if wrapped
|
| 206 |
-
original = getattr(func, '_original_func', func)
|
| 207 |
-
doc = (original.__doc__ or "").lower()
|
| 208 |
-
if query in name.lower() or query in doc:
|
| 209 |
-
matches.append((name, func))
|
| 210 |
-
|
| 211 |
-
if not matches:
|
| 212 |
-
return f"No tools found matching '{query}'."
|
| 213 |
-
|
| 214 |
-
output = []
|
| 215 |
-
for name, func in matches:
|
| 216 |
-
output.append(_format_tool_usage(getattr(func, '_original_func', func)))
|
| 217 |
-
output.append("")
|
| 218 |
-
return "\n".join(output)
|
| 219 |
-
|
| 220 |
-
def _initialize_mock_modules():
|
| 221 |
-
"""
|
| 222 |
-
Registers a mock 'functions' module in sys.modules so that LLMs
|
| 223 |
-
can do 'from functions import ...' without error.
|
| 224 |
-
Uses wrapped tools that return usage info when called with no args.
|
| 225 |
-
"""
|
| 226 |
-
mock_module = types.ModuleType("functions")
|
| 227 |
-
|
| 228 |
-
# Add wrapped tools (return usage when called with no args)
|
| 229 |
-
for name, tool in _get_tools_map().items():
|
| 230 |
-
setattr(mock_module, name, tool)
|
| 231 |
-
|
| 232 |
-
# Add helpers
|
| 233 |
-
helpers = {
|
| 234 |
-
"search_tools": search_tools,
|
| 235 |
-
}
|
| 236 |
-
for name, func in helpers.items():
|
| 237 |
-
setattr(mock_module, name, func)
|
| 238 |
-
|
| 239 |
-
sys.modules["functions"] = mock_module
|
| 240 |
-
|
| 241 |
-
# Defer initialization until first use to avoid circular imports during app startup
|
| 242 |
-
_mock_modules_initialized = False
|
| 243 |
-
|
| 244 |
-
def _ensure_mock_modules():
|
| 245 |
-
"""Initialize mock modules on first use (deferred to avoid circular imports)."""
|
| 246 |
-
global _mock_modules_initialized
|
| 247 |
-
if not _mock_modules_initialized:
|
| 248 |
-
_initialize_mock_modules()
|
| 249 |
-
_mock_modules_initialized = True
|
| 250 |
-
|
| 251 |
TOOL_SUMMARY = (
|
| 252 |
"Executes Python code as the unified interface for the entire tools ecosystem. "
|
| 253 |
"Use Agent Terminal repeatedly whenever you need to chain or combine tool operations. Input must be JSON that will be executed in Python. "
|
| 254 |
-
"Available tools: `
|
| 255 |
)
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
@autodoc(
|
| 260 |
-
summary=TOOL_SUMMARY,
|
| 261 |
-
)
|
| 262 |
-
def Agent_Terminal(input: Annotated[str, (
|
| 263 |
-
"Python source code to run; stdout is captured and returned. "
|
| 264 |
-
"Use `search_tools(`query`)` to search tools by name or capability, returns tool definitions and examples. "
|
| 265 |
-
"Call any tool with no arguments to get its full usage info (e.g., `Generate_Image()`)."
|
| 266 |
-
)]) -> str:
|
| 267 |
-
# Initialize mock modules on first call (deferred to avoid circular imports)
|
| 268 |
-
_ensure_mock_modules()
|
| 269 |
-
|
| 270 |
-
_log_call_start("Agent_Terminal", input=_truncate_for_log(input or "", 300))
|
| 271 |
-
if input is None:
|
| 272 |
-
result = "No code provided."
|
| 273 |
-
_log_call_end("Agent_Terminal", result)
|
| 274 |
-
return result
|
| 275 |
-
|
| 276 |
-
# Get wrapped tools that return usage info when called with no args
|
| 277 |
-
wrapped_tools = _get_tools_map()
|
| 278 |
-
|
| 279 |
-
# Build tools environment to inject
|
| 280 |
-
tools_env = {
|
| 281 |
-
**wrapped_tools,
|
| 282 |
-
"search_tools": search_tools,
|
| 283 |
-
}
|
| 284 |
-
|
| 285 |
-
# Execute with AST mode to print all expression results
|
| 286 |
-
result = sandboxed_exec(input, extra_globals=tools_env, ast_mode=True)
|
| 287 |
-
_log_call_end("Agent_Terminal", _truncate_for_log(result))
|
| 288 |
-
return result
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
def build_interface() -> gr.Interface:
|
| 292 |
-
return gr.Interface(
|
| 293 |
-
fn=Agent_Terminal,
|
| 294 |
-
inputs=gr.Code(label="Python Code", language="python"),
|
| 295 |
-
outputs=gr.Textbox(label="Output", lines=5, max_lines=20),
|
| 296 |
-
title="Agent Terminal",
|
| 297 |
-
description="<div style=\"text-align:center\">Interact with all other tools via a Python API. Reduces token usage by 90%.</div>",
|
| 298 |
-
api_description=TOOL_SUMMARY,
|
| 299 |
-
flagging_mode="never",
|
| 300 |
-
)
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
__all__ = ["Agent_Terminal", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import sys
|
| 4 |
import types
|
| 5 |
import inspect
|
| 6 |
import functools
|
| 7 |
from typing import Annotated, get_type_hints, get_origin, get_args
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
from ._docstrings import autodoc
|
| 11 |
+
from ._core import sandboxed_exec
|
| 12 |
+
|
| 13 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 14 |
+
|
| 15 |
# NOTE: Tool imports are deferred to _get_tools_map() to avoid circular imports
|
| 16 |
+
# during app startup.
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Example usages for each tool - simple and advanced
|
| 20 |
_TOOL_EXAMPLES = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"Web_Search": (
|
| 22 |
'Web_Search(query="Python tutorials")',
|
| 23 |
'Web_Search(query="AI news", max_results=10, search_type="news", date_filter="week")',
|
| 24 |
),
|
| 25 |
+
"ScrapeGraphAI": (
|
| 26 |
+
'ScrapeGraphAI(action="extract", url="https://example.com", prompt="Extract the main offer")',
|
| 27 |
+
'ScrapeGraphAI(action="multi_extract", urls=["https://example.com/a", "https://example.com/b"], prompt="Compare pricing", schema_json={"type":"object","properties":{"plans":{"type":"array","items":{"type":"string"}}}})',
|
| 28 |
+
),
|
| 29 |
"Code_Interpreter": (
|
| 30 |
'Code_Interpreter(code="print(2 + 2)")',
|
| 31 |
'Code_Interpreter(code="import math; print(math.pi)", timeout=60)',
|
| 32 |
+
),
|
| 33 |
"Shell_Command": (
|
| 34 |
'Shell_Command(command="echo Hello")',
|
| 35 |
'Shell_Command(command="ls -la", timeout=30)',
|
| 36 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"Memory_Manager": (
|
| 38 |
'Memory_Manager(action="list")',
|
| 39 |
'Memory_Manager(action="save", text="Remember this fact", tags="important, facts")',
|
| 40 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"Generate_Image": (
|
| 42 |
'Generate_Image(prompt="A sunset over mountains")',
|
| 43 |
'Generate_Image(prompt="A cyberpunk city", steps=50, cfg_scale=9.0, width=1024, height=768)',
|
| 44 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _format_tool_usage(func) -> str:
    """Generate detailed usage information for a tool function.

    The text contains the tool name, the leading part of its docstring (up to
    the first ``Args:``/``Returns:``/... section header), one entry per
    parameter with its type, its default (or "(required)") and any string
    description carried by ``Annotated``, and finally the examples registered
    in ``_TOOL_EXAMPLES`` (or a ``name(...)`` placeholder).

    Args:
        func: The tool callable to describe.

    Returns:
        The formatted usage text.
    """
    name = func.__name__
    doc = func.__doc__ or "No description available."

    # Keep only the text before the first section header; the parameter list
    # below is generated from the signature instead. Both "example:" and
    # "examples:" are listed because startswith() needs the exact prefix.
    section_headers = (
        'args:', 'returns:', 'parameters:', 'raises:',
        'example:', 'examples:', 'note:', 'notes:',
    )
    summary_lines = []
    for line in doc.strip().split('\n'):
        if line.strip().lower().startswith(section_headers):
            break
        summary_lines.append(line)
    summary = '\n'.join(summary_lines).strip()

    sig = inspect.signature(func)

    # Resolving hints can fail (e.g. unresolved forward references); fall
    # back to showing every parameter as "any".
    try:
        hints = get_type_hints(func, include_extras=True)
    except Exception:
        hints = {}

    def _type_label(tp) -> str:
        """Return a readable label for a type hint, keeping generic parameters."""
        plain_name = getattr(tp, "__name__", None)
        if plain_name is not None and not get_args(tp):
            return plain_name
        # Parameterised hints (Optional[str], list[dict], ...) would collapse
        # to "Optional"/"list" via __name__; their str() keeps the arguments.
        return str(tp).replace("typing.", "")

    lines = [f"=== {name} ===", "", summary, "", "Parameters:"]

    for param_name, param in sig.parameters.items():
        if param_name in ("self", "cls"):
            continue

        hint = hints.get(param_name)
        type_str = "any"
        desc = ""

        if hint is not None:
            if get_origin(hint) is Annotated:
                # Annotated[T, "description", ...]: T is the type, and the
                # first extra is the description when it is a string.
                args = get_args(hint)
                if args:
                    type_str = _type_label(args[0])
                    if len(args) > 1 and isinstance(args[1], str):
                        desc = args[1]
            else:
                type_str = _type_label(hint)

        if param.default is not inspect.Parameter.empty:
            default_repr = repr(param.default)
            # Cap very long default values at 50 characters.
            if len(default_repr) > 50:
                default_repr = default_repr[:47] + "..."
            default_str = f" = {default_repr}"
        else:
            default_str = " (required)"

        lines.append(f" - {param_name}: {type_str}{default_str}")
        if desc:
            lines.append(f" {desc}")

    lines.append("")
    lines.append("Examples:")
    if name in _TOOL_EXAMPLES:
        simple, advanced = _TOOL_EXAMPLES[name]
        lines.append(f" {simple}")
        lines.append(f" {advanced}")
    else:
        lines.append(f" {name}(...)")

    return "\n".join(lines)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
def _wrap_tool_for_no_arg_usage(func):
|
| 122 |
+
"""
|
| 123 |
+
Wrap a tool function so that calling it with no arguments
|
| 124 |
+
returns usage information instead of raising an error.
|
| 125 |
+
"""
|
| 126 |
+
@functools.wraps(func)
|
| 127 |
+
def wrapper(*args, **kwargs):
|
| 128 |
+
# If called with no arguments, return usage info
|
| 129 |
+
if not args and not kwargs:
|
| 130 |
+
return _format_tool_usage(func)
|
| 131 |
+
return func(*args, **kwargs)
|
| 132 |
+
|
| 133 |
# Preserve the original function for introspection
|
| 134 |
wrapper._original_func = func
|
| 135 |
return wrapper
|
|
|
|
| 140 |
Imports are done here (lazily) to avoid circular imports when app.py loads Agent_Terminal.
|
| 141 |
"""
|
| 142 |
# Lazy imports to avoid circular import during app startup
|
|
|
|
|
|
|
| 143 |
from .Web_Search import Web_Search
|
| 144 |
+
from .ScrapeGraphAI import ScrapeGraphAI
|
| 145 |
from .Memory_Manager import Memory_Manager
|
|
|
|
| 146 |
from .Generate_Image import Generate_Image
|
|
|
|
|
|
|
|
|
|
| 147 |
from .Shell_Command import Shell_Command
|
| 148 |
from .Code_Interpreter import Code_Interpreter
|
|
|
|
|
|
|
| 149 |
raw_tools = {
|
|
|
|
| 150 |
"Web_Search": Web_Search,
|
| 151 |
+
"ScrapeGraphAI": ScrapeGraphAI,
|
| 152 |
"Memory_Manager": Memory_Manager,
|
|
|
|
|
|
|
| 153 |
"Generate_Image": Generate_Image,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
"Shell_Command": Shell_Command,
|
| 155 |
"Code_Interpreter": Code_Interpreter,
|
|
|
|
| 156 |
}
|
| 157 |
return {name: _wrap_tool_for_no_arg_usage(func) for name, func in raw_tools.items()}
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def search_tools(query: str) -> str:
    """Return usage text for every tool whose name or docstring contains *query*.

    Matching is a case-insensitive substring test against the tool name and
    the docstring of the unwrapped tool function. When nothing matches, a
    single "No tools found" line is returned instead.
    """
    needle = query.lower()

    hits = []
    for tool_name, tool in _get_tools_map().items():
        # Wrapped tools expose the real function (and its docstring) here.
        target = getattr(tool, '_original_func', tool)
        haystack = (target.__doc__ or "").lower()
        if needle in tool_name.lower() or needle in haystack:
            hits.append(target)

    if not hits:
        return f"No tools found matching '{needle}'."

    sections = []
    for target in hits:
        # Each usage block is followed by a blank separator line.
        sections += [_format_tool_usage(target), ""]
    return "\n".join(sections)
|
| 181 |
+
|
| 182 |
+
def _initialize_mock_modules():
    """Install a synthetic ``functions`` module into ``sys.modules``.

    LLM-written code frequently starts with ``from functions import ...``.
    Registering this module lets such imports succeed. It exposes every
    wrapped tool from ``_get_tools_map()`` plus the ``search_tools`` helper.
    """
    functions_module = types.ModuleType("functions")

    # Tools first, helpers second, so a helper wins on a name clash.
    exported = dict(_get_tools_map())
    exported["search_tools"] = search_tools

    for attr_name, attr_value in exported.items():
        setattr(functions_module, attr_name, attr_value)

    sys.modules["functions"] = functions_module
|
| 202 |
+
|
| 203 |
+
# The mock `functions` module is built on first use rather than at import
# time, to avoid circular imports during app startup.
_mock_modules_initialized = False


def _ensure_mock_modules():
    """Build the mock modules once; later calls return immediately."""
    global _mock_modules_initialized
    if _mock_modules_initialized:
        return
    _initialize_mock_modules()
    _mock_modules_initialized = True
|
| 212 |
+
|
| 213 |
TOOL_SUMMARY = (
|
| 214 |
"Executes Python code as the unified interface for the entire tools ecosystem. "
|
| 215 |
"Use Agent Terminal repeatedly whenever you need to chain or combine tool operations. Input must be JSON that will be executed in Python. "
|
| 216 |
+
"Available tools: `Web_Search`, `ScrapeGraphAI`, `Code_Interpreter`, `Shell_Command`, `Memory_Manager`, `Generate_Image`."
|
| 217 |
)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def Agent_Terminal(input: Annotated[str, (
    "Python source code to run; stdout is captured and returned. "
    "Use `search_tools(`query`)` to search tools by name or capability, returns tool definitions and examples. "
    "Call any tool with no arguments to get its full usage info (e.g., `Generate_Image()`)."
)]) -> str:
    # NOTE(review): no docstring is written here on purpose; `autodoc` is
    # given the summary and presumably manages `__doc__` itself. Confirm
    # before adding one.

    # The mock `functions` module is created lazily on the first call so that
    # importing this file does not trigger circular imports.
    _ensure_mock_modules()

    _log_call_start("Agent_Terminal", input=_truncate_for_log(input or "", 300))

    if input is not None:
        # Namespace injected into the executed code: every wrapped tool
        # (calling one with no arguments returns its usage text) plus the
        # `search_tools` helper.
        namespace = dict(_get_tools_map())
        namespace["search_tools"] = search_tools

        # ast_mode=True: expression results are printed as well.
        output = sandboxed_exec(input, extra_globals=namespace, ast_mode=True)
        _log_call_end("Agent_Terminal", _truncate_for_log(output))
        return output

    message = "No code provided."
    _log_call_end("Agent_Terminal", message)
    return message
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def build_interface() -> gr.Interface:
    """Create the Gradio interface that exposes ``Agent_Terminal``."""
    code_input = gr.Code(label="Python Code", language="python")
    text_output = gr.Textbox(label="Output", lines=5, max_lines=20)
    banner = "<div style=\"text-align:center\">Interact with all other tools via a Python API. Reduces token usage by 90%.</div>"
    return gr.Interface(
        fn=Agent_Terminal,
        inputs=code_input,
        outputs=text_output,
        title="Agent Terminal",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
__all__ = ["Agent_Terminal", "build_interface"]
|
Modules/Code_Interpreter.py
CHANGED
|
@@ -1,40 +1,40 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from typing import Annotated
|
| 4 |
-
|
| 5 |
-
import gradio as gr
|
| 6 |
-
from ._docstrings import autodoc
|
| 7 |
-
from ._core import sandboxed_exec
|
| 8 |
-
|
| 9 |
-
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# Single source of truth for the LLM-facing tool description
|
| 13 |
-
TOOL_SUMMARY = (
|
| 14 |
-
"Execute Python code from the tool root; returns captured stdout or the exception text."
|
| 15 |
-
)
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
@autodoc(
    summary=TOOL_SUMMARY,
)
def Code_Interpreter(code: Annotated[str, "Python source code to run; stdout is captured and returned."]) -> str:
    # NOTE(review): no docstring is written here on purpose; `autodoc` is
    # given the summary and presumably manages `__doc__` itself.
    _log_call_start("Code_Interpreter", code=_truncate_for_log(code or "", 300))

    # A missing payload is answered directly instead of being handed to the
    # sandbox, which is how Agent_Terminal treats the same case.
    if code is None:
        result = "No code provided."
        _log_call_end("Code_Interpreter", result)
        return result

    # ast_mode=False: only what the code writes to stdout (or the exception
    # text, per TOOL_SUMMARY) is returned.
    result = sandboxed_exec(code, ast_mode=False)
    _log_call_end("Code_Interpreter", _truncate_for_log(result))
    return result
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def build_interface() -> gr.Interface:
    """Create the Gradio interface that exposes ``Code_Interpreter``."""
    code_input = gr.Code(label="Python Code", language="python")
    text_output = gr.Textbox(label="Output", lines=5, max_lines=20)
    banner = "<div style=\"text-align:center\">Execute Python code and see the output.</div>"
    return gr.Interface(
        fn=Code_Interpreter,
        inputs=code_input,
        outputs=text_output,
        title="Code Interpreter",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 38 |
-
|
| 39 |
-
|
| 40 |
__all__ = ["Code_Interpreter", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Annotated
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from ._docstrings import autodoc
|
| 7 |
+
from ._core import sandboxed_exec
|
| 8 |
+
|
| 9 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Single source of truth for the LLM-facing tool description
|
| 13 |
+
TOOL_SUMMARY = (
|
| 14 |
+
"Execute Python code from the tool root; returns captured stdout or the exception text."
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def Code_Interpreter(code: Annotated[str, "Python source code to run; stdout is captured and returned."]) -> str:
    # NOTE(review): no docstring is written here on purpose; `autodoc` is
    # given the summary and presumably manages `__doc__` itself.
    _log_call_start("Code_Interpreter", code=_truncate_for_log(code or "", 300))

    # A missing payload is answered directly instead of being handed to the
    # sandbox, which is how Agent_Terminal treats the same case.
    if code is None:
        result = "No code provided."
        _log_call_end("Code_Interpreter", result)
        return result

    # ast_mode=False: only what the code writes to stdout (or the exception
    # text, per TOOL_SUMMARY) is returned.
    result = sandboxed_exec(code, ast_mode=False)
    _log_call_end("Code_Interpreter", _truncate_for_log(result))
    return result
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def build_interface() -> gr.Interface:
    """Create the Gradio interface that exposes ``Code_Interpreter``."""
    code_input = gr.Code(label="Python Code", language="python")
    text_output = gr.Textbox(label="Output", lines=5, max_lines=20)
    banner = "<div style=\"text-align:center\">Execute Python code and see the output.</div>"
    return gr.Interface(
        fn=Code_Interpreter,
        inputs=code_input,
        outputs=text_output,
        title="Code Interpreter",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 38 |
+
|
| 39 |
+
|
| 40 |
__all__ = ["Code_Interpreter", "build_interface"]
|
Modules/Deep_Research.py
CHANGED
|
@@ -1,596 +1,596 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import re
|
| 5 |
-
import tempfile
|
| 6 |
-
import time
|
| 7 |
-
import uuid
|
| 8 |
-
from collections import OrderedDict, deque
|
| 9 |
-
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
| 10 |
-
from datetime import datetime
|
| 11 |
-
from typing import Annotated, Callable, Dict, List, Tuple
|
| 12 |
-
from urllib.parse import urlparse
|
| 13 |
-
|
| 14 |
-
import gradio as gr
|
| 15 |
-
import requests
|
| 16 |
-
from bs4 import BeautifulSoup
|
| 17 |
-
from ddgs import DDGS
|
| 18 |
-
from huggingface_hub import InferenceClient
|
| 19 |
-
|
| 20 |
-
from .Web_Fetch import _fullpage_markdown_from_soup, _http_get_enhanced
|
| 21 |
-
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
|
| 22 |
-
from ._docstrings import autodoc
|
| 23 |
-
from .File_System import ROOT_DIR
|
| 24 |
-
from ._core import get_hf_token
|
| 25 |
-
|
| 26 |
-
HF_TEXTGEN_TOKEN = get_hf_token()
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# Single source of truth for the LLM-facing tool description
|
| 30 |
-
TOOL_SUMMARY = (
|
| 31 |
-
"Write a summary of what the user wants to research, and "
|
| 32 |
-
"run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
|
| 33 |
-
"returns (Markdown report, newline-separated source links, downloadable report path). "
|
| 34 |
-
"Provide the user with one-paragraph summary of the research report and the txt file in this format ``."
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
RESEARCHER_SYSTEM_PROMPT = (
|
| 38 |
-
"You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
|
| 39 |
-
"You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
|
| 40 |
-
"<report_format>\n"
|
| 41 |
-
"Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
|
| 42 |
-
"For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
|
| 43 |
-
"</report_format>\n\n"
|
| 44 |
-
"<document_structure>\n"
|
| 45 |
-
"- Always begin with a clear title using a single # header\n"
|
| 46 |
-
"- Organize content into major sections using ## headers\n"
|
| 47 |
-
"- Further divide into subsections using ### headers\n"
|
| 48 |
-
"- Use #### headers sparingly for special subsections\n"
|
| 49 |
-
"- Never skip header levels\n"
|
| 50 |
-
"- Write multiple paragraphs per section or subsection\n"
|
| 51 |
-
"- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
|
| 52 |
-
"- Never use lists, instead always use text or tables\n\n"
|
| 53 |
-
"Mandatory Section Flow:\n"
|
| 54 |
-
"1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
|
| 55 |
-
"2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
|
| 56 |
-
"3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
|
| 57 |
-
"</document_structure>\n\n"
|
| 58 |
-
"<planning_rules>\n"
|
| 59 |
-
"- Always break it down into multiple steps\n"
|
| 60 |
-
"- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
|
| 61 |
-
"- Create the best report that weighs all the evidence from the sources\n"
|
| 62 |
-
"- Use the current date supplied in the first user message to contextualize findings\n"
|
| 63 |
-
"- Make sure that your final report addresses all parts of the query\n"
|
| 64 |
-
"- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
|
| 65 |
-
"- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
|
| 66 |
-
"- As a final step, review your planned report structure and ensure it completely answers the query.\n"
|
| 67 |
-
"</planning_rules>\n\n"
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
FILTERER_SYSTEM_PROMPT = (
|
| 71 |
-
"You are Nymbot Filterer, an analyst who selects the most relevant sources for a research task. "
|
| 72 |
-
"You will be given a summary of the research topic (and optional search queries) followed by multiple fetched documents. "
|
| 73 |
-
"Each document includes its URL and a truncated excerpt. Evaluate how well each source helps answer the research topic. "
|
| 74 |
-
"Return only the URLs that should be used for the final research step. Output plain text with exactly one URL per line and no additional commentary, bullets, numbering, or explanations. "
|
| 75 |
-
"If no sources are relevant, return an empty string."
|
| 76 |
-
)
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
class SlowHost(Exception):
    """Signals that fetching a page timed out.

    Raised by ``_fetch_page_markdown_fast`` and caught by
    ``_fetch_pages_within_budget``, which may re-queue the URL for a retry.
    """
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
def _normalize_query(q: str) -> str:
|
| 84 |
-
if not q:
|
| 85 |
-
return ""
|
| 86 |
-
repl = {"“": '"', "”": '"', "‘": "'", "’": "'", "`": "'"}
|
| 87 |
-
for key, value in repl.items():
|
| 88 |
-
q = q.replace(key, value)
|
| 89 |
-
q = re.sub(r"\s+", " ", q)
|
| 90 |
-
q = re.sub(r'"\s+"', " ", q)
|
| 91 |
-
q = q.strip().strip('"').strip()
|
| 92 |
-
return q
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
def _search_urls_only(query: str, max_results: int) -> list[str]:
|
| 96 |
-
if not query or not query.strip() or max_results <= 0:
|
| 97 |
-
return []
|
| 98 |
-
urls: list[str] = []
|
| 99 |
-
try:
|
| 100 |
-
_search_rate_limiter.acquire()
|
| 101 |
-
with DDGS() as ddgs:
|
| 102 |
-
for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
|
| 103 |
-
url = (item.get("href") or item.get("url") or "").strip()
|
| 104 |
-
if url:
|
| 105 |
-
urls.append(url)
|
| 106 |
-
except Exception:
|
| 107 |
-
pass
|
| 108 |
-
seen = set()
|
| 109 |
-
deduped = []
|
| 110 |
-
for url in urls:
|
| 111 |
-
if url not in seen:
|
| 112 |
-
seen.add(url)
|
| 113 |
-
deduped.append(url)
|
| 114 |
-
return deduped
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
    """Fetch an HTML page and return it as (optionally truncated) Markdown.

    Args:
        url: Page to fetch.
        max_chars: Maximum length of the returned text; values <= 0 disable
            truncation.
        timeout: Timeout forwarded to the HTTP helper.

    Returns:
        The page converted to Markdown, or ``""`` when the request fails for a
        non-timeout reason or the response is not HTML.

    Raises:
        SlowHost: If the request timed out.
    """
    try:
        resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        msg = str(exc)
        # Prefer the exception type; the message check is kept for errors that
        # only mention the timeout in their text.
        if isinstance(exc, requests.exceptions.Timeout) or "timed out" in msg.lower():
            raise SlowHost(msg) from exc
        return ""

    final_url = str(resp.url)
    ctype = resp.headers.get("Content-Type", "")
    if "html" not in ctype.lower():
        return ""

    # Fall back to the detected encoding when the server declared none.
    resp.encoding = resp.encoding or resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")
    md_text = _fullpage_markdown_from_soup(soup, final_url, "")
    if max_chars > 0 and len(md_text) > max_chars:
        md_text = md_text[:max_chars]
    return md_text
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
|
| 140 |
-
out = []
|
| 141 |
-
total = 0
|
| 142 |
-
truncated = False
|
| 143 |
-
for part in parts:
|
| 144 |
-
if not part:
|
| 145 |
-
continue
|
| 146 |
-
if total + len(part) > max_chars:
|
| 147 |
-
out.append(part[: max(0, max_chars - total)])
|
| 148 |
-
truncated = True
|
| 149 |
-
break
|
| 150 |
-
out.append(part)
|
| 151 |
-
total += len(part)
|
| 152 |
-
return ("\n\n".join(out), truncated)
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
    """Assemble the user prompt handed to the researcher model.

    The prompt contains, in order: the query summary, the non-blank search
    queries, a numbered source list and the fetched documents (capped at
    100,000 characters via ``_truncate_join``). Source numbers are the 1-based
    position of each URL in ``url_list``; URLs with no text in ``pages_map``
    are left out but do not shift the numbering of later ones.
    """
    numbered_urls: List[str] = []
    documents: List[str] = []
    for position, url in enumerate(url_list, start=1):
        body = pages_map.get(url, "").strip()
        if body:
            numbered_urls.append(f"[{position}] {url}")
            documents.append(f"[Source {position}] URL: {url}\n\n{body}")

    documents_text, was_truncated = _truncate_join(documents, max_chars=100_000)
    truncation_note = "\n\n[NOTE] Sources truncated due to context limits." if was_truncated else ""

    sections: List[str] = ["<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n"]

    active_queries = [q for q in queries if q and q.strip()]
    if active_queries:
        query_lines = "\n".join(f"- {q.strip()}" for q in active_queries)
        sections.append("<search_queries>\n" + query_lines + "\n</search_queries>\n")

    if numbered_urls:
        sections.append("<sources_list>\n" + "\n".join(numbered_urls) + "\n</sources_list>\n")

    sections.append("<fetched_documents>\n" + documents_text + truncation_note + "\n</fetched_documents>")
    return "\n\n".join(sections)
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
def _build_filter_prompt(summary: str, queries: List[str], pages_map: Dict[str, str]) -> str:
    """Assemble the user prompt handed to the source-filtering model.

    The prompt contains the topic summary, the non-blank search queries, the
    candidate sources (capped at 60,000 characters via ``_truncate_join``) and
    the task instructions. Sources are numbered by their 1-based position in
    ``pages_map``; entries with blank text are omitted.
    """
    sections: List[str] = ["<research_topic_summary>\n" + (summary or "") + "\n</research_topic_summary>"]

    active_queries = [q for q in queries if q and q.strip()]
    if active_queries:
        sections.append("<search_queries>\n" + "\n".join(active_queries) + "\n</search_queries>")

    candidates: List[str] = []
    for position, (url, text) in enumerate(pages_map.items(), start=1):
        body = text.strip()
        if body:
            candidates.append(f"[Source {position}] URL: {url}\n\n{body}")

    candidates_text, was_truncated = _truncate_join(candidates, max_chars=60_000)
    truncation_note = "\n\n[NOTE] Sources truncated due to context limits." if was_truncated else ""
    sections.append("<candidate_sources>\n" + candidates_text + truncation_note + "\n</candidate_sources>")

    sections.append(
        "<task>\nIdentify which of the provided URLs should be retained for the final research synthesis. "
        "Consider coverage, credibility, and relevance to the research topic. "
        "Return ONLY the URLs you choose, with one URL per line and no additional text.\n</task>"
    )
    return "\n\n".join(sections)
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
def _parse_filterer_output(raw: str, allowed_urls: List[str]) -> List[str]:
|
| 200 |
-
if not raw:
|
| 201 |
-
return []
|
| 202 |
-
allowed_set = {url.strip(): idx for idx, url in enumerate(allowed_urls)}
|
| 203 |
-
found_indices: set[int] = set()
|
| 204 |
-
for line in raw.splitlines():
|
| 205 |
-
candidate = line.strip()
|
| 206 |
-
if not candidate:
|
| 207 |
-
continue
|
| 208 |
-
if candidate in allowed_set:
|
| 209 |
-
found_indices.add(allowed_set[candidate])
|
| 210 |
-
continue
|
| 211 |
-
match = re.search(r"https?://[^\s]+", candidate)
|
| 212 |
-
if not match:
|
| 213 |
-
continue
|
| 214 |
-
url = match.group(0).rstrip(".,);]")
|
| 215 |
-
if url in allowed_set:
|
| 216 |
-
found_indices.add(allowed_set[url])
|
| 217 |
-
selected = [allowed_urls[idx] for idx in sorted(found_indices)]
|
| 218 |
-
return selected
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
def _write_report_tmp(text: str) -> str:
    """Write ``text`` to a uniquely named ``.txt`` file under ``ROOT_DIR``.

    Returns:
        The path of the file that was written.
    """
    report_path = os.path.join(ROOT_DIR, "research_report_" + uuid.uuid4().hex + ".txt")
    with open(report_path, "w", encoding="utf-8") as handle:
        handle.write(text)
    return report_path
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
def _fetch_pages_within_budget(urls: List[str], char_limit: int, time_left_fn: Callable[[], float]) -> OrderedDict:
|
| 230 |
-
pages: dict[str, str] = {}
|
| 231 |
-
if not urls:
|
| 232 |
-
return OrderedDict()
|
| 233 |
-
queue = deque(urls)
|
| 234 |
-
attempts: dict[str, int] = {url: 0 for url in urls}
|
| 235 |
-
max_attempts = 2
|
| 236 |
-
max_workers = min(12, max(4, len(urls)))
|
| 237 |
-
in_flight: dict[Future, str] = {}
|
| 238 |
-
delayed: list[tuple[float, str]] = []
|
| 239 |
-
|
| 240 |
-
def schedule_next(executor: ThreadPoolExecutor) -> None:
|
| 241 |
-
while queue and len(in_flight) < max_workers:
|
| 242 |
-
url = queue.popleft()
|
| 243 |
-
if url in pages:
|
| 244 |
-
continue
|
| 245 |
-
attempts.setdefault(url, 0)
|
| 246 |
-
if attempts[url] >= max_attempts:
|
| 247 |
-
continue
|
| 248 |
-
attempts[url] += 1
|
| 249 |
-
tl = time_left_fn()
|
| 250 |
-
if tl <= 0.1:
|
| 251 |
-
return
|
| 252 |
-
per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
|
| 253 |
-
future = executor.submit(_fetch_page_markdown_fast, url, char_limit, per_timeout)
|
| 254 |
-
in_flight[future] = url
|
| 255 |
-
|
| 256 |
-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 257 |
-
schedule_next(executor)
|
| 258 |
-
while (in_flight or queue or delayed) and time_left_fn() > 0.2:
|
| 259 |
-
now = time.time()
|
| 260 |
-
if delayed:
|
| 261 |
-
ready: list[tuple[float, str]] = []
|
| 262 |
-
not_ready: list[tuple[float, str]] = []
|
| 263 |
-
for ready_time, delayed_url in delayed:
|
| 264 |
-
(ready if ready_time <= now else not_ready).append((ready_time, delayed_url))
|
| 265 |
-
delayed = not_ready
|
| 266 |
-
for _, delayed_url in ready:
|
| 267 |
-
queue.append(delayed_url)
|
| 268 |
-
if ready:
|
| 269 |
-
schedule_next(executor)
|
| 270 |
-
done = [future for future in list(in_flight.keys()) if future.done()]
|
| 271 |
-
if not done:
|
| 272 |
-
if not queue and delayed:
|
| 273 |
-
next_ready = min((t for t, _ in delayed), default=time.time())
|
| 274 |
-
sleep_for = max(0.0, next_ready - time.time())
|
| 275 |
-
time.sleep(max(0.02, min(0.25, sleep_for)))
|
| 276 |
-
else:
|
| 277 |
-
time.sleep(0.05)
|
| 278 |
-
continue
|
| 279 |
-
for future in done:
|
| 280 |
-
url = in_flight.pop(future)
|
| 281 |
-
try:
|
| 282 |
-
md = future.result()
|
| 283 |
-
if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
|
| 284 |
-
pages[url] = md
|
| 285 |
-
try:
|
| 286 |
-
print(f"[FETCH OK] {url} (chars={len(md)})", flush=True)
|
| 287 |
-
except Exception:
|
| 288 |
-
pass
|
| 289 |
-
except SlowHost:
|
| 290 |
-
if time_left_fn() > 5.0:
|
| 291 |
-
delayed.append((time.time() + 3.0, url))
|
| 292 |
-
except Exception:
|
| 293 |
-
pass
|
| 294 |
-
schedule_next(executor)
|
| 295 |
-
ordered = OrderedDict((url, pages[url]) for url in urls if url in pages)
|
| 296 |
-
return ordered
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
# Domains whose pages are never fetched. Subdomains (e.g. "www.") are
# blocked as well.
_BLACKLISTED_DOMAINS = frozenset({
    "homedepot.com",
    "tractorsupply.com",
    "mcmaster.com",
    "mrchain.com",
    "answers.com",
    "city-data.com",
    "dictionary.cambridge.org",
})

# URL path suffixes that point at documents/archives rather than HTML pages.
_SKIPPED_EXTENSIONS = (
    ".pdf",
    ".ppt",
    ".pptx",
    ".doc",
    ".docx",
    ".xls",
    ".xlsx",
    ".zip",
    ".gz",
    ".tgz",
    ".bz2",
    ".7z",
    ".rar",
)

_THINK_BLOCK_RE = re.compile(r"<think>[\s\S]*?</think>", re.IGNORECASE)
_THINK_TAG_RE = re.compile(r"</?think>", re.IGNORECASE)
_PLANNING_RE = re.compile(
    r"\b(let me|now i(?:'ll| will)?|first,|i will now|i will|i'll|let's|now let me|i need to|now i'll|now i will)\b",
    re.IGNORECASE,
)


def _is_blacklisted_url(url: str) -> bool:
    """Return True when the URL's host is a blacklisted domain or a subdomain of one."""
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        return False
    return any(host == domain or host.endswith("." + domain) for domain in _BLACKLISTED_DOMAINS)


def _has_skipped_extension(url: str) -> bool:
    """Return True when the URL path ends with a non-HTML document/archive extension."""
    try:
        path = urlparse(url).path.lower()
    except ValueError:
        return False
    return path.endswith(_SKIPPED_EXTENSIONS)


def _strip_reasoning(raw: str) -> str:
    """Remove ``<think>...</think>`` blocks and any stray think tags."""
    return _THINK_TAG_RE.sub("", _THINK_BLOCK_RE.sub("", raw))


def _drop_planning_paragraphs(text: str) -> tuple[str, int]:
    """Drop paragraphs matching the planning phrases; return (report, removed_count).

    Falls back to the unfiltered text when every paragraph would be removed.
    """
    paragraphs = [p for p in re.split(r"\n\s*\n", text) if p.strip()]
    kept = [p for p in paragraphs if not _PLANNING_RE.search(p)]
    report = "\n\n".join(kept).strip()
    if not report:
        report = text.strip()
    return report, len(paragraphs) - len(kept)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Deep_Research(
    summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
    query1: Annotated[str, "DDG Search Query 1"],
    max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
    query2: Annotated[str, "DDG Search Query 2"] = "",
    max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
    query3: Annotated[str, "DDG Search Query 3"] = "",
    max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
    query4: Annotated[str, "DDG Search Query 4"] = "",
    max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
    query5: Annotated[str, "DDG Search Query 5"] = "",
    max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
) -> tuple[str, str, str]:
    # NOTE(review): no docstring is written here on purpose; `autodoc` is
    # given the summary and presumably manages `__doc__` itself.
    #
    # Pipeline: search -> quick fetch (3000 chars/page) -> LLM source filter
    # -> deeper fetch (8000 chars/page) -> LLM report -> post-process -> save.
    # Returns (report markdown, numbered source links, report file path) and
    # raises gr.Error when the token is missing or every model call fails.
    _log_call_start(
        "Deep_Research",
        summary=_truncate_for_log(summary or "", 200),
        queries=[q for q in [query1, query2, query3, query4, query5] if q],
    )
    if not HF_TEXTGEN_TOKEN:
        _log_call_end("Deep_Research", "error=missing HF token")
        raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")

    queries = [_normalize_query(q or "") for q in (query1, query2, query3, query4, query5)]
    reqs = [max(1, min(50, int(m))) for m in (max1, max2, max3, max4, max5)]
    # Only queries that will actually run count against the 50-result budget;
    # otherwise the default limits of unused query slots force a reset.
    total_requested = sum(count for query, count in zip(queries, reqs) if query.strip())
    if total_requested > 50:
        reqs = [10, 10, 10, 10, 10]
    active_searches = [(query.strip(), count) for query, count in zip(queries, reqs) if query.strip()]
    active_queries = [query for query in queries if query.strip()]

    start_ts = time.time()
    budget_seconds = 55.0
    deadline = start_ts + budget_seconds

    def time_left() -> float:
        return max(0.0, deadline - time.time())

    now_dt = datetime.now().astimezone()
    date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
    if not date_str:
        date_str = now_dt.isoformat()

    # --- Stage 1: run the searches in parallel and merge unique URLs ---------
    all_urls: list[str] = []
    seen_urls: set[str] = set()
    with ThreadPoolExecutor(max_workers=min(5, len(active_searches)) or 1) as executor:
        tasks = [executor.submit(_search_urls_only, query, count) for query, count in active_searches]
        for future in as_completed(tasks):
            try:
                urls = future.result() or []
            except Exception as exc:
                print(f"[SEARCH] Task failed: {str(exc)[:200]}", flush=True)
                urls = []
            for url in urls:
                if url not in seen_urls:
                    seen_urls.add(url)
                    all_urls.append(url)
                if len(all_urls) >= 50:
                    break
            if time_left() <= 0.5:
                break
    if len(all_urls) > 50:
        all_urls = all_urls[:50]

    all_urls = [url for url in all_urls if not _is_blacklisted_url(url)]
    all_urls = [url for url in all_urls if not _has_skipped_extension(url)]

    # --- Stage 2: quick fetch of short excerpts for the filter model ---------
    truncated_pages = OrderedDict()
    if all_urls and time_left() > 0.2:
        truncated_pages = _fetch_pages_within_budget(all_urls, 3000, time_left)
    print(
        f"[PIPELINE] Initial fetch complete: candidates={len(all_urls)}, truncated_documents={len(truncated_pages)}, time_left={time_left():.2f}s",
        flush=True,
    )

    def _invoke_chat(messages, provider: str, max_tokens: int, temp: float, top_p: float):
        client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
        return client.chat.completions.create(
            model="zai-org/GLM-4.7",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
        )

    # --- Stage 3: let the filter model pick the relevant sources -------------
    filtered_urls: List[str] = list(truncated_pages.keys())
    filter_output = ""
    filter_used_fallback = False
    filter_success = False
    if truncated_pages and time_left() > 3.0:
        filter_prompt = _build_filter_prompt(summary or "", active_queries, truncated_pages)
        filter_messages = [
            {"role": "system", "content": FILTERER_SYSTEM_PROMPT},
            {"role": "user", "content": f"The current date is {date_str}. Consider how recent each source is when deciding relevance."},
            {"role": "user", "content": filter_prompt},
        ]
        filter_completion = None
        try:
            print("[FILTER] Attempt 1: provider=cerebras, max_tokens=2048", flush=True)
            filter_completion = _invoke_chat(filter_messages, "cerebras", 2048, 0.2, 0.9)
        except Exception as exc1:
            print(f"[FILTER] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
            try:
                print("[FILTER] Attempt 2: provider=auto, max_tokens=2048", flush=True)
                filter_completion = _invoke_chat(filter_messages, "auto", 2048, 0.2, 0.9)
            except Exception as exc2:
                print(f"[FILTER] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
        if filter_completion and filter_completion.choices:
            filter_output = filter_completion.choices[0].message.content or ""
            filtered_urls = _parse_filterer_output(filter_output, list(truncated_pages.keys()))
            filter_success = bool(filter_output.strip()) and bool(filtered_urls)
    if not filtered_urls:
        # The filter chose nothing usable: keep the first few fetched pages.
        filter_used_fallback = True
        fallback_count = min(8, len(truncated_pages))
        filtered_urls = list(truncated_pages.keys())[:fallback_count]
    max_final_urls = 20
    if len(filtered_urls) > max_final_urls:
        filter_used_fallback = True
        filtered_urls = filtered_urls[:max_final_urls]
    if not filter_success:
        filter_used_fallback = True
    print(
        f"[FILTER] Selected URLs={len(filtered_urls)}, fallback={filter_used_fallback}, time_left={time_left():.2f}s",
        flush=True,
    )

    # --- Stage 4: deeper fetch of the selected sources -----------------------
    final_pages_fetched = OrderedDict()
    if filtered_urls and time_left() > 0.2:
        final_pages_fetched = _fetch_pages_within_budget(filtered_urls, 8000, time_left)
    pages = OrderedDict()
    for url in filtered_urls:
        # Prefer the longer re-fetch; fall back to the earlier excerpt.
        content = final_pages_fetched.get(url) or truncated_pages.get(url) or ""
        if content:
            pages[url] = content
    print(
        f"[PIPELINE] Final fetch complete: retained_documents={len(pages)}, time_left={time_left():.2f}s",
        flush=True,
    )

    # --- Stage 5: generate the report, degrading through three attempts ------
    prompt = _build_research_prompt(summary=summary or "", queries=active_queries, url_list=list(pages.keys()), pages_map=pages)
    system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
    date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
    messages = [
        system_message,
        date_message,
        {"role": "user", "content": prompt},
    ]
    print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages.keys())}, prompt_chars={len(prompt)}", flush=True)
    print("[PIPELINE] Starting inference (provider=cerebras, model=zai-org/GLM-4.7)", flush=True)

    try:
        print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
        completion = _invoke_chat(messages, "cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
    except Exception as exc1:
        print(f"[LLM] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
        try:
            trimmed_urls = list(pages.keys())[:30]
            prompt2 = _build_research_prompt(
                summary=summary or "",
                queries=active_queries,
                url_list=trimmed_urls,
                pages_map={key: pages[key] for key in trimmed_urls},
            )
            messages = [
                system_message,
                date_message,
                {"role": "user", "content": prompt2},
            ]
            print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
            completion = _invoke_chat(messages, "cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
        except Exception as exc2:
            print(f"[LLM] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
            try:
                print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
                completion = _invoke_chat(messages, "auto", max_tokens=8192, temp=0.7, top_p=0.95)
            except Exception as exc3:
                _log_call_end("Deep_Research", f"error={_truncate_for_log(str(exc3), 260)}")
                raise gr.Error(f"Researcher model call failed: {exc3}") from exc3

    # --- Stage 6: clean the model output and append the source list ----------
    raw = completion.choices[0].message.content or ""
    report, removed = _drop_planning_paragraphs(_strip_reasoning(raw))
    report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
    print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)

    links_text = "\n".join(f"[{i+1}] {url}" for i, url in enumerate(pages.keys()))
    if links_text:
        report = report.rstrip() + "\n\n## Sources\n" + links_text

    file_path = _write_report_tmp(report)
    elapsed = time.time() - start_ts
    print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
    _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
    return report, links_text, file_path
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
def build_interface() -> gr.Interface:
    """Assemble the Gradio interface that exposes ``Deep_Research``.

    The inputs are one summary textbox followed by five (query textbox,
    max-results slider) pairs; the outputs are the Markdown report, the
    fetched-links textbox and the downloadable report file.
    """
    input_components = [
        gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question", info="Summarization of research topic (one or more sentences)"),
    ]
    for number in range(1, 6):
        query_label = f"DDG Search Query {number}"
        if number == 1:
            # The first query is the only one declared without a default value.
            query_box = gr.Textbox(label=query_label, max_lines=1, info=query_label)
        else:
            query_box = gr.Textbox(label=query_label, value="", max_lines=1, info=query_label)
        limit_slider = gr.Slider(1, 50, value=10, step=1, label=f"Max results (Q{number})", info=f"Max results for Query {number} (1-50)")
        input_components.extend([query_box, limit_slider])

    output_components = [
        gr.Markdown(label="Research Report"),
        gr.Textbox(label="Fetched Links", lines=8),
        gr.File(label="Download Research Report", file_count="single"),
    ]
    banner = "<div style=\"text-align:center\">Generate a research report based on dozens of sources. Default model is GLM-4.7</div>"
    return gr.Interface(
        fn=Deep_Research,
        inputs=input_components,
        outputs=output_components,
        title="Deep Research",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Deep_Research", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import re
|
| 5 |
+
import tempfile
|
| 6 |
+
import time
|
| 7 |
+
import uuid
|
| 8 |
+
from collections import OrderedDict, deque
|
| 9 |
+
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from typing import Annotated, Callable, Dict, List, Tuple
|
| 12 |
+
from urllib.parse import urlparse
|
| 13 |
+
|
| 14 |
+
import gradio as gr
|
| 15 |
+
import requests
|
| 16 |
+
from bs4 import BeautifulSoup
|
| 17 |
+
from ddgs import DDGS
|
| 18 |
+
from huggingface_hub import InferenceClient
|
| 19 |
+
|
| 20 |
+
from .Web_Fetch import _fullpage_markdown_from_soup, _http_get_enhanced
|
| 21 |
+
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
|
| 22 |
+
from ._docstrings import autodoc
|
| 23 |
+
from .File_System import ROOT_DIR
|
| 24 |
+
from ._core import get_hf_token
|
| 25 |
+
|
| 26 |
+
# Token returned by get_hf_token(), read once at import time. It is passed as
# the api_key of the InferenceClient used for the filter and researcher calls.
HF_TEXTGEN_TOKEN = get_hf_token()


# Single source of truth for the LLM-facing tool description
# (used for both @autodoc(summary=...) and gr.Interface(api_description=...)).
# NOTE(review): the final sentence ends with an empty pair of backticks; the
# intended format example may be missing - confirm against the upstream file.
TOOL_SUMMARY = (
    "Write a summary of what the user wants to research, and "
    "run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
    "returns (Markdown report, newline-separated source links, downloadable report path). "
    "Provide the user with one-paragraph summary of the research report and the txt file in this format ``."
)
|
| 36 |
+
|
| 37 |
+
# System prompt for the final report-writing model call in Deep_Research.
# It is organised into three tagged sections: <report_format>,
# <document_structure> and <planning_rules>.
# NOTE(review): <planning_rules> tells the model to "follow <citations>", but
# no <citations> section is defined in this prompt - confirm whether one was
# dropped.
RESEARCHER_SYSTEM_PROMPT = (
    "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
    "You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
    "<report_format>\n"
    "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
    "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
    "</report_format>\n\n"
    "<document_structure>\n"
    "- Always begin with a clear title using a single # header\n"
    "- Organize content into major sections using ## headers\n"
    "- Further divide into subsections using ### headers\n"
    "- Use #### headers sparingly for special subsections\n"
    "- Never skip header levels\n"
    "- Write multiple paragraphs per section or subsection\n"
    "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
    "- Never use lists, instead always use text or tables\n\n"
    "Mandatory Section Flow:\n"
    "1. Title (# level)\n   - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
    "2. Main Body Sections (## level)\n   - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n   - Use ### subsections for detailed analysis\n   - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n   - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
    "3. Conclusion (## level)\n   - Synthesis of findings\n   - Potential recommendations or next steps\n"
    "</document_structure>\n\n"
    "<planning_rules>\n"
    "- Always break it down into multiple steps\n"
    "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
    "- Create the best report that weighs all the evidence from the sources\n"
    "- Use the current date supplied in the first user message to contextualize findings\n"
    "- Make sure that your final report addresses all parts of the query\n"
    "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
    "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
    "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
    "</planning_rules>\n\n"
)
|
| 69 |
+
|
| 70 |
+
# System prompt for the source-selection ("filterer") model call in
# Deep_Research. The reply is expected to be plain text with one URL per line;
# it is parsed by _parse_filterer_output.
FILTERER_SYSTEM_PROMPT = (
    "You are Nymbot Filterer, an analyst who selects the most relevant sources for a research task. "
    "You will be given a summary of the research topic (and optional search queries) followed by multiple fetched documents. "
    "Each document includes its URL and a truncated excerpt. Evaluate how well each source helps answer the research topic. "
    "Return only the URLs that should be used for the final research step. Output plain text with exactly one URL per line and no additional commentary, bullets, numbering, or explanations. "
    "If no sources are relevant, return an empty string."
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class SlowHost(Exception):
    """Raised when fetching a page timed out, so the caller may retry the URL later."""
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _normalize_query(q: str) -> str:
|
| 84 |
+
if not q:
|
| 85 |
+
return ""
|
| 86 |
+
repl = {"“": '"', "”": '"', "‘": "'", "’": "'", "`": "'"}
|
| 87 |
+
for key, value in repl.items():
|
| 88 |
+
q = q.replace(key, value)
|
| 89 |
+
q = re.sub(r"\s+", " ", q)
|
| 90 |
+
q = re.sub(r'"\s+"', " ", q)
|
| 91 |
+
q = q.strip().strip('"').strip()
|
| 92 |
+
return q
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _search_urls_only(query: str, max_results: int) -> list[str]:
|
| 96 |
+
if not query or not query.strip() or max_results <= 0:
|
| 97 |
+
return []
|
| 98 |
+
urls: list[str] = []
|
| 99 |
+
try:
|
| 100 |
+
_search_rate_limiter.acquire()
|
| 101 |
+
with DDGS() as ddgs:
|
| 102 |
+
for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
|
| 103 |
+
url = (item.get("href") or item.get("url") or "").strip()
|
| 104 |
+
if url:
|
| 105 |
+
urls.append(url)
|
| 106 |
+
except Exception:
|
| 107 |
+
pass
|
| 108 |
+
seen = set()
|
| 109 |
+
deduped = []
|
| 110 |
+
for url in urls:
|
| 111 |
+
if url not in seen:
|
| 112 |
+
seen.add(url)
|
| 113 |
+
deduped.append(url)
|
| 114 |
+
return deduped
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
    """Fetch a page and return its content converted to Markdown.

    Args:
        url: Page to download.
        max_chars: Maximum length of the returned text; values <= 0 disable
            truncation.
        timeout: Timeout passed to the HTTP helper.

    Returns:
        The (possibly truncated) Markdown text, or an empty string when the
        request fails for a non-timeout reason or the response is not HTML.

    Raises:
        SlowHost: If the request timed out.
    """
    try:
        resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        msg = str(exc)
        # Check the exception type first; the message test is kept as a
        # fallback for errors that only mention the timeout in their text.
        if isinstance(exc, requests.exceptions.Timeout) or "timed out" in msg.lower():
            raise SlowHost(msg) from exc
        return ""
    final_url = str(resp.url)
    ctype = resp.headers.get("Content-Type", "")
    if "html" not in ctype.lower():
        return ""
    resp.encoding = resp.encoding or resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")
    md_text = _fullpage_markdown_from_soup(soup, final_url, "")
    if max_chars > 0 and len(md_text) > max_chars:
        md_text = md_text[:max_chars]
    return md_text
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
|
| 140 |
+
out = []
|
| 141 |
+
total = 0
|
| 142 |
+
truncated = False
|
| 143 |
+
for part in parts:
|
| 144 |
+
if not part:
|
| 145 |
+
continue
|
| 146 |
+
if total + len(part) > max_chars:
|
| 147 |
+
out.append(part[: max(0, max_chars - total)])
|
| 148 |
+
truncated = True
|
| 149 |
+
break
|
| 150 |
+
out.append(part)
|
| 151 |
+
total += len(part)
|
| 152 |
+
return ("\n\n".join(out), truncated)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
    """Compose the user prompt for the report-writing model.

    The prompt holds the topic summary, the non-blank search queries, a
    numbered list of source URLs and the fetched documents, each in its own
    tagged section. A source keeps its 1-based position in ``url_list``;
    URLs with no text in ``pages_map`` are left out. The documents section
    is capped at 100,000 characters.
    """
    index_lines: List[str] = []
    documents: List[str] = []
    for position, url in enumerate(url_list, start=1):
        body = pages_map.get(url, "").strip()
        if body:
            index_lines.append(f"[{position}] {url}")
            documents.append(f"[Source {position}] URL: {url}\n\n{body}")
    joined_docs, was_cut = _truncate_join(documents, max_chars=100_000)

    sections: List[str] = ["<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n"]
    active_queries = [q.strip() for q in queries if q and q.strip()]
    if active_queries:
        bullet_lines = "\n".join(f"- {q}" for q in active_queries)
        sections.append("<search_queries>\n" + bullet_lines + "\n</search_queries>\n")
    if index_lines:
        sections.append("<sources_list>\n" + "\n".join(index_lines) + "\n</sources_list>\n")
    truncation_note = "\n\n[NOTE] Sources truncated due to context limits." if was_cut else ""
    sections.append("<fetched_documents>\n" + joined_docs + truncation_note + "\n</fetched_documents>")
    return "\n\n".join(sections)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def _build_filter_prompt(summary: str, queries: List[str], pages_map: Dict[str, str]) -> str:
    """Compose the user prompt for the source-filtering model.

    The prompt holds the topic summary, the non-blank search queries, the
    candidate documents (capped at 60,000 characters) and the task
    instructions. Each candidate is numbered by its 1-based position in
    ``pages_map``; entries with blank text are left out.
    """
    sections: List[str] = ["<research_topic_summary>\n" + (summary or "") + "\n</research_topic_summary>"]

    active_queries = [q for q in queries if q and q.strip()]
    if active_queries:
        sections.append("<search_queries>\n" + "\n".join(active_queries) + "\n</search_queries>")

    candidates: List[str] = []
    for position, (url, text) in enumerate(pages_map.items(), start=1):
        excerpt = text.strip()
        if excerpt:
            candidates.append(f"[Source {position}] URL: {url}\n\n{excerpt}")
    joined_candidates, was_cut = _truncate_join(candidates, max_chars=60_000)
    truncation_note = "\n\n[NOTE] Sources truncated due to context limits." if was_cut else ""
    sections.append("<candidate_sources>\n" + joined_candidates + truncation_note + "\n</candidate_sources>")

    sections.append(
        "<task>\nIdentify which of the provided URLs should be retained for the final research synthesis. "
        "Consider coverage, credibility, and relevance to the research topic. "
        "Return ONLY the URLs you choose, with one URL per line and no additional text.\n</task>"
    )
    return "\n\n".join(sections)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def _parse_filterer_output(raw: str, allowed_urls: List[str]) -> List[str]:
|
| 200 |
+
if not raw:
|
| 201 |
+
return []
|
| 202 |
+
allowed_set = {url.strip(): idx for idx, url in enumerate(allowed_urls)}
|
| 203 |
+
found_indices: set[int] = set()
|
| 204 |
+
for line in raw.splitlines():
|
| 205 |
+
candidate = line.strip()
|
| 206 |
+
if not candidate:
|
| 207 |
+
continue
|
| 208 |
+
if candidate in allowed_set:
|
| 209 |
+
found_indices.add(allowed_set[candidate])
|
| 210 |
+
continue
|
| 211 |
+
match = re.search(r"https?://[^\s]+", candidate)
|
| 212 |
+
if not match:
|
| 213 |
+
continue
|
| 214 |
+
url = match.group(0).rstrip(".,);]")
|
| 215 |
+
if url in allowed_set:
|
| 216 |
+
found_indices.add(allowed_set[url])
|
| 217 |
+
selected = [allowed_urls[idx] for idx in sorted(found_indices)]
|
| 218 |
+
return selected
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _write_report_tmp(text: str) -> str:
    """Save the report under ROOT_DIR with a unique name and return its path."""
    destination = os.path.join(ROOT_DIR, f"research_report_{uuid.uuid4().hex}.txt")
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(text)
    return destination
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def _fetch_pages_within_budget(urls: List[str], char_limit: int, time_left_fn: Callable[[], float]) -> OrderedDict:
    """Fetch pages concurrently until they are done or the time budget runs out.

    Args:
        urls: URLs to fetch.
        char_limit: Maximum number of characters kept per page (forwarded to
            ``_fetch_page_markdown_fast``).
        time_left_fn: Callable returning the remaining time budget in seconds.

    Returns:
        An ``OrderedDict`` mapping each successfully fetched URL to its
        Markdown text, in the order the URLs appear in ``urls``.
    """
    pages: dict[str, str] = {}
    if not urls:
        return OrderedDict()
    queue = deque(urls)
    attempts: dict[str, int] = {url: 0 for url in urls}
    max_attempts = 2
    max_workers = min(12, max(4, len(urls)))
    in_flight: dict[Future, str] = {}
    # (ready_time, url) pairs waiting to be re-queued after a timeout.
    delayed: list[tuple[float, str]] = []

    def schedule_next(executor: ThreadPoolExecutor) -> None:
        # Submit queued URLs until all worker slots are in use.
        while queue and len(in_flight) < max_workers:
            url = queue.popleft()
            if url in pages:
                continue
            attempts.setdefault(url, 0)
            if attempts[url] >= max_attempts:
                continue
            attempts[url] += 1
            tl = time_left_fn()
            if tl <= 0.1:
                # Out of time: the URL just popped is dropped, not re-queued.
                return
            # Use shorter per-request timeouts as the budget shrinks.
            per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
            future = executor.submit(_fetch_page_markdown_fast, url, char_limit, per_timeout)
            in_flight[future] = url

    # NOTE(review): leaving the `with` block waits for in-flight fetches, so
    # the call can presumably outlast the budget by up to one per-request
    # timeout - confirm this is acceptable.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        schedule_next(executor)
        while (in_flight or queue or delayed) and time_left_fn() > 0.2:
            now = time.time()
            if delayed:
                # Move retries whose back-off has elapsed back onto the queue.
                ready: list[tuple[float, str]] = []
                not_ready: list[tuple[float, str]] = []
                for ready_time, delayed_url in delayed:
                    (ready if ready_time <= now else not_ready).append((ready_time, delayed_url))
                delayed = not_ready
                for _, delayed_url in ready:
                    queue.append(delayed_url)
                if ready:
                    schedule_next(executor)
            done = [future for future in list(in_flight.keys()) if future.done()]
            if not done:
                # Nothing finished yet: sleep briefly instead of busy-waiting.
                if not queue and delayed:
                    next_ready = min((t for t, _ in delayed), default=time.time())
                    sleep_for = max(0.0, next_ready - time.time())
                    time.sleep(max(0.02, min(0.25, sleep_for)))
                else:
                    time.sleep(0.05)
                continue
            for future in done:
                url = in_flight.pop(future)
                try:
                    md = future.result()
                    # Skip empty results and texts that begin with one of
                    # these error-message prefixes.
                    if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
                        pages[url] = md
                        try:
                            print(f"[FETCH OK] {url} (chars={len(md)})", flush=True)
                        except Exception:
                            pass
                except SlowHost:
                    # Timed out: retry after 3 seconds if enough budget is left
                    # (max_attempts still caps the total number of tries).
                    if time_left_fn() > 5.0:
                        delayed.append((time.time() + 3.0, url))
                except Exception:
                    # Any other failure drops the URL.
                    pass
            schedule_next(executor)
    # Re-order the results to follow the input URL order.
    ordered = OrderedDict((url, pages[url]) for url in urls if url in pages)
    return ordered
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def Deep_Research(
    summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
    query1: Annotated[str, "DDG Search Query 1"],
    max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
    query2: Annotated[str, "DDG Search Query 2"] = "",
    max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
    query3: Annotated[str, "DDG Search Query 3"] = "",
    max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
    query4: Annotated[str, "DDG Search Query 4"] = "",
    max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
    query5: Annotated[str, "DDG Search Query 5"] = "",
    max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
) -> tuple[str, str, str]:
    # Pipeline: search -> fetch short excerpts -> let a filter model choose
    # sources -> re-fetch the chosen pages at greater length -> have the
    # researcher model write the report -> clean it up and save it to a file.
    # Returns (markdown_report, newline_separated_links, report_file_path).
    # Raises gr.Error when no HF token is configured or every researcher model
    # attempt fails.
    # NOTE(review): no docstring is added on purpose - @autodoc presumably
    # manages this function's documentation; confirm before adding one.
    _log_call_start(
        "Deep_Research",
        summary=_truncate_for_log(summary or "", 200),
        queries=[q for q in [query1, query2, query3, query4, query5] if q],
    )
    if not HF_TEXTGEN_TOKEN:
        _log_call_end("Deep_Research", "error=missing HF token")
        raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")

    queries = [_normalize_query(q or "") for q in (query1, query2, query3, query4, query5)]
    reqs = [max(1, min(50, int(m))) for m in (max1, max2, max3, max4, max5)]
    # At most 50 results overall; fall back to an even split when exceeded.
    if sum(reqs) > 50:
        reqs = [10, 10, 10, 10, 10]

    start_ts = time.time()
    budget_seconds = 55.0
    deadline = start_ts + budget_seconds

    def time_left() -> float:
        return max(0.0, deadline - time.time())

    now_dt = datetime.now().astimezone()
    date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
    if not date_str:
        date_str = now_dt.isoformat()

    # --- Stage 1: run the searches in parallel and collect unique URLs. ---
    all_urls: list[str] = []
    tasks = []
    with ThreadPoolExecutor(max_workers=min(5, sum(1 for q in queries if q.strip())) or 1) as executor:
        for query, count in zip(queries, reqs):
            if not query.strip():
                continue
            tasks.append(executor.submit(_search_urls_only, query.strip(), count))
        for future in as_completed(tasks):
            try:
                urls = future.result() or []
            except Exception:
                urls = []
            for url in urls:
                if url not in all_urls:
                    all_urls.append(url)
                if len(all_urls) >= 50:
                    break
            if time_left() <= 0.5:
                break
    if len(all_urls) > 50:
        all_urls = all_urls[:50]

    blacklist = {
        "homedepot.com",
        "tractorsupply.com",
        "mcmaster.com",
        "mrchain.com",
        "answers.com",
        "city-data.com",
        "dictionary.cambridge.org",
    }

    def _domain(url: str) -> str:
        # Host name without port, lower-cased, with a leading "www." removed.
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:
            return ""
        return host[4:] if host.startswith("www.") else host

    def _is_blacklisted(url: str) -> bool:
        # Match the blacklisted domain itself and any of its subdomains; the
        # raw netloc (e.g. "www.homedepot.com") never equalled a bare entry.
        host = _domain(url)
        return any(host == blocked or host.endswith("." + blocked) for blocked in blacklist)

    all_urls = [url for url in all_urls if not _is_blacklisted(url)]
    skip_exts = (
        ".pdf",
        ".ppt",
        ".pptx",
        ".doc",
        ".docx",
        ".xls",
        ".xlsx",
        ".zip",
        ".gz",
        ".tgz",
        ".bz2",
        ".7z",
        ".rar",
    )

    def _skip_url(url: str) -> bool:
        try:
            path = urlparse(url).path.lower()
        except Exception:
            return False
        return path.endswith(skip_exts)

    all_urls = [url for url in all_urls if not _skip_url(url)]

    # --- Stage 2: fetch short excerpts of every candidate page. ---
    truncated_pages = OrderedDict()
    if all_urls and time_left() > 0.2:
        truncated_pages = _fetch_pages_within_budget(all_urls, 3000, time_left)
    print(
        f"[PIPELINE] Initial fetch complete: candidates={len(all_urls)}, truncated_documents={len(truncated_pages)}, time_left={time_left():.2f}s",
        flush=True,
    )

    def _invoke_chat(messages, provider: str, max_tokens: int, temp: float, top_p: float):
        client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
        return client.chat.completions.create(
            model="zai-org/GLM-4.7",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
        )

    # --- Stage 3: ask the filter model which sources to keep. ---
    filtered_urls: List[str] = list(truncated_pages.keys())
    filter_output = ""
    filter_used_fallback = False
    filter_success = False
    if truncated_pages and time_left() > 3.0:
        filter_prompt = _build_filter_prompt(summary or "", [q for q in queries if q.strip()], truncated_pages)
        filter_messages = [
            {"role": "system", "content": FILTERER_SYSTEM_PROMPT},
            {"role": "user", "content": f"The current date is {date_str}. Consider how recent each source is when deciding relevance."},
            {"role": "user", "content": filter_prompt},
        ]
        filter_completion = None
        try:
            print("[FILTER] Attempt 1: provider=cerebras, max_tokens=2048", flush=True)
            filter_completion = _invoke_chat(filter_messages, "cerebras", 2048, 0.2, 0.9)
        except Exception as exc1:
            print(f"[FILTER] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
            try:
                print("[FILTER] Attempt 2: provider=auto, max_tokens=2048", flush=True)
                filter_completion = _invoke_chat(filter_messages, "auto", 2048, 0.2, 0.9)
            except Exception as exc2:
                print(f"[FILTER] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
        if filter_completion and filter_completion.choices:
            filter_output = filter_completion.choices[0].message.content or ""
            filtered_urls = _parse_filterer_output(filter_output, list(truncated_pages.keys()))
            filter_success = bool(filter_output.strip()) and bool(filtered_urls)
    if not filtered_urls:
        # The filter selected nothing usable: keep the first few candidates.
        filter_used_fallback = True
        fallback_count = min(8, len(truncated_pages))
        filtered_urls = list(truncated_pages.keys())[:fallback_count]
    max_final_urls = 20
    if len(filtered_urls) > max_final_urls:
        filter_used_fallback = True
        filtered_urls = filtered_urls[:max_final_urls]
    if not filter_success:
        filter_used_fallback = True
    print(
        f"[FILTER] Selected URLs={len(filtered_urls)}, fallback={filter_used_fallback}, time_left={time_left():.2f}s",
        flush=True,
    )

    # --- Stage 4: re-fetch the selected pages with a larger character limit. ---
    final_pages_fetched = OrderedDict()
    if filtered_urls and time_left() > 0.2:
        final_pages_fetched = _fetch_pages_within_budget(filtered_urls, 8000, time_left)
    merged_pages = OrderedDict()
    for url in filtered_urls:
        # Fall back to the short excerpt when the longer fetch did not succeed.
        content = final_pages_fetched.get(url) or truncated_pages.get(url) or ""
        if content:
            merged_pages[url] = content
    pages = merged_pages
    print(
        f"[PIPELINE] Final fetch complete: retained_documents={len(pages)}, time_left={time_left():.2f}s",
        flush=True,
    )

    # --- Stage 5: generate the report, retrying with smaller settings. ---
    prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
    system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
    date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
    messages = [
        system_message,
        date_message,
        {"role": "user", "content": prompt},
    ]
    prompt_chars = len(prompt)
    print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages)}, prompt_chars={prompt_chars}", flush=True)
    print("[PIPELINE] Starting inference (provider=cerebras, model=zai-org/GLM-4.7)", flush=True)

    try:
        print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
        completion = _invoke_chat(messages, "cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
    except Exception as exc1:
        print(f"[LLM] Attempt 1 failed: {str(exc1)[:200]}", flush=True)
        try:
            prompt2 = _build_research_prompt(
                summary=summary or "",
                queries=[q for q in queries if q.strip()],
                url_list=list(pages.keys())[:30],
                pages_map={key: pages[key] for key in list(pages.keys())[:30]},
            )
            messages = [
                system_message,
                date_message,
                {"role": "user", "content": prompt2},
            ]
            print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
            completion = _invoke_chat(messages, "cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
        except Exception as exc2:
            print(f"[LLM] Attempt 2 failed: {str(exc2)[:200]}", flush=True)
            try:
                print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
                completion = _invoke_chat(messages, "auto", max_tokens=8192, temp=0.7, top_p=0.95)
            except Exception as exc3:
                _log_call_end("Deep_Research", f"error={_truncate_for_log(str(exc3), 260)}")
                raise gr.Error(f"Researcher model call failed: {exc3}")

    # --- Stage 6: post-process the model output. ---
    raw = completion.choices[0].message.content or ""
    # Drop reasoning blocks, then any stray opening/closing tag. The earlier
    # patterns used "\\/" inside a raw string, which requires a literal
    # backslash and therefore never matched a real "</think>" tag.
    no_think = re.sub(r"<think>[\s\S]*?</think>", "", raw, flags=re.IGNORECASE)
    no_think = re.sub(r"</?think>", "", no_think, flags=re.IGNORECASE)
    # Remove paragraphs that read like the model narrating its own plan.
    planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will now|i will|i'll|let's|now let me|i need to|now i'll|now i will)\b", re.IGNORECASE)
    paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
    keep: List[str] = [p for p in paragraphs if not planning_re.search(p)]
    removed = len(paragraphs) - len(keep)
    report = "\n\n".join(keep).strip()
    if not report:
        # Every paragraph looked like planning: keep the unfiltered text
        # rather than returning an empty report.
        report = no_think.strip()
    report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
    print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)

    # Build the numbered link list once and reuse it for the Sources section.
    links_text = "\n".join(f"[{i}] {url}" for i, url in enumerate(pages, start=1))
    if links_text:
        report = report.rstrip() + "\n\n## Sources\n" + links_text
    file_path = _write_report_tmp(report)
    elapsed = time.time() - start_ts
    print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
    _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
    return report, links_text, file_path
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio interface that exposes ``Deep_Research``.

    The inputs are one summary textbox followed by five (query textbox,
    max-results slider) pairs; the outputs are the Markdown report, the
    fetched-links textbox and the downloadable report file.
    """
    input_components = [
        gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question", info="Summarization of research topic (one or more sentences)"),
    ]
    for number in range(1, 6):
        query_label = f"DDG Search Query {number}"
        if number == 1:
            # The first query is the only one declared without a default value.
            query_box = gr.Textbox(label=query_label, max_lines=1, info=query_label)
        else:
            query_box = gr.Textbox(label=query_label, value="", max_lines=1, info=query_label)
        limit_slider = gr.Slider(1, 50, value=10, step=1, label=f"Max results (Q{number})", info=f"Max results for Query {number} (1-50)")
        input_components.extend([query_box, limit_slider])

    output_components = [
        gr.Markdown(label="Research Report"),
        gr.Textbox(label="Fetched Links", lines=8),
        gr.File(label="Download Research Report", file_count="single"),
    ]
    banner = "<div style=\"text-align:center\">Generate a research report based on dozens of sources. Default model is GLM-4.7</div>"
    return gr.Interface(
        fn=Deep_Research,
        inputs=input_components,
        outputs=output_components,
        title="Deep Research",
        description=banner,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["Deep_Research", "build_interface"]
|
Modules/Generate_Image.py
CHANGED
|
@@ -1,132 +1,132 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import uuid
|
| 5 |
-
import random
|
| 6 |
-
from typing import Annotated
|
| 7 |
-
|
| 8 |
-
import gradio as gr
|
| 9 |
-
from PIL import Image
|
| 10 |
-
from huggingface_hub import InferenceClient
|
| 11 |
-
from ._core import ROOT_DIR, get_hf_token, DEFAULT_PROVIDERS, handle_hf_error
|
| 12 |
-
|
| 13 |
-
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 14 |
-
from ._docstrings import autodoc
|
| 15 |
-
|
| 16 |
-
HF_API_TOKEN = get_hf_token()
|
| 17 |
-
|
| 18 |
-
# Single source of truth for the LLM-facing tool description
|
| 19 |
-
TOOL_SUMMARY = (
|
| 20 |
-
"Generate an image from a text prompt via Hugging Face serverless inference; "
|
| 21 |
-
"tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
|
| 22 |
-
"Return the generated media to the user in this format ``."
|
| 23 |
-
)
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
@autodoc(
|
| 27 |
-
summary=TOOL_SUMMARY,
|
| 28 |
-
)
|
| 29 |
-
def Generate_Image(
|
| 30 |
-
prompt: Annotated[str, "Text description of the image to generate."],
|
| 31 |
-
model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., Tongyi-MAI/Z-Image-Turbo)."] = "Tongyi-MAI/Z-Image-Turbo",
|
| 32 |
-
negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
|
| 33 |
-
"(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
|
| 34 |
-
"missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
|
| 35 |
-
"mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
|
| 36 |
-
),
|
| 37 |
-
steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
|
| 38 |
-
cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
|
| 39 |
-
seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
|
| 40 |
-
width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
|
| 41 |
-
height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
|
| 42 |
-
sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
|
| 43 |
-
) -> str:
|
| 44 |
-
_log_call_start(
|
| 45 |
-
"Generate_Image",
|
| 46 |
-
prompt=_truncate_for_log(prompt, 200),
|
| 47 |
-
model_id=model_id,
|
| 48 |
-
steps=steps,
|
| 49 |
-
cfg_scale=cfg_scale,
|
| 50 |
-
seed=seed,
|
| 51 |
-
size=f"{width}x{height}",
|
| 52 |
-
)
|
| 53 |
-
if not prompt or not prompt.strip():
|
| 54 |
-
_log_call_end("Generate_Image", "error=empty prompt")
|
| 55 |
-
raise gr.Error("Please provide a non-empty prompt.")
|
| 56 |
-
enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
|
| 57 |
-
last_error: Exception | None = None
|
| 58 |
-
for provider in DEFAULT_PROVIDERS:
|
| 59 |
-
try:
|
| 60 |
-
client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
|
| 61 |
-
image = client.text_to_image(
|
| 62 |
-
prompt=enhanced_prompt,
|
| 63 |
-
negative_prompt=negative_prompt,
|
| 64 |
-
model=model_id,
|
| 65 |
-
width=width,
|
| 66 |
-
height=height,
|
| 67 |
-
num_inference_steps=steps,
|
| 68 |
-
guidance_scale=cfg_scale,
|
| 69 |
-
seed=seed if seed != -1 else random.randint(1, 1_000_000_000),
|
| 70 |
-
)
|
| 71 |
-
|
| 72 |
-
filename = f"image_{uuid.uuid4().hex[:8]}.png"
|
| 73 |
-
output_path = os.path.join(ROOT_DIR, filename)
|
| 74 |
-
image.save(output_path)
|
| 75 |
-
|
| 76 |
-
_log_call_end("Generate_Image", f"provider={provider} size={image.size} saved_to={filename}")
|
| 77 |
-
return output_path
|
| 78 |
-
except Exception as exc:
|
| 79 |
-
last_error = exc
|
| 80 |
-
continue
|
| 81 |
-
|
| 82 |
-
msg = str(last_error) if last_error else "Unknown error"
|
| 83 |
-
_log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
|
| 84 |
-
handle_hf_error(msg, model_id, context="Image generation")
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def build_interface() -> gr.Interface:
|
| 88 |
-
return gr.Interface(
|
| 89 |
-
fn=Generate_Image,
|
| 90 |
-
inputs=[
|
| 91 |
-
gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2, info="Text description of the image to generate"),
|
| 92 |
-
gr.Textbox(
|
| 93 |
-
label="Model",
|
| 94 |
-
value="Tongyi-MAI/Z-Image-Turbo",
|
| 95 |
-
placeholder="creator/model-name",
|
| 96 |
-
max_lines=1,
|
| 97 |
-
info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
|
| 98 |
-
),
|
| 99 |
-
gr.Textbox(
|
| 100 |
-
label="Negative Prompt",
|
| 101 |
-
value=(
|
| 102 |
-
"(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
|
| 103 |
-
"missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
|
| 104 |
-
"mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
|
| 105 |
-
),
|
| 106 |
-
lines=2,
|
| 107 |
-
info="What should NOT appear in the image",
|
| 108 |
-
),
|
| 109 |
-
gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps", info="Number of denoising steps (1–100)"),
|
| 110 |
-
gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale", info="Classifier-free guidance scale (1–20)"),
|
| 111 |
-
gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility"),
|
| 112 |
-
gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Width", info="Output width in pixels"),
|
| 113 |
-
gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Height", info="Output height in pixels"),
|
| 114 |
-
gr.Radio(
|
| 115 |
-
label="Sampler",
|
| 116 |
-
value="DPM++ 2M Karras",
|
| 117 |
-
choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
|
| 118 |
-
info="Sampling method",
|
| 119 |
-
),
|
| 120 |
-
],
|
| 121 |
-
outputs=gr.Image(label="Generated Image"),
|
| 122 |
-
title="Generate Image",
|
| 123 |
-
description=(
|
| 124 |
-
"<div style=\"text-align:center\">Generate images via Hugging Face serverless inference. "
|
| 125 |
-
"Default model is Z-Image-Turbo.</div>"
|
| 126 |
-
),
|
| 127 |
-
api_description=TOOL_SUMMARY,
|
| 128 |
-
flagging_mode="never",
|
| 129 |
-
)
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
__all__ = ["Generate_Image", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import uuid
|
| 5 |
+
import random
|
| 6 |
+
from typing import Annotated
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from huggingface_hub import InferenceClient
|
| 11 |
+
from ._core import ROOT_DIR, get_hf_token, DEFAULT_PROVIDERS, handle_hf_error
|
| 12 |
+
|
| 13 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 14 |
+
from ._docstrings import autodoc
|
| 15 |
+
|
| 16 |
+
HF_API_TOKEN = get_hf_token()
|
| 17 |
+
|
| 18 |
+
# Single source of truth for the LLM-facing tool description
|
| 19 |
+
TOOL_SUMMARY = (
|
| 20 |
+
"Generate an image from a text prompt via Hugging Face serverless inference; "
|
| 21 |
+
"tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
|
| 22 |
+
"Return the generated media to the user in this format ``."
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Image(
    prompt: Annotated[str, "Text description of the image to generate."],
    model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., Tongyi-MAI/Z-Image-Turbo)."] = "Tongyi-MAI/Z-Image-Turbo",
    negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
    cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
) -> str:
    # Generates one image and returns the path of the PNG written under ROOT_DIR.
    # Providers in DEFAULT_PROVIDERS are tried in order; the first one that
    # succeeds wins. `sampler` is accepted for the UI but is not sent to the
    # inference call. Raises gr.Error when the prompt is empty.
    # (Kept as comments rather than a docstring so the text produced by
    # @autodoc is left untouched.)
    _log_call_start(
        "Generate_Image",
        prompt=_truncate_for_log(prompt, 200),
        model_id=model_id,
        steps=steps,
        cfg_scale=cfg_scale,
        seed=seed,
        size=f"{width}x{height}",
    )
    if not prompt or not prompt.strip():
        _log_call_end("Generate_Image", "error=empty prompt")
        raise gr.Error("Please provide a non-empty prompt.")

    enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."

    # Resolve the seed once, before the provider loop: every fallback attempt
    # then uses the same seed, and the value can be logged so that a
    # "random seed" result is reproducible afterwards.
    resolved_seed = seed if seed != -1 else random.randint(1, 1_000_000_000)

    last_error: Exception | None = None
    for provider in DEFAULT_PROVIDERS:
        try:
            client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
            image = client.text_to_image(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                model=model_id,
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=cfg_scale,
                seed=resolved_seed,
            )

            # Short random suffix keeps concurrent calls from overwriting each other.
            filename = f"image_{uuid.uuid4().hex[:8]}.png"
            output_path = os.path.join(ROOT_DIR, filename)
            image.save(output_path)

            _log_call_end(
                "Generate_Image",
                f"provider={provider} size={image.size} saved_to={filename} seed={resolved_seed}",
            )
            return output_path
        except Exception as exc:
            # Deliberate best-effort fallback: remember the failure and try
            # the next provider instead of aborting.
            last_error = exc
            continue

    msg = str(last_error) if last_error else "Unknown error"
    _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
    # NOTE(review): handle_hf_error presumably raises; if it can return, this
    # function falls through and returns None despite the `-> str` annotation.
    handle_hf_error(msg, model_id, context="Image generation")
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio interface that exposes ``Generate_Image``.

    Input components are created in the same order as the parameters of
    ``Generate_Image``, since ``gr.Interface`` passes them positionally.
    """
    default_negative_prompt = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    )
    browse_models_link = (
        '<a href="https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending" '
        'target="_blank" rel="noopener noreferrer">Browse models</a>'
    )

    prompt_box = gr.Textbox(
        label="Prompt",
        placeholder="Enter a prompt",
        lines=2,
        info="Text description of the image to generate",
    )
    model_box = gr.Textbox(
        label="Model",
        value="Tongyi-MAI/Z-Image-Turbo",
        placeholder="creator/model-name",
        max_lines=1,
        info=browse_models_link,
    )
    negative_box = gr.Textbox(
        label="Negative Prompt",
        value=default_negative_prompt,
        lines=2,
        info="What should NOT appear in the image",
    )
    steps_slider = gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps", info="Number of denoising steps (1–100)")
    cfg_slider = gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale", info="Classifier-free guidance scale (1–20)")
    seed_slider = gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)", info="Random seed for reproducibility")
    width_slider = gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Width", info="Output width in pixels")
    height_slider = gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Height", info="Output height in pixels")
    sampler_radio = gr.Radio(
        label="Sampler",
        value="DPM++ 2M Karras",
        choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
        info="Sampling method",
    )

    page_description = (
        '<div style="text-align:center">Generate images via Hugging Face serverless inference. '
        "Default model is Z-Image-Turbo.</div>"
    )

    return gr.Interface(
        fn=Generate_Image,
        inputs=[
            prompt_box,
            model_box,
            negative_box,
            steps_slider,
            cfg_slider,
            seed_slider,
            width_slider,
            height_slider,
            sampler_radio,
        ],
        outputs=gr.Image(label="Generated Image"),
        title="Generate Image",
        description=page_description,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
__all__ = ["Generate_Image", "build_interface"]
|
Modules/Memory_Manager.py
CHANGED
|
@@ -1,253 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
-
import json
|
| 4 |
import os
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
-
from datetime import datetime
|
| 8 |
-
from typing import Annotated, Dict, List, Literal, Optional
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
from ._docstrings import autodoc
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
| 17 |
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
|
| 22 |
|
| 23 |
-
def
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
with open(MEMORY_FILE, "r", encoding="utf-8") as file:
|
| 28 |
-
data = json.load(file)
|
| 29 |
-
if isinstance(data, list):
|
| 30 |
-
cleaned: List[Dict[str, str]] = []
|
| 31 |
-
for item in data:
|
| 32 |
-
if isinstance(item, dict) and "id" in item and "text" in item:
|
| 33 |
-
cleaned.append(item)
|
| 34 |
-
return cleaned
|
| 35 |
-
return []
|
| 36 |
-
except Exception:
|
| 37 |
try:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
"
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
if not
|
| 160 |
-
return
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
TOOL_SUMMARY = (
|
| 192 |
-
"Manage
|
| 193 |
-
"
|
|
|
|
| 194 |
)
|
| 195 |
|
| 196 |
|
| 197 |
-
@autodoc(
|
| 198 |
-
summary=TOOL_SUMMARY,
|
| 199 |
-
)
|
| 200 |
def Memory_Manager(
|
| 201 |
-
action: Annotated[Literal["save", "list", "search", "
|
| 202 |
-
text: Annotated[Optional[str], "
|
| 203 |
tags: Annotated[Optional[str], "Comma-separated tags (Save only)"] = None,
|
| 204 |
-
query: Annotated[Optional[str], "
|
| 205 |
limit: Annotated[int, "Max results (List/Search only)"] = 20,
|
| 206 |
-
|
| 207 |
-
include_tags: Annotated[bool, "Include tags (List/Search only)"] = True,
|
| 208 |
) -> str:
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
if act == "save":
|
| 215 |
-
|
|
|
|
| 216 |
return "Error: 'text' is required when action=save."
|
| 217 |
-
return
|
|
|
|
| 218 |
if act == "list":
|
| 219 |
-
return
|
|
|
|
| 220 |
if act == "search":
|
| 221 |
-
|
|
|
|
| 222 |
return "Error: 'query' is required when action=search."
|
| 223 |
-
return
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
return "Error: 'memory_id' is required when action=delete."
|
| 227 |
-
return _mem_delete(memory_id=memory_id)
|
| 228 |
-
return "Error: invalid action (use save|list|search|delete)."
|
| 229 |
|
| 230 |
|
| 231 |
def build_interface() -> gr.Interface:
|
|
|
|
|
|
|
|
|
|
| 232 |
return gr.Interface(
|
| 233 |
fn=Memory_Manager,
|
| 234 |
inputs=[
|
| 235 |
-
gr.Radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
gr.Textbox(label="Text", lines=3, info="Memory text (Save only)"),
|
| 237 |
gr.Textbox(label="Tags", placeholder="tag1, tag2", max_lines=1, info="Comma-separated tags (Save only)"),
|
| 238 |
-
gr.Textbox(label="Query", placeholder="
|
| 239 |
gr.Slider(1, 200, value=20, step=1, label="Limit", info="Max results (List/Search only)"),
|
| 240 |
-
gr.
|
| 241 |
-
gr.Checkbox(value=True, label="Include Tags", info="Include tags in output (List/Search only)"),
|
| 242 |
],
|
| 243 |
outputs=gr.Textbox(label="Result", lines=14),
|
| 244 |
-
title="Memory Manager",
|
| 245 |
-
description=
|
| 246 |
-
"<div style=\"text-align:center\">Lightweight local JSON memory store (no external DB). Choose an Action, fill only the relevant fields, and run.</div>"
|
| 247 |
-
),
|
| 248 |
api_description=TOOL_SUMMARY,
|
| 249 |
flagging_mode="never",
|
| 250 |
)
|
| 251 |
|
| 252 |
|
| 253 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Memory Manager - Graphiti Knowledge Graph Interface.
|
| 3 |
+
|
| 4 |
+
Provides unified memory operations using the same Graphiti instance
|
| 5 |
+
configured for Claude Code's Graphiti MCP server.
|
| 6 |
+
|
| 7 |
+
Configuration (must match Graphiti MCP):
|
| 8 |
+
- FALKORDB_URI: redis://localhost:6379 (default)
|
| 9 |
+
- FALKORDB_DATABASE: graphiti (default)
|
| 10 |
+
- MISTRAL_API_KEY: Required for entity extraction
|
| 11 |
+
- GRAPHITI_GROUP_ID: main (default)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
from __future__ import annotations
|
| 15 |
|
|
|
|
| 16 |
import os
|
| 17 |
+
from datetime import datetime, timezone
|
| 18 |
+
from typing import Annotated, Literal, Optional
|
|
|
|
|
|
|
| 19 |
|
| 20 |
import gradio as gr
|
| 21 |
from ._docstrings import autodoc
|
| 22 |
|
| 23 |
+
# Connection settings, read from the environment once at import time.
# The defaults are intended to mirror the Graphiti MCP server configuration.
FALKORDB_URI = os.environ.get("FALKORDB_URI", "redis://localhost:6379")
FALKORDB_DATABASE = os.environ.get("FALKORDB_DATABASE", "graphiti")
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "")
GRAPHITI_GROUP_ID = os.environ.get("GRAPHITI_GROUP_ID", "main")

# True only means an API key is configured; it does not prove that the
# graphiti package is importable or that the database is reachable.
GRAPHITI_AVAILABLE = MISTRAL_API_KEY != ""

# Cached client instance, populated on first successful initialisation.
_graphiti_client = None
|
| 34 |
|
| 35 |
|
| 36 |
+
def _get_graphiti_client():
    """Return the shared Graphiti client, creating it on first use.

    The graphiti imports are done lazily inside the function so that this
    module can still be imported when the package is not installed.

    Returns:
        The cached client, or ``None`` when ``GRAPHITI_AVAILABLE`` is false or
        initialisation failed (the failure is printed, not raised). A failed
        initialisation is not cached, so the next call tries again.
    """
    global _graphiti_client
    if _graphiti_client is not None or not GRAPHITI_AVAILABLE:
        return _graphiti_client

    try:
        from urllib.parse import urlparse

        from graphiti_core import Graphiti
        from graphiti_core.driver.falkordb_driver import FalkorDriver
        from graphiti_core.llm_client import OpenAIClient
        from graphiti_core.llm_client.config import LLMConfig

        # FalkorDriver takes discrete connection fields rather than a URI, so
        # split FALKORDB_URI (e.g. redis://user:pass@host:6379) into its parts.
        parsed = urlparse(FALKORDB_URI)
        driver = FalkorDriver(
            host=parsed.hostname or "localhost",
            port=parsed.port or 6379,
            username=parsed.username,
            password=parsed.password,
            database=FALKORDB_DATABASE,
        )

        # Mistral exposes an OpenAI-compatible endpoint; the client settings
        # are passed through an LLMConfig object.
        llm_client = OpenAIClient(
            config=LLMConfig(
                api_key=MISTRAL_API_KEY,
                base_url="https://api.mistral.ai/v1",
                model="mistral-large-2411",
            )
        )

        # A pre-built driver is handed over via `graph_driver`; no separate
        # `uri` is needed in that case.
        # NOTE(review): no embedder is passed, so Graphiti uses its default
        # embedder - confirm that default works with the configured credentials.
        _graphiti_client = Graphiti(graph_driver=driver, llm_client=llm_client)
    except ImportError as e:
        print(f"[Memory_Manager] Graphiti not available: {e}")
        return None
    except Exception as e:
        print(f"[Memory_Manager] Failed to initialize Graphiti: {e}")
        return None
    return _graphiti_client
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _format_timestamp() -> str:
|
| 74 |
+
"""Return current UTC timestamp in ISO format."""
|
| 75 |
+
return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ============================================================================
|
| 79 |
+
# Graphiti Memory Operations
|
| 80 |
+
# ============================================================================
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _graphiti_save(text: str, tags: str) -> str:
    """Store one memory as a Graphiti episode.

    Args:
        text: Memory text; surrounding whitespace is stripped.
        tags: Comma-separated tags. When non-empty they are appended to the
            episode body as a trailing ``Tags: ...`` paragraph.

    Returns:
        A confirmation message, or a string starting with ``Error`` when the
        API key is missing, the client cannot be created, or the call fails.
        Exceptions are reported in the returned string rather than raised.
    """
    if not GRAPHITI_AVAILABLE:
        return "Error: MISTRAL_API_KEY not set. Cannot save to Graphiti."

    client = _get_graphiti_client()
    if not client:
        return "Error: Failed to initialize Graphiti client."

    try:
        import asyncio

        episode_body = text.strip()
        tag_text = (tags or "").strip()
        if tag_text:
            episode_body = f"{episode_body}\n\nTags: {tag_text}"

        # NOTE(review): asyncio.run starts a fresh event loop on every call
        # while the client is cached across calls - confirm the driver
        # tolerates being reused from different loops.
        asyncio.run(
            client.add_episode(
                name=f"Memory {_format_timestamp()}",
                episode_body=episode_body,
                source_description="Memory_Manager tool",
                # add_episode needs the time the episode refers to; a memory
                # saved through this tool refers to "now".
                reference_time=datetime.now(timezone.utc),
                group_id=GRAPHITI_GROUP_ID,
            )
        )
        return f"Memory saved to Graphiti knowledge graph (group: {GRAPHITI_GROUP_ID})"
    except Exception as e:
        return f"Error saving to Graphiti: {e}"
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _graphiti_list(limit: int, include_tags: bool) -> str:
    """List the most recent memories stored for ``GRAPHITI_GROUP_ID``.

    Args:
        limit: Maximum number of episodes to fetch.
        include_tags: When true, the ``Tags: ...`` trailer written by
            ``_graphiti_save`` is shown as `` | tags: ...``; when false it is
            removed from the output altogether.

    Returns:
        A header followed by one entry per episode (content cut to 100
        characters), a "no memories" message, or a string starting with
        ``Error``. Exceptions are reported in the returned string.
    """
    if not GRAPHITI_AVAILABLE:
        return "Error: MISTRAL_API_KEY not set. Cannot access Graphiti."

    client = _get_graphiti_client()
    if not client:
        return "Error: Failed to initialize Graphiti client."

    try:
        import asyncio

        # retrieve_episodes returns the last_n episodes at or before
        # reference_time, so "now" yields the most recent ones.
        episodes = asyncio.run(
            client.retrieve_episodes(
                reference_time=datetime.now(timezone.utc),
                last_n=int(limit),
                group_ids=[GRAPHITI_GROUP_ID],
            )
        )

        if not episodes:
            return f"No memories found in Graphiti (group: {GRAPHITI_GROUP_ID})"

        lines = [f"Graphiti Memories (group: {GRAPHITI_GROUP_ID})", "-" * 50]
        for ep in episodes:
            created = getattr(ep, "created_at", "?")
            raw_content = getattr(ep, "content", None)
            content = raw_content if isinstance(raw_content, str) else str(ep)

            # Split on the last trailer in the exact shape _graphiti_save
            # writes, so a "Tags:" mentioned inside the memory text is not
            # mistaken for the tag list.
            body, marker, tag_text = content.rpartition("\n\nTags:")
            tags_str = ""
            if marker:
                content = body.strip()
                if include_tags:
                    tags_str = f" | tags: {tag_text.strip()}"

            ellipsis = "..." if len(content) > 100 else ""
            lines.append(f"[{created}] {content[:100]}{ellipsis}{tags_str}")

        return "\n".join(lines)
    except Exception as e:
        return f"Error listing from Graphiti: {e}"
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _graphiti_search(query: str, limit: int) -> str:
    """Search the knowledge graph and format the hits as numbered lines.

    Args:
        query: Search text.
        limit: Maximum number of results requested from Graphiti.

    Returns:
        A header followed by one numbered line per result, a "no matches"
        message, or a string starting with ``Error``. Exceptions are reported
        in the returned string rather than raised.
    """
    if not GRAPHITI_AVAILABLE:
        return "Error: MISTRAL_API_KEY not set. Cannot search Graphiti."

    client = _get_graphiti_client()
    if not client:
        return "Error: Failed to initialize Graphiti client."

    try:
        import asyncio

        results = asyncio.run(
            client.search(
                query=query,
                group_ids=[GRAPHITI_GROUP_ID],
                num_results=int(limit),
            )
        )

        if not results:
            return f"No matches found for: {query}"

        lines = [f"Graphiti Search Results for: {query}", "-" * 50]

        for i, result in enumerate(results, 1):
            if hasattr(result, "fact"):
                # Edge/fact result. Edges identify their endpoints by node
                # UUID, so fall back to those when no resolved node is attached.
                source = getattr(result, "source_node", None) or getattr(result, "source_node_uuid", "?")
                target = getattr(result, "target_node", None) or getattr(result, "target_node_uuid", "?")
                lines.append(f"{i}. {source} -> {target}: {result.fact}")
            elif hasattr(result, "name"):
                # Node result; a missing or None summary is shown as empty.
                summary = getattr(result, "summary", "") or ""
                ellipsis = "..." if len(summary) > 150 else ""
                lines.append(f"{i}. [{result.name}] {summary[:150]}{ellipsis}")
            else:
                lines.append(f"{i}. {str(result)[:150]}")

        return "\n".join(lines)
    except Exception as e:
        return f"Error searching Graphiti: {e}"
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def _graphiti_delete(memory_id: str) -> str:
    """Explain that deletion is not supported through this interface.

    ``memory_id`` is accepted but not used: nothing is deleted. The function
    only returns an explanatory note, or an error string when
    ``GRAPHITI_AVAILABLE`` is false.
    """
    if GRAPHITI_AVAILABLE:
        not_supported = (
            "Note: To delete from Graphiti, use the Graphiti MCP directly with the episode UUID. "
            "Memory deletion is not fully implemented in this interface."
        )
        return not_supported
    return "Error: MISTRAL_API_KEY not set. Cannot access Graphiti."
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# ============================================================================
|
| 218 |
+
# Status Check
|
| 219 |
+
# ============================================================================
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def _get_status() -> str:
    """Describe the Graphiti configuration state as a short text.

    "Connected" here means the client object could be created by
    ``_get_graphiti_client``; this function performs no further check itself.
    """
    if not GRAPHITI_AVAILABLE:
        return "Status: MISTRAL_API_KEY not configured"

    if not _get_graphiti_client():
        return "Status: Failed to initialize Graphiti client"

    return f"Status: Connected to Graphiti\nDatabase: {FALKORDB_DATABASE}\nGroup: {GRAPHITI_GROUP_ID}"
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# ============================================================================
|
| 234 |
+
# Main Tool Function
|
| 235 |
+
# ============================================================================
|
| 236 |
+
|
| 237 |
+
|
| 238 |
TOOL_SUMMARY = (
|
| 239 |
+
"Manage memories in Graphiti knowledge graph (save, list, search, status). "
|
| 240 |
+
"Connects to the same Graphiti instance as the Graphiti MCP server. "
|
| 241 |
+
"Requires MISTRAL_API_KEY for entity extraction and knowledge graph operations."
|
| 242 |
)
|
| 243 |
|
| 244 |
|
| 245 |
+
@autodoc(summary=TOOL_SUMMARY)
def Memory_Manager(
    action: Annotated[Literal["save", "list", "search", "status"], "Action: save | list | search | status"] = "list",
    text: Annotated[Optional[str], "Memory text (Save only)"] = None,
    tags: Annotated[Optional[str], "Comma-separated tags (Save only)"] = None,
    query: Annotated[Optional[str], "Search query (Search only)"] = None,
    limit: Annotated[int, "Max results (List/Search only)"] = 20,
    include_tags: Annotated[bool, "Include tags in output"] = True,
) -> str:
    """
    Memory Manager - Graphiti Knowledge Graph Interface.

    Connects to the same Graphiti instance used by Claude Code's Graphiti MCP.
    All memories are stored in the knowledge graph with automatic entity extraction
    and relationship detection.
    """
    # A missing action falls back to "list"; matching ignores case and
    # surrounding whitespace.
    requested = (action or "list").lower().strip()

    if requested == "status":
        return _get_status()

    if requested == "save":
        memory_text = (text or "").strip()
        if memory_text:
            return _graphiti_save(text=memory_text, tags=tags or "")
        return "Error: 'text' is required when action=save."

    if requested == "search":
        search_terms = (query or "").strip()
        if search_terms:
            # Same 1..200 clamp as the list action.
            return _graphiti_search(query=search_terms, limit=max(1, min(200, limit)))
        return "Error: 'query' is required when action=search."

    if requested == "list":
        # Clamp to the 1..200 range offered by the UI slider.
        return _graphiti_list(limit=max(1, min(200, limit)), include_tags=include_tags)

    return "Error: invalid action (use save|list|search|status)."
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
|
| 284 |
def build_interface() -> gr.Interface:
    """Build Gradio interface for Memory Manager.

    The current Graphiti status is queried once, when the interface is built,
    and embedded in the page description; it is not refreshed afterwards.
    """
    # _get_status() separates its fields with "\n", which HTML collapses into
    # a single space; convert to <br/> so each field gets its own line.
    status_html = _get_status().replace("\n", "<br/>")

    return gr.Interface(
        fn=Memory_Manager,
        inputs=[
            gr.Radio(
                label="Action",
                choices=["save", "list", "search", "status"],
                value="status",
                info="Action to perform",
            ),
            gr.Textbox(label="Text", lines=3, info="Memory text (Save only)"),
            gr.Textbox(label="Tags", placeholder="tag1, tag2", max_lines=1, info="Comma-separated tags (Save only)"),
            gr.Textbox(label="Query", placeholder="search terms...", max_lines=1, info="Search query (Search only)"),
            gr.Slider(1, 200, value=20, step=1, label="Limit", info="Max results (List/Search only)"),
            gr.Checkbox(value=True, label="Include Tags", info="Include tags in output"),
        ],
        outputs=gr.Textbox(label="Result", lines=14),
        title="Memory Manager - Graphiti",
        description=f"<div style='text-align:center'><strong>{status_html}</strong><br/>Knowledge graph memory with entity extraction</div>",
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 309 |
|
| 310 |
|
| 311 |
+
__all__ = [
|
| 312 |
+
"Memory_Manager",
|
| 313 |
+
"build_interface",
|
| 314 |
+
"GRAPHITI_AVAILABLE",
|
| 315 |
+
"GRAPHITI_GROUP_ID",
|
| 316 |
+
]
|
Modules/ScrapeGraphAI.py
ADDED
|
@@ -0,0 +1,779 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from typing import Annotated, Any, Literal
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 10 |
+
from ._core import _resolve_path
|
| 11 |
+
from ._docstrings import autodoc
|
| 12 |
+
|
| 13 |
+
# One-paragraph description of the tool, reused for the autodoc summary.
TOOL_SUMMARY = (
    "Scrape and extract structured data from known URLs using ScrapeGraphAI "
    "with Mistral-only models. "
    "Supports single-page extraction, bounded crawl extraction, multi-URL extraction, "
    "rendered markdown, and image-aware extraction."
)

# Accepted values for the `action` and `render_mode` parameters.
ACTION_CHOICES = ["extract", "crawl_extract", "multi_extract", "render_markdown", "vision_extract"]
RENDER_CHOICES = ["auto", "browser", "http"]

# Environment variables that override the default Mistral model names.
TEXT_MODEL_ENV = "SCRAPEGRAPH_TEXT_MODEL"
VISION_MODEL_ENV = "SCRAPEGRAPH_VISION_MODEL"

# Model names used when the corresponding environment variable is unset.
DEFAULT_TEXT_MODEL = "mistral-small-latest"
DEFAULT_VISION_MODEL = "pixtral-12b-latest"

# Holds the exception raised by the optional dependency imports, if any.
_IMPORT_ERROR: Exception | None = None
|
| 35 |
+
|
| 36 |
+
# Optional dependency import. When any of these imports fails, the exception is
# stored in _IMPORT_ERROR and every imported name is bound to None so that the
# module itself still imports; the custom graph classes below are only defined
# when all imports succeeded.
try:
    from langchain.chat_models import init_chat_model
    from pydantic import BaseModel, Field, create_model
    from scrapegraphai.graphs import SmartScraperGraph, SmartScraperMultiGraph
    from scrapegraphai.graphs.abstract_graph import AbstractGraph
    from scrapegraphai.graphs.base_graph import BaseGraph
    from scrapegraphai.nodes import (
        DescriptionNode,
        FetchNode,
        FetchNodeLevelK,
        GenerateAnswerNodeKLevel,
        GenerateAnswerOmniNode,
        ImageToTextNode,
        ParseNode,
        ParseNodeDepthK,
        RAGNode,
    )
    from scrapegraphai.utils.convert_to_md import convert_to_md
except Exception as exc:  # pragma: no cover - import error path is runtime-only
    _IMPORT_ERROR = exc
    init_chat_model = None
    BaseModel = None
    Field = None
    create_model = None
    SmartScraperGraph = None
    SmartScraperMultiGraph = None
    AbstractGraph = None
    BaseGraph = None
    DescriptionNode = None
    FetchNode = None
    FetchNodeLevelK = None
    GenerateAnswerNodeKLevel = None
    GenerateAnswerOmniNode = None
    ImageToTextNode = None
    ParseNode = None
    ParseNodeDepthK = None
    RAGNode = None
    convert_to_md = None
else:
    class _LimitedFetchNodeLevelK(FetchNodeLevelK):
        """FetchNodeLevelK variant that caps the number of returned documents."""

        def __init__(self, *args, **kwargs):
            """Forward all arguments to the parent and read `max_pages` from node_config."""
            super().__init__(*args, **kwargs)
            # None (no cap) when there is no node_config or it has no "max_pages" key.
            self.max_pages = None if self.node_config is None else self.node_config.get("max_pages")

        def obtain_content(self, documents, loader_kwargs):
            """Return the parent's result, truncated to the first `max_pages` entries."""
            documents = super().obtain_content(documents, loader_kwargs)
            # The cut happens after the parent call returns.
            # NOTE(review): presumably the parent has already fetched every page by then — confirm.
            if self.max_pages and len(documents) > self.max_pages:
                return documents[: self.max_pages]
            return documents

    class _BoundedDepthSearchGraph(AbstractGraph):
        """Crawl-and-answer graph: fetch -> parse -> describe -> RAG -> generate answer."""

        def __init__(self, prompt: str, source: str, config: dict, schema: type[BaseModel] | None = None):
            """Initialise the base graph and pick the input key from the source string."""
            super().__init__(prompt, config, source, schema)
            # Sources starting with "http" are passed as "url", anything else as "local_dir".
            self.input_key = "url" if source.startswith("http") else "local_dir"

        def _create_graph(self):
            """Build the five-node BaseGraph, configuring each node from self.config."""
            fetch_node_k = _LimitedFetchNodeLevelK(
                input="url| local_dir",
                output=["docs"],
                node_config={
                    "loader_kwargs": self.config.get("loader_kwargs", {}),
                    "force": self.config.get("force", False),
                    "cut": self.config.get("cut", True),
                    "browser_base": self.config.get("browser_base"),
                    "storage_state": self.config.get("storage_state"),
                    "depth": self.config.get("depth", 1),
                    "only_inside_links": self.config.get("only_inside_links", False),
                    # Read back by _LimitedFetchNodeLevelK.__init__.
                    "max_pages": self.config.get("max_pages"),
                },
            )
            parse_node_k = ParseNodeDepthK(
                input="docs",
                output=["docs"],
                node_config={"verbose": self.config.get("verbose", False)},
            )
            description_node = DescriptionNode(
                input="docs",
                output=["docs"],
                node_config={
                    "llm_model": self.llm_model,
                    "verbose": self.config.get("verbose", False),
                    "cache_path": self.config.get("cache_path", False),
                },
            )
            rag_node = RAGNode(
                input="docs",
                output=["vectorial_db"],
                node_config={
                    "llm_model": self.llm_model,
                    "embedder_model": self.config.get("embedder_model", False),
                    "verbose": self.config.get("verbose", False),
                },
            )
            generate_answer_k = GenerateAnswerNodeKLevel(
                input="vectorial_db",
                output=["answer"],
                node_config={
                    "llm_model": self.llm_model,
                    "embedder_model": self.config.get("embedder_model", False),
                    "verbose": self.config.get("verbose", False),
                    "schema": self.schema,
                },
            )
            return BaseGraph(
                nodes=[fetch_node_k, parse_node_k, description_node, rag_node, generate_answer_k],
                edges=[
                    (fetch_node_k, parse_node_k),
                    (parse_node_k, description_node),
                    (description_node, rag_node),
                    (rag_node, generate_answer_k),
                ],
                entry_point=fetch_node_k,
                graph_name=self.__class__.__name__,
            )

        def run(self):
            """Execute the graph and return its "answer", or "No answer found." if absent."""
            inputs = {"user_prompt": self.prompt, self.input_key: self.source}
            self.final_state, self.execution_info = self.graph.execute(inputs)
            return self.final_state.get("answer", "No answer found.")

    class _MistralOmniScraperGraph(AbstractGraph):
        """Image-aware scraper graph: fetch -> parse -> image-to-text -> generate answer."""

        def __init__(self, prompt: str, source: str, config: dict, schema: type[BaseModel] | None = None):
            """Store `max_images`, initialise the base graph, and pick the input key."""
            # Set before super().__init__ — presumably because the base constructor
            # calls _create_graph, which reads self.max_images; confirm against AbstractGraph.
            self.max_images = config.get("max_images", 5)
            super().__init__(prompt, config, source, schema)
            # Sources starting with "http" are passed as "url", anything else as "local_dir".
            self.input_key = "url" if source.startswith("http") else "local_dir"

        def _create_graph(self):
            """Build the four-node BaseGraph, using a separate Mistral chat model for images."""
            # Vision model is created independently of self.llm_model, reusing the
            # API key from the "llm" section of the config.
            vision_model = init_chat_model(
                model=self.config.get("vision_model", DEFAULT_VISION_MODEL),
                model_provider="mistralai",
                api_key=self.config["llm"]["api_key"],
                temperature=0,
            )
            fetch_node = FetchNode(
                input="url | local_dir",
                output=["doc"],
                node_config={
                    "loader_kwargs": self.config.get("loader_kwargs", {}),
                    "storage_state": self.config.get("storage_state"),
                    "use_soup": self.config.get("use_soup", False),
                    "timeout": self.config.get("timeout", 30),
                },
            )
            parse_node = ParseNode(
                input="doc & (url | local_dir)",
                output=["parsed_doc", "link_urls", "img_urls"],
                node_config={
                    "chunk_size": self.model_token,
                    "parse_urls": True,
                    "llm_model": self.llm_model,
                },
            )
            image_to_text_node = ImageToTextNode(
                input="img_urls",
                output=["img_desc"],
                node_config={
                    "llm_model": vision_model,
                    "max_images": self.max_images,
                },
            )
            generate_answer_omni_node = GenerateAnswerOmniNode(
                input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
                output=["answer"],
                node_config={
                    "llm_model": self.llm_model,
                    "additional_info": self.config.get("additional_info"),
                    "schema": self.schema,
                },
            )
            return BaseGraph(
                nodes=[fetch_node, parse_node, image_to_text_node, generate_answer_omni_node],
                edges=[
                    (fetch_node, parse_node),
                    (parse_node, image_to_text_node),
                    (image_to_text_node, generate_answer_omni_node),
                ],
                entry_point=fetch_node,
                graph_name=self.__class__.__name__,
            )

        def run(self):
            """Execute the graph and return its "answer", or "No answer found." if absent."""
            inputs = {"user_prompt": self.prompt, self.input_key: self.source}
            self.final_state, self.execution_info = self.graph.execute(inputs)
            return self.final_state.get("answer", "No answer found.")
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class ScrapeGraphToolError(RuntimeError):
    """Tool-level failure with a machine-readable code and an optional hint.

    Attributes are plain instance fields: ``code`` (short identifier),
    ``message`` (also passed to ``RuntimeError``, so ``str(err)`` returns it)
    and ``hint`` (optional remediation text, ``None`` when not given).
    """

    def __init__(self, code: str, message: str, hint: str | None = None):
        super().__init__(message)
        self.code, self.message, self.hint = code, message, hint
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def _json_response(payload: dict[str, Any]) -> str:
    """Serialise *payload* as pretty-printed JSON text.

    Non-ASCII characters are emitted verbatim, output is indented by two
    spaces, and values the encoder cannot handle are converted with ``str``.
    """
    dump_options = {"ensure_ascii": False, "indent": 2, "default": str}
    return json.dumps(payload, **dump_options)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def _error_response(action: str, code: str, message: str, hint: str | None = None) -> str:
    """Render a failure as the tool's JSON error envelope.

    The result has an ``action`` key and an ``error`` object holding ``code``
    and ``message``; ``hint`` is added to the error object only when truthy.
    """
    error_body: dict[str, Any] = {"code": code, "message": message}
    if hint:
        error_body["hint"] = hint
    return _json_response({"action": action, "error": error_body})
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def _require_scrapegraph() -> None:
    """Fail fast when the optional ScrapeGraphAI imports did not succeed.

    Raises:
        ScrapeGraphToolError: with code ``missing_scrapegraph_dependencies``
            when ``_IMPORT_ERROR`` holds an exception.
    """
    if _IMPORT_ERROR is None:
        return
    detail = f"ScrapeGraphAI dependencies are unavailable: {_IMPORT_ERROR}"
    remedy = "Install `scrapegraphai>=1.75.1` and its runtime dependencies."
    raise ScrapeGraphToolError("missing_scrapegraph_dependencies", detail, remedy)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _require_mistral_key() -> str:
    """Return the Mistral API key from the environment, stripped of whitespace.

    Raises:
        ScrapeGraphToolError: with code ``missing_mistral_api_key`` when
            ``MISTRAL_API_KEY`` is unset or blank.
    """
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if api_key:
        return api_key
    raise ScrapeGraphToolError(
        "missing_mistral_api_key",
        "MISTRAL_API_KEY is not configured.",
        "Set MISTRAL_API_KEY in the environment before using ScrapeGraphAI extraction actions.",
    )
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def _coerce_urls(urls: Any) -> list[str]:
    """Normalise the `urls` argument into a list of non-empty, stripped strings.

    Accepted inputs:
      * ``None`` or an empty/blank string -> ``[]``
      * a list -> each item converted with ``str`` and stripped; blanks dropped
      * a string starting with ``[`` -> parsed as a JSON array
      * any other string -> split on commas and line breaks

    Raises:
        ScrapeGraphToolError: with code ``invalid_urls`` when the value has an
            unsupported type, when a ``[``-prefixed string is not valid JSON,
            or when it decodes to something other than a list.
    """
    if urls is None or urls == "":
        return []
    if isinstance(urls, list):
        return [item for item in (str(url).strip() for url in urls) if item]
    if not isinstance(urls, str):
        raise ScrapeGraphToolError("invalid_urls", "urls must be provided as a list or JSON array string.")

    text = urls.strip()
    if not text:
        return []
    if text.startswith("["):
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as exc:
            # Surface malformed JSON as the same structured error as every other
            # bad `urls` value instead of leaking a raw JSONDecodeError.
            raise ScrapeGraphToolError("invalid_urls", f"urls is not a valid JSON array: {exc}") from exc
        if not isinstance(parsed, list):
            raise ScrapeGraphToolError("invalid_urls", "urls must be a JSON array of URL strings.")
        return [item for item in (str(url).strip() for url in parsed) if item]

    # Plain text: treat commas, CR and LF as interchangeable separators.
    pieces = text.replace("\r", "\n").replace(",", "\n").split("\n")
    return [piece.strip() for piece in pieces if piece.strip()]
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def _coerce_schema(schema_json: Any) -> dict[str, Any] | None:
    """Turn the `schema_json` argument into a dict, or ``None`` when absent.

    ``None``, ``""`` and ``{}`` mean "no schema". A dict is returned as-is; a
    string is decoded as JSON and must produce an object.

    Raises:
        ScrapeGraphToolError: with code ``invalid_schema_json`` for invalid
            JSON text, a non-object JSON value, or an unsupported input type.
    """
    if schema_json in (None, "", {}):
        return None
    if isinstance(schema_json, dict):
        return schema_json
    if not isinstance(schema_json, str):
        raise ScrapeGraphToolError("invalid_schema_json", "schema_json must be a JSON object or JSON string.")

    try:
        decoded = json.loads(schema_json)
    except json.JSONDecodeError as exc:
        raise ScrapeGraphToolError("invalid_schema_json", f"schema_json is not valid JSON: {exc}") from exc
    if isinstance(decoded, dict):
        return decoded
    raise ScrapeGraphToolError("invalid_schema_json", "schema_json must decode to a JSON object.")
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def _schema_to_type(name: str, schema: dict[str, Any]) -> Any:
    """Translate a JSON-schema fragment into a Python type annotation.

    Scalars map to ``str``/``int``/``float``/``bool``, arrays to
    ``list[<item type>]`` and objects (``type == "object"`` or a
    ``properties`` key) to a model built with ``create_model``. Anything else
    yields ``typing.Any``.

    Args:
        name: Name given to a generated model; nested models derive their names
            from it.
        schema: The schema fragment to translate.
    """
    scalar_types = {"string": str, "integer": int, "number": float, "boolean": bool}
    declared = schema.get("type")

    # The isinstance guard keeps unhashable values (e.g. a list of types) out
    # of the dict lookup so they fall through to the later branches.
    if isinstance(declared, str) and declared in scalar_types:
        return scalar_types[declared]

    if declared == "array":
        element_type = _schema_to_type(f"{name}Item", schema.get("items", {}))
        return list[element_type]

    if declared != "object" and "properties" not in schema:
        return Any

    property_schemas = schema.get("properties", {})
    required_names = set(schema.get("required", []))
    model_fields: dict[str, tuple[Any, Any]] = {}
    for field_name, field_schema in property_schemas.items():
        field_type = _schema_to_type(f"{name}{field_name.title()}", field_schema)
        description = field_schema.get("description")
        if field_name in required_names:
            # Required: bare type with an Ellipsis default.
            annotation = field_type
            field_info = Field(..., description=description)
        else:
            # Optional: nullable type defaulting to None.
            annotation = field_type | None
            field_info = Field(None, description=description)
        model_fields[field_name] = (annotation, field_info)
    return create_model(name, **model_fields)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def _schema_to_model(schema: dict[str, Any] | None) -> type[BaseModel] | None:
    """Build a ``BaseModel`` subclass from an object-shaped JSON schema.

    Returns ``None`` for a missing or empty schema.

    Raises:
        ScrapeGraphToolError: with code ``invalid_schema_json`` when the schema
            declares a non-object type without ``properties``, or when the
            generated type is not a ``BaseModel`` subclass.
    """
    if not schema:
        return None

    declares_non_object = schema.get("type") not in (None, "object")
    if declares_non_object and "properties" not in schema:
        raise ScrapeGraphToolError(
            "invalid_schema_json",
            "Only object-shaped JSON schemas are supported for schema_json.",
        )

    candidate = _schema_to_type("ScrapeGraphResult", schema)
    if isinstance(candidate, type) and issubclass(candidate, BaseModel):
        return candidate
    raise ScrapeGraphToolError(
        "invalid_schema_json",
        "schema_json must define an object with properties for structured extraction.",
    )
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def _resolve_storage_state(storage_state_path: str | None) -> str | None:
    """Resolve the optional storage-state path to an existing file path.

    Blank or missing input yields ``None``. Absolute paths are used as given;
    other paths are passed through ``_resolve_path`` and the first element of
    its result is used.

    Raises:
        ScrapeGraphToolError: with code ``invalid_storage_state_path`` when the
            resolved path does not exist.
    """
    candidate = storage_state_path.strip() if storage_state_path else ""
    if not candidate:
        return None

    if os.path.isabs(candidate):
        resolved = candidate
    else:
        resolved, _unused = _resolve_path(candidate)

    if os.path.exists(resolved):
        return resolved
    raise ScrapeGraphToolError(
        "invalid_storage_state_path",
        f"Storage state file not found: {candidate}",
    )
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def _build_config(
    *,
    api_key: str | None,
    text_model: str | None = None,
    render_mode: str = "auto",
    timeout_s: int = 30,
    storage_state_path: str | None = None,
    depth: int | None = None,
    max_pages: int | None = None,
    same_domain_only: bool | None = None,
    max_images: int | None = None,
    vision_model: str | None = None,
) -> dict[str, Any]:
    """Assemble the configuration dict handed to the ScrapeGraph graphs and nodes.

    The base config always contains ``headless``, ``verbose``, ``timeout``
    (at least 5) and ``use_soup`` (true only for ``render_mode == "http"``).
    Every other key is added only when its argument is provided; the numeric
    limits are clamped to a minimum of 1.

    Raises:
        ScrapeGraphToolError: with code ``invalid_render_mode`` when
            ``render_mode`` is not one of ``RENDER_CHOICES``.
    """
    if render_mode not in RENDER_CHOICES:
        raise ScrapeGraphToolError("invalid_render_mode", f"Unsupported render_mode: {render_mode}")

    config: dict[str, Any] = dict(
        headless=True,
        verbose=False,
        timeout=max(5, int(timeout_s)),
        use_soup=(render_mode == "http"),
    )

    if api_key:
        # An explicit text_model wins; otherwise the env override, then the default.
        model_name = text_model or os.getenv(TEXT_MODEL_ENV, DEFAULT_TEXT_MODEL)
        config["llm"] = {"api_key": api_key, "model": f"mistralai/{model_name}", "temperature": 0}
    if storage_state_path:
        config["storage_state"] = storage_state_path

    # Crawl bounds: only present when requested, never below 1.
    for key, requested in (("depth", depth), ("max_pages", max_pages)):
        if requested is not None:
            config[key] = max(1, int(requested))
    if same_domain_only is not None:
        config["only_inside_links"] = bool(same_domain_only)

    if max_images is not None:
        config["max_images"] = max(1, int(max_images))
    if vision_model:
        config["vision_model"] = vision_model
    return config
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def _json_safe(value: Any) -> Any:
    """Recursively convert *value* into plain JSON-friendly data.

    * ``BaseModel`` instances -> ``model_dump(mode="json")``
    * dicts, lists and tuples -> converted element by element (tuples become lists)
    * objects exposing both ``metadata`` and ``page_content`` -> a two-key dict
    * strings that start with ``{`` or ``[`` once stripped -> parsed as JSON
      when possible, otherwise returned unchanged
    * everything else -> returned unchanged
    """
    if BaseModel is not None and isinstance(value, BaseModel):
        return value.model_dump(mode="json")
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if hasattr(value, "metadata") and hasattr(value, "page_content"):
        return {
            "page_content": getattr(value, "page_content", ""),
            "metadata": _json_safe(getattr(value, "metadata", {})),
        }
    if not isinstance(value, str):
        return value

    text = value.strip()
    if not text.startswith(("{", "[")):
        return value
    try:
        return json.loads(text)
    except Exception:
        # Best effort: text that only looks like JSON is passed through untouched.
        return value
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def _extract_sources(state: dict[str, Any], fallback: list[str] | None = None) -> list[str]:
    """Collect the distinct source identifiers recorded in a graph state.

    Sources are read, in order, from the ``"source"`` key of dict entries under
    ``state["docs"]`` and from ``metadata["source"]`` of entries under
    ``state["doc"]``. Duplicates and falsy values are skipped. When nothing is
    found, the truthy items of *fallback* are returned instead.
    """
    collected: list[str] = []

    def _remember(candidate: Any) -> None:
        # Keep first-seen order; ignore empty values and repeats.
        if candidate and candidate not in collected:
            collected.append(candidate)

    for entry in state.get("docs", []) or []:
        _remember(entry.get("source") if isinstance(entry, dict) else None)

    for document in state.get("doc", []) or []:
        metadata = getattr(document, "metadata", {}) or {}
        _remember(metadata.get("source"))

    if fallback and not collected:
        collected.extend(item for item in fallback if item)
    return collected
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
def _extract_links_and_images(doc_state: dict[str, Any], url: str) -> tuple[list[str], list[str]]:
    """Run a ``ParseNode`` over fetched documents to list their links and images.

    Documents are taken from ``doc_state["doc"]``, falling back to
    ``doc_state["html_content"]``. When neither holds anything, two empty lists
    are returned and the node is not executed.

    Returns:
        ``(link_urls, img_urls)`` as left in the state dict after the node ran.
    """
    url_parser = ParseNode(
        input="doc & url",
        output=["parsed_doc", "link_urls", "img_urls"],
        node_config={
            "parse_urls": True,
            "parse_html": True,
            "chunk_size": 8192,
            "llm_model": None,
        },
    )

    documents = doc_state.get("doc") or doc_state.get("html_content", [])
    if not documents:
        return [], []

    # The return value of execute() is ignored; results are read back from the
    # same dict, so this relies on the node updating it in place.
    parse_state = {"doc": documents, "url": url}
    url_parser.execute(parse_state)
    found_links = parse_state.get("link_urls", []) or []
    found_images = parse_state.get("img_urls", []) or []
    return found_links, found_images
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
def _render_markdown_with_fetch(url: str, config: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """Fetch *url* with a ``FetchNode`` and convert the first document to markdown.

    The whole *config* dict is passed as the node configuration. The converted
    text is stored in the returned state under ``"markdown"``.

    Returns:
        ``(state, [])`` — the second element is always an empty list.

    Raises:
        ScrapeGraphToolError: with code ``fetch_failed`` when no document was
            fetched or the first document's content is empty or blank.
    """
    fetcher = FetchNode(input="url", output=["doc"], node_config=config)
    fetched_state = fetcher.execute({"url": url})

    documents = fetched_state.get("doc", []) or []
    if not documents:
        raise ScrapeGraphToolError("fetch_failed", "ScrapeGraph fetch returned no documents for render_markdown.")

    # Only the first fetched document is rendered.
    page_html = getattr(documents[0], "page_content", None) or ""
    if not page_html.strip():
        raise ScrapeGraphToolError("fetch_failed", "Fetched document for render_markdown had empty content.")

    fetched_state["markdown"] = convert_to_md(page_html)
    return fetched_state, []
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
@autodoc(summary=TOOL_SUMMARY)
|
| 482 |
+
def ScrapeGraphAI(
|
| 483 |
+
action: Annotated[
|
| 484 |
+
Literal["extract", "crawl_extract", "multi_extract", "render_markdown", "vision_extract"],
|
| 485 |
+
"Action to run: extract, crawl_extract, multi_extract, render_markdown, or vision_extract.",
|
| 486 |
+
] = "extract",
|
| 487 |
+
url: Annotated[str, "Single URL for extract, crawl_extract, render_markdown, or vision_extract."] = "",
|
| 488 |
+
urls: Annotated[list[str] | str | None, "Explicit list of URLs for multi_extract. Accepts a list or JSON array string."] = None,
|
| 489 |
+
prompt: Annotated[str, "Natural-language extraction prompt. Required for extraction actions."] = "",
|
| 490 |
+
schema_json: Annotated[dict[str, Any] | str | None, "Optional object-shaped JSON schema for structured extraction."] = None,
|
| 491 |
+
render_mode: Annotated[Literal["auto", "browser", "http"], "Fetch mode. `browser` uses ScrapeGraph browser loading, `http` uses requests + soup, `auto` currently follows ScrapeGraph's browser-first path."] = "auto",
|
| 492 |
+
include_images: Annotated[bool, "For `extract`, include page images in the extraction context."] = False,
|
| 493 |
+
depth: Annotated[int, "For `crawl_extract`, crawl depth from the starting URL."] = 1,
|
| 494 |
+
max_pages: Annotated[int, "For `crawl_extract`, soft cap on fetched pages."] = 4,
|
| 495 |
+
same_domain_only: Annotated[bool, "For `crawl_extract`, stay within the starting site's links only."] = True,
|
| 496 |
+
max_urls: Annotated[int, "For `multi_extract`, maximum URLs allowed in one call."] = 8,
|
| 497 |
+
max_images: Annotated[int, "For `vision_extract` and image-aware extraction, maximum images to describe."] = 5,
|
| 498 |
+
max_chars: Annotated[int, "For `render_markdown`, trim returned markdown to this many characters."] = 12000,
|
| 499 |
+
include_links: Annotated[bool, "For `render_markdown`, include discovered page links."] = True,
|
| 500 |
+
timeout_s: Annotated[int, "Timeout in seconds passed to ScrapeGraph fetch and generation nodes."] = 30,
|
| 501 |
+
storage_state_path: Annotated[str, "Optional Playwright storage state JSON path for authenticated pages."] = "",
|
| 502 |
+
return_debug: Annotated[bool, "Include execution metadata and graph execution info in the response."] = False,
|
| 503 |
+
) -> str:
|
| 504 |
+
_log_call_start(
|
| 505 |
+
"ScrapeGraphAI",
|
| 506 |
+
action=action,
|
| 507 |
+
url=url,
|
| 508 |
+
urls=urls,
|
| 509 |
+
prompt=_truncate_for_log(prompt or "", 180),
|
| 510 |
+
render_mode=render_mode,
|
| 511 |
+
include_images=include_images,
|
| 512 |
+
depth=depth,
|
| 513 |
+
max_pages=max_pages,
|
| 514 |
+
max_urls=max_urls,
|
| 515 |
+
max_images=max_images,
|
| 516 |
+
timeout_s=timeout_s,
|
| 517 |
+
storage_state_path=storage_state_path,
|
| 518 |
+
return_debug=return_debug,
|
| 519 |
+
)
|
| 520 |
+
|
| 521 |
+
try:
|
| 522 |
+
_require_scrapegraph()
|
| 523 |
+
storage_state = _resolve_storage_state(storage_state_path)
|
| 524 |
+
schema = _coerce_schema(schema_json)
|
| 525 |
+
schema_model = _schema_to_model(schema)
|
| 526 |
+
text_model_name = os.getenv(TEXT_MODEL_ENV, DEFAULT_TEXT_MODEL)
|
| 527 |
+
vision_model_name = os.getenv(VISION_MODEL_ENV, DEFAULT_VISION_MODEL)
|
| 528 |
+
|
| 529 |
+
if action == "render_markdown":
|
| 530 |
+
if not url.strip():
|
| 531 |
+
raise ScrapeGraphToolError("missing_url", "url is required for render_markdown.")
|
| 532 |
+
final_state, exec_info = _render_markdown_with_fetch(
|
| 533 |
+
url.strip(),
|
| 534 |
+
_build_config(
|
| 535 |
+
api_key=None,
|
| 536 |
+
render_mode=render_mode,
|
| 537 |
+
timeout_s=timeout_s,
|
| 538 |
+
storage_state_path=storage_state,
|
| 539 |
+
),
|
| 540 |
+
)
|
| 541 |
+
markdown = (final_state.get("markdown") or "")[: max(1000, int(max_chars))]
|
| 542 |
+
links, images = _extract_links_and_images(final_state, url.strip())
|
| 543 |
+
response = {
|
| 544 |
+
"action": action,
|
| 545 |
+
"result": {"markdown": markdown},
|
| 546 |
+
"sources": [url.strip()],
|
| 547 |
+
"artifacts": {
|
| 548 |
+
"markdown": markdown,
|
| 549 |
+
"links": links if include_links else [],
|
| 550 |
+
"images": images if include_images else [],
|
| 551 |
+
"per_url_results": [],
|
| 552 |
+
},
|
| 553 |
+
"meta": {
|
| 554 |
+
"render_mode_used": render_mode,
|
| 555 |
+
"text_model": None,
|
| 556 |
+
"vision_model": None,
|
| 557 |
+
},
|
| 558 |
+
"warnings": [],
|
| 559 |
+
}
|
| 560 |
+
if return_debug:
|
| 561 |
+
response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(exec_info)}
|
| 562 |
+
result = _json_response(response)
|
| 563 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 564 |
+
return result
|
| 565 |
+
|
| 566 |
+
api_key = _require_mistral_key()
|
| 567 |
+
if action == "extract":
|
| 568 |
+
if not url.strip() or not prompt.strip():
|
| 569 |
+
raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for extract.")
|
| 570 |
+
config = _build_config(
|
| 571 |
+
api_key=api_key,
|
| 572 |
+
text_model=text_model_name,
|
| 573 |
+
render_mode=render_mode,
|
| 574 |
+
timeout_s=timeout_s,
|
| 575 |
+
storage_state_path=storage_state,
|
| 576 |
+
max_images=max_images,
|
| 577 |
+
vision_model=vision_model_name,
|
| 578 |
+
)
|
| 579 |
+
graph_cls = _MistralOmniScraperGraph if include_images else SmartScraperGraph
|
| 580 |
+
graph = graph_cls(prompt=prompt.strip(), source=url.strip(), config=config, schema=schema_model)
|
| 581 |
+
result_data = _json_safe(graph.run())
|
| 582 |
+
final_state = graph.get_state()
|
| 583 |
+
response = {
|
| 584 |
+
"action": action,
|
| 585 |
+
"result": result_data,
|
| 586 |
+
"sources": _extract_sources(final_state, [url.strip()]),
|
| 587 |
+
"artifacts": {
|
| 588 |
+
"markdown": None,
|
| 589 |
+
"links": final_state.get("link_urls", []) or [],
|
| 590 |
+
"images": final_state.get("img_urls", []) or [],
|
| 591 |
+
"per_url_results": [],
|
| 592 |
+
},
|
| 593 |
+
"meta": {
|
| 594 |
+
"render_mode_used": render_mode,
|
| 595 |
+
"text_model": text_model_name,
|
| 596 |
+
"vision_model": vision_model_name if include_images else None,
|
| 597 |
+
},
|
| 598 |
+
"warnings": [],
|
| 599 |
+
}
|
| 600 |
+
if return_debug:
|
| 601 |
+
response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
|
| 602 |
+
result = _json_response(response)
|
| 603 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 604 |
+
return result
|
| 605 |
+
|
| 606 |
+
if action == "vision_extract":
|
| 607 |
+
if not url.strip() or not prompt.strip():
|
| 608 |
+
raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for vision_extract.")
|
| 609 |
+
graph = _MistralOmniScraperGraph(
|
| 610 |
+
prompt=prompt.strip(),
|
| 611 |
+
source=url.strip(),
|
| 612 |
+
config=_build_config(
|
| 613 |
+
api_key=api_key,
|
| 614 |
+
text_model=text_model_name,
|
| 615 |
+
render_mode=render_mode,
|
| 616 |
+
timeout_s=timeout_s,
|
| 617 |
+
storage_state_path=storage_state,
|
| 618 |
+
max_images=max_images,
|
| 619 |
+
vision_model=vision_model_name,
|
| 620 |
+
),
|
| 621 |
+
schema=schema_model,
|
| 622 |
+
)
|
| 623 |
+
result_data = _json_safe(graph.run())
|
| 624 |
+
final_state = graph.get_state()
|
| 625 |
+
img_urls = final_state.get("img_urls", []) or []
|
| 626 |
+
if not img_urls:
|
| 627 |
+
raise ScrapeGraphToolError("no_images_found", "No images were found on the page for vision_extract.")
|
| 628 |
+
response = {
|
| 629 |
+
"action": action,
|
| 630 |
+
"result": result_data,
|
| 631 |
+
"sources": _extract_sources(final_state, [url.strip()]),
|
| 632 |
+
"artifacts": {
|
| 633 |
+
"markdown": None,
|
| 634 |
+
"links": final_state.get("link_urls", []) or [],
|
| 635 |
+
"images": img_urls,
|
| 636 |
+
"per_url_results": [],
|
| 637 |
+
},
|
| 638 |
+
"meta": {
|
| 639 |
+
"render_mode_used": render_mode,
|
| 640 |
+
"text_model": text_model_name,
|
| 641 |
+
"vision_model": vision_model_name,
|
| 642 |
+
},
|
| 643 |
+
"warnings": [],
|
| 644 |
+
}
|
| 645 |
+
if return_debug:
|
| 646 |
+
response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
|
| 647 |
+
result = _json_response(response)
|
| 648 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 649 |
+
return result
|
| 650 |
+
|
| 651 |
+
if action == "multi_extract":
|
| 652 |
+
normalized_urls = _coerce_urls(urls)
|
| 653 |
+
if not normalized_urls or not prompt.strip():
|
| 654 |
+
raise ScrapeGraphToolError("missing_arguments", "urls and prompt are required for multi_extract.")
|
| 655 |
+
if len(normalized_urls) > max(1, int(max_urls)):
|
| 656 |
+
raise ScrapeGraphToolError("too_many_urls", f"multi_extract supports at most {max_urls} URLs per call.")
|
| 657 |
+
graph = SmartScraperMultiGraph(
|
| 658 |
+
prompt=prompt.strip(),
|
| 659 |
+
source=normalized_urls,
|
| 660 |
+
config=_build_config(
|
| 661 |
+
api_key=api_key,
|
| 662 |
+
text_model=text_model_name,
|
| 663 |
+
render_mode=render_mode,
|
| 664 |
+
timeout_s=timeout_s,
|
| 665 |
+
storage_state_path=storage_state,
|
| 666 |
+
),
|
| 667 |
+
schema=schema_model,
|
| 668 |
+
)
|
| 669 |
+
result_data = _json_safe(graph.run())
|
| 670 |
+
final_state = graph.get_state()
|
| 671 |
+
response = {
|
| 672 |
+
"action": action,
|
| 673 |
+
"result": result_data,
|
| 674 |
+
"sources": normalized_urls,
|
| 675 |
+
"artifacts": {
|
| 676 |
+
"markdown": None,
|
| 677 |
+
"links": [],
|
| 678 |
+
"images": [],
|
| 679 |
+
"per_url_results": _json_safe(final_state.get("results", [])),
|
| 680 |
+
},
|
| 681 |
+
"meta": {
|
| 682 |
+
"render_mode_used": render_mode,
|
| 683 |
+
"text_model": text_model_name,
|
| 684 |
+
"vision_model": None,
|
| 685 |
+
},
|
| 686 |
+
"warnings": [],
|
| 687 |
+
}
|
| 688 |
+
if return_debug:
|
| 689 |
+
response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
|
| 690 |
+
result = _json_response(response)
|
| 691 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 692 |
+
return result
|
| 693 |
+
|
| 694 |
+
if action == "crawl_extract":
|
| 695 |
+
if not url.strip() or not prompt.strip():
|
| 696 |
+
raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for crawl_extract.")
|
| 697 |
+
graph = _BoundedDepthSearchGraph(
|
| 698 |
+
prompt=prompt.strip(),
|
| 699 |
+
source=url.strip(),
|
| 700 |
+
config=_build_config(
|
| 701 |
+
api_key=api_key,
|
| 702 |
+
text_model=text_model_name,
|
| 703 |
+
render_mode=render_mode,
|
| 704 |
+
timeout_s=timeout_s,
|
| 705 |
+
storage_state_path=storage_state,
|
| 706 |
+
depth=depth,
|
| 707 |
+
max_pages=max_pages,
|
| 708 |
+
same_domain_only=same_domain_only,
|
| 709 |
+
),
|
| 710 |
+
schema=schema_model,
|
| 711 |
+
)
|
| 712 |
+
result_data = _json_safe(graph.run())
|
| 713 |
+
final_state = graph.get_state()
|
| 714 |
+
response = {
|
| 715 |
+
"action": action,
|
| 716 |
+
"result": result_data,
|
| 717 |
+
"sources": _extract_sources(final_state, [url.strip()]),
|
| 718 |
+
"artifacts": {
|
| 719 |
+
"markdown": None,
|
| 720 |
+
"links": [],
|
| 721 |
+
"images": [],
|
| 722 |
+
"per_url_results": [],
|
| 723 |
+
},
|
| 724 |
+
"meta": {
|
| 725 |
+
"render_mode_used": render_mode,
|
| 726 |
+
"text_model": text_model_name,
|
| 727 |
+
"vision_model": None,
|
| 728 |
+
},
|
| 729 |
+
"warnings": [],
|
| 730 |
+
}
|
| 731 |
+
if return_debug:
|
| 732 |
+
response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
|
| 733 |
+
result = _json_response(response)
|
| 734 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 735 |
+
return result
|
| 736 |
+
|
| 737 |
+
raise ScrapeGraphToolError("unsupported_action", f"Unsupported action: {action}")
|
| 738 |
+
except ScrapeGraphToolError as exc:
|
| 739 |
+
result = _error_response(action, exc.code, exc.message, exc.hint)
|
| 740 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 741 |
+
return result
|
| 742 |
+
except Exception as exc: # pragma: no cover - runtime integration path
|
| 743 |
+
code = "browser_unavailable" if "playwright" in str(exc).lower() or "chromium" in str(exc).lower() else "fetch_failed"
|
| 744 |
+
result = _error_response(action, code, f"ScrapeGraphAI action failed: {exc}")
|
| 745 |
+
_log_call_end("ScrapeGraphAI", _truncate_for_log(result))
|
| 746 |
+
return result
|
| 747 |
+
|
| 748 |
+
|
| 749 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio interface that exposes the ScrapeGraphAI tool.

    Gradio hands the input values to ``fn`` positionally, so the order of the
    widgets below has to stay aligned with the ``ScrapeGraphAI`` signature
    (not visible from here -- confirm before reordering).
    """
    input_widgets = [
        gr.Dropdown(choices=ACTION_CHOICES, value="extract", label="Action"),
        gr.Textbox(label="URL", placeholder="https://example.com"),
        gr.JSON(label="URLs", value=[]),
        gr.Textbox(label="Prompt", lines=4, placeholder="Extract pricing tiers and main limits."),
        gr.JSON(label="Schema JSON", value={}),
        gr.Dropdown(choices=RENDER_CHOICES, value="auto", label="Render Mode"),
        gr.Checkbox(label="Include Images", value=False),
        gr.Number(label="Depth", value=1, precision=0),
        gr.Number(label="Max Pages", value=4, precision=0),
        gr.Checkbox(label="Same Domain Only", value=True),
        gr.Number(label="Max URLs", value=8, precision=0),
        gr.Number(label="Max Images", value=5, precision=0),
        gr.Number(label="Max Chars", value=12000, precision=0),
        gr.Checkbox(label="Include Links", value=True),
        gr.Number(label="Timeout (seconds)", value=30, precision=0),
        gr.Textbox(label="Storage State Path", placeholder="Optional Playwright storage_state JSON path"),
        gr.Checkbox(label="Return Debug", value=False),
    ]
    # The tool returns a string, shown in a scrollable text area.
    result_box = gr.Textbox(label="Result", lines=20, max_lines=40)
    banner_html = "<div style=\"text-align:center\">Mistral-only structured scraping using ScrapeGraphAI graphs.</div>"
    return gr.Interface(
        fn=ScrapeGraphAI,
        inputs=input_widgets,
        outputs=result_box,
        title="ScrapeGraphAI",
        description=banner_html,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
__all__ = ["ScrapeGraphAI", "build_interface"]
|
Modules/Shell_Command.py
CHANGED
|
@@ -1,194 +1,194 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import platform
|
| 5 |
-
import shlex
|
| 6 |
-
import subprocess
|
| 7 |
-
from typing import Annotated
|
| 8 |
-
|
| 9 |
-
import gradio as gr
|
| 10 |
-
|
| 11 |
-
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 12 |
-
from ._docstrings import autodoc
|
| 13 |
-
from ._core import _resolve_path, ROOT_DIR, _display_path, ALLOW_ABS
|
| 14 |
-
import shutil
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def _detect_shell(prefer_powershell: bool = True) -> tuple[list[str], str]:
|
| 19 |
-
"""
|
| 20 |
-
Pick an appropriate shell for the host OS.
|
| 21 |
-
- Windows: use PowerShell by default, fall back to cmd.exe.
|
| 22 |
-
- POSIX: use /bin/bash if available, else /bin/sh.
|
| 23 |
-
Returns (shell_cmd_prefix, shell_name) where shell_cmd_prefix is the command list to launch the shell.
|
| 24 |
-
"""
|
| 25 |
-
system = platform.system().lower()
|
| 26 |
-
if system == "windows":
|
| 27 |
-
if prefer_powershell:
|
| 28 |
-
pwsh = shutil.which("pwsh")
|
| 29 |
-
candidates = [pwsh, shutil.which("powershell"), shutil.which("powershell.exe")]
|
| 30 |
-
for cand in candidates:
|
| 31 |
-
if cand:
|
| 32 |
-
return [cand, "-NoLogo", "-NoProfile", "-Command"], "powershell"
|
| 33 |
-
# Fallback to cmd
|
| 34 |
-
comspec = os.environ.get("ComSpec", r"C:\\Windows\\System32\\cmd.exe")
|
| 35 |
-
return [comspec, "/C"], "cmd"
|
| 36 |
-
# POSIX
|
| 37 |
-
bash = shutil.which("bash")
|
| 38 |
-
if bash:
|
| 39 |
-
return [bash, "-lc"], "bash"
|
| 40 |
-
sh = os.environ.get("SHELL", "/bin/sh")
|
| 41 |
-
return [sh, "-lc"], "sh"
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
# Detect shell at import time for docs/UI purposes
|
| 45 |
-
_DETECTED_SHELL_PREFIX, _DETECTED_SHELL_NAME = _detect_shell()
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
# Clarify path semantics and expose detected shell in summary
|
| 49 |
-
TOOL_SUMMARY = (
|
| 50 |
-
"Execute a shell command within a safe working directory under the tool root ('/'). "
|
| 51 |
-
"Paths must be relative to '/'. "
|
| 52 |
-
"Set workdir to '.' to use the root. "
|
| 53 |
-
"Absolute paths are disabled."
|
| 54 |
-
f"Detected shell: {_DETECTED_SHELL_NAME}."
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def _run_command(command: str, cwd: str, timeout: int) -> tuple[str, str, int]:
|
| 59 |
-
shell_prefix, shell_name = _detect_shell()
|
| 60 |
-
full_cmd = shell_prefix + [command]
|
| 61 |
-
try:
|
| 62 |
-
proc = subprocess.run(
|
| 63 |
-
full_cmd,
|
| 64 |
-
cwd=cwd,
|
| 65 |
-
stdout=subprocess.PIPE,
|
| 66 |
-
stderr=subprocess.PIPE,
|
| 67 |
-
text=True,
|
| 68 |
-
encoding="utf-8",
|
| 69 |
-
errors="replace",
|
| 70 |
-
timeout=timeout if timeout and timeout > 0 else None,
|
| 71 |
-
)
|
| 72 |
-
return proc.stdout, proc.stderr, proc.returncode
|
| 73 |
-
except subprocess.TimeoutExpired as exc:
|
| 74 |
-
return exc.stdout or "", (exc.stderr or "") + "\n[timeout]", 124
|
| 75 |
-
except Exception as exc:
|
| 76 |
-
return "", f"Execution failed: {exc}", 1
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
@autodoc(summary=TOOL_SUMMARY)
|
| 80 |
-
def Shell_Command(
|
| 81 |
-
command: Annotated[str, "Shell command to execute. Accepts multi-part pipelines as a single string."],
|
| 82 |
-
workdir: Annotated[str, "Working directory (relative to root unless UNSAFE_ALLOW_ABS_PATHS=1)."] = ".",
|
| 83 |
-
timeout: Annotated[int, "Timeout in seconds (0 = no timeout, be careful on public hosting)."] = 60,
|
| 84 |
-
) -> str:
|
| 85 |
-
_log_call_start("Shell_Command", command=command, workdir=workdir, timeout=timeout)
|
| 86 |
-
if not command or not command.strip():
|
| 87 |
-
result = "No command provided."
|
| 88 |
-
_log_call_end("Shell_Command", _truncate_for_log(result))
|
| 89 |
-
return result
|
| 90 |
-
|
| 91 |
-
abs_cwd, err = _resolve_path(workdir)
|
| 92 |
-
if err:
|
| 93 |
-
_log_call_end("Shell_Command", _truncate_for_log(err))
|
| 94 |
-
return err
|
| 95 |
-
if not os.path.exists(abs_cwd):
|
| 96 |
-
result = f"Working directory not found: {abs_cwd}"
|
| 97 |
-
_log_call_end("Shell_Command", _truncate_for_log(result))
|
| 98 |
-
return result
|
| 99 |
-
|
| 100 |
-
# Heuristic check for absolute paths in arguments if sandboxing is strictly enforced
|
| 101 |
-
# We look for typical absolute path patterns: "/..." or "C:\..."
|
| 102 |
-
# This is not perfect (e.g., inside strings) but helps enforce "Impossible" rule.
|
| 103 |
-
import re
|
| 104 |
-
if not ALLOW_ABS:
|
| 105 |
-
|
| 106 |
-
# Regex for Unix-style absolute path (start with /)
|
| 107 |
-
# or Windows-style absolute path (start with drive letter)
|
| 108 |
-
# We look for these patterns preceded by space or start of string
|
| 109 |
-
# to avoid matching arguments like --flag=/value (though those might be paths too!)
|
| 110 |
-
# Actually, matching ANY absolute path substring is safer for "Impossible".
|
| 111 |
-
# Patterns:
|
| 112 |
-
# Unix: / followed by non-space
|
| 113 |
-
# Win: X:\ followed by non-space
|
| 114 |
-
|
| 115 |
-
# Simple heuristic: if command contains potential absolute path
|
| 116 |
-
unix_abs = r"(?:\s|^)/[a-zA-Z0-9_.]"
|
| 117 |
-
win_abs = r"(?:\s|^)[a-zA-Z]:\\"
|
| 118 |
-
|
| 119 |
-
if re.search(unix_abs, command) or re.search(win_abs, command):
|
| 120 |
-
# We allow a few exceptions if needed, but for "Impossible" we block.
|
| 121 |
-
# Note: This might block flags like /C, but we run powershell/cmd separately.
|
| 122 |
-
# Wait, Windows flags start with /. 'dir /s'. This heuristic is dangerous for Windows flags.
|
| 123 |
-
# We should refine it.
|
| 124 |
-
pass
|
| 125 |
-
|
| 126 |
-
# Refined check:
|
| 127 |
-
# On Windows, flags start with /, so checking for / is bad.
|
| 128 |
-
# But paths in Windows usually use \ or /.
|
| 129 |
-
# Let's focus on Unix roots and Windows Drive roots.
|
| 130 |
-
|
| 131 |
-
has_abs_path = False
|
| 132 |
-
if platform.system().lower() == "windows":
|
| 133 |
-
# Look for Drive:\ - anchored to start of string, space, or quote to avoid matching URLs like https://
|
| 134 |
-
if re.search(r"(?:\s|^|['\"])[a-zA-Z]:[\\/]", command):
|
| 135 |
-
has_abs_path = True
|
| 136 |
-
# On Windows with PowerShell, /path is valid too, but confusing with flags.
|
| 137 |
-
# We'll trust that Drive:\ is the main vector to save OUTSIDE tool root (which is likely C: or P:).
|
| 138 |
-
# If tool root is P:/Code..., writing to C:/... requires Drive arg.
|
| 139 |
-
else:
|
| 140 |
-
# Unix: Look for / at start of token, but exclude common flags?
|
| 141 |
-
# Actually, just looking for " /" or start "/" is decent.
|
| 142 |
-
# But flags like /dev/null are common.
|
| 143 |
-
# Maybe we just warn or block known dangerous patterns?
|
| 144 |
-
# User said "Make it impossible". a broad block is better than a leak.
|
| 145 |
-
if re.search(r"(?:\s|^)/", command):
|
| 146 |
-
# This blocks flags like /bin/bash or paths.
|
| 147 |
-
has_abs_path = True
|
| 148 |
-
|
| 149 |
-
if has_abs_path:
|
| 150 |
-
result = "Error: Absolute paths are not allowed in commands to ensure sandbox safety. Use relative paths."
|
| 151 |
-
_log_call_end("Shell_Command", _truncate_for_log(result))
|
| 152 |
-
return result
|
| 153 |
-
|
| 154 |
-
# Capture shell used for transparency
|
| 155 |
-
_, shell_name = _detect_shell()
|
| 156 |
-
stdout, stderr, code = _run_command(command, cwd=abs_cwd, timeout=timeout)
|
| 157 |
-
display_cwd = _display_path(abs_cwd)
|
| 158 |
-
header = (
|
| 159 |
-
f"Command: {command}\n"
|
| 160 |
-
f"CWD: {display_cwd}\n"
|
| 161 |
-
f"Root: /\n"
|
| 162 |
-
f"Shell: {shell_name}\n"
|
| 163 |
-
f"Exit code: {code}\n"
|
| 164 |
-
f"--- STDOUT ---\n"
|
| 165 |
-
)
|
| 166 |
-
output = header + (stdout or "<empty>") + "\n--- STDERR ---\n" + (stderr or "<empty>")
|
| 167 |
-
_log_call_end("Shell_Command", _truncate_for_log(f"exit={code} stdout={len(stdout)} stderr={len(stderr)}"))
|
| 168 |
-
return output
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
def build_interface() -> gr.Interface:
|
| 172 |
-
return gr.Interface(
|
| 173 |
-
fn=Shell_Command,
|
| 174 |
-
inputs=[
|
| 175 |
-
gr.Textbox(label="Command", placeholder="echo hello || dir", lines=2, info="Shell command to execute"),
|
| 176 |
-
gr.Textbox(label="Workdir", value=".", max_lines=1, info="Working directory (relative to root)"),
|
| 177 |
-
gr.Slider(minimum=0, maximum=600, step=5, value=60, label="Timeout (seconds)", info="Timeout in seconds (0 = no timeout)"),
|
| 178 |
-
],
|
| 179 |
-
outputs=gr.Textbox(label="Output", lines=20),
|
| 180 |
-
title="Shell Command",
|
| 181 |
-
description=(
|
| 182 |
-
"<div style=\"text-align:center; overflow:hidden;\">"
|
| 183 |
-
"Run a shell command under the same safe root as File System. "
|
| 184 |
-
"Absolute paths are disabled, use relative paths. "
|
| 185 |
-
f"Detected shell: {_DETECTED_SHELL_NAME}. "
|
| 186 |
-
"</div>"
|
| 187 |
-
),
|
| 188 |
-
api_description=TOOL_SUMMARY,
|
| 189 |
-
flagging_mode="never",
|
| 190 |
-
submit_btn="Run",
|
| 191 |
-
)
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
__all__ = ["Shell_Command", "build_interface"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import platform
|
| 5 |
+
import shlex
|
| 6 |
+
import subprocess
|
| 7 |
+
from typing import Annotated
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
|
| 11 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 12 |
+
from ._docstrings import autodoc
|
| 13 |
+
from ._core import _resolve_path, ROOT_DIR, _display_path, ALLOW_ABS
|
| 14 |
+
import shutil
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _detect_shell(prefer_powershell: bool = True) -> tuple[list[str], str]:
    """
    Pick an appropriate shell for the host OS.

    - Windows: when ``prefer_powershell`` is true, use the first of ``pwsh``,
      ``powershell`` or ``powershell.exe`` found on PATH; otherwise (or if none
      is found) fall back to ``%ComSpec%``, defaulting to cmd.exe.
    - POSIX: use bash if it is on PATH, else ``$SHELL`` (default ``/bin/sh``).

    Returns (shell_cmd_prefix, shell_name) where shell_cmd_prefix is the command
    list to launch the shell; callers append the command string as the last item.
    """
    system = platform.system().lower()
    if system == "windows":
        if prefer_powershell:
            # Probe lazily, in preference order; stop at the first hit.
            for exe_name in ("pwsh", "powershell", "powershell.exe"):
                found = shutil.which(exe_name)
                if found:
                    return [found, "-NoLogo", "-NoProfile", "-Command"], "powershell"
        # Fallback to cmd. The default is a raw string, so each separator must
        # be a single backslash (doubling them would put "\\" into the path).
        comspec = os.environ.get("ComSpec", r"C:\Windows\System32\cmd.exe")
        return [comspec, "/C"], "cmd"
    # POSIX
    bash = shutil.which("bash")
    if bash:
        return [bash, "-lc"], "bash"
    # Reported as "sh" regardless of what $SHELL actually points at.
    sh = os.environ.get("SHELL", "/bin/sh")
    return [sh, "-lc"], "sh"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Detect shell at import time for docs/UI purposes
_DETECTED_SHELL_PREFIX, _DETECTED_SHELL_NAME = _detect_shell()


# Clarify path semantics and expose detected shell in summary.
# The fragments are joined by implicit concatenation, so every fragment except
# the last must end with a space or the sentences run together.
TOOL_SUMMARY = (
    "Execute a shell command within a safe working directory under the tool root ('/'). "
    "Paths must be relative to '/'. "
    "Set workdir to '.' to use the root. "
    "Absolute paths are disabled. "
    f"Detected shell: {_DETECTED_SHELL_NAME}."
)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _run_command(command: str, cwd: str, timeout: int) -> tuple[str, str, int]:
    """Run ``command`` through the detected shell and capture its output.

    Args:
        command: Command line handed to the shell as one string.
        cwd: Directory the shell process is started in.
        timeout: Seconds to wait; a falsy or non-positive value disables the timeout.

    Returns:
        ``(stdout, stderr, exit_code)``. A timeout yields exit code 124 with
        ``[timeout]`` appended to stderr; any other failure to start or run the
        process yields exit code 1 with the error text in stderr.
    """

    def _as_text(captured) -> str:
        # subprocess documents that TimeoutExpired output is bytes whenever
        # anything was captured, even in text mode, so normalise it before it
        # is concatenated with str.
        if not captured:
            return ""
        if isinstance(captured, bytes):
            return captured.decode("utf-8", errors="replace")
        return captured

    shell_prefix, _ = _detect_shell()
    full_cmd = shell_prefix + [command]
    try:
        proc = subprocess.run(
            full_cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout if timeout and timeout > 0 else None,
        )
        return proc.stdout, proc.stderr, proc.returncode
    except subprocess.TimeoutExpired as exc:
        return _as_text(exc.stdout), _as_text(exc.stderr) + "\n[timeout]", 124
    except Exception as exc:
        # Deliberate catch-all: launch errors are reported to the caller as
        # tool output rather than raised.
        return "", f"Execution failed: {exc}", 1
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@autodoc(summary=TOOL_SUMMARY)
def Shell_Command(
    command: Annotated[str, "Shell command to execute. Accepts multi-part pipelines as a single string."],
    workdir: Annotated[str, "Working directory (relative to root unless UNSAFE_ALLOW_ABS_PATHS=1)."] = ".",
    timeout: Annotated[int, "Timeout in seconds (0 = no timeout, be careful on public hosting)."] = 60,
) -> str:
    # No docstring is written here on purpose: the @autodoc decorator is given
    # the summary text (its exact handling of __doc__ is not visible here).
    #
    # Flow: validate the command, resolve the working directory, reject
    # commands that look like they contain absolute paths (unless ALLOW_ABS is
    # set), run the command and return a text report. Every exit path logs via
    # _log_call_end before returning a string.
    _log_call_start("Shell_Command", command=command, workdir=workdir, timeout=timeout)
    if not command or not command.strip():
        result = "No command provided."
        _log_call_end("Shell_Command", _truncate_for_log(result))
        return result

    abs_cwd, err = _resolve_path(workdir)
    if err:
        _log_call_end("Shell_Command", _truncate_for_log(err))
        return err
    if not os.path.exists(abs_cwd):
        result = f"Working directory not found: {abs_cwd}"
        _log_call_end("Shell_Command", _truncate_for_log(result))
        return result

    # Heuristic check for absolute paths in the command text. This is pattern
    # matching on the raw string, not shell parsing, so it is deliberately
    # broad: a false rejection is preferred over a path escaping the root.
    import re

    if not ALLOW_ABS:
        if platform.system().lower() == "windows":
            # Drive-letter roots (C:\ or C:/) at the start of the string or
            # after whitespace or a quote. A bare leading "/" is not checked on
            # Windows because it is also the flag prefix (e.g. "dir /s").
            has_abs_path = bool(re.search(r"(?:\s|^|['\"])[a-zA-Z]:[\\/]", command))
        else:
            # Any "/" at the start of the string or right after whitespace.
            # This also rejects harmless tokens such as /dev/null.
            has_abs_path = bool(re.search(r"(?:\s|^)/", command))

        if has_abs_path:
            result = "Error: Absolute paths are not allowed in commands to ensure sandbox safety. Use relative paths."
            _log_call_end("Shell_Command", _truncate_for_log(result))
            return result

    # Capture shell used for transparency
    _, shell_name = _detect_shell()
    stdout, stderr, code = _run_command(command, cwd=abs_cwd, timeout=timeout)
    display_cwd = _display_path(abs_cwd)
    header = (
        f"Command: {command}\n"
        f"CWD: {display_cwd}\n"
        f"Root: /\n"
        f"Shell: {shell_name}\n"
        f"Exit code: {code}\n"
        f"--- STDOUT ---\n"
    )
    output = header + (stdout or "<empty>") + "\n--- STDERR ---\n" + (stderr or "<empty>")
    # Only sizes are logged, not the captured output itself.
    _log_call_end("Shell_Command", _truncate_for_log(f"exit={code} stdout={len(stdout)} stderr={len(stderr)}"))
    return output
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio interface for Shell_Command.

    Inputs are, in order, the command text, the working directory and the
    timeout slider; the output is a single text area.
    """
    command_box = gr.Textbox(label="Command", placeholder="echo hello || dir", lines=2, info="Shell command to execute")
    workdir_box = gr.Textbox(label="Workdir", value=".", max_lines=1, info="Working directory (relative to root)")
    timeout_slider = gr.Slider(minimum=0, maximum=600, step=5, value=60, label="Timeout (seconds)", info="Timeout in seconds (0 = no timeout)")
    output_box = gr.Textbox(label="Output", lines=20)
    # HTML banner shown under the title; includes the shell detected at import.
    banner_html = (
        "<div style=\"text-align:center; overflow:hidden;\">"
        "Run a shell command under the same safe root as File System. "
        "Absolute paths are disabled, use relative paths. "
        f"Detected shell: {_DETECTED_SHELL_NAME}. "
        "</div>"
    )
    return gr.Interface(
        fn=Shell_Command,
        inputs=[command_box, workdir_box, timeout_slider],
        outputs=output_box,
        title="Shell Command",
        description=banner_html,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Run",
    )
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
__all__ = ["Shell_Command", "build_interface"]
|
Modules/Web_Search.py
CHANGED
|
@@ -1,499 +1,517 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from typing import Annotated, List
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from ddgs import DDGS
|
| 8 |
-
|
| 9 |
-
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
|
| 10 |
-
from ._docstrings import autodoc
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
"
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
""
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
if
|
| 70 |
-
return
|
| 71 |
-
#
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
r"\b(\d
|
| 108 |
-
r"
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
if
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
if
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Annotated, List, Literal
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from ddgs import DDGS
|
| 8 |
+
|
| 9 |
+
from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log
|
| 10 |
+
from ._docstrings import autodoc
|
| 11 |
+
from ._searxng_client import SearXNGClient, TimeRange
|
| 12 |
+
from ._query_optimizer import get_optimizer
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Single source of truth for the LLM-facing tool description.
# It is passed both to the @autodoc decorator on Web_Search and to the
# Gradio interface as api_description, so the two can never drift apart.
TOOL_SUMMARY = (
    "Run a web search across text, news, images, videos, or books. "
    "Supports multiple backends (DuckDuckGo, SearXNG) with optional AI query optimization. "
    "Readable results include pagination hints and next_offset when more results are available."
)


# Safe-search level forwarded as `safesearch=` to the DDGS text/news/images/videos calls.
_SAFESEARCH_LEVEL = "off"

# Defaults and choices for newly added parameters.
# These are the options offered by the "Backend" radio in build_interface().
BACKEND_CHOICES = [
    "auto",
    "duckduckgo",
    "searxng",
    "bing",
    "brave",
    "yahoo",
    "wikipedia",
]

# Allowed backends per type (explicit selection set).
# The first entry of each list is the fallback used by _resolve_backend()
# when the requested backend is not valid for that search type.
_ALLOWED_BACKENDS = {
    "text": ["duckduckgo", "searxng", "bing", "brave", "yahoo", "wikipedia"],
    "news": ["duckduckgo", "searxng", "bing", "yahoo"],
    "images": ["duckduckgo", "searxng"],
    "videos": ["duckduckgo"],
    "books": ["annasarchive"],
}

# Auto order per type (used when backend == "auto"); wikipedia excluded for text.
# _resolve_backend() joins each list into a comma-separated string.
_AUTO_ORDER = {
    "text": ["searxng", "duckduckgo", "bing", "brave", "yahoo"],
    "news": ["searxng", "duckduckgo", "bing", "yahoo"],
    "images": ["searxng", "duckduckgo"],
    "videos": ["duckduckgo"],
    "books": ["annasarchive"],
}

# Date filter choices: canonical values used by resolver
# (_resolve_timelimit maps them to DDGS codes; "any" means no time limit).
DATE_FILTER_CHOICES = ["any", "day", "week", "month", "year"]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _resolve_backend(search_type: str, backend_choice: str) -> str:
    """Pick the backend string to use for a search type and a user choice.

    Rules, in order:
    - An unknown ``search_type`` is treated as ``"text"``.
    - ``"auto"`` yields the type's automatic order joined with ``", "``.
    - Book searches always use ``"annasarchive"``.
    - A choice that is allowed for the type is returned unchanged.
    - Anything else falls back to the first allowed backend for the type.
    """
    kind = "text" if search_type not in _ALLOWED_BACKENDS else search_type

    if backend_choice == "auto":
        return ", ".join(_AUTO_ORDER[kind])
    if kind == "books":
        return "annasarchive"

    permitted = _ALLOWED_BACKENDS[kind]
    # Unsupported explicit choices degrade to the type's primary backend.
    return backend_choice if backend_choice in permitted else permitted[0]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _resolve_timelimit(date_filter: str, search_type: str) -> str | None:
|
| 79 |
+
"""Map UI date filter to DDGS timelimit code per endpoint.
|
| 80 |
+
|
| 81 |
+
Returns one of: None, 'd', 'w', 'm', 'y'. For news/videos (which support d/w/m),
|
| 82 |
+
selecting 'year' will coerce to 'm' to stay within supported range.
|
| 83 |
+
"""
|
| 84 |
+
normalized = (date_filter or "any").strip().lower()
|
| 85 |
+
if normalized in ("any", "none", ""):
|
| 86 |
+
return None
|
| 87 |
+
mapping = {
|
| 88 |
+
"day": "d",
|
| 89 |
+
"week": "w",
|
| 90 |
+
"month": "m",
|
| 91 |
+
"year": "y",
|
| 92 |
+
}
|
| 93 |
+
code = mapping.get(normalized)
|
| 94 |
+
if not code:
|
| 95 |
+
return None
|
| 96 |
+
if search_type in ("news", "videos") and code == "y":
|
| 97 |
+
return "m"
|
| 98 |
+
return code
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _extract_date_from_snippet(snippet: str) -> str:
|
| 102 |
+
if not snippet:
|
| 103 |
+
return ""
|
| 104 |
+
import re
|
| 105 |
+
|
| 106 |
+
date_patterns = [
|
| 107 |
+
r"\b(\d{4}[-/]\d{1,2}[-/]\d{1,2})\b",
|
| 108 |
+
r"\b([A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})\b",
|
| 109 |
+
r"\b(\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})\b",
|
| 110 |
+
r"\b(\d+\s+(?:day|week|month|year)s?\s+ago)\b",
|
| 111 |
+
r"(?:Published|Updated|Posted):\s*([^,\n]+?)(?:[,\n]|$)",
|
| 112 |
+
]
|
| 113 |
+
for pattern in date_patterns:
|
| 114 |
+
matches = re.findall(pattern, snippet, re.IGNORECASE)
|
| 115 |
+
if matches:
|
| 116 |
+
return matches[0].strip()
|
| 117 |
+
return ""
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _format_search_result(result: dict, search_type: str, index: int) -> List[str]:
|
| 121 |
+
lines: List[str] = []
|
| 122 |
+
if search_type == "text":
|
| 123 |
+
title = result.get("title", "").strip()
|
| 124 |
+
url = result.get("href", "").strip()
|
| 125 |
+
snippet = result.get("body", "").strip()
|
| 126 |
+
date = _extract_date_from_snippet(snippet)
|
| 127 |
+
lines.append(f"{index}. {title}")
|
| 128 |
+
lines.append(f" URL: {url}")
|
| 129 |
+
if snippet:
|
| 130 |
+
lines.append(f" Summary: {snippet}")
|
| 131 |
+
if date:
|
| 132 |
+
lines.append(f" Date: {date}")
|
| 133 |
+
elif search_type == "news":
|
| 134 |
+
title = result.get("title", "").strip()
|
| 135 |
+
url = result.get("url", "").strip()
|
| 136 |
+
body = result.get("body", "").strip()
|
| 137 |
+
date = result.get("date", "").strip()
|
| 138 |
+
source = result.get("source", "").strip()
|
| 139 |
+
lines.append(f"{index}. {title}")
|
| 140 |
+
lines.append(f" URL: {url}")
|
| 141 |
+
if source:
|
| 142 |
+
lines.append(f" Source: {source}")
|
| 143 |
+
if date:
|
| 144 |
+
lines.append(f" Date: {date}")
|
| 145 |
+
if body:
|
| 146 |
+
lines.append(f" Summary: {body}")
|
| 147 |
+
elif search_type == "images":
|
| 148 |
+
title = result.get("title", "").strip()
|
| 149 |
+
image_url = result.get("image", "").strip()
|
| 150 |
+
source_url = result.get("url", "").strip()
|
| 151 |
+
source = result.get("source", "").strip()
|
| 152 |
+
width = result.get("width", "")
|
| 153 |
+
height = result.get("height", "")
|
| 154 |
+
lines.append(f"{index}. {title}")
|
| 155 |
+
lines.append(f" Image: {image_url}")
|
| 156 |
+
lines.append(f" Source: {source_url}")
|
| 157 |
+
if source:
|
| 158 |
+
lines.append(f" Publisher: {source}")
|
| 159 |
+
if width and height:
|
| 160 |
+
lines.append(f" Dimensions: {width}x{height}")
|
| 161 |
+
elif search_type == "videos":
|
| 162 |
+
title = result.get("title", "").strip()
|
| 163 |
+
description = result.get("description", "").strip()
|
| 164 |
+
duration = result.get("duration", "").strip()
|
| 165 |
+
published = result.get("published", "").strip()
|
| 166 |
+
uploader = result.get("uploader", "").strip()
|
| 167 |
+
embed_url = result.get("embed_url", "").strip()
|
| 168 |
+
lines.append(f"{index}. {title}")
|
| 169 |
+
if embed_url:
|
| 170 |
+
lines.append(f" Video: {embed_url}")
|
| 171 |
+
if uploader:
|
| 172 |
+
lines.append(f" Uploader: {uploader}")
|
| 173 |
+
if duration:
|
| 174 |
+
lines.append(f" Duration: {duration}")
|
| 175 |
+
if published:
|
| 176 |
+
lines.append(f" Published: {published}")
|
| 177 |
+
if description:
|
| 178 |
+
lines.append(f" Description: {description}")
|
| 179 |
+
elif search_type == "books":
|
| 180 |
+
title = result.get("title", "").strip()
|
| 181 |
+
url = result.get("url", "").strip()
|
| 182 |
+
body = result.get("body", "").strip()
|
| 183 |
+
lines.append(f"{index}. {title}")
|
| 184 |
+
lines.append(f" URL: {url}")
|
| 185 |
+
if body:
|
| 186 |
+
lines.append(f" Description: {body}")
|
| 187 |
+
return lines
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
@autodoc(
    summary=TOOL_SUMMARY,
)
def Web_Search(
    query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
    max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
    page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
    offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
    search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
    backend: Annotated[str, "Search backend: 'duckduckgo', 'searxng', or 'auto' (SearXNG first, then DDG)."] = "auto",
    date_filter: Annotated[str, "Time filter: any, day, week, month, year."] = "any",
    optimize_query: Annotated[bool, "Use AI to optimize the query for better results (adds ~2s latency)."] = False,
) -> str:
    # Run a search and return a human-readable, paginated text report.
    #
    # Flow: optionally optimize the query -> validate/clamp arguments ->
    # fetch raw results (SearXNG and/or DDGS) -> fall back from news to text
    # when news is empty -> slice the requested page -> format the report.
    # Every exit path returns a string and is logged via _log_call_end.
    # (No docstring is added here on purpose: the @autodoc decorator is given
    # the summary text explicitly.)
    _log_call_start(
        "Web_Search",
        query=query,
        max_results=max_results,
        page=page,
        search_type=search_type,
        offset=offset,
        backend=backend,
        date_filter=date_filter,
        optimize_query=optimize_query,
    )

    # Query optimization (optional, best-effort). Skipped for blank queries,
    # and a blank/failed optimization keeps the caller's original query.
    if optimize_query and query and query.strip():
        try:
            optimized, _metadata = get_optimizer().optimize_for_search_engine(query)
            if optimized and optimized.strip():
                query = optimized
        except Exception as exc:
            print(f"[Web_Search] Query optimization failed: {exc}", flush=True)

    if not query or not query.strip():
        result = "No search query provided. Please enter a search term."
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    # Clamp numeric arguments into their supported ranges.
    max_results = max(1, min(20, max_results))
    page = max(1, page)
    offset = max(0, offset)
    if search_type not in ("text", "news", "images", "videos", "books"):
        search_type = "text"

    # An explicit offset wins over the page number.
    if offset > 0:
        actual_offset = offset
        calculated_page = (offset // max_results) + 1
    else:
        actual_offset = (page - 1) * max_results
        calculated_page = page
    total_needed = actual_offset + max_results

    original_search_type = search_type
    backend_choice = (backend or "auto").lower()

    # Map date_filter to SearXNG TimeRange (normalised the same way as
    # _resolve_timelimit so " Week " and "week" behave identically).
    _TIME_RANGE_MAP = {
        "day": TimeRange.DAY,
        "week": TimeRange.WEEK,
        "month": TimeRange.MONTH,
        "year": TimeRange.YEAR,
    }
    searxng_time_range = _TIME_RANGE_MAP.get((date_filter or "").strip().lower())

    # Search types the SearXNG branch below knows how to serve.
    searxng_types = ("text", "news", "images")

    def _perform_searxng_search(stype: str) -> list[dict]:
        """Perform search using SearXNG backend; returns [] on any failure."""
        if stype not in searxng_types:
            # Nothing to ask SearXNG for; do not spend a rate-limiter slot.
            return []
        try:
            _search_rate_limiter.acquire()
            with SearXNGClient() as client:
                if stype == "text":
                    hits = client.text(query, max_results=total_needed, time_range=searxng_time_range)
                    return [
                        {
                            "title": r.title or "",
                            "href": r.url or "",
                            "body": r.content or "",
                            "engine": r.engine,
                        }
                        for r in hits
                    ]
                if stype == "news":
                    hits = client.news(query, max_results=total_needed, time_range=searxng_time_range)
                    return [
                        {
                            "title": r.title or "",
                            "url": r.url or "",
                            "body": r.content or "",
                            "date": r.published_date or "",
                            "source": r.engine or "",
                        }
                        for r in hits
                    ]
                hits = client.images(query, max_results=total_needed)
                return [
                    {
                        "title": r.title or "",
                        "image": r.img_src or "",
                        "url": r.url or "",
                        "source": r.source or r.engine or "",
                        "thumbnail": r.thumbnail_src,
                    }
                    for r in hits
                ]
        except Exception as exc:
            print(f"[Web_Search] SearXNG error: {exc}", flush=True)
            return []

    def _ddgs_backend(stype: str) -> str:
        """Backend string to hand to DDGS for this search type."""
        if backend_choice != "auto":
            # Explicit choice: validated (and, if unsupported for this type,
            # replaced by the type's first allowed backend) by the resolver.
            return _resolve_backend(stype, backend_choice)
        if stype in ("text", "news"):
            # Auto mode reaches DDGS only after SearXNG produced nothing.
            return "duckduckgo"
        # "searxng" is served by SearXNGClient above, not by DDGS, so drop it
        # from the automatic order before passing the list on.
        names = [name.strip() for name in _resolve_backend(stype, "auto").split(",")]
        return ", ".join(name for name in names if name != "searxng")

    def _perform_search(stype: str) -> list[dict]:
        """Fetch raw results for one search type; raises Exception with a friendly message."""
        # Explicit SearXNG selection, for the types SearXNG serves.
        if backend_choice == "searxng" and stype in searxng_types:
            return _perform_searxng_search(stype)

        # Auto: SearXNG first, then DDG fallback.
        if backend_choice == "auto" and stype in searxng_types:
            searxng_results = _perform_searxng_search(stype)
            if searxng_results:
                return searxng_results
            print("[Web_Search] SearXNG returned no results, falling back to DuckDuckGo", flush=True)

        try:
            _search_rate_limiter.acquire()
            with DDGS() as ddgs:
                # Over-fetch a little so the "more results available" hint is meaningful.
                kwargs = {"max_results": total_needed + 10, "backend": _ddgs_backend(stype)}
                if stype in ("text", "news", "images", "videos"):
                    kwargs["safesearch"] = _SAFESEARCH_LEVEL
                    kwargs["timelimit"] = _resolve_timelimit(date_filter, stype)
                    search_fn = getattr(ddgs, stype)
                else:
                    # Books take neither safesearch nor timelimit.
                    search_fn = ddgs.books
                try:
                    return list(search_fn(query, **kwargs))
                except Exception as inner_exc:
                    lowered_inner = str(inner_exc).lower()
                    if "no results" in lowered_inner or "not found" in lowered_inner:
                        return []
                    raise
        except Exception as exc:
            error_msg = f"Search failed: {str(exc)[:200]}"
            lowered = str(exc).lower()
            if "blocked" in lowered or "rate" in lowered:
                error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
            elif "timeout" in lowered:
                error_msg = "Search timed out. Please try again with a simpler query."
            elif "network" in lowered or "connection" in lowered:
                error_msg = "Network connection error. Please check your internet connection and try again."
            elif "no results" in lowered or "not found" in lowered:
                return []
            # Keep the original exception attached for debugging.
            raise Exception(error_msg) from exc

    try:
        raw = _perform_search(search_type)
    except Exception as exc:
        result = f"Error: {exc}"
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    # Smart fallback: an empty news search is retried as a general text search.
    used_fallback = False
    fallback_attempted = False
    if not raw and search_type == "news":
        fallback_attempted = True
        try:
            raw = _perform_search("text")
        except Exception as exc:
            # Best-effort only: report it and carry on with "no results".
            print(f"[Web_Search] Text fallback for news failed: {exc}", flush=True)
            raw = []
        if raw:
            used_fallback = True
            search_type = "text"

    if not raw:
        # The note is keyed on the attempt, not on its success: in this branch
        # the fallback (if any) necessarily came back empty.
        fallback_note = " (also tried 'text' search as fallback)" if fallback_attempted else ""
        result = f"No {original_search_type} results found for query: {query}{fallback_note}"
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    paginated_results = raw[actual_offset: actual_offset + max_results]
    if not paginated_results:
        # raw is non-empty and max_results >= 1, so an empty slice can only
        # mean the offset lies beyond the last available result.
        result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    total_available = len(raw)
    start_num = actual_offset + 1
    end_num = actual_offset + len(paginated_results)
    next_offset = end_num
    search_label = original_search_type.title()
    if used_fallback:
        search_label += " → Text (Smart Fallback)"

    now_dt = datetime.now().astimezone()
    date_str = now_dt.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
    if not date_str:
        date_str = now_dt.isoformat()

    pagination_info = f"Page {calculated_page}"
    if offset > 0:
        pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
    lines = [f"Current Date: {date_str}", f"{search_label} search results for: {query}"]
    if used_fallback:
        lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
    lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
    for number, item in enumerate(paginated_results, start_num):
        # search_type is "text" here when the news fallback was used.
        lines.extend(_format_search_result(item, search_type, number))
        lines.append("")
    if total_available > end_num:
        lines.append("💡 More results available:")
        lines.append(f" • Next page: page={calculated_page + 1}")
        lines.append(f" • Next offset: offset={next_offset}")
        lines.append(f" • Use offset={next_offset} to continue exactly from result {next_offset + 1}")
    result = "\n".join(lines)

    search_info = f"type={original_search_type}"
    if used_fallback:
        search_info += "→text"
    _log_call_end("Web_Search", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
    return result
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
def build_interface() -> gr.Interface:
    """Create the Gradio interface that exposes ``Web_Search``.

    The input components are listed in the same order as the parameters of
    ``Web_Search``: query, max results, page, offset, search type, backend,
    date filter, optimize-query flag. Output is a read-only text box.
    """
    query_box = gr.Textbox(label="Query", placeholder="topic OR site:example.com", max_lines=1, info="The search query")
    max_results_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results", info="Number of results to return (1–20)")
    page_slider = gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)")
    offset_slider = gr.Slider(
        minimum=0,
        maximum=1000,
        value=0,
        step=1,
        label="Offset",
        info="Result offset to start from (overrides page if > 0, use next_offset from previous search)",
    )
    type_radio = gr.Radio(
        label="Search Type",
        choices=["text", "news", "images", "videos", "books"],
        value="text",
        info="Type of content to search for",
    )
    backend_radio = gr.Radio(
        label="Backend",
        choices=BACKEND_CHOICES,
        value="auto",
        info="Search backend: auto (SearXNG → DDG), searxng, or duckduckgo",
    )
    date_radio = gr.Radio(
        label="Date filter",
        choices=DATE_FILTER_CHOICES,
        value="any",
        info="Limit results to: day, week, month, or year",
    )
    optimize_box = gr.Checkbox(
        label="Optimize Query",
        value=False,
        info="Use AI to optimize the query for better results (adds ~2s latency)",
    )
    results_box = gr.Textbox(label="Search Results", interactive=False, lines=20, max_lines=20)

    blurb = (
        "<div style=\"text-align:center\">Multi-backend web search (SearXNG + DuckDuckGo) with optional AI query optimization. "
        "Supports text, news, images, videos, and books. Auto backend tries SearXNG first, then DDG fallback.</div>"
    )

    return gr.Interface(
        fn=Web_Search,
        inputs=[
            query_box,
            max_results_slider,
            page_slider,
            offset_slider,
            type_radio,
            backend_radio,
            date_radio,
            optimize_box,
        ],
        outputs=results_box,
        title="Web Search",
        description=blurb,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Search",
    )
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
__all__ = ["Web_Search", "build_interface"]
|
Modules/_core.py
CHANGED
|
@@ -1,861 +1,861 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Core shared utilities for the Nymbo-Tools MCP server.
|
| 3 |
-
|
| 4 |
-
Consolidates three key areas:
|
| 5 |
-
1. Sandboxed filesystem operations (path resolution, reading, writing, safe_open)
|
| 6 |
-
2. Sandboxed Python execution (code interpreter, agent terminal)
|
| 7 |
-
3. Hugging Face inference utilities (token, providers, error handling)
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
from __future__ import annotations
|
| 11 |
-
|
| 12 |
-
import ast
|
| 13 |
-
import json
|
| 14 |
-
import os
|
| 15 |
-
import re
|
| 16 |
-
import stat
|
| 17 |
-
import sys
|
| 18 |
-
from datetime import datetime
|
| 19 |
-
from io import StringIO
|
| 20 |
-
from typing import Any, Callable, Optional, TypeVar
|
| 21 |
-
|
| 22 |
-
import gradio as gr
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# ===========================================================================
|
| 26 |
-
# Part 0: Tree Rendering Utilities
|
| 27 |
-
# ===========================================================================
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def _fmt_size(num_bytes: int) -> str:
|
| 31 |
-
"""Format byte size as human-readable string."""
|
| 32 |
-
units = ["B", "KB", "MB", "GB"]
|
| 33 |
-
size = float(num_bytes)
|
| 34 |
-
for unit in units:
|
| 35 |
-
if size < 1024.0:
|
| 36 |
-
return f"{size:.1f} {unit}"
|
| 37 |
-
size /= 1024.0
|
| 38 |
-
return f"{size:.1f} TB"
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
def build_tree(entries: list[tuple[str, dict]]) -> dict:
    """
    Turn a flat list of path entries into a nested dictionary tree.

    Args:
        entries: ``(path, metadata)`` tuples with forward-slash paths.
                 A path that ends with '/' denotes a directory.

    Returns:
        A nested dict. Each level holds its files as ``(name, metadata)``
        tuples under "__files__", one sub-dict per directory name, and a
        directory's own non-empty metadata under "__meta__".
    """
    tree: dict = {"__files__": []}

    for raw_path, meta in entries:
        *ancestors, leaf = raw_path.rstrip("/").split("/")

        # Walk down to the parent level, creating missing directories.
        cursor = tree
        for segment in ancestors:
            if segment not in cursor:
                cursor[segment] = {"__files__": []}
            cursor = cursor[segment]

        if not raw_path.endswith("/"):
            cursor["__files__"].append((leaf, meta))
            continue

        if leaf not in cursor:
            cursor[leaf] = {"__files__": []}
        if meta:
            cursor[leaf]["__meta__"] = meta

    return tree
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def render_tree(
    node: dict,
    prefix: str = "",
    format_entry: Optional[Callable[[str, dict, bool], str]] = None,
) -> list[str]:
    """
    Draw a tree produced by build_tree() using box-drawing connectors.

    Args:
        node: Nested dict from build_tree()
        prefix: Text placed in front of every line at this level
        format_entry: Optional ``(name, metadata, is_dir) -> label`` callback.
            Without it, directories render as "name/" and files as "name"
            or "name (size)" when the metadata carries a "size".

    Returns:
        The rendered lines: directories first (sorted by name, each followed
        by its own subtree), then files (sorted by name).
    """

    def _plain_label(name: str, meta: dict, is_dir: bool) -> str:
        if is_dir:
            return f"{name}/"
        size = meta.get("size")
        return name if size is None else f"{name} ({_fmt_size(size)})"

    label_for = format_entry or _plain_label

    dir_names = sorted(key for key in node if key not in ("__files__", "__meta__"))
    file_items = sorted(node.get("__files__", []), key=lambda item: item[0])

    # One row per child: (is_dir, name, subtree, metadata).
    rows = [(True, name, node[name], node[name].get("__meta__", {})) for name in dir_names]
    rows.extend((False, name, None, meta) for name, meta in file_items)

    lines: list[str] = []
    final_pos = len(rows) - 1
    for pos, (is_dir, name, subtree, meta) in enumerate(rows):
        at_end = pos == final_pos
        branch = "└── " if at_end else "├── "
        lines.append(f"{prefix}{branch}{label_for(name, meta, is_dir)}")
        if is_dir:
            # Children of a non-final entry keep the vertical rule going.
            nested_prefix = prefix + (" " if at_end else "│ ")
            lines.extend(render_tree(subtree, nested_prefix, format_entry))

    return lines
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
def walk_and_build_tree(
    abs_path: str,
    *,
    show_hidden: bool = False,
    recursive: bool = False,
    max_entries: int = 100,
) -> tuple[dict, int, bool]:
    """
    Scan a directory with os.walk and return it as a build_tree() structure.

    Directories are recorded before files at each level, both sorted by
    name. Dot-prefixed names are skipped unless ``show_hidden`` is set, and
    only the top level is scanned unless ``recursive`` is set. Scanning
    stops as soon as ``max_entries`` entries have been collected.

    Returns:
        (tree, total_entries, truncated)
    """

    def _stamp(target: str) -> str:
        return datetime.fromtimestamp(os.path.getmtime(target)).strftime("%Y-%m-%d %H:%M")

    collected: list[tuple[str, dict]] = []
    hit_limit = False

    for current, dirnames, filenames in os.walk(abs_path):
        if not show_hidden:
            # In-place assignment also stops os.walk from descending into hidden dirs.
            dirnames[:] = [name for name in dirnames if not name.startswith('.')]
            filenames = [name for name in filenames if not name.startswith('.')]

        dirnames.sort()
        filenames.sort()

        try:
            rel = os.path.relpath(current, abs_path)
        except Exception:
            rel = ""
        base = "" if rel == "." else rel.replace("\\", "/") + "/"

        for name in dirnames:
            try:
                modified = _stamp(os.path.join(current, name))
            except Exception:
                modified = "?"
            collected.append((f"{base}{name}/", {"mtime": modified}))
            if len(collected) >= max_entries:
                hit_limit = True
                break

        if not hit_limit:
            for name in filenames:
                full = os.path.join(current, name)
                try:
                    size = os.path.getsize(full)
                    modified = _stamp(full)
                except Exception:
                    size, modified = 0, "?"
                collected.append((f"{base}{name}", {"size": size, "mtime": modified}))
                if len(collected) >= max_entries:
                    hit_limit = True
                    break

        if hit_limit or not recursive:
            break

    return build_tree(collected), len(collected), hit_limit
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
def format_dir_listing(
    abs_path: str,
    display_path: str,
    *,
    show_hidden: bool = False,
    recursive: bool = False,
    max_entries: int = 100,
    fmt_size_fn: Optional[Callable[[int], str]] = None,
) -> str:
    """Return a text listing of ``abs_path`` drawn as a tree.

    The output starts with a header naming ``display_path`` and the entry
    count (plus a truncation notice when the entry limit was reached),
    followed by the tree. ``fmt_size_fn`` overrides how file sizes are
    rendered; the module's default size formatter is used otherwise.
    """
    size_label = fmt_size_fn or _fmt_size

    def describe(name: str, meta: dict, is_dir: bool) -> str:
        stamp = meta.get("mtime", "")
        if not is_dir:
            return f"{name} ({size_label(meta.get('size', 0))}, {stamp})"
        return f"{name}/ ({stamp})"

    tree, total, truncated = walk_and_build_tree(
        abs_path,
        show_hidden=show_hidden,
        recursive=recursive,
        max_entries=max_entries,
    )

    heading = f"Listing of {display_path}\nRoot: /\nEntries: {total}"
    if truncated:
        heading += f"\n… Truncated at {max_entries} entries."

    # Header, a blank separator, the root marker, then the rendered children.
    body = [heading, "", "└── /", *render_tree(tree, " ", describe)]
    return "\n".join(body).strip()
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
# ===========================================================================
|
| 237 |
-
# Part 1: Sandboxed Filesystem Operations
|
| 238 |
-
# ===========================================================================
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
class SandboxedRoot:
    """
    A configurable sandboxed root directory with path resolution and safety checks.

    User-facing paths are expressed relative to a virtual root ``/`` that maps
    to ``root_dir`` on disk. Error payloads and display paths are written so
    that the real location of ``root_dir`` is not echoed back to the caller.

    Args:
        root_dir: Path to the sandbox root (stored as an absolute path).
        allow_abs: If True, allow absolute paths outside the sandbox.
    """

    def __init__(self, root_dir: str, allow_abs: bool = False):
        self.root_dir = os.path.abspath(root_dir)
        self.allow_abs = allow_abs
        # Best effort: a root that cannot be created is tolerated here and
        # shows up later as ordinary I/O errors from the individual operations.
        try:
            os.makedirs(self.root_dir, exist_ok=True)
        except (OSError, ValueError):
            pass

    def safe_err(self, exc: Exception | str) -> str:
        """Return an error string with any absolute root replaced by '/' and slashes normalized."""
        text = str(exc).replace("\\", "/")
        root_fwd = self.root_dir.replace("\\", "/")
        # Replace the longest spelling first so the result does not depend on
        # set iteration order.
        variants = sorted({root_fwd, re.sub(r"/+", "/", root_fwd)}, key=len, reverse=True)
        for variant in variants:
            if variant:
                text = text.replace(variant, "/")
        return re.sub(r"/+", "/", text)

    def err(
        self,
        code: str,
        message: str,
        *,
        path: Optional[str] = None,
        hint: Optional[str] = None,
        data: Optional[dict] = None,
    ) -> str:
        """Return a structured error JSON string.

        ``path``, ``hint`` and ``data`` are only included when non-empty.
        """
        payload = {
            "status": "error",
            "code": code,
            "message": message,
            "root": "/",
        }
        if path is not None and path != "":
            payload["path"] = path
        if hint:
            payload["hint"] = hint
        if data:
            payload["data"] = data
        return json.dumps(payload, ensure_ascii=False)

    def display_path(self, abs_path: str) -> str:
        """Return a user-friendly path relative to root using forward slashes.

        Paths that are not under the root are returned unchanged apart from
        slash normalization.
        """
        try:
            norm_root = os.path.normpath(self.root_dir)
            norm_abs = os.path.normpath(abs_path)
            common = os.path.commonpath([norm_root, norm_abs])
            if os.path.normcase(common) == os.path.normcase(norm_root):
                rel = os.path.relpath(norm_abs, norm_root)
                if rel == ".":
                    return "/"
                return "/" + rel.replace("\\", "/")
        except Exception:
            pass
        return abs_path.replace("\\", "/")

    def _is_within_root(self, abs_path: str) -> bool:
        """Return True when *abs_path* is the sandbox root or lies beneath it.

        The check is done twice: lexically, and again after resolving symlinks,
        so a link inside the sandbox cannot be used to reach outside it.
        """

        def contained(root: str, target: str) -> bool:
            # normcase keeps the comparison consistent with display_path().
            root_n = os.path.normcase(os.path.normpath(root))
            target_n = os.path.normcase(os.path.normpath(target))
            try:
                return os.path.commonpath([root_n, target_n]) == root_n
            except ValueError:
                # e.g. different drives, or a mix of absolute and relative paths
                return False

        if not contained(self.root_dir, abs_path):
            return False
        return contained(os.path.realpath(self.root_dir), os.path.realpath(abs_path))

    def resolve_path(self, path: str) -> tuple[str, str]:
        """
        Resolve a user-provided path to an absolute, normalized path constrained to root.

        A leading ``/`` refers to the sandbox root, not the host filesystem root.
        Returns (abs_path, error_message). error_message is empty when ok;
        otherwise abs_path is empty and error_message is a JSON error payload.
        """
        try:
            user_input = (path or "/").strip() or "/"
            if user_input.startswith("/"):
                rel_part = user_input.lstrip("/") or "."
                raw = os.path.expanduser(rel_part)
                treat_as_relative = True
            else:
                raw = os.path.expanduser(user_input)
                treat_as_relative = False

            if not treat_as_relative and os.path.isabs(raw):
                if not self.allow_abs:
                    return "", self.err(
                        "absolute_path_disabled",
                        "Absolute paths are disabled in safe mode.",
                        path=raw.replace("\\", "/"),
                        hint="Use a path relative to / (e.g., /notes/todo.txt).",
                    )
                abs_path = os.path.abspath(raw)
            else:
                abs_path = os.path.abspath(os.path.join(self.root_dir, raw))

            # Constrain to root when not allowing absolute paths
            if not self.allow_abs and not self._is_within_root(abs_path):
                return "", self.err(
                    "path_outside_root",
                    "Path is outside the sandbox root.",
                    # Echo what the caller sent rather than the resolved host
                    # path, which would reveal where the sandbox lives on disk.
                    path=user_input.replace("\\", "/"),
                )

            return abs_path, ""
        except Exception as exc:
            return "", self.err(
                "resolve_path_failed",
                "Failed to resolve path.",
                path=(path or ""),
                data={"error": self.safe_err(exc)},
            )

    def safe_open(self, file, *args, **kwargs):
        """A drop-in replacement for open() that enforces sandbox constraints.

        Integer file descriptors are passed straight through to open().

        Raises:
            PermissionError: If the path cannot be resolved inside the sandbox.
        """
        if isinstance(file, int):
            return open(file, *args, **kwargs)

        path_str = os.fspath(file)
        abs_path, err = self.resolve_path(path_str)
        if err:
            try:
                msg = json.loads(err)["message"]
            except Exception:
                msg = err
            raise PermissionError(f"Sandboxed open() failed: {msg}")

        return open(abs_path, *args, **kwargs)

    def list_dir(
        self,
        abs_path: str,
        *,
        show_hidden: bool = False,
        recursive: bool = False,
        max_entries: int = 100,
    ) -> str:
        """List directory contents as a visual tree."""
        return format_dir_listing(
            abs_path,
            self.display_path(abs_path),
            show_hidden=show_hidden,
            recursive=recursive,
            max_entries=max_entries,
            fmt_size_fn=_fmt_size,
        )

    def search_text(
        self,
        abs_path: str,
        query: str,
        *,
        recursive: bool = False,
        show_hidden: bool = False,
        max_results: int = 20,
        case_sensitive: bool = False,
        start_index: int = 0,
    ) -> str:
        """Search for text within files.

        Performs a plain substring match per line. ``start_index`` skips that
        many matches, and at most ``max_results`` are returned; the report
        includes a "Next cursor" value for continuing a truncated search.

        Returns:
            A text report (header plus numbered matches), or a JSON error
            payload when the path is missing or the query is empty.
        """
        if not os.path.exists(abs_path):
            return self.err(
                "path_not_found",
                f"Path not found: {self.display_path(abs_path)}",
                path=self.display_path(abs_path),
            )

        query = query or ""
        normalized_query = query if case_sensitive else query.lower()
        if normalized_query == "":
            return self.err(
                "missing_search_query",
                "Search query is required for the search action.",
                hint="Provide text in the Content field to search for.",
            )

        max_results = max(1, int(max_results) if max_results is not None else 20)
        start_index = max(0, int(start_index) if start_index is not None else 0)
        matches: list[tuple[str, int, str]] = []
        errors: list[str] = []
        files_scanned = 0
        truncated = False
        total_matches = 0

        def _should_skip(name: str) -> bool:
            return not show_hidden and name.startswith(".")

        def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
            """Record one match; return True once the result page is full."""
            nonlocal truncated, total_matches
            total_matches += 1
            if total_matches <= start_index:
                return False
            if len(matches) < max_results:
                snippet = line_text.strip()
                if len(snippet) > 200:
                    snippet = snippet[:197] + "…"
                matches.append((self.display_path(file_path), line_no, snippet))
                return False
            truncated = True
            return True

        def _search_file(file_path: str) -> bool:
            """Scan one file line by line; return True when the search should stop."""
            nonlocal files_scanned
            files_scanned += 1
            try:
                with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
                    for line_no, line in enumerate(handle, start=1):
                        haystack = line if case_sensitive else line.lower()
                        if normalized_query in haystack:
                            if _handle_match(file_path, line_no, line):
                                return True
            except Exception as exc:
                errors.append(f"{self.display_path(file_path)} ({self.safe_err(exc)})")
            return truncated

        if os.path.isfile(abs_path):
            _search_file(abs_path)
        else:
            for root, dirs, files in os.walk(abs_path):
                # Sorted traversal keeps match order stable between calls, which
                # the start_index cursor relies on.
                dirs[:] = sorted(d for d in dirs if not _should_skip(d))
                for name in sorted(f for f in files if not _should_skip(f)):
                    if _search_file(os.path.join(root, name)):
                        break
                if truncated:
                    break
                if not recursive:
                    break

        header_lines = [
            f"Search results for {query!r}",
            f"Scope: {self.display_path(abs_path)}",
            f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
            f"Start offset: {start_index}",
            f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
            f"Files scanned: {files_scanned}",
        ]

        next_cursor = start_index + len(matches) if truncated else None

        if truncated:
            header_lines.append(f"Matches encountered before truncation: {total_matches}")
            header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
            header_lines.append(f"Next cursor: {next_cursor}")
        else:
            header_lines.append(f"Total matches found: {total_matches}")
            header_lines.append("Truncated: no — end of results.")
            header_lines.append("Next cursor: None")

        if matches:
            body_lines = [
                f"{idx}. {path}:{line_no}: {text}"
                for idx, (path, line_no, text) in enumerate(matches, start=1)
            ]
        elif total_matches > 0 and start_index >= total_matches:
            hint_limit = max(total_matches - 1, 0)
            body_lines = [
                f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
                f"Try a smaller offset (≤ {hint_limit}).",
            ]
        else:
            # Reaching here with no matches means nothing matched at all.
            body_lines = ["No matches found."]

        if errors:
            shown = errors[:5]
            body_lines.extend(["", "Warnings:"])
            body_lines.extend(shown)
            if len(errors) > len(shown):
                body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")

        return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)

    def read_file(self, abs_path: str, *, offset: int = 0, max_chars: int = 4000) -> str:
        """Read file contents with optional offset and character limit.

        ``offset`` is clamped to the file length; ``max_chars <= 0`` means
        "read to the end". The returned text is a short header, a separator,
        and the requested chunk; the header includes "Next cursor" when more
        text remains. Errors are returned as JSON error payloads.
        """
        if not os.path.exists(abs_path):
            return self.err(
                "file_not_found",
                f"File not found: {self.display_path(abs_path)}",
                path=self.display_path(abs_path),
            )
        if os.path.isdir(abs_path):
            return self.err(
                "is_directory",
                f"Path is a directory, not a file: {self.display_path(abs_path)}",
                path=self.display_path(abs_path),
                hint="Provide a file path.",
            )
        try:
            with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
                data = f.read()
        except Exception as exc:
            return self.err(
                "read_failed",
                "Failed to read file.",
                path=self.display_path(abs_path),
                data={"error": self.safe_err(exc)},
            )
        total = len(data)
        start = max(0, min(offset, total))
        if max_chars > 0:
            end = min(total, start + max_chars)
        else:
            end = total
        chunk = data[start:end]
        next_cursor = end if end < total else None
        header = (
            f"Reading {self.display_path(abs_path)}\n"
            f"Offset {start}, returned {len(chunk)} of {total}."
            + (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
        )
        sep = "\n\n---\n\n"
        return header + sep + chunk

    def info(self, abs_path: str) -> str:
        """Get file/directory metadata as JSON.

        Returns a JSON error payload if the path cannot be stat'ed.
        """
        try:
            st = os.stat(abs_path)
        except Exception as exc:
            return self.err(
                "stat_failed",
                "Failed to stat path.",
                path=self.display_path(abs_path),
                data={"error": self.safe_err(exc)},
            )
        # st_ctime is the metadata-change time on Unix; use the real creation
        # time where the platform reports one.
        created_ts = getattr(st, "st_birthtime", st.st_ctime)
        info_dict = {
            "path": self.display_path(abs_path),
            "type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
            "size": st.st_size,
            "modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=" ", timespec="seconds"),
            "created": datetime.fromtimestamp(created_ts).isoformat(sep=" ", timespec="seconds"),
            "mode": oct(st.st_mode),
            "root": "/",
        }
        # ensure_ascii=False matches err(), so non-ASCII paths stay readable.
        return json.dumps(info_dict, indent=2, ensure_ascii=False)
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
# ---------------------------------------------------------------------------
|
| 595 |
-
# Default roots (can be overridden by environment variables)
|
| 596 |
-
# ---------------------------------------------------------------------------
|
| 597 |
-
|
| 598 |
-
def _get_filesystem_root() -> str:
|
| 599 |
-
"""Get the default filesystem root directory."""
|
| 600 |
-
root = os.getenv("NYMBO_TOOLS_ROOT")
|
| 601 |
-
if root and root.strip():
|
| 602 |
-
return os.path.abspath(os.path.expanduser(root.strip()))
|
| 603 |
-
try:
|
| 604 |
-
here = os.path.abspath(__file__)
|
| 605 |
-
tools_dir = os.path.dirname(os.path.dirname(here))
|
| 606 |
-
return os.path.abspath(os.path.join(tools_dir, "Filesystem"))
|
| 607 |
-
except Exception:
|
| 608 |
-
return os.path.abspath(os.getcwd())
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
def _get_obsidian_root() -> str:
|
| 612 |
-
"""Get the default Obsidian vault root directory."""
|
| 613 |
-
env_root = os.getenv("OBSIDIAN_VAULT_ROOT")
|
| 614 |
-
if env_root and env_root.strip():
|
| 615 |
-
return os.path.abspath(os.path.expanduser(env_root.strip()))
|
| 616 |
-
try:
|
| 617 |
-
here = os.path.abspath(__file__)
|
| 618 |
-
tools_dir = os.path.dirname(os.path.dirname(here))
|
| 619 |
-
return os.path.abspath(os.path.join(tools_dir, "Obsidian"))
|
| 620 |
-
except Exception:
|
| 621 |
-
return os.path.abspath(os.getcwd())
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
# Pre-configured sandbox instances
#
# UNSAFE_ALLOW_ABS_PATHS lifts the sandbox restriction. Integers keep their
# previous meaning (0 = off, non-zero = on); "true"/"yes"/"on" are accepted as
# well. Anything unrecognised (including an empty value) leaves the restriction
# in place instead of crashing the import with a ValueError.
_allow_abs_raw = os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0").strip().lower()
try:
    ALLOW_ABS = bool(int(_allow_abs_raw))
except ValueError:
    ALLOW_ABS = _allow_abs_raw in {"true", "yes", "on"}

FILESYSTEM_ROOT = _get_filesystem_root()
OBSIDIAN_ROOT = _get_obsidian_root()

# Default sandbox for /Filesystem (used by most tools)
filesystem_sandbox = SandboxedRoot(FILESYSTEM_ROOT, allow_abs=ALLOW_ABS)

# Sandbox for /Obsidian vault
obsidian_sandbox = SandboxedRoot(OBSIDIAN_ROOT, allow_abs=ALLOW_ABS)


# Convenience exports (for backward compatibility)
ROOT_DIR = FILESYSTEM_ROOT
|
| 639 |
-
|
| 640 |
-
def _resolve_path(path: str) -> tuple[str, str]:
    """Resolve *path* against the default filesystem sandbox.

    Returns the ``(abs_path, error_message)`` pair produced by
    ``filesystem_sandbox.resolve_path``.
    """
    resolved = filesystem_sandbox.resolve_path(path)
    return resolved
|
| 643 |
-
|
| 644 |
-
def _display_path(abs_path: str) -> str:
    """Return *abs_path* as shown to users of the default filesystem sandbox."""
    shown = filesystem_sandbox.display_path(abs_path)
    return shown
|
| 647 |
-
|
| 648 |
-
def safe_open(file, *args, **kwargs):
    """Open *file* through the default filesystem sandbox.

    Positional and keyword arguments are forwarded unchanged to
    ``filesystem_sandbox.safe_open``.
    """
    opener = filesystem_sandbox.safe_open
    return opener(file, *args, **kwargs)
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
# ===========================================================================
|
| 654 |
-
# Part 2: Sandboxed Python Execution
|
| 655 |
-
# ===========================================================================
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
def create_safe_builtins() -> dict:
    """Return a copy of the builtins namespace whose ``open`` is the sandboxed one.

    ``__builtins__`` may be either a dict or a module depending on how this
    module was loaded, so both forms are handled. Only ``open`` is replaced;
    every other builtin is copied as-is.
    """
    namespace = __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)
    sandboxed = dict(namespace)
    sandboxed["open"] = safe_open
    return sandboxed
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
def sandboxed_exec(
    code: str,
    *,
    extra_globals: dict[str, Any] | None = None,
    ast_mode: bool = False,
) -> str:
    """
    Execute Python code in a sandboxed environment.

    While the code runs, ``sys.stdout`` is replaced with an in-memory buffer and
    the working directory is switched to ``ROOT_DIR``; both are restored
    afterwards. These are process-wide settings, so concurrent callers share
    them.

    NOTE(review): only ``open`` is replaced in the builtins handed to the code;
    ``exec``/``eval`` still run it with every other builtin available, so this
    should not be relied on as a security boundary for untrusted input.

    Args:
        code: Python source code to execute
        extra_globals: Additional globals to inject (e.g., tools)
        ast_mode: If True, parse and print results of all expression statements
            (like Agent_Terminal). If False, simple exec (like Code_Interpreter).

    Returns:
        Captured stdout output, or exception text on error.
    """
    if not code:
        return "No code provided."

    # Build the execution environment BEFORE redirecting stdout: if this step
    # raises (for instance because extra_globals is not a mapping), nothing
    # process-global has been modified yet.
    env: dict[str, Any] = {
        "open": safe_open,
        "__builtins__": create_safe_builtins(),
        "print": print,
    }
    if extra_globals:
        env.update(extra_globals)

    old_cwd = os.getcwd()
    old_stdout = sys.stdout
    redirected_output = StringIO()
    sys.stdout = redirected_output

    try:
        os.chdir(ROOT_DIR)

        if ast_mode:
            # Parse and evaluate each statement, printing expression results
            tree = ast.parse(code)
            for node in tree.body:
                if isinstance(node, ast.Expr):
                    # Standalone expression - evaluate and print result
                    expr = compile(ast.Expression(node.value), filename="<string>", mode="eval")
                    result_val = eval(expr, env)
                    if result_val is not None:
                        print(result_val)
                else:
                    # Statement - execute it
                    mod = ast.Module(body=[node], type_ignores=[])
                    exec(compile(mod, filename="<string>", mode="exec"), env)
        else:
            # Simple exec mode
            exec(code, env)

        result = redirected_output.getvalue()
    except Exception as exc:
        result = str(exc)
    finally:
        sys.stdout = old_stdout
        try:
            os.chdir(old_cwd)
        except OSError:
            # The previous working directory may have been removed meanwhile.
            pass

    return result
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
# ===========================================================================
|
| 738 |
-
# Part 3: Hugging Face Inference Utilities
|
| 739 |
-
# ===========================================================================
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
def get_hf_token() -> str | None:
    """Return the Hugging Face API token configured in the environment.

    ``HF_READ_TOKEN`` takes precedence when it is set to a non-empty value;
    otherwise whatever ``HF_TOKEN`` holds (possibly ``None``) is returned.
    """
    read_token = os.getenv("HF_READ_TOKEN")
    if read_token:
        return read_token
    return os.getenv("HF_TOKEN")
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
# Pre-instantiated token for modules that prefer this pattern.
# Evaluated once, when this module is imported.
HF_TOKEN = get_hf_token()

# Standard provider list for image/video generation.
# invoke_with_fallback() tries these, in order, when no list is passed.
DEFAULT_PROVIDERS = ["auto", "replicate", "fal-ai"]

# Provider list for text generation (Deep Research)
TEXTGEN_PROVIDERS = ["cerebras", "auto"]


# Return type of the callable given to invoke_with_fallback().
T = TypeVar("T")
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
def handle_hf_error(msg: str, model_id: str, *, context: str = "generation") -> None:
    """
    Translate an HF API error message into a user-facing ``gr.Error``.

    The message is inspected for status codes and keywords by plain substring
    matching, checked in this order: 404, 503, authentication problems, and
    finally a generic failure. This function always raises.

    Args:
        msg: Error message string to analyze
        model_id: The model ID being used (for error messages)
        context: Description of operation for error messages

    Raises:
        gr.Error: With user-friendly message based on error type
    """
    lowered = msg.lower()
    token_help = "Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation."
    auth_keywords = ("api_key", "hf auth login", "unauthorized", "forbidden")

    if "404" in msg:
        raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and your HF token access.")

    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.")

    # Both the status-code check and the keyword check lead to the same advice.
    auth_by_status = "401" in msg or "403" in msg
    if auth_by_status or any(keyword in lowered for keyword in auth_keywords):
        raise gr.Error(token_help)

    # If none of the known patterns match, raise generic error
    raise gr.Error(f"{context.capitalize()} failed: {msg}")
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
def invoke_with_fallback(
    fn: Callable[[str], T],
    providers: list[str] | None = None,
) -> T:
    """
    Try calling fn(provider) for each provider until one succeeds.

    Args:
        fn: Function that takes a provider string and returns a result.
            Should raise an exception on failure.
        providers: List of provider strings to try. Defaults to DEFAULT_PROVIDERS.

    Returns:
        The result from the first successful fn() call.

    Raises:
        The last exception if all providers fail.
        RuntimeError: If ``providers`` is empty, so nothing was attempted.
    """
    if providers is None:
        providers = DEFAULT_PROVIDERS

    last_error: Exception | None = None

    for provider in providers:
        try:
            return fn(provider)
        except Exception as exc:
            # Remember the failure and move on to the next provider.
            last_error = exc

    # All providers failed. Compare against None rather than testing
    # truthiness: an exception type may define __bool__/__len__ and be falsy,
    # which would otherwise hide the real error behind the generic one below.
    if last_error is not None:
        raise last_error
    raise RuntimeError("No providers available")
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
# ===========================================================================
|
| 830 |
-
# Public API
|
| 831 |
-
# ===========================================================================
|
| 832 |
-
|
| 833 |
-
# Names exported by `from <this module> import *`. Several underscore-prefixed
# helpers are listed explicitly, which makes a star-import pick them up too.
__all__ = [
    # Tree Utils
    "_fmt_size",
    "build_tree",
    "render_tree",
    "walk_and_build_tree",
    "format_dir_listing",
    # Filesystem
    "SandboxedRoot",
    "filesystem_sandbox",
    "obsidian_sandbox",
    "ROOT_DIR",
    "FILESYSTEM_ROOT",
    "OBSIDIAN_ROOT",
    "ALLOW_ABS",
    "_resolve_path",
    "_display_path",
    "safe_open",
    # Execution
    "sandboxed_exec",
    "create_safe_builtins",
    # HF Inference
    "get_hf_token",
    "HF_TOKEN",
    "DEFAULT_PROVIDERS",
    "TEXTGEN_PROVIDERS",
    "handle_hf_error",
    "invoke_with_fallback",
]
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Core shared utilities for the Nymbo-Tools MCP server.
|
| 3 |
+
|
| 4 |
+
Consolidates three key areas:
|
| 5 |
+
1. Sandboxed filesystem operations (path resolution, reading, writing, safe_open)
|
| 6 |
+
2. Sandboxed Python execution (code interpreter, agent terminal)
|
| 7 |
+
3. Hugging Face inference utilities (token, providers, error handling)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import ast
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import re
|
| 16 |
+
import stat
|
| 17 |
+
import sys
|
| 18 |
+
from datetime import datetime
|
| 19 |
+
from io import StringIO
|
| 20 |
+
from typing import Any, Callable, Optional, TypeVar
|
| 21 |
+
|
| 22 |
+
import gradio as gr
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ===========================================================================
|
| 26 |
+
# Part 0: Tree Rendering Utilities
|
| 27 |
+
# ===========================================================================
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _fmt_size(num_bytes: int) -> str:
|
| 31 |
+
"""Format byte size as human-readable string."""
|
| 32 |
+
units = ["B", "KB", "MB", "GB"]
|
| 33 |
+
size = float(num_bytes)
|
| 34 |
+
for unit in units:
|
| 35 |
+
if size < 1024.0:
|
| 36 |
+
return f"{size:.1f} {unit}"
|
| 37 |
+
size /= 1024.0
|
| 38 |
+
return f"{size:.1f} TB"
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def build_tree(entries: list[tuple[str, dict]]) -> dict:
    """
    Turn flat ``(path, metadata)`` entries into a nested dict tree.

    Args:
        entries: List of (path, metadata) tuples where path uses forward slashes.
                 Paths ending with '/' are treated as directories.

    Returns:
        Nested dict. Each directory level is a dict keyed by sub-directory name,
        with its files stored as ``(name, metadata)`` tuples under "__files__"
        and non-empty directory metadata under "__meta__".
    """
    tree: dict = {"__files__": []}

    for raw_path, meta in entries:
        *parents, leaf = raw_path.rstrip("/").split("/")

        # Walk down to the parent level, creating directories on the way.
        level = tree
        for segment in parents:
            level = level.setdefault(segment, {"__files__": []})

        if not raw_path.endswith("/"):
            level["__files__"].append((leaf, meta))
            continue

        directory = level.setdefault(leaf, {"__files__": []})
        if meta:
            directory["__meta__"] = meta

    return tree
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def render_tree(
    node: dict,
    prefix: str = "",
    format_entry: Optional[Callable[[str, dict, bool], str]] = None,
) -> list[str]:
    """
    Render a tree with line connectors.

    Directories are listed before files, each group sorted by name. Every
    nesting level indents by four columns, matching the width of the
    ``├── `` / ``└── `` connectors so that children line up under their parent.

    Args:
        node: Nested dict from build_tree()
        prefix: Current line prefix for indentation
        format_entry: Optional callback ``(name, metadata, is_dir) -> str`` to
            format each entry. By default directories get a trailing ``/`` and
            files show their size when the metadata has one.

    Returns:
        List of formatted lines.
    """
    result = []

    def default_format(name: str, meta: dict, is_dir: bool) -> str:
        if is_dir:
            return f"{name}/"
        size = meta.get("size")
        if size is not None:
            return f"{name} ({_fmt_size(size)})"
        return name

    fmt = format_entry or default_format

    entries = []
    subdirs = sorted(k for k in node if k not in ("__files__", "__meta__"))
    files_here = sorted(node.get("__files__", []), key=lambda x: x[0])

    for dirname in subdirs:
        dir_meta = node[dirname].get("__meta__", {})
        entries.append(("dir", dirname, node[dirname], dir_meta))
    for fname, fmeta in files_here:
        entries.append(("file", fname, None, fmeta))

    last_index = len(entries) - 1
    for i, (etype, name, subtree, meta) in enumerate(entries):
        is_last = i == last_index
        connector = "└── " if is_last else "├── "
        # Both continuation prefixes are exactly as wide as a connector; the
        # vertical bar is kept only while more siblings follow.
        child_prefix = prefix + ("    " if is_last else "│   ")

        if etype == "dir":
            result.append(f"{prefix}{connector}{fmt(name, meta, True)}")
            result.extend(render_tree(subtree, child_prefix, format_entry))
        else:
            result.append(f"{prefix}{connector}{fmt(name, meta, False)}")

    return result
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def walk_and_build_tree(
    abs_path: str,
    *,
    show_hidden: bool = False,
    recursive: bool = False,
    max_entries: int = 100,
) -> tuple[dict, int, bool]:
    """
    Walk a directory and build a tree structure.

    Directories and files are visited in sorted order, directories first.
    Names starting with '.' are skipped unless ``show_hidden`` is set. Only the
    top level is listed unless ``recursive`` is set.

    Args:
        abs_path: Directory to walk.
        show_hidden: Include dot-prefixed files and directories.
        recursive: Descend into sub-directories.
        max_entries: Maximum number of entries to collect.

    Returns:
        (tree, total_entries, truncated) where ``truncated`` is True only when
        at least one further entry existed beyond ``max_entries``.
    """
    entries: list[tuple[str, dict]] = []
    total = 0
    truncated = False

    def _mtime(path: str) -> str:
        return datetime.fromtimestamp(os.path.getmtime(path)).strftime("%Y-%m-%d %H:%M")

    for root, dirs, files in os.walk(abs_path):
        if not show_hidden:
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            files = [f for f in files if not f.startswith('.')]

        dirs.sort()
        files.sort()

        try:
            rel_root = os.path.relpath(root, abs_path)
        except ValueError:
            rel_root = ""
        prefix = "" if rel_root == "." else rel_root.replace("\\", "/") + "/"

        pending = [(d, True) for d in dirs] + [(f, False) for f in files]
        for name, is_dir in pending:
            # Check the limit BEFORE adding, so the listing is flagged as
            # truncated only if something was actually left out.
            if total >= max_entries:
                truncated = True
                break

            full_path = os.path.join(root, name)
            if is_dir:
                try:
                    mtime = _mtime(full_path)
                except (OSError, OverflowError, ValueError):
                    mtime = "?"
                entries.append((f"{prefix}{name}/", {"mtime": mtime}))
            else:
                try:
                    size = os.path.getsize(full_path)
                    mtime = _mtime(full_path)
                except (OSError, OverflowError, ValueError):
                    size, mtime = 0, "?"
                entries.append((f"{prefix}{name}", {"size": size, "mtime": mtime}))
            total += 1

        if truncated or not recursive:
            break

    return build_tree(entries), total, truncated
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def format_dir_listing(
    abs_path: str,
    display_path: str,
    *,
    show_hidden: bool = False,
    recursive: bool = False,
    max_entries: int = 100,
    fmt_size_fn: Optional[Callable[[int], str]] = None,
) -> str:
    """Format a directory listing as a visual tree.

    Args:
        abs_path: Directory on disk to list.
        display_path: Path shown in the header in place of ``abs_path``.
        show_hidden: Include dot-prefixed files and directories.
        recursive: Descend into sub-directories.
        max_entries: Maximum number of entries to list.
        fmt_size_fn: Optional byte-size formatter; defaults to ``_fmt_size``.

    Returns:
        A header (path, root, entry count, truncation notice) followed by the
        tree, drawn beneath a ``└── /`` root line.
    """
    fmt_size = fmt_size_fn or _fmt_size

    tree, total, truncated = walk_and_build_tree(
        abs_path,
        show_hidden=show_hidden,
        recursive=recursive,
        max_entries=max_entries,
    )

    def format_entry(name: str, meta: dict, is_dir: bool) -> str:
        mtime = meta.get("mtime", "")
        if is_dir:
            return f"{name}/ ({mtime})"
        size = meta.get("size", 0)
        return f"{name} ({fmt_size(size)}, {mtime})"

    # Indent by the width of the "└── " connector so that top-level entries
    # sit underneath the root line emitted below.
    tree_lines = render_tree(tree, "    ", format_entry)

    header = f"Listing of {display_path}\nRoot: /\nEntries: {total}"
    if truncated:
        header += f"\n… Truncated at {max_entries} entries."

    lines = [header, "", "└── /"]
    lines.extend(tree_lines)

    return "\n".join(lines).strip()
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# ===========================================================================
|
| 237 |
+
# Part 1: Sandboxed Filesystem Operations
|
| 238 |
+
# ===========================================================================
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
class SandboxedRoot:
|
| 242 |
+
"""
|
| 243 |
+
A configurable sandboxed root directory with path resolution and safety checks.
|
| 244 |
+
|
| 245 |
+
Args:
|
| 246 |
+
root_dir: Absolute path to the sandbox root.
|
| 247 |
+
allow_abs: If True, allow absolute paths outside the sandbox.
|
| 248 |
+
"""
|
| 249 |
+
|
| 250 |
+
def __init__(self, root_dir: str, allow_abs: bool = False):
|
| 251 |
+
self.root_dir = os.path.abspath(root_dir)
|
| 252 |
+
self.allow_abs = allow_abs
|
| 253 |
+
# Ensure root exists
|
| 254 |
+
try:
|
| 255 |
+
os.makedirs(self.root_dir, exist_ok=True)
|
| 256 |
+
except Exception:
|
| 257 |
+
pass
|
| 258 |
+
|
| 259 |
+
def safe_err(self, exc: Exception | str) -> str:
|
| 260 |
+
"""Return an error string with any absolute root replaced by '/' and slashes normalized."""
|
| 261 |
+
s = str(exc)
|
| 262 |
+
s_norm = s.replace("\\", "/")
|
| 263 |
+
root_fwd = self.root_dir.replace("\\", "/")
|
| 264 |
+
root_variants = {self.root_dir, root_fwd, re.sub(r"/+", "/", root_fwd)}
|
| 265 |
+
for variant in root_variants:
|
| 266 |
+
if variant:
|
| 267 |
+
s_norm = s_norm.replace(variant, "/")
|
| 268 |
+
s_norm = re.sub(r"/+", "/", s_norm)
|
| 269 |
+
return s_norm
|
| 270 |
+
|
| 271 |
+
def err(
|
| 272 |
+
self,
|
| 273 |
+
code: str,
|
| 274 |
+
message: str,
|
| 275 |
+
*,
|
| 276 |
+
path: Optional[str] = None,
|
| 277 |
+
hint: Optional[str] = None,
|
| 278 |
+
data: Optional[dict] = None,
|
| 279 |
+
) -> str:
|
| 280 |
+
"""Return a structured error JSON string."""
|
| 281 |
+
payload = {
|
| 282 |
+
"status": "error",
|
| 283 |
+
"code": code,
|
| 284 |
+
"message": message,
|
| 285 |
+
"root": "/",
|
| 286 |
+
}
|
| 287 |
+
if path is not None and path != "":
|
| 288 |
+
payload["path"] = path
|
| 289 |
+
if hint:
|
| 290 |
+
payload["hint"] = hint
|
| 291 |
+
if data:
|
| 292 |
+
payload["data"] = data
|
| 293 |
+
return json.dumps(payload, ensure_ascii=False)
|
| 294 |
+
|
| 295 |
+
def display_path(self, abs_path: str) -> str:
|
| 296 |
+
"""Return a user-friendly path relative to root using forward slashes."""
|
| 297 |
+
try:
|
| 298 |
+
norm_root = os.path.normpath(self.root_dir)
|
| 299 |
+
norm_abs = os.path.normpath(abs_path)
|
| 300 |
+
common = os.path.commonpath([norm_root, norm_abs])
|
| 301 |
+
if os.path.normcase(common) == os.path.normcase(norm_root):
|
| 302 |
+
rel = os.path.relpath(norm_abs, norm_root)
|
| 303 |
+
if rel == ".":
|
| 304 |
+
return "/"
|
| 305 |
+
return "/" + rel.replace("\\", "/")
|
| 306 |
+
except Exception:
|
| 307 |
+
pass
|
| 308 |
+
return abs_path.replace("\\", "/")
|
| 309 |
+
|
| 310 |
+
def resolve_path(self, path: str) -> tuple[str, str]:
|
| 311 |
+
"""
|
| 312 |
+
Resolve a user-provided path to an absolute, normalized path constrained to root.
|
| 313 |
+
Returns (abs_path, error_message). error_message is empty when ok.
|
| 314 |
+
"""
|
| 315 |
+
try:
|
| 316 |
+
user_input = (path or "/").strip() or "/"
|
| 317 |
+
if user_input.startswith("/"):
|
| 318 |
+
rel_part = user_input.lstrip("/") or "."
|
| 319 |
+
raw = os.path.expanduser(rel_part)
|
| 320 |
+
treat_as_relative = True
|
| 321 |
+
else:
|
| 322 |
+
raw = os.path.expanduser(user_input)
|
| 323 |
+
treat_as_relative = False
|
| 324 |
+
|
| 325 |
+
if not treat_as_relative and os.path.isabs(raw):
|
| 326 |
+
if not self.allow_abs:
|
| 327 |
+
return "", self.err(
|
| 328 |
+
"absolute_path_disabled",
|
| 329 |
+
"Absolute paths are disabled in safe mode.",
|
| 330 |
+
path=raw.replace("\\", "/"),
|
| 331 |
+
hint="Use a path relative to / (e.g., /notes/todo.txt).",
|
| 332 |
+
)
|
| 333 |
+
abs_path = os.path.abspath(raw)
|
| 334 |
+
else:
|
| 335 |
+
abs_path = os.path.abspath(os.path.join(self.root_dir, raw))
|
| 336 |
+
|
| 337 |
+
# Constrain to root when not allowing absolute paths
|
| 338 |
+
if not self.allow_abs:
|
| 339 |
+
try:
|
| 340 |
+
common = os.path.commonpath(
|
| 341 |
+
[os.path.normpath(self.root_dir), os.path.normpath(abs_path)]
|
| 342 |
+
)
|
| 343 |
+
if common != os.path.normpath(self.root_dir):
|
| 344 |
+
return "", self.err(
|
| 345 |
+
"path_outside_root",
|
| 346 |
+
"Path is outside the sandbox root.",
|
| 347 |
+
path=abs_path,
|
| 348 |
+
)
|
| 349 |
+
except Exception:
|
| 350 |
+
return "", self.err(
|
| 351 |
+
"path_outside_root",
|
| 352 |
+
"Path is outside the sandbox root.",
|
| 353 |
+
path=abs_path,
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
return abs_path, ""
|
| 357 |
+
except Exception as exc:
|
| 358 |
+
return "", self.err(
|
| 359 |
+
"resolve_path_failed",
|
| 360 |
+
"Failed to resolve path.",
|
| 361 |
+
path=(path or ""),
|
| 362 |
+
data={"error": self.safe_err(exc)},
|
| 363 |
+
)
|
| 364 |
+
|
| 365 |
+
def safe_open(self, file, *args, **kwargs):
|
| 366 |
+
"""A drop-in replacement for open() that enforces sandbox constraints."""
|
| 367 |
+
if isinstance(file, int):
|
| 368 |
+
return open(file, *args, **kwargs)
|
| 369 |
+
|
| 370 |
+
path_str = os.fspath(file)
|
| 371 |
+
abs_path, err = self.resolve_path(path_str)
|
| 372 |
+
if err:
|
| 373 |
+
try:
|
| 374 |
+
msg = json.loads(err)["message"]
|
| 375 |
+
except Exception:
|
| 376 |
+
msg = err
|
| 377 |
+
raise PermissionError(f"Sandboxed open() failed: {msg}")
|
| 378 |
+
|
| 379 |
+
return open(abs_path, *args, **kwargs)
|
| 380 |
+
|
| 381 |
+
def list_dir(
|
| 382 |
+
self,
|
| 383 |
+
abs_path: str,
|
| 384 |
+
*,
|
| 385 |
+
show_hidden: bool = False,
|
| 386 |
+
recursive: bool = False,
|
| 387 |
+
max_entries: int = 100,
|
| 388 |
+
) -> str:
|
| 389 |
+
"""List directory contents as a visual tree."""
|
| 390 |
+
return format_dir_listing(
|
| 391 |
+
abs_path,
|
| 392 |
+
self.display_path(abs_path),
|
| 393 |
+
show_hidden=show_hidden,
|
| 394 |
+
recursive=recursive,
|
| 395 |
+
max_entries=max_entries,
|
| 396 |
+
fmt_size_fn=_fmt_size,
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
def search_text(
|
| 400 |
+
self,
|
| 401 |
+
abs_path: str,
|
| 402 |
+
query: str,
|
| 403 |
+
*,
|
| 404 |
+
recursive: bool = False,
|
| 405 |
+
show_hidden: bool = False,
|
| 406 |
+
max_results: int = 20,
|
| 407 |
+
case_sensitive: bool = False,
|
| 408 |
+
start_index: int = 0,
|
| 409 |
+
) -> str:
|
| 410 |
+
"""Search for text within files."""
|
| 411 |
+
if not os.path.exists(abs_path):
|
| 412 |
+
return self.err(
|
| 413 |
+
"path_not_found",
|
| 414 |
+
f"Path not found: {self.display_path(abs_path)}",
|
| 415 |
+
path=self.display_path(abs_path),
|
| 416 |
+
)
|
| 417 |
+
|
| 418 |
+
query = query or ""
|
| 419 |
+
normalized_query = query if case_sensitive else query.lower()
|
| 420 |
+
if normalized_query == "":
|
| 421 |
+
return self.err(
|
| 422 |
+
"missing_search_query",
|
| 423 |
+
"Search query is required for the search action.",
|
| 424 |
+
hint="Provide text in the Content field to search for.",
|
| 425 |
+
)
|
| 426 |
+
|
| 427 |
+
max_results = max(1, int(max_results) if max_results is not None else 20)
|
| 428 |
+
start_index = max(0, int(start_index) if start_index is not None else 0)
|
| 429 |
+
matches: list[tuple[str, int, str]] = []
|
| 430 |
+
errors: list[str] = []
|
| 431 |
+
files_scanned = 0
|
| 432 |
+
truncated = False
|
| 433 |
+
total_matches = 0
|
| 434 |
+
|
| 435 |
+
def _should_skip(name: str) -> bool:
|
| 436 |
+
return not show_hidden and name.startswith(".")
|
| 437 |
+
|
| 438 |
+
def _handle_match(file_path: str, line_no: int, line_text: str) -> bool:
|
| 439 |
+
nonlocal truncated, total_matches
|
| 440 |
+
total_matches += 1
|
| 441 |
+
if total_matches <= start_index:
|
| 442 |
+
return False
|
| 443 |
+
if len(matches) < max_results:
|
| 444 |
+
snippet = line_text.strip()
|
| 445 |
+
if len(snippet) > 200:
|
| 446 |
+
snippet = snippet[:197] + "…"
|
| 447 |
+
matches.append((self.display_path(file_path), line_no, snippet))
|
| 448 |
+
return False
|
| 449 |
+
truncated = True
|
| 450 |
+
return True
|
| 451 |
+
|
| 452 |
+
def _search_file(file_path: str) -> bool:
|
| 453 |
+
nonlocal files_scanned
|
| 454 |
+
files_scanned += 1
|
| 455 |
+
try:
|
| 456 |
+
with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
|
| 457 |
+
for line_no, line in enumerate(handle, start=1):
|
| 458 |
+
haystack = line if case_sensitive else line.lower()
|
| 459 |
+
if normalized_query in haystack:
|
| 460 |
+
if _handle_match(file_path, line_no, line):
|
| 461 |
+
return True
|
| 462 |
+
except Exception as exc:
|
| 463 |
+
errors.append(f"{self.display_path(file_path)} ({self.safe_err(exc)})")
|
| 464 |
+
return truncated
|
| 465 |
+
|
| 466 |
+
if os.path.isfile(abs_path):
|
| 467 |
+
_search_file(abs_path)
|
| 468 |
+
else:
|
| 469 |
+
for root, dirs, files in os.walk(abs_path):
|
| 470 |
+
dirs[:] = [d for d in dirs if not _should_skip(d)]
|
| 471 |
+
visible_files = [f for f in files if show_hidden or not f.startswith(".")]
|
| 472 |
+
for name in visible_files:
|
| 473 |
+
file_path = os.path.join(root, name)
|
| 474 |
+
if _search_file(file_path):
|
| 475 |
+
break
|
| 476 |
+
if truncated:
|
| 477 |
+
break
|
| 478 |
+
if not recursive:
|
| 479 |
+
break
|
| 480 |
+
|
| 481 |
+
header_lines = [
|
| 482 |
+
f"Search results for {query!r}",
|
| 483 |
+
f"Scope: {self.display_path(abs_path)}",
|
| 484 |
+
f"Recursive: {'yes' if recursive else 'no'}, Hidden: {'yes' if show_hidden else 'no'}, Case-sensitive: {'yes' if case_sensitive else 'no'}",
|
| 485 |
+
f"Start offset: {start_index}",
|
| 486 |
+
f"Matches returned: {len(matches)}" + (" (truncated)" if truncated else ""),
|
| 487 |
+
f"Files scanned: {files_scanned}",
|
| 488 |
+
]
|
| 489 |
+
|
| 490 |
+
next_cursor = start_index + len(matches) if truncated else None
|
| 491 |
+
|
| 492 |
+
if truncated:
|
| 493 |
+
header_lines.append(f"Matches encountered before truncation: {total_matches}")
|
| 494 |
+
header_lines.append(f"Truncated: yes — re-run with offset={next_cursor} to continue.")
|
| 495 |
+
header_lines.append(f"Next cursor: {next_cursor}")
|
| 496 |
+
else:
|
| 497 |
+
header_lines.append(f"Total matches found: {total_matches}")
|
| 498 |
+
header_lines.append("Truncated: no — end of results.")
|
| 499 |
+
header_lines.append("Next cursor: None")
|
| 500 |
+
|
| 501 |
+
if not matches:
|
| 502 |
+
if total_matches > 0 and start_index >= total_matches:
|
| 503 |
+
hint_limit = max(total_matches - 1, 0)
|
| 504 |
+
body_lines = [
|
| 505 |
+
f"No matches found at or after offset {start_index}. Total matches available: {total_matches}.",
|
| 506 |
+
(f"Try a smaller offset (≤ {hint_limit})." if hint_limit >= 0 else ""),
|
| 507 |
+
]
|
| 508 |
+
body_lines = [line for line in body_lines if line]
|
| 509 |
+
else:
|
| 510 |
+
body_lines = [
|
| 511 |
+
"No matches found.",
|
| 512 |
+
(f"Total matches encountered: {total_matches}." if total_matches else ""),
|
| 513 |
+
]
|
| 514 |
+
body_lines = [line for line in body_lines if line]
|
| 515 |
+
else:
|
| 516 |
+
body_lines = [
|
| 517 |
+
f"{idx}. {path}:{line_no}: {text}"
|
| 518 |
+
for idx, (path, line_no, text) in enumerate(matches, start=1)
|
| 519 |
+
]
|
| 520 |
+
|
| 521 |
+
if errors:
|
| 522 |
+
shown = errors[:5]
|
| 523 |
+
body_lines.extend(["", "Warnings:"])
|
| 524 |
+
body_lines.extend(shown)
|
| 525 |
+
if len(errors) > len(shown):
|
| 526 |
+
body_lines.append(f"… {len(errors) - len(shown)} additional files could not be read.")
|
| 527 |
+
|
| 528 |
+
return "\n".join(header_lines) + "\n\n" + "\n".join(body_lines)
|
| 529 |
+
|
| 530 |
+
def read_file(self, abs_path: str, *, offset: int = 0, max_chars: int = 4000) -> str:
|
| 531 |
+
"""Read file contents with optional offset and character limit."""
|
| 532 |
+
if not os.path.exists(abs_path):
|
| 533 |
+
return self.err(
|
| 534 |
+
"file_not_found",
|
| 535 |
+
f"File not found: {self.display_path(abs_path)}",
|
| 536 |
+
path=self.display_path(abs_path),
|
| 537 |
+
)
|
| 538 |
+
if os.path.isdir(abs_path):
|
| 539 |
+
return self.err(
|
| 540 |
+
"is_directory",
|
| 541 |
+
f"Path is a directory, not a file: {self.display_path(abs_path)}",
|
| 542 |
+
path=self.display_path(abs_path),
|
| 543 |
+
hint="Provide a file path.",
|
| 544 |
+
)
|
| 545 |
+
try:
|
| 546 |
+
with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
|
| 547 |
+
data = f.read()
|
| 548 |
+
except Exception as exc:
|
| 549 |
+
return self.err(
|
| 550 |
+
"read_failed",
|
| 551 |
+
"Failed to read file.",
|
| 552 |
+
path=self.display_path(abs_path),
|
| 553 |
+
data={"error": self.safe_err(exc)},
|
| 554 |
+
)
|
| 555 |
+
total = len(data)
|
| 556 |
+
start = max(0, min(offset, total))
|
| 557 |
+
if max_chars > 0:
|
| 558 |
+
end = min(total, start + max_chars)
|
| 559 |
+
else:
|
| 560 |
+
end = total
|
| 561 |
+
chunk = data[start:end]
|
| 562 |
+
next_cursor = end if end < total else None
|
| 563 |
+
header = (
|
| 564 |
+
f"Reading {self.display_path(abs_path)}\n"
|
| 565 |
+
f"Offset {start}, returned {len(chunk)} of {total}."
|
| 566 |
+
+ (f"\nNext cursor: {next_cursor}" if next_cursor is not None else "")
|
| 567 |
+
)
|
| 568 |
+
sep = "\n\n---\n\n"
|
| 569 |
+
return header + sep + chunk
|
| 570 |
+
|
| 571 |
+
def info(self, abs_path: str) -> str:
|
| 572 |
+
"""Get file/directory metadata as JSON."""
|
| 573 |
+
try:
|
| 574 |
+
st = os.stat(abs_path)
|
| 575 |
+
except Exception as exc:
|
| 576 |
+
return self.err(
|
| 577 |
+
"stat_failed",
|
| 578 |
+
"Failed to stat path.",
|
| 579 |
+
path=self.display_path(abs_path),
|
| 580 |
+
data={"error": self.safe_err(exc)},
|
| 581 |
+
)
|
| 582 |
+
info_dict = {
|
| 583 |
+
"path": self.display_path(abs_path),
|
| 584 |
+
"type": "directory" if stat.S_ISDIR(st.st_mode) else "file",
|
| 585 |
+
"size": st.st_size,
|
| 586 |
+
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(sep=" ", timespec="seconds"),
|
| 587 |
+
"created": datetime.fromtimestamp(st.st_ctime).isoformat(sep=" ", timespec="seconds"),
|
| 588 |
+
"mode": oct(st.st_mode),
|
| 589 |
+
"root": "/",
|
| 590 |
+
}
|
| 591 |
+
return json.dumps(info_dict, indent=2)
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
# ---------------------------------------------------------------------------
|
| 595 |
+
# Default roots (can be overridden by environment variables)
|
| 596 |
+
# ---------------------------------------------------------------------------
|
| 597 |
+
|
| 598 |
+
def _get_filesystem_root() -> str:
|
| 599 |
+
"""Get the default filesystem root directory."""
|
| 600 |
+
root = os.getenv("NYMBO_TOOLS_ROOT")
|
| 601 |
+
if root and root.strip():
|
| 602 |
+
return os.path.abspath(os.path.expanduser(root.strip()))
|
| 603 |
+
try:
|
| 604 |
+
here = os.path.abspath(__file__)
|
| 605 |
+
tools_dir = os.path.dirname(os.path.dirname(here))
|
| 606 |
+
return os.path.abspath(os.path.join(tools_dir, "Filesystem"))
|
| 607 |
+
except Exception:
|
| 608 |
+
return os.path.abspath(os.getcwd())
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
def _get_obsidian_root() -> str:
|
| 612 |
+
"""Get the default Obsidian vault root directory."""
|
| 613 |
+
env_root = os.getenv("OBSIDIAN_VAULT_ROOT")
|
| 614 |
+
if env_root and env_root.strip():
|
| 615 |
+
return os.path.abspath(os.path.expanduser(env_root.strip()))
|
| 616 |
+
try:
|
| 617 |
+
here = os.path.abspath(__file__)
|
| 618 |
+
tools_dir = os.path.dirname(os.path.dirname(here))
|
| 619 |
+
return os.path.abspath(os.path.join(tools_dir, "Obsidian"))
|
| 620 |
+
except Exception:
|
| 621 |
+
return os.path.abspath(os.getcwd())
|
| 622 |
+
|
| 623 |
+
|
| 624 |
+
# Pre-configured sandbox instances
|
| 625 |
+
ALLOW_ABS = bool(int(os.getenv("UNSAFE_ALLOW_ABS_PATHS", "0")))
|
| 626 |
+
|
| 627 |
+
# Root directories, resolved once at import time
FILESYSTEM_ROOT = _get_filesystem_root()
OBSIDIAN_ROOT = _get_obsidian_root()

# Sandbox over the filesystem root (the default used by the module-level helpers)
filesystem_sandbox = SandboxedRoot(root_dir=FILESYSTEM_ROOT, allow_abs=ALLOW_ABS)

# Sandbox over the Obsidian vault root
obsidian_sandbox = SandboxedRoot(root_dir=OBSIDIAN_ROOT, allow_abs=ALLOW_ABS)

# Backward-compatible alias for the filesystem root
ROOT_DIR = FILESYSTEM_ROOT
|
| 639 |
+
|
| 640 |
+
def _resolve_path(path: str) -> tuple[str, str]:
    """
    Resolve *path* with the default filesystem sandbox.

    Returns the ``(abs_path, error_message)`` pair produced by
    ``filesystem_sandbox.resolve_path``.
    """
    resolved = filesystem_sandbox.resolve_path(path)
    return resolved
|
| 643 |
+
|
| 644 |
+
def _display_path(abs_path: str) -> str:
    """Format *abs_path* for display via the default filesystem sandbox."""
    shown = filesystem_sandbox.display_path(abs_path)
    return shown
|
| 647 |
+
|
| 648 |
+
def safe_open(file, *args, **kwargs):
    """
    Open *file* through the default filesystem sandbox.

    Positional and keyword arguments are forwarded unchanged to
    ``filesystem_sandbox.safe_open``.
    """
    handle = filesystem_sandbox.safe_open(file, *args, **kwargs)
    return handle
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
# ===========================================================================
|
| 654 |
+
# Part 2: Sandboxed Python Execution
|
| 655 |
+
# ===========================================================================
|
| 656 |
+
|
| 657 |
+
|
| 658 |
+
def create_safe_builtins() -> dict:
    """
    Build a copy of the builtins namespace whose ``open`` is the sandboxed one.

    Returns:
        A new dict; the interpreter's real builtins are left untouched.
    """
    # __builtins__ may be either a dict or the builtins module; handle both.
    namespace = __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)
    sandboxed = namespace.copy()
    sandboxed["open"] = safe_open
    return sandboxed
|
| 666 |
+
|
| 667 |
+
|
| 668 |
+
def sandboxed_exec(
    code: str,
    *,
    extra_globals: dict[str, Any] | None = None,
    ast_mode: bool = False,
) -> str:
    """
    Execute Python code with a sandboxed ``open`` and captured stdout.

    While the code runs, ``sys.stdout`` is replaced by an in-memory buffer and
    the working directory is switched to ``ROOT_DIR``; both are restored
    afterwards, whether or not the code raised.

    Args:
        code: Python source code to execute.
        extra_globals: Additional globals to inject (e.g., tools).
        ast_mode: If True, parse the code and print the value of every
            top-level expression statement that is not None (like
            Agent_Terminal). If False, simple exec (like Code_Interpreter).

    Returns:
        Captured stdout output, or the exception text on error. Returns
        "No code provided." when ``code`` is empty.
    """
    if not code:
        return "No code provided."

    old_stdout = sys.stdout
    old_cwd = os.getcwd()
    redirected_output = StringIO()

    try:
        # Build the execution environment before touching global state, so a
        # failure here cannot leave sys.stdout pointing at the capture buffer.
        safe_builtins = create_safe_builtins()
        env: dict[str, Any] = {
            "open": safe_open,
            "__builtins__": safe_builtins,
            "print": print,
        }
        if extra_globals:
            env.update(extra_globals)

        sys.stdout = redirected_output
        os.chdir(ROOT_DIR)

        if ast_mode:
            # Run statement by statement so bare expressions can be echoed.
            tree = ast.parse(code)
            for node in tree.body:
                if isinstance(node, ast.Expr):
                    # Standalone expression - evaluate and print result
                    expr = compile(ast.Expression(node.value), filename="<string>", mode="eval")
                    result_val = eval(expr, env)
                    if result_val is not None:
                        print(result_val)
                else:
                    # Statement - execute it
                    mod = ast.Module(body=[node], type_ignores=[])
                    exec(compile(mod, filename="<string>", mode="exec"), env)
        else:
            # Simple exec mode
            exec(code, env)

        result = redirected_output.getvalue()
    except Exception as exc:
        # Output printed before the failure is intentionally dropped: the
        # contract is "stdout on success, exception text on error".
        result = str(exc)
    finally:
        sys.stdout = old_stdout
        try:
            os.chdir(old_cwd)
        except Exception:
            # Best effort: the previous directory may no longer exist.
            pass

    return result
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
# ===========================================================================
|
| 738 |
+
# Part 3: Hugging Face Inference Utilities
|
| 739 |
+
# ===========================================================================
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
def get_hf_token() -> str | None:
    """Return the HF API token from the environment.

    ``HF_READ_TOKEN`` is preferred; when it is unset or empty the value of
    ``HF_TOKEN`` is returned instead (which may itself be None).
    """
    read_token = os.getenv("HF_READ_TOKEN")
    if read_token:
        return read_token
    return os.getenv("HF_TOKEN")
|
| 748 |
+
|
| 749 |
+
|
| 750 |
+
# Token looked up once at import time, for modules that prefer a constant
HF_TOKEN = get_hf_token()

# Providers for image/video generation
DEFAULT_PROVIDERS = [
    "auto",
    "replicate",
    "fal-ai",
]

# Providers for text generation (Deep Research)
TEXTGEN_PROVIDERS = [
    "cerebras",
    "auto",
]


T = TypeVar("T")
|
| 761 |
+
|
| 762 |
+
|
| 763 |
+
def handle_hf_error(msg: str, model_id: str, *, context: str = "generation") -> None:
    """
    Translate an HF API error message into a ``gr.Error`` and raise it.

    This function never returns normally: every path ends in a raise.

    Args:
        msg: Error message string to analyze.
        model_id: The model ID being used (quoted in the "not found" message).
        context: Name of the operation, used in the generic fallback message.

    Raises:
        gr.Error: With a user-friendly message chosen from the status code or
            keywords found in ``msg``.
    """
    lowered = msg.lower()
    auth_keywords = ("api_key", "hf auth login", "unauthorized", "forbidden")

    if "404" in msg:
        raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and your HF token access.")

    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.")

    # Status codes and textual hints for auth problems share one message.
    has_auth_status = "401" in msg or "403" in msg
    if has_auth_status or any(keyword in lowered for keyword in auth_keywords):
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")

    # Nothing recognised: surface the raw message
    raise gr.Error(f"{context.capitalize()} failed: {msg}")
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
def invoke_with_fallback(
    fn: Callable[[str], T],
    providers: list[str] | None = None,
) -> T:
    """
    Call ``fn(provider)`` for each provider in turn and return the first success.

    Args:
        fn: Callable taking a provider string and returning a result; it
            signals failure by raising.
        providers: Provider strings to try, in order. Defaults to
            DEFAULT_PROVIDERS when None.

    Returns:
        Whatever the first non-raising ``fn`` call returns.

    Raises:
        The exception from the last provider tried when every call fails, or
        RuntimeError("No providers available") when there was nothing to try.
    """
    candidates = DEFAULT_PROVIDERS if providers is None else providers
    failure: Exception | None = None

    for candidate in candidates:
        try:
            return fn(candidate)
        except Exception as exc:
            # Remember the failure and move on to the next provider.
            failure = exc

    if failure:
        raise failure
    raise RuntimeError("No providers available")
|
| 827 |
+
|
| 828 |
+
|
| 829 |
+
# ===========================================================================
|
| 830 |
+
# Public API
|
| 831 |
+
# ===========================================================================
|
| 832 |
+
|
| 833 |
+
__all__ = [
    # --- tree utilities ---
    "_fmt_size",
    "build_tree",
    "render_tree",
    "walk_and_build_tree",
    "format_dir_listing",
    # --- sandboxed filesystem ---
    "SandboxedRoot",
    "filesystem_sandbox",
    "obsidian_sandbox",
    "ROOT_DIR",
    "FILESYSTEM_ROOT",
    "OBSIDIAN_ROOT",
    "ALLOW_ABS",
    "_resolve_path",
    "_display_path",
    "safe_open",
    # --- sandboxed execution ---
    "sandboxed_exec",
    "create_safe_builtins",
    # --- Hugging Face inference helpers ---
    "get_hf_token",
    "HF_TOKEN",
    "DEFAULT_PROVIDERS",
    "TEXTGEN_PROVIDERS",
    "handle_hf_error",
    "invoke_with_fallback",
]
|
Modules/_docstrings.py
CHANGED
|
@@ -1,149 +1,149 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import inspect
|
| 4 |
-
import re
|
| 5 |
-
from typing import Any, Annotated, get_args, get_origin, get_type_hints
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
def _typename(tp: Any) -> str:
|
| 9 |
-
"""Return a readable type name from a type or annotation."""
|
| 10 |
-
try:
|
| 11 |
-
if hasattr(tp, "__name__"):
|
| 12 |
-
return tp.__name__ # e.g. int, str
|
| 13 |
-
if getattr(tp, "__module__", None) and getattr(tp, "__qualname__", None):
|
| 14 |
-
return f"{tp.__module__}.{tp.__qualname__}"
|
| 15 |
-
return str(tp).replace("typing.", "")
|
| 16 |
-
except Exception:
|
| 17 |
-
return str(tp)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def _parse_string_annotation(annot_str: str) -> tuple[str | None, str | None]:
|
| 21 |
-
"""
|
| 22 |
-
Parse a string annotation like "Annotated[Optional[str], 'description']"
|
| 23 |
-
and extract the base type name and the description metadata.
|
| 24 |
-
|
| 25 |
-
Returns (base_type_name, description) or (None, None) if parsing fails.
|
| 26 |
-
"""
|
| 27 |
-
if not isinstance(annot_str, str):
|
| 28 |
-
return None, None
|
| 29 |
-
|
| 30 |
-
# Match Annotated[..., 'description'] or Annotated[..., "description"]
|
| 31 |
-
# Pattern: Annotated[<base_type>, '<description>'] or with double quotes
|
| 32 |
-
match = re.match(
|
| 33 |
-
r"^Annotated\[(.+?),\s*['\"](.+?)['\"]\s*\]$",
|
| 34 |
-
annot_str.strip(),
|
| 35 |
-
re.DOTALL,
|
| 36 |
-
)
|
| 37 |
-
if match:
|
| 38 |
-
base_type_str = match.group(1).strip()
|
| 39 |
-
description = match.group(2)
|
| 40 |
-
# Simplify Optional[X] -> just the base type for display
|
| 41 |
-
opt_match = re.match(r"^Optional\[(.+)\]$", base_type_str)
|
| 42 |
-
if opt_match:
|
| 43 |
-
base_type_str = opt_match.group(1).strip()
|
| 44 |
-
return base_type_str, description
|
| 45 |
-
|
| 46 |
-
return None, None
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def _extract_base_and_meta(annotation: Any) -> tuple[Any, str | None]:
|
| 50 |
-
"""Given an annotation, return (base_type, first string metadata) if Annotated, else (annotation, None)."""
|
| 51 |
-
try:
|
| 52 |
-
# Handle string annotations from PEP 563 (__future__.annotations)
|
| 53 |
-
if isinstance(annotation, str):
|
| 54 |
-
base_str, meta = _parse_string_annotation(annotation)
|
| 55 |
-
if meta:
|
| 56 |
-
return base_str or annotation, meta
|
| 57 |
-
return annotation, None
|
| 58 |
-
|
| 59 |
-
if get_origin(annotation) is Annotated:
|
| 60 |
-
args = get_args(annotation)
|
| 61 |
-
base = args[0] if args else annotation
|
| 62 |
-
# Grab the first string metadata if present
|
| 63 |
-
for meta in args[1:]:
|
| 64 |
-
if isinstance(meta, str):
|
| 65 |
-
return base, meta
|
| 66 |
-
return base, None
|
| 67 |
-
return annotation, None
|
| 68 |
-
except Exception:
|
| 69 |
-
return annotation, None
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
def autodoc(summary: str | None = None, returns: str | None = None, *, force: bool = False):
    """
    Decorator that auto-generates a concise Google-style docstring from a function's
    type hints and Annotated metadata. Useful for Gradio MCP where docstrings are
    used for tool descriptions and parameter docs.

    Args:
        summary: Optional one-line summary for the function. If not provided,
            will generate a simple sentence from the function name.
        returns: Optional return value description. If not provided, only the
            return type will be listed (if available).
        force: When True, overwrite an existing docstring. Default False.

    Returns:
        The original function with its __doc__ populated (unless skipped).
    """

    def decorator(func):
        # Skip if docstring already present and not forcing
        if not force and func.__doc__ and func.__doc__.strip():
            return func

        try:
            # include_extras=True to retain Annotated metadata
            hints = get_type_hints(func, include_extras=True, globalns=getattr(func, "__globals__", None))
        except Exception:
            # Unresolvable hints: fall back to the raw signature annotations.
            hints = {}

        sig = inspect.signature(func)
        empty = inspect.Parameter.empty

        lines: list[str] = []
        # Summary line
        if summary and summary.strip():
            lines.append(summary.strip())
        else:
            pretty = func.__name__.replace("_", " ").strip().capitalize()
            if not pretty.endswith("."):
                pretty += "."
            lines.append(pretty)

        # Args section: collect the entries first so that a function whose only
        # parameter is "self" does not get an empty "Args:" header.
        # NOTE(review): entry indentation is a single space in this view of the
        # file; confirm against the checked-in source.
        arg_lines: list[str] = []
        for name, param in sig.parameters.items():
            if name == "self":
                continue
            annot = hints.get(name, param.annotation)
            base, meta = _extract_base_and_meta(annot)
            tname = _typename(base) if base is not empty else None
            desc = meta or ""
            if tname and tname != str(empty):
                arg_lines.append(f" {name} ({tname}): {desc}".rstrip())
            else:
                arg_lines.append(f" {name}: {desc}".rstrip())
        if arg_lines:
            lines.append("")
            lines.append("Args:")
            lines.extend(arg_lines)

        # Returns section
        ret_hint = hints.get("return", sig.return_annotation)
        if returns or (ret_hint and ret_hint is not inspect.Signature.empty):
            lines.append("")
            lines.append("Returns:")
            if returns:
                lines.append(f" {returns}")
            else:
                base, meta = _extract_base_and_meta(ret_hint)
                rtype = _typename(base)
                if meta:
                    lines.append(f" {rtype}: {meta}")
                else:
                    lines.append(f" {rtype}")

        func.__doc__ = "\n".join(lines).strip() + "\n"
        return func

    return decorator
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
__all__ = ["autodoc"]
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import re
|
| 5 |
+
from typing import Any, Annotated, get_args, get_origin, get_type_hints
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _typename(tp: Any) -> str:
|
| 9 |
+
"""Return a readable type name from a type or annotation."""
|
| 10 |
+
try:
|
| 11 |
+
if hasattr(tp, "__name__"):
|
| 12 |
+
return tp.__name__ # e.g. int, str
|
| 13 |
+
if getattr(tp, "__module__", None) and getattr(tp, "__qualname__", None):
|
| 14 |
+
return f"{tp.__module__}.{tp.__qualname__}"
|
| 15 |
+
return str(tp).replace("typing.", "")
|
| 16 |
+
except Exception:
|
| 17 |
+
return str(tp)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _parse_string_annotation(annot_str: str) -> tuple[str | None, str | None]:
|
| 21 |
+
"""
|
| 22 |
+
Parse a string annotation like "Annotated[Optional[str], 'description']"
|
| 23 |
+
and extract the base type name and the description metadata.
|
| 24 |
+
|
| 25 |
+
Returns (base_type_name, description) or (None, None) if parsing fails.
|
| 26 |
+
"""
|
| 27 |
+
if not isinstance(annot_str, str):
|
| 28 |
+
return None, None
|
| 29 |
+
|
| 30 |
+
# Match Annotated[..., 'description'] or Annotated[..., "description"]
|
| 31 |
+
# Pattern: Annotated[<base_type>, '<description>'] or with double quotes
|
| 32 |
+
match = re.match(
|
| 33 |
+
r"^Annotated\[(.+?),\s*['\"](.+?)['\"]\s*\]$",
|
| 34 |
+
annot_str.strip(),
|
| 35 |
+
re.DOTALL,
|
| 36 |
+
)
|
| 37 |
+
if match:
|
| 38 |
+
base_type_str = match.group(1).strip()
|
| 39 |
+
description = match.group(2)
|
| 40 |
+
# Simplify Optional[X] -> just the base type for display
|
| 41 |
+
opt_match = re.match(r"^Optional\[(.+)\]$", base_type_str)
|
| 42 |
+
if opt_match:
|
| 43 |
+
base_type_str = opt_match.group(1).strip()
|
| 44 |
+
return base_type_str, description
|
| 45 |
+
|
| 46 |
+
return None, None
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _extract_base_and_meta(annotation: Any) -> tuple[Any, str | None]:
|
| 50 |
+
"""Given an annotation, return (base_type, first string metadata) if Annotated, else (annotation, None)."""
|
| 51 |
+
try:
|
| 52 |
+
# Handle string annotations from PEP 563 (__future__.annotations)
|
| 53 |
+
if isinstance(annotation, str):
|
| 54 |
+
base_str, meta = _parse_string_annotation(annotation)
|
| 55 |
+
if meta:
|
| 56 |
+
return base_str or annotation, meta
|
| 57 |
+
return annotation, None
|
| 58 |
+
|
| 59 |
+
if get_origin(annotation) is Annotated:
|
| 60 |
+
args = get_args(annotation)
|
| 61 |
+
base = args[0] if args else annotation
|
| 62 |
+
# Grab the first string metadata if present
|
| 63 |
+
for meta in args[1:]:
|
| 64 |
+
if isinstance(meta, str):
|
| 65 |
+
return base, meta
|
| 66 |
+
return base, None
|
| 67 |
+
return annotation, None
|
| 68 |
+
except Exception:
|
| 69 |
+
return annotation, None
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def autodoc(summary: str | None = None, returns: str | None = None, *, force: bool = False):
    """
    Decorator that auto-generates a concise Google-style docstring from a function's
    type hints and Annotated metadata. Useful for Gradio MCP where docstrings are
    used for tool descriptions and parameter docs.

    Args:
        summary: Optional one-line summary for the function. If not provided,
            will generate a simple sentence from the function name.
        returns: Optional return value description. If not provided, only the
            return type will be listed (if available).
        force: When True, overwrite an existing docstring. Default False.

    Returns:
        The original function with its __doc__ populated (unless skipped).
    """

    def decorator(func):
        # Skip if docstring already present and not forcing
        if not force and func.__doc__ and func.__doc__.strip():
            return func

        try:
            # include_extras=True to retain Annotated metadata
            hints = get_type_hints(func, include_extras=True, globalns=getattr(func, "__globals__", None))
        except Exception:
            # Unresolvable hints (e.g. forward references) fall back to the raw
            # signature annotations below.
            hints = {}

        sig = inspect.signature(func)

        lines: list[str] = []
        # Summary line
        if summary and summary.strip():
            lines.append(summary.strip())
        else:
            pretty = func.__name__.replace("_", " ").strip().capitalize()
            if not pretty.endswith("."):
                pretty += "."
            lines.append(pretty)

        # Args section. Filter out ``self`` first so that a method whose only
        # parameter is ``self`` does not get an empty "Args:" header.
        documented = [(name, param) for name, param in sig.parameters.items() if name != "self"]
        if documented:
            lines.append("")
            lines.append("Args:")
            for name, param in documented:
                annot = hints.get(name, param.annotation)
                base, meta = _extract_base_and_meta(annot)
                tname = _typename(base) if base is not inspect.Parameter.empty else None
                desc = meta or ""
                if tname and tname != str(inspect.Parameter.empty):
                    lines.append(f" {name} ({tname}): {desc}".rstrip())
                else:
                    lines.append(f" {name}: {desc}".rstrip())

        # Returns section
        ret_hint = hints.get("return", sig.return_annotation)
        if returns or (ret_hint and ret_hint is not inspect.Signature.empty):
            lines.append("")
            lines.append("Returns:")
            if returns:
                lines.append(f" {returns}")
            else:
                base, meta = _extract_base_and_meta(ret_hint)
                rtype = _typename(base)
                if meta:
                    lines.append(f" {rtype}: {meta}")
                else:
                    lines.append(f" {rtype}")

        func.__doc__ = "\n".join(lines).strip() + "\n"
        return func

    return decorator
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# Public API of this module: only the decorator is exported.
__all__ = ["autodoc"]
|
Modules/_pollinations_client.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import Annotated, Any, Literal
|
| 5 |
+
|
| 6 |
+
import httpx
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 10 |
+
from ._docstrings import autodoc
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ===========================================================================
|
| 14 |
+
# Constants
|
| 15 |
+
# ===========================================================================
|
| 16 |
+
|
| 17 |
+
# Root of the Pollinations text API; the client appends the endpoint path.
BASE_URL = "https://text.pollinations.ai"

# Depth level -> Pollinations model identifier (insertion order is user-visible
# in the "Invalid depth" error message).
MODEL_MAPPING = dict(
    fast="gemini-search",
    normal="perplexity-fast",
    deep="perplexity-reasoning",
)

# System prompt keyed by the ``detailed`` flag.
SYSTEM_PROMPTS = {
    True: (
        "Search the web and provide a comprehensive answer with sources. "
        "Include relevant details and cite your sources."
    ),
    False: (
        "Search the web and provide a concise, accurate answer. "
        "Include source URLs."
    ),
}

# Default HTTP timeout, in seconds.
REQUEST_TIMEOUT = 30.0

# Single source of truth for the LLM-facing tool description.
TOOL_SUMMARY = " ".join(
    (
        "Search the web using AI-powered search models with source citations.",
        "Supports different depth levels: fast (Gemini with Google Search), normal (Perplexity Sonar), "
        "and deep (Perplexity Sonar Reasoning).",
        "Returns answers with source URLs.",
    )
)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ===========================================================================
|
| 44 |
+
# Core Client
|
| 45 |
+
# ===========================================================================
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class PollinationsClient:
    """
    HTTP client for Pollinations AI web search API.

    Provides web search functionality with different depth levels and citation support.
    Requests are sent as OpenAI-style chat completions to
    ``{base_url}/v1/chat/completions``.
    """

    # Punctuation that commonly trails a URL in prose or markdown without being part of it.
    _URL_TRAILING_PUNCTUATION = ".,;:!?*]"

    def __init__(
        self,
        base_url: str = BASE_URL,
        timeout: float = REQUEST_TIMEOUT,
        api_key: str | None = None,
    ) -> None:
        """
        Initialize the Pollinations client.

        Args:
            base_url: Base URL for the Pollinations API (default: https://text.pollinations.ai)
            timeout: Request timeout in seconds (default: 30)
            api_key: Optional API key (reads from POLLINATIONS_API_KEY env var if not provided)
        """
        # A trailing slash is dropped so the endpoint path can be appended verbatim.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.api_key = api_key or os.getenv("POLLINATIONS_API_KEY")

    def _get_headers(self) -> dict[str, str]:
        """Get request headers including API key if available."""
        headers = {
            "Content-Type": "application/json",
        }
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    def _resolve_model(self, depth: str) -> str:
        """
        Resolve depth level to actual model name.

        Args:
            depth: Depth level ('fast', 'normal', or 'deep')

        Returns:
            The model identifier for the Pollinations API; unknown depth
            values fall back to "perplexity-fast".
        """
        return MODEL_MAPPING.get(depth, "perplexity-fast")

    @classmethod
    def _extract_urls(cls, text: str) -> list[str]:
        """Return the unique URLs found in *text*, in first-seen order.

        Trailing sentence/markdown punctuation is stripped so that
        "see https://example.com." yields "https://example.com" and does not
        produce a near-duplicate of the same link.
        """
        import re

        found = re.findall(r'https?://[^\s<>"\'\)]+', text)
        cleaned = (url.rstrip(cls._URL_TRAILING_PUNCTUATION) for url in found)
        return list(dict.fromkeys(url for url in cleaned if url))

    async def web_search(
        self,
        query: str,
        depth: str = "normal",
        detailed: bool = False,
    ) -> dict[str, Any]:
        """
        Perform web search using Pollinations AI.

        Args:
            query: The search query
            depth: Search depth level ('fast', 'normal', or 'deep')
            detailed: Whether to request a comprehensive answer

        Returns:
            Dictionary with keys:
            - answer: The generated answer
            - sources: List of source URLs (citations)
            - model: The model used
            - query: The original query

        Raises:
            httpx.HTTPError: For network/HTTP errors (timeouts and HTTP 429
                are re-raised as plain ``httpx.HTTPError`` with a readable message)
            ValueError: For invalid parameters
        """
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if depth not in MODEL_MAPPING:
            raise ValueError(f"Invalid depth: {depth}. Must be one of {list(MODEL_MAPPING.keys())}")

        model = self._resolve_model(depth)
        system_prompt = SYSTEM_PROMPTS.get(detailed, SYSTEM_PROMPTS[False])

        # Prepare OpenAI-compatible request
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
        }

        url = f"{self.base_url}/v1/chat/completions"

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            try:
                response = await client.post(
                    url,
                    json=payload,
                    headers=self._get_headers(),
                )
                response.raise_for_status()
            except httpx.TimeoutException as exc:
                raise httpx.HTTPError(f"Request timed out after {self.timeout}s") from exc
            except httpx.HTTPStatusError as exc:
                # Handle rate limiting specifically
                if exc.response.status_code == 429:
                    raise httpx.HTTPError("Rate limited. Please try again later.") from exc
                raise

        data = response.json()
        if not isinstance(data, dict):
            # Unexpected top-level JSON (list, string, null): treat as "no answer".
            data = {}

        # OpenAI-compatible response format. ``message`` or ``content`` may be
        # missing or null, so every step falls back to an empty value.
        answer = ""
        choices = data.get("choices") or []
        if choices:
            message = choices[0].get("message") or {}
            answer = message.get("content") or ""

        # Citations are a Pollinations-specific extension. Normalise them so
        # that ``sources`` is always a list, as documented above.
        citations = data.get("citations")
        if isinstance(citations, str):
            sources = [citations] if citations else []
        elif isinstance(citations, (list, tuple)):
            sources = list(citations)
        else:
            sources = []

        # Fall back to URLs embedded in the answer text.
        if not sources and isinstance(answer, str):
            sources = self._extract_urls(answer)

        return {
            "answer": answer,
            "sources": sources,
            "model": model,
            "query": query,
        }

    def web_search_sync(
        self,
        query: str,
        depth: str = "normal",
        detailed: bool = False,
    ) -> dict[str, Any]:
        """
        Synchronous version of web_search.

        Runs the coroutine with ``asyncio.run``, so it must not be called from
        a thread that already has a running event loop.

        Args:
            query: The search query
            depth: Search depth level ('fast', 'normal', or 'deep')
            detailed: Whether to request a comprehensive answer

        Returns:
            Dictionary with answer, sources, model, and query
        """
        import asyncio

        return asyncio.run(self.web_search(query, depth, detailed))
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# ===========================================================================
|
| 206 |
+
# Gradio Tool Function
|
| 207 |
+
# ===========================================================================
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# NOTE(review): autodoc() returns the function unchanged when it already has a
# non-empty docstring and force is False, so summary=TOOL_SUMMARY appears to
# have no effect on __doc__ here - confirm this is intended.
@autodoc(
    summary=TOOL_SUMMARY,
)
def Pollinations_Web_Search(
    query: Annotated[str, "The search query string"],
    depth: Annotated[
        Literal["fast", "normal", "deep"],
        "Search depth: 'fast' (Gemini with Google Search), 'normal' (Perplexity Sonar), or 'deep' (Perplexity Sonar Reasoning).",
    ] = "normal",
    detailed: Annotated[bool, "Request a comprehensive answer instead of concise summary"] = False,
) -> str:
    """
    Search the web through Pollinations AI and return a plain-text report.

    The report lists the query, the model used, the depth, the answer and a
    numbered list of sources. Failures are never raised: they are logged and
    returned as a one-line message instead.

    Args:
        query: The search query string.
        depth: Search depth, one of 'fast', 'normal' or 'deep'.
        detailed: Request a comprehensive answer instead of a concise summary.

    Returns:
        The formatted report, or an error message starting with
        "Invalid input:", "Search failed:" or "Unexpected error:".
    """
    tool_name = "Pollinations_Web_Search"
    _log_call_start(tool_name, query=query, depth=depth, detailed=detailed)

    try:
        outcome = PollinationsClient().web_search_sync(query, depth, detailed)

        # Header and answer first, then the (possibly empty) sources section.
        report = [
            f"Query: {outcome['query']}",
            f"Model: {outcome['model']}",
            f"Depth: {depth}",
            "",
            "Answer:",
            outcome["answer"] or "No answer generated.",
        ]

        cited = outcome["sources"]
        report.append("")
        if cited:
            report.append("Sources:")
            report.extend(f" {position}. {link}" for position, link in enumerate(cited, 1))
        else:
            report.append("(No sources provided)")

        rendered = "\n".join(report)
        _log_call_end(tool_name, _truncate_for_log(rendered))
        return rendered

    except ValueError as exc:
        failure = f"Invalid input: {exc}"
    except httpx.HTTPError as exc:
        failure = f"Search failed: {exc}"
    except Exception as exc:
        # Top-level tool boundary: report anything else instead of raising.
        failure = f"Unexpected error: {exc}"

    _log_call_end(tool_name, failure)
    return failure
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
# ===========================================================================
|
| 271 |
+
# Gradio Interface
|
| 272 |
+
# ===========================================================================
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def build_interface() -> gr.Interface:
    """Assemble the Gradio Interface that wraps Pollinations_Web_Search.

    Returns:
        A ``gr.Interface`` with a query box, a depth selector and a
        "detailed" checkbox as inputs and a read-only text box as output.
    """
    # Inputs, in the same order as the tool function's parameters.
    query_box = gr.Textbox(
        label="Query",
        placeholder="Enter your search query here...",
        max_lines=2,
        info="The search query",
    )
    depth_picker = gr.Radio(
        label="Search Depth",
        choices=["fast", "normal", "deep"],
        value="normal",
        info="Search depth level: fast (Gemini), normal (Perplexity), deep (Reasoning)",
    )
    detail_toggle = gr.Checkbox(
        label="Detailed Answer",
        value=False,
        info="Request a comprehensive answer instead of concise summary",
    )

    results_box = gr.Textbox(
        label="Search Results",
        interactive=False,
        lines=15,
        max_lines=20,
    )

    blurb = (
        '<div style="text-align:center">AI-powered web search with source citations. '
        "Uses Google Search, Perplexity Sonar, and Perplexity Sonar Reasoning models "
        "to provide direct answers with reliable source URLs.</div>"
    )

    return gr.Interface(
        fn=Pollinations_Web_Search,
        inputs=[query_box, depth_picker, detail_toggle],
        outputs=results_box,
        title="Pollinations Web Search",
        description=blurb,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Search",
    )
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# ===========================================================================
|
| 317 |
+
# Public API
|
| 318 |
+
# ===========================================================================
|
| 319 |
+
|
| 320 |
+
# Names exported by ``from <module> import *``: the client, the tool function
# and the Gradio interface factory.
__all__ = ["PollinationsClient", "Pollinations_Web_Search", "build_interface"]
|
Modules/_query_optimizer.py
ADDED
|
@@ -0,0 +1,781 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Query Optimizer Module with Self-Consistency Chain-of-Thought (SC-CoT).
|
| 3 |
+
|
| 4 |
+
Optimizes search queries using AI-generated candidate scoring with a fallback chain:
|
| 5 |
+
1. Mistral API (magistral-medium-2509) - Primary
|
| 6 |
+
2. HuggingFace Inference (openai/gpt-oss-20b:cheapest) - Fallback
|
| 7 |
+
3. Bypass (return raw query) - Final fallback
|
| 8 |
+
|
| 9 |
+
Two optimization modes:
|
| 10 |
+
- optimize_for_search_engine(): Boolean operators, site:, filetype:, exact phrases
|
| 11 |
+
- optimize_for_ai_search(): Clear intent, context, specific questions
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import re
|
| 19 |
+
from typing import Annotated, Any, Literal
|
| 20 |
+
|
| 21 |
+
import gradio as gr
|
| 22 |
+
from pydantic import BaseModel, Field
|
| 23 |
+
|
| 24 |
+
from app import _log_call_end, _log_call_start, _truncate_for_log
|
| 25 |
+
from ._docstrings import autodoc
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ===========================================================================
|
| 29 |
+
# Pydantic Schemas for Structured Output
|
| 30 |
+
# ===========================================================================
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class OptimizedCandidate(BaseModel):
    """A single optimized query candidate with reasoning."""

    # 1-based position of this candidate among the generated alternatives.
    version: int = Field(description="Candidate version number (1-based)")
    # The rewritten query text itself.
    optimized_query: str = Field(description="The optimized query string")
    # One entry per reasoning step that explains the rewrite.
    reasoning: list[str] = Field(description="List of reasoning steps explaining optimizations")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class GenerationOutput(BaseModel):
    """Output from candidate generation phase."""

    # The query exactly as the user supplied it.
    original_query: str = Field(description="The original user query")
    # All alternatives produced for that query.
    candidates: list[OptimizedCandidate] = Field(description="List of generated candidates")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class ScoringOutput(BaseModel):
    """Output from candidate selection phase."""

    # Presumably matches OptimizedCandidate.version of the winning candidate - verify against the caller.
    selected_version: int = Field(description="Version number of the best candidate")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ===========================================================================
|
| 55 |
+
# Core Query Optimizer Class
|
| 56 |
+
# ===========================================================================
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class QueryOptimizer:
|
| 60 |
+
"""
|
| 61 |
+
Self-Consistency Chain-of-Thought query optimizer.
|
| 62 |
+
|
| 63 |
+
Generates multiple optimized candidates and selects the best one through
|
| 64 |
+
self-consistency scoring. Implements a fallback chain for reliability.
|
| 65 |
+
"""
|
| 66 |
+
|
| 67 |
+
# Few-shot examples for search engine optimization
|
| 68 |
+
# Few-shot block for search-engine style rewrites. Every candidate line must
# follow the same `N. ("query", "reason")` shape so the examples stay consistent.
_SEARCH_ENGINE_EXAMPLES = """
Example 1:
Input: python fastapi performance
Candidates:
1. ("python fastapi performance", "Direct query covers main concepts")
2. ("fastapi performance optimization python", "Added 'optimization' for more specific results")
3. ("site:stackoverflow.com fastapi performance python", "Targeted technical Q&A for performance issues")
4. ("fastapi async performance benchmark", "Added 'async' and 'benchmark' for technical depth")
5. ("fastapi OR flask performance python", "Added comparison with Flask for broader context")

Example 2:
Input: climate change effects on agriculture
Candidates:
1. ("climate change effects on agriculture", "Clear and comprehensive query")
2. ("site:nature.com OR site:science.org climate change agriculture", "Targeted reputable scientific sources")
3. ("\"climate change\" AND agriculture filetype:pdf", "Using exact phrase match and PDF filter for research papers")
4. ("climate change impact crop yield 2023..2024", "Added temporal filter and specific terminology")
5. ("agricultural adaptation climate change strategies", "Rephrased to focus on solutions")

Example 3:
Input: machine learning tutorial python
Candidates:
1. ("python machine learning tutorial", "Reordered for better SEO")
2. ("site:youtube.com python machine learning tutorial", "Targeted video tutorials")
3. ("python machine learning tutorial filetype:pdf", "Focus on PDF documentation")
4. ("machine learning python sklearn tutorial", "Added popular library 'sklearn' for relevance")
5. ("\"machine learning\" AND python AND tutorial", "Using boolean operators for precision")

Example 4:
Input: react native vs flutter
Candidates:
1. ("react native vs flutter comparison", "Added 'comparison' for explicit intent")
2. ("site:reddit.com \"react native\" flutter", "Targeted community discussions")
3. ("\"react native\" OR flutter mobile development", "Broader search for mobile frameworks")
4. ("react native flutter performance benchmark", "Focus on technical comparison")
5. ("flutter vs react native 2024", "Added year for current information")

Example 5:
Input: best restaurants in tokyo
Candidates:
1. ("best restaurants tokyo", "Simplified for broad search")
2. ("site:michelin.com Tokyo restaurants", "Targeted Michelin guide sources")
3. ("Tokyo restaurant guide 2024", "Added temporal context")
4. ("\"best restaurants\" AND tokyo AND review", "Boolean operators for precision")
5. ("tokyo food guide michelin OR local", "Added 'local' for authentic recommendations")
"""
|
| 114 |
+
|
| 115 |
+
# Few-shot examples for AI search optimization
|
| 116 |
+
# Few-shot block for AI-search style rewrites (full natural-language questions).
# Every candidate line follows the `N. ("question", "reason")` shape.
_AI_SEARCH_EXAMPLES = """
Example 1:
Input: python fastapi performance
Candidates:
1. ("What are the performance characteristics of FastAPI in Python, and how does it compare to other web frameworks?", "Added comparison context and framework focus")
2. ("Explain the key performance optimization techniques for FastAPI applications in Python.", "Focused on actionable optimization strategies")
3. ("How does FastAPI's async/await model impact performance compared to synchronous frameworks?", "Targeted technical architectural question")
4. ("What are the benchmarks and real-world performance metrics for FastAPI in production environments?", "Asked for empirical data")
5. ("How can I identify and resolve performance bottlenecks in FastAPI applications?", "Problem-solving focused")

Example 2:
Input: climate change effects on agriculture
Candidates:
1. ("What are the primary impacts of climate change on global agricultural productivity and crop yields?", "Comprehensive question covering direct effects")
2. ("How is climate change affecting different agricultural regions around the world?", "Geographic focus")
3. ("What adaptation strategies are farmers using to cope with climate change impacts?", "Solution-oriented focus")
4. ("What scientific evidence exists linking climate change to agricultural changes?", "Evidence-based inquiry")
5. ("How will climate change affect food security and agricultural sustainability by 2050?", "Temporal and sustainability focus")

Example 3:
Input: react native vs flutter
Candidates:
1. ("What are the key differences between React Native and Flutter in terms of performance, development experience, and ecosystem?", "Comprehensive comparison framework")
2. ("Which cross-platform mobile framework is better suited for startup applications: React Native or Flutter?", "Use-case specific question")
3. ("How do React Native and Flutter compare in terms of learning curve, community support, and hiring availability?", "Practical development considerations")
4. ("What are the long-term maintenance implications of choosing React Native vs Flutter?", "Strategic business question")
5. ("Which framework provides better native performance and access to device features: React Native or Flutter?", "Technical performance focus")

Example 4:
Input: machine learning tutorial python
Candidates:
1. ("What is the best learning path for getting started with machine learning using Python?", "Learning path focused question")
2. ("Can you recommend a comprehensive Python machine learning tutorial for beginners?", "Resource-seeking question")
3. ("What are the essential Python libraries and tools for implementing machine learning algorithms?", "Tool ecosystem question")
4. ("How can I build my first machine learning model in Python from scratch?", "Hands-on implementation focus")
5. ("What are the common pitfalls and best practices for learning machine learning with Python?", "Learning guidance question")

Example 5:
Input: quantum computing explained
Candidates:
1. ("Can you explain quantum computing in simple terms for someone without a physics background?", "Accessible explanation request")
2. ("What are the fundamental principles of quantum computing and how do they differ from classical computing?", "Conceptual comparison question")
3. ("What are the practical applications of quantum computing and when might they become viable?", "Real-world impact question")
4. ("How do qubits work and why do they enable quantum computational advantages?", "Technical explanation question")
5. ("What are the current limitations and challenges in developing practical quantum computers?", "Critical analysis question")
"""
|
| 162 |
+
|
| 163 |
+
# Instruction block for the selection phase: asks the model to pick one
# candidate and answer with its 1-indexed version number only.
_SELECTOR_PROMPT = "\n".join(
    (
        "",
        "Given the original query and multiple optimized candidates, select the best one.",
        "",
        "Criteria for selection:",
        "- Relevance: Most accurately captures the user's intent",
        "- Precision: Will return the most relevant results",
        "- Completeness: Covers all important aspects of the query",
        "- Clarity: Easy to understand and well-structured",
        "",
        "Return only the version number of the best candidate (1-indexed).",
        "",
    )
)
|
| 174 |
+
|
| 175 |
+
def __init__(self) -> None:
|
| 176 |
+
"""Initialize the query optimizer with API clients."""
|
| 177 |
+
self._mistral_api_key: str | None = os.getenv("MISTRAL_API_KEY")
|
| 178 |
+
self._hf_token: str | None = os.getenv("HF_TOKEN")
|
| 179 |
+
self._mistral_model: str = "magistral-medium-2509"
|
| 180 |
+
self._hf_model: str = "openai/gpt-oss-20b:cheapest"
|
| 181 |
+
self._hf_endpoint: str = "https://router.huggingface.co/v1"
|
| 182 |
+
|
| 183 |
+
def _mistral_generate(
|
| 184 |
+
self, prompt: str, response_format: dict[str, Any]
|
| 185 |
+
) -> str:
|
| 186 |
+
"""Generate structured output using Mistral API with response_format."""
|
| 187 |
+
if not self._mistral_api_key:
|
| 188 |
+
raise ValueError("MISTRAL_API_KEY not set")
|
| 189 |
+
|
| 190 |
+
import httpx
|
| 191 |
+
|
| 192 |
+
messages = [
|
| 193 |
+
{
|
| 194 |
+
"role": "user",
|
| 195 |
+
"content": prompt,
|
| 196 |
+
}
|
| 197 |
+
]
|
| 198 |
+
|
| 199 |
+
payload = {
|
| 200 |
+
"model": self._mistral_model,
|
| 201 |
+
"messages": messages,
|
| 202 |
+
"response_format": response_format,
|
| 203 |
+
"max_tokens": 2000,
|
| 204 |
+
"temperature": 0.3,
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
headers = {
|
| 208 |
+
"Authorization": f"Bearer {self._mistral_api_key}",
|
| 209 |
+
"Content-Type": "application/json",
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
response = httpx.post(
|
| 213 |
+
"https://api.mistral.ai/v1/chat/completions",
|
| 214 |
+
json=payload,
|
| 215 |
+
headers=headers,
|
| 216 |
+
timeout=30.0,
|
| 217 |
+
)
|
| 218 |
+
response.raise_for_status()
|
| 219 |
+
result = response.json()
|
| 220 |
+
|
| 221 |
+
if "choices" not in result or not result["choices"]:
|
| 222 |
+
raise ValueError("Invalid Mistral API response: no choices")
|
| 223 |
+
|
| 224 |
+
return result["choices"][0]["message"]["content"]
|
| 225 |
+
|
| 226 |
+
def _hf_generate(self, prompt: str) -> str:
|
| 227 |
+
"""Generate output using HuggingFace Inference API."""
|
| 228 |
+
if not self._hf_token:
|
| 229 |
+
raise ValueError("HF_TOKEN not set")
|
| 230 |
+
|
| 231 |
+
import httpx
|
| 232 |
+
|
| 233 |
+
payload = {
|
| 234 |
+
"model": self._hf_model,
|
| 235 |
+
"messages": [
|
| 236 |
+
{
|
| 237 |
+
"role": "user",
|
| 238 |
+
"content": prompt,
|
| 239 |
+
}
|
| 240 |
+
],
|
| 241 |
+
"max_tokens": 2000,
|
| 242 |
+
"temperature": 0.3,
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
headers = {
|
| 246 |
+
"Authorization": f"Bearer {self._hf_token}",
|
| 247 |
+
"Content-Type": "application/json",
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
response = httpx.post(
|
| 251 |
+
f"{self._hf_endpoint}/chat/completions",
|
| 252 |
+
json=payload,
|
| 253 |
+
headers=headers,
|
| 254 |
+
timeout=30.0,
|
| 255 |
+
)
|
| 256 |
+
response.raise_for_status()
|
| 257 |
+
result = response.json()
|
| 258 |
+
|
| 259 |
+
if "choices" not in result or not result["choices"]:
|
| 260 |
+
raise ValueError("Invalid HF API response: no choices")
|
| 261 |
+
|
| 262 |
+
return result["choices"][0]["message"]["content"]
|
| 263 |
+
|
| 264 |
+
def _extract_json_from_response(self, response: str) -> str:
|
| 265 |
+
"""Extract JSON from a response that may have markdown formatting."""
|
| 266 |
+
# Try to find JSON between ```json and ``` or between ``` and ```
|
| 267 |
+
patterns = [
|
| 268 |
+
r"```json\s*([\s\S]*?)\s*```",
|
| 269 |
+
r"```\s*([\s\S]*?)\s*```",
|
| 270 |
+
r"(\{[\s\S]*\})",
|
| 271 |
+
]
|
| 272 |
+
|
| 273 |
+
for pattern in patterns:
|
| 274 |
+
match = re.search(pattern, response.strip())
|
| 275 |
+
if match:
|
| 276 |
+
return match.group(1).strip()
|
| 277 |
+
|
| 278 |
+
return response.strip()
|
| 279 |
+
|
| 280 |
+
def _optimize_search_engine_mistral(self, query: str) -> str:
    """Ask Mistral for five search-engine-oriented rewrites of ``query``.

    Args:
        query: The original search query.

    Returns:
        The raw model reply; the request constrains it to the
        ``GenerationOutput`` JSON schema.

    Raises:
        Whatever ``_mistral_generate`` raises (missing key, HTTP errors,
        invalid response).
    """
    prompt = f"""Generate 5 optimized versions of the following search query for traditional search engines (DuckDuckGo, Google, etc.).

Optimization techniques to use:
- Add boolean operators (AND, OR, NOT)
- Use site: to target specific domains
- Use filetype: to filter by document type
- Use exact phrases with quotes
- Add relevant keywords for precision
- Include temporal filters when appropriate
- Target reputable sources (Wikipedia, StackOverflow, GitHub, etc.)

{self._SEARCH_ENGINE_EXAMPLES}

Original query: {query}

Generate candidates in the following JSON format:
{{
"original_query": "{query}",
"candidates": [
{{
"version": 1,
"optimized_query": "...",
"reasoning": ["...", "..."]
}},
...
]
}}

Return ONLY valid JSON, no markdown formatting."""

    # Mistral expects the schema nested under "schema" next to a "name";
    # a bare schema object in "json_schema" is not the documented shape.
    return self._mistral_generate(
        prompt,
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "GenerationOutput",
                "schema": GenerationOutput.model_json_schema(),
            },
        },
    )
|
| 319 |
+
|
| 320 |
+
def _optimize_ai_search_mistral(self, query: str) -> str:
    """Ask Mistral for five AI-search-oriented rewrites of ``query``.

    Args:
        query: The original search query.

    Returns:
        The raw model reply; the request constrains it to the
        ``GenerationOutput`` JSON schema.

    Raises:
        Whatever ``_mistral_generate`` raises (missing key, HTTP errors,
        invalid response).
    """
    prompt = f"""Generate 5 optimized versions of the following query for AI-powered search engines (Perplexity, Gemini Search, etc.).

Optimization techniques to use:
- Reframe as clear, specific questions
- Add context about what information is needed
- Include comparative or evaluative language when relevant
- Ask for explanations, examples, or step-by-step guides
- Include temporal context (current state, recent developments)
- Focus on actionable information or insights

{self._AI_SEARCH_EXAMPLES}

Original query: {query}

Generate candidates in the following JSON format:
{{
"original_query": "{query}",
"candidates": [
{{
"version": 1,
"optimized_query": "...",
"reasoning": ["...", "..."]
}},
...
]
}}

Return ONLY valid JSON, no markdown formatting."""

    # Mistral expects the schema nested under "schema" next to a "name";
    # a bare schema object in "json_schema" is not the documented shape.
    return self._mistral_generate(
        prompt,
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "GenerationOutput",
                "schema": GenerationOutput.model_json_schema(),
            },
        },
    )
|
| 358 |
+
|
| 359 |
+
def _select_best_mistral(self, candidates_json: str) -> int:
    """Ask Mistral which candidate version is best.

    Args:
        candidates_json: JSON text listing the generated candidates.

    Returns:
        The ``selected_version`` field of the model's JSON reply, as an int.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply lacks ``selected_version``.
        ValueError, TypeError: If ``selected_version`` is not integer-like.
        Also anything ``_mistral_generate`` raises.
    """
    prompt = f"""{self._SELECTOR_PROMPT}

{candidates_json}

Return the version number (1-5) of the best candidate."""

    # Mistral expects the schema nested under "schema" next to a "name".
    response = self._mistral_generate(
        prompt,
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "ScoringOutput",
                "schema": ScoringOutput.model_json_schema(),
            },
        },
    )

    result = json.loads(self._extract_json_from_response(response))

    # Coerce so a reply like "3" still compares equal to candidate.version;
    # otherwise the caller silently falls back to the first candidate.
    return int(result["selected_version"])
|
| 379 |
+
|
| 380 |
+
def _optimize_search_engine_hf(self, query: str) -> str:
|
| 381 |
+
"""Optimize for search engines using HF Inference (fallback)."""
|
| 382 |
+
prompt = f"""Generate 5 optimized search query candidates. Return as JSON with format:
|
| 383 |
+
{{
|
| 384 |
+
"original_query": "...",
|
| 385 |
+
"candidates": [
|
| 386 |
+
{{"version": 1, "optimized_query": "...", "reasoning": ["..."]}},
|
| 387 |
+
...
|
| 388 |
+
]
|
| 389 |
+
}}
|
| 390 |
+
|
| 391 |
+
Query: {query}
|
| 392 |
+
|
| 393 |
+
Optimize with boolean operators, site:, filetype:, quotes for phrases, and relevant keywords."""
|
| 394 |
+
|
| 395 |
+
response = self._hf_generate(prompt)
|
| 396 |
+
return self._extract_json_from_response(response)
|
| 397 |
+
|
| 398 |
+
def _optimize_ai_search_hf(self, query: str) -> str:
|
| 399 |
+
"""Optimize for AI search using HF Inference (fallback)."""
|
| 400 |
+
prompt = f"""Generate 5 optimized query candidates for AI search. Return as JSON with format:
|
| 401 |
+
{{
|
| 402 |
+
"original_query": "...",
|
| 403 |
+
"candidates": [
|
| 404 |
+
{{"version": 1, "optimized_query": "...", "reasoning": ["..."]}},
|
| 405 |
+
...
|
| 406 |
+
]
|
| 407 |
+
}}
|
| 408 |
+
|
| 409 |
+
Query: {query}
|
| 410 |
+
|
| 411 |
+
Optimize as clear, specific questions with context and intent."""
|
| 412 |
+
|
| 413 |
+
response = self._hf_generate(prompt)
|
| 414 |
+
return self._extract_json_from_response(response)
|
| 415 |
+
|
| 416 |
+
def _select_best_hf(self, candidates_json: str) -> int:
|
| 417 |
+
"""Select best candidate using HF Inference (fallback)."""
|
| 418 |
+
prompt = f"""{self._SELECTOR_PROMPT}
|
| 419 |
+
|
| 420 |
+
{candidates_json}
|
| 421 |
+
|
| 422 |
+
Return only the number (1-5)."""
|
| 423 |
+
|
| 424 |
+
response = self._hf_generate(prompt)
|
| 425 |
+
# Try to extract number from response
|
| 426 |
+
match = re.search(r"\b([1-5])\b", response)
|
| 427 |
+
if match:
|
| 428 |
+
return int(match.group(1))
|
| 429 |
+
return 1 # Default to first candidate
|
| 430 |
+
|
| 431 |
+
def _parse_candidates(self, json_str: str, original_query: str) -> GenerationOutput:
    """Parse a provider reply into ``GenerationOutput``, never raising.

    Args:
        json_str: Raw reply text, possibly wrapped in markdown.
        original_query: Query to fall back to when parsing fails.

    Returns:
        The validated output when it holds at least one candidate;
        otherwise a single-candidate output that repeats
        ``original_query``.
    """
    try:
        parsed = GenerationOutput.model_validate_json(
            self._extract_json_from_response(json_str)
        )
        if parsed.candidates:
            return parsed
        # A valid but empty list would crash callers that index
        # candidates[0], so it is treated like a parse failure.
        failure = "response contained no candidates"
    except Exception as exc:  # deliberate best-effort: any failure means fallback
        failure = str(exc)

    # Report the reason instead of swallowing it silently.
    print(f"[QueryOptimizer] Candidate parsing failed: {failure}", flush=True)

    return GenerationOutput(
        original_query=original_query,
        candidates=[
            OptimizedCandidate(
                version=1,
                optimized_query=original_query,
                reasoning=["Fallback: using original query"],
            )
        ],
    )
|
| 448 |
+
|
| 449 |
+
def _run_optimization_chain(
    self,
    query: str,
    mode: Literal["search_engine", "ai_search"],
) -> tuple[GenerationOutput, int, str]:
    """Run optimization through the provider fallback chain.

    Providers are tried in order (Mistral, then HF). A provider counts as
    failed when generation raises, when its reply cannot be validated as
    ``GenerationOutput``, when it yields no candidates, or when selecting
    the best candidate raises. If every provider fails, the original query
    is returned as the only candidate.

    Args:
        query: The original search query.
        mode: ``"search_engine"`` for operator-style rewrites; any other
            value selects the AI-search generators.

    Returns:
        (candidates, best_version, provider_used), where provider_used is
        ``"mistral"``, ``"hf"`` or ``"bypass"``.
    """
    search_engine = mode == "search_engine"
    providers = (
        (
            "mistral",
            "Mistral",
            self._optimize_search_engine_mistral
            if search_engine
            else self._optimize_ai_search_mistral,
            self._select_best_mistral,
        ),
        (
            "hf",
            "HF",
            self._optimize_search_engine_hf
            if search_engine
            else self._optimize_ai_search_hf,
            self._select_best_hf,
        ),
    )

    for provider, label, generate, select_best in providers:
        try:
            response = generate(query)
            # Validate strictly here: a lenient parse would hide a garbage
            # reply behind the original query, report this provider as
            # successful, and never give the next provider a chance.
            candidates = GenerationOutput.model_validate_json(
                self._extract_json_from_response(response)
            )
            if not candidates.candidates:
                raise ValueError("provider returned no candidates")
            best_version = select_best(response)
            return candidates, best_version, provider
        except Exception as exc:  # any failure moves on to the next provider
            print(f"[QueryOptimizer] {label} failed: {exc}", flush=True)

    # Final bypass: return original query
    bypass = GenerationOutput(
        original_query=query,
        candidates=[
            OptimizedCandidate(
                version=1,
                optimized_query=query,
                reasoning=["Bypass: using original query due to optimization failure"],
            )
        ],
    )
    return bypass, 1, "bypass"
|
| 502 |
+
|
| 503 |
+
def optimize_for_search_engine(self, query: str) -> tuple[str, dict[str, Any]]:
    """
    Optimize query for traditional search engines.

    Optimizes with boolean operators, site:, filetype:, exact phrases.

    Args:
        query: The original search query

    Returns:
        (optimized_query, metadata) tuple with metadata including:
        - original_query: The input query
        - all_candidates: List of all generated candidates
        - reasoning: Reasoning for selected candidate
        - provider: Which provider was used (mistral/hf/bypass)

        When the chain returns no candidates at all, ``query`` itself is
        returned as the optimized query.
    """
    candidates, best_version, provider = self._run_optimization_chain(
        query, "search_engine"
    )
    pool = candidates.candidates

    # Prefer the candidate the selector chose, otherwise the first one.
    # The default is resolved lazily so an empty pool cannot raise IndexError.
    selected = next((c for c in pool if c.version == best_version), None)
    if selected is None and pool:
        selected = pool[0]

    if selected is None:
        optimized_query = query
        reasoning = ["No candidates returned; using original query"]
    else:
        optimized_query = selected.optimized_query
        reasoning = selected.reasoning

    metadata = {
        "original_query": candidates.original_query,
        "all_candidates": [
            {"version": c.version, "query": c.optimized_query} for c in pool
        ],
        "reasoning": reasoning,
        "provider": provider,
    }

    return optimized_query, metadata
|
| 540 |
+
|
| 541 |
+
def optimize_for_ai_search(self, query: str) -> tuple[str, dict[str, Any]]:
    """
    Optimize query for AI-powered search engines.

    Optimizes with clear intent, context, specific questions.

    Args:
        query: The original search query

    Returns:
        (optimized_query, metadata) tuple with metadata including:
        - original_query: The input query
        - all_candidates: List of all generated candidates
        - reasoning: Reasoning for selected candidate
        - provider: Which provider was used (mistral/hf/bypass)

        When the chain returns no candidates at all, ``query`` itself is
        returned as the optimized query.
    """
    candidates, best_version, provider = self._run_optimization_chain(query, "ai_search")
    pool = candidates.candidates

    # Prefer the candidate the selector chose, otherwise the first one.
    # The default is resolved lazily so an empty pool cannot raise IndexError.
    selected = next((c for c in pool if c.version == best_version), None)
    if selected is None and pool:
        selected = pool[0]

    if selected is None:
        optimized_query = query
        reasoning = ["No candidates returned; using original query"]
    else:
        optimized_query = selected.optimized_query
        reasoning = selected.reasoning

    metadata = {
        "original_query": candidates.original_query,
        "all_candidates": [
            {"version": c.version, "query": c.optimized_query} for c in pool
        ],
        "reasoning": reasoning,
        "provider": provider,
    }

    return optimized_query, metadata
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
# Singleton instance for module-level caching
|
| 579 |
+
_optimizer_instance: QueryOptimizer | None = None
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
def get_optimizer() -> QueryOptimizer:
    """Return the module-wide ``QueryOptimizer``, creating it on first call."""
    global _optimizer_instance

    instance = _optimizer_instance
    if instance is None:
        # First call: build the optimizer and cache it for later calls.
        instance = _optimizer_instance = QueryOptimizer()
    return instance
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
# ===========================================================================
|
| 591 |
+
# Gradio Tool Functions
|
| 592 |
+
# ===========================================================================
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
@autodoc(
    summary="Optimize a search query for traditional search engines using SC-CoT with fallback chain (Mistral → HF → bypass).",
)
def Optimize_for_Search_Engine(
    query: Annotated[str, "The search query to optimize."],
) -> str:
    """
    Optimize a query for traditional search engines (DuckDuckGo, Google, etc.).

    Uses Self-Consistency Chain-of-Thought with fallback chain:
    1. Mistral API (magistral-medium-2509) - Primary
    2. HuggingFace Inference - Fallback
    3. Bypass (return raw query) - Final fallback

    Optimization techniques:
    - Boolean operators (AND, OR, NOT)
    - site: for domain targeting
    - filetype: for document type filtering
    - Exact phrases with quotes
    - Relevant keywords for precision
    - Temporal filters when appropriate
    """
    _log_call_start("Optimize_for_Search_Engine", query=query)

    if not query or not query.strip():
        result = "No query provided. Please enter a search query to optimize."
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result

    optimizer = get_optimizer()

    try:
        optimized, metadata = optimizer.optimize_for_search_engine(query)

        lines = [
            f"Original: {metadata['original_query']}",
            f"Optimized: {optimized}",
            f"Provider: {metadata['provider']}",
            "",
            "All candidates:",
        ]

        # Put the arrow on the candidate that was actually selected. The
        # metadata carries no selected version, so match on query text and
        # mark only the first match in case of duplicates.
        marked = False
        for candidate in metadata["all_candidates"]:
            is_selected = not marked and candidate["query"] == optimized
            marked = marked or is_selected
            prefix = "→" if is_selected else " "
            lines.append(f"{prefix} {candidate['version']}. {candidate['query']}")

        lines.append("")
        lines.append("Reasoning:")
        lines.extend(f" • {step}" for step in metadata["reasoning"])

        result = "\n".join(lines)
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result
    except Exception as exc:  # tool boundary: report the failure as text
        result = f"Optimization failed: {exc}"
        _log_call_end("Optimize_for_Search_Engine", _truncate_for_log(result))
        return result
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
@autodoc(
    summary="Optimize a search query for AI-powered search engines using SC-CoT with fallback chain (Mistral → HF → bypass).",
)
def Optimize_for_AI_Search(
    query: Annotated[str, "The search query to optimize."],
) -> str:
    """
    Optimize a query for AI-powered search engines (Perplexity, Gemini, etc.).

    Uses Self-Consistency Chain-of-Thought with fallback chain:
    1. Mistral API (magistral-medium-2509) - Primary
    2. HuggingFace Inference - Fallback
    3. Bypass (return raw query) - Final fallback

    Optimization techniques:
    - Clear, specific questions
    - Context about what information is needed
    - Comparative or evaluative language
    - Requests for explanations or examples
    - Temporal context (current state, recent developments)
    - Focus on actionable information
    """
    _log_call_start("Optimize_for_AI_Search", query=query)

    if not query or not query.strip():
        result = "No query provided. Please enter a search query to optimize."
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result

    optimizer = get_optimizer()

    try:
        optimized, metadata = optimizer.optimize_for_ai_search(query)

        lines = [
            f"Original: {metadata['original_query']}",
            f"Optimized: {optimized}",
            f"Provider: {metadata['provider']}",
            "",
            "All candidates:",
        ]

        # Put the arrow on the candidate that was actually selected. The
        # metadata carries no selected version, so match on query text and
        # mark only the first match in case of duplicates.
        marked = False
        for candidate in metadata["all_candidates"]:
            is_selected = not marked and candidate["query"] == optimized
            marked = marked or is_selected
            prefix = "→" if is_selected else " "
            lines.append(f"{prefix} {candidate['version']}. {candidate['query']}")

        lines.append("")
        lines.append("Reasoning:")
        lines.extend(f" • {step}" for step in metadata["reasoning"])

        result = "\n".join(lines)
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result
    except Exception as exc:  # tool boundary: report the failure as text
        result = f"Optimization failed: {exc}"
        _log_call_end("Optimize_for_AI_Search", _truncate_for_log(result))
        return result
|
| 711 |
+
|
| 712 |
+
|
| 713 |
+
def build_interfaces() -> list[gr.Interface]:
    """Build the two Gradio interfaces exposing the query optimizer tools."""

    def _interface(fn, engine_kind, title_suffix, techniques, api_name):
        # Both tools share the same layout; only the wording and the
        # wrapped function differ.
        return gr.Interface(
            fn=fn,
            inputs=[
                gr.Textbox(
                    label="Query",
                    placeholder="Enter your search query",
                    max_lines=1,
                    info=f"The search query to optimize for {engine_kind} search engines",
                ),
            ],
            outputs=gr.Textbox(
                label="Optimization Results",
                interactive=False,
                lines=15,
                max_lines=20,
            ),
            title=f"Query Optimizer ({title_suffix})",
            description=(
                "<div style='text-align:center'>"
                f"Optimize queries for {engine_kind} search engines using AI. "
                "Generates multiple candidates and selects the best one. "
                f"Optimizes with {techniques}"
                "</div>"
            ),
            api_name=api_name,
            flagging_mode="never",
            submit_btn="Optimize",
        )

    return [
        _interface(
            Optimize_for_Search_Engine,
            "traditional",
            "Search Engine",
            "boolean operators, site:, filetype:, and precise keywords.",
            "optimize_for_search_engine",
        ),
        _interface(
            Optimize_for_AI_Search,
            "AI-powered",
            "AI Search",
            "clear questions, context, and specific intent.",
            "optimize_for_ai_search",
        ),
    ]
|
| 773 |
+
|
| 774 |
+
|
| 775 |
+
__all__ = [
|
| 776 |
+
"QueryOptimizer",
|
| 777 |
+
"get_optimizer",
|
| 778 |
+
"Optimize_for_Search_Engine",
|
| 779 |
+
"Optimize_for_AI_Search",
|
| 780 |
+
"build_interfaces",
|
| 781 |
+
]
|
Modules/_searxng_client.py
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SearXNG Client Module
|
| 3 |
+
|
| 4 |
+
HTTP client for SearXNG metasearch engine with auto-fallback to multiple instances.
|
| 5 |
+
Supports text, image, and news search with rate limiting and error handling.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import time
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from enum import Enum
|
| 13 |
+
from typing import Any, Optional
|
| 14 |
+
|
| 15 |
+
import httpx
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class TimeRange(str, Enum):
    """Recency windows that can be requested for a search.

    Members are also ``str`` instances; ``.value`` is what gets sent as the
    ``time_range`` query parameter.
    """

    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class TextResult:
    """One hit from a text/web search."""

    # Required fields.
    url: str
    title: str
    content: str

    # Optional metadata; defaults apply when the field is not supplied.
    engine: Optional[str] = None
    category: Optional[str] = None
    score: float = 0.0
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class ImageResult:
    """One hit from an image search."""

    # Required fields.
    url: str
    title: str
    img_src: str
    thumbnail_src: str

    # Optional metadata; ``None`` when not supplied.
    engine: Optional[str] = None
    source: Optional[str] = None
    resolution: Optional[str] = None
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@dataclass
class NewsResult:
    """One hit from a news search."""

    # Required fields.
    url: str
    title: str
    content: str

    # Optional metadata; defaults apply when the field is not supplied.
    engine: Optional[str] = None
    published_date: Optional[str] = None
    score: float = 0.0
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class SearXNGError(Exception):
    """Root of the SearXNG client exception hierarchy."""


class RateLimitError(SearXNGError):
    """The instance refused the request because of rate limiting."""


class InstanceUnavailableError(SearXNGError):
    """A single SearXNG instance could not serve the request."""


class AllInstancesFailedError(SearXNGError):
    """Every configured SearXNG instance failed."""
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class SearXNGClient:
|
| 81 |
+
"""
|
| 82 |
+
HTTP client for SearXNG metasearch engine.
|
| 83 |
+
|
| 84 |
+
Features:
|
| 85 |
+
- Multiple instance support with auto-fallback
|
| 86 |
+
- Connection pooling for performance
|
| 87 |
+
- Automatic retries with exponential backoff
|
| 88 |
+
- Type-safe result parsing
|
| 89 |
+
- Configurable timeouts
|
| 90 |
+
"""
|
| 91 |
+
|
| 92 |
+
def __init__(
    self,
    instances: Optional[list[str]] = None,
    timeout: float = 10.0,
    max_retries: int = 3,
    retry_delay: float = 1.0,
    pool_connections: int = 10,
    pool_maxsize: int = 10,
) -> None:
    """
    Initialize SearXNG client.

    Args:
        instances: SearXNG instance URLs, tried in list order; a built-in
            list is used when omitted or empty
        timeout: Request timeout in seconds
        max_retries: Maximum attempts per instance
        retry_delay: Initial retry delay in seconds (doubled per attempt)
        pool_connections: Passed to httpx as ``max_connections``
        pool_maxsize: Passed to httpx as ``max_keepalive_connections``
    """
    default_instances = [
        "https://searx.be",
        "https://search.sapti.me",
        "https://searx.fmac.xyz",
    ]
    self.instances = instances or default_instances
    self.timeout = timeout
    self.max_retries = max_retries
    self.retry_delay = retry_delay

    # One shared client for all requests made by this object.
    self._client = httpx.Client(
        timeout=timeout,
        limits=httpx.Limits(
            max_connections=pool_connections,
            max_keepalive_connections=pool_maxsize,
        ),
        follow_redirects=True,
        verify=True,
    )
|
| 132 |
+
|
| 133 |
+
def _build_params(
|
| 134 |
+
self,
|
| 135 |
+
query: str,
|
| 136 |
+
categories: Optional[list[str]] = None,
|
| 137 |
+
engines: Optional[list[str]] = None,
|
| 138 |
+
pageno: int = 1,
|
| 139 |
+
time_range: Optional[TimeRange] = None,
|
| 140 |
+
) -> dict[str, str | int]:
|
| 141 |
+
"""Build query parameters for SearXNG API."""
|
| 142 |
+
params: dict[str, str | int] = {
|
| 143 |
+
"q": query,
|
| 144 |
+
"format": "json",
|
| 145 |
+
"pageno": pageno,
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
if categories:
|
| 149 |
+
params["categories"] = ",".join(categories)
|
| 150 |
+
if engines:
|
| 151 |
+
params["engines"] = ",".join(engines)
|
| 152 |
+
if time_range:
|
| 153 |
+
params["time_range"] = time_range.value
|
| 154 |
+
|
| 155 |
+
return params
|
| 156 |
+
|
| 157 |
+
def _make_request(
    self,
    instance: str,
    params: dict[str, Any],
) -> dict[str, Any]:
    """
    Make HTTP request to SearXNG instance.

    Args:
        instance: SearXNG instance URL
        params: Query parameters

    Returns:
        JSON response data (always a dict)

    Raises:
        InstanceUnavailableError: On timeout, connection or other transport
            failure, or a 5xx response
        RateLimitError: On a 429 response
        SearXNGError: On any other 4xx response, or when the body is not a
            JSON object
    """
    url = f"{instance.rstrip('/')}/search"

    try:
        response = self._client.get(url, params=params)
    except httpx.TimeoutException as e:
        raise InstanceUnavailableError(f"Timeout for {instance}: {e}") from e
    except httpx.ConnectError as e:
        raise InstanceUnavailableError(f"Connection failed to {instance}: {e}") from e
    except httpx.RequestError as e:
        # Remaining request failures (read/protocol errors, too many
        # redirects, ...) must also be typed, otherwise they escape the
        # retry loop and abort the instance fallback.
        raise InstanceUnavailableError(f"Request to {instance} failed: {e}") from e

    status = response.status_code

    # Handle rate limiting
    if status == 429:
        raise RateLimitError(f"Rate limit exceeded for {instance}")

    # Handle server errors
    if status >= 500:
        raise InstanceUnavailableError(f"Server error {status} for {instance}")

    # Handle client errors
    if status >= 400:
        raise SearXNGError(f"Client error {status}: {response.text}")

    # A 2xx body that is not JSON (e.g. an HTML page) will not improve on
    # retry, so raise the non-retryable base error and let the caller move
    # on to the next instance.
    try:
        data = response.json()
    except ValueError as e:
        raise SearXNGError(f"Invalid JSON response from {instance}: {e}") from e

    if not isinstance(data, dict):
        raise SearXNGError(
            f"Unexpected JSON payload from {instance}: {type(data).__name__}"
        )

    return data
|
| 206 |
+
|
| 207 |
+
def _search_with_retry(
    self,
    params: dict[str, Any],
) -> tuple[list[dict[str, Any]], str]:
    """
    Run the query against each configured instance until one answers.

    Every instance is tried up to ``self.max_retries`` times. Unavailable
    instances are retried with exponential backoff; rate-limit and other
    API errors skip straight to the next instance.

    Args:
        params: Query parameters

    Returns:
        Tuple of (results list, instance URL that succeeded); the list is
        empty when the request succeeded but carried no results

    Raises:
        AllInstancesFailedError: If no instance produced a response
    """
    failure: Optional[Exception] = None

    for base_url in self.instances:
        for attempt_no in range(self.max_retries):
            try:
                payload = self._make_request(base_url, params)
            except RateLimitError as exc:
                # Retrying a rate-limited instance is pointless; try the next one.
                failure = exc
                break
            except InstanceUnavailableError as exc:
                failure = exc
                # Back off (delay doubles per attempt) unless this was the last try.
                if attempt_no + 1 < self.max_retries:
                    time.sleep(self.retry_delay * (2**attempt_no))
            except SearXNGError as exc:
                # Non-retryable API error; try the next instance.
                failure = exc
                break
            else:
                # Request succeeded; a missing or empty result set becomes [].
                return (payload.get("results") or []), base_url

    # Every instance was exhausted without a successful response.
    raise AllInstancesFailedError(
        f"All SearXNG instances failed. Last error: {failure}"
    ) from failure
|
| 258 |
+
|
| 259 |
+
def search(
    self,
    query: str,
    categories: Optional[list[str]] = None,
    engines: Optional[list[str]] = None,
    pageno: int = 1,
    time_range: Optional[TimeRange] = None,
) -> list[dict[str, Any]]:
    """
    Run a search and hand back the result dictionaries untouched.

    Args:
        query: Search query string
        categories: Result categories to request (e.g., ['general', 'images'])
        engines: Search engines to use
        pageno: Page number (1-indexed)
        time_range: Time filter for results

    Returns:
        List of raw result dictionaries

    Raises:
        AllInstancesFailedError: If all instances fail
    """
    request_params = self._build_params(
        query=query,
        categories=categories,
        engines=engines,
        pageno=pageno,
        time_range=time_range,
    )
    # The second tuple element is the instance that answered; unused here.
    hits, _ = self._search_with_retry(request_params)
    return hits
|
| 293 |
+
|
| 294 |
+
def text(
    self,
    query: str,
    max_results: int = 10,
    time_range: Optional[TimeRange] = None,
) -> list[TextResult]:
    """
    Perform text/web search.

    Requests page 1 of the "general" category and converts matching items
    into TextResult objects.

    Args:
        query: Search query string
        max_results: Maximum number of results to return; a value of zero or
            less yields an empty list
        time_range: Time filter for results

    Returns:
        List of at most ``max_results`` TextResult objects

    Raises:
        AllInstancesFailedError: If all instances fail
    """
    params = self._build_params(
        query=query,
        categories=["general"],
        pageno=1,
        time_range=time_range,
    )

    raw_results, _instance = self._search_with_retry(params)

    results: list[TextResult] = []
    for item in raw_results:
        # Apply the limit to accepted items, not to the raw list: truncating
        # before filtering could return fewer results than are available.
        if len(results) >= max_results:
            break
        if item.get("category") not in ("general", ""):
            continue
        results.append(
            TextResult(
                url=item.get("url", ""),
                title=item.get("title", ""),
                content=item.get("content", ""),
                engine=item.get("engine"),
                category=item.get("category"),
                score=item.get("score", 0.0),
            )
        )

    return results
|
| 338 |
+
|
| 339 |
+
def images(
    self,
    query: str,
    max_results: int = 10,
) -> list[ImageResult]:
    """
    Perform image search.

    Requests page 1 of the "images" category and converts matching items
    into ImageResult objects.

    Args:
        query: Search query string
        max_results: Maximum number of results to return; a value of zero or
            less yields an empty list

    Returns:
        List of at most ``max_results`` ImageResult objects

    Raises:
        AllInstancesFailedError: If all instances fail
    """
    params = self._build_params(
        query=query,
        categories=["images"],
        pageno=1,
    )

    raw_results, _instance = self._search_with_retry(params)

    results: list[ImageResult] = []
    for item in raw_results:
        # Apply the limit to accepted items, not to the raw list: truncating
        # before filtering could return fewer results than are available.
        if len(results) >= max_results:
            break
        if item.get("category") != "images":
            continue
        results.append(
            ImageResult(
                url=item.get("url", ""),
                title=item.get("title", ""),
                img_src=item.get("img_src", ""),
                thumbnail_src=item.get("thumbnail_src", ""),
                engine=item.get("engine"),
                source=item.get("source"),
                resolution=item.get("resolution"),
            )
        )

    return results
|
| 381 |
+
|
| 382 |
+
def news(
    self,
    query: str,
    max_results: int = 10,
    time_range: Optional[TimeRange] = None,
) -> list[NewsResult]:
    """
    Perform news search.

    Requests page 1 of the "news" category and converts matching items
    into NewsResult objects.

    Args:
        query: Search query string
        max_results: Maximum number of results to return; a value of zero or
            less yields an empty list
        time_range: Time filter for results

    Returns:
        List of at most ``max_results`` NewsResult objects

    Raises:
        AllInstancesFailedError: If all instances fail
    """
    params = self._build_params(
        query=query,
        categories=["news"],
        pageno=1,
        time_range=time_range,
    )

    raw_results, _instance = self._search_with_retry(params)

    results: list[NewsResult] = []
    for item in raw_results:
        # Apply the limit to accepted items, not to the raw list: truncating
        # before filtering could return fewer results than are available.
        if len(results) >= max_results:
            break
        if item.get("category") != "news":
            continue
        results.append(
            NewsResult(
                url=item.get("url", ""),
                title=item.get("title", ""),
                content=item.get("content", ""),
                engine=item.get("engine"),
                # The raw payload uses camelCase for this field.
                published_date=item.get("publishedDate"),
                score=item.get("score", 0.0),
            )
        )

    return results
|
| 426 |
+
|
| 427 |
+
def close(self) -> None:
    """Shut down the underlying HTTP client so its resources are released."""
    http_client = self._client
    http_client.close()
|
| 430 |
+
|
| 431 |
+
def __enter__(self) -> "SearXNGClient":
    """Enter a ``with`` block; the client itself is the managed object."""
    return self
|
| 434 |
+
|
| 435 |
+
def __exit__(self, _exc_type: Any, _exc_val: Any, _exc_tb: Any) -> None:
    """Leave a ``with`` block by closing the client; exception details are ignored."""
    self.close()
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
# Convenience function for quick searches
|
| 441 |
+
def search_text(
    query: str,
    max_results: int = 10,
    time_range: Optional[TimeRange] = None,
    instances: Optional[list[str]] = None,
) -> list[TextResult]:
    """
    One-shot text search using a temporary client.

    A SearXNGClient is created for this call only and is closed again
    before the results are returned.

    Args:
        query: Search query string
        max_results: Maximum number of results to return
        time_range: Time filter for results
        instances: Optional custom instance list

    Returns:
        List of TextResult objects
    """
    with SearXNGClient(instances=instances) as searx:
        hits = searx.text(query, max_results, time_range)
    return hits
|
README.md
CHANGED
|
@@ -1,267 +1,267 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Nymbo Tools MCP
|
| 3 |
-
emoji: ⚙️
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: gray
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 6.2.0
|
| 8 |
-
python_version: 3.12
|
| 9 |
-
app_file: app.py
|
| 10 |
-
pinned: true
|
| 11 |
-
license: apache-2.0
|
| 12 |
-
short_description: All-in-one hub of general purpose tools useful for any agent
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 16 |
-
|
| 17 |
-
## Nymbo-Tools MCP Server
|
| 18 |
-
|
| 19 |
-
All-in-one hub of general-purpose tools useful for any agent. Run it as a Gradio web app, or connect to it remotely as a Model Context Protocol (MCP) server to call its tools programmatically.
|
| 20 |
-
|
| 21 |
-
Live Space: https://huggingface.co/spaces/Nymbo/Tools
|
| 22 |
-
|
| 23 |
-
### What’s inside
|
| 24 |
-
|
| 25 |
-
- Web Fetch: Turn any webpage into clean Markdown with optional link-only scraping, CSS selector stripping, length limits, and pagination via cursor offset.
|
| 26 |
-
- Web Search: DuckDuckGo-backed search across text, news, images, videos, and books with readable, paginated output.
|
| 27 |
-
- Code Interpreter: Execute small Python snippets and capture stdout.
|
| 28 |
-
- Memory Manager: Lightweight JSON-based memory store with save/list/search/delete and tag filters.
|
| 29 |
-
- Generate Speech: Kokoro‑82M TTS with 54 voices and adjustable speed (CPU or CUDA if available).
|
| 30 |
-
- Generate Image: Text-to-image via Hugging Face serverless inference (choose model, steps, CFG, size, seed).
|
| 31 |
-
- Generate Video: Text-to-video via Hugging Face serverless inference (model, steps, guidance, size, fps, duration, seed).
|
| 32 |
-
- Deep Research: Multi-query web research pipeline (DDG search + parallel fetch + LLM report synthesis) with downloadable report.
|
| 33 |
-
- Agent Skills: Discover, inspect, and access specialized agent skills and resources.
|
| 34 |
-
- Agent Terminal: Execute Python code to chain multiple tools together (e.g., fetch -> summarize -> save) efficiently.
|
| 35 |
-
- Obsidian Vault: Read-only access to an Obsidian vault (list, read, search notes) with safelisted paths.
|
| 36 |
-
- File System: Safe, sandboxed filesystem operations under a tool root.
|
| 37 |
-
- Shell Command: Run shell commands inside the same safe root as File System.
|
| 38 |
-
|
| 39 |
-
## Quick start
|
| 40 |
-
|
| 41 |
-
Run the following commands in sequence to run the server locally:
|
| 42 |
-
|
| 43 |
-
```shell
|
| 44 |
-
git clone https://huggingface.co/spaces/Nymbo/Tools
|
| 45 |
-
cd Tools
|
| 46 |
-
python -m venv env
|
| 47 |
-
source env/bin/activate
|
| 48 |
-
pip install -r requirements.txt
|
| 49 |
-
python app.py
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
Defaults:
|
| 53 |
-
- The Gradio UI typically serves on http://127.0.0.1:7860
|
| 54 |
-
- The MCP endpoint is available at http://127.0.0.1:7860/gradio_api/mcp/
|
| 55 |
-
|
| 56 |
-
## Using it as an MCP server
|
| 57 |
-
|
| 58 |
-
Remote MCP (hosted):
|
| 59 |
-
- Base URL: https://mcp.nymbo.net/gradio_api/mcp/
|
| 60 |
-
- SSE endpoint (for clients that need it): https://mcp.nymbo.net/gradio_api/mcp/sse
|
| 61 |
-
|
| 62 |
-
Local MCP (when you run app.py):
|
| 63 |
-
- Base URL: http://127.0.0.1:7860/gradio_api/mcp/
|
| 64 |
-
- SSE endpoint: http://127.0.0.1:7860/gradio_api/mcp/sse
|
| 65 |
-
|
| 66 |
-
Example client config (JSON):
|
| 67 |
-
|
| 68 |
-
```json
|
| 69 |
-
{
|
| 70 |
-
"mcpServers": {
|
| 71 |
-
"nymbo-tools": {
|
| 72 |
-
"url": "https://mcp.nymbo.net/gradio_api/mcp/"
|
| 73 |
-
}
|
| 74 |
-
}
|
| 75 |
-
}
|
| 76 |
-
```
|
| 77 |
-
|
| 78 |
-
## Environment variables (optional but recommended)
|
| 79 |
-
|
| 80 |
-
- HF_READ_TOKEN: Enables Image Generation, Video Generation, and Deep Research (Hugging Face serverless inference). These tools stay visible to MCP clients but calls require a valid token to succeed.
|
| 81 |
-
- HF_TOKEN: Alternative token fallback used by some providers (also enables Deep Research/Video).
|
| 82 |
-
- NYMBO_TOOLS_ROOT: Overrides the File System/Shell working root. Defaults to Nymbo-Tools/Filesystem.
|
| 83 |
-
- UNSAFE_ALLOW_ABS_PATHS=1: Allow absolute paths in File System and Shell Command (off by default for safety).
|
| 84 |
-
|
| 85 |
-
Notes:
|
| 86 |
-
- Without an HF API key, you can still use Web Fetch, Web Search, Code Interpreter, Memory Manager, File System, Shell Command, and Generate Speech.
|
| 87 |
-
- Generate Speech requires the kokoro package and its dependencies; it works on CPU and uses CUDA if available. Doesn't require an API key because it's computed on the server itself.
|
| 88 |
-
|
| 89 |
-
## Persistence and privacy
|
| 90 |
-
|
| 91 |
-
- Memory Manager stores entries in `memories.json` at the Nymbo-Tools folder root when running locally.
|
| 92 |
-
- File System defaults to the `Filesystem/` directory under Nymbo-Tools.
|
| 93 |
-
- In the public demo Space, storage is ephemeral and visible to anyone using the Space; avoid personal or sensitive data.
|
| 94 |
-
|
| 95 |
-
## Tool reference (signatures and behavior)
|
| 96 |
-
|
| 97 |
-
Below are the MCP tool parameters summarized by inputs, outputs, and notable behaviors.
|
| 98 |
-
|
| 99 |
-
### Web_Fetch (Webpages, converted to Markdown)
|
| 100 |
-
Inputs:
|
| 101 |
-
- url (str): Absolute URL to fetch (must return HTML).
|
| 102 |
-
- max_chars (int, default 3000): 0 = full page; otherwise truncates with a next_cursor notice.
|
| 103 |
-
- strip_selectors (str): Comma-separated CSS selectors to remove (e.g., .header, .footer, nav).
|
| 104 |
-
- mode (str): "markdown" (default), "html", or "url_scraper" (returns list of links).
|
| 105 |
-
- offset (int): Character offset for pagination; pass the previous next_cursor to continue.
|
| 106 |
-
|
| 107 |
-
Output: Markdown string, raw HTML, or link list. If truncated, includes a next_cursor to continue.
|
| 108 |
-
|
| 109 |
-
### Web_Search (DuckDuckGo backend)
|
| 110 |
-
Inputs:
|
| 111 |
-
- query (str): DuckDuckGo query (supports site:, quotes, OR).
|
| 112 |
-
- max_results (int 1–20, default 5)
|
| 113 |
-
- page (int, default 1) or offset (int) for precise continuation
|
| 114 |
-
- search_type (str): "text" | "news" | "images" | "videos" | "books"
|
| 115 |
-
|
| 116 |
-
Output: Readable text with pagination hints and next_offset.
|
| 117 |
-
|
| 118 |
-
### Code_Interpreter (Python)
|
| 119 |
-
Inputs:
|
| 120 |
-
- code (str): Python source; stdout is captured.
|
| 121 |
-
|
| 122 |
-
Output: Captured stdout or the exception text.
|
| 123 |
-
|
| 124 |
-
### Memory_Manager (Simple JSON store)
|
| 125 |
-
Inputs:
|
| 126 |
-
- action: "save" | "list" | "search" | "delete"
|
| 127 |
-
- text (save only), tags (save only)
|
| 128 |
-
- query (search only): supports tag:name terms and AND/OR
|
| 129 |
-
- limit (list/search): default 20
|
| 130 |
-
- memory_id (delete): full UUID or unique prefix
|
| 131 |
-
- include_tags (bool): include tags when listing/searching
|
| 132 |
-
|
| 133 |
-
Output: Confirmation string, listing, search matches, or structured error text.
|
| 134 |
-
|
| 135 |
-
### Generate_Speech (Kokoro-82M)
|
| 136 |
-
Inputs:
|
| 137 |
-
- text (str)
|
| 138 |
-
- speed (float 0.5–2.0, default 1.25)
|
| 139 |
-
- voice (str): One of 54 voices (e.g., af_heart, am_liam, bf_alice, zf_xiaoyi…)
|
| 140 |
-
|
| 141 |
-
Output: (sample_rate:int, waveform:np.ndarray) – rendered as downloadable WAV in the UI.
|
| 142 |
-
|
| 143 |
-
### Generate_Image (HF inference)
|
| 144 |
-
Requires: HF_READ_TOKEN
|
| 145 |
-
|
| 146 |
-
Inputs:
|
| 147 |
-
- prompt (str)
|
| 148 |
-
- model_id (str): e.g., black-forest-labs/FLUX.1-Krea-dev
|
| 149 |
-
- negative_prompt (str)
|
| 150 |
-
- steps (1–100), cfg_scale (1–20), sampler (UI label), seed (-1=random), width/height
|
| 151 |
-
|
| 152 |
-
Output: PIL.Image. In UI, displayed and downloadable. Errors guide you to provide a token or fix model id.
|
| 153 |
-
|
| 154 |
-
### Generate_Video (HF inference)
|
| 155 |
-
Requires: HF_READ_TOKEN or HF_TOKEN
|
| 156 |
-
|
| 157 |
-
Inputs:
|
| 158 |
-
- prompt (str)
|
| 159 |
-
- model_id (str): default Wan-AI/Wan2.2-T2V-A14B
|
| 160 |
-
- negative_prompt (str)
|
| 161 |
-
- steps (1–100), cfg_scale, seed, width/height, fps, duration (s)
|
| 162 |
-
|
| 163 |
-
Output: Temporary MP4 file path; UI shows a playable/downloadable video.
|
| 164 |
-
|
| 165 |
-
### Deep_Research (HF inference)
|
| 166 |
-
Requires: HF_READ_TOKEN or HF_TOKEN
|
| 167 |
-
|
| 168 |
-
Inputs:
|
| 169 |
-
- summary (str): One or more sentences describing the research task.
|
| 170 |
-
- query1..query5 (str) with max1..max5 (1–50). Total requested results across queries are capped at 50.
|
| 171 |
-
|
| 172 |
-
Behavior:
|
| 173 |
-
- Parallel DDG searches → fetch pages in budget → filter candidate sources with an LLM → synthesize a long, well-structured Markdown report and list of sources.
|
| 174 |
-
|
| 175 |
-
Output: (report_md, fetched_links_text, report_file_path)
|
| 176 |
-
|
| 177 |
-
### File_System (safe root)
|
| 178 |
-
Root:
|
| 179 |
-
- Defaults to `Nymbo-Tools/Filesystem` (or NYMBO_TOOLS_ROOT). Absolute paths disabled unless UNSAFE_ALLOW_ABS_PATHS=1.
|
| 180 |
-
|
| 181 |
-
Actions:
|
| 182 |
-
- list, read, write, append, edit, mkdir, move, copy, delete, info, help
|
| 183 |
-
|
| 184 |
-
Key fields:
|
| 185 |
-
- path, content (write/append/edit), dest_path (move/copy), recursive, show_hidden, max_entries, offset, max_chars, create_dirs, overwrite
|
| 186 |
-
|
| 187 |
-
Edit format (SEARCH/REPLACE blocks):
|
| 188 |
-
```
|
| 189 |
-
<<<<<<< SEARCH
|
| 190 |
-
[exact content to find]
|
| 191 |
-
=======
|
| 192 |
-
[new content to replace with]
|
| 193 |
-
>>>>>>> REPLACE
|
| 194 |
-
```
|
| 195 |
-
- Multiple blocks can be included; each is applied in order
|
| 196 |
-
- Search text must match exactly (whitespace, indentation)
|
| 197 |
-
- Only the first occurrence of each search text is replaced
|
| 198 |
-
|
| 199 |
-
Output:
|
| 200 |
-
- Human-readable listings or JSON-like error strings with code/message/hint.
|
| 201 |
-
|
| 202 |
-
### Shell_Command (same safe root)
|
| 203 |
-
Inputs:
|
| 204 |
-
- command (str): Single-string shell command (pipelines supported by the host shell).
|
| 205 |
-
- workdir (str): Relative to the root.
|
| 206 |
-
- timeout (s)
|
| 207 |
-
|
| 208 |
-
Output:
|
| 209 |
-
- Combined header + STDOUT/STDERR. Absolute paths disabled by default. Shell is detected automatically (PowerShell on Windows when available; bash/sh on POSIX).
|
| 210 |
-
|
| 211 |
-
### Agent_Skills (Skill Discovery)
|
| 212 |
-
Inputs:
|
| 213 |
-
- action: "discover" | "info" | "resources" | "validate" | "search" | "help"
|
| 214 |
-
- skill_name (str): Required for info/resources/validate.
|
| 215 |
-
- resource_path (str): Specific file to read within a skill.
|
| 216 |
-
- query (str): Search term for "search" action.
|
| 217 |
-
- max_chars (int), offset (int)
|
| 218 |
-
|
| 219 |
-
Output:
|
| 220 |
-
- Detailed skill metadata, SKILL.md content, resource file content, or validation reports.
|
| 221 |
-
|
| 222 |
-
### Agent_Terminal (Tool Chaining)
|
| 223 |
-
Inputs:
|
| 224 |
-
- input (str): Python source code to execute.
|
| 225 |
-
- Can call any other tool (e.g., `Web_Fetch(...)`, `File_System(...)`).
|
| 226 |
-
- Use `search_tools("query")` to find tools.
|
| 227 |
-
- Call a tool with no args to get its usage guide.
|
| 228 |
-
|
| 229 |
-
Output:
|
| 230 |
-
- Captured STDOUT from the executed script.
|
| 231 |
-
|
| 232 |
-
### Obsidian_Vault (Read-only Note Access)
|
| 233 |
-
Root:
|
| 234 |
-
- Defaults to `Tools/Obsidian` (or OBSIDIAN_VAULT_ROOT).
|
| 235 |
-
|
| 236 |
-
Inputs:
|
| 237 |
-
- action: "list" | "read" | "info" | "search" | "help"
|
| 238 |
-
- path (str): Relative to vault root (start with /).
|
| 239 |
-
- query (str): For search action.
|
| 240 |
-
- recursive (bool), show_hidden (bool), max_entries (int)
|
| 241 |
-
- offset (int), max_chars (int)
|
| 242 |
-
|
| 243 |
-
Output:
|
| 244 |
-
- File listings, note content, or search results (with context).
|
| 245 |
-
|
| 246 |
-
## Running on Hugging Face Spaces
|
| 247 |
-
|
| 248 |
-
1) Duplicate the Space at https://huggingface.co/spaces/Nymbo/Tools.
|
| 249 |
-
2) In Space Settings → Secrets, add HF_READ_TOKEN (and/or HF_TOKEN) for model access.
|
| 250 |
-
3) Both the UI and MCP clients will list every tool. Image/Video/Deep Research still need a valid token when invoked.
|
| 251 |
-
|
| 252 |
-
## Troubleshooting
|
| 253 |
-
|
| 254 |
-
- Image/Video/Deep Research calls fail immediately:
|
| 255 |
-
- Provide HF_READ_TOKEN (and optionally HF_TOKEN). Restart the app/Space.
|
| 256 |
-
- 401/403 when calling generation tools:
|
| 257 |
-
- Token missing/insufficient permissions. Ensure your token can read the chosen model.
|
| 258 |
-
- Kokoro not found:
|
| 259 |
-
- Install kokoro>=0.9.4. CPU works; CUDA used if available. Torch may be skipped on Apple Silicon by design.
|
| 260 |
-
- Windows PowerShell activation policy blocks venv activation:
|
| 261 |
-
- Run PowerShell as Admin and set a suitable execution policy for the current user (e.g., RemoteSigned), or manually run `python app.py` after installing dependencies.
|
| 262 |
-
- File System or Shell path errors:
|
| 263 |
-
- Paths are relative to the tool root. Set NYMBO_TOOLS_ROOT to customize. Set UNSAFE_ALLOW_ABS_PATHS=1 only if you fully trust the environment.
|
| 264 |
-
|
| 265 |
-
## License
|
| 266 |
-
|
| 267 |
Apache-2.0 (see Space metadata). If you duplicate the Space or use these tools, ensure your usage complies with the licenses and terms of the underlying models and providers.
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Nymbo Tools MCP
|
| 3 |
+
emoji: ⚙️
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 6.2.0
|
| 8 |
+
python_version: 3.12
|
| 9 |
+
app_file: app.py
|
| 10 |
+
pinned: true
|
| 11 |
+
license: apache-2.0
|
| 12 |
+
short_description: All-in-one hub of general purpose tools useful for any agent
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 16 |
+
|
| 17 |
+
## Nymbo-Tools MCP Server
|
| 18 |
+
|
| 19 |
+
All-in-one hub of general-purpose tools useful for any agent. Run it as a Gradio web app, or connect to it remotely as a Model Context Protocol (MCP) server to call its tools programmatically.
|
| 20 |
+
|
| 21 |
+
Live Space: https://huggingface.co/spaces/Nymbo/Tools
|
| 22 |
+
|
| 23 |
+
### What’s inside
|
| 24 |
+
|
| 25 |
+
- Web Fetch: Turn any webpage into clean Markdown with optional link-only scraping, CSS selector stripping, length limits, and pagination via cursor offset.
|
| 26 |
+
- Web Search: DuckDuckGo-backed search across text, news, images, videos, and books with readable, paginated output.
|
| 27 |
+
- Code Interpreter: Execute small Python snippets and capture stdout.
|
| 28 |
+
- Memory Manager: Lightweight JSON-based memory store with save/list/search/delete and tag filters.
|
| 29 |
+
- Generate Speech: Kokoro‑82M TTS with 54 voices and adjustable speed (CPU or CUDA if available).
|
| 30 |
+
- Generate Image: Text-to-image via Hugging Face serverless inference (choose model, steps, CFG, size, seed).
|
| 31 |
+
- Generate Video: Text-to-video via Hugging Face serverless inference (model, steps, guidance, size, fps, duration, seed).
|
| 32 |
+
- Deep Research: Multi-query web research pipeline (DDG search + parallel fetch + LLM report synthesis) with downloadable report.
|
| 33 |
+
- Agent Skills: Discover, inspect, and access specialized agent skills and resources.
|
| 34 |
+
- Agent Terminal: Execute Python code to chain multiple tools together (e.g., fetch -> summarize -> save) efficiently.
|
| 35 |
+
- Obsidian Vault: Read-only access to an Obsidian vault (list, read, search notes) with safelisted paths.
|
| 36 |
+
- File System: Safe, sandboxed filesystem operations under a tool root.
|
| 37 |
+
- Shell Command: Run shell commands inside the same safe root as File System.
|
| 38 |
+
|
| 39 |
+
## Quick start
|
| 40 |
+
|
| 41 |
+
Run the following commands in sequence to run the server locally:
|
| 42 |
+
|
| 43 |
+
```shell
|
| 44 |
+
git clone https://huggingface.co/spaces/Nymbo/Tools
|
| 45 |
+
cd Tools
|
| 46 |
+
python -m venv env
|
| 47 |
+
source env/bin/activate
|
| 48 |
+
pip install -r requirements.txt
|
| 49 |
+
python app.py
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Defaults:
|
| 53 |
+
- The Gradio UI typically serves on http://127.0.0.1:7860
|
| 54 |
+
- The MCP endpoint is available at http://127.0.0.1:7860/gradio_api/mcp/
|
| 55 |
+
|
| 56 |
+
## Using it as an MCP server
|
| 57 |
+
|
| 58 |
+
Remote MCP (hosted):
|
| 59 |
+
- Base URL: https://mcp.nymbo.net/gradio_api/mcp/
|
| 60 |
+
- SSE endpoint (for clients that need it): https://mcp.nymbo.net/gradio_api/mcp/sse
|
| 61 |
+
|
| 62 |
+
Local MCP (when you run app.py):
|
| 63 |
+
- Base URL: http://127.0.0.1:7860/gradio_api/mcp/
|
| 64 |
+
- SSE endpoint: http://127.0.0.1:7860/gradio_api/mcp/sse
|
| 65 |
+
|
| 66 |
+
Example client config (JSON):
|
| 67 |
+
|
| 68 |
+
```json
|
| 69 |
+
{
|
| 70 |
+
"mcpServers": {
|
| 71 |
+
"nymbo-tools": {
|
| 72 |
+
"url": "https://mcp.nymbo.net/gradio_api/mcp/"
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
## Environment variables (optional but recommended)
|
| 79 |
+
|
| 80 |
+
- HF_READ_TOKEN: Enables Image Generation, Video Generation, and Deep Research (Hugging Face serverless inference). These tools stay visible to MCP clients but calls require a valid token to succeed.
|
| 81 |
+
- HF_TOKEN: Alternative token fallback used by some providers (also enables Deep Research/Video).
|
| 82 |
+
- NYMBO_TOOLS_ROOT: Overrides the File System/Shell working root. Defaults to Nymbo-Tools/Filesystem.
|
| 83 |
+
- UNSAFE_ALLOW_ABS_PATHS=1: Allow absolute paths in File System and Shell Command (off by default for safety).
|
| 84 |
+
|
| 85 |
+
Notes:
|
| 86 |
+
- Without an HF API key, you can still use Web Fetch, Web Search, Code Interpreter, Memory Manager, File System, Shell Command, and Generate Speech.
|
| 87 |
+
- Generate Speech requires the kokoro package and its dependencies; it works on CPU and uses CUDA if available. Doesn't require an API key because it's computed on the server itself.
|
| 88 |
+
|
| 89 |
+
## Persistence and privacy
|
| 90 |
+
|
| 91 |
+
- Memory Manager stores entries in `memories.json` at the Nymbo-Tools folder root when running locally.
|
| 92 |
+
- File System defaults to the `Filesystem/` directory under Nymbo-Tools.
|
| 93 |
+
- In the public demo Space, storage is ephemeral and visible to anyone using the Space; avoid personal or sensitive data.
|
| 94 |
+
|
| 95 |
+
## Tool reference (signatures and behavior)
|
| 96 |
+
|
| 97 |
+
Below are the MCP tool parameters summarized by inputs, outputs, and notable behaviors.
|
| 98 |
+
|
| 99 |
+
### Web_Fetch (Webpages, converted to Markdown)
|
| 100 |
+
Inputs:
|
| 101 |
+
- url (str): Absolute URL to fetch (must return HTML).
|
| 102 |
+
- max_chars (int, default 3000): 0 = full page; otherwise truncates with a next_cursor notice.
|
| 103 |
+
- strip_selectors (str): Comma-separated CSS selectors to remove (e.g., .header, .footer, nav).
|
| 104 |
+
- mode (str): "markdown" (default), "html", or "url_scraper" (returns list of links).
|
| 105 |
+
- offset (int): Character offset for pagination; pass the previous next_cursor to continue.
|
| 106 |
+
|
| 107 |
+
Output: Markdown string, raw HTML, or link list. If truncated, includes a next_cursor to continue.
|
| 108 |
+
|
| 109 |
+
### Web_Search (DuckDuckGo backend)
|
| 110 |
+
Inputs:
|
| 111 |
+
- query (str): DuckDuckGo query (supports site:, quotes, OR).
|
| 112 |
+
- max_results (int 1–20, default 5)
|
| 113 |
+
- page (int, default 1) or offset (int) for precise continuation
|
| 114 |
+
- search_type (str): "text" | "news" | "images" | "videos" | "books"
|
| 115 |
+
|
| 116 |
+
Output: Readable text with pagination hints and next_offset.
|
| 117 |
+
|
| 118 |
+
### Code_Interpreter (Python)
|
| 119 |
+
Inputs:
|
| 120 |
+
- code (str): Python source; stdout is captured.
|
| 121 |
+
|
| 122 |
+
Output: Captured stdout or the exception text.
|
| 123 |
+
|
| 124 |
+
### Memory_Manager (Simple JSON store)
|
| 125 |
+
Inputs:
|
| 126 |
+
- action: "save" | "list" | "search" | "delete"
|
| 127 |
+
- text (save only), tags (save only)
|
| 128 |
+
- query (search only): supports tag:name terms and AND/OR
|
| 129 |
+
- limit (list/search): default 20
|
| 130 |
+
- memory_id (delete): full UUID or unique prefix
|
| 131 |
+
- include_tags (bool): include tags when listing/searching
|
| 132 |
+
|
| 133 |
+
Output: Confirmation string, listing, search matches, or structured error text.
|
| 134 |
+
|
| 135 |
+
### Generate_Speech (Kokoro-82M)
|
| 136 |
+
Inputs:
|
| 137 |
+
- text (str)
|
| 138 |
+
- speed (float 0.5–2.0, default 1.25)
|
| 139 |
+
- voice (str): One of 54 voices (e.g., af_heart, am_liam, bf_alice, zf_xiaoyi…)
|
| 140 |
+
|
| 141 |
+
Output: (sample_rate:int, waveform:np.ndarray) – rendered as downloadable WAV in the UI.
|
| 142 |
+
|
| 143 |
+
### Generate_Image (HF inference)
|
| 144 |
+
Requires: HF_READ_TOKEN
|
| 145 |
+
|
| 146 |
+
Inputs:
|
| 147 |
+
- prompt (str)
|
| 148 |
+
- model_id (str): e.g., black-forest-labs/FLUX.1-Krea-dev
|
| 149 |
+
- negative_prompt (str)
|
| 150 |
+
- steps (1–100), cfg_scale (1–20), sampler (UI label), seed (-1=random), width/height
|
| 151 |
+
|
| 152 |
+
Output: PIL.Image. In UI, displayed and downloadable. Errors guide you to provide a token or fix model id.
|
| 153 |
+
|
| 154 |
+
### Generate_Video (HF inference)
|
| 155 |
+
Requires: HF_READ_TOKEN or HF_TOKEN
|
| 156 |
+
|
| 157 |
+
Inputs:
|
| 158 |
+
- prompt (str)
|
| 159 |
+
- model_id (str): default Wan-AI/Wan2.2-T2V-A14B
|
| 160 |
+
- negative_prompt (str)
|
| 161 |
+
- steps (1–100), cfg_scale, seed, width/height, fps, duration (s)
|
| 162 |
+
|
| 163 |
+
Output: Temporary MP4 file path; UI shows a playable/downloadable video.
|
| 164 |
+
|
| 165 |
+
### Deep_Research (HF inference)
|
| 166 |
+
Requires: HF_READ_TOKEN or HF_TOKEN
|
| 167 |
+
|
| 168 |
+
Inputs:
|
| 169 |
+
- summary (str): One or more sentences describing the research task.
|
| 170 |
+
- query1..query5 (str) with max1..max5 (1–50). Total requested results across queries are capped at 50.
|
| 171 |
+
|
| 172 |
+
Behavior:
|
| 173 |
+
- Parallel DDG searches → fetch pages in budget → filter candidate sources with an LLM → synthesize a long, well-structured Markdown report and list of sources.
|
| 174 |
+
|
| 175 |
+
Output: (report_md, fetched_links_text, report_file_path)
|
| 176 |
+
|
| 177 |
+
### File_System (safe root)
|
| 178 |
+
Root:
|
| 179 |
+
- Defaults to `Nymbo-Tools/Filesystem` (or NYMBO_TOOLS_ROOT). Absolute paths disabled unless UNSAFE_ALLOW_ABS_PATHS=1.
|
| 180 |
+
|
| 181 |
+
Actions:
|
| 182 |
+
- list, read, write, append, edit, mkdir, move, copy, delete, info, help
|
| 183 |
+
|
| 184 |
+
Key fields:
|
| 185 |
+
- path, content (write/append/edit), dest_path (move/copy), recursive, show_hidden, max_entries, offset, max_chars, create_dirs, overwrite
|
| 186 |
+
|
| 187 |
+
Edit format (SEARCH/REPLACE blocks):
|
| 188 |
+
```
|
| 189 |
+
<<<<<<< SEARCH
|
| 190 |
+
[exact content to find]
|
| 191 |
+
=======
|
| 192 |
+
[new content to replace with]
|
| 193 |
+
>>>>>>> REPLACE
|
| 194 |
+
```
|
| 195 |
+
- Multiple blocks can be included; each is applied in order
|
| 196 |
+
- Search text must match exactly (whitespace, indentation)
|
| 197 |
+
- Only the first occurrence of each search text is replaced
|
| 198 |
+
|
| 199 |
+
Output:
|
| 200 |
+
- Human-readable listings or JSON-like error strings with code/message/hint.
|
| 201 |
+
|
| 202 |
+
### Shell_Command (same safe root)
|
| 203 |
+
Inputs:
|
| 204 |
+
- command (str): Single-string shell command (pipelines supported by the host shell).
|
| 205 |
+
- workdir (str): Relative to the root.
|
| 206 |
+
- timeout (s)
|
| 207 |
+
|
| 208 |
+
Output:
|
| 209 |
+
- Combined header + STDOUT/STDERR. Absolute paths disabled by default. Shell is detected automatically (PowerShell on Windows when available; bash/sh on POSIX).
|
| 210 |
+
|
| 211 |
+
### Agent_Skills (Skill Discovery)
|
| 212 |
+
Inputs:
|
| 213 |
+
- action: "discover" | "info" | "resources" | "validate" | "search" | "help"
|
| 214 |
+
- skill_name (str): Required for info/resources/validate.
|
| 215 |
+
- resource_path (str): Specific file to read within a skill.
|
| 216 |
+
- query (str): Search term for "search" action.
|
| 217 |
+
- max_chars (int), offset (int)
|
| 218 |
+
|
| 219 |
+
Output:
|
| 220 |
+
- Detailed skill metadata, SKILL.md content, resource file content, or validation reports.
|
| 221 |
+
|
| 222 |
+
### Agent_Terminal (Tool Chaining)
|
| 223 |
+
Inputs:
|
| 224 |
+
- input (str): Python source code to execute.
|
| 225 |
+
- Can call any other tool (e.g., `Web_Fetch(...)`, `File_System(...)`).
|
| 226 |
+
- Use `search_tools("query")` to find tools.
|
| 227 |
+
- Call a tool with no args to get its usage guide.
|
| 228 |
+
|
| 229 |
+
Output:
|
| 230 |
+
- Captured STDOUT from the executed script.
|
| 231 |
+
|
| 232 |
+
### Obsidian_Vault (Read-only Note Access)
|
| 233 |
+
Root:
|
| 234 |
+
- Defaults to `Tools/Obsidian` (or OBSIDIAN_VAULT_ROOT).
|
| 235 |
+
|
| 236 |
+
Inputs:
|
| 237 |
+
- action: "list" | "read" | "info" | "search" | "help"
|
| 238 |
+
- path (str): Relative to vault root (start with /).
|
| 239 |
+
- query (str): For search action.
|
| 240 |
+
- recursive (bool), show_hidden (bool), max_entries (int)
|
| 241 |
+
- offset (int), max_chars (int)
|
| 242 |
+
|
| 243 |
+
Output:
|
| 244 |
+
- File listings, note content, or search results (with context).
|
| 245 |
+
|
| 246 |
+
## Running on Hugging Face Spaces
|
| 247 |
+
|
| 248 |
+
1) Duplicate the Space at https://huggingface.co/spaces/Nymbo/Tools.
|
| 249 |
+
2) In Space Settings → Secrets, add HF_READ_TOKEN (and/or HF_TOKEN) for model access.
|
| 250 |
+
3) Both the UI and MCP clients will list every tool. Image/Video/Deep Research still need a valid token when invoked.
|
| 251 |
+
|
| 252 |
+
## Troubleshooting
|
| 253 |
+
|
| 254 |
+
- Image/Video/Deep Research calls fail immediately:
|
| 255 |
+
- Provide HF_READ_TOKEN (and optionally HF_TOKEN). Restart the app/Space.
|
| 256 |
+
- 401/403 when calling generation tools:
|
| 257 |
+
- Token missing/insufficient permissions. Ensure your token can read the chosen model.
|
| 258 |
+
- Kokoro not found:
|
| 259 |
+
- Install kokoro>=0.9.4. CPU works; CUDA used if available. Torch may be skipped on Apple Silicon by design.
|
| 260 |
+
- Windows PowerShell activation policy blocks venv activation:
|
| 261 |
+
- Run PowerShell as Admin and set a suitable execution policy for the current user (e.g., RemoteSigned), or manually run `python app.py` after installing dependencies.
|
| 262 |
+
- File System or Shell path errors:
|
| 263 |
+
- Paths are relative to the tool root. Set NYMBO_TOOLS_ROOT to customize. Set UNSAFE_ALLOW_ABS_PATHS=1 only if you fully trust the environment.
|
| 264 |
+
|
| 265 |
+
## License
|
| 266 |
+
|
| 267 |
Apache-2.0 (see Space metadata). If you duplicate the Space or use these tools, ensure your usage complies with the licenses and terms of the underlying models and providers.
|
app.py
CHANGED
|
@@ -1,124 +1,118 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
# Project by Nymbo
|
| 4 |
-
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import sys
|
| 8 |
-
import threading
|
| 9 |
-
import time
|
| 10 |
-
import warnings
|
| 11 |
-
from datetime import datetime, timedelta
|
| 12 |
-
from typing import Any
|
| 13 |
-
|
| 14 |
-
# Suppress asyncio event loop cleanup errors (Python 3.10 issue on HF Spaces)
|
| 15 |
-
# These occur when event loops are garbage collected after file descriptors close
|
| 16 |
-
def _patch_asyncio_event_loop_del():
|
| 17 |
-
"""Patch BaseEventLoop.__del__ to suppress 'Invalid file descriptor: -1' errors."""
|
| 18 |
-
try:
|
| 19 |
-
import asyncio.base_events as base_events
|
| 20 |
-
original_del = getattr(base_events.BaseEventLoop, "__del__", None)
|
| 21 |
-
if original_del is None:
|
| 22 |
-
return
|
| 23 |
-
def patched_del(self):
|
| 24 |
-
try:
|
| 25 |
-
original_del(self)
|
| 26 |
-
except ValueError as e:
|
| 27 |
-
if "Invalid file descriptor" not in str(e):
|
| 28 |
-
raise
|
| 29 |
-
base_events.BaseEventLoop.__del__ = patched_del
|
| 30 |
-
except Exception:
|
| 31 |
-
pass
|
| 32 |
-
|
| 33 |
-
_patch_asyncio_event_loop_del()
|
| 34 |
-
|
| 35 |
-
import gradio as gr
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
class RateLimiter:
|
| 39 |
-
"""Best-effort in-process rate limiter for HTTP-heavy tools."""
|
| 40 |
-
|
| 41 |
-
def __init__(self, requests_per_minute: int = 30) -> None:
|
| 42 |
-
self.requests_per_minute = requests_per_minute
|
| 43 |
-
self._requests: list[datetime] = []
|
| 44 |
-
self._lock = threading.Lock()
|
| 45 |
-
|
| 46 |
-
def acquire(self) -> None:
|
| 47 |
-
now = datetime.now()
|
| 48 |
-
with self._lock:
|
| 49 |
-
self._requests = [req for req in self._requests if now - req < timedelta(minutes=1)]
|
| 50 |
-
if len(self._requests) >= self.requests_per_minute:
|
| 51 |
-
wait_time = 60 - (now - self._requests[0]).total_seconds()
|
| 52 |
-
if wait_time > 0:
|
| 53 |
-
time.sleep(max(1, wait_time))
|
| 54 |
-
self._requests.append(now)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
_search_rate_limiter = RateLimiter(requests_per_minute=20)
|
| 58 |
-
_fetch_rate_limiter = RateLimiter(requests_per_minute=25)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
def _truncate_for_log(value: Any, limit: int = 500) -> str:
|
| 62 |
-
if not isinstance(value, str):
|
| 63 |
-
value = str(value)
|
| 64 |
-
if len(value) <= limit:
|
| 65 |
-
return value
|
| 66 |
-
return value[: limit - 1] + "…"
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
def _serialize_input(val: Any) -> Any:
|
| 70 |
-
try:
|
| 71 |
-
if isinstance(val, (str, int, float, bool)) or val is None:
|
| 72 |
-
return val
|
| 73 |
-
if isinstance(val, (list, tuple)):
|
| 74 |
-
return [_serialize_input(v) for v in list(val)[:10]] + (["…"] if len(val) > 10 else [])
|
| 75 |
-
if isinstance(val, dict):
|
| 76 |
-
out: dict[str, Any] = {}
|
| 77 |
-
for i, (k, v) in enumerate(val.items()):
|
| 78 |
-
if i >= 12:
|
| 79 |
-
out["…"] = "…"
|
| 80 |
-
break
|
| 81 |
-
out[str(k)] = _serialize_input(v)
|
| 82 |
-
return out
|
| 83 |
-
return repr(val)[:120]
|
| 84 |
-
except Exception:
|
| 85 |
-
return "<unserializable>"
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
def _log_call_start(func_name: str, **kwargs: Any) -> None:
|
| 89 |
-
try:
|
| 90 |
-
compact = {k: _serialize_input(v) for k, v in kwargs.items()}
|
| 91 |
-
# Use sys.__stdout__ to avoid capturing logs in redirected output
|
| 92 |
-
print(f"[TOOL CALL] {func_name} inputs: {json.dumps(compact, ensure_ascii=False)[:800]}", flush=True, file=sys.__stdout__)
|
| 93 |
-
except Exception as exc:
|
| 94 |
-
print(f"[TOOL CALL] {func_name} (failed to log inputs: {exc})", flush=True, file=sys.__stdout__)
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
def _log_call_end(func_name: str, output_desc: str) -> None:
|
| 98 |
-
try:
|
| 99 |
-
# Use sys.__stdout__ to avoid capturing logs in redirected output
|
| 100 |
-
print(f"[TOOL RESULT] {func_name} output: {output_desc}", flush=True, file=sys.__stdout__)
|
| 101 |
-
except Exception as exc:
|
| 102 |
-
print(f"[TOOL RESULT] {func_name} (failed to log output: {exc})", flush=True, file=sys.__stdout__)
|
| 103 |
-
|
| 104 |
-
# Ensure Tools modules can import 'app' when this file is executed as a script
|
| 105 |
-
# (their code does `from app import ...`).
|
| 106 |
-
sys.modules.setdefault("app", sys.modules[__name__])
|
| 107 |
-
|
| 108 |
# Import per-tool interface builders from the Tools package
|
| 109 |
-
from Modules.Web_Fetch import build_interface as build_fetch_interface
|
| 110 |
from Modules.Web_Search import build_interface as build_search_interface
|
|
|
|
| 111 |
from Modules.Agent_Terminal import build_interface as build_agent_terminal_interface
|
| 112 |
from Modules.Code_Interpreter import build_interface as build_code_interface
|
| 113 |
from Modules.Memory_Manager import build_interface as build_memory_interface
|
| 114 |
-
from Modules.Generate_Speech import build_interface as build_speech_interface
|
| 115 |
from Modules.Generate_Image import build_interface as build_image_interface
|
| 116 |
-
from Modules.Generate_Video import build_interface as build_video_interface
|
| 117 |
-
from Modules.Deep_Research import build_interface as build_research_interface
|
| 118 |
-
from Modules.File_System import build_interface as build_fs_interface
|
| 119 |
-
from Modules.Obsidian_Vault import build_interface as build_obsidian_interface
|
| 120 |
from Modules.Shell_Command import build_interface as build_shell_interface
|
| 121 |
-
from Modules.Agent_Skills import build_interface as build_skills_interface
|
| 122 |
|
| 123 |
# Optional environment flags used to conditionally show API schemas (unchanged behavior)
|
| 124 |
HF_IMAGE_TOKEN = bool(os.getenv("HF_READ_TOKEN"))
|
|
@@ -131,96 +125,78 @@ with open(_css_path, "r", encoding="utf-8") as _css_file:
|
|
| 131 |
CSS_STYLES = _css_file.read()
|
| 132 |
|
| 133 |
# Build each tab interface using modular builders
|
| 134 |
-
fetch_interface = build_fetch_interface()
|
| 135 |
web_search_interface = build_search_interface()
|
|
|
|
| 136 |
agent_terminal_interface = build_agent_terminal_interface()
|
| 137 |
code_interface = build_code_interface()
|
| 138 |
memory_interface = build_memory_interface()
|
| 139 |
-
kokoro_interface = build_speech_interface()
|
| 140 |
image_generation_interface = build_image_interface()
|
| 141 |
-
video_generation_interface = build_video_interface()
|
| 142 |
-
deep_research_interface = build_research_interface()
|
| 143 |
-
fs_interface = build_fs_interface()
|
| 144 |
shell_interface = build_shell_interface()
|
| 145 |
-
obsidian_interface = build_obsidian_interface()
|
| 146 |
-
skills_interface = build_skills_interface()
|
| 147 |
|
| 148 |
_interfaces = [
|
| 149 |
agent_terminal_interface,
|
| 150 |
-
skills_interface,
|
| 151 |
-
fetch_interface,
|
| 152 |
web_search_interface,
|
|
|
|
| 153 |
code_interface,
|
| 154 |
shell_interface,
|
| 155 |
-
fs_interface,
|
| 156 |
-
obsidian_interface,
|
| 157 |
memory_interface,
|
| 158 |
-
kokoro_interface,
|
| 159 |
image_generation_interface,
|
| 160 |
-
video_generation_interface,
|
| 161 |
-
deep_research_interface,
|
| 162 |
]
|
| 163 |
_tab_names = [
|
| 164 |
"Agent Terminal",
|
| 165 |
-
"Agent Skills",
|
| 166 |
-
"Web Fetch",
|
| 167 |
"Web Search",
|
|
|
|
| 168 |
"Code Interpreter",
|
| 169 |
"Shell Command",
|
| 170 |
-
"File System",
|
| 171 |
-
"Obsidian Vault",
|
| 172 |
"Memory Manager",
|
| 173 |
-
"Generate Speech",
|
| 174 |
"Generate Image",
|
| 175 |
-
"Generate Video",
|
| 176 |
-
"Deep Research",
|
| 177 |
]
|
| 178 |
-
|
| 179 |
-
with gr.Blocks(title="Nymbo/Tools MCP") as demo:
|
| 180 |
-
|
| 181 |
-
with gr.Sidebar(width=300, elem_classes="app-sidebar"):
|
| 182 |
-
gr.Markdown(
|
| 183 |
-
"## Nymbo/Tools MCP\n"
|
| 184 |
-
"<p style='font-size: 0.7rem; opacity: 0.85; margin-top: 2px; margin-bottom: 6px;'>General purpose tools useful for any agent.</p>\n"
|
| 185 |
-
"<a href='https://www.nymbo.net/nymbot' target='_blank' style='font-size: 0.7rem; display: block;'>Test with Nymbot</a>"
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
with gr.Accordion("Information", open=False):
|
| 189 |
-
gr.HTML(
|
| 190 |
-
"""
|
| 191 |
-
<div class="info-accordion">
|
| 192 |
-
<div class="info-grid" style="grid-template-columns: 1fr;">
|
| 193 |
-
<section class="info-card">
|
| 194 |
-
<div class="info-card__body">
|
| 195 |
-
<h3>Connecting from an MCP Client</h3>
|
| 196 |
-
<p>
|
| 197 |
-
This Space also runs as a Model Context Protocol (MCP) server. Point your client to:
|
| 198 |
-
<br/>
|
| 199 |
-
<code>https://nymbo-tools.hf.space/gradio_api/mcp/</code>
|
| 200 |
-
</p>
|
| 201 |
-
<p>Example client configuration:</p>
|
| 202 |
-
<pre><code class="language-json">{
|
| 203 |
-
"mcpServers": {
|
| 204 |
-
"nymbo-tools": {
|
| 205 |
-
"url": "https://nymbo-tools.hf.space/gradio_api/mcp/"
|
| 206 |
-
}
|
| 207 |
-
}
|
| 208 |
-
}</code></pre>
|
| 209 |
-
<p>Run the following commands in sequence to run the server locally:</p>
|
| 210 |
-
<pre><code>git clone https://huggingface.co/spaces/Nymbo/Tools
|
| 211 |
-
cd Tools
|
| 212 |
-
python -m venv env
|
| 213 |
-
source env/bin/activate
|
| 214 |
-
pip install -r requirements.txt
|
| 215 |
-
python app.py</code></pre>
|
| 216 |
-
</div>
|
| 217 |
-
</section>
|
| 218 |
-
|
| 219 |
<section class="info-card">
|
| 220 |
<div class="info-card__body">
|
| 221 |
-
<h3>Enable Image Gen
|
| 222 |
<p>
|
| 223 |
-
The <code>Generate_Image</code>
|
| 224 |
<code>HF_READ_TOKEN</code> set as a secret or environment variable.
|
| 225 |
</p>
|
| 226 |
<ul class="info-list">
|
|
@@ -228,124 +204,66 @@ python app.py</code></pre>
|
|
| 228 |
<li>Or run locally with <code>HF_READ_TOKEN</code> in your environment.</li>
|
| 229 |
</ul>
|
| 230 |
<div class="info-hint">
|
| 231 |
-
|
| 232 |
</div>
|
| 233 |
</div>
|
| 234 |
</section>
|
| 235 |
-
|
| 236 |
<section class="info-card">
|
| 237 |
<div class="info-card__body">
|
| 238 |
-
<h3>Persistent Memories
|
| 239 |
<p>
|
| 240 |
-
In this public demo, memories
|
| 241 |
</p>
|
| 242 |
<p>
|
| 243 |
-
When running locally, memories are saved to <code>memories.json</code> at the repo root for privacy
|
| 244 |
</p>
|
| 245 |
</div>
|
| 246 |
</section>
|
| 247 |
-
|
| 248 |
<section class="info-card">
|
| 249 |
<div class="info-card__body">
|
| 250 |
-
<h3>Tool Notes
|
| 251 |
<p><strong>No authentication required for:</strong></p>
|
| 252 |
<ul class="info-list">
|
| 253 |
-
<li><code>Web_Fetch</code></li>
|
| 254 |
<li><code>Web_Search</code></li>
|
| 255 |
<li><code>Agent_Terminal</code></li>
|
| 256 |
<li><code>Code_Interpreter</code></li>
|
| 257 |
<li><code>Memory_Manager</code></li>
|
| 258 |
-
<li><code>Generate_Speech</code></li>
|
| 259 |
-
<li><code>File_System</code></li>
|
| 260 |
<li><code>Shell_Command</code></li>
|
| 261 |
-
<li><code>Agent_Skills</code></li>
|
| 262 |
</ul>
|
| 263 |
-
<p>
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.15);">
|
| 267 |
-
<th style="padding:6px 8px; text-align:left;">Accent</th>
|
| 268 |
-
<th style="padding:6px 8px; text-align:center;">Female</th>
|
| 269 |
-
<th style="padding:6px 8px; text-align:center;">Male</th>
|
| 270 |
-
</tr>
|
| 271 |
-
</thead>
|
| 272 |
-
<tbody>
|
| 273 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 274 |
-
<td style="padding:6px 8px; font-weight:600;">American</td>
|
| 275 |
-
<td style="padding:6px 8px; text-align:center;"><code>af</code></td>
|
| 276 |
-
<td style="padding:6px 8px; text-align:center;"><code>am</code></td>
|
| 277 |
-
</tr>
|
| 278 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 279 |
-
<td style="padding:6px 8px; font-weight:600;">British</td>
|
| 280 |
-
<td style="padding:6px 8px; text-align:center;"><code>bf</code></td>
|
| 281 |
-
<td style="padding:6px 8px; text-align:center;"><code>bm</code></td>
|
| 282 |
-
</tr>
|
| 283 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 284 |
-
<td style="padding:6px 8px; font-weight:600;">European</td>
|
| 285 |
-
<td style="padding:6px 8px; text-align:center;"><code>ef</code></td>
|
| 286 |
-
<td style="padding:6px 8px; text-align:center;"><code>em</code></td>
|
| 287 |
-
</tr>
|
| 288 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 289 |
-
<td style="padding:6px 8px; font-weight:600;">French</td>
|
| 290 |
-
<td style="padding:6px 8px; text-align:center;"><code>ff</code></td>
|
| 291 |
-
<td style="padding:6px 8px; text-align:center;">—</td>
|
| 292 |
-
</tr>
|
| 293 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 294 |
-
<td style="padding:6px 8px; font-weight:600;">Hindi</td>
|
| 295 |
-
<td style="padding:6px 8px; text-align:center;"><code>hf</code></td>
|
| 296 |
-
<td style="padding:6px 8px; text-align:center;"><code>hm</code></td>
|
| 297 |
-
</tr>
|
| 298 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 299 |
-
<td style="padding:6px 8px; font-weight:600;">Italian</td>
|
| 300 |
-
<td style="padding:6px 8px; text-align:center;"><code>if</code></td>
|
| 301 |
-
<td style="padding:6px 8px; text-align:center;"><code>im</code></td>
|
| 302 |
-
</tr>
|
| 303 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 304 |
-
<td style="padding:6px 8px; font-weight:600;">Japanese</td>
|
| 305 |
-
<td style="padding:6px 8px; text-align:center;"><code>jf</code></td>
|
| 306 |
-
<td style="padding:6px 8px; text-align:center;"><code>jm</code></td>
|
| 307 |
-
</tr>
|
| 308 |
-
<tr style="border-bottom:1px solid rgba(255,255,255,0.08);">
|
| 309 |
-
<td style="padding:6px 8px; font-weight:600;">Portuguese</td>
|
| 310 |
-
<td style="padding:6px 8px; text-align:center;"><code>pf</code></td>
|
| 311 |
-
<td style="padding:6px 8px; text-align:center;"><code>pm</code></td>
|
| 312 |
-
</tr>
|
| 313 |
-
<tr>
|
| 314 |
-
<td style="padding:6px 8px; font-weight:600;">Chinese</td>
|
| 315 |
-
<td style="padding:6px 8px; text-align:center;"><code>zf</code></td>
|
| 316 |
-
<td style="padding:6px 8px; text-align:center;"><code>zm</code></td>
|
| 317 |
-
</tr>
|
| 318 |
-
</tbody>
|
| 319 |
-
</table>
|
| 320 |
</div>
|
| 321 |
</section>
|
| 322 |
-
</div>
|
| 323 |
-
</div>
|
| 324 |
-
"""
|
| 325 |
-
)
|
| 326 |
-
|
| 327 |
-
gr.Markdown("### Tools")
|
| 328 |
-
tool_selector = gr.Radio(
|
| 329 |
-
choices=_tab_names,
|
| 330 |
-
value=_tab_names[0],
|
| 331 |
-
label="Select Tool",
|
| 332 |
-
show_label=False,
|
| 333 |
-
container=False,
|
| 334 |
-
elem_classes="sidebar-nav"
|
| 335 |
-
)
|
| 336 |
-
|
| 337 |
-
with gr.Tabs(elem_classes="hidden-tabs", selected=_tab_names[0]) as tool_tabs:
|
| 338 |
-
for name, interface in zip(_tab_names, _interfaces):
|
| 339 |
-
with gr.TabItem(label=name, id=name, elem_id=f"tab-{name}"):
|
| 340 |
-
interface.render()
|
| 341 |
-
|
| 342 |
-
# Use JavaScript to click the hidden tab button when the radio selection changes
|
| 343 |
-
tool_selector.change(
|
| 344 |
-
fn=None,
|
| 345 |
-
inputs=tool_selector,
|
| 346 |
-
outputs=None,
|
| 347 |
-
js="(selected_tool) => { const buttons = document.querySelectorAll('.hidden-tabs button'); buttons.forEach(btn => { if (btn.innerText.trim() === selected_tool) { btn.click(); } }); }"
|
| 348 |
-
)
|
| 349 |
-
|
| 350 |
-
if __name__ == "__main__":
|
| 351 |
-
demo.launch(mcp_server=True, theme="Nymbo/Nymbo_Theme", css=CSS_STYLES, ssr_mode=False)
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# Project by Nymbo
|
| 4 |
+
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import sys
|
| 8 |
+
import threading
|
| 9 |
+
import time
|
| 10 |
+
import warnings
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
# Suppress asyncio event loop cleanup errors (Python 3.10 issue on HF Spaces)
|
| 15 |
+
# These occur when event loops are garbage collected after file descriptors close
|
| 16 |
+
def _patch_asyncio_event_loop_del():
|
| 17 |
+
"""Patch BaseEventLoop.__del__ to suppress 'Invalid file descriptor: -1' errors."""
|
| 18 |
+
try:
|
| 19 |
+
import asyncio.base_events as base_events
|
| 20 |
+
original_del = getattr(base_events.BaseEventLoop, "__del__", None)
|
| 21 |
+
if original_del is None:
|
| 22 |
+
return
|
| 23 |
+
def patched_del(self):
|
| 24 |
+
try:
|
| 25 |
+
original_del(self)
|
| 26 |
+
except ValueError as e:
|
| 27 |
+
if "Invalid file descriptor" not in str(e):
|
| 28 |
+
raise
|
| 29 |
+
base_events.BaseEventLoop.__del__ = patched_del
|
| 30 |
+
except Exception:
|
| 31 |
+
pass
|
| 32 |
+
|
| 33 |
+
_patch_asyncio_event_loop_del()
|
| 34 |
+
|
| 35 |
+
import gradio as gr
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class RateLimiter:
|
| 39 |
+
"""Best-effort in-process rate limiter for HTTP-heavy tools."""
|
| 40 |
+
|
| 41 |
+
def __init__(self, requests_per_minute: int = 30) -> None:
|
| 42 |
+
self.requests_per_minute = requests_per_minute
|
| 43 |
+
self._requests: list[datetime] = []
|
| 44 |
+
self._lock = threading.Lock()
|
| 45 |
+
|
| 46 |
+
def acquire(self) -> None:
|
| 47 |
+
now = datetime.now()
|
| 48 |
+
with self._lock:
|
| 49 |
+
self._requests = [req for req in self._requests if now - req < timedelta(minutes=1)]
|
| 50 |
+
if len(self._requests) >= self.requests_per_minute:
|
| 51 |
+
wait_time = 60 - (now - self._requests[0]).total_seconds()
|
| 52 |
+
if wait_time > 0:
|
| 53 |
+
time.sleep(max(1, wait_time))
|
| 54 |
+
self._requests.append(now)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
_search_rate_limiter = RateLimiter(requests_per_minute=20)
|
| 58 |
+
_fetch_rate_limiter = RateLimiter(requests_per_minute=25)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _truncate_for_log(value: Any, limit: int = 500) -> str:
|
| 62 |
+
if not isinstance(value, str):
|
| 63 |
+
value = str(value)
|
| 64 |
+
if len(value) <= limit:
|
| 65 |
+
return value
|
| 66 |
+
return value[: limit - 1] + "…"
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _serialize_input(val: Any) -> Any:
|
| 70 |
+
try:
|
| 71 |
+
if isinstance(val, (str, int, float, bool)) or val is None:
|
| 72 |
+
return val
|
| 73 |
+
if isinstance(val, (list, tuple)):
|
| 74 |
+
return [_serialize_input(v) for v in list(val)[:10]] + (["…"] if len(val) > 10 else [])
|
| 75 |
+
if isinstance(val, dict):
|
| 76 |
+
out: dict[str, Any] = {}
|
| 77 |
+
for i, (k, v) in enumerate(val.items()):
|
| 78 |
+
if i >= 12:
|
| 79 |
+
out["…"] = "…"
|
| 80 |
+
break
|
| 81 |
+
out[str(k)] = _serialize_input(v)
|
| 82 |
+
return out
|
| 83 |
+
return repr(val)[:120]
|
| 84 |
+
except Exception:
|
| 85 |
+
return "<unserializable>"
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _log_call_start(func_name: str, **kwargs: Any) -> None:
|
| 89 |
+
try:
|
| 90 |
+
compact = {k: _serialize_input(v) for k, v in kwargs.items()}
|
| 91 |
+
# Use sys.__stdout__ to avoid capturing logs in redirected output
|
| 92 |
+
print(f"[TOOL CALL] {func_name} inputs: {json.dumps(compact, ensure_ascii=False)[:800]}", flush=True, file=sys.__stdout__)
|
| 93 |
+
except Exception as exc:
|
| 94 |
+
print(f"[TOOL CALL] {func_name} (failed to log inputs: {exc})", flush=True, file=sys.__stdout__)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _log_call_end(func_name: str, output_desc: str) -> None:
|
| 98 |
+
try:
|
| 99 |
+
# Use sys.__stdout__ to avoid capturing logs in redirected output
|
| 100 |
+
print(f"[TOOL RESULT] {func_name} output: {output_desc}", flush=True, file=sys.__stdout__)
|
| 101 |
+
except Exception as exc:
|
| 102 |
+
print(f"[TOOL RESULT] {func_name} (failed to log output: {exc})", flush=True, file=sys.__stdout__)
|
| 103 |
+
|
| 104 |
+
# Ensure Tools modules can import 'app' when this file is executed as a script
|
| 105 |
+
# (their code does `from app import ...`).
|
| 106 |
+
sys.modules.setdefault("app", sys.modules[__name__])
|
| 107 |
+
|
| 108 |
# Import per-tool interface builders from the Tools package
|
|
|
|
| 109 |
from Modules.Web_Search import build_interface as build_search_interface
|
| 110 |
+
from Modules.ScrapeGraphAI import build_interface as build_scrapegraph_interface
|
| 111 |
from Modules.Agent_Terminal import build_interface as build_agent_terminal_interface
|
| 112 |
from Modules.Code_Interpreter import build_interface as build_code_interface
|
| 113 |
from Modules.Memory_Manager import build_interface as build_memory_interface
|
|
|
|
| 114 |
from Modules.Generate_Image import build_interface as build_image_interface
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
from Modules.Shell_Command import build_interface as build_shell_interface
|
|
|
|
| 116 |
|
| 117 |
# Optional environment flags used to conditionally show API schemas (unchanged behavior)
|
| 118 |
HF_IMAGE_TOKEN = bool(os.getenv("HF_READ_TOKEN"))
|
|
|
|
| 125 |
CSS_STYLES = _css_file.read()
|
| 126 |
|
| 127 |
# Build each tab interface using modular builders
|
|
|
|
| 128 |
web_search_interface = build_search_interface()
|
| 129 |
+
scrapegraph_interface = build_scrapegraph_interface()
|
| 130 |
agent_terminal_interface = build_agent_terminal_interface()
|
| 131 |
code_interface = build_code_interface()
|
| 132 |
memory_interface = build_memory_interface()
|
|
|
|
| 133 |
image_generation_interface = build_image_interface()
|
|
|
|
|
|
|
|
|
|
| 134 |
shell_interface = build_shell_interface()
|
|
|
|
|
|
|
| 135 |
|
| 136 |
_interfaces = [
|
| 137 |
agent_terminal_interface,
|
|
|
|
|
|
|
| 138 |
web_search_interface,
|
| 139 |
+
scrapegraph_interface,
|
| 140 |
code_interface,
|
| 141 |
shell_interface,
|
|
|
|
|
|
|
| 142 |
memory_interface,
|
|
|
|
| 143 |
image_generation_interface,
|
|
|
|
|
|
|
| 144 |
]
|
| 145 |
_tab_names = [
|
| 146 |
"Agent Terminal",
|
|
|
|
|
|
|
| 147 |
"Web Search",
|
| 148 |
+
"ScrapeGraphAI",
|
| 149 |
"Code Interpreter",
|
| 150 |
"Shell Command",
|
|
|
|
|
|
|
| 151 |
"Memory Manager",
|
|
|
|
| 152 |
"Generate Image",
|
|
|
|
|
|
|
| 153 |
]
|
| 154 |
+
|
| 155 |
+
with gr.Blocks(title="Nymbo/Tools MCP") as demo:
|
| 156 |
+
|
| 157 |
+
with gr.Sidebar(width=300, elem_classes="app-sidebar"):
|
| 158 |
+
gr.Markdown(
|
| 159 |
+
"## Nymbo/Tools MCP\n"
|
| 160 |
+
"<p style='font-size: 0.7rem; opacity: 0.85; margin-top: 2px; margin-bottom: 6px;'>General purpose tools useful for any agent.</p>\n"
|
| 161 |
+
"<a href='https://www.nymbo.net/nymbot' target='_blank' style='font-size: 0.7rem; display: block;'>Test with Nymbot</a>"
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
with gr.Accordion("Information", open=False):
|
| 165 |
+
gr.HTML(
|
| 166 |
+
"""
|
| 167 |
+
<div class="info-accordion">
|
| 168 |
+
<div class="info-grid" style="grid-template-columns: 1fr;">
|
| 169 |
+
<section class="info-card">
|
| 170 |
+
<div class="info-card__body">
|
| 171 |
+
<h3>Connecting from an MCP Client</h3>
|
| 172 |
+
<p>
|
| 173 |
+
This Space also runs as a Model Context Protocol (MCP) server. Point your client to:
|
| 174 |
+
<br/>
|
| 175 |
+
<code>https://nymbo-tools.hf.space/gradio_api/mcp/</code>
|
| 176 |
+
</p>
|
| 177 |
+
<p>Example client configuration:</p>
|
| 178 |
+
<pre><code class="language-json">{
|
| 179 |
+
"mcpServers": {
|
| 180 |
+
"nymbo-tools": {
|
| 181 |
+
"url": "https://nymbo-tools.hf.space/gradio_api/mcp/"
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
}</code></pre>
|
| 185 |
+
<p>Run the following commands in sequence to run the server locally:</p>
|
| 186 |
+
<pre><code>git clone https://huggingface.co/spaces/Nymbo/Tools
|
| 187 |
+
cd Tools
|
| 188 |
+
python -m venv env
|
| 189 |
+
source env/bin/activate
|
| 190 |
+
pip install -r requirements.txt
|
| 191 |
+
python app.py</code></pre>
|
| 192 |
+
</div>
|
| 193 |
+
</section>
|
| 194 |
+
|
| 195 |
<section class="info-card">
|
| 196 |
<div class="info-card__body">
|
| 197 |
+
<h3>Enable Image Gen</h3>
|
| 198 |
<p>
|
| 199 |
+
The <code>Generate_Image</code> tool requires a
|
| 200 |
<code>HF_READ_TOKEN</code> set as a secret or environment variable.
|
| 201 |
</p>
|
| 202 |
<ul class="info-list">
|
|
|
|
| 204 |
<li>Or run locally with <code>HF_READ_TOKEN</code> in your environment.</li>
|
| 205 |
</ul>
|
| 206 |
<div class="info-hint">
|
| 207 |
+
The <code>ScrapeGraphAI</code> tool also requires <code>MISTRAL_API_KEY</code> for extraction actions.
|
| 208 |
</div>
|
| 209 |
</div>
|
| 210 |
</section>
|
| 211 |
+
|
| 212 |
<section class="info-card">
|
| 213 |
<div class="info-card__body">
|
| 214 |
+
<h3>Persistent Memories</h3>
|
| 215 |
<p>
|
| 216 |
+
In this public demo, memories created with the <code>Memory_Manager</code> tool are stored in the Space's running container and are cleared when the Space restarts. Content is visible to everyone—avoid personal data.
|
| 217 |
</p>
|
| 218 |
<p>
|
| 219 |
+
When running locally, memories are saved to <code>memories.json</code> at the repo root for privacy.
|
| 220 |
</p>
|
| 221 |
</div>
|
| 222 |
</section>
|
| 223 |
+
|
| 224 |
<section class="info-card">
|
| 225 |
<div class="info-card__body">
|
| 226 |
+
<h3>Tool Notes</h3>
|
| 227 |
<p><strong>No authentication required for:</strong></p>
|
| 228 |
<ul class="info-list">
|
|
|
|
| 229 |
<li><code>Web_Search</code></li>
|
| 230 |
<li><code>Agent_Terminal</code></li>
|
| 231 |
<li><code>Code_Interpreter</code></li>
|
| 232 |
<li><code>Memory_Manager</code></li>
|
|
|
|
|
|
|
| 233 |
<li><code>Shell_Command</code></li>
|
|
|
|
| 234 |
</ul>
|
| 235 |
+
<p>
|
| 236 |
+
<code>ScrapeGraphAI</code> is available in this Space, but extraction actions require <code>MISTRAL_API_KEY</code>. The <code>render_markdown</code> action does not require Mistral.
|
| 237 |
+
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
</div>
|
| 239 |
</section>
|
| 240 |
+
</div>
|
| 241 |
+
</div>
|
| 242 |
+
"""
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
gr.Markdown("### Tools")
|
| 246 |
+
tool_selector = gr.Radio(
|
| 247 |
+
choices=_tab_names,
|
| 248 |
+
value=_tab_names[0],
|
| 249 |
+
label="Select Tool",
|
| 250 |
+
show_label=False,
|
| 251 |
+
container=False,
|
| 252 |
+
elem_classes="sidebar-nav"
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
with gr.Tabs(elem_classes="hidden-tabs", selected=_tab_names[0]) as tool_tabs:
|
| 256 |
+
for name, interface in zip(_tab_names, _interfaces):
|
| 257 |
+
with gr.TabItem(label=name, id=name, elem_id=f"tab-{name}"):
|
| 258 |
+
interface.render()
|
| 259 |
+
|
| 260 |
+
# Use JavaScript to click the hidden tab button when the radio selection changes
|
| 261 |
+
tool_selector.change(
|
| 262 |
+
fn=None,
|
| 263 |
+
inputs=tool_selector,
|
| 264 |
+
outputs=None,
|
| 265 |
+
js="(selected_tool) => { const buttons = document.querySelectorAll('.hidden-tabs button'); buttons.forEach(btn => { if (btn.innerText.trim() === selected_tool) { btn.click(); } }); }"
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
if __name__ == "__main__":
|
| 269 |
+
demo.launch(mcp_server=True, theme="Nymbo/Nymbo_Theme", css=CSS_STYLES, ssr_mode=False)
|
memories.json
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"id": "c8e3965d-270c-4baf-836f-33c6ed57f527",
|
| 4 |
-
"text": "The user's personal website is driven by Markdown and Vue, hosted on Vercel.",
|
| 5 |
-
"timestamp": "2025-09-06 02:21:17",
|
| 6 |
-
"tags": "website,markdown,vue,vercel"
|
| 7 |
-
},
|
| 8 |
-
{
|
| 9 |
-
"id": "17806073-cb86-472f-9b39-c1aaaf3ac058",
|
| 10 |
-
"text": "The user lives in New York City.",
|
| 11 |
-
"timestamp": "2025-09-06 17:07:27",
|
| 12 |
-
"tags": "location,address"
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"id": "86e9f249-b43d-4aaa-bca0-b55fcb0c03be",
|
| 16 |
-
"text": "The user has a pet Russian tortoise who is 8 years old.",
|
| 17 |
-
"timestamp": "2025-09-06 02:20:59",
|
| 18 |
-
"tags": "pet,tortoise,animals"
|
| 19 |
-
}
|
| 20 |
]
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "c8e3965d-270c-4baf-836f-33c6ed57f527",
|
| 4 |
+
"text": "The user's personal website is driven by Markdown and Vue, hosted on Vercel.",
|
| 5 |
+
"timestamp": "2025-09-06 02:21:17",
|
| 6 |
+
"tags": "website,markdown,vue,vercel"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"id": "17806073-cb86-472f-9b39-c1aaaf3ac058",
|
| 10 |
+
"text": "The user lives in New York City.",
|
| 11 |
+
"timestamp": "2025-09-06 17:07:27",
|
| 12 |
+
"tags": "location,address"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"id": "86e9f249-b43d-4aaa-bca0-b55fcb0c03be",
|
| 16 |
+
"text": "The user has a pet Russian tortoise who is 8 years old.",
|
| 17 |
+
"timestamp": "2025-09-06 02:20:59",
|
| 18 |
+
"tags": "pet,tortoise,animals"
|
| 19 |
+
}
|
| 20 |
]
|
requirements.txt
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
-
gradio[mcp]==6.2.0
|
| 2 |
-
requests
|
| 3 |
-
beautifulsoup4
|
| 4 |
-
lxml
|
| 5 |
-
readability-lxml
|
| 6 |
-
ddgs
|
| 7 |
-
kokoro>=0.7.16
|
| 8 |
-
numpy
|
| 9 |
-
torch; platform_system != "Darwin" or platform_machine != "arm64"
|
| 10 |
-
Pillow
|
| 11 |
-
huggingface_hub>=0.30.0
|
| 12 |
-
markdownify
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[mcp]==6.2.0
|
| 2 |
+
requests
|
| 3 |
+
beautifulsoup4
|
| 4 |
+
lxml
|
| 5 |
+
readability-lxml
|
| 6 |
+
ddgs
|
| 7 |
+
kokoro>=0.7.16
|
| 8 |
+
numpy
|
| 9 |
+
torch; platform_system != "Darwin" or platform_machine != "arm64"
|
| 10 |
+
Pillow
|
| 11 |
+
huggingface_hub>=0.30.0
|
| 12 |
+
markdownify
|
| 13 |
+
langchain-mistralai>=1.1.1
|
| 14 |
+
scrapegraphai>=1.75.1
|
styles.css
CHANGED
|
@@ -1,308 +1,308 @@
|
|
| 1 |
-
/* Style only the top-level app title to avoid affecting headings elsewhere */
|
| 2 |
-
.app-title {
|
| 3 |
-
text-align: center;
|
| 4 |
-
/* Ensure main title appears first, then our two subtitle lines */
|
| 5 |
-
display: grid;
|
| 6 |
-
justify-items: center;
|
| 7 |
-
}
|
| 8 |
-
.app-title::after {
|
| 9 |
-
grid-row: 2;
|
| 10 |
-
content: "General purpose tools useful for any agent.";
|
| 11 |
-
display: block;
|
| 12 |
-
font-size: 1rem;
|
| 13 |
-
font-weight: 400;
|
| 14 |
-
opacity: 0.9;
|
| 15 |
-
margin-top: 2px;
|
| 16 |
-
white-space: pre-wrap;
|
| 17 |
-
}
|
| 18 |
-
|
| 19 |
-
/* Sidebar Container */
|
| 20 |
-
.app-sidebar {
|
| 21 |
-
background: var(--body-background-fill) !important;
|
| 22 |
-
border-right: 1px solid rgba(255, 255, 255, 0.08) !important;
|
| 23 |
-
}
|
| 24 |
-
@media (prefers-color-scheme: light) {
|
| 25 |
-
.app-sidebar {
|
| 26 |
-
border-right: 1px solid rgba(0, 0, 0, 0.08) !important;
|
| 27 |
-
}
|
| 28 |
-
}
|
| 29 |
-
|
| 30 |
-
/* Historical safeguard: if any h1 appears inside tabs, don't attach pseudo content */
|
| 31 |
-
.gradio-container [role="tabpanel"] h1::before,
|
| 32 |
-
.gradio-container [role="tabpanel"] h1::after {
|
| 33 |
-
content: none !important;
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
/* Information accordion - modern info cards */
|
| 37 |
-
.info-accordion {
|
| 38 |
-
margin: 8px 0 2px;
|
| 39 |
-
}
|
| 40 |
-
.info-grid {
|
| 41 |
-
display: grid;
|
| 42 |
-
gap: 12px;
|
| 43 |
-
/* Force a 2x2 layout on medium+ screens */
|
| 44 |
-
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 45 |
-
align-items: stretch;
|
| 46 |
-
}
|
| 47 |
-
/* On narrow screens, stack into a single column */
|
| 48 |
-
@media (max-width: 800px) {
|
| 49 |
-
.info-grid {
|
| 50 |
-
grid-template-columns: 1fr;
|
| 51 |
-
}
|
| 52 |
-
}
|
| 53 |
-
.info-card {
|
| 54 |
-
display: flex;
|
| 55 |
-
gap: 14px;
|
| 56 |
-
padding: 14px 16px;
|
| 57 |
-
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 58 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03));
|
| 59 |
-
border-radius: 12px;
|
| 60 |
-
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
|
| 61 |
-
position: relative;
|
| 62 |
-
overflow: hidden;
|
| 63 |
-
-webkit-backdrop-filter: blur(2px);
|
| 64 |
-
backdrop-filter: blur(2px);
|
| 65 |
-
}
|
| 66 |
-
.info-card::before {
|
| 67 |
-
content: "";
|
| 68 |
-
position: absolute;
|
| 69 |
-
inset: 0;
|
| 70 |
-
border-radius: 12px;
|
| 71 |
-
pointer-events: none;
|
| 72 |
-
background: linear-gradient(90deg, rgba(99,102,241,0.06), rgba(59,130,246,0.05));
|
| 73 |
-
}
|
| 74 |
-
.info-card__icon {
|
| 75 |
-
font-size: 24px;
|
| 76 |
-
flex: 0 0 28px;
|
| 77 |
-
line-height: 1;
|
| 78 |
-
filter: saturate(1.1);
|
| 79 |
-
}
|
| 80 |
-
.info-card__body {
|
| 81 |
-
min-width: 0;
|
| 82 |
-
}
|
| 83 |
-
.info-card__body h3 {
|
| 84 |
-
margin: 0 0 6px;
|
| 85 |
-
font-size: 1.05rem;
|
| 86 |
-
}
|
| 87 |
-
.info-card__body p {
|
| 88 |
-
margin: 6px 0;
|
| 89 |
-
opacity: 0.95;
|
| 90 |
-
}
|
| 91 |
-
/* Readable code blocks inside info cards */
|
| 92 |
-
.info-card pre {
|
| 93 |
-
margin: 8px 0;
|
| 94 |
-
padding: 10px 12px;
|
| 95 |
-
background: rgba(20, 20, 30, 0.55);
|
| 96 |
-
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 97 |
-
border-radius: 10px;
|
| 98 |
-
overflow-x: auto;
|
| 99 |
-
white-space: pre;
|
| 100 |
-
}
|
| 101 |
-
.info-card code {
|
| 102 |
-
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
|
| 103 |
-
font-size: 0.95em;
|
| 104 |
-
}
|
| 105 |
-
.info-card pre code {
|
| 106 |
-
display: block;
|
| 107 |
-
}
|
| 108 |
-
.info-card p {
|
| 109 |
-
word-wrap: break-word;
|
| 110 |
-
overflow-wrap: break-word;
|
| 111 |
-
}
|
| 112 |
-
.info-card p code {
|
| 113 |
-
word-break: break-all;
|
| 114 |
-
}
|
| 115 |
-
.info-list {
|
| 116 |
-
margin: 6px 0 0 18px;
|
| 117 |
-
padding: 0;
|
| 118 |
-
}
|
| 119 |
-
.info-hint {
|
| 120 |
-
margin-top: 8px;
|
| 121 |
-
font-size: 0.9em;
|
| 122 |
-
opacity: 0.9;
|
| 123 |
-
}
|
| 124 |
-
|
| 125 |
-
/* Light theme adjustments */
|
| 126 |
-
@media (prefers-color-scheme: light) {
|
| 127 |
-
.info-card {
|
| 128 |
-
border-color: rgba(0, 0, 0, 0.08);
|
| 129 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.9));
|
| 130 |
-
}
|
| 131 |
-
.info-card::before {
|
| 132 |
-
background: linear-gradient(90deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06));
|
| 133 |
-
}
|
| 134 |
-
.info-card pre {
|
| 135 |
-
background: rgba(245, 246, 250, 0.95);
|
| 136 |
-
border-color: rgba(0, 0, 0, 0.08);
|
| 137 |
-
}
|
| 138 |
-
}
|
| 139 |
-
|
| 140 |
-
/* Sidebar Navigation - styled like the previous tabs */
|
| 141 |
-
.sidebar-nav {
|
| 142 |
-
background: transparent !important;
|
| 143 |
-
border: none !important;
|
| 144 |
-
padding: 0 !important;
|
| 145 |
-
}
|
| 146 |
-
.sidebar-nav .form {
|
| 147 |
-
gap: 8px !important;
|
| 148 |
-
display: flex !important;
|
| 149 |
-
flex-direction: column !important;
|
| 150 |
-
border: none !important;
|
| 151 |
-
background: transparent !important;
|
| 152 |
-
}
|
| 153 |
-
.sidebar-nav label {
|
| 154 |
-
display: flex !important;
|
| 155 |
-
align-items: center !important;
|
| 156 |
-
padding: 10px 12px !important;
|
| 157 |
-
border-radius: 10px !important;
|
| 158 |
-
border: 1px solid rgba(255, 255, 255, 0.08) !important;
|
| 159 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03)) !important;
|
| 160 |
-
transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
|
| 161 |
-
cursor: pointer !important;
|
| 162 |
-
margin-bottom: 0 !important;
|
| 163 |
-
width: 100% !important;
|
| 164 |
-
justify-content: flex-start !important;
|
| 165 |
-
text-align: left !important;
|
| 166 |
-
}
|
| 167 |
-
.sidebar-nav label:hover {
|
| 168 |
-
border-color: rgba(99,102,241,0.28) !important;
|
| 169 |
-
background: linear-gradient(180deg, rgba(99,102,241,0.10), rgba(59,130,246,0.08)) !important;
|
| 170 |
-
}
|
| 171 |
-
/* Selected state - Gradio adds 'selected' class to the label in some versions, or we check input:checked */
|
| 172 |
-
.sidebar-nav label.selected {
|
| 173 |
-
border-color: rgba(99,102,241,0.35) !important;
|
| 174 |
-
box-shadow: inset 0 0 0 1px rgba(99,102,241,0.25), 0 1px 2px rgba(0,0,0,0.25) !important;
|
| 175 |
-
background: linear-gradient(180deg, rgba(99,102,241,0.18), rgba(59,130,246,0.14)) !important;
|
| 176 |
-
color: rgba(255, 255, 255, 0.95) !important;
|
| 177 |
-
}
|
| 178 |
-
|
| 179 |
-
/* Light theme adjustments for sidebar */
|
| 180 |
-
@media (prefers-color-scheme: light) {
|
| 181 |
-
.sidebar-nav label {
|
| 182 |
-
border-color: rgba(0, 0, 0, 0.08) !important;
|
| 183 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.90)) !important;
|
| 184 |
-
color: rgba(0, 0, 0, 0.85) !important;
|
| 185 |
-
}
|
| 186 |
-
.sidebar-nav label:hover {
|
| 187 |
-
border-color: rgba(99,102,241,0.25) !important;
|
| 188 |
-
background: linear-gradient(180deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06)) !important;
|
| 189 |
-
}
|
| 190 |
-
.sidebar-nav label.selected {
|
| 191 |
-
border-color: rgba(99,102,241,0.35) !important;
|
| 192 |
-
background: linear-gradient(180deg, rgba(99,102,241,0.16), rgba(59,130,246,0.12)) !important;
|
| 193 |
-
color: rgba(0, 0, 0, 0.85) !important;
|
| 194 |
-
}
|
| 195 |
-
}
|
| 196 |
-
|
| 197 |
-
/* Hide scrollbars/arrows that can appear on the description block in some browsers */
|
| 198 |
-
/* stylelint-disable compat-api/css */
|
| 199 |
-
article.prose, .prose, .gr-prose {
|
| 200 |
-
overflow: visible !important;
|
| 201 |
-
max-height: none !important;
|
| 202 |
-
-ms-overflow-style: none !important; /* IE/Edge */
|
| 203 |
-
scrollbar-width: none !important; /* Firefox */
|
| 204 |
-
}
|
| 205 |
-
/* stylelint-enable compat-api/css */
|
| 206 |
-
article.prose::-webkit-scrollbar,
|
| 207 |
-
.prose::-webkit-scrollbar,
|
| 208 |
-
.gr-prose::-webkit-scrollbar {
|
| 209 |
-
display: none !important; /* Chrome/Safari */
|
| 210 |
-
}
|
| 211 |
-
|
| 212 |
-
/* Fix for white background on single-line inputs in dark mode */
|
| 213 |
-
.gradio-container input[type="text"],
|
| 214 |
-
.gradio-container input[type="password"],
|
| 215 |
-
.gradio-container input[type="number"],
|
| 216 |
-
.gradio-container input[type="email"] {
|
| 217 |
-
background-color: var(--input-background-fill) !important;
|
| 218 |
-
color: var(--body-text-color) !important;
|
| 219 |
-
}
|
| 220 |
-
|
| 221 |
-
/* Custom glossy purple styling for primary action buttons */
|
| 222 |
-
.gradio-container button.primary {
|
| 223 |
-
border: 1px solid rgba(99, 102, 241, 0.35) !important;
|
| 224 |
-
background: linear-gradient(180deg, rgba(99, 102, 241, 0.25), rgba(59, 130, 246, 0.20)) !important;
|
| 225 |
-
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.12), 0 2px 4px rgba(0, 0, 0, 0.15) !important;
|
| 226 |
-
color: rgba(255, 255, 255, 0.95) !important;
|
| 227 |
-
transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
|
| 228 |
-
}
|
| 229 |
-
.gradio-container button.primary:hover {
|
| 230 |
-
border-color: rgba(99, 102, 241, 0.5) !important;
|
| 231 |
-
background: linear-gradient(180deg, rgba(99, 102, 241, 0.35), rgba(59, 130, 246, 0.28)) !important;
|
| 232 |
-
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 3px 6px rgba(0, 0, 0, 0.2) !important;
|
| 233 |
-
}
|
| 234 |
-
.gradio-container button.primary:active {
|
| 235 |
-
transform: scale(0.98) !important;
|
| 236 |
-
box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.2), 0 1px 2px rgba(0, 0, 0, 0.1) !important;
|
| 237 |
-
}
|
| 238 |
-
@media (prefers-color-scheme: light) {
|
| 239 |
-
.gradio-container button.primary {
|
| 240 |
-
border-color: rgba(99, 102, 241, 0.4) !important;
|
| 241 |
-
background: linear-gradient(180deg, rgba(99, 102, 241, 0.85), rgba(79, 70, 229, 0.75)) !important;
|
| 242 |
-
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.25), 0 2px 4px rgba(0, 0, 0, 0.12) !important;
|
| 243 |
-
color: rgba(255, 255, 255, 0.98) !important;
|
| 244 |
-
}
|
| 245 |
-
.gradio-container button.primary:hover {
|
| 246 |
-
background: linear-gradient(180deg, rgba(99, 102, 241, 0.95), rgba(79, 70, 229, 0.85)) !important;
|
| 247 |
-
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.3), 0 3px 6px rgba(0, 0, 0, 0.15) !important;
|
| 248 |
-
}
|
| 249 |
-
}
|
| 250 |
-
|
| 251 |
-
/* Hide the actual tabs since we use the sidebar to control them */
|
| 252 |
-
.hidden-tabs .tab-nav,
|
| 253 |
-
.hidden-tabs [role="tablist"] {
|
| 254 |
-
display: none !important;
|
| 255 |
-
}
|
| 256 |
-
/* Hide the entire first row of the tabs container (contains tab buttons + overflow) */
|
| 257 |
-
.hidden-tabs > div:first-child {
|
| 258 |
-
display: none !important;
|
| 259 |
-
}
|
| 260 |
-
/* Ensure audio component buttons remain visible - they're inside tab panels, not the first row */
|
| 261 |
-
.hidden-tabs [role="tabpanel"] button {
|
| 262 |
-
display: inline-flex !important;
|
| 263 |
-
}
|
| 264 |
-
|
| 265 |
-
/* Custom scrollbar styling - Progressive enhancement, falls back to default scrollbars */
|
| 266 |
-
/* stylelint-disable compat-api/css */
|
| 267 |
-
* {
|
| 268 |
-
scrollbar-width: thin;
|
| 269 |
-
scrollbar-color: rgba(61, 212, 159, 0.4) rgba(255, 255, 255, 0.05);
|
| 270 |
-
}
|
| 271 |
-
*::-webkit-scrollbar {
|
| 272 |
-
width: 8px;
|
| 273 |
-
height: 8px;
|
| 274 |
-
}
|
| 275 |
-
*::-webkit-scrollbar-track {
|
| 276 |
-
background: rgba(255, 255, 255, 0.05);
|
| 277 |
-
border-radius: 4px;
|
| 278 |
-
}
|
| 279 |
-
*::-webkit-scrollbar-thumb {
|
| 280 |
-
background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
|
| 281 |
-
border-radius: 4px;
|
| 282 |
-
border: 1px solid rgba(119, 247, 209, 0.2);
|
| 283 |
-
}
|
| 284 |
-
*::-webkit-scrollbar-thumb:hover {
|
| 285 |
-
background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
|
| 286 |
-
}
|
| 287 |
-
*::-webkit-scrollbar-corner {
|
| 288 |
-
background: rgba(255, 255, 255, 0.05);
|
| 289 |
-
}
|
| 290 |
-
@media (prefers-color-scheme: light) {
|
| 291 |
-
* {
|
| 292 |
-
scrollbar-color: rgba(61, 212, 159, 0.4) rgba(0, 0, 0, 0.05);
|
| 293 |
-
}
|
| 294 |
-
*::-webkit-scrollbar-track {
|
| 295 |
-
background: rgba(0, 0, 0, 0.05);
|
| 296 |
-
}
|
| 297 |
-
*::-webkit-scrollbar-thumb {
|
| 298 |
-
background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
|
| 299 |
-
border-color: rgba(0, 0, 0, 0.1);
|
| 300 |
-
}
|
| 301 |
-
*::-webkit-scrollbar-thumb:hover {
|
| 302 |
-
background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
|
| 303 |
-
}
|
| 304 |
-
*::-webkit-scrollbar-corner {
|
| 305 |
-
background: rgba(0, 0, 0, 0.05);
|
| 306 |
-
}
|
| 307 |
-
}
|
| 308 |
/* stylelint-enable compat-api/css */
|
|
|
|
| 1 |
+
/* Style only the top-level app title to avoid affecting headings elsewhere */
|
| 2 |
+
.app-title {
|
| 3 |
+
text-align: center;
|
| 4 |
+
/* Ensure main title appears first, then our two subtitle lines */
|
| 5 |
+
display: grid;
|
| 6 |
+
justify-items: center;
|
| 7 |
+
}
|
| 8 |
+
.app-title::after {
|
| 9 |
+
grid-row: 2;
|
| 10 |
+
content: "General purpose tools useful for any agent.";
|
| 11 |
+
display: block;
|
| 12 |
+
font-size: 1rem;
|
| 13 |
+
font-weight: 400;
|
| 14 |
+
opacity: 0.9;
|
| 15 |
+
margin-top: 2px;
|
| 16 |
+
white-space: pre-wrap;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
/* Sidebar Container */
|
| 20 |
+
.app-sidebar {
|
| 21 |
+
background: var(--body-background-fill) !important;
|
| 22 |
+
border-right: 1px solid rgba(255, 255, 255, 0.08) !important;
|
| 23 |
+
}
|
| 24 |
+
@media (prefers-color-scheme: light) {
|
| 25 |
+
.app-sidebar {
|
| 26 |
+
border-right: 1px solid rgba(0, 0, 0, 0.08) !important;
|
| 27 |
+
}
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
/* Historical safeguard: if any h1 appears inside tabs, don't attach pseudo content */
|
| 31 |
+
.gradio-container [role="tabpanel"] h1::before,
|
| 32 |
+
.gradio-container [role="tabpanel"] h1::after {
|
| 33 |
+
content: none !important;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
/* Information accordion - modern info cards */
|
| 37 |
+
.info-accordion {
|
| 38 |
+
margin: 8px 0 2px;
|
| 39 |
+
}
|
| 40 |
+
.info-grid {
|
| 41 |
+
display: grid;
|
| 42 |
+
gap: 12px;
|
| 43 |
+
/* Force a 2x2 layout on medium+ screens */
|
| 44 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 45 |
+
align-items: stretch;
|
| 46 |
+
}
|
| 47 |
+
/* On narrow screens, stack into a single column */
|
| 48 |
+
@media (max-width: 800px) {
|
| 49 |
+
.info-grid {
|
| 50 |
+
grid-template-columns: 1fr;
|
| 51 |
+
}
|
| 52 |
+
}
|
| 53 |
+
.info-card {
|
| 54 |
+
display: flex;
|
| 55 |
+
gap: 14px;
|
| 56 |
+
padding: 14px 16px;
|
| 57 |
+
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 58 |
+
background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03));
|
| 59 |
+
border-radius: 12px;
|
| 60 |
+
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
|
| 61 |
+
position: relative;
|
| 62 |
+
overflow: hidden;
|
| 63 |
+
-webkit-backdrop-filter: blur(2px);
|
| 64 |
+
backdrop-filter: blur(2px);
|
| 65 |
+
}
|
| 66 |
+
.info-card::before {
|
| 67 |
+
content: "";
|
| 68 |
+
position: absolute;
|
| 69 |
+
inset: 0;
|
| 70 |
+
border-radius: 12px;
|
| 71 |
+
pointer-events: none;
|
| 72 |
+
background: linear-gradient(90deg, rgba(99,102,241,0.06), rgba(59,130,246,0.05));
|
| 73 |
+
}
|
| 74 |
+
.info-card__icon {
|
| 75 |
+
font-size: 24px;
|
| 76 |
+
flex: 0 0 28px;
|
| 77 |
+
line-height: 1;
|
| 78 |
+
filter: saturate(1.1);
|
| 79 |
+
}
|
| 80 |
+
.info-card__body {
|
| 81 |
+
min-width: 0;
|
| 82 |
+
}
|
| 83 |
+
.info-card__body h3 {
|
| 84 |
+
margin: 0 0 6px;
|
| 85 |
+
font-size: 1.05rem;
|
| 86 |
+
}
|
| 87 |
+
.info-card__body p {
|
| 88 |
+
margin: 6px 0;
|
| 89 |
+
opacity: 0.95;
|
| 90 |
+
}
|
| 91 |
+
/* Readable code blocks inside info cards */
|
| 92 |
+
.info-card pre {
|
| 93 |
+
margin: 8px 0;
|
| 94 |
+
padding: 10px 12px;
|
| 95 |
+
background: rgba(20, 20, 30, 0.55);
|
| 96 |
+
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 97 |
+
border-radius: 10px;
|
| 98 |
+
overflow-x: auto;
|
| 99 |
+
white-space: pre;
|
| 100 |
+
}
|
| 101 |
+
.info-card code {
|
| 102 |
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
|
| 103 |
+
font-size: 0.95em;
|
| 104 |
+
}
|
| 105 |
+
.info-card pre code {
|
| 106 |
+
display: block;
|
| 107 |
+
}
|
| 108 |
+
.info-card p {
|
| 109 |
+
word-wrap: break-word;
|
| 110 |
+
overflow-wrap: break-word;
|
| 111 |
+
}
|
| 112 |
+
.info-card p code {
|
| 113 |
+
word-break: break-all;
|
| 114 |
+
}
|
| 115 |
+
.info-list {
|
| 116 |
+
margin: 6px 0 0 18px;
|
| 117 |
+
padding: 0;
|
| 118 |
+
}
|
| 119 |
+
.info-hint {
|
| 120 |
+
margin-top: 8px;
|
| 121 |
+
font-size: 0.9em;
|
| 122 |
+
opacity: 0.9;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
/* Light theme adjustments */
|
| 126 |
+
@media (prefers-color-scheme: light) {
|
| 127 |
+
.info-card {
|
| 128 |
+
border-color: rgba(0, 0, 0, 0.08);
|
| 129 |
+
background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.9));
|
| 130 |
+
}
|
| 131 |
+
.info-card::before {
|
| 132 |
+
background: linear-gradient(90deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06));
|
| 133 |
+
}
|
| 134 |
+
.info-card pre {
|
| 135 |
+
background: rgba(245, 246, 250, 0.95);
|
| 136 |
+
border-color: rgba(0, 0, 0, 0.08);
|
| 137 |
+
}
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/* Sidebar Navigation - styled like the previous tabs */
|
| 141 |
+
.sidebar-nav {
|
| 142 |
+
background: transparent !important;
|
| 143 |
+
border: none !important;
|
| 144 |
+
padding: 0 !important;
|
| 145 |
+
}
|
| 146 |
+
.sidebar-nav .form {
|
| 147 |
+
gap: 8px !important;
|
| 148 |
+
display: flex !important;
|
| 149 |
+
flex-direction: column !important;
|
| 150 |
+
border: none !important;
|
| 151 |
+
background: transparent !important;
|
| 152 |
+
}
|
| 153 |
+
.sidebar-nav label {
|
| 154 |
+
display: flex !important;
|
| 155 |
+
align-items: center !important;
|
| 156 |
+
padding: 10px 12px !important;
|
| 157 |
+
border-radius: 10px !important;
|
| 158 |
+
border: 1px solid rgba(255, 255, 255, 0.08) !important;
|
| 159 |
+
background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(255,255,255,0.03)) !important;
|
| 160 |
+
transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
|
| 161 |
+
cursor: pointer !important;
|
| 162 |
+
margin-bottom: 0 !important;
|
| 163 |
+
width: 100% !important;
|
| 164 |
+
justify-content: flex-start !important;
|
| 165 |
+
text-align: left !important;
|
| 166 |
+
}
|
| 167 |
+
.sidebar-nav label:hover {
|
| 168 |
+
border-color: rgba(99,102,241,0.28) !important;
|
| 169 |
+
background: linear-gradient(180deg, rgba(99,102,241,0.10), rgba(59,130,246,0.08)) !important;
|
| 170 |
+
}
|
| 171 |
+
/* Selected state - Gradio adds 'selected' class to the label in some versions, or we check input:checked */
|
| 172 |
+
.sidebar-nav label.selected {
|
| 173 |
+
border-color: rgba(99,102,241,0.35) !important;
|
| 174 |
+
box-shadow: inset 0 0 0 1px rgba(99,102,241,0.25), 0 1px 2px rgba(0,0,0,0.25) !important;
|
| 175 |
+
background: linear-gradient(180deg, rgba(99,102,241,0.18), rgba(59,130,246,0.14)) !important;
|
| 176 |
+
color: rgba(255, 255, 255, 0.95) !important;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
/* Light theme adjustments for sidebar */
|
| 180 |
+
@media (prefers-color-scheme: light) {
|
| 181 |
+
.sidebar-nav label {
|
| 182 |
+
border-color: rgba(0, 0, 0, 0.08) !important;
|
| 183 |
+
background: linear-gradient(180deg, rgba(255,255,255,0.95), rgba(255,255,255,0.90)) !important;
|
| 184 |
+
color: rgba(0, 0, 0, 0.85) !important;
|
| 185 |
+
}
|
| 186 |
+
.sidebar-nav label:hover {
|
| 187 |
+
border-color: rgba(99,102,241,0.25) !important;
|
| 188 |
+
background: linear-gradient(180deg, rgba(99,102,241,0.08), rgba(59,130,246,0.06)) !important;
|
| 189 |
+
}
|
| 190 |
+
.sidebar-nav label.selected {
|
| 191 |
+
border-color: rgba(99,102,241,0.35) !important;
|
| 192 |
+
background: linear-gradient(180deg, rgba(99,102,241,0.16), rgba(59,130,246,0.12)) !important;
|
| 193 |
+
color: rgba(0, 0, 0, 0.85) !important;
|
| 194 |
+
}
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
/* Hide scrollbars/arrows that can appear on the description block in some browsers */
|
| 198 |
+
/* stylelint-disable compat-api/css */
|
| 199 |
+
article.prose, .prose, .gr-prose {
|
| 200 |
+
overflow: visible !important;
|
| 201 |
+
max-height: none !important;
|
| 202 |
+
-ms-overflow-style: none !important; /* IE/Edge */
|
| 203 |
+
scrollbar-width: none !important; /* Firefox */
|
| 204 |
+
}
|
| 205 |
+
/* stylelint-enable compat-api/css */
|
| 206 |
+
article.prose::-webkit-scrollbar,
|
| 207 |
+
.prose::-webkit-scrollbar,
|
| 208 |
+
.gr-prose::-webkit-scrollbar {
|
| 209 |
+
display: none !important; /* Chrome/Safari */
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
/* Fix for white background on single-line inputs in dark mode */
|
| 213 |
+
.gradio-container input[type="text"],
|
| 214 |
+
.gradio-container input[type="password"],
|
| 215 |
+
.gradio-container input[type="number"],
|
| 216 |
+
.gradio-container input[type="email"] {
|
| 217 |
+
background-color: var(--input-background-fill) !important;
|
| 218 |
+
color: var(--body-text-color) !important;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
/* Custom glossy purple styling for primary action buttons */
|
| 222 |
+
.gradio-container button.primary {
|
| 223 |
+
border: 1px solid rgba(99, 102, 241, 0.35) !important;
|
| 224 |
+
background: linear-gradient(180deg, rgba(99, 102, 241, 0.25), rgba(59, 130, 246, 0.20)) !important;
|
| 225 |
+
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.12), 0 2px 4px rgba(0, 0, 0, 0.15) !important;
|
| 226 |
+
color: rgba(255, 255, 255, 0.95) !important;
|
| 227 |
+
transition: background .2s ease, border-color .2s ease, box-shadow .2s ease, transform .06s ease !important;
|
| 228 |
+
}
|
| 229 |
+
.gradio-container button.primary:hover {
|
| 230 |
+
border-color: rgba(99, 102, 241, 0.5) !important;
|
| 231 |
+
background: linear-gradient(180deg, rgba(99, 102, 241, 0.35), rgba(59, 130, 246, 0.28)) !important;
|
| 232 |
+
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 3px 6px rgba(0, 0, 0, 0.2) !important;
|
| 233 |
+
}
|
| 234 |
+
.gradio-container button.primary:active {
|
| 235 |
+
transform: scale(0.98) !important;
|
| 236 |
+
box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.2), 0 1px 2px rgba(0, 0, 0, 0.1) !important;
|
| 237 |
+
}
|
| 238 |
+
@media (prefers-color-scheme: light) {
|
| 239 |
+
.gradio-container button.primary {
|
| 240 |
+
border-color: rgba(99, 102, 241, 0.4) !important;
|
| 241 |
+
background: linear-gradient(180deg, rgba(99, 102, 241, 0.85), rgba(79, 70, 229, 0.75)) !important;
|
| 242 |
+
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.25), 0 2px 4px rgba(0, 0, 0, 0.12) !important;
|
| 243 |
+
color: rgba(255, 255, 255, 0.98) !important;
|
| 244 |
+
}
|
| 245 |
+
.gradio-container button.primary:hover {
|
| 246 |
+
background: linear-gradient(180deg, rgba(99, 102, 241, 0.95), rgba(79, 70, 229, 0.85)) !important;
|
| 247 |
+
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.3), 0 3px 6px rgba(0, 0, 0, 0.15) !important;
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
/* Hide the actual tabs since we use the sidebar to control them */
|
| 252 |
+
.hidden-tabs .tab-nav,
|
| 253 |
+
.hidden-tabs [role="tablist"] {
|
| 254 |
+
display: none !important;
|
| 255 |
+
}
|
| 256 |
+
/* Hide the entire first row of the tabs container (contains tab buttons + overflow) */
|
| 257 |
+
.hidden-tabs > div:first-child {
|
| 258 |
+
display: none !important;
|
| 259 |
+
}
|
| 260 |
+
/* Ensure audio component buttons remain visible - they're inside tab panels, not the first row */
|
| 261 |
+
.hidden-tabs [role="tabpanel"] button {
|
| 262 |
+
display: inline-flex !important;
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
/* Custom scrollbar styling - Progressive enhancement, falls back to default scrollbars */
|
| 266 |
+
/* stylelint-disable compat-api/css */
|
| 267 |
+
* {
|
| 268 |
+
scrollbar-width: thin;
|
| 269 |
+
scrollbar-color: rgba(61, 212, 159, 0.4) rgba(255, 255, 255, 0.05);
|
| 270 |
+
}
|
| 271 |
+
*::-webkit-scrollbar {
|
| 272 |
+
width: 8px;
|
| 273 |
+
height: 8px;
|
| 274 |
+
}
|
| 275 |
+
*::-webkit-scrollbar-track {
|
| 276 |
+
background: rgba(255, 255, 255, 0.05);
|
| 277 |
+
border-radius: 4px;
|
| 278 |
+
}
|
| 279 |
+
*::-webkit-scrollbar-thumb {
|
| 280 |
+
background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
|
| 281 |
+
border-radius: 4px;
|
| 282 |
+
border: 1px solid rgba(119, 247, 209, 0.2);
|
| 283 |
+
}
|
| 284 |
+
*::-webkit-scrollbar-thumb:hover {
|
| 285 |
+
background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
|
| 286 |
+
}
|
| 287 |
+
*::-webkit-scrollbar-corner {
|
| 288 |
+
background: rgba(255, 255, 255, 0.05);
|
| 289 |
+
}
|
| 290 |
+
@media (prefers-color-scheme: light) {
|
| 291 |
+
* {
|
| 292 |
+
scrollbar-color: rgba(61, 212, 159, 0.4) rgba(0, 0, 0, 0.05);
|
| 293 |
+
}
|
| 294 |
+
*::-webkit-scrollbar-track {
|
| 295 |
+
background: rgba(0, 0, 0, 0.05);
|
| 296 |
+
}
|
| 297 |
+
*::-webkit-scrollbar-thumb {
|
| 298 |
+
background: linear-gradient(180deg, rgba(61, 212, 159, 0.5), rgba(17, 186, 136, 0.4));
|
| 299 |
+
border-color: rgba(0, 0, 0, 0.1);
|
| 300 |
+
}
|
| 301 |
+
*::-webkit-scrollbar-thumb:hover {
|
| 302 |
+
background: linear-gradient(180deg, rgba(85, 250, 192, 0.7), rgba(65, 184, 131, 0.6));
|
| 303 |
+
}
|
| 304 |
+
*::-webkit-scrollbar-corner {
|
| 305 |
+
background: rgba(0, 0, 0, 0.05);
|
| 306 |
+
}
|
| 307 |
+
}
|
| 308 |
/* stylelint-enable compat-api/css */
|