Update app.py
Browse files
app.py
CHANGED
|
@@ -314,7 +314,7 @@ def Search_Structured( # <-- MCP tool #2 (Structured DDG)
|
|
| 314 |
max_results: int = 5,
|
| 315 |
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
| 316 |
"""
|
| 317 |
-
|
| 318 |
"""
|
| 319 |
if not input_query or not input_query.strip():
|
| 320 |
return []
|
|
@@ -335,7 +335,7 @@ def Search_Raw( # <-- MCP tool #3 (Unstructured DDG)
|
|
| 335 |
query: str,
|
| 336 |
) -> list[dict]:
|
| 337 |
"""
|
| 338 |
-
|
| 339 |
"""
|
| 340 |
if not query or not query.strip():
|
| 341 |
return []
|
|
@@ -357,30 +357,26 @@ def Search_Concise( # <-- MCP tool #4 (Concise DDG)
|
|
| 357 |
title_chars: int = 80,
|
| 358 |
) -> str:
|
| 359 |
"""
|
| 360 |
-
|
| 361 |
- Returns newline-delimited JSON (JSONL) with short keys:
|
| 362 |
-
|
| 363 |
-
- Titles and snippets are hard-capped to save tokens.
|
| 364 |
-
- Domain dedupe on by default to avoid near-duplicates.
|
| 365 |
|
| 366 |
Returns:
|
| 367 |
A compact string like:
|
| 368 |
{"t":"Example","u":"https://example.com/x"}
|
| 369 |
{"t":"Another…","u":"https://a.com/y","s":"Short snippet…"}
|
| 370 |
"""
|
| 371 |
-
|
| 372 |
if not query or not query.strip():
|
| 373 |
return ""
|
| 374 |
|
| 375 |
-
# Perform the search
|
| 376 |
try:
|
| 377 |
with DDGS() as ddgs:
|
| 378 |
raw = ddgs.text(query, max_results=max_results)
|
| 379 |
except Exception as e:
|
| 380 |
-
|
| 381 |
return json.dumps({"error": str(e)[:120]}, ensure_ascii=False, separators=(",", ":"))
|
| 382 |
|
| 383 |
-
# Optionally dedupe by domain to reduce redundant lines
|
| 384 |
seen_domains = set()
|
| 385 |
lines: List[str] = []
|
| 386 |
|
|
@@ -427,7 +423,7 @@ fetch_interface = gr.Interface(
|
|
| 427 |
gr.Slider(0, 100, value=20, step=1, label="Max Links"),
|
| 428 |
],
|
| 429 |
outputs=gr.Markdown(label="Extracted Summary"),
|
| 430 |
-
title="Fetch
|
| 431 |
description="Extract title, key metadata, readable text, and links. No noisy HTML.",
|
| 432 |
allow_flagging="never",
|
| 433 |
theme="Nymbo/Nymbo_Theme",
|
|
@@ -441,7 +437,7 @@ websearch_interface = gr.Interface(
|
|
| 441 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
| 442 |
],
|
| 443 |
outputs=gr.JSON(label="Search results"),
|
| 444 |
-
title="
|
| 445 |
description="Search the web using DuckDuckGo; returns snippet, title, and link.",
|
| 446 |
allow_flagging="never",
|
| 447 |
theme="Nymbo/Nymbo_Theme",
|
|
@@ -449,10 +445,10 @@ websearch_interface = gr.Interface(
|
|
| 449 |
|
| 450 |
# --- Unstructured DDG tab (matches your separate app’s output) ---
|
| 451 |
unstructured_interface = gr.Interface(
|
| 452 |
-
fn=ddg_unstructured,
|
| 453 |
-
inputs=gr.Textbox(label="Enter Search Query"),
|
| 454 |
-
outputs=gr.Textbox(label="Results", interactive=False),
|
| 455 |
-
title="
|
| 456 |
description="Returns the raw list of results (list[dict]) shown as text.",
|
| 457 |
allow_flagging="never",
|
| 458 |
theme="Nymbo/Nymbo_Theme",
|
|
@@ -461,7 +457,7 @@ unstructured_interface = gr.Interface(
|
|
| 461 |
|
| 462 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
| 463 |
concise_interface = gr.Interface(
|
| 464 |
-
fn=ddg_concise,
|
| 465 |
inputs=[
|
| 466 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 467 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
|
@@ -470,8 +466,8 @@ concise_interface = gr.Interface(
|
|
| 470 |
gr.Checkbox(value=True, label="Dedupe by domain"),
|
| 471 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
| 472 |
],
|
| 473 |
-
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
| 474 |
-
title="
|
| 475 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
| 476 |
allow_flagging="never",
|
| 477 |
theme="Nymbo/Nymbo_Theme",
|
|
@@ -482,7 +478,7 @@ concise_interface = gr.Interface(
|
|
| 482 |
demo = gr.TabbedInterface(
|
| 483 |
interface_list=[fetch_interface, websearch_interface, unstructured_interface, concise_interface],
|
| 484 |
tab_names=["Fetch", "Websearch", "Unstructured DDG", "DDG (Concise)"],
|
| 485 |
-
title="Web MCP — Fetch
|
| 486 |
theme="Nymbo/Nymbo_Theme",
|
| 487 |
)
|
| 488 |
|
|
|
|
| 314 |
max_results: int = 5,
|
| 315 |
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
| 316 |
"""
|
| 317 |
+
Run a DuckDuckGo search and return a list of {snippet, title, link}.
|
| 318 |
"""
|
| 319 |
if not input_query or not input_query.strip():
|
| 320 |
return []
|
|
|
|
| 335 |
query: str,
|
| 336 |
) -> list[dict]:
|
| 337 |
"""
|
| 338 |
+
Search using Native DDG client. Returns a plain list[dict] — exactly like your separate space.
|
| 339 |
"""
|
| 340 |
if not query or not query.strip():
|
| 341 |
return []
|
|
|
|
| 357 |
title_chars: int = 80,
|
| 358 |
) -> str:
|
| 359 |
"""
|
| 360 |
+
Minimal-output DuckDuckGo search designed to reduce tokens:
|
| 361 |
- Returns newline-delimited JSON (JSONL) with short keys:
|
| 362 |
+
t=title, u=url, s=snippet
|
|
|
|
|
|
|
| 363 |
|
| 364 |
Returns:
|
| 365 |
A compact string like:
|
| 366 |
{"t":"Example","u":"https://example.com/x"}
|
| 367 |
{"t":"Another…","u":"https://a.com/y","s":"Short snippet…"}
|
| 368 |
"""
|
| 369 |
+
|
| 370 |
if not query or not query.strip():
|
| 371 |
return ""
|
| 372 |
|
|
|
|
| 373 |
try:
|
| 374 |
with DDGS() as ddgs:
|
| 375 |
raw = ddgs.text(query, max_results=max_results)
|
| 376 |
except Exception as e:
|
| 377 |
+
|
| 378 |
return json.dumps({"error": str(e)[:120]}, ensure_ascii=False, separators=(",", ":"))
|
| 379 |
|
|
|
|
| 380 |
seen_domains = set()
|
| 381 |
lines: List[str] = []
|
| 382 |
|
|
|
|
| 423 |
gr.Slider(0, 100, value=20, step=1, label="Max Links"),
|
| 424 |
],
|
| 425 |
outputs=gr.Markdown(label="Extracted Summary"),
|
| 426 |
+
title="Fetch Webpage",
|
| 427 |
description="Extract title, key metadata, readable text, and links. No noisy HTML.",
|
| 428 |
allow_flagging="never",
|
| 429 |
theme="Nymbo/Nymbo_Theme",
|
|
|
|
| 437 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
| 438 |
],
|
| 439 |
outputs=gr.JSON(label="Search results"),
|
| 440 |
+
title="DuckDuckGo Search (Structured)",
|
| 441 |
description="Search the web using DuckDuckGo; returns snippet, title, and link.",
|
| 442 |
allow_flagging="never",
|
| 443 |
theme="Nymbo/Nymbo_Theme",
|
|
|
|
| 445 |
|
| 446 |
# --- Unstructured DDG tab (matches your separate app’s output) ---
|
| 447 |
unstructured_interface = gr.Interface(
|
| 448 |
+
fn=ddg_unstructured,
|
| 449 |
+
inputs=gr.Textbox(label="Enter Search Query"),
|
| 450 |
+
outputs=gr.Textbox(label="Results", interactive=False),
|
| 451 |
+
title="DuckDuckGo Search (Raw)",
|
| 452 |
description="Returns the raw list of results (list[dict]) shown as text.",
|
| 453 |
allow_flagging="never",
|
| 454 |
theme="Nymbo/Nymbo_Theme",
|
|
|
|
| 457 |
|
| 458 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
| 459 |
concise_interface = gr.Interface(
|
| 460 |
+
fn=ddg_concise,
|
| 461 |
inputs=[
|
| 462 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
| 463 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
|
|
|
| 466 |
gr.Checkbox(value=True, label="Dedupe by domain"),
|
| 467 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
| 468 |
],
|
| 469 |
+
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
| 470 |
+
title="DuckDuckGo Search (Concise)",
|
| 471 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
| 472 |
allow_flagging="never",
|
| 473 |
theme="Nymbo/Nymbo_Theme",
|
|
|
|
| 478 |
demo = gr.TabbedInterface(
|
| 479 |
interface_list=[fetch_interface, websearch_interface, unstructured_interface, concise_interface],
|
| 480 |
tab_names=["Fetch", "Websearch", "Unstructured DDG", "DDG (Concise)"],
|
| 481 |
+
title="Web MCP — Fetch & DuckDuckGo search with customizable output modes.",
|
| 482 |
theme="Nymbo/Nymbo_Theme",
|
| 483 |
)
|
| 484 |
|