Update app.py
Browse files
app.py
CHANGED
|
@@ -315,14 +315,14 @@ def _format_markdown(
|
|
| 315 |
|
| 316 |
|
| 317 |
def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
| 318 |
-
url: Annotated[str, "The absolute URL to fetch (must return HTML)."]
|
|
|
|
| 319 |
verbosity: Annotated[str, "Controls body length: one of 'Brief', 'Standard', or 'Full'."] = "Standard",
|
| 320 |
include_metadata: Annotated[bool, "Include a Metadata section (description, site name, canonical, lang, fetched URL)."] = True,
|
| 321 |
include_text: Annotated[bool, "Include the readable main text extracted with Readability."] = True,
|
| 322 |
include_links: Annotated[bool, "Include outbound links discovered in the readable section."] = True,
|
| 323 |
max_chars: Annotated[int, "Hard cap for body characters after the verbosity preset. Use 0 to disable the cap."] = 3000,
|
| 324 |
max_links: Annotated[int, "Maximum number of links to include from the readable content. Set 0 to omit links."] = 20,
|
| 325 |
-
full_page_markdown: Annotated[bool, "If true, return the page as full Markdown (Content Scraper mode) instead of a compact summary."] = False,
|
| 326 |
) -> str:
|
| 327 |
"""
|
| 328 |
Fetch a web page and return a compact Markdown summary containing title, key
|
|
@@ -330,15 +330,15 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
|
| 330 |
|
| 331 |
Args:
|
| 332 |
url: The absolute URL to fetch (must return HTML).
|
|
|
|
|
|
|
|
|
|
| 333 |
verbosity: Controls body length: one of 'Brief', 'Standard', or 'Full'.
|
| 334 |
include_metadata: Include a Metadata section (description, site name, canonical, lang, fetched URL).
|
| 335 |
include_text: Include the readable main text extracted with Readability.
|
| 336 |
include_links: Include outbound links discovered in the readable section.
|
| 337 |
max_chars: Hard cap for body characters after the verbosity preset. Use 0 to disable the cap.
|
| 338 |
max_links: Maximum number of links to include from the readable content. Set 0 to omit links.
|
| 339 |
-
full_page_markdown: If True, return the page converted to full Markdown (Content Scraper mode)
|
| 340 |
-
instead of the compact summary. This ignores verbosity/include_* and max_* limits and
|
| 341 |
-
attempts to convert the main content area to Markdown with headings preserved.
|
| 342 |
|
| 343 |
Returns:
|
| 344 |
str: Markdown that may contain the following sections:
|
|
@@ -730,13 +730,13 @@ fetch_interface = gr.Interface(
|
|
| 730 |
fn=Fetch_Webpage, # connect the function to the UI
|
| 731 |
inputs=[
|
| 732 |
gr.Textbox(label="URL", placeholder="https://example.com/article"),
|
|
|
|
| 733 |
gr.Dropdown(label="Verbosity", choices=["Brief", "Standard", "Full"], value="Standard"),
|
| 734 |
gr.Checkbox(value=True, label="Include Metadata"),
|
| 735 |
gr.Checkbox(value=True, label="Include Main Text"),
|
| 736 |
gr.Checkbox(value=True, label="Include Links"),
|
| 737 |
gr.Slider(400, 12000, value=3000, step=100, label="Max Characters (body text)"),
|
| 738 |
gr.Slider(0, 100, value=20, step=1, label="Max Links"),
|
| 739 |
-
gr.Checkbox(value=False, label="Full-page Markdown (Content Scraper mode)"),
|
| 740 |
],
|
| 741 |
outputs=gr.Markdown(label="Extracted Summary"),
|
| 742 |
title="Fetch Webpage",
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
| 318 |
+
url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
|
| 319 |
+
full_page_markdown: Annotated[bool, "If true, return the page as full Markdown (Content Scraper mode) instead of a compact summary."] = False,
|
| 320 |
verbosity: Annotated[str, "Controls body length: one of 'Brief', 'Standard', or 'Full'."] = "Standard",
|
| 321 |
include_metadata: Annotated[bool, "Include a Metadata section (description, site name, canonical, lang, fetched URL)."] = True,
|
| 322 |
include_text: Annotated[bool, "Include the readable main text extracted with Readability."] = True,
|
| 323 |
include_links: Annotated[bool, "Include outbound links discovered in the readable section."] = True,
|
| 324 |
max_chars: Annotated[int, "Hard cap for body characters after the verbosity preset. Use 0 to disable the cap."] = 3000,
|
| 325 |
max_links: Annotated[int, "Maximum number of links to include from the readable content. Set 0 to omit links."] = 20,
|
|
|
|
| 326 |
) -> str:
|
| 327 |
"""
|
| 328 |
Fetch a web page and return a compact Markdown summary containing title, key
|
|
|
|
| 330 |
|
| 331 |
Args:
|
| 332 |
url: The absolute URL to fetch (must return HTML).
|
| 333 |
+
full_page_markdown: If True, return the page converted to full Markdown (Content Scraper mode)
|
| 334 |
+
instead of the compact summary. This ignores verbosity/include_* and max_* limits and
|
| 335 |
+
attempts to convert the main content area to Markdown with headings preserved.
|
| 336 |
verbosity: Controls body length: one of 'Brief', 'Standard', or 'Full'.
|
| 337 |
include_metadata: Include a Metadata section (description, site name, canonical, lang, fetched URL).
|
| 338 |
include_text: Include the readable main text extracted with Readability.
|
| 339 |
include_links: Include outbound links discovered in the readable section.
|
| 340 |
max_chars: Hard cap for body characters after the verbosity preset. Use 0 to disable the cap.
|
| 341 |
max_links: Maximum number of links to include from the readable content. Set 0 to omit links.
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
Returns:
|
| 344 |
str: Markdown that may contain the following sections:
|
|
|
|
| 730 |
fn=Fetch_Webpage, # connect the function to the UI
|
| 731 |
inputs=[
|
| 732 |
gr.Textbox(label="URL", placeholder="https://example.com/article"),
|
| 733 |
+
gr.Checkbox(value=False, label="Full-page Markdown (Content Scraper mode)"),
|
| 734 |
gr.Dropdown(label="Verbosity", choices=["Brief", "Standard", "Full"], value="Standard"),
|
| 735 |
gr.Checkbox(value=True, label="Include Metadata"),
|
| 736 |
gr.Checkbox(value=True, label="Include Main Text"),
|
| 737 |
gr.Checkbox(value=True, label="Include Links"),
|
| 738 |
gr.Slider(400, 12000, value=3000, step=100, label="Max Characters (body text)"),
|
| 739 |
gr.Slider(0, 100, value=20, step=1, label="Max Links"),
|
|
|
|
| 740 |
],
|
| 741 |
outputs=gr.Markdown(label="Extracted Summary"),
|
| 742 |
title="Fetch Webpage",
|