Karim shoair committed on
Commit ·
135c17b
1
Parent(s): 354b388
refactor: better implementation for the mcp mode
Browse files
- scrapling/cli.py +2 -2
- scrapling/core/ai.py +26 -10
scrapling/cli.py
CHANGED
|
@@ -154,8 +154,8 @@ def install(force): # pragma: no cover
|
|
| 154 |
def mcp(http, host, port):
|
| 155 |
from scrapling.core.ai import ScraplingMCPServer
|
| 156 |
|
| 157 |
-
server = ScraplingMCPServer(
|
| 158 |
-
server.
|
| 159 |
|
| 160 |
|
| 161 |
@command(help="Interactive scraping console")
|
|
|
|
| 154 |
def mcp(http, host, port):
|
| 155 |
from scrapling.core.ai import ScraplingMCPServer
|
| 156 |
|
| 157 |
+
server = ScraplingMCPServer()
|
| 158 |
+
server.serve(http, host, port)
|
| 159 |
|
| 160 |
|
| 161 |
@command(help="Interactive scraping console")
|
scrapling/core/ai.py
CHANGED
|
@@ -41,10 +41,8 @@ def _ContentTranslator(content: Generator[str, None, None], page: _ScraplingResp
|
|
| 41 |
return ResponseModel(status=page.status, content=[result for result in content], url=page.url)
|
| 42 |
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
@_server.tool()
|
| 48 |
def get(
|
| 49 |
url: str,
|
| 50 |
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
|
@@ -122,7 +120,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 122 |
page,
|
| 123 |
)
|
| 124 |
|
| 125 |
-
@
|
| 126 |
async def bulk_get(
|
| 127 |
urls: Tuple[str, ...],
|
| 128 |
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
|
@@ -208,7 +206,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 208 |
for page in responses
|
| 209 |
]
|
| 210 |
|
| 211 |
-
@
|
| 212 |
async def fetch(
|
| 213 |
url: str,
|
| 214 |
extraction_type: extraction_types = "markdown",
|
|
@@ -296,7 +294,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 296 |
page,
|
| 297 |
)
|
| 298 |
|
| 299 |
-
@
|
| 300 |
async def bulk_fetch(
|
| 301 |
urls: Tuple[str, ...],
|
| 302 |
extraction_type: extraction_types = "markdown",
|
|
@@ -389,7 +387,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 389 |
for page in responses
|
| 390 |
]
|
| 391 |
|
| 392 |
-
@
|
| 393 |
async def stealthy_fetch(
|
| 394 |
url: str,
|
| 395 |
extraction_type: extraction_types = "markdown",
|
|
@@ -488,7 +486,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 488 |
page,
|
| 489 |
)
|
| 490 |
|
| 491 |
-
@
|
| 492 |
async def bulk_stealthy_fetch(
|
| 493 |
urls: Tuple[str, ...],
|
| 494 |
extraction_type: extraction_types = "markdown",
|
|
@@ -592,4 +590,22 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
|
|
| 592 |
for page in responses
|
| 593 |
]
|
| 594 |
|
| 595 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
return ResponseModel(status=page.status, content=[result for result in content], url=page.url)
|
| 42 |
|
| 43 |
|
| 44 |
+
class ScraplingMCPServer:
|
| 45 |
+
@staticmethod
|
|
|
|
|
|
|
| 46 |
def get(
|
| 47 |
url: str,
|
| 48 |
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
|
|
|
| 120 |
page,
|
| 121 |
)
|
| 122 |
|
| 123 |
+
@staticmethod
|
| 124 |
async def bulk_get(
|
| 125 |
urls: Tuple[str, ...],
|
| 126 |
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
|
|
|
| 206 |
for page in responses
|
| 207 |
]
|
| 208 |
|
| 209 |
+
@staticmethod
|
| 210 |
async def fetch(
|
| 211 |
url: str,
|
| 212 |
extraction_type: extraction_types = "markdown",
|
|
|
|
| 294 |
page,
|
| 295 |
)
|
| 296 |
|
| 297 |
+
@staticmethod
|
| 298 |
async def bulk_fetch(
|
| 299 |
urls: Tuple[str, ...],
|
| 300 |
extraction_type: extraction_types = "markdown",
|
|
|
|
| 387 |
for page in responses
|
| 388 |
]
|
| 389 |
|
| 390 |
+
@staticmethod
|
| 391 |
async def stealthy_fetch(
|
| 392 |
url: str,
|
| 393 |
extraction_type: extraction_types = "markdown",
|
|
|
|
| 486 |
page,
|
| 487 |
)
|
| 488 |
|
| 489 |
+
@staticmethod
|
| 490 |
async def bulk_stealthy_fetch(
|
| 491 |
urls: Tuple[str, ...],
|
| 492 |
extraction_type: extraction_types = "markdown",
|
|
|
|
| 590 |
for page in responses
|
| 591 |
]
|
| 592 |
|
| 593 |
+
def serve(self, http: bool, host: str, port: int):
|
| 594 |
+
"""Serve the MCP server."""
|
| 595 |
+
server = FastMCP(name="Scrapling", host=host, port=port)
|
| 596 |
+
server.add_tool(self.get, title="get", description=self.get.__doc__, structured_output=True)
|
| 597 |
+
server.add_tool(self.bulk_get, title="bulk_get", description=self.bulk_get.__doc__, structured_output=True)
|
| 598 |
+
server.add_tool(self.fetch, title="fetch", description=self.fetch.__doc__, structured_output=True)
|
| 599 |
+
server.add_tool(
|
| 600 |
+
self.bulk_fetch, title="bulk_fetch", description=self.bulk_fetch.__doc__, structured_output=True
|
| 601 |
+
)
|
| 602 |
+
server.add_tool(
|
| 603 |
+
self.stealthy_fetch, title="stealthy_fetch", description=self.stealthy_fetch.__doc__, structured_output=True
|
| 604 |
+
)
|
| 605 |
+
server.add_tool(
|
| 606 |
+
self.bulk_stealthy_fetch,
|
| 607 |
+
title="bulk_stealthy_fetch",
|
| 608 |
+
description=self.bulk_stealthy_fetch.__doc__,
|
| 609 |
+
structured_output=True,
|
| 610 |
+
)
|
| 611 |
+
server.run(transport="stdio" if not http else "streamable-http")
|