Karim shoair committed on
Commit
135c17b
·
1 Parent(s): 354b388

refactor: better implementation for the mcp mode

Browse files
Files changed (2) hide show
  1. scrapling/cli.py +2 -2
  2. scrapling/core/ai.py +26 -10
scrapling/cli.py CHANGED
@@ -154,8 +154,8 @@ def install(force): # pragma: no cover
154
  def mcp(http, host, port):
155
  from scrapling.core.ai import ScraplingMCPServer
156
 
157
- server = ScraplingMCPServer(host, port)
158
- server.run(transport="stdio" if not http else "streamable-http")
159
 
160
 
161
  @command(help="Interactive scraping console")
 
154
  def mcp(http, host, port):
155
  from scrapling.core.ai import ScraplingMCPServer
156
 
157
+ server = ScraplingMCPServer()
158
+ server.serve(http, host, port)
159
 
160
 
161
  @command(help="Interactive scraping console")
scrapling/core/ai.py CHANGED
@@ -41,10 +41,8 @@ def _ContentTranslator(content: Generator[str, None, None], page: _ScraplingResp
41
  return ResponseModel(status=page.status, content=[result for result in content], url=page.url)
42
 
43
 
44
- def ScraplingMCPServer(host: str, port: int) -> FastMCP:
45
- _server = FastMCP(name="Scrapling", host=host, port=port)
46
-
47
- @_server.tool()
48
  def get(
49
  url: str,
50
  impersonate: Optional[BrowserTypeLiteral] = "chrome",
@@ -122,7 +120,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
122
  page,
123
  )
124
 
125
- @_server.tool()
126
  async def bulk_get(
127
  urls: Tuple[str, ...],
128
  impersonate: Optional[BrowserTypeLiteral] = "chrome",
@@ -208,7 +206,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
208
  for page in responses
209
  ]
210
 
211
- @_server.tool()
212
  async def fetch(
213
  url: str,
214
  extraction_type: extraction_types = "markdown",
@@ -296,7 +294,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
296
  page,
297
  )
298
 
299
- @_server.tool()
300
  async def bulk_fetch(
301
  urls: Tuple[str, ...],
302
  extraction_type: extraction_types = "markdown",
@@ -389,7 +387,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
389
  for page in responses
390
  ]
391
 
392
- @_server.tool()
393
  async def stealthy_fetch(
394
  url: str,
395
  extraction_type: extraction_types = "markdown",
@@ -488,7 +486,7 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
488
  page,
489
  )
490
 
491
- @_server.tool()
492
  async def bulk_stealthy_fetch(
493
  urls: Tuple[str, ...],
494
  extraction_type: extraction_types = "markdown",
@@ -592,4 +590,22 @@ def ScraplingMCPServer(host: str, port: int) -> FastMCP:
592
  for page in responses
593
  ]
594
 
595
- return _server
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  return ResponseModel(status=page.status, content=[result for result in content], url=page.url)
42
 
43
 
44
+ class ScraplingMCPServer:
45
+ @staticmethod
 
 
46
  def get(
47
  url: str,
48
  impersonate: Optional[BrowserTypeLiteral] = "chrome",
 
120
  page,
121
  )
122
 
123
+ @staticmethod
124
  async def bulk_get(
125
  urls: Tuple[str, ...],
126
  impersonate: Optional[BrowserTypeLiteral] = "chrome",
 
206
  for page in responses
207
  ]
208
 
209
+ @staticmethod
210
  async def fetch(
211
  url: str,
212
  extraction_type: extraction_types = "markdown",
 
294
  page,
295
  )
296
 
297
+ @staticmethod
298
  async def bulk_fetch(
299
  urls: Tuple[str, ...],
300
  extraction_type: extraction_types = "markdown",
 
387
  for page in responses
388
  ]
389
 
390
+ @staticmethod
391
  async def stealthy_fetch(
392
  url: str,
393
  extraction_type: extraction_types = "markdown",
 
486
  page,
487
  )
488
 
489
+ @staticmethod
490
  async def bulk_stealthy_fetch(
491
  urls: Tuple[str, ...],
492
  extraction_type: extraction_types = "markdown",
 
590
  for page in responses
591
  ]
592
 
593
+ def serve(self, http: bool, host: str, port: int):
594
+ """Serve the MCP server."""
595
+ server = FastMCP(name="Scrapling", host=host, port=port)
596
+ server.add_tool(self.get, title="get", description=self.get.__doc__, structured_output=True)
597
+ server.add_tool(self.bulk_get, title="bulk_get", description=self.bulk_get.__doc__, structured_output=True)
598
+ server.add_tool(self.fetch, title="fetch", description=self.fetch.__doc__, structured_output=True)
599
+ server.add_tool(
600
+ self.bulk_fetch, title="bulk_fetch", description=self.bulk_fetch.__doc__, structured_output=True
601
+ )
602
+ server.add_tool(
603
+ self.stealthy_fetch, title="stealthy_fetch", description=self.stealthy_fetch.__doc__, structured_output=True
604
+ )
605
+ server.add_tool(
606
+ self.bulk_stealthy_fetch,
607
+ title="bulk_stealthy_fetch",
608
+ description=self.bulk_stealthy_fetch.__doc__,
609
+ structured_output=True,
610
+ )
611
+ server.run(transport="stdio" if not http else "streamable-http")