siam3310 committed on
Commit
40ac7d2
·
verified ·
1 Parent(s): 3b69fa9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -38
app.py CHANGED
@@ -19,10 +19,8 @@ import os
19
  def scrape_website_content(url: str) -> Tuple[str, str]:
20
  """
21
  Scrape a website and return its main content formatted as markdown and a downloadable file path.
22
-
23
  Args:
24
  url (str): The URL to scrape (can include or omit http/https protocol)
25
-
26
  Returns:
27
  Tuple[str, str]: The scraped content formatted as markdown, and a file path for download
28
  """
@@ -444,6 +442,7 @@ def fibwatch_latest_to_originals(url: str) -> Tuple[str, str]:
444
  except Exception as e:
445
  return str(e), None
446
  # Create Gradio interfaces for each function
 
447
  def create_mcp_interface():
448
  """Create Gradio interface that exposes web scraping tools as MCP functions."""
449
  # Create individual interfaces for each tool
@@ -465,7 +464,8 @@ def create_mcp_interface():
465
  ],
466
  title="Website Content Scraper",
467
  description="Extract and format website content as markdown",
468
- api_name="scrape_content" )
 
469
 
470
  sitemap_interface = gr.Interface(
471
  fn=generate_sitemap_for_ui,
@@ -487,6 +487,7 @@ def create_mcp_interface():
487
  description="Generate a sitemap of all links found on a webpage",
488
  api_name="generate_sitemap"
489
  )
 
490
  bulk_extract_interface = gr.Interface(
491
  fn=extract_all_content_for_ui,
492
  inputs=gr.Textbox(
@@ -505,9 +506,9 @@ def create_mcp_interface():
505
  ],
506
  title="Bulk Content Extractor",
507
  description="Extract text content from all internal links and download as ZIP",
508
- api_name="extract_all_content" )
 
509
 
510
- # Enhanced sitemap interface with configurable limits
511
  sitemap_limited_interface = gr.Interface(
512
  fn=generate_sitemap_with_limit,
513
  inputs=[
@@ -538,20 +539,7 @@ def create_mcp_interface():
538
  api_name="generate_sitemap_limited"
539
  )
540
 
541
- # Enhanced bulk extract interface with configurable limits
542
  bulk_limited_interface = gr.Interface(
543
- fibwatch_interface = gr.Interface(
544
- fn=fibwatch_latest_to_originals,
545
- inputs=gr.Textbox(
546
- label="Fibwatch Page",
547
- placeholder="https://fibwatch.art/videos/latest?page_id=1"
548
- ),
549
- outputs=[
550
- gr.Textbox(label="Original Links", lines=10),
551
- gr.File(label="Download")
552
- ],
553
- title="Fibwatch Scraper"
554
- )
555
  fn=extract_limited_content_as_zip,
556
  inputs=[
557
  gr.Textbox(
@@ -581,29 +569,41 @@ def create_mcp_interface():
581
  api_name="extract_limited_content"
582
  )
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  # Combine into tabbed interface
585
- demo = gr.TabbedInterface(
586
- [
587
- scrape_interface,
588
- sitemap_interface,
589
- sitemap_limited_interface,
590
- bulk_extract_interface,
591
- bulk_limited_interface,
592
- fibwatch_interface,
593
- ],
594
- [
595
- "Content Scraper",
596
- "All Links Sitemap",
597
- "Limited Sitemap",
598
- "Bulk Extractor",
599
- "Limited Bulk Extractor",
600
- "Fibwatch Scraper",
601
- ],
602
- title="🕷️ Web Scraper MCP Server"
603
- )
604
  return demo
605
 
606
-
607
  if __name__ == "__main__":
608
  # Create and launch the MCP server
609
  app = create_mcp_interface()
 
19
  def scrape_website_content(url: str) -> Tuple[str, str]:
20
  """
21
  Scrape a website and return its main content formatted as markdown and a downloadable file path.
 
22
  Args:
23
  url (str): The URL to scrape (can include or omit http/https protocol)
 
24
  Returns:
25
  Tuple[str, str]: The scraped content formatted as markdown, and a file path for download
26
  """
 
442
  except Exception as e:
443
  return str(e), None
444
  # Create Gradio interfaces for each function
445
+ # Create Gradio interfaces for each function
446
  def create_mcp_interface():
447
  """Create Gradio interface that exposes web scraping tools as MCP functions."""
448
  # Create individual interfaces for each tool
 
464
  ],
465
  title="Website Content Scraper",
466
  description="Extract and format website content as markdown",
467
+ api_name="scrape_content"
468
+ )
469
 
470
  sitemap_interface = gr.Interface(
471
  fn=generate_sitemap_for_ui,
 
487
  description="Generate a sitemap of all links found on a webpage",
488
  api_name="generate_sitemap"
489
  )
490
+
491
  bulk_extract_interface = gr.Interface(
492
  fn=extract_all_content_for_ui,
493
  inputs=gr.Textbox(
 
506
  ],
507
  title="Bulk Content Extractor",
508
  description="Extract text content from all internal links and download as ZIP",
509
+ api_name="extract_all_content"
510
+ )
511
 
 
512
  sitemap_limited_interface = gr.Interface(
513
  fn=generate_sitemap_with_limit,
514
  inputs=[
 
539
  api_name="generate_sitemap_limited"
540
  )
541
 
 
542
  bulk_limited_interface = gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
543
  fn=extract_limited_content_as_zip,
544
  inputs=[
545
  gr.Textbox(
 
569
  api_name="extract_limited_content"
570
  )
571
 
572
+ fibwatch_interface = gr.Interface(
573
+ fn=fibwatch_latest_to_originals,
574
+ inputs=gr.Textbox(
575
+ label="Fibwatch Page",
576
+ placeholder="https://fibwatch.art/videos/latest?page_id=1"
577
+ ),
578
+ outputs=[
579
+ gr.Textbox(label="Original Links", lines=10),
580
+ gr.File(label="Download")
581
+ ],
582
+ title="Fibwatch Scraper",
583
+ description="Extract original b-cdn.net links from Fibwatch pages"
584
+ )
585
+
586
  # Combine into tabbed interface
587
+ demo = gr.TabbedInterface(
588
+ [
589
+ scrape_interface,
590
+ sitemap_interface,
591
+ bulk_extract_interface,
592
+ bulk_limited_interface,
593
+ fibwatch_interface,
594
+ ],
595
+ [
596
+ "Content Scraper",
597
+ "All Links Sitemap",
598
+ "Bulk Extractor",
599
+ "Limited Bulk Extractor",
600
+ "Fibwatch Scraper",
601
+ ],
602
+ title="🕷️ Web Scraper MCP Server"
603
+ )
604
+
 
605
  return demo
606
 
 
607
  if __name__ == "__main__":
608
  # Create and launch the MCP server
609
  app = create_mcp_interface()