NLarchive committed on
Commit
40bda89
·
verified ·
1 Parent(s): 2f76365

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -75
app.py CHANGED
@@ -1,5 +1,4 @@
1
- from typing import Dict, List, Union, Optional, Tuple
2
- from urllib.parse import urlparse
3
  import requests
4
  import time
5
  import json
@@ -98,21 +97,17 @@ def parse_huggingface_url(url: str) -> str:
98
 
99
  return json.dumps(result, indent=2)
100
 
101
- def parse_huggingface_url_with_summary(url: str, progress=gr.Progress()) -> tuple:
102
  """Parse URL and return both markdown summary and JSON."""
103
- progress(0, desc="🔍 Starting URL parsing...")
104
 
105
  if not url.strip():
106
  return "# ❌ No URL Provided\n\nPlease enter a URL to parse.", "{}"
107
 
108
- progress(0.3, desc="🔍 Analyzing URL format...")
109
  json_result = parse_huggingface_url(url)
110
 
111
- progress(0.7, desc="🔍 Generating summary...")
112
  parsed_info = json.loads(json_result)
113
  md_summary = format_url_summary(parsed_info)
114
 
115
- progress(1.0, desc="✅ URL parsing complete!")
116
  return md_summary, json_result
117
 
118
  def format_url_summary(parsed_info: dict) -> str:
@@ -145,7 +140,7 @@ def format_url_summary(parsed_info: dict) -> str:
145
 
146
  return md
147
 
148
- def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
149
  """
150
  Check health of a single MCP server from any URL format.
151
 
@@ -155,12 +150,10 @@ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
155
  Returns:
156
  tuple: (markdown_summary, json_data)
157
  """
158
- progress(0, desc="🏥 Starting health check...")
159
 
160
  if not url.strip():
161
  return "# ❌ No URL Provided\n\nPlease enter a URL to check.", "{}"
162
 
163
- progress(0.1, desc="🏥 Parsing URL...")
164
  parsed_info = json.loads(parse_huggingface_url(url))
165
 
166
  if not parsed_info["is_valid"]:
@@ -182,7 +175,6 @@ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
182
  }
183
 
184
  # Test 1: Check space URL health
185
- progress(0.3, desc="🌐 Checking space accessibility...")
186
  if parsed_info["space_url"] != "unknown":
187
  start_time = time.time()
188
  try:
@@ -206,7 +198,6 @@ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
206
  }
207
 
208
  # Test 2: Check MCP endpoint health
209
- progress(0.6, desc="🔧 Checking MCP endpoint...")
210
  start_time = time.time()
211
  try:
212
  response = requests.get(parsed_info["mcp_endpoint"], timeout=8, stream=True)
@@ -228,8 +219,6 @@ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
228
  "error": str(e)
229
  }
230
 
231
- progress(0.9, desc="📊 Analyzing results...")
232
-
233
  # Determine overall status
234
  space_ok = results["space_health"] is None or results["space_health"]["accessible"]
235
  mcp_ok = results["mcp_health"]["accessible"]
@@ -246,7 +235,6 @@ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
246
  # Generate markdown summary
247
  md = format_health_summary(results)
248
 
249
- progress(1.0, desc="✅ Health check complete!")
250
  return md, json.dumps(results, indent=2)
251
 
252
  def format_health_summary(results: dict) -> str:
@@ -333,7 +321,7 @@ def extract_functions_from_source(source_code: str) -> List[Tuple[str, str, List
333
 
334
  return functions
335
 
336
- def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
337
  """
338
  Discover available MCP tools from a server.
339
 
@@ -343,12 +331,10 @@ def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
343
  Returns:
344
  tuple: (markdown_summary, json_data)
345
  """
346
- progress(0, desc="🛠️ Starting tools discovery...")
347
 
348
  if not url.strip():
349
  return "# ❌ No URL Provided\n\nPlease enter a URL to discover tools.", "{}"
350
 
351
- progress(0.1, desc="🛠️ Parsing URL...")
352
  parsed_info = json.loads(parse_huggingface_url(url))
353
 
354
  if not parsed_info["is_valid"]:
@@ -364,15 +350,12 @@ def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
364
  discovery_methods = []
365
 
366
  # Method: Analyze app.py source code
367
- progress(0.3, desc="🛠️ Fetching source code...")
368
  try:
369
  # Try to get app.py from HF spaces
370
  if parsed_info["hf_spaces_url"] != "unknown":
371
  app_url = f"{parsed_info['hf_spaces_url']}/raw/main/app.py"
372
- progress(0.5, desc="🛠️ Analyzing app.py...")
373
  response = requests.get(app_url, timeout=10)
374
  if response.status_code == 200:
375
- progress(0.7, desc="🛠️ Extracting functions...")
376
  functions = extract_functions_from_source(response.text)
377
  for func_name, docstring, params in functions:
378
  tools.append({
@@ -385,8 +368,6 @@ def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
385
  except Exception as e:
386
  discovery_methods.append(f"Failed to analyze app.py: {str(e)}")
387
 
388
- progress(0.9, desc="🛠️ Preparing results...")
389
-
390
  # Prepare result
391
  result = {
392
  "original_url": url,
@@ -404,7 +385,6 @@ def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
404
  # Generate markdown summary
405
  md = format_tools_summary(result)
406
 
407
- progress(1.0, desc="✅ Tools discovery complete!")
408
  return md, json.dumps(result, indent=2)
409
 
410
  def format_tools_summary(result: dict) -> str:
@@ -434,7 +414,7 @@ def format_tools_summary(result: dict) -> str:
434
 
435
  return md
436
 
437
- def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
438
  """
439
  Monitor health and tools of multiple MCP servers simultaneously.
440
 
@@ -444,7 +424,6 @@ def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
444
  Returns:
445
  tuple: (markdown_summary, json_data)
446
  """
447
- progress(0, desc="📊 Starting multi-server monitoring...")
448
 
449
  if not urls_text.strip():
450
  result = {
@@ -455,27 +434,22 @@ def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
455
  md = "# ❌ No URLs Provided\n\nPlease enter URLs to monitor."
456
  return md, json.dumps(result, indent=2)
457
 
458
- progress(0.1, desc="📊 Parsing URL list...")
459
  urls = [url.strip() for url in urls_text.strip().split('\n') if url.strip()]
460
 
461
  if not urls:
462
  result = {
463
  "error": "No valid URLs found",
464
  "servers": [],
465
- "total_servers": 0
466
- }
467
  md = "# ❌ No Valid URLs\n\nPlease check the URL format."
468
  return md, json.dumps(result, indent=2)
469
 
470
  results = []
471
 
472
  for i, url in enumerate(urls, 1):
473
- progress_pct = 0.1 + (i / len(urls)) * 0.8 # Reserve 10% for setup, 10% for final processing
474
- progress(progress_pct, desc=f"📊 Checking server {i}/{len(urls)}: {url[:50]}...")
475
 
476
  try:
477
- print(f"🔍 Checking server {i}/{len(urls)}: {url}")
478
-
479
  _, health_json = check_single_server_health(url)
480
  health_data = json.loads(health_json)
481
 
@@ -499,8 +473,6 @@ def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
499
  "combined_status": "error"
500
  })
501
 
502
- progress(0.95, desc="📊 Generating report...")
503
-
504
  final_result = {
505
  "servers": results,
506
  "total_servers": len(urls),
@@ -510,7 +482,6 @@ def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
510
  # Generate markdown summary
511
  md = format_multiple_servers_summary(final_result)
512
 
513
- progress(1.0, desc="✅ Multi-server monitoring complete!")
514
  return md, json.dumps(final_result, indent=2)
515
 
516
  def format_multiple_servers_summary(result: dict) -> str:
@@ -546,7 +517,7 @@ def format_multiple_servers_summary(result: dict) -> str:
546
 
547
  return md
548
 
549
- def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
550
  """
551
  Validate that a URL is a working MCP endpoint by checking its schema.
552
 
@@ -556,12 +527,10 @@ def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
556
  Returns:
557
  tuple: (markdown_summary, json_data)
558
  """
559
- progress(0, desc="✅ Starting MCP validation...")
560
 
561
  if not url.strip():
562
  return "# ❌ No URL Provided\n\nPlease enter a URL to validate.", "{}"
563
 
564
- progress(0.1, desc="✅ Parsing URL...")
565
  parsed_info = json.loads(parse_huggingface_url(url))
566
 
567
  validation_result = {
@@ -581,8 +550,6 @@ def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
581
 
582
  mcp_endpoint = validation_result["mcp_endpoint_url"]
583
 
584
- progress(0.3, desc="✅ Building schema URL...")
585
-
586
  # Construct schema URL from MCP SSE endpoint
587
  # Example: https://user-space.hf.space/gradio_api/mcp/sse -> https://user-space.hf.space/gradio_api/mcp/schema
588
  if mcp_endpoint.endswith("/sse"):
@@ -596,8 +563,6 @@ def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
596
 
597
  print(f"ℹ️ Validating MCP: Original URL='{url}', Endpoint='{mcp_endpoint}', Schema='{mcp_schema_url}'")
598
 
599
- progress(0.5, desc="✅ Testing schema endpoint...")
600
-
601
  # Test MCP schema endpoint
602
  try:
603
  headers = {'User-Agent': 'MCP-Validator/1.0'}
@@ -605,8 +570,6 @@ def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
605
 
606
  validation_result["schema_http_status"] = response.status_code
607
 
608
- progress(0.7, desc="✅ Processing schema response...")
609
-
610
  if response.status_code == 200:
611
  try:
612
  schema_data = response.json()
@@ -656,12 +619,9 @@ def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
656
  validation_result["error"] = f"An unexpected error occurred during validation: {str(e_gen)}"
657
  print(f"❌ Unexpected error during MCP validation for {mcp_schema_url}: {str(e_gen)}")
658
 
659
- progress(0.9, desc="✅ Generating validation report...")
660
-
661
  # Generate markdown summary
662
  md = format_validation_summary(validation_result)
663
 
664
- progress(1.0, desc="✅ MCP validation complete!")
665
  return md, json.dumps(validation_result, indent=2)
666
 
667
  def format_validation_summary(result: dict) -> str:
@@ -711,40 +671,18 @@ def format_validation_summary(result: dict) -> str:
711
 
712
  return md
713
 
714
- def scrape_hf_spaces_with_progress(max_pages: int, sort_by: str, progress=gr.Progress()) -> tuple:
715
- """Wrapper function for scraping with progress feedback."""
716
- progress(0, desc="🕷️ Starting HuggingFace scraping...")
717
 
718
  # Validate sort option
719
  if sort_by not in SORT_OPTIONS:
720
  sort_by = "relevance"
721
 
722
- progress(0.1, desc=f"🕷️ Using sort method: {SORT_OPTIONS[sort_by]['label']}")
723
-
724
- # Update the scraper to accept a progress callback
725
- def progress_callback(message):
726
- # Extract progress percentage from message if possible
727
- if "page" in message.lower():
728
- try:
729
- # Try to extract current page from message
730
- import re
731
- match = re.search(r'page (\d+)(/(\d+))?', message.lower())
732
- if match:
733
- current = int(match.group(1))
734
- total = int(match.group(3)) if match.group(3) else max_pages
735
- progress_pct = min(0.9, (current / total) * 0.8 + 0.1) # Reserve 10% for final processing
736
- progress(progress_pct, desc=f"🕷️ {message}")
737
- else:
738
- progress(None, desc=f"🕷️ {message}")
739
- except:
740
- progress(None, desc=f"🕷️ {message}")
741
- else:
742
- progress(None, desc=f"🕷️ {message}")
743
-
744
- # Call the scraper with progress callback and sort option
745
  md, json_data = scrape_and_monitor_hf_mcp_servers(max_pages, sort_by)
746
 
747
- progress(1.0, desc="✅ HuggingFace scraping complete!")
748
  return md, json_data
749
 
750
  # Default URLs for testing
 
1
+ from typing import List, Tuple
 
2
  import requests
3
  import time
4
  import json
 
97
 
98
  return json.dumps(result, indent=2)
99
 
100
+ def parse_huggingface_url_with_summary(url: str) -> tuple:
101
  """Parse URL and return both markdown summary and JSON."""
 
102
 
103
  if not url.strip():
104
  return "# ❌ No URL Provided\n\nPlease enter a URL to parse.", "{}"
105
 
 
106
  json_result = parse_huggingface_url(url)
107
 
 
108
  parsed_info = json.loads(json_result)
109
  md_summary = format_url_summary(parsed_info)
110
 
 
111
  return md_summary, json_result
112
 
113
  def format_url_summary(parsed_info: dict) -> str:
 
140
 
141
  return md
142
 
143
+ def check_single_server_health(url: str) -> tuple:
144
  """
145
  Check health of a single MCP server from any URL format.
146
 
 
150
  Returns:
151
  tuple: (markdown_summary, json_data)
152
  """
 
153
 
154
  if not url.strip():
155
  return "# ❌ No URL Provided\n\nPlease enter a URL to check.", "{}"
156
 
 
157
  parsed_info = json.loads(parse_huggingface_url(url))
158
 
159
  if not parsed_info["is_valid"]:
 
175
  }
176
 
177
  # Test 1: Check space URL health
 
178
  if parsed_info["space_url"] != "unknown":
179
  start_time = time.time()
180
  try:
 
198
  }
199
 
200
  # Test 2: Check MCP endpoint health
 
201
  start_time = time.time()
202
  try:
203
  response = requests.get(parsed_info["mcp_endpoint"], timeout=8, stream=True)
 
219
  "error": str(e)
220
  }
221
 
 
 
222
  # Determine overall status
223
  space_ok = results["space_health"] is None or results["space_health"]["accessible"]
224
  mcp_ok = results["mcp_health"]["accessible"]
 
235
  # Generate markdown summary
236
  md = format_health_summary(results)
237
 
 
238
  return md, json.dumps(results, indent=2)
239
 
240
  def format_health_summary(results: dict) -> str:
 
321
 
322
  return functions
323
 
324
+ def discover_server_tools(url: str) -> tuple:
325
  """
326
  Discover available MCP tools from a server.
327
 
 
331
  Returns:
332
  tuple: (markdown_summary, json_data)
333
  """
 
334
 
335
  if not url.strip():
336
  return "# ❌ No URL Provided\n\nPlease enter a URL to discover tools.", "{}"
337
 
 
338
  parsed_info = json.loads(parse_huggingface_url(url))
339
 
340
  if not parsed_info["is_valid"]:
 
350
  discovery_methods = []
351
 
352
  # Method: Analyze app.py source code
 
353
  try:
354
  # Try to get app.py from HF spaces
355
  if parsed_info["hf_spaces_url"] != "unknown":
356
  app_url = f"{parsed_info['hf_spaces_url']}/raw/main/app.py"
 
357
  response = requests.get(app_url, timeout=10)
358
  if response.status_code == 200:
 
359
  functions = extract_functions_from_source(response.text)
360
  for func_name, docstring, params in functions:
361
  tools.append({
 
368
  except Exception as e:
369
  discovery_methods.append(f"Failed to analyze app.py: {str(e)}")
370
 
 
 
371
  # Prepare result
372
  result = {
373
  "original_url": url,
 
385
  # Generate markdown summary
386
  md = format_tools_summary(result)
387
 
 
388
  return md, json.dumps(result, indent=2)
389
 
390
  def format_tools_summary(result: dict) -> str:
 
414
 
415
  return md
416
 
417
+ def monitor_multiple_servers(urls_text: str) -> tuple:
418
  """
419
  Monitor health and tools of multiple MCP servers simultaneously.
420
 
 
424
  Returns:
425
  tuple: (markdown_summary, json_data)
426
  """
 
427
 
428
  if not urls_text.strip():
429
  result = {
 
434
  md = "# ❌ No URLs Provided\n\nPlease enter URLs to monitor."
435
  return md, json.dumps(result, indent=2)
436
 
 
437
  urls = [url.strip() for url in urls_text.strip().split('\n') if url.strip()]
438
 
439
  if not urls:
440
  result = {
441
  "error": "No valid URLs found",
442
  "servers": [],
443
+ "total_servers": 0 }
 
444
  md = "# ❌ No Valid URLs\n\nPlease check the URL format."
445
  return md, json.dumps(result, indent=2)
446
 
447
  results = []
448
 
449
  for i, url in enumerate(urls, 1):
450
+ print(f"🔍 Checking server {i}/{len(urls)}: {url}")
 
451
 
452
  try:
 
 
453
  _, health_json = check_single_server_health(url)
454
  health_data = json.loads(health_json)
455
 
 
473
  "combined_status": "error"
474
  })
475
 
 
 
476
  final_result = {
477
  "servers": results,
478
  "total_servers": len(urls),
 
482
  # Generate markdown summary
483
  md = format_multiple_servers_summary(final_result)
484
 
 
485
  return md, json.dumps(final_result, indent=2)
486
 
487
  def format_multiple_servers_summary(result: dict) -> str:
 
517
 
518
  return md
519
 
520
+ def validate_mcp_endpoint(url: str) -> tuple:
521
  """
522
  Validate that a URL is a working MCP endpoint by checking its schema.
523
 
 
527
  Returns:
528
  tuple: (markdown_summary, json_data)
529
  """
 
530
 
531
  if not url.strip():
532
  return "# ❌ No URL Provided\n\nPlease enter a URL to validate.", "{}"
533
 
 
534
  parsed_info = json.loads(parse_huggingface_url(url))
535
 
536
  validation_result = {
 
550
 
551
  mcp_endpoint = validation_result["mcp_endpoint_url"]
552
 
 
 
553
  # Construct schema URL from MCP SSE endpoint
554
  # Example: https://user-space.hf.space/gradio_api/mcp/sse -> https://user-space.hf.space/gradio_api/mcp/schema
555
  if mcp_endpoint.endswith("/sse"):
 
563
 
564
  print(f"ℹ️ Validating MCP: Original URL='{url}', Endpoint='{mcp_endpoint}', Schema='{mcp_schema_url}'")
565
 
 
 
566
  # Test MCP schema endpoint
567
  try:
568
  headers = {'User-Agent': 'MCP-Validator/1.0'}
 
570
 
571
  validation_result["schema_http_status"] = response.status_code
572
 
 
 
573
  if response.status_code == 200:
574
  try:
575
  schema_data = response.json()
 
619
  validation_result["error"] = f"An unexpected error occurred during validation: {str(e_gen)}"
620
  print(f"❌ Unexpected error during MCP validation for {mcp_schema_url}: {str(e_gen)}")
621
 
 
 
622
  # Generate markdown summary
623
  md = format_validation_summary(validation_result)
624
 
 
625
  return md, json.dumps(validation_result, indent=2)
626
 
627
  def format_validation_summary(result: dict) -> str:
 
671
 
672
  return md
673
 
674
+ def scrape_hf_spaces_with_progress(max_pages: int, sort_by: str) -> tuple:
675
+ """Wrapper function for scraping."""
 
676
 
677
  # Validate sort option
678
  if sort_by not in SORT_OPTIONS:
679
  sort_by = "relevance"
680
 
681
+ # Call the scraper with sort option
682
+ # The imported scrape_and_monitor_hf_mcp_servers function
683
+ # will print its own progress to the console.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
  md, json_data = scrape_and_monitor_hf_mcp_servers(max_pages, sort_by)
685
 
 
686
  return md, json_data
687
 
688
  # Default URLs for testing