limitedonly41 committed on
Commit
2826f0b
·
verified ·
1 Parent(s): c3af572

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -42
app.py CHANGED
@@ -2,6 +2,7 @@
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
 
5
  import json
6
  import time
7
  import random
@@ -24,49 +25,47 @@ class WebScrapingSimulator:
24
  "https://python.org": "OTHER"
25
  }
26
 
27
def simulate_scraping(self, urls_text, progress=gr.Progress()):
    """Simulate the web scraping and classification process.

    Each non-blank line of ``urls_text`` is treated as one URL. A URL found
    in ``self.demo_results`` gets its stored label; any other URL gets a
    randomly chosen label. Results are keyed by URL, so a URL listed more
    than once is processed each time but reported once.

    Args:
        urls_text: Text with one URL per line; blank lines are ignored.
        progress: Gradio progress tracker wrapped around the URL loop.

    Returns:
        A 3-tuple ``(results_text, json_results, status)``: a plain-text
        report, the same results serialized as an indented JSON string, and
        a one-line status message. When no URL is supplied the tuple is
        ``("Please enter at least one URL", "", "")``.
    """
    # A single guard is enough: text that is non-blank after stripping always
    # contains at least one non-blank line, so a second "no valid URLs" check
    # after splitting could never trigger.
    urls = [line.strip() for line in urls_text.split('\n') if line.strip()]
    if not urls:
        return "Please enter at least one URL", "", ""

    results = {}
    for url in progress.tqdm(urls, desc="Processing URLs"):
        time.sleep(1)  # Simulate processing time

        # Use the canned demo label when available, otherwise pick one at random.
        if url in self.demo_results:
            classification = self.demo_results[url]
        else:
            classification = random.choice(["OTHER", "NEWS/BLOG", "E-COMMERCE"])

        results[url] = {
            "url": url,
            "classification": classification,
            "confidence": round(random.uniform(0.75, 0.99), 2),
            "processed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

    # Format results for display; collect the pieces and join once instead of
    # growing a string with repeated "+=".
    report = ["Classification Results:\n\n"]
    for url, data in results.items():
        report.append(f"URL: {url}\n")
        report.append(f"Classification: {data['classification']}\n")
        report.append(f"Confidence: {data['confidence']}\n")
        report.append(f"Processed: {data['processed_at']}\n")
        report.append("-" * 50 + "\n")
    results_text = "".join(report)

    # Serialized copy of the results for the JSON output.
    json_results = json.dumps(results, indent=2)

    return results_text, json_results, f"Processed {len(results)} URLs successfully!"
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  # Computer Vision simulator
71
  @spaces.GPU
72
  def simulate_cv_processing(image, model_type):
 
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
5
+ import io
6
  import json
7
  import time
8
  import random
 
25
  "https://python.org": "OTHER"
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
def simulate_scraping(self, urls_text, progress=gr.Progress()):
    """Simulate the web scraping and classification process.

    Each non-blank line of ``urls_text`` is treated as one URL. A URL found
    in ``self.demo_results`` gets its stored label; any other URL gets a
    randomly chosen label. Results are keyed by URL, so a URL listed more
    than once is processed each time but reported once.

    Args:
        urls_text: Text with one URL per line; blank lines are ignored.
        progress: Gradio progress tracker wrapped around the URL loop.

    Returns:
        A 3-tuple ``(results_text, file_obj, status)``: a plain-text report,
        an in-memory ``io.BytesIO`` holding the results as UTF-8 JSON (with
        ``name`` set to ``"scraping_results.json"``), and a one-line status
        message. When no URL is supplied the tuple is
        ``("Please enter at least one URL", None, "")``.
    """
    # A single guard is enough: text that is non-blank after stripping always
    # contains at least one non-blank line, so a second "no valid URLs" check
    # after splitting could never trigger.
    urls = [line.strip() for line in urls_text.split('\n') if line.strip()]
    if not urls:
        return "Please enter at least one URL", None, ""

    results = {}
    for url in progress.tqdm(urls, desc="Processing URLs"):
        time.sleep(1)  # Simulate processing time

        # Use the canned demo label when available, otherwise pick one at random.
        if url in self.demo_results:
            classification = self.demo_results[url]
        else:
            classification = random.choice(["OTHER", "NEWS/BLOG", "E-COMMERCE"])

        results[url] = {
            "url": url,
            "classification": classification,
            "confidence": round(random.uniform(0.75, 0.99), 2),
            "processed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

    # Format results for display; collect the pieces and join once instead of
    # growing a string with repeated "+=".
    report = ["Classification Results:\n\n"]
    for url, data in results.items():
        report.append(f"URL: {url}\n")
        report.append(f"Classification: {data['classification']}\n")
        report.append(f"Confidence: {data['confidence']}\n")
        report.append(f"Processed: {data['processed_at']}\n")
        report.append("-" * 50 + "\n")
    results_text = "".join(report)

    # Create an in-memory JSON file for download.
    # NOTE(review): the output component this feeds is not visible here.
    # Gradio's File output is documented as taking a file path, so confirm a
    # file-like object is accepted; otherwise write to a temp file and return
    # its path.
    json_bytes = json.dumps(results, indent=2).encode('utf-8')
    file_obj = io.BytesIO(json_bytes)
    file_obj.name = "scraping_results.json"  # gives the download a filename

    return results_text, file_obj, f"Processed {len(results)} URLs successfully!"
69
  # Computer Vision simulator
70
  @spaces.GPU
71
  def simulate_cv_processing(image, model_type):