Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
|
|
|
| 5 |
import json
|
| 6 |
import time
|
| 7 |
import random
|
|
@@ -24,49 +25,47 @@ class WebScrapingSimulator:
|
|
| 24 |
"https://python.org": "OTHER"
|
| 25 |
}
|
| 26 |
|
| 27 |
-
def simulate_scraping(self, urls_text, progress=gr.Progress()):
    """Run a mock scrape-and-classify pass over newline-separated URLs.

    Every non-blank line of ``urls_text`` is treated as one URL. A URL that
    is a key of ``self.demo_results`` receives the label stored there; any
    other URL receives a randomly chosen label. Confidence values are
    random, and each entry is stamped with the current local time.

    Returns a 3-tuple ``(display_text, json_text, status_message)``. When
    no URL is supplied the first element carries a prompt for the user and
    the other two are empty strings.
    """
    if not urls_text.strip():
        return "Please enter at least one URL", "", ""

    stripped_lines = (line.strip() for line in urls_text.split('\n'))
    urls = [entry for entry in stripped_lines if entry]
    if not urls:
        return "Please enter valid URLs", "", ""

    fallback_labels = ["OTHER", "NEWS/BLOG", "E-COMMERCE"]
    results = {}

    for url in progress.tqdm(urls, desc="Processing URLs"):
        time.sleep(1)  # Simulate processing time

        # Known demo URLs keep their fixed label; the random label is drawn
        # only when needed so the RNG sequence is unchanged.
        if url in self.demo_results:
            label = self.demo_results[url]
        else:
            label = random.choice(fallback_labels)

        results[url] = {
            "url": url,
            "classification": label,
            "confidence": round(random.uniform(0.75, 0.99), 2),
            "processed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

    # Human-readable report: one section per URL, each closed by a divider.
    divider = "-" * 50 + "\n"
    sections = [
        f"URL: {link}\n"
        f"Classification: {record['classification']}\n"
        f"Confidence: {record['confidence']}\n"
        f"Processed: {record['processed_at']}\n"
        + divider
        for link, record in results.items()
    ]
    results_text = "Classification Results:\n\n" + "".join(sections)

    # JSON text version of the same results
    json_results = json.dumps(results, indent=2)

    return results_text, json_results, f"Processed {len(results)} URLs successfully!"
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
# Computer Vision simulator
|
| 71 |
@spaces.GPU
|
| 72 |
def simulate_cv_processing(image, model_type):
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
| 5 |
+
import io
|
| 6 |
import json
|
| 7 |
import time
|
| 8 |
import random
|
|
|
|
| 25 |
"https://python.org": "OTHER"
|
| 26 |
}
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
def simulate_scraping(self, urls_text, progress=gr.Progress()):
    """Simulate scraping and classifying a newline-separated list of URLs.

    Every non-blank line of ``urls_text`` is treated as one URL. A URL that
    is a key of ``self.demo_results`` receives the label stored there; any
    other URL receives a randomly chosen label. Confidence values are
    random, and each entry is stamped with the current local time.

    Args:
        urls_text: Raw text with one URL per line. ``None`` or
            whitespace-only text is treated as "no URLs".
        progress: Gradio progress tracker used to wrap the URL loop.

    Returns:
        A 3-tuple ``(display_text, json_file_path, status_message)``.
        ``json_file_path`` is the path of a JSON file holding the results,
        or ``None`` when no URL was supplied (in which case
        ``status_message`` is an empty string).
    """
    # Local imports keep this block self-contained; the file's import
    # section is left untouched.
    import os
    import tempfile

    # Parsing first makes a separate "no valid URLs" check unnecessary:
    # text that is non-empty after strip() always yields at least one URL.
    urls = [line.strip() for line in (urls_text or "").split('\n') if line.strip()]
    if not urls:
        return "Please enter at least one URL", None, ""

    fallback_labels = ["OTHER", "NEWS/BLOG", "E-COMMERCE"]
    results = {}

    for url in progress.tqdm(urls, desc="Processing URLs"):
        time.sleep(1)  # simulated processing delay

        if url in self.demo_results:
            classification = self.demo_results[url]
        else:
            classification = random.choice(fallback_labels)

        results[url] = {
            "url": url,
            "classification": classification,
            "confidence": round(random.uniform(0.75, 0.99), 2),
            "processed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

    # Format results for display: one section per URL, closed by a divider.
    divider = "-" * 50 + "\n"
    results_text = "Classification Results:\n\n" + "".join(
        f"URL: {link}\n"
        f"Classification: {data['classification']}\n"
        f"Confidence: {data['confidence']}\n"
        f"Processed: {data['processed_at']}\n"
        + divider
        for link, data in results.items()
    )

    # Write the JSON to a real file and hand back its path rather than an
    # in-memory buffer.
    # NOTE(review): assumes the second output is a Gradio file-download
    # component, which takes a filesystem path - confirm against the UI wiring.
    # A fresh directory per call keeps the download name stable without
    # clobbering concurrent requests.
    output_dir = tempfile.mkdtemp(prefix="scraping_")
    json_path = os.path.join(output_dir, "scraping_results.json")
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(results, handle, indent=2)

    return results_text, json_path, f"Processed {len(results)} URLs successfully!"
|
| 69 |
# Computer Vision simulator
|
| 70 |
@spaces.GPU
|
| 71 |
def simulate_cv_processing(image, model_type):
|