Spaces:
Build error
Build error
import os
import shutil
import tempfile
from io import BytesIO
from urllib.parse import urljoin, urlparse
from zipfile import ZipFile

import gradio as gr
import requests
from bs4 import BeautifulSoup
def download_file(url, session, timeout=10):
    """Download a single asset and return its raw bytes.

    Args:
        url: Absolute URL of the asset to fetch.
        session: A ``requests.Session`` (or compatible object) used for the GET.
        timeout: Seconds to wait for the request; without it a single slow
            asset could hang the whole page download forever.

    Returns:
        The response body as bytes, or ``None`` if the request failed.
        Failures are logged rather than raised (best-effort download).
    """
    try:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        return response.content
    except requests.exceptions.RequestException as e:
        # A missing or broken asset should not abort mirroring the page.
        print(f"Error downloading {url}: {e}")
        return None
def save_webpage_as_zip(url):
    """Fetch a webpage plus its img/link/script assets and zip them in memory.

    Args:
        url: The page URL to mirror.

    Returns:
        A ``BytesIO`` (seeked to 0) containing a ZIP archive with
        ``index.html`` at the root and each asset stored under its URL path.

    Raises:
        requests.exceptions.HTTPError: if the main page request fails.
    """
    session = requests.Session()
    response = session.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Unique scratch dir per call: the previous fixed 'temp_webpage' directory
    # was shared between concurrent calls (racy) and leaked on exceptions.
    temp_dir = tempfile.mkdtemp(prefix='webpage_')
    try:
        with open(os.path.join(temp_dir, 'index.html'), 'wb') as f:
            f.write(response.content)

        # Collect asset references: <link> uses href, <img>/<script> use src.
        assets = []
        for tag in soup.find_all(['img', 'link', 'script']):
            attr = 'href' if tag.name == 'link' else 'src'
            value = tag.get(attr)
            if value:
                assets.append(value)

        for asset in assets:
            asset_url = urljoin(url, asset)
            asset_path = urlparse(asset_url).path.lstrip('/')
            if not asset_path or asset_path.endswith('/'):
                # No usable file name (bare domain or directory URL) — skip.
                print(f"Skipping directory {os.path.join(temp_dir, asset_path)}")
                continue
            asset_full_path = os.path.join(temp_dir, asset_path)
            os.makedirs(os.path.dirname(asset_full_path), exist_ok=True)
            content = download_file(asset_url, session)
            if content:
                if os.path.isdir(asset_full_path):
                    print(f"Skipping directory {asset_full_path}")
                    continue
                with open(asset_full_path, 'wb') as f:
                    f.write(content)

        # Zip the mirrored tree into memory, paths relative to temp_dir.
        zip_buffer = BytesIO()
        with ZipFile(zip_buffer, 'w') as zipf:
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, temp_dir))
    finally:
        # Always remove the scratch dir, even when a request/write fails.
        shutil.rmtree(temp_dir, ignore_errors=True)

    zip_buffer.seek(0)
    return zip_buffer
def generate_zip_file(url):
    """Snapshot *url* as a ZIP archive on disk and return the file path."""
    temp_zip_path = "webpage.zip"
    archive = save_webpage_as_zip(url)
    with open(temp_zip_path, 'wb') as out:
        out.write(archive.read())
    return temp_zip_path
# Example URLs shown beneath the input box.
examples = [
    "https://www.bmw.com/en/index.html",
    "https://www.ferrari.com/en-EN",
    "https://streamlit.io/"
]

DESCRIPTION = """
## Webpage to ZIP Downloader π
"""

# Gradio UI: a URL textbox, a download button, and a file component that
# serves the generated ZIP archive.
with gr.Blocks(theme="gstaff/whiteboard") as demo:  # Custom theme
    gr.Markdown(DESCRIPTION)
    gr.Markdown("Enter a URL to download the webpage and its assets as a ZIP file.")
    url_input = gr.Textbox(label="Website URL", placeholder="Enter a URL (e.g., https://www.example.com)")
    download_button = gr.Button("Download as ZIP")
    output_file = gr.File(label="Download")

    # NOTE(review): the previous unused `set_example_url` helper was removed —
    # it was never wired to an event and mutating `url_input.value` has no
    # effect inside gr.Blocks.
    download_button.click(fn=generate_zip_file, inputs=url_input, outputs=output_file)

    # Selecting an example fills the textbox and runs generate_zip_file.
    gr.Examples(
        examples=examples,
        inputs=url_input,
        outputs=output_file,
        fn=generate_zip_file
    )

demo.launch()