Spaces:
Paused
Paused
Commit ·
c946fc8
1
Parent(s): 4e04931
feat: Implement AI-powered web scraper and redesign application with Gradio UI and updated README.
Browse files- README.md +26 -7
- __pycache__/app.cpython-313.pyc +0 -0
- __pycache__/generator.cpython-313.pyc +0 -0
- __pycache__/scraper.cpython-313.pyc +0 -0
- app.py +79 -0
- generator.py +72 -0
- requirements.txt +6 -0
- scraper.py +57 -0
README.md
CHANGED
|
@@ -1,13 +1,32 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
short_description: Redesigner
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: AI Web Redesigner
|
| 3 |
+
emoji: 🎨
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🎨 AI Web Scraper & Redesigner
|
| 13 |
+
|
| 14 |
+
This Space allows you to input any URL, scrape its content, and use an advanced AI model (Qwen2.5-Coder-32B) to redesign it into a modern, stunning webpage.
|
| 15 |
+
|
| 16 |
+
## How to Use
|
| 17 |
+
1. Enter a URL (e.g., `https://example.com`)
|
| 18 |
+
2. Select a Design Style (e.g., "Cyberpunk", "Minimalist")
|
| 19 |
+
3. Select an Output Format (HTML/CSS/JS or Frameworks)
|
| 20 |
+
4. (Optional) Enter your Hugging Face Token if you want to use your own quota, otherwise it uses the Space's secret.
|
| 21 |
+
5. Click **Redesign**!
|
| 22 |
+
|
| 23 |
+
## Features
|
| 24 |
+
- **Smart Scraping**: Extracts text and images from the target site.
|
| 25 |
+
- **AI Redesign**: Uses Large Language Models to write clean, modern code.
|
| 26 |
+
- **Live Preview**: See the generated website immediately in the browser.
|
| 27 |
+
|
| 28 |
+
## Setup
|
| 29 |
+
To use the AI generation, you must set a `HF_TOKEN` in the Space's **Settings > Repository secrets**.
|
| 30 |
+
1. Go to your HF Profile > Settings > Access Tokens.
|
| 31 |
+
2. Create a generic token.
|
| 32 |
+
3. In this Space, go to Settings and add `HF_TOKEN` with your key.
|
__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (3.76 kB). View file
|
|
|
__pycache__/generator.cpython-313.pyc
ADDED
|
Binary file (2.93 kB). View file
|
|
|
__pycache__/scraper.cpython-313.pyc
ADDED
|
Binary file (2.37 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from scraper import scrape_webpage
|
| 3 |
+
from generator import generate_redesigned_code
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Get token from environment variable or user input.
# In Hugging Face Spaces, set HF_TOKEN in Settings > Secrets.
# May be None when the secret is absent; process_pipeline then asks the
# user to supply a token through the UI instead.
DEFAULT_HF_TOKEN = os.getenv("HF_TOKEN")
|
| 9 |
+
|
| 10 |
+
def process_pipeline(url, style, framework, custom_token):
    """Run the full scrape -> generate -> preview-extraction pipeline.

    Args:
        url: Target page URL to scrape.
        style: Design style label chosen in the UI dropdown.
        framework: Output format label chosen in the UI dropdown.
        custom_token: Optional user-supplied HF API token; falls back to
            the Space secret (DEFAULT_HF_TOKEN) when empty.

    Returns:
        Tuple of (raw_model_output, preview_code): the second element has
        markdown code fences stripped so it can render in the preview tab.
    """
    # Fix: the old guard `not token and not DEFAULT_HF_TOKEN` was redundant —
    # after this assignment, a falsy token already implies both sources were
    # empty, so checking the token alone is equivalent and clearer.
    token = custom_token or DEFAULT_HF_TOKEN
    if not token:
        return "Please provide a Hugging Face Token to use the API.", ""

    # 1. Scrape; bail out early with the scraper's error message.
    scraped_data = scrape_webpage(url)
    if "error" in scraped_data:
        return f"Scraping Error: {scraped_data['error']}", ""

    # 2. Generate the redesigned code via the LLM.
    generated_code = generate_redesigned_code(scraped_data, style, framework, hf_token=token)

    # 3. Extract raw HTML for the preview iframe (simplistic approach:
    #    assumes the model wraps its output in a markdown code block).
    clean_code = _strip_code_fence(generated_code)
    return generated_code, clean_code


def _strip_code_fence(text):
    """Return the contents of the first markdown code fence in *text*.

    Handles both ```html-tagged and untagged ``` fences; returns *text*
    unchanged when no fence is present.
    """
    if "```html" in text:
        parts = text.split("```html")
        if len(parts) > 1:
            return parts[1].split("```")[0]
    elif "```" in text:  # Generic block
        parts = text.split("```")
        if len(parts) > 1:
            return parts[1]
    return text
|
| 37 |
+
|
| 38 |
+
# Define the UI: inputs on the left, tabbed preview/code output on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🎨 AI Web Scraper & Redesigner")
    gr.Markdown("Turn any URL into a Modern, Stunning Website Design using AI.")

    with gr.Row():
        # Left column: all user-controlled inputs.
        with gr.Column(scale=1):
            url_box = gr.Textbox(label="Website URL", placeholder="https://example.com")
            design_style = gr.Dropdown(
                choices=["Modern & Clean", "Dark & Cyberpunk", "Vibrant & Playful", "Minimalist"],
                value="Modern & Clean",
                label="Design Style",
            )
            output_format = gr.Dropdown(
                choices=["HTML/CSS/JS (Single File)", "React (Component)", "Angular (Component)"],
                value="HTML/CSS/JS (Single File)",
                label="Output Format",
            )
            api_token = gr.Textbox(
                label="HF API Token (Optional if set in Space Secrets)",
                placeholder="hf_...",
                type="password",
            )
            redesign_button = gr.Button("🚀 Redesign Website", variant="primary")

        # Right column: output display.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Preview"):
                    # Rendered preview of the generated page.
                    live_preview = gr.HTML(label="Live Preview")
                with gr.TabItem("Code"):
                    raw_code = gr.Code(language="html", label="Generated Code")

    # Wire the button to the scrape -> generate pipeline.
    redesign_button.click(
        fn=process_pipeline,
        inputs=[url_box, design_style, output_format, api_token],
        outputs=[raw_code, live_preview],
    )

if __name__ == "__main__":
    demo.launch()
|
generator.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from huggingface_hub import InferenceClient
|
| 3 |
+
|
| 4 |
+
# Model used for all chat-completion calls in generate_redesigned_code.
# You can change the model here:
# "Qwen/Qwen2.5-Coder-32B-Instruct" is excellent for coding.
# "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" is also great if available via API.
MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
| 8 |
+
|
| 9 |
+
def generate_redesigned_code(scraped_data, style_preference, framework, hf_token=None):
    """
    Generate modern web component code based on scraped page data.

    Args:
        scraped_data: Dict produced by scraper.scrape_webpage; may contain
            an "error" key when scraping failed.
        style_preference: Design style label, e.g. "Minimalist".
        framework: Output format label, e.g. "HTML/CSS/JS (Single File)".
        hf_token: Optional Hugging Face API token used for the inference call.

    Returns:
        str: The raw model output (typically a markdown code block), or an
        error message when scraping failed or the API call raised.
    """
    if "error" in scraped_data:
        # Bug fix: this branch previously returned a 3-tuple while every
        # other path returns a plain string; normalize the return type.
        return f"Error in scraping: {scraped_data['error']}"

    client = InferenceClient(model=MODEL_ID, token=hf_token)

    # Condense the scraped fields into a compact prompt payload.
    content_summary = f"""
    Website Title: {scraped_data.get('title')}
    Key Content: {scraped_data.get('summary')}
    Main Image: {scraped_data.get('top_image')}
    Other Images: {', '.join(scraped_data.get('other_images', []))}
    """

    system_prompt = """You are an expert Frontend Developer and UI/UX Designer.
    Your task is to REDESIGN a website content into a STUNNING, MODERN, PREMIUM web interface.

    Rules:
    1. Focus entirely on UI/UX, Typography, Colors, and Layout.
    2. Make it look expensive (glassmorphism, subtle shadows, gradients, modern sans-serif fonts).
    3. Use the provided content and images.
    4. Return valid, working code.
    5. If the user asks for a Framework (React/Angular), output components.
       If they ask for HTML/CSS, output a single HTML file with embedded CSS/JS.

    IMPORTANT: Provide the code in a single code block if it's HTML, or clearly separated if it's a framework.
    For this request, please output a SINGLE HTML file containing everything (HTML, CSS in <style>, JS in <script>) so it can be previewed easily, unless strictly impossible.
    """

    user_prompt = f"""
    Redesign this content:
    {content_summary}

    Style Preference: {style_preference}
    Target Framework: {framework} (If 'HTML/CSS/JS', keep it in one file).

    The design must be responsive, use a modern color palette, and include hover effects.
    Ensure the images provided are used if they exist.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # Streaming is cool but for simplicity we just wait for the response.
        response = client.chat_completion(
            messages=messages,
            max_tokens=4000,
            temperature=0.7
        )

        # Return the raw text; the caller handles code-block extraction.
        return response.choices[0].message.content

    except Exception as e:
        # Broad catch is deliberate: the UI expects an error string rather
        # than a raised exception from the API boundary.
        return f"Error generating code: {str(e)}"
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
huggingface_hub
|
| 3 |
+
requests
|
| 4 |
+
beautifulsoup4
|
| 5 |
+
newspaper3k
|
| 6 |
+
lxml
|
scraper.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
from newspaper import Article
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
# Configure logging once at import time; the module-level logger is used by
# scrape_webpage for progress and error reporting.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
def scrape_webpage(url):
    """
    Scrape a webpage to extract title, text content, and image URLs.

    Uses 'newspaper3k' for article text and 'BeautifulSoup' for structure/images.

    Args:
        url: Absolute URL of the page to scrape.

    Returns:
        dict with keys "title", "summary" (text capped at 4000 chars to
        avoid token overflow), "top_image", "other_images" (up to 5
        absolute URLs) and "url"; or {"error": message} on any failure.
    """
    from urllib.parse import urljoin  # stdlib; only needed here

    try:
        logger.info(f"Scraping URL: {url}")

        # Method 1: Newspaper3k for clean text extraction.
        article = Article(url)
        article.download()
        article.parse()

        title = article.title
        text_content = article.text
        top_image = article.top_image

        # Method 2: BeautifulSoup for more structural details.
        # Fix: the impersonated Chrome UA reads "(KHTML, like Gecko)",
        # not "(HTML, like Gecko)".
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Collect image sources to give the AI assets to work with.
        # Improvement: resolve relative src values against the page URL
        # (previously they were silently dropped) and skip duplicates.
        images = []
        for img in soup.find_all('img', src=True):
            src = urljoin(url, img['src'])
            if src.startswith(('http://', 'https://')) and src not in images:
                images.append(src)

        # Slicing already handles strings shorter than the cap.
        summary_text = text_content[:4000]

        return {
            "title": title,
            "summary": summary_text,
            "top_image": top_image,
            "other_images": images[:5],  # top 5 images
            "url": url,
        }

    except Exception as e:
        # Broad catch is deliberate: callers expect an {"error": ...} dict
        # instead of a raised exception.
        logger.error(f"Error scraping {url}: {e}")
        return {"error": str(e)}
|
| 54 |
+
|
| 55 |
+
if __name__ == "__main__":
    # Manual smoke test: scrape a known site and dump the resulting dict.
    result = scrape_webpage("https://huggingface.co/")
    print(result)
|