Mithun-999 commited on
Commit
c946fc8
·
1 Parent(s): 4e04931

feat: Implement AI-powered web scraper and redesign application with Gradio UI and updated README.

Browse files
README.md CHANGED
@@ -1,13 +1,32 @@
1
  ---
2
- title: WEB UIux
3
- emoji: 📉
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Redesiner
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AI Web Redesigner
3
+ emoji: 🎨
4
+ colorFrom: purple
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
+ # 🎨 AI Web Scraper & Redesigner
13
+
14
+ This Space allows you to input any URL, scrape its content, and use an Advanced AI Model (Qwen2.5-Coder-32B) to redesign it into a modern, stunning webpage.
15
+
16
+ ## How to Use
17
+ 1. Enter a URL (e.g., `https://example.com`)
18
+ 2. Select a Design Style (e.g., "Cyberpunk", "Minimalist")
19
+ 3. Select an Output Format (HTML/CSS/JS or Frameworks)
20
+ 4. (Optional) Enter your Hugging Face Token if you want to use your own quota, otherwise it uses the Space's secret.
21
+ 5. Click **Redesign**!
22
+
23
+ ## Features
24
+ - **Smart Scraping**: Extracts text and images from the target site.
25
+ - **AI Redesign**: Uses Large Language Models to write clean, modern code.
26
+ - **Live Preview**: See the generated website immediately in the browser.
27
+
28
+ ## Setup
29
+ To use the AI generation, you must set a `HF_TOKEN` in the Space's **Settings > Repository secrets**.
30
+ 1. Go to your HF Profile > Settings > Access Tokens.
31
+ 2. Create a generic token.
32
+ 3. In this Space, go to Settings and add `HF_TOKEN` with your key.
__pycache__/app.cpython-313.pyc ADDED
Binary file (3.76 kB). View file
 
__pycache__/generator.cpython-313.pyc ADDED
Binary file (2.93 kB). View file
 
__pycache__/scraper.cpython-313.pyc ADDED
Binary file (2.37 kB). View file
 
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scraper import scrape_webpage
3
+ from generator import generate_redesigned_code
4
+ import os
5
+
6
# Get token from environment variable or user input
# In Hugging Face Spaces, set HF_TOKEN in Settings > Secrets
DEFAULT_HF_TOKEN = os.getenv("HF_TOKEN")


def extract_html_block(text):
    """Return the contents of the first fenced code block in *text*.

    Prefers a ```html fence; falls back to any generic ``` fence; returns
    *text* unchanged when no fence is found. Used to feed raw HTML to the
    preview iframe.
    """
    if "```html" in text:
        parts = text.split("```html")
        if len(parts) > 1:
            return parts[1].split("```")[0]
        return text
    if "```" in text:
        parts = text.split("```")
        if len(parts) > 1:
            return parts[1]
    return text


def process_pipeline(url, style, framework, custom_token):
    """Scrape *url*, redesign it with the AI model, and return two strings.

    Returns (generated_code, preview_html): the raw model output and a
    cleaned HTML snippet for the preview iframe. On failure, returns
    (error_message, "").
    """
    token = custom_token if custom_token else DEFAULT_HF_TOKEN

    # `token` already falls back to DEFAULT_HF_TOKEN above, so a single
    # check suffices (the original's extra `and not DEFAULT_HF_TOKEN`
    # condition was redundant — it could never change the outcome).
    if not token:
        return "Please provide a Hugging Face Token to use the API.", ""

    # 1. Scrape
    scraped_data = scrape_webpage(url)
    if "error" in scraped_data:
        return f"Scraping Error: {scraped_data['error']}", ""

    # 2. Generate
    generated_code = generate_redesigned_code(scraped_data, style, framework, hf_token=token)

    # 3. Extract HTML for preview (simplistic: assumes the model returns a
    #    markdown code block; strips the fences to get raw code)
    clean_code = extract_html_block(generated_code)

    return generated_code, clean_code
38
# ---------------------------------------------------------------------------
# Gradio UI: two-column layout — controls on the left, tabbed output
# (live preview + generated source) on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 AI Web Scraper & Redesigner")
    gr.Markdown("Turn any URL into a Modern, Stunning Website Design using AI.")

    with gr.Row():
        # Left column: user inputs
        with gr.Column(scale=1):
            url_field = gr.Textbox(label="Website URL", placeholder="https://example.com")
            style_choice = gr.Dropdown(
                ["Modern & Clean", "Dark & Cyberpunk", "Vibrant & Playful", "Minimalist"],
                label="Design Style",
                value="Modern & Clean",
            )
            format_choice = gr.Dropdown(
                ["HTML/CSS/JS (Single File)", "React (Component)", "Angular (Component)"],
                label="Output Format",
                value="HTML/CSS/JS (Single File)",
            )
            token_field = gr.Textbox(
                label="HF API Token (Optional if set in Space Secrets)",
                type="password",
                placeholder="hf_...",
            )
            run_button = gr.Button("🚀 Redesign Website", variant="primary")

        # Right column: results
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Preview"):
                    # Rendered output shown in an iframe-like HTML component
                    live_preview = gr.HTML(label="Live Preview")
                with gr.TabItem("Code"):
                    source_view = gr.Code(language="html", label="Generated Code")

    # Wire the button to the scrape-and-redesign pipeline.
    run_button.click(
        fn=process_pipeline,
        inputs=[url_field, style_choice, format_choice, token_field],
        outputs=[source_view, live_preview],
    )

if __name__ == "__main__":
    demo.launch()
generator.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
# You can change the model here.
# "Qwen/Qwen2.5-Coder-32B-Instruct" is excellent for coding.
# "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" is also great if available via API.
# Hugging Face model ID used for every chat-completion call in this module.
MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
8
+
9
def generate_redesigned_code(scraped_data, style_preference, framework, hf_token=None):
    """Generate modern web component code based on scraping data.

    Parameters:
        scraped_data: dict produced by scraper.scrape_webpage — expected
            keys 'title', 'summary', 'top_image', 'other_images'
            (or a single 'error' key on scrape failure).
        style_preference: free-text design style chosen by the user.
        framework: target output format (single-file HTML or a framework).
        hf_token: optional Hugging Face API token for InferenceClient.

    Returns:
        A single string — the model's generated code/markdown on success,
        or a human-readable error message on failure.
    """

    # Defensive check (the caller normally filters scrape errors already).
    # BUG FIX: this branch previously returned a 3-tuple ("msg", "", "")
    # while every other path returns one string; callers treat the result
    # as a string, so the tuple silently broke downstream handling.
    if "error" in scraped_data:
        return f"Error in scraping: {scraped_data['error']}"

    client = InferenceClient(model=MODEL_ID, token=hf_token)

    # Compact textual summary of the scrape for the prompt.
    content_summary = f"""
    Website Title: {scraped_data.get('title')}
    Key Content: {scraped_data.get('summary')}
    Main Image: {scraped_data.get('top_image')}
    Other Images: {', '.join(scraped_data.get('other_images', []))}
    """

    system_prompt = """You are an expert Frontend Developer and UI/UX Designer.
    Your task is to REDESIGN a website content into a STUNNING, MODERN, PREMIUM web interface.

    Rules:
    1. Focus entirely on UI/UX, Typography, Colors, and Layout.
    2. Make it look expensive (glassmorphism, subtle shadows, gradients, modern sans-serif fonts).
    3. Use the provided content and images.
    4. Return valid, working code.
    5. If the user asks for a Framework (React/Angular), output components.
       If they ask for HTML/CSS, output a single HTML file with embedded CSS/JS.

    IMPORTANT: Provide the code in a single code block if it's HTML, or clearly separated if it's a framework.
    For this request, please output a SINGLE HTML file containing everything (HTML, CSS in <style>, JS in <script>) so it can be previewed easily, unless strictly impossible.
    """

    user_prompt = f"""
    Redesign this content:
    {content_summary}

    Style Preference: {style_preference}
    Target Framework: {framework} (If 'HTML/CSS/JS', keep it in one file).

    The design must be responsive, use a modern color palette, and include hover effects.
    Ensure the images provided are used if they exist.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # Streaming is cool but for simplicity we'll just wait for the response
        response = client.chat_completion(
            messages=messages,
            max_tokens=4000,
            temperature=0.7
        )

        generated_text = response.choices[0].message.content

        # Simple extraction of code block
        # In a real app, we might want more robust parsing
        return generated_text

    except Exception as e:
        # Best-effort API wrapper: surface the error as a string so the UI
        # can display it instead of crashing.
        return f"Error generating code: {str(e)}"
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ requests
4
+ beautifulsoup4
5
+ newspaper3k
6
+ lxml
scraper.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from newspaper import Article
4
+ import logging
5
+
6
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def scrape_webpage(url):
    """Scrape a webpage to extract title, text content, and image URLs.

    Uses 'newspaper3k' for article text and 'BeautifulSoup' for
    structure/images.

    Parameters:
        url: absolute http/https URL of the page to scrape.

    Returns:
        dict with keys 'title', 'summary', 'top_image', 'other_images',
        'url' on success, or {'error': <message>} on any failure.
    """
    from urllib.parse import urljoin  # stdlib; resolves relative image srcs

    try:
        logger.info(f"Scraping URL: {url}")

        # Method 1: Newspaper3k for clean text extraction
        article = Article(url)
        article.download()
        article.parse()

        title = article.title
        text_content = article.text
        top_image = article.top_image

        # Method 2: BeautifulSoup for more structural details (images)
        # NOTE(review): '(HTML, like Gecko)' is an upstream typo for
        # 'KHTML, like Gecko' — kept byte-identical since it is harmless.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (HTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Collect image sources to give the AI some assets to work with.
        # Generalized: relative srcs are now resolved against the page URL
        # (the original silently dropped every non-absolute src).
        images = []
        for img in soup.find_all('img', src=True):
            src = urljoin(url, img['src'])
            if src.startswith('http'):
                images.append(src)

        # Limit text content to avoid token overflow; slicing already
        # handles short strings, so no explicit length check is needed.
        summary_text = text_content[:4000]

        data = {
            "title": title,
            "summary": summary_text,
            "top_image": top_image,
            "other_images": images[:5],  # top 5 images
            "url": url
        }

        return data

    except Exception as e:
        # Deliberate best-effort behavior: report the failure as data so
        # the UI can show a friendly message instead of a traceback.
        logger.error(f"Error scraping {url}: {e}")
        return {"error": str(e)}
54
+
55
if __name__ == "__main__":
    # Test
    # Manual smoke test: scrape the Hugging Face homepage and print the
    # resulting dict (requires network access plus requests/newspaper3k).
    print(scrape_webpage("https://huggingface.co/"))