|
|
|
|
|
""" |
|
|
Browser Automation Tool - Flask Interface (No Gradio Dependencies) |
|
|
""" |
|
|
|
|
|
from flask import Flask, render_template_string, request, jsonify |
|
|
import time |
|
|
from selenium import webdriver |
|
|
from selenium.webdriver.chrome.options import Options |
|
|
from selenium.webdriver.common.by import By |
|
|
from selenium.webdriver.support.ui import WebDriverWait |
|
|
from selenium.webdriver.support import expected_conditions as EC |
|
|
import io |
|
|
from PIL import Image |
|
|
import base64 |
|
|
|
|
|
# Flask application object; the handlers in this file attach to it via @app.route.
app = Flask(__name__) |
|
|
|
|
|
def setup_driver(headless=True, window_size="1920,1080"):
    """Build a Chrome WebDriver from a fixed set of command-line flags.

    Args:
        headless: When truthy, ``--headless`` is passed to Chrome.
        window_size: Text appended to ``--window-size=`` (default
            ``"1920,1080"``).

    Returns:
        The ``webdriver.Chrome`` instance created with those options.
    """
    browser_options = Options()

    # Flags are applied in this order; --headless is only present on request.
    launch_flags = ["--headless"] if headless else []
    launch_flags += [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        f"--window-size={window_size}",
        "--disable-gpu",
    ]
    for flag in launch_flags:
        browser_options.add_argument(flag)

    return webdriver.Chrome(options=browser_options)
|
|
|
|
|
def navigate_and_screenshot(url, headless=True, window_size="1920,1080"):
    """Open *url* in Chrome and return a PNG screenshot encoded as base64.

    Args:
        url: Address handed to ``driver.get``.
        headless: Forwarded to ``setup_driver``.
        window_size: Forwarded to ``setup_driver``.

    Returns:
        A dict. On success: ``{"success": True, "screenshot": <base64 str>,
        "message": ...}``. On any exception: ``{"success": False,
        "error": str(exc), "message": ...}``. Exceptions are never
        propagated to the caller.
    """
    try:
        driver = setup_driver(headless, window_size)
        try:
            driver.get(url)
            # Fixed pause between navigation and capture (presumably to let
            # the page finish rendering).
            time.sleep(3)
            screenshot = driver.get_screenshot_as_png()
        finally:
            # Always shut the browser down, even when navigation or capture
            # fails; otherwise every failed request leaks a Chrome process.
            driver.quit()

        screenshot_b64 = base64.b64encode(screenshot).decode('utf-8')
        return {
            "success": True,
            "screenshot": screenshot_b64,
            "message": "Screenshot captured successfully",
        }
    except Exception as e:
        # Boundary handler: callers expect a result dict, not an exception.
        return {
            "success": False,
            "error": str(e),
            "message": "Error capturing screenshot",
        }
|
|
|
|
|
def extract_text_content(url, headless=True):
    """Open *url* in Chrome and return its title and a text excerpt.

    The page source is parsed with BeautifulSoup's ``html.parser``; the
    extracted text is cut to its first 1000 characters followed by ``"..."``
    when it is longer than that.

    Args:
        url: Address handed to ``driver.get``.
        headless: Forwarded to ``setup_driver``.

    Returns:
        A dict. On success: ``{"success": True, "title": ..., "content": ...,
        "message": ...}``. On any exception (including a missing ``bs4``
        package): ``{"success": False, "error": str(exc), "message": ...}``.
        Exceptions are never propagated to the caller.
    """
    try:
        driver = setup_driver(headless)
        try:
            driver.get(url)
            # Fixed pause between navigation and reading the page
            # (presumably to let dynamic content load).
            time.sleep(2)
            title = driver.title
            html = driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails;
            # otherwise every failed request leaks a Chrome process.
            driver.quit()

        # Imported here so that a missing bs4 install is reported through the
        # error dict instead of breaking module import.
        from bs4 import BeautifulSoup

        full_text = BeautifulSoup(html, 'html.parser').get_text()
        if len(full_text) > 1000:
            text = full_text[:1000] + "..."
        else:
            text = full_text

        return {
            "success": True,
            "title": title,
            "content": text,
            "message": "Content extracted successfully",
        }
    except Exception as e:
        # Boundary handler: callers expect a result dict, not an exception.
        return {
            "success": False,
            "error": str(e),
            "message": "Error extracting content",
        }
|
|
|
|
|
|
|
|
# Single-page UI served by the "/" route. The text is passed through
# render_template_string, so it must stay free of Jinja delimiters.
# The page script inserts server-provided values (page title, extracted text,
# error messages) as text nodes only, because those values originate from
# arbitrary third-party pages and must never be parsed as markup.
HTML_TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Browser Automation Tool - Flask</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
        .container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
        .header { text-align: center; color: #333; margin-bottom: 30px; }
        .form-section { margin: 20px 0; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }
        .form-group { margin: 10px 0; }
        label { display: block; margin-bottom: 5px; font-weight: bold; }
        input[type="text"], textarea { width: 100%; padding: 10px; border: 1px solid #ccc; border-radius: 5px; }
        button { background: #007bff; color: white; padding: 10px 20px; border: none; border-radius: 5px; cursor: pointer; margin: 5px; }
        button:hover { background: #0056b3; }
        .result { margin: 20px 0; padding: 15px; border-radius: 5px; }
        .success { background: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
        .error { background: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
        .screenshot { max-width: 100%; height: auto; border: 1px solid #ddd; border-radius: 5px; }
        .tabs { margin: 20px 0; }
        .tab-content { display: none; }
        .tab-content.active { display: block; }
        .tab-buttons { margin-bottom: 20px; }
        .tab-buttons button { margin-right: 10px; }
        .tab-buttons button.active { background: #0056b3; }
        pre { background: #f8f9fa; padding: 10px; border-radius: 5px; overflow-x: auto; }
    </style>
    <script>
        // Shared request flow for both action buttons: read the form, POST it
        // as JSON to the endpoint and render the JSON reply into resultId.
        async function submitUrl(button, endpoint, resultId, busyLabel) {
            const url = document.getElementById('url').value.trim();
            const headless = document.getElementById('headless').checked;

            if (!url) {
                alert('Please enter a URL');
                return;
            }

            // Remember the label so it can be restored unchanged afterwards.
            const idleLabel = button.textContent;
            button.disabled = true;
            button.textContent = busyLabel;

            try {
                const response = await fetch(endpoint, {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({url: url, headless: headless})
                });
                const result = await response.json();
                displayResult(resultId, result);
            } catch (error) {
                displayError(resultId, 'Error: ' + error.message);
            } finally {
                button.disabled = false;
                button.textContent = idleLabel;
            }
        }

        function captureScreenshot(button) {
            return submitUrl(button, '/screenshot', 'screenshot-result', 'Capturing...');
        }

        function extractContent(button) {
            return submitUrl(button, '/extract', 'content-result', 'Extracting...');
        }

        function addBreaks(parent, count) {
            for (let i = 0; i < count; i++) {
                parent.appendChild(document.createElement('br'));
            }
        }

        function addLabel(parent, text) {
            const strong = document.createElement('strong');
            strong.textContent = text;
            parent.appendChild(strong);
        }

        // Renders a result object. Every server-provided value is added as a
        // text node (or an image source), never as markup.
        function displayResult(elementId, result) {
            const element = document.getElementById(elementId);
            element.textContent = '';

            if (result.success) {
                element.className = 'result success';
                element.appendChild(document.createTextNode(result.message));

                if (result.screenshot) {
                    addBreaks(element, 2);
                    const img = document.createElement('img');
                    img.src = 'data:image/png;base64,' + result.screenshot;
                    img.className = 'screenshot';
                    img.alt = 'Screenshot';
                    element.appendChild(img);
                }
                if (result.title) {
                    addBreaks(element, 2);
                    addLabel(element, 'Title:');
                    element.appendChild(document.createTextNode(' ' + result.title));
                }
                if (result.content) {
                    addBreaks(element, 2);
                    addLabel(element, 'Content:');
                    addBreaks(element, 1);
                    const pre = document.createElement('pre');
                    pre.textContent = result.content;
                    element.appendChild(pre);
                }
            } else {
                element.className = 'result error';
                element.appendChild(document.createTextNode(result.message));
                if (result.error) {
                    addBreaks(element, 1);
                    addLabel(element, 'Error:');
                    element.appendChild(document.createTextNode(' ' + result.error));
                }
            }
            element.style.display = 'block';
        }

        function displayError(elementId, message) {
            const element = document.getElementById(elementId);
            element.className = 'result error';
            element.textContent = message;
            element.style.display = 'block';
        }

        function showTab(button, tabName) {
            // Hide all tab contents
            document.querySelectorAll('.tab-content')
                .forEach(tab => tab.classList.remove('active'));

            // Remove active class from all buttons
            document.querySelectorAll('.tab-buttons button')
                .forEach(btn => btn.classList.remove('active'));

            // Show selected tab
            document.getElementById(tabName).classList.add('active');
            button.classList.add('active');
        }
    </script>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🌐 Browser Automation Tool - Flask</h1>
            <p>Simple web interface for browser automation (No Gradio dependencies!)</p>
        </div>

        <div class="tabs">
            <div class="tab-buttons">
                <button onclick="showTab(this, 'single-url')" class="active">Single URL</button>
                <button onclick="showTab(this, 'batch-processing')">Batch Processing</button>
            </div>

            <div id="single-url" class="tab-content active">
                <div class="form-section">
                    <h3>🔗 Single URL Processing</h3>
                    <div class="form-group">
                        <label for="url">URL to visit:</label>
                        <input type="text" id="url" placeholder="https://example.com" value="https://example.com">
                    </div>
                    <div class="form-group">
                        <label>
                            <input type="checkbox" id="headless" checked> Headless mode
                        </label>
                    </div>
                    <button onclick="captureScreenshot(this)">📸 Capture Screenshot</button>
                    <button onclick="extractContent(this)">📄 Extract Content</button>
                </div>

                <div id="screenshot-result" class="result" style="display:none;"></div>
                <div id="content-result" class="result" style="display:none;"></div>
            </div>

            <div id="batch-processing" class="tab-content">
                <div class="form-section">
                    <h3>⚡ Batch Processing</h3>
                    <p>Feature coming soon...</p>
                </div>
            </div>
        </div>

        <div class="form-section">
            <h3>✨ Features</h3>
            <ul>
                <li>🌐 <strong>Web Browser Control</strong>: Navigate websites programmatically</li>
                <li>📸 <strong>Screenshot Capture</strong>: Take screenshots of any webpage</li>
                <li>📄 <strong>Content Extraction</strong>: Extract text content from HTML</li>
                <li>🔧 <strong>No Gradio Dependencies</strong>: Pure Flask approach</li>
                <li>⚡ <strong>Fast &amp; Lightweight</strong>: Minimal dependencies</li>
            </ul>
        </div>
    </div>
</body>
</html>
'''
|
|
|
|
|
@app.route('/')
def home():
    """Serve the single-page UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page
|
|
|
|
|
@app.route('/screenshot', methods=['POST'])
def screenshot():
    """Handle ``POST /screenshot``: capture a screenshot of the posted URL.

    Expects a JSON object with ``url`` and an optional ``headless`` flag
    (defaults to true). Responds with the JSON result dict produced by
    ``navigate_and_screenshot``. A missing, non-object, or non-http(s)
    request is answered with HTTP 400 and the same ``success``/``error``/
    ``message`` shape, without starting a browser.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get('url', '')
    url = url.strip() if isinstance(url, str) else ''

    # The URL is opened by a browser running on the server, so restrict it to
    # web schemes; this rejects file:// and similar local-resource targets.
    if not url.lower().startswith(('http://', 'https://')):
        return jsonify({
            "success": False,
            "error": "URL must start with http:// or https://",
            "message": "Error capturing screenshot",
        }), 400

    headless = bool(data.get('headless', True))

    result = navigate_and_screenshot(url, headless)
    return jsonify(result)
|
|
|
|
|
@app.route('/extract', methods=['POST'])
def extract():
    """Handle ``POST /extract``: return title and text of the posted URL.

    Expects a JSON object with ``url`` and an optional ``headless`` flag
    (defaults to true). Responds with the JSON result dict produced by
    ``extract_text_content``. A missing, non-object, or non-http(s) request
    is answered with HTTP 400 and the same ``success``/``error``/``message``
    shape, without starting a browser.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get('url', '')
    url = url.strip() if isinstance(url, str) else ''

    # The URL is opened by a browser running on the server, so restrict it to
    # web schemes; this rejects file:// and similar local-resource targets.
    if not url.lower().startswith(('http://', 'https://')):
        return jsonify({
            "success": False,
            "error": "URL must start with http:// or https://",
            "message": "Error extracting content",
        }), 400

    headless = bool(data.get('headless', True))

    result = extract_text_content(url, headless)
    return jsonify(result)
|
|
|
|
|
if __name__ == "__main__":
    # Started as a script: listen on all interfaces, port 7860, debug off.
    server_options = {"host": "0.0.0.0", "port": 7860, "debug": False}
    app.run(**server_options)