sheikhcoders committed on
Commit
f05894e
·
verified ·
1 Parent(s): a0c7589

Flask-based interface - completely eliminates Gradio/OAuth issues

Browse files
Files changed (1) hide show
  1. app.py +235 -102
app.py CHANGED
@@ -1,10 +1,9 @@
1
  #!/usr/bin/env python3
2
  """
3
- Simple Browser Automation Tool for HuggingFace Spaces - Ultra Minimal
4
  """
5
 
6
- import gradio as gr
7
- import requests
8
  import time
9
  from selenium import webdriver
10
  from selenium.webdriver.chrome.options import Options
@@ -13,9 +12,9 @@ from selenium.webdriver.support.ui import WebDriverWait
13
  from selenium.webdriver.support import expected_conditions as EC
14
  import io
15
  from PIL import Image
 
16
 
17
- # Global driver storage
18
- active_drivers = {}
19
 
20
  def setup_driver(headless=True, window_size="1920,1080"):
21
  """Setup Chrome driver with options"""
@@ -31,19 +30,31 @@ def setup_driver(headless=True, window_size="1920,1080"):
31
  return driver
32
 
33
  def navigate_and_screenshot(url, headless=True, window_size="1920,1080"):
34
- """Navigate to URL and return screenshot"""
35
  try:
36
  driver = setup_driver(headless, window_size)
37
  driver.get(url)
38
- time.sleep(3) # Wait for page to load
39
 
40
- # Take screenshot
41
  screenshot = driver.get_screenshot_as_png()
42
  driver.quit()
43
 
44
- return screenshot
 
 
 
 
 
 
 
 
45
  except Exception as e:
46
- return f"Error: {str(e)}"
 
 
 
 
47
 
48
  def extract_text_content(url, headless=True):
49
  """Extract text content from URL"""
@@ -52,120 +63,242 @@ def extract_text_content(url, headless=True):
52
  driver.get(url)
53
  time.sleep(2)
54
 
55
- # Get page title
56
  title = driver.title
57
-
58
- # Get page source and extract text
59
  html = driver.page_source
60
 
61
- # Simple text extraction (remove HTML tags)
62
  from bs4 import BeautifulSoup
63
  soup = BeautifulSoup(html, 'html.parser')
64
  text = soup.get_text()[:1000] + "..." if len(soup.get_text()) > 1000 else soup.get_text()
65
 
66
  driver.quit()
67
 
68
- return f"Title: {title}\n\nContent:\n{text}"
 
 
 
 
 
69
  except Exception as e:
70
- return f"Error: {str(e)}"
 
 
 
 
71
 
72
- def batch_navigate(urls, headless=True):
73
- """Navigate to multiple URLs and return results"""
74
- results = []
75
- for i, url in enumerate(urls.split('\n')):
76
- url = url.strip()
77
- if not url:
78
- continue
79
-
80
- try:
81
- driver = setup_driver(headless)
82
- driver.get(url)
83
- time.sleep(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- # Get basic info
86
- title = driver.title
87
- current_url = driver.current_url
88
- screenshot = driver.get_screenshot_as_png()
89
 
90
- results.append({
91
- "url": url,
92
- "title": title,
93
- "current_url": current_url,
94
- "screenshot": screenshot
95
- })
96
 
97
- driver.quit()
98
- except Exception as e:
99
- results.append({
100
- "url": url,
101
- "error": str(e)
102
- })
103
-
104
- return results
105
-
106
- # Gradio Interface
107
- def main():
108
- with gr.Blocks(title="Browser Automation Tool") as demo:
109
- gr.Markdown("# 🌐 Browser Automation Tool - Build Fixed")
110
-
111
- with gr.Tab("Single URL"):
112
- with gr.Row():
113
- with gr.Column():
114
- url_input = gr.Textbox(label="URL to visit", placeholder="https://example.com")
115
- headless = gr.Checkbox(label="Headless mode", value=True)
116
- window_size = gr.Textbox(label="Window size", value="1920,1080")
117
 
118
- with gr.Column():
119
- navigate_btn = gr.Button("Navigate & Screenshot", variant="primary")
120
- extract_btn = gr.Button("Extract Content")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- screenshot_output = gr.Image(label="Screenshot")
123
- content_output = gr.Textbox(label="Content", lines=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- with gr.Tab("Batch Processing"):
126
- with gr.Row():
127
- with gr.Column():
128
- urls_input = gr.Textbox(
129
- label="URLs (one per line)",
130
- placeholder="https://example.com\nhttps://google.com",
131
- lines=5
132
- )
133
- batch_headless = gr.Checkbox(label="Headless mode", value=True)
134
- batch_btn = gr.Button("Process URLs", variant="primary")
135
 
136
- with gr.Column():
137
- batch_results = gr.JSON(label="Results")
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- # Button handlers
140
- navigate_btn.click(
141
- fn=navigate_and_screenshot,
142
- inputs=[url_input, headless, window_size],
143
- outputs=[screenshot_output]
144
- )
145
 
146
- extract_btn.click(
147
- fn=extract_text_content,
148
- inputs=[url_input, headless],
149
- outputs=[content_output]
150
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- batch_btn.click(
153
- fn=batch_navigate,
154
- inputs=[urls_input, batch_headless],
155
- outputs=[batch_results]
156
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
- gr.Markdown("""
159
- ## Features
160
- - 🌐 **Web Browser Control**: Navigate websites programmatically
161
- - 📸 **Screenshot Capture**: Take screenshots of any webpage
162
- - 🔍 **Content Extraction**: Extract text content from HTML
163
- - ⚡ **Batch Processing**: Process multiple URLs at once
164
- - 🔧 **Configurable Options**: Headless mode, window sizes
165
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- return demo
 
168
 
169
  if __name__ == "__main__":
170
- demo = main()
171
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  #!/usr/bin/env python3
2
  """
3
+ Browser Automation Tool - Flask Interface (No Gradio Dependencies)
4
  """
5
 
6
+ from flask import Flask, render_template_string, request, jsonify
 
7
  import time
8
  from selenium import webdriver
9
  from selenium.webdriver.chrome.options import Options
 
12
  from selenium.webdriver.support import expected_conditions as EC
13
  import io
14
  from PIL import Image
15
+ import base64
16
 
17
+ app = Flask(__name__)
 
18
 
19
  def setup_driver(headless=True, window_size="1920,1080"):
20
  """Setup Chrome driver with options"""
 
30
  return driver
31
 
def navigate_and_screenshot(url, headless=True, window_size="1920,1080"):
    """Navigate to ``url`` and return a PNG screenshot encoded as base64.

    Args:
        url: Address to open. The value arrives straight from the web form,
            so only absolute ``http``/``https`` URLs are accepted; anything
            else (empty input, ``file://`` paths, non-strings) is reported
            as an error without starting a browser.
        headless: Forwarded unchanged to ``setup_driver``.
        window_size: Forwarded unchanged to ``setup_driver``.

    Returns:
        A dict with ``success``, ``message`` and either ``screenshot``
        (base64 text of the PNG) on success or ``error`` (the exception
        text) on failure. No exception escapes this function.
    """
    from urllib.parse import urlsplit

    driver = None
    try:
        target = (url or "").strip()
        parts = urlsplit(target)
        if parts.scheme not in ("http", "https") or not parts.netloc:
            raise ValueError("URL must be an absolute http(s) address")

        driver = setup_driver(headless, window_size)
        driver.get(target)
        time.sleep(3)  # fixed pause so the page can finish rendering

        # Take the screenshot and convert it to base64 for web display.
        screenshot = driver.get_screenshot_as_png()
        screenshot_b64 = base64.b64encode(screenshot).decode('utf-8')

        return {
            "success": True,
            "screenshot": screenshot_b64,
            "message": "Screenshot captured successfully"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "message": "Error capturing screenshot"
        }
    finally:
        # Always release the browser, including when get()/screenshot raised;
        # otherwise every failed request leaves a Chrome process behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # Best-effort cleanup: the response dict is already decided.
                pass
58
 
59
  def extract_text_content(url, headless=True):
60
  """Extract text content from URL"""
 
63
  driver.get(url)
64
  time.sleep(2)
65
 
 
66
  title = driver.title
 
 
67
  html = driver.page_source
68
 
69
+ # Simple text extraction
70
  from bs4 import BeautifulSoup
71
  soup = BeautifulSoup(html, 'html.parser')
72
  text = soup.get_text()[:1000] + "..." if len(soup.get_text()) > 1000 else soup.get_text()
73
 
74
  driver.quit()
75
 
76
+ return {
77
+ "success": True,
78
+ "title": title,
79
+ "content": text,
80
+ "message": "Content extracted successfully"
81
+ }
82
  except Exception as e:
83
+ return {
84
+ "success": False,
85
+ "error": str(e),
86
+ "message": "Error extracting content"
87
+ }
88
 
89
+ # HTML Template
90
+ HTML_TEMPLATE = '''
91
+ <!DOCTYPE html>
92
+ <html lang="en">
93
+ <head>
94
+ <meta charset="UTF-8">
95
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
96
+ <title>Browser Automation Tool - Flask</title>
97
+ <style>
98
+ body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
99
+ .container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
100
+ .header { text-align: center; color: #333; margin-bottom: 30px; }
101
+ .form-section { margin: 20px 0; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }
102
+ .form-group { margin: 10px 0; }
103
+ label { display: block; margin-bottom: 5px; font-weight: bold; }
104
+ input[type="text"], textarea { width: 100%; padding: 10px; border: 1px solid #ccc; border-radius: 5px; }
105
+ button { background: #007bff; color: white; padding: 10px 20px; border: none; border-radius: 5px; cursor: pointer; margin: 5px; }
106
+ button:hover { background: #0056b3; }
107
+ .result { margin: 20px 0; padding: 15px; border-radius: 5px; }
108
+ .success { background: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
109
+ .error { background: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
110
+ .screenshot { max-width: 100%; height: auto; border: 1px solid #ddd; border-radius: 5px; }
111
+ .tabs { margin: 20px 0; }
112
+ .tab-content { display: none; }
113
+ .tab-content.active { display: block; }
114
+ .tab-buttons { margin-bottom: 20px; }
115
+ .tab-buttons button { margin-right: 10px; }
116
+ pre { background: #f8f9fa; padding: 10px; border-radius: 5px; overflow-x: auto; }
117
+ </style>
118
+ <script>
/**
 * Request a screenshot of the URL typed into the form and show the outcome
 * in the #screenshot-result panel.
 *
 * Meant to be wired to an inline onclick handler; the clicked button is
 * disabled while the request is in flight and then restored to the exact
 * label it had before (including its emoji prefix).
 */
async function captureScreenshot() {
    const url = document.getElementById('url').value.trim();
    const headless = document.getElementById('headless').checked;

    if (!url) {
        alert('Please enter a URL');
        return;
    }

    // Inline handlers expose the click as window.event. When the function is
    // called programmatically there is no event, so skip the button feedback
    // instead of throwing.
    const trigger = window.event ? window.event.target : null;
    const button = trigger instanceof HTMLButtonElement ? trigger : null;
    const originalLabel = button ? button.textContent : '';

    if (button) {
        button.disabled = true;
        button.textContent = 'Capturing...';
    }

    try {
        const response = await fetch('/screenshot', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({url: url, headless: headless})
        });

        const result = await response.json();
        displayResult('screenshot-result', result);
    } catch (error) {
        displayError('screenshot-result', 'Error: ' + error.message);
    } finally {
        if (button) {
            button.disabled = false;
            button.textContent = originalLabel;
        }
    }
}
149
+
/**
 * Request the title and text content of the URL typed into the form and
 * show the outcome in the #content-result panel.
 *
 * Meant to be wired to an inline onclick handler; the clicked button is
 * disabled while the request is in flight and then restored to the exact
 * label it had before (including its emoji prefix).
 */
async function extractContent() {
    const url = document.getElementById('url').value.trim();
    const headless = document.getElementById('headless').checked;

    if (!url) {
        alert('Please enter a URL');
        return;
    }

    // Inline handlers expose the click as window.event. When the function is
    // called programmatically there is no event, so skip the button feedback
    // instead of throwing.
    const trigger = window.event ? window.event.target : null;
    const button = trigger instanceof HTMLButtonElement ? trigger : null;
    const originalLabel = button ? button.textContent : '';

    if (button) {
        button.disabled = true;
        button.textContent = 'Extracting...';
    }

    try {
        const response = await fetch('/extract', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({url: url, headless: headless})
        });

        const result = await response.json();
        displayResult('content-result', result);
    } catch (error) {
        displayError('content-result', 'Error: ' + error.message);
    } finally {
        if (button) {
            button.disabled = false;
            button.textContent = originalLabel;
        }
    }
}
180
 
/**
 * Render a server response inside the given result panel.
 *
 * The title, content and error fields originate from arbitrary third-party
 * pages, so every value is inserted as a text node or element property and
 * never parsed as HTML.
 *
 * @param {string} elementId - id of the panel element to fill.
 * @param {Object} result - JSON payload with success, message and optional
 *     screenshot / title / content / error fields.
 */
function displayResult(elementId, result) {
    const element = document.getElementById(elementId);

    const addText = (text) => {
        element.appendChild(document.createTextNode(text));
    };
    const addBreak = () => {
        element.appendChild(document.createElement('br'));
    };
    const addLabel = (text) => {
        const strong = document.createElement('strong');
        strong.textContent = text;
        element.appendChild(strong);
    };

    // Drop whatever a previous request rendered before adding new nodes.
    element.textContent = '';
    addText(String(result.message));

    if (result.success) {
        element.className = 'result success';

        if (result.screenshot) {
            const image = document.createElement('img');
            image.src = 'data:image/png;base64,' + result.screenshot;
            image.className = 'screenshot';
            image.alt = 'Screenshot';
            addBreak();
            addBreak();
            element.appendChild(image);
        }
        if (result.title) {
            addBreak();
            addBreak();
            addLabel('Title:');
            addText(' ' + result.title);
        }
        if (result.content) {
            const block = document.createElement('pre');
            block.textContent = result.content;
            addBreak();
            addBreak();
            addLabel('Content:');
            addBreak();
            element.appendChild(block);
        }
    } else {
        element.className = 'result error';
        if (result.error) {
            addBreak();
            addLabel('Error:');
            addText(' ' + result.error);
        }
    }
    element.style.display = 'block';
}
202
 
/**
 * Show a plain-text error message inside the given result panel.
 *
 * @param {string} elementId - id of the panel element to fill.
 * @param {string} message - text to display; rendered as text, not as HTML.
 */
function displayError(elementId, message) {
    const element = document.getElementById(elementId);
    element.className = 'result error';
    // textContent keeps any markup in the message from being interpreted.
    element.textContent = message;
    element.style.display = 'block';
}
209
 
/**
 * Activate the tab panel whose id is tabName and mark the clicked tab
 * button as active.
 *
 * @param {string} tabName - id of the .tab-content element to show.
 */
function showTab(tabName) {
    const selected = document.getElementById(tabName);
    if (!selected) {
        // Unknown tab: keep the current one visible rather than hiding
        // everything and then failing.
        return;
    }

    // Hide all tab contents
    document.querySelectorAll('.tab-content')
        .forEach(tab => tab.classList.remove('active'));

    // Remove active class from all buttons
    document.querySelectorAll('.tab-buttons button')
        .forEach(btn => btn.classList.remove('active'));

    // Show selected tab
    selected.classList.add('active');

    // The click is only available as window.event when called from an
    // inline handler; a programmatic call has no button to highlight.
    const trigger = window.event ? window.event.target : null;
    if (trigger && trigger.classList) {
        trigger.classList.add('active');
    }
}
223
+ </script>
224
+ </head>
225
+ <body>
226
+ <div class="container">
227
+ <div class="header">
228
+ <h1>🌐 Browser Automation Tool - Flask</h1>
229
+ <p>Simple web interface for browser automation (No Gradio dependencies!)</p>
230
+ </div>
231
 
232
+ <div class="tabs">
233
+ <div class="tab-buttons">
234
+ <button onclick="showTab('single-url')" class="active">Single URL</button>
235
+ <button onclick="showTab('batch-processing')">Batch Processing</button>
236
+ </div>
237
+
238
+ <div id="single-url" class="tab-content active">
239
+ <div class="form-section">
240
+ <h3>🌐 Single URL Processing</h3>
241
+ <div class="form-group">
242
+ <label for="url">URL to visit:</label>
243
+ <input type="text" id="url" placeholder="https://example.com" value="https://example.com">
244
+ </div>
245
+ <div class="form-group">
246
+ <label>
247
+ <input type="checkbox" id="headless" checked> Headless mode
248
+ </label>
249
+ </div>
250
+ <button onclick="captureScreenshot()">📸 Capture Screenshot</button>
250
+ <button onclick="extractContent()">🔍 Extract Content</button>
252
+ </div>
253
+
254
+ <div id="screenshot-result" class="result" style="display:none;"></div>
255
+ <div id="content-result" class="result" style="display:none;"></div>
256
+ </div>
257
+
258
+ <div id="batch-processing" class="tab-content">
259
+ <div class="form-section">
260
+ <h3>⚡ Batch Processing</h3>
261
+ <p>Feature coming soon...</p>
262
+ </div>
263
+ </div>
264
+ </div>
265
 
266
+ <div class="form-section">
267
+ <h3>✨ Features</h3>
268
+ <ul>
269
+ <li>🌐 <strong>Web Browser Control</strong>: Navigate websites programmatically</li>
270
+ <li>📸 <strong>Screenshot Capture</strong>: Take screenshots of any webpage</li>
271
+ <li>🔍 <strong>Content Extraction</strong>: Extract text content from HTML</li>
272
+ <li>🔧 <strong>No Gradio Dependencies</strong>: Pure Flask approach</li>
273
+ <li>⚡ <strong>Fast & Lightweight</strong>: Minimal dependencies</li>
274
+ </ul>
275
+ </div>
276
+ </div>
277
+ </body>
278
+ </html>
279
+ '''
280
+
@app.route('/')
def home():
    """Serve the single-page UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page
284
+
@app.route('/screenshot', methods=['POST'])
def screenshot():
    """Handle ``POST /screenshot``: capture a screenshot of the posted URL.

    Expects a JSON object with ``url`` and an optional ``headless`` flag
    (defaults to ``True``). The response is always the JSON result dict
    produced by ``navigate_and_screenshot``.
    """
    # silent=True makes a missing or malformed JSON body come back as None
    # instead of an HTML error page the client cannot parse; a non-object
    # payload (null, list, string) is treated the same way.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get('url', '')
    headless = data.get('headless', True)

    result = navigate_and_screenshot(url, headless)
    return jsonify(result)
293
+
@app.route('/extract', methods=['POST'])
def extract():
    """Handle ``POST /extract``: return title and text of the posted URL.

    Expects a JSON object with ``url`` and an optional ``headless`` flag
    (defaults to ``True``). The response is always the JSON result dict
    produced by ``extract_text_content``.
    """
    # silent=True makes a missing or malformed JSON body come back as None
    # instead of an HTML error page the client cannot parse; a non-object
    # payload (null, list, string) is treated the same way.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get('url', '')
    headless = data.get('headless', True)

    result = extract_text_content(url, headless)
    return jsonify(result)
302
 
if __name__ == "__main__":
    # Script entry point: start the Flask server with debug mode off.
    server_options = {"host": "0.0.0.0", "port": 7860, "debug": False}
    app.run(**server_options)