maylinejix committed on
Commit
9d8ea9a
·
verified ·
1 Parent(s): dba803c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory
2
+ import undetected_chromedriver as uc
3
+ from selenium.webdriver.common.by import By
4
+ from selenium.webdriver.support.ui import WebDriverWait
5
+ from selenium.webdriver.support import expected_conditions as EC
6
+ import os
7
+ import time
8
+ import base64
9
+ from datetime import datetime
10
+
11
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Directory for generated files (e.g. screenshots). Created at import time so
# later writes into it do not fail on a missing directory.
PUBLIC_DIR = 'public'
os.makedirs(PUBLIC_DIR, exist_ok=True)
14
+
15
def get_driver():
    """Create and return a new headless undetected-chromedriver Chrome.

    The caller owns the returned driver and is responsible for calling
    ``quit()`` on it.
    """
    chrome_flags = (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
    )

    chrome_options = uc.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    return uc.Chrome(options=chrome_options, use_subprocess=False)
22
+
23
@app.route('/')
def index():
    """Return a JSON status message that lists the API endpoints."""
    endpoint_help = {
        'POST /api/scrape': 'Get HTML content from URL',
        'POST /api/execute': 'Execute Python code with Selenium',
    }
    payload = {
        'message': 'Undetected Chrome Scraper API is running',
        'endpoints': endpoint_help,
    }
    return jsonify(payload)
32
+
33
@app.route('/api/scrape', methods=['POST'])
def scrape():
    """Load a URL in headless Chrome and return the rendered page as JSON.

    Expects a JSON object body with:
        url: page to load (required).
        wait: seconds to sleep after navigation before reading the page;
            must be a non-negative number, defaults to 3.
        screenshot: when truthy, a PNG is saved under PUBLIC_DIR and its
            ``/files/`` URL is added to the response; defaults to False.

    Returns:
        200 with ``{'success': True, 'data': {...}}`` containing ``html``,
        ``title``, ``url``, ``timestamp`` and optionally ``screenshot``;
        400 when ``url`` is missing or ``wait`` is invalid;
        500 with the exception text when the driver fails.
    """
    # silent=True returns None instead of aborting on a missing or malformed
    # JSON body. Non-object payloads (list, string, ...) are treated as empty
    # so the .get() calls below cannot raise outside the try block.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get('url')
    if not url:
        return jsonify({'success': False, 'error': 'URL is required'}), 400

    wait_time = data.get('wait', 3)
    # Reject values time.sleep() would choke on (strings, null, negatives,
    # NaN) with a client error instead of letting them surface as a 500.
    if not isinstance(wait_time, (int, float)) or not wait_time >= 0:
        return jsonify({
            'success': False,
            'error': 'wait must be a non-negative number'
        }), 400

    screenshot = data.get('screenshot', False)

    # NOTE(review): `url` is caller-supplied and fetched from this server, so
    # internal addresses are reachable through this endpoint (SSRF). Restrict
    # or authenticate access if the service is exposed publicly.
    driver = None
    try:
        driver = get_driver()
        driver.get(url)
        # Fixed delay to give client-side scripts time to render.
        time.sleep(wait_time)

        result = {
            'success': True,
            'data': {
                'html': driver.page_source,
                'title': driver.title,
                'url': driver.current_url,
                'timestamp': datetime.now().isoformat()
            }
        }

        if screenshot:
            # NOTE(review): second-resolution names can collide when two
            # requests take a screenshot within the same second.
            filename = f'screenshot-{int(time.time())}.png'
            filepath = os.path.join(PUBLIC_DIR, filename)
            driver.save_screenshot(filepath)
            result['data']['screenshot'] = f"{request.host_url}files/{filename}"

        return jsonify(result)

    except Exception as e:
        # Top-level boundary: report any driver/navigation failure as JSON.
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        # Always release the browser process, even on failure.
        if driver:
            driver.quit()
76
+
77
@app.route('/api/execute', methods=['POST'])
def execute():
    """Run caller-supplied Python code against a fresh headless Chrome driver.

    Expects a JSON object body with:
        code: Python source to execute (required). The code runs with
            ``driver``, ``By``, ``WebDriverWait``, ``EC``, ``time``,
            ``PUBLIC_DIR`` and ``os`` predefined. Whatever it assigns to a
            variable named ``result`` is returned to the caller.

    Returns:
        200 with ``{'success': True, 'data': {...}}`` containing ``result``,
        ``files`` (every ``screenshot-*.png`` currently in PUBLIC_DIR with
        its public URL) and ``timestamp``;
        400 when ``code`` is missing;
        500 with the exception text when the code or the driver raises.
    """
    # silent=True returns None instead of aborting on a missing or malformed
    # JSON body. Non-object payloads are treated as empty so .get() below
    # cannot raise outside the try block.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    code = data.get('code')
    if not code:
        return jsonify({'success': False, 'error': 'Code is required'}), 400

    driver = None
    try:
        driver = get_driver()

        namespace = {
            'driver': driver,
            'By': By,
            'WebDriverWait': WebDriverWait,
            'EC': EC,
            'time': time,
            'PUBLIC_DIR': PUBLIC_DIR,
            'os': os
        }

        # SECURITY: this executes arbitrary caller-supplied code with full
        # access to `os` and the host process. It is remote code execution by
        # design; never expose this endpoint without authentication and
        # isolation.
        #
        # A single namespace is used for both globals and locals. With
        # separate dicts, functions, lambdas and (before Python 3.12)
        # comprehensions defined in `code` cannot see `driver` and the other
        # injected names, because nested scopes resolve free names through
        # globals only.
        exec(code, namespace)

        result = namespace.get('result')

        files = []
        if os.path.exists(PUBLIC_DIR):
            files = [
                {
                    'name': f,
                    'publicURL': f"{request.host_url}files/{f}"
                }
                for f in os.listdir(PUBLIC_DIR)
                if f.startswith('screenshot-') and f.endswith('.png')
            ]

        return jsonify({
            'success': True,
            'data': {
                'result': result,
                'files': files,
                'timestamp': int(time.time())
            }
        })

    except Exception as e:
        # Top-level boundary: report failures of the submitted code, the
        # driver, or JSON serialisation of `result` as a JSON error.
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
    finally:
        # Always release the browser process, even on failure.
        if driver:
            driver.quit()
129
+
130
@app.route('/files/<path:filename>')
def serve_file(filename):
    """Serve the file at ``filename`` (a path relative to PUBLIC_DIR)."""
    response = send_from_directory(PUBLIC_DIR, filename)
    return response
133
+
134
if __name__ == '__main__':
    # Bind on all interfaces; the PORT environment variable overrides the
    # default port 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)