File size: 11,528 Bytes
5492f08
20a3b47
909eff0
 
e1d36ec
305d870
5492f08
ed64977
4b62bbf
d5f5403
4b62bbf
909eff0
4b62bbf
909eff0
 
 
 
 
4b62bbf
909eff0
 
4b62bbf
909eff0
 
d5f5403
909eff0
 
e1d36ec
4b62bbf
 
 
 
 
 
 
e1d36ec
4b62bbf
 
909eff0
 
1f75665
4b62bbf
1f75665
4b62bbf
909eff0
e1d36ec
909eff0
d5f5403
e1d36ec
909eff0
 
e1d36ec
4b62bbf
 
 
 
 
 
 
 
 
 
 
1f75665
4b62bbf
e1d36ec
4b62bbf
 
909eff0
e1d36ec
1f75665
4b62bbf
1f75665
e1d36ec
 
 
d5f5403
e1d36ec
 
 
 
 
 
5c995e9
e1d36ec
 
 
 
 
4b62bbf
 
e1d36ec
4b62bbf
e1d36ec
4b62bbf
 
e1d36ec
909eff0
1f75665
4b62bbf
1f75665
909eff0
 
e1d36ec
909eff0
d5f5403
909eff0
e1d36ec
4b62bbf
 
 
1f75665
4b62bbf
e1d36ec
4b62bbf
 
909eff0
 
1f75665
4b62bbf
1f75665
909eff0
 
e1d36ec
d5f5403
909eff0
e1d36ec
4b62bbf
 
 
1f75665
4b62bbf
e1d36ec
4b62bbf
 
909eff0
 
4b62bbf
909eff0
4b62bbf
5cda5d3
4b62bbf
5cda5d3
4b62bbf
909eff0
e1d36ec
 
67163c5
909eff0
d5f5403
909eff0
 
4b62bbf
e1d36ec
4b62bbf
55f950b
 
4b62bbf
 
cce5650
55f950b
e1d36ec
909eff0
 
1f75665
e1d36ec
 
4b62bbf
 
 
 
 
 
 
 
 
 
1f75665
4b62bbf
 
 
 
909eff0
4b62bbf
 
1f75665
4b62bbf
1f75665
 
4b62bbf
e1d36ec
4b62bbf
758e870
4b62bbf
758e870
4b62bbf
ed64977
4b62bbf
909eff0
4b62bbf
 
 
 
 
909eff0
 
4b62bbf
909eff0
4b62bbf
 
b1c3f6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fec291d
4b62bbf
 
 
1f75665
4b62bbf
 
 
909eff0
b1c3f6a
4b62bbf
 
 
909eff0
b1c3f6a
4b62bbf
 
 
 
 
 
 
 
 
 
 
 
 
 
b1c3f6a
 
 
 
 
 
 
 
 
 
 
 
 
 
4b62bbf
 
 
b1c3f6a
4b62bbf
 
 
 
 
b1c3f6a
 
 
 
 
 
 
 
4b62bbf
b1c3f6a
 
 
 
 
 
 
 
 
 
 
 
 
 
fddc2d3
b1c3f6a
 
 
 
 
 
 
4b62bbf
 
 
 
 
b1c3f6a
4b62bbf
 
 
 
5492f08
909eff0
e1d36ec
909eff0
5492f08
b1c3f6a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
from flask import Flask, request, jsonify
import requests
import random
import string
import time

# Flask application object; the /capture route defined in this file registers on it.
app = Flask(__name__)

# Authorization value used in requests (update with a valid Authorization).
# Sent verbatim as the "Authorization" header by the Botpress helper functions.
# NOTE(review): this is a credential committed in source. Consider loading it
# from an environment variable or secret store, and rotating this value.
TOKEN = "Bearer bp_pat_vTuxol25N0ymBpYaWqtWpFfGPKt260IfT784"

# -------------------------------------------------------------------
# Helper functions
# -------------------------------------------------------------------
def generate_random_name(length=5):
    """Return a random string of ``length`` ASCII letters (upper and lower case).

    Characters are drawn with replacement via ``random.choices``, so the
    result is not suitable for anything security-sensitive.
    """
    alphabet = string.ascii_letters
    picked = random.choices(alphabet, k=length)
    return "".join(picked)

# -------------------------------------------------------------------
# Workspace and Bot management
# -------------------------------------------------------------------
def create_workspace():
    """Create a Botpress workspace with a random name.

    Returns:
        The ``id`` value from the JSON response when the API answers 200 and
        an id is present; ``None`` on any other status, a missing id, or any
        exception (which is printed, not raised).
    """
    endpoint = "https://api.botpress.cloud/v1/admin/workspaces"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "Authorization": TOKEN,
    }
    body = {"name": generate_random_name()}

    try:
        resp = requests.post(endpoint, headers=request_headers, json=body, timeout=60)
        # Only a 200 response is inspected for an id; everything else is a failure.
        new_id = resp.json().get("id") if resp.status_code == 200 else None
        if not new_id:
            print(f"Workspace creation failed: {resp.status_code} {resp.text}")
            return None
        print(f"Workspace created: {new_id}")
        return new_id
    except Exception as exc:
        print(f"Error creating workspace: {exc}")
    return None

def create_bot(workspace_id):
    """Create a randomly named bot inside ``workspace_id`` and try to enable the integration.

    Args:
        workspace_id: Workspace identifier sent in the ``x-workspace-id`` header.

    Returns:
        The bot id taken from ``response["bot"]["id"]`` on a 200 response, or
        ``None`` when ``workspace_id`` is falsy, the API call fails, no id is
        returned, or an exception occurs (exceptions are printed, not raised).
        A failed integration install does not prevent the id being returned.
    """
    if not workspace_id:
        print("No workspace_id provided for bot creation")
        return None

    endpoint = "https://api.botpress.cloud/v1/admin/bots"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Authorization": TOKEN,
        "Content-Type": "application/json",
    }
    body = {"name": generate_random_name()}

    try:
        resp = requests.post(endpoint, headers=request_headers, json=body, timeout=60)
        new_bot_id = None
        if resp.status_code == 200:
            new_bot_id = resp.json().get("bot", {}).get("id")

        if not new_bot_id:
            print(f"Bot creation failed: {resp.status_code} {resp.text}")
            return None

        print(f"Bot created: {new_bot_id} in workspace: {workspace_id}")
        # Integration install is best-effort: the bot id is returned either way.
        if install_bot_integration(new_bot_id, workspace_id):
            print(f"Integration installed for bot {new_bot_id}")
        else:
            print(f"Integration install failed for bot {new_bot_id} (continuing)")
        return new_bot_id
    except Exception as exc:
        print(f"Error creating bot: {exc}")
    return None

def install_bot_integration(bot_id, workspace_id):
    """Enable the hard-coded integration version on a bot via a PUT to the admin API.

    Args:
        bot_id: Bot identifier (used in the URL and the ``x-bot-id`` header).
        workspace_id: Workspace identifier (``x-workspace-id`` header).

    Returns:
        ``True`` only when the API answers 200; ``False`` when either id is
        falsy, on any other status, or on any exception (printed, not raised).
    """
    if not (bot_id and workspace_id):
        print("Missing bot_id or workspace_id for integration")
        return False

    endpoint = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "Authorization": TOKEN,
        "Content-Type": "application/json",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
    }
    # The key is the integration version id to switch on for this bot.
    body = {
        "integrations": {
            "intver_01JZ6J0NKYBXC0V6K5DBDZKKDK": {"enabled": True},
        },
    }

    try:
        resp = requests.put(endpoint, headers=request_headers, json=body, timeout=60)
        installed = resp.status_code == 200
        if not installed:
            print(f"Integration install failed: {resp.status_code} {resp.text}")
        return installed
    except Exception as exc:
        print(f"Error installing integration: {exc}")
        return False

def delete_bot(bot_id, workspace_id):
    """Delete a bot through the Botpress admin API.

    Args:
        bot_id: Identifier of the bot to delete (part of the URL).
        workspace_id: Owning workspace, sent as ``x-workspace-id``.

    Returns:
        ``True`` when the API answers 200 or 204; ``False`` when either id is
        falsy, on any other status, or on any exception (printed, not raised).
    """
    if not (bot_id and workspace_id):
        print("Missing bot_id or workspace_id for bot deletion")
        return False

    endpoint = f"https://api.botpress.cloud/v1/admin/bots/{bot_id}"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "x-workspace-id": workspace_id,
        "Authorization": TOKEN,
    }

    try:
        resp = requests.delete(endpoint, headers=request_headers, timeout=60)
        deleted = resp.status_code in (200, 204)
        if deleted:
            print(f"Bot deleted: {bot_id}")
        else:
            print(f"Bot deletion failed: {resp.status_code} {resp.text}")
        return deleted
    except Exception as exc:
        print(f"Error deleting bot: {exc}")
        return False

def delete_workspace(workspace_id):
    """Delete a workspace through the Botpress admin API.

    Args:
        workspace_id: Identifier of the workspace to delete (part of the URL).

    Returns:
        ``True`` when the API answers 200 or 204; ``False`` when the id is
        falsy, on any other status, or on any exception (printed, not raised).
    """
    if not workspace_id:
        print("Missing workspace_id for workspace deletion")
        return False

    endpoint = f"https://api.botpress.cloud/v1/admin/workspaces/{workspace_id}"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "Authorization": TOKEN,
    }

    try:
        resp = requests.delete(endpoint, headers=request_headers, timeout=60)
        deleted = resp.status_code in (200, 204)
        if deleted:
            print(f"Workspace deleted: {workspace_id}")
        else:
            print(f"Workspace deletion failed: {resp.status_code} {resp.text}")
        return deleted
    except Exception as exc:
        print(f"Error deleting workspace: {exc}")
        return False

# -------------------------------------------------------------------
# Capture Screenshot (Botpress chat action)
# -------------------------------------------------------------------
def capture_screenshot(url, width, height, full_page, bot_id, workspace_id, timeout=120):
    """Invoke the ``browser:captureScreenshot`` chat action for one URL.

    Up to three attempts are made. Statuses 404/408/502/503/504 are retried
    after a 2 s pause, request timeouts are retried immediately, and any other
    exception is retried after a 1 s pause.

    Args:
        url: Page address passed to the action.
        width: Value for the action's ``width`` input.
        height: Value for the action's ``height`` input.
        full_page: Coerced with ``bool`` and sent as ``fullPage``.
        bot_id: Sent as the ``x-bot-id`` header.
        workspace_id: Sent as the ``x-workspace-id`` header.
        timeout: Per-request timeout handed to ``requests.post``.

    Returns:
        The decoded JSON body on a 200 response; otherwise a dict with an
        ``"error"`` key (plus ``"status"``/``"details"`` for 403 and other
        non-retryable statuses).
    """
    if not (bot_id and workspace_id):
        return {"error": "Missing bot_id or workspace_id"}

    endpoint = "https://api.botpress.cloud/v1/chat/actions"
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "x-bot-id": bot_id,
        "x-workspace-id": workspace_id,
        "Content-Type": "application/json",
        "Authorization": TOKEN,
    }
    # NOTE(review): the cssToInject / javascriptToInject values look like
    # placeholder text; confirm whether the action needs them at all.
    action_input = {
        "url": url,
        "cssToInject": "greregreg",
        "javascriptToInject": "gerregr",
        "width": width,
        "height": height,
        "fullPage": bool(full_page),
        "waitFor": 10000,  # presumably milliseconds; confirm against the action schema
    }
    body = {"type": "browser:captureScreenshot", "input": action_input}

    retryable_statuses = (404, 408, 502, 503, 504)
    attempts_allowed = 3
    for attempt in range(attempts_allowed):
        is_last_attempt = attempt == attempts_allowed - 1
        try:
            print(f"Capture attempt {attempt+1} for {url}")
            resp = requests.post(endpoint, headers=request_headers, json=body, timeout=timeout)
            status = resp.status_code

            if status == 200:
                # Expected shape:
                # { "output": {"imageUrl": "...", "htmlUrl": "..."}, "meta": {"cached": false}}
                return resp.json()

            if status in retryable_statuses:
                print(f"Transient error {status}: retrying...")
                time.sleep(2)
                continue

            if status == 403:
                print(f"Forbidden (403): {resp.text}")
                # No bot/workspace rotation here; the per-request lifecycle handles cleanup.
                return {"error": "Forbidden", "status": 403, "details": safe_text(resp)}

            print(f"Unexpected {status}: {resp.text}")
            return {"error": "Unexpected error", "status": status, "details": safe_text(resp)}
        except requests.exceptions.Timeout:
            print("Request timed out, retrying...")
            if is_last_attempt:
                return {"error": "Timeout"}
        except Exception as exc:
            print(f"Exception: {exc}")
            if is_last_attempt:
                return {"error": str(exc)}
            time.sleep(1)

    # Reached only when every attempt ended in a retryable HTTP status.
    return {"error": "Failed after retries"}

def safe_text(response):
    """Return ``response.text``, or ``"<unavailable>"`` if reading it raises."""
    try:
        body_text = response.text
    except Exception:
        body_text = "<unavailable>"
    return body_text

# -------------------------------------------------------------------
# Flask Endpoint
# -------------------------------------------------------------------
def _positive_int(value, default):
    """Coerce ``value`` to a positive int, returning ``default`` if that fails."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return number if number > 0 else default


@app.route("/capture", methods=["POST"])
def capture_endpoint():
    """
    Capture screenshots for one or more URLs using a throwaway workspace and bot.

    JSON body example:
    {
      "urls": ["https://x.com/USTreasury", "https://example.com"],   // required, array of one or more
      "width": 1080,                                                 // optional, default 1080
      "height": 1920,                                                // optional, default 1920
      "fullPage": true                                               // optional, default true
    }
    Response:
    {
      "results": [
        {
          "url": "...",
          "output": {
            "imageUrl": "...png",
            "htmlUrl": "...html"
          },
          "meta": { "cached": false }
        },
        { ... }
      ],
      "errors": [
        { "url": "...", "error": "...", "status": 403, "details": "..." }
      ]
    }
    Behavior:
    - Create workspace, then bot.
    - Run capture for all URLs with the same bot/workspace.
    - Delete bot first, then workspace, at the end of the request, even when
      processing raises part-way through.

    Status codes:
    - 400: body is not valid JSON, is not a JSON object, or "urls" is missing/not a list.
    - 500: workspace or bot creation failed.
    - 200: at least one URL was captured.
    - 502: no URL was captured.
    """
    # Parse and validate body
    try:
        body = request.get_json(force=True) or {}
    except Exception:
        return jsonify({"error": "Invalid JSON"}), 400

    # A JSON array or scalar would otherwise blow up on body.get() below.
    if not isinstance(body, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    urls = body.get("urls")
    if not urls or not isinstance(urls, list):
        return jsonify({"error": "urls is required and must be a list"}), 400

    # Optional params; invalid or non-positive sizes fall back to the defaults.
    width = _positive_int(body.get("width", 1080), 1080)
    height = _positive_int(body.get("height", 1920), 1920)
    full_page = bool(body.get("fullPage", True))

    # Create (Workspace then Bot)
    workspace_id = create_workspace()
    if not workspace_id:
        return jsonify({"error": "Failed to create workspace"}), 500

    bot_id = None
    results = []
    errors = []

    try:
        bot_id = create_bot(workspace_id)
        if not bot_id:
            # The finally block below removes the workspace.
            return jsonify({"error": "Failed to create bot"}), 500

        # Process multiple URLs using the same bot/workspace
        for url in urls:
            if not isinstance(url, str) or not url.strip():
                errors.append({"url": url, "error": "Invalid URL"})
                continue

            capture = capture_screenshot(
                url=url.strip(),
                width=width,
                height=height,
                full_page=full_page,
                bot_id=bot_id,
                workspace_id=workspace_id,
            )

            # A 200 response is passed through as decoded JSON, which is not
            # guaranteed to be an object.
            if not isinstance(capture, dict):
                errors.append({"url": url, "error": "Unexpected response format"})
                continue

            if capture.get("error"):
                errors.append({"url": url, **capture})
                continue

            # Ensure consistent output structure, tolerating a missing/null "output".
            output = capture.get("output")
            if not isinstance(output, dict):
                output = {}
            results.append({
                "url": url,
                "output": {
                    "imageUrl": output.get("imageUrl"),
                    "htmlUrl": output.get("htmlUrl"),
                },
                "meta": capture.get("meta", {}),
            })
    finally:
        # Delete (Bot first, then Workspace) regardless of success or exceptions,
        # so a failure mid-request does not leak remote resources.
        try:
            if bot_id:
                delete_bot(bot_id, workspace_id)
        finally:
            delete_workspace(workspace_id)

    response_body = {"results": results}
    if errors:
        response_body["errors"] = errors

    # If all failed, return 502; else 200
    status_code = 200 if results else 502
    return jsonify(response_body), status_code

# -------------------------------------------------------------------
# Run the Flask app
# -------------------------------------------------------------------
if __name__ == "__main__":
    import os

    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger (which allows arbitrary code execution) must not be enabled by
    # default. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)