nexacore committed on
Commit
19b40c0
·
verified ·
1 Parent(s): 94e9d2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -38
app.py CHANGED
@@ -1,68 +1,81 @@
1
  from fastapi import FastAPI, HTTPException
2
  from playwright.sync_api import sync_playwright
 
3
 
4
  app = FastAPI()
5
 
6
- # IMPORTANT: You need to confirm the text/selector of the download button.
7
- # Based on typical Pixeldrain layout, let's assume the button has the text "Download" or "Download File".
8
- # We'll use a robust selector that looks for the button element containing the text "Download".
9
- # --- app.py code ---
10
-
11
- # ... imports ...
12
-
13
  DOWNLOAD_BUTTON_SELECTOR = 'button:has-text("Download")'
14
- # Make sure to confirm the button text: "Download", "Download File", etc.
 
 
15
 
16
  def get_pixeldrain_info(pixeldrain_url: str):
 
 
 
 
 
 
 
 
 
17
  try:
18
  with sync_playwright() as p:
19
- # 1. Add User-Agent spoofing to the context creation
20
- user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
21
-
22
  browser = p.chromium.launch(headless=True, slow_mo=50)
23
- # Create a new context with a realistic User-Agent
24
- context = browser.new_context(user_agent=user_agent)
25
- page = context.new_page()
26
-
27
- # 2. Navigate to the Pixeldrain page with a less strict wait condition
28
- # Changed 'networkidle' to 'domcontentloaded' (or just 'load')
29
- page.goto(pixeldrain_url, wait_until="domcontentloaded", timeout=30000)
30
- print("Successfully navigated to Pixeldrain page (DOM loaded).")
31
 
32
- # --- Network Listener (Keep this logic) ---
33
- final_download_url = None
 
 
 
 
 
 
 
 
34
 
 
35
  def handle_response(response):
36
  nonlocal final_download_url
37
  # Look for the final redirect URL pattern (https://pd.pixeldrain.com/api/file/HASH/FILENAME)
38
- if "/api/file/" in response.url and response.status == 200:
 
39
  print(f"Captured final link via network: {response.url}")
40
  final_download_url = response.url
41
 
42
  page.on("response", handle_response)
43
 
44
- # 3. Wait for the download button to be visible and click it (to trigger s("download"))
 
 
 
 
45
  try:
46
- # Give it a bit more time to appear after DOMContentLoaded
47
- page.wait_for_selector(DOWNLOAD_BUTTON_SELECTOR, timeout=15000)
48
  print(f"Found button with selector: {DOWNLOAD_BUTTON_SELECTOR}. Clicking...")
49
  page.click(DOWNLOAD_BUTTON_SELECTOR)
50
- except Exception:
51
- raise ValueError(f"Timeout: Could not find or click the button with selector: {DOWNLOAD_BUTTON_SELECTOR}")
52
 
53
- # 4. Wait for the final URL to be captured by the network listener
 
54
  page.wait_for_timeout(15000)
55
 
56
- # ... (rest of the URL check/cookie extraction remains the same) ...
57
-
58
  if not final_download_url:
59
  raise ValueError("Could not capture the final tokenized download URL after clicking.")
60
 
61
- # 5. Extract cookies and user-agent
62
  cookies = page.context.cookies()
63
  cookie_string = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
64
- # Use the explicit User-Agent we set earlier
65
- # user_agent is already defined at the start of the function
 
66
 
67
  browser.close()
68
 
@@ -70,21 +83,20 @@ def get_pixeldrain_info(pixeldrain_url: str):
70
  "download_url": final_download_url,
71
  "cookies": cookie_string,
72
  "user_agent": user_agent,
73
- "filename": filename # This comes from the URL split
74
  }
75
 
76
  except Exception as e:
77
  print(f"Error during Playwright execution: {e}")
78
- # Re-raise the exception details for debugging, but return None to signal failure
79
  return None
80
 
81
- # ... (FastAPI routes remain the same) ...
82
- # FastAPI routes remain the same
83
  @app.get("/resolve_pixeldrain")
84
  def resolve_url(url: str):
85
  data = get_pixeldrain_info(url)
86
  if not data or not data["download_url"]:
87
- # 500 status will trigger the error handling in the Cloudflare Worker
88
  raise HTTPException(status_code=500, detail="Failed to resolve URL via Playwright.")
89
  return data
90
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from playwright.sync_api import sync_playwright
3
+ import re
4
 
5
  app = FastAPI()
6
 
7
+ # --- Configuration ---
8
+ # You MUST confirm the exact text on the download button.
9
+ # Assuming the text is exactly "Download"
 
 
 
 
10
  DOWNLOAD_BUTTON_SELECTOR = 'button:has-text("Download")'
11
+
12
+ # Long default timeout applied to each Playwright action and to page navigation (in milliseconds)
13
+ PLAYWRIGHT_TIMEOUT_MS = 60000
14
 
15
  def get_pixeldrain_info(pixeldrain_url: str):
16
+ """
17
+ Launches a headless Chromium browser using Playwright to bypass Pixeldrain's
18
+ client-side hotlink detection, extracts the final tokenized URL and cookies.
19
+ """
20
+
21
+ # 1. Initialization and Stealth Headers
22
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
23
+ final_download_url = None
24
+
25
  try:
26
  with sync_playwright() as p:
27
+ # 2. Launch Browser with Stealth Context
28
+ # Set a slower speed (slow_mo) to give the page a chance to react
 
29
  browser = p.chromium.launch(headless=True, slow_mo=50)
 
 
 
 
 
 
 
 
30
 
31
+ # Use a custom context with realistic viewport and locale
32
+ context = browser.new_context(
33
+ user_agent=user_agent,
34
+ viewport={"width": 1920, "height": 1080},
35
+ locale="en-US"
36
+ )
37
+ # Set the default per-action timeout for this context (applies to each operation, not to the run as a whole)
38
+ context.set_default_timeout(PLAYWRIGHT_TIMEOUT_MS)
39
+
40
+ page = context.new_page()
41
 
42
+ # 3. Setup Network Listener
43
  def handle_response(response):
44
  nonlocal final_download_url
45
  # Look for the final redirect URL pattern (https://pd.pixeldrain.com/api/file/HASH/FILENAME)
46
+ # We check for '/api/file/' and a 200 OK status.
47
+ if "/api/file/" in response.url and response.status == 200 and len(response.url) > 50:
48
  print(f"Captured final link via network: {response.url}")
49
  final_download_url = response.url
50
 
51
  page.on("response", handle_response)
52
 
53
+ # 4. Navigate (waiting for the "load" event: stricter than "domcontentloaded", less strict than "networkidle")
54
+ page.goto(pixeldrain_url, wait_until="load", timeout=PLAYWRIGHT_TIMEOUT_MS)
55
+ print("Successfully navigated to Pixeldrain page (waiting until 'load').")
56
+
57
+ # 5. Wait for the button and click it (to trigger s("download"))
58
  try:
59
+ # Wait for 30 seconds for the button to appear after the page has loaded
60
+ page.wait_for_selector(DOWNLOAD_BUTTON_SELECTOR, timeout=30000)
61
  print(f"Found button with selector: {DOWNLOAD_BUTTON_SELECTOR}. Clicking...")
62
  page.click(DOWNLOAD_BUTTON_SELECTOR)
63
+ except Exception as e:
64
+ raise ValueError(f"Button not found/Timeout during click: {e}")
65
 
66
+ # 6. Wait for the final URL to be captured
67
+ # Give it 15 seconds for the network request to fire and be captured
68
  page.wait_for_timeout(15000)
69
 
 
 
70
  if not final_download_url:
71
  raise ValueError("Could not capture the final tokenized download URL after clicking.")
72
 
73
+ # 7. Extract necessary data
74
  cookies = page.context.cookies()
75
  cookie_string = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
76
+
77
+ # Extract filename from the URL (best guess is the last part)
78
+ filename = final_download_url.split('/')[-1]
79
 
80
  browser.close()
81
 
 
83
  "download_url": final_download_url,
84
  "cookies": cookie_string,
85
  "user_agent": user_agent,
86
+ "filename": filename
87
  }
88
 
89
  except Exception as e:
90
  print(f"Error during Playwright execution: {e}")
91
+ # The Cloudflare Worker will catch the 500 error raised below
92
  return None
93
 
94
+ # --- FastAPI Routes ---
 
95
  @app.get("/resolve_pixeldrain")
96
  def resolve_url(url: str):
97
  data = get_pixeldrain_info(url)
98
  if not data or not data["download_url"]:
99
+ # Raise 500 to send the failure detail back to the Cloudflare Worker
100
  raise HTTPException(status_code=500, detail="Failed to resolve URL via Playwright.")
101
  return data
102