nexacore committed on
Commit
94e9d2e
·
verified ·
1 Parent(s): 87d3007

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -30
app.py CHANGED
@@ -6,39 +6,44 @@ app = FastAPI()
6
  # IMPORTANT: You need to confirm the text/selector of the download button.
7
  # Based on typical Pixeldrain layout, let's assume the button has the text "Download" or "Download File".
8
  # We'll use a robust selector that looks for the button element containing the text "Download".
 
 
 
 
9
  DOWNLOAD_BUTTON_SELECTOR = 'button:has-text("Download")'
10
- # If the text is different (e.g., "Get File"), you MUST update the selector above.
11
 
12
  def get_pixeldrain_info(pixeldrain_url: str):
13
  try:
14
  with sync_playwright() as p:
15
- # Launch Chromium browser with slower speed for stability
 
 
16
  browser = p.chromium.launch(headless=True, slow_mo=50)
17
- page = browser.new_page()
 
 
18
 
19
- # 1. Navigate to the Pixeldrain page
20
- page.goto(pixeldrain_url, wait_until="networkidle")
21
- print("Successfully navigated to Pixeldrain page.")
22
-
23
- # --- We need to capture the network traffic to find the final direct link ---
24
 
25
- # Use a Python variable to store the final link we are looking for
26
  final_download_url = None
27
 
28
- # 2. Set up a listener to capture the network response
29
  def handle_response(response):
30
  nonlocal final_download_url
31
- # Look for the final redirect URL pattern (which often contains /api/file/...)
32
- # The final content download link will look like https://pd.pixeldrain.com/api/file/HASH/FILENAME
33
  if "/api/file/" in response.url and response.status == 200:
34
  print(f"Captured final link via network: {response.url}")
35
  final_download_url = response.url
36
- # Stop the network listener once the link is found (optional optimization)
37
 
38
  page.on("response", handle_response)
39
 
40
  # 3. Wait for the download button to be visible and click it (to trigger s("download"))
41
  try:
 
42
  page.wait_for_selector(DOWNLOAD_BUTTON_SELECTOR, timeout=15000)
43
  print(f"Found button with selector: {DOWNLOAD_BUTTON_SELECTOR}. Clicking...")
44
  page.click(DOWNLOAD_BUTTON_SELECTOR)
@@ -46,29 +51,18 @@ def get_pixeldrain_info(pixeldrain_url: str):
46
  raise ValueError(f"Timeout: Could not find or click the button with selector: {DOWNLOAD_BUTTON_SELECTOR}")
47
 
48
  # 4. Wait for the final URL to be captured by the network listener
49
- # We wait up to 15 seconds for the final URL variable to be set
50
- page.wait_for_timeout(15000) # Give it time to execute JS and fire network requests
51
 
52
- if not final_download_url:
53
- # If the URL wasn't found in network, try to find it in the DOM (e.g., a hidden link that appears)
54
- try:
55
- # Search for any link that looks like a final download link in the DOM
56
- dom_link = page.query_selector('a[href*="/api/file/"]')
57
- if dom_link:
58
- final_download_url = dom_link.get_attribute('href')
59
- except:
60
- pass
61
-
62
  if not final_download_url:
63
  raise ValueError("Could not capture the final tokenized download URL after clicking.")
64
 
65
  # 5. Extract cookies and user-agent
66
  cookies = page.context.cookies()
67
  cookie_string = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
68
- user_agent = page.evaluate('() => navigator.userAgent')
69
-
70
- # Extract filename from the URL (best guess)
71
- filename = final_download_url.split('/')[-1]
72
 
73
  browser.close()
74
 
@@ -76,13 +70,15 @@ def get_pixeldrain_info(pixeldrain_url: str):
76
  "download_url": final_download_url,
77
  "cookies": cookie_string,
78
  "user_agent": user_agent,
79
- "filename": filename
80
  }
81
 
82
  except Exception as e:
83
  print(f"Error during Playwright execution: {e}")
 
84
  return None
85
 
 
86
  # FastAPI routes remain the same
87
  @app.get("/resolve_pixeldrain")
88
  def resolve_url(url: str):
 
6
  # IMPORTANT: You need to confirm the text/selector of the download button.
7
  # Based on typical Pixeldrain layout, let's assume the button has the text "Download" or "Download File".
8
  # We'll use a robust selector that looks for the button element containing the text "Download".
9
+ # --- app.py code ---
10
+
11
+ # ... imports ...
12
+
13
  DOWNLOAD_BUTTON_SELECTOR = 'button:has-text("Download")'
14
+ # Make sure to confirm the button text: "Download", "Download File", etc.
15
 
16
  def get_pixeldrain_info(pixeldrain_url: str):
17
  try:
18
  with sync_playwright() as p:
19
+ # 1. Add User-Agent spoofing to the context creation
20
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
21
+
22
  browser = p.chromium.launch(headless=True, slow_mo=50)
23
+ # Create a new context with a realistic User-Agent
24
+ context = browser.new_context(user_agent=user_agent)
25
+ page = context.new_page()
26
 
27
+ # 2. Navigate to the Pixeldrain page with a less strict wait condition
28
+ # Changed 'networkidle' to 'domcontentloaded' (or just 'load')
29
+ page.goto(pixeldrain_url, wait_until="domcontentloaded", timeout=30000)
30
+ print("Successfully navigated to Pixeldrain page (DOM loaded).")
 
31
 
32
+ # --- Network Listener (Keep this logic) ---
33
  final_download_url = None
34
 
 
35
  def handle_response(response):
36
  nonlocal final_download_url
37
+ # Look for the final redirect URL pattern (https://pd.pixeldrain.com/api/file/HASH/FILENAME)
 
38
  if "/api/file/" in response.url and response.status == 200:
39
  print(f"Captured final link via network: {response.url}")
40
  final_download_url = response.url
 
41
 
42
  page.on("response", handle_response)
43
 
44
  # 3. Wait for the download button to be visible and click it (to trigger s("download"))
45
  try:
46
+ # Wait up to 15 s for the button to appear; after 'domcontentloaded' it may not be rendered yet
47
  page.wait_for_selector(DOWNLOAD_BUTTON_SELECTOR, timeout=15000)
48
  print(f"Found button with selector: {DOWNLOAD_BUTTON_SELECTOR}. Clicking...")
49
  page.click(DOWNLOAD_BUTTON_SELECTOR)
 
51
  raise ValueError(f"Timeout: Could not find or click the button with selector: {DOWNLOAD_BUTTON_SELECTOR}")
52
 
53
  # 4. Wait for the final URL to be captured by the network listener
54
+ page.wait_for_timeout(15000)
 
55
 
56
+ # ... (rest of the URL check/cookie extraction remains the same) ...
57
+
 
 
 
 
 
 
 
 
58
  if not final_download_url:
59
  raise ValueError("Could not capture the final tokenized download URL after clicking.")
60
 
61
  # 5. Extract cookies and user-agent
62
  cookies = page.context.cookies()
63
  cookie_string = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
64
+ # Use the explicit User-Agent we set earlier
65
+ # user_agent is already defined at the start of the function
 
 
66
 
67
  browser.close()
68
 
 
70
  "download_url": final_download_url,
71
  "cookies": cookie_string,
72
  "user_agent": user_agent,
73
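+ "filename": filename # NOTE(review): `filename` is no longer assigned anywhere; this commit removed `filename = final_download_url.split('/')[-1]`, so this raises NameError unless that line is restored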
74
  }
75
 
76
  except Exception as e:
77
  print(f"Error during Playwright execution: {e}")
78
+ # The exception details are only printed (not re-raised); return None to signal failure
79
  return None
80
 
81
+ # ... (FastAPI routes remain the same) ...
82
  # FastAPI routes remain the same
83
  @app.get("/resolve_pixeldrain")
84
  def resolve_url(url: str):