Pubgbc9799 commited on
Commit
e7f1a2b
·
verified ·
1 Parent(s): 20b6481

Upload teraboxdl.py

Browse files
Files changed (1) hide show
  1. teraboxdl.py +370 -0
teraboxdl.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import time
4
+ import requests
5
+ from datetime import date
6
+ from .__version__ import __version__
7
+ from urllib.parse import urlparse, parse_qs
8
+ from requests.exceptions import RequestException, ConnectionError, Timeout, ChunkedEncodingError
9
+
10
# URL for fetching configurations
CONFIGS = "https://gist.githubusercontent.com/Damantha126/98270168b0d995f33d6d021746e1ce2f/raw/terabox_config.json"

# Fetch the remote configuration at import time.  A bounded timeout keeps a
# dead network from hanging the import forever, and `except Exception`
# (rather than a bare `except:`) avoids swallowing KeyboardInterrupt /
# SystemExit.  On any failure fall back to a stub reporting the local
# version so no update notice is shown.
try:
    req = requests.get(CONFIGS, timeout=10).json()
except Exception:
    req = {"LAST_VERSION": __version__}
18
+
19
class TeraboxDL:
    """
    A Python class to interact with Terabox and retrieve file information.

    Typical usage: construct with an authentication cookie, call
    ``get_file_info`` with a share link, then pass the result to
    ``download``.
    """

    # Class-level flag intended to gate the one-time startup notice.  Note
    # that __init__ also sets an instance attribute of the same name, which
    # shadows this for per-instance reads.
    notice_displayed = False
25
+ def __init__(self, cookie: str):
26
+ """
27
+ Initialize the TeraboxDL instance with the required cookie.
28
+
29
+ Args:
30
+ cookie (str): The cookie string required for authentication.
31
+ """
32
+ if not cookie:
33
+ raise ValueError("Cookie cannot be empty.")
34
+ self.cookie = cookie
35
+ self.headers = {
36
+ "Accept": "application/json, text/plain, */*",
37
+ "Accept-Encoding": "gzip, deflate, br",
38
+ "Accept-Language": "en-US,en;q=0.9,hi;q=0.8",
39
+ "Connection": "keep-alive",
40
+ "DNT": "1",
41
+ "Host": "www.terabox.app",
42
+ "Sec-Fetch-Dest": "document",
43
+ "Sec-Fetch-Mode": "navigate",
44
+ "Sec-Fetch-Site": "none",
45
+ "Sec-Fetch-User": "?1",
46
+ "Upgrade-Insecure-Requests": "1",
47
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
48
+ "sec-ch-ua": '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
49
+ "Cookie": self.cookie,
50
+ "sec-ch-ua-mobile": "?0",
51
+ "sec-ch-ua-platform": '"Windows"',
52
+ }
53
+ self.dlheaders = {
54
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
55
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
56
+ 'Accept-Language': 'en-US,en;q=0.5',
57
+ 'Referer': 'https://www.terabox.com/',
58
+ 'DNT': '1',
59
+ 'Connection': 'keep-alive',
60
+ 'Upgrade-Insecure-Requests': '1',
61
+ 'Cookie':self.cookie
62
+ }
63
+ # Initialize notice_displayed flag
64
+ self.notice_displayed = False
65
+ self.VERSION = __version__
66
+ # Display notice only once
67
+ if not self.notice_displayed:
68
+ self._start()
69
+
70
+ def _start(self):
71
+ """Display version information and update notice.
72
+
73
+ Prints information about the current version of PySDBots and displays a notification if a newer version is available.
74
+
75
+ This method checks the latest available version of PySDBots by querying a remote configuration source.
76
+ """
77
+ # Set notice_displayed flag to True
78
+ self.notice_displayed = True
79
+ year = date.today().year
80
+ # Print version information
81
+ print(
82
+ f'TeraboxDL v{__version__}, Copyright (C) '
83
+ f'{year} Damantha Jasinghe <https://github.com/Damantha126>\n'
84
+ 'Licensed under the terms of the MIT License, '
85
+ 'Massachusetts Institute of Technology (MIT)\n',
86
+ )
87
+ # Check for newer version and print update notice
88
+ if req["LAST_VERSION"] != __version__:
89
+ text = f'Update Available!\n' \
90
+ f'New TeraboxDL v{req["LAST_VERSION"]} ' \
91
+ f'is now available!\n'
92
+ if not sys.platform.startswith('win'):
93
+ print(f'\033[93m{text}\033[0m')
94
+ else:
95
+ print(text)
96
+
97
+ def version(self):
98
+ return(self.VERSION)
99
+
100
+ @staticmethod
101
+ def _get_formatted_size(size_bytes: int) -> str:
102
+ """
103
+ Convert file size in bytes to a human-readable format.
104
+
105
+ Args:
106
+ size_bytes (int): File size in bytes.
107
+
108
+ Returns:
109
+ str: Human-readable file size.
110
+ """
111
+ if size_bytes >= 1024 * 1024 * 1024: # Add GB support
112
+ size = size_bytes / (1024 * 1024 * 1024)
113
+ unit = "GB"
114
+ elif size_bytes >= 1024 * 1024:
115
+ size = size_bytes / (1024 * 1024)
116
+ unit = "MB"
117
+ elif size_bytes >= 1024:
118
+ size = size_bytes / 1024
119
+ unit = "KB"
120
+ else:
121
+ size = size_bytes
122
+ unit = "bytes"
123
+ return f"{size:.2f} {unit}"
124
+
125
+ @staticmethod
126
+ def _find_between(s: str, start: str, end: str) -> str:
127
+ """
128
+ Extract a substring between two markers.
129
+
130
+ Args:
131
+ s (str): The string to search.
132
+ start (str): The starting marker.
133
+ end (str): The ending marker.
134
+
135
+ Returns:
136
+ str: The extracted substring.
137
+ """
138
+ start_index = s.find(start) + len(start)
139
+ end_index = s.find(end, start_index)
140
+ if start_index == -1 or end_index == -1:
141
+ return ""
142
+ return s[start_index:end_index]
143
+
144
    def get_file_info(self, link: str) -> dict:
        """
        Retrieve file information from Terabox.

        Args:
            link (str): The Terabox share link to retrieve file information for.

        Returns:
            dict: On success, a dictionary with keys "file_name",
                "download_link", "thumbnail", "file_size" (human-readable)
                and "size_bytes" (int).  On any failure a dictionary with a
                single "error" key describing the problem — this method
                never raises to the caller.
        """
        try:
            if not link:
                return {"error": "Link cannot be empty."}

            # First request: follow the share link so redirects resolve to a
            # canonical URL carrying the 'surl' query parameter.
            temp_req = requests.get(link, headers=self.headers, timeout=30)
            if not temp_req.ok:
                return {"error": f"Failed to fetch the initial link. Status code: {temp_req.status_code}"}

            # Parse the final URL and check for the 'surl' parameter.
            parsed_url = urlparse(temp_req.url)
            query_params = parse_qs(parsed_url.query)
            if "surl" not in query_params:
                return {"error": "Invalid link. Please check the link."}

            # Second request: fetch the share page HTML; the tokens below
            # are embedded in it.  (Local `req` shadows the module-level
            # config dict of the same name.)
            req = requests.get(temp_req.url, headers=self.headers, timeout=30)
            respo = req.text

            # Extract the tokens the list endpoint requires from the page source.
            js_token = self._find_between(respo, 'fn%28%22', '%22%29')
            logid = self._find_between(respo, 'dp-logid=', '&')
            bdstoken = self._find_between(respo, 'bdstoken":"', '"')

            if not js_token or not logid or not bdstoken:
                # Caught by the generic handler below and returned as an
                # error dict, keeping the no-raise contract.
                raise Exception("Failed to extract required tokens.")

            surl = query_params["surl"][0]
            params = {
                "app_id": "250528",
                "web": "1",
                "channel": "dubox",
                "clienttype": "0",
                "jsToken": js_token,
                "dp-logid": logid,
                "page": "1",
                "num": "20",
                "by": "name",
                "order": "asc",
                "site_referer": temp_req.url,
                "shorturl": surl,
                "root": "1,",
            }

            # Third request to get the file list.
            req2 = requests.get("https://www.terabox.app/share/list", headers=self.headers, params=params, timeout=30)
            response_data2 = req2.json()

            # Treat an empty payload, a missing/empty "list", or a non-zero
            # errno as failure.
            if (
                not response_data2 or
                "list" not in response_data2 or
                not response_data2["list"] or
                response_data2.get("errno")
            ):
                error_message = response_data2.get("errmsg", "Failed to retrieve file list.")
                return {"error": error_message}

            # Extract file information from the first entry of the response
            # (only a single file per share link is supported here).
            file_info = response_data2["list"][0]
            return {
                "file_name": file_info.get("server_filename", ""),
                "download_link": file_info.get("dlink", ""),
                "thumbnail": file_info.get("thumbs", {}).get("url3", ""),
                "file_size": self._get_formatted_size(int(file_info.get("size", 0))),
                "size_bytes": int(file_info.get("size", 0)),
            }
        except RequestException as e:
            return {"error": f"Request error occurred while getting file info: {str(e)}"}
        except Exception as e:
            return {"error": f"An error occurred while retrieving file information: {str(e)}"}
224
+
225
+ def download(self, file_info: dict, save_path=None, callback=None, max_retries=5, timeout=60) -> dict:
226
+ """
227
+ Download a file from Terabox using the provided file information.
228
+
229
+ Args:
230
+ file_info (dict): A dictionary containing file information, including the download link and file name.
231
+ save_path (str, optional): The directory path where the file should be saved. Defaults to the current directory.
232
+ callback (callable, optional): A callback function that receives progress updates with parameters (downloaded_bytes, total_bytes, percentage)
233
+ max_retries (int, optional): Maximum number of retry attempts. Defaults to 5.
234
+ timeout (int, optional): Request timeout in seconds. Defaults to 60.
235
+
236
+ Returns:
237
+ dict: A dictionary containing the file path or an error message.
238
+ """
239
+ session = requests.Session()
240
+
241
+ try:
242
+ # Validate file_info
243
+ if not isinstance(file_info, dict):
244
+ return {"error": "Invalid file_info format. Expected a dictionary."}
245
+ if "file_name" not in file_info or "download_link" not in file_info:
246
+ return {"error": "file_info must contain 'file_name' and 'download_link' keys."}
247
+
248
+ # Determine the file save path
249
+ if save_path:
250
+ try:
251
+ os.makedirs(save_path, exist_ok=True)
252
+ if os.path.isdir(save_path):
253
+ file_path = os.path.join(save_path, file_info["file_name"])
254
+ else:
255
+ return {"error": "Provided save_path is not a directory."}
256
+ except Exception as e:
257
+ return {"error": f"Invalid save_path: {e}"}
258
+ else:
259
+ file_path = file_info["file_name"]
260
+
261
+ # Check if file already exists and get its size for resume capability
262
+ file_exists = os.path.exists(file_path)
263
+ downloaded_size = 0
264
+
265
+ if file_exists:
266
+ downloaded_size = os.path.getsize(file_path)
267
+ print(f"Found existing file with {self._get_formatted_size(downloaded_size)} already downloaded.")
268
+
269
+ retry_count = 0
270
+
271
+ while retry_count < max_retries:
272
+ try:
273
+ # Update headers for resumable download if needed
274
+ current_headers = dict(self.dlheaders)
275
+
276
+ if downloaded_size > 0:
277
+ current_headers['Range'] = f'bytes={downloaded_size}-'
278
+ print(f"Resuming download from byte {downloaded_size}")
279
+
280
+ # Start downloading the file
281
+ with session.get(
282
+ file_info["download_link"],
283
+ headers=current_headers,
284
+ stream=True,
285
+ timeout=timeout
286
+ ) as response:
287
+ response.raise_for_status()
288
+
289
+ # Handle resume response
290
+ if downloaded_size > 0 and response.status_code == 206: # Partial Content
291
+ print("Server accepted resume request.")
292
+ elif downloaded_size > 0 and response.status_code == 200: # OK, but not supporting resume
293
+ print("Warning: Server doesn't support resume. Starting from beginning.")
294
+ downloaded_size = 0
295
+
296
+ # Get total size from Content-Length header or from file_info
297
+ total_size = int(response.headers.get('content-length', 0))
298
+
299
+ if total_size == 0:
300
+ total_size = file_info.get("size_bytes", 0)
301
+
302
+ if downloaded_size > 0 and response.status_code == 206:
303
+ # For resumed downloads, we need to add the already downloaded size
304
+ total_size += downloaded_size
305
+
306
+ # Use a larger block size for faster download
307
+ block_size = 8192 * 8 # 64 KB
308
+
309
+ # Open file in append mode if resuming, otherwise write mode
310
+ mode = 'ab' if downloaded_size > 0 else 'wb'
311
+
312
+ with open(file_path, mode) as file:
313
+ current_downloaded = downloaded_size
314
+ start_time = time.time()
315
+ last_update_time = start_time
316
+ bytes_since_last_update = 0
317
+
318
+ for chunk in response.iter_content(chunk_size=block_size):
319
+ if chunk:
320
+ file.write(chunk)
321
+ current_downloaded += len(chunk)
322
+ bytes_since_last_update += len(chunk)
323
+
324
+ # Update progress less frequently to reduce console spam
325
+ current_time = time.time()
326
+ if current_time - last_update_time >= 0.5: # Update every 0.5 seconds
327
+ # Calculate download speed
328
+ elapsed = current_time - last_update_time
329
+ speed = bytes_since_last_update / elapsed if elapsed > 0 else 0
330
+
331
+ # Reset counters
332
+ bytes_since_last_update = 0
333
+ last_update_time = current_time
334
+
335
+ # Calculate percentage
336
+ percentage = (current_downloaded / total_size) * 100 if total_size > 0 else 0
337
+
338
+ if callback:
339
+ callback(current_downloaded, total_size, percentage)
340
+ else:
341
+ if total_size > 0:
342
+ done = int(50 * current_downloaded / total_size)
343
+ speed_text = f"{self._get_formatted_size(speed)}/s" if speed > 0 else "-- KB/s"
344
+ print(
345
+ f"\r[{'=' * done}{' ' * (50 - done)}] {percentage:.2f}% | "
346
+ f"{self._get_formatted_size(current_downloaded)} of {self._get_formatted_size(total_size)} | "
347
+ f"{speed_text}",
348
+ end=''
349
+ )
350
+
351
+ print(f"\nDownload complete: {file_path}")
352
+ return {"file_path": file_path}
353
+
354
+ except (ConnectionError, ChunkedEncodingError, Timeout, RequestException) as e:
355
+ retry_count += 1
356
+ if retry_count < max_retries:
357
+ wait_time = 2 ** retry_count # Exponential backoff
358
+ print(f"\nDownload error: {e}. Retrying in {wait_time} seconds... (Attempt {retry_count}/{max_retries})")
359
+ time.sleep(wait_time)
360
+
361
+ # Update downloaded size for next attempt
362
+ if os.path.exists(file_path):
363
+ downloaded_size = os.path.getsize(file_path)
364
+ else:
365
+ return {"error": f"Request error occurred after {max_retries} attempts: {e}"}
366
+
367
+ return {"error": f"Failed to download after {max_retries} attempts."}
368
+
369
+ except Exception as e:
370
+ return {"error": f"An unexpected error occurred: {e}"}