| import requests | |
| from concurrent.futures import ThreadPoolExecutor | |
| from tqdm import tqdm | |
| import os | |
| import random | |
# Input list: a text file with one URL per line to be sized up.
file_path = "python_files.txt"
# Determine how large a remote file is without downloading it.
def get_file_size(url):
    """Return the remote file's size in bytes, or 0 on any failure.

    Issues a HEAD request (following redirects) and parses the
    Content-Length header. Network errors, timeouts, and a missing or
    non-numeric header all resolve to 0 rather than raising.
    """
    try:
        head = requests.head(url, allow_redirects=True, timeout=10)
        # Servers that omit Content-Length are counted as size 0.
        return int(head.headers.get("Content-Length", 0))
    except (requests.RequestException, ValueError):
        return 0
# Sum the remote sizes of a random sample of the URLs listed in a file.
def calculate_total_size(file_path, sample_size=5000):
    """Return the combined size in bytes of up to *sample_size* URLs
    sampled uniformly from *file_path* (one URL per line).

    Prints a message and returns 0 if *file_path* does not exist.
    Sizes are fetched concurrently via HEAD requests; URLs that fail
    or lack a Content-Length header contribute 0.

    *sample_size* generalizes the previously hard-coded 5000 cap and
    defaults to it, so existing callers are unaffected.
    """
    if not os.path.exists(file_path):
        print("File not found!")
        return 0
    with open(file_path, "r") as file:
        urls = [line.strip() for line in file if line.strip()]
    # Shuffle-then-truncate gives a uniform sample without replacement.
    random.shuffle(urls)
    urls = urls[:sample_size]
    # Threads overlap the network waits; tqdm shows per-URL progress.
    with ThreadPoolExecutor() as executor:
        file_sizes = list(
            tqdm(
                executor.map(get_file_size, urls),
                total=len(urls),
                desc="Processing URLs.",
            )
        )
    return sum(file_sizes)
# Script entry point: report the sampled total size in megabytes.
# Guarded so importing this module does not trigger thousands of
# network requests as a side effect.
if __name__ == "__main__":
    total_size = calculate_total_size(file_path)
    print(f"Total size of all files: {total_size / (1024 * 1024):.2f} MB")