"""Download one PNG image per row of an Excel sheet, keyed by registration id."""

import logging
import os
import re

import pandas as pd
import requests
from requests.exceptions import RequestException

logger = logging.getLogger(__name__)

# URL template; ``{}`` is replaced with the registration id of each row.
DEFAULT_BASE_URL = (
    "https://myshoolini.s3.ap-south-1.amazonaws.com/face_detect_data/{}/img/1.png"
)

# Path separators, characters Windows rejects in file names, and ASCII
# control characters. Replacing them keeps every output file inside the
# output folder and openable on any platform.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def _cell_to_text(value):
    """Return a spreadsheet cell as clean text, or ``""`` for an empty cell.

    pandas reads a numeric column containing blanks as float, so an id such
    as 12345 arrives as ``12345.0`` and a blank as ``NaN``; both would
    otherwise end up verbatim in the URL and the file name.
    """
    if pd.isna(value):
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _safe_filename(text):
    """Replace characters that are unsafe in a file name with ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", text)


def _download_one(session, base_url, roll_number, name, output_folder, timeout):
    """Fetch the image for one student and save it, logging the outcome.

    Never raises: every failure is logged so the caller can carry on with
    the remaining rows.
    """
    label = f"{roll_number} - {name}"
    try:
        url = base_url.format(roll_number)
        logger.info("Attempting to access URL: %s", url)

        response = session.get(url, timeout=timeout)
        status = response.status_code

        if status == 404:
            logger.error("Image not found for %s", label)
            return
        if status == 403:
            logger.error("Access forbidden for %s", label)
            return
        if status != 200:
            logger.error("HTTP %s error for %s", status, label)
            return

        logger.info("Successfully accessed %s", url)
        if not response.content:
            logger.error("Empty response received for %s", label)
            return

        # Fall back to the id alone when the name cell is empty.
        stem = f"{name}_{roll_number}" if name else roll_number
        filename = os.path.join(output_folder, _safe_filename(stem) + ".png")
        with open(filename, "wb") as file:
            file.write(response.content)
        logger.info("Successfully downloaded image for %s", label)

    except requests.Timeout:
        logger.error("Request timed out for %s", label)
    except requests.ConnectionError:
        logger.error("Connection error for %s", label)
    except RequestException as e:
        logger.error("Request failed for %s: %s", label, e)
    except Exception as e:
        # Boundary handler: one bad row (e.g. a disk error) must not abort
        # the whole batch. The traceback is kept for diagnosis.
        logger.exception("Unexpected error for %s: %s", label, e)


def download_images(
    excel_file,
    output_folder,
    *,
    base_url=DEFAULT_BASE_URL,
    id_column="Registration Id",
    name_column="Student Name",
    timeout=10,
):
    """Download an image for every row of *excel_file* into *output_folder*.

    Each image is saved as ``<name>_<id>.png``. Failures for individual rows
    (HTTP errors, timeouts, write errors) are logged and do not stop the run.

    Args:
        excel_file: Path of the Excel workbook to read.
        output_folder: Directory to write images to; created if missing.
        base_url: URL template with one ``{}`` placeholder for the id.
        id_column: Name of the column holding the registration id.
        name_column: Name of the column holding the student name.
        timeout: Per-request timeout in seconds.

    Raises:
        KeyError: If *id_column* or *name_column* is not in the sheet.
    """
    os.makedirs(output_folder, exist_ok=True)

    df = pd.read_excel(excel_file)

    # Fail early with a readable message instead of a bare KeyError from
    # the middle of the loop.
    missing = [col for col in (id_column, name_column) if col not in df.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in {excel_file}; "
            f"available columns: {list(df.columns)}"
        )

    # No-op when the application has already configured logging.
    logging.basicConfig(level=logging.INFO)

    # One session for the whole run so connections to the host are reused.
    with requests.Session() as session:
        for raw_id, raw_name in zip(df[id_column], df[name_column]):
            roll_number = _cell_to_text(raw_id)
            name = _cell_to_text(raw_name)

            if not roll_number:
                logger.warning(
                    "Skipping row with empty %s (name: %r)", id_column, name
                )
                continue

            _download_one(
                session, base_url, roll_number, name, output_folder, timeout
            )


# Example usage
if __name__ == "__main__":
    excel_file_path = "list6.xlsx"  # Replace with your Excel file path
    # Output folder for images
    output_directory = r"C:\Users\kashy\Downloads\source-code-face-recognition\source code\images"
    download_images(excel_file_path, output_directory)