"""Download the first ``imageH`` URL of every item in a features JSON file.

Images are stored in ``SAVE_DIR`` under the last path segment of their URL.
URLs that still fail after the per-request retries are retried in further
rounds, up to ``MAX_RETRY_ROUNDS`` times.
"""
import asyncio
import json
import os

import aiohttp

# Directory the images are written to.
SAVE_DIR = '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Image'
# JSON file read for image URLs; each value is expected to carry an
# 'imageH' list (assumption drawn from how the file is read below).
FEATURES_PATH = '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Musical_Instruments.features.complete.json'

REQUEST_TIMEOUT_SECONDS = 10  # total time budget for one HTTP request
RETRY_DELAY_SECONDS = 2       # pause between two attempts for the same URL
MAX_CONCURRENCY = 64          # downloads allowed in flight at the same time
MAX_RETRY_ROUNDS = 5          # extra whole-list rounds for URLs that keep failing


def load_image_urls(features_path=FEATURES_PATH):
    """Return the first ``imageH`` URL of each entry in the features file.

    Entries with a missing or empty ``imageH`` list are skipped instead of
    raising, and duplicate URLs are dropped (keeping first-seen order) so
    the same file is never written by two downloads at once.

    Args:
        features_path: Path of the JSON file to read.

    Returns:
        A list of unique URL strings.
    """
    with open(features_path, 'r', encoding='utf-8') as f:
        features = json.load(f)

    urls = []
    for value in features.values():
        candidates = value.get('imageH') or []
        if candidates:
            urls.append(candidates[0])
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(urls))


async def download_image(session, url, save_path, retries=3):
    """Download ``url`` to ``save_path``, trying up to ``retries`` times.

    Args:
        session: An open ``aiohttp.ClientSession``.
        url: Address of the image.
        save_path: Destination file path.
        retries: Maximum number of attempts.

    Returns:
        True once the file has been written, False if every attempt failed.
    """
    timeout = aiohttp.ClientTimeout(total=REQUEST_TIMEOUT_SECONDS)
    for attempt in range(1, retries + 1):
        try:
            async with session.get(url, timeout=timeout) as response:
                if response.status == 200:
                    # Read the whole body before opening the file so that a
                    # failed transfer does not leave a truncated image behind.
                    payload = await response.read()
                    with open(save_path, "wb") as file:
                        file.write(payload)
                    print(f"✅ 成功下载: {save_path}")
                    return True
                print(f"⚠️ 状态码错误 {response.status},重试 {attempt}/{retries}")
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as e:
            print(f"⚠️ 请求失败 {e},重试 {attempt}/{retries}")
        # Only wait when another attempt is actually coming.
        if attempt < retries:
            await asyncio.sleep(RETRY_DELAY_SECONDS)

    print(f"❌ 下载失败: {url}")
    return False


async def download_images_async(url_list):
    """Download every URL in ``url_list`` concurrently into ``SAVE_DIR``.

    Args:
        url_list: URLs to fetch; each is saved under its last path segment.

    Returns:
        The URLs that were not downloaded, in their original order. A URL
        whose task raised an unexpected exception counts as failed too.
    """
    # Created here so the semaphore belongs to the currently running loop.
    # Capping in-flight requests keeps a request's timeout from being spent
    # waiting in line for a connection rather than transferring data.
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)

    async def bounded_download(session, url):
        save_path = os.path.join(SAVE_DIR, url.split('/')[-1])
        async with semaphore:
            return await download_image(session, url, save_path)

    async with aiohttp.ClientSession() as session:
        # return_exceptions=True lets every task finish even if one raises.
        results = await asyncio.gather(
            *(bounded_download(session, url) for url in url_list),
            return_exceptions=True,
        )

    # Anything other than an explicit True (False or an exception object)
    # is treated as a failure so that no URL is silently dropped.
    failed_urls = [
        url for url, outcome in zip(url_list, results) if outcome is not True
    ]
    print(f"\n❌ 下载失败 {len(failed_urls)} 张图片:")
    return failed_urls


def main():
    """Download all images, then retry the failures a bounded number of times."""
    os.makedirs(SAVE_DIR, exist_ok=True)
    failed_urls = asyncio.run(download_images_async(load_image_urls()))

    # Bounded: a permanently broken URL (e.g. a 404) must not loop forever.
    rounds = 0
    while failed_urls and rounds < MAX_RETRY_ROUNDS:
        rounds += 1
        failed_urls = asyncio.run(download_images_async(failed_urls))

    if failed_urls:
        print(f"⚠️ 已重试 {rounds} 轮,仍有 {len(failed_urls)} 张图片下载失败:")
        for url in failed_urls:
            print(url)


if __name__ == "__main__":
    main()