Junyin's picture
Add files using upload-large-folder tool
8a506a6 verified
import os
import json
import aiohttp
import asyncio
# Make sure the output directory for downloaded images exists.
SAVE_DIR = '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Image'
os.makedirs(SAVE_DIR, exist_ok=True)

# Load the item -> features mapping from disk.
with open(
    '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Musical_Instruments.features.complete.json',
    'r',
    encoding='utf-8',
) as f:
    features = json.load(f)

# Every item contributes the first entry of its 'imageH' list as the URL to
# download (assumes each entry has a non-empty 'imageH' list -- TODO confirm).
image_urls = [value['imageH'][0] for value in features.values()]
# Asynchronous download of a single image, with retries.
async def download_image(session, url, save_path, retries=3):
    """Download ``url`` into ``save_path``, retrying on failure.

    Args:
        session: Object whose ``get(url, timeout=...)`` returns an async
            context manager yielding a response with a ``status`` attribute
            and an awaitable ``read()``. The caller in this file passes an
            ``aiohttp.ClientSession``.
        url: Address of the image to fetch.
        save_path: Destination file path; written in binary mode.
        retries: Maximum number of attempts.

    Returns:
        True as soon as one attempt wrote the file, False once every attempt
        has failed (non-200 status or any exception during request/write).
    """
    for attempt in range(1, retries + 1):
        try:
            async with session.get(url, timeout=10) as response:
                if response.status == 200:
                    # Fetch the whole body BEFORE opening the target file, so
                    # a failed/timed-out read never leaves an empty or
                    # truncated image behind on disk.
                    payload = await response.read()
                    with open(save_path, "wb") as file:
                        file.write(payload)
                    print(f"✅ 成功下载: {save_path}")
                    return True
                print(f"⚠️ 状态码错误 {response.status},重试 {attempt}/{retries}")
        except Exception as e:
            # Best-effort download: any request or write error just triggers
            # the next attempt instead of aborting the whole batch.
            print(f"⚠️ 请求失败 {e},重试 {attempt}/{retries}")
        # Back off for 2 seconds between attempts, but do not waste time
        # sleeping after the last one.
        if attempt < retries:
            await asyncio.sleep(2)
    print(f"❌ 下载失败: {url}")
    return False
# Asynchronous scheduling of the whole download batch.
async def download_images_async(url_list):
    """Download every URL in ``url_list`` concurrently into ``SAVE_DIR``.

    Each image is stored under the last path segment of its URL. All
    downloads share one ``aiohttp.ClientSession``.

    Args:
        url_list: Sequence of image URLs.

    Returns:
        The URLs whose download did not succeed, in input order.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [
            download_image(session, url, os.path.join(SAVE_DIR, url.split('/')[-1]))
            for url in url_list
        ]
        # return_exceptions=True keeps one failing task from aborting the
        # rest; such a task shows up in `results` as an exception object.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # Anything other than an explicit True is a failure. Checking for
    # `is False` only would silently drop URLs whose task ended with an
    # exception object instead of a boolean.
    failed_urls = [
        url for url, outcome in zip(url_list, results) if outcome is not True
    ]
    print(f"\n❌ 下载失败 {len(failed_urls)} 张图片:")
    return failed_urls
# Run the download. For a quick manual check, image_urls can be replaced by
# an explicit list such as:
# image_urls = [
#     "https://example.com/image1.jpg",
#     "https://example.com/image2.jpg",
# ]

# Upper bound on extra passes over the failed URLs. Without a bound, a single
# permanently unreachable image (e.g. a 404) would keep the script looping
# forever.
MAX_RETRY_ROUNDS = 5

failed_urls = asyncio.run(download_images_async(image_urls))
retry_round = 0
while failed_urls and retry_round < MAX_RETRY_ROUNDS:
    retry_round += 1
    failed_urls = asyncio.run(download_images_async(failed_urls))

if failed_urls:
    # Report what is still missing so it can be inspected or retried later.
    print(f"❌ 重试 {MAX_RETRY_ROUNDS} 轮后仍有 {len(failed_urls)} 张图片下载失败:")
    for url in failed_urls:
        print(url)