File size: 2,349 Bytes
8a506a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import json
import aiohttp
import asyncio

# Make sure the image output directory exists before anything is downloaded.
SAVE_DIR = '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Image'
os.makedirs(SAVE_DIR, exist_ok=True)

# Load the item-feature mapping from disk.
with open(
    '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Musical_Instruments.features.complete.json',
    'r',
    encoding='utf-8',
) as features_file:
    features = json.load(features_file)

# Take the first 'imageH' entry of every item as its download URL.
# NOTE(review): assumes every entry has a non-empty 'imageH' list — confirm
# against the data file.
image_urls = [item['imageH'][0] for item in features.values()]

# Asynchronous download of a single image.
async def download_image(session, url, save_path, retries=3):
    """Download ``url`` into ``save_path``, retrying on failure.

    Args:
        session: Object whose ``get(url, timeout=...)`` returns an async
            context manager yielding a response that exposes ``status`` and
            an awaitable ``read()``.
        url: Address of the image to fetch.
        save_path: Destination path; the body is written in binary mode.
        retries: Maximum number of attempts.

    Returns:
        True once the body has been written to ``save_path``; False when
        every attempt ended with a non-200 status or an exception.
    """
    for attempt in range(retries):
        try:
            # NOTE(review): newer aiohttp releases appear to prefer
            # aiohttp.ClientTimeout(total=10) over a bare number — confirm
            # against the installed version.
            async with session.get(url, timeout=10) as response:
                if response.status == 200:
                    # Read the whole body BEFORE opening the file so that a
                    # failed read does not leave an empty file on disk.
                    payload = await response.read()
                    with open(save_path, "wb") as file:
                        file.write(payload)
                    print(f"✅ 成功下载: {save_path}")
                    return True
                print(f"⚠️ 状态码错误 {response.status},重试 {attempt+1}/{retries}")
        except Exception as e:
            # Deliberately broad: any failure counts as one failed attempt
            # and is reported instead of aborting the whole batch.
            print(f"⚠️ 请求失败 {e},重试 {attempt+1}/{retries}")

        # Pause only between attempts; sleeping after the last one would
        # just delay reporting the failure.
        if attempt + 1 < retries:
            await asyncio.sleep(2)

    print(f"❌ 下载失败: {url}")
    return False

# Asynchronous task scheduling.
async def download_images_async(url_list, max_concurrency=64):
    """Download every URL in ``url_list`` into ``SAVE_DIR`` concurrently.

    Each file is named after the last ``/``-separated component of its URL.

    Args:
        url_list: Sequence of image URLs to fetch.
        max_concurrency: Upper bound on downloads in flight at once.

    Returns:
        The URLs, in input order, whose download did not report success.
    """
    # Without a bound every request starts immediately; aiohttp's total
    # request timeout also covers time spent waiting for a free pooled
    # connection, so queued requests could time out before they even start.
    gate = asyncio.Semaphore(max_concurrency)

    async def _bounded_download(session, url):
        # Fetch one URL once a concurrency slot is available.
        filename = os.path.join(SAVE_DIR, url.split('/')[-1])
        async with gate:
            return await download_image(session, url, filename)

    async with aiohttp.ClientSession() as session:
        # return_exceptions=True keeps one crashing task from cancelling
        # the rest of the batch.
        results = await asyncio.gather(
            *(_bounded_download(session, url) for url in url_list),
            return_exceptions=True,
        )

    # Anything other than an explicit True is a failure: that covers both a
    # False result and an exception object handed back by gather().
    failed_urls = [url for url, outcome in zip(url_list, results) if outcome is not True]
    print(f"\n❌ 下载失败 {len(failed_urls)} 张图片:")
    return failed_urls

# Run: download everything once, then retry only the URLs that failed.
# The number of retry rounds is capped so that a permanently unavailable URL
# (for example one that always answers 404) cannot keep the script looping
# forever.
MAX_RETRY_ROUNDS = 5

failed_urls = asyncio.run(download_images_async(image_urls))

retry_round = 0
while failed_urls and retry_round < MAX_RETRY_ROUNDS:
    retry_round += 1
    failed_urls = asyncio.run(download_images_async(failed_urls))

# Report whatever is still missing instead of retrying indefinitely.
if failed_urls:
    print(f"❌ 重试 {MAX_RETRY_ROUNDS} 轮后仍有 {len(failed_urls)} 张图片下载失败:")
    print("\n".join(failed_urls))