|
|
import os
|
|
|
import json
|
|
|
import aiohttp
|
|
|
import asyncio
|
|
|
|
|
|
|
|
|
# Directory that receives the downloaded image files; created if missing.
SAVE_DIR = '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Image'
os.makedirs(SAVE_DIR, exist_ok=True)

# Load the feature mapping (parsed from JSON) that holds the image URLs.
with open(
    '/datain/v-yinju/LLMBased_Multimodal_RS/Data/Musical_Instruments/Musical_Instruments.features.complete.json',
    'r',
    encoding='utf-8',
) as feature_file:
    features = json.load(feature_file)

# One URL per record: the first entry under each record's 'imageH' key.
image_urls = [record['imageH'][0] for record in features.values()]
|
|
|
|
|
|
|
|
|
async def download_image(session, url, save_path, retries=3):
    """Download ``url`` to ``save_path``, retrying on failure.

    Args:
        session: Object whose ``get(url, timeout=...)`` returns an async
            context manager yielding a response with a ``status`` attribute
            and an awaitable ``read()``. This file passes an
            ``aiohttp.ClientSession``.
        url: Address of the image to fetch.
        save_path: Destination path; the body is written in binary mode.
        retries: Maximum number of attempts.

    Returns:
        True once the response body has been written to ``save_path``;
        False when every attempt failed (or ``retries`` is 0).
    """
    for attempt in range(1, retries + 1):
        try:
            async with session.get(url, timeout=10) as response:
                if response.status == 200:
                    # Read the whole body BEFORE opening the file, so a
                    # failed or timed-out read cannot leave an empty or
                    # truncated file at save_path.
                    payload = await response.read()
                    with open(save_path, "wb") as file:
                        file.write(payload)
                    print(f"✅ 成功下载: {save_path}")
                    return True
                print(f"⚠️ 状态码错误 {response.status},重试 {attempt}/{retries}")
        except Exception as e:
            # Deliberate best-effort boundary: any request or file error
            # counts as one failed attempt and is reported, not raised.
            print(f"⚠️ 请求失败 {e},重试 {attempt}/{retries}")

        # Pause between attempts, but do not waste time after the last one.
        if attempt < retries:
            await asyncio.sleep(2)

    print(f"❌ 下载失败: {url}")
    return False
|
|
|
|
|
|
|
|
|
async def download_images_async(url_list, max_concurrency=100):
    """Download every URL in ``url_list`` into ``SAVE_DIR`` concurrently.

    Each file is named after the last ``/``-separated segment of its URL.

    Args:
        url_list: Sequence of image URLs to fetch.
        max_concurrency: Maximum number of downloads in flight at once.
            Bounding this keeps a request's timeout from being consumed
            while it is still queued behind other requests.

    Returns:
        The URLs (in input order) whose download did not succeed, including
        those whose task ended with an exception.
    """
    limiter = asyncio.Semaphore(max_concurrency)

    async with aiohttp.ClientSession() as session:

        async def fetch(url):
            # Destination file name is the final path segment of the URL.
            target = os.path.join(SAVE_DIR, url.split('/')[-1])
            async with limiter:
                return await download_image(session, url, target)

        results = await asyncio.gather(
            *(fetch(url) for url in url_list), return_exceptions=True
        )

    # With return_exceptions=True a result may be an exception object rather
    # than a bool; treat anything other than True as a failed download so
    # those URLs are reported and retried instead of silently dropped.
    failed_urls = [
        url for url, outcome in zip(url_list, results) if outcome is not True
    ]
    print(f"\n❌ 下载失败 {len(failed_urls)} 张图片:")
    return failed_urls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Upper bound on whole-list retry rounds, so URLs that fail permanently
# (e.g. removed images) cannot keep the script looping forever.
MAX_RETRY_ROUNDS = 5

# First pass over every URL; the result is the list of URLs that failed.
failed_urls = asyncio.run(download_images_async(image_urls))

# Re-run only the failures until none remain or the round budget is spent.
retry_round = 0
while failed_urls and retry_round < MAX_RETRY_ROUNDS:
    retry_round += 1
    failed_urls = asyncio.run(download_images_async(failed_urls))