62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
import os
|
||
import json
|
||
import requests
|
||
import time
|
||
import random
|
||
from urllib.parse import urlparse, unquote
|
||
from tqdm import tqdm
|
||
|
||
# ========== Configuration ==========
# Restored from a scrape that interleaved stray "|" / "||" lines between
# statements; names and values are unchanged.
json_path = "kenney_data.json"  # path of the JSON resource list to read
output_root = "kenney_assets_images"  # root directory for downloaded files
headers = {"User-Agent": "Mozilla/5.0"}  # HTTP headers for image requests
|
||
|
||
# ========== Utility functions ==========
|
||
|
||
|
||
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a path component.

    Keeps alphanumeric characters (as defined by ``str.isalnum``) plus
    ``.``, ``_``, ``-``, space and parentheses, then strips surrounding
    whitespace.

    If nothing usable remains -- an empty string, or a name made only of
    dots such as ``"."`` or ``".."`` -- the placeholder ``"unnamed"`` is
    returned. Those values would otherwise collapse onto, or climb out of,
    the parent directory once passed to ``os.path.join``.

    Args:
        name: The raw string to clean (for example a title or category).

    Returns:
        The cleaned, non-empty component name.
    """
    allowed_punctuation = "._- ()"
    cleaned = "".join(
        ch for ch in name if ch.isalnum() or ch in allowed_punctuation
    ).strip()

    # "", "." and ".." are not real file names; never hand them to path joins.
    if not cleaned.strip("."):
        return "unnamed"
    return cleaned
|
||
|
||
|
||
def download_image(url, save_path):
    """Download *url* to *save_path* unless that file already exists.

    The request is sent with the module-level ``headers`` and a 30 second
    timeout. The body is streamed into ``<save_path>.part`` and only renamed
    to *save_path* once the transfer has finished, so a failed or interrupted
    download never leaves a truncated file that a later run would skip as
    "already exists". Non-2xx responses are treated as failures instead of
    being written to disk.

    Network and file-system errors are reported with ``print`` and not
    re-raised, so a batch run continues with the next image.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; its directory must already exist.

    Returns:
        None.
    """
    if os.path.exists(save_path):
        print(f"✅ 已存在,跳过: {save_path}")
        return

    partial_path = f"{save_path}.part"
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(
            url, headers=headers, stream=True, timeout=30
        ) as response:
            # Without this an HTML error page would be stored as the image.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, save_path)
        print(f"✅ 下载完成: {save_path}")
    except (requests.RequestException, OSError) as e:
        print(f"❌ 下载失败: {url} - {e}")
        # Best-effort cleanup of the incomplete temporary file.
        try:
            os.remove(partial_path)
        except OSError:
            pass

    # Random pause to mimic human browsing and avoid hammering the server.
    # NOTE(review): the original indentation was lost; the pause is assumed
    # to follow every attempted download (success or failure) -- confirm.
    time.sleep(random.uniform(1.5, 4.0))
|
||
|
||
|
||
# ========== Load JSON ==========
with open(json_path, "r", encoding="utf-8") as f:
    resources = json.load(f)

# ========== Batch processing ==========
# Each entry is read as a dict with a "title", an optional "properties"
# mapping and an optional "images" list of URLs.
# NOTE(review): "Category" / "Series" are indexed with [0], so they are
# presumably lists of strings -- confirm against the JSON producer.
for entry in tqdm(resources, desc="处理资源"):
    title = entry["title"]
    # Tolerate a missing/null "properties" and empty lists, which previously
    # raised KeyError / IndexError and aborted the whole run.
    properties = entry.get("properties") or {}
    category = (properties.get("Category") or ["Uncategorized"])[0]
    series = (properties.get("Series") or [None])[0]
    images = entry.get("images") or []

    # Build the target directory: Category/Series/Title/
    path = os.path.join(output_root, sanitize_filename(category))
    if series:
        path = os.path.join(path, sanitize_filename(series))
    path = os.path.join(path, sanitize_filename(title))
    os.makedirs(path, exist_ok=True)

    for img_url in images:
        parsed_url = urlparse(img_url)
        # Decode first (e.g. %20 -> space), then take the basename, so an
        # encoded separator such as %2F cannot add extra path components.
        img_name = os.path.basename(unquote(parsed_url.path))
        if img_name in ("", ".", ".."):
            # No usable file name (e.g. the URL ends with "/"); joining it
            # would point at a directory and be misreported as existing.
            print(f"⚠️ 无法从 URL 解析文件名,跳过: {img_url}")
            continue
        img_path = os.path.join(path, img_name)
        download_image(img_url, img_path)

print("\n🎉 所有图片处理完成!")
|