"""Download the images listed in a JSON resource index into a folder tree.

Each entry of the JSON file is read for its ``title``, its ``properties``
(``Category`` and ``Series`` lists) and its ``images`` (a list of URLs).
Images are stored under ``output_root/Category/[Series/]Title/``.
"""

import json
import os
import random
import time
from urllib.parse import urlparse, unquote

import requests
from tqdm import tqdm

# ========== Configuration ==========
json_path = "kenney_data.json"  # path of the JSON index
output_root = "kenney_assets_images"  # root directory for downloaded files
headers = {"User-Agent": "Mozilla/5.0"}  # sent with every image request


# ========== Helpers ==========
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a path component.

    Keeps alphanumeric characters (``str.isalnum`` is Unicode-aware) plus
    ``. _ - ( )`` and spaces, then strips surrounding whitespace.  The result
    may be an empty string when nothing survives.
    """
    return "".join(c for c in name if c.isalnum() or c in "._- ()").strip()


def download_image(url, save_path):
    """Download *url* to *save_path*, skipping files that already exist.

    The body is streamed into ``<save_path>.part`` and renamed into place
    only after the whole response has been written, so an interrupted run
    never leaves a truncated file that a later run would skip as "existing".
    Failures are reported and swallowed so one bad URL does not abort the
    batch.  A random pause follows every attempted download.
    """
    if os.path.exists(save_path):
        print(f"✅ 已存在,跳过: {save_path}")
        return

    tmp_path = f"{save_path}.part"
    try:
        # The context manager releases the connection even on errors.
        with requests.get(
            url, headers=headers, stream=True, timeout=30
        ) as response:
            # Without this an HTTP error page would be saved as the image.
            response.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, save_path)
        print(f"✅ 下载完成: {save_path}")
    except Exception as e:  # deliberate best-effort: keep the batch going
        print(f"❌ 下载失败: {url} - {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # the partial file was never created

    time.sleep(random.uniform(1.5, 4.0))  # throttle: mimic a human visitor


def _first_or(values, default):
    """Return the first item of *values*, or *default* if it is empty/None."""
    return values[0] if values else default


def _entry_directory(entry):
    """Build the ``Category/[Series/]Title`` target directory for *entry*."""
    properties = entry.get("properties") or {}
    category = _first_or(properties.get("Category"), "Uncategorized")
    series = _first_or(properties.get("Series"), None)
    title = entry.get("title") or "Untitled"

    # Fall back to the defaults when sanitizing leaves nothing, otherwise an
    # empty component would silently collapse two directory levels into one.
    parts = [output_root, sanitize_filename(category) or "Uncategorized"]
    if series and sanitize_filename(series):
        parts.append(sanitize_filename(series))
    parts.append(sanitize_filename(title) or "Untitled")
    return os.path.join(*parts)


def _image_filename(img_url):
    """Return the decoded file name of *img_url*, or ``""`` if it has none.

    The name is URL-decoded (e.g. ``%20`` becomes a space) and reduced to its
    final path component again afterwards, so an encoded separator such as
    ``%2F`` cannot make the file land outside the target directory.
    """
    raw_name = os.path.basename(urlparse(img_url).path)
    name = os.path.basename(unquote(raw_name))
    return "" if name in (".", "..") else name


# ========== Load JSON ==========
with open(json_path, "r", encoding="utf-8") as f:
    resources = json.load(f)

# ========== Batch processing ==========
for entry in tqdm(resources, desc="处理资源"):
    # Target layout: Category/Series/Title/
    path = _entry_directory(entry)
    os.makedirs(path, exist_ok=True)

    for img_url in entry.get("images") or []:
        img_name = _image_filename(img_url)
        if not img_name:
            # URL ends in "/" or has no usable file name; nothing to save to.
            print(f"❌ 下载失败: {img_url} - no file name in URL")
            continue
        download_image(img_url, os.path.join(path, img_name))

print("\n🎉 所有图片处理完成!")