# kenney-asset-scrapper/scrapper/2 asset_downloader.py

import os
import json
import requests
import time
import random
from tqdm import tqdm

# === Path configuration ===
json_path = "kenney_data.json"  # path to the JSON data file
output_dir = "kenney_assets"    # root directory for downloads

# === Load the JSON data ===
# The whole file is read and parsed once at import time; ``resources`` is the
# parsed document that the batch loop iterates over.
with open(json_path, encoding="utf-8") as f:
    resources = json.loads(f.read())

# === Utility functions ===


def sanitize_filename(name):
    """Return *name* reduced to a whitelist of filename characters.

    Characters for which ``str.isalnum`` is true are kept, together with
    ``.``, ``_``, ``-``, space, ``(`` and ``)``. Every other character is
    dropped, and leading/trailing whitespace is stripped from the result.
    """
    allowed_punctuation = "._- ()"
    kept = []
    for ch in name:
        if ch.isalnum() or ch in allowed_punctuation:
            kept.append(ch)
    return "".join(kept).strip()


def download_zip(entry):
    """Download the ZIP archive described by one resource entry.

    The archive is saved as
    ``<output_dir>/<Category>[/<Series>]/<title> V<version>.zip``, where the
    category, series and title are passed through ``sanitize_filename``.
    If that file already exists the entry is skipped without any network
    access and without the trailing delay.

    The response body is streamed into a ``.part`` file next to the target
    and only renamed to the final name once the transfer has finished, so a
    failed or interrupted download never leaves a truncated ``.zip`` behind
    that a later run would mistake for a completed one.

    Args:
        entry: Resource dict. ``title`` is required; ``download`` (URL),
            ``changelog`` (list of dicts with a ``version`` key) and
            ``properties`` (dict with optional ``Category`` / ``Series``
            lists) are read when present. A missing or empty changelog
            yields version ``"1.0"``; a missing or empty category yields
            ``"Uncategorized"``.

    Returns:
        None. Download errors are reported on stdout and not re-raised.
    """
    title = entry["title"]
    changelog = entry.get("changelog")
    version = changelog[0]["version"] if changelog else "1.0"
    download_url = entry.get("download")

    # Extract category and series; fall back to the defaults when the key is
    # missing or its list is empty.
    properties = entry.get("properties") or {}
    category = (properties.get("Category") or ["Uncategorized"])[0]
    series = (properties.get("Series") or [None])[0]

    # Build the directory structure.
    folder_path = os.path.join(output_dir, sanitize_filename(category))
    if series:
        folder_path = os.path.join(folder_path, sanitize_filename(series))
    os.makedirs(folder_path, exist_ok=True)

    # Build the file path.
    filename = f"{sanitize_filename(title)} V{version}.zip"
    filepath = os.path.join(folder_path, filename)

    if os.path.exists(filepath):
        print(f"✅ 已存在，跳过: {filename}")
        return

    # Stream into a temporary sibling so the final name only ever refers to a
    # fully written archive.
    partial_path = filepath + ".part"
    try:
        print(f"⬇️ 开始下载: {filename}")
        with requests.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, filepath)
        print(f"✅ 下载完成: {filename}")
    except Exception as e:
        # Best-effort batch job: report the failure and carry on with the
        # next entry instead of aborting the whole run.
        print(f"❌ 下载失败: {filename} - {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing was written, or the leftover cannot be removed; either
            # way the final path is untouched, so a retry is still possible.
            pass

    # Mimic human behaviour: wait a random interval between downloads.
    time.sleep(random.uniform(1.5, 4.0))


# === Start the batch download ===
# Index of the first resource to process; every entry before it is skipped.
# NOTE(review): presumably a resume offset left over from an earlier run —
# confirm before changing.
start_index = 156

for idx, resource in enumerate(tqdm(resources, desc="处理资源")):
    if idx < start_index:
        continue
    # Only entries whose download URL is a string ending in ".zip" are
    # fetched; a missing or null URL is skipped rather than raising.
    download_url = resource.get("download")
    if isinstance(download_url, str) and download_url.endswith(".zip"):
        download_zip(resource)

print("\n✅ 所有资源处理完成")