Files
kenney-asset-scrapper/scrapper/2 asset_downloader.py
2025-04-22 09:11:40 +08:00

73 lines
2.1 KiB
Python

import os
import json
import requests
import time
import random
from tqdm import tqdm
# === Path configuration ===
output_dir = "kenney_assets"  # root directory that downloads are written under
json_path = "kenney_data.json"  # path of the JSON file describing the resources

# === Load the JSON data ===
# The parsed document is kept in `resources`; the download loop iterates over it.
with open(json_path, mode="r", encoding="utf-8") as json_file:
    resources = json.load(json_file)
# === Helper functions ===
def sanitize_filename(name, fallback="unnamed"):
    """Return *name* reduced to characters that are safe in a file name.

    Only alphanumeric characters (as judged by ``str.isalnum``) and the
    characters ``. _ - ( )`` plus the space are kept; surrounding whitespace
    is then stripped.

    Args:
        name: The raw name. ``None`` is treated as an empty name and any
            other non-string value is converted with ``str()``.
        fallback: Value returned when nothing survives the filtering, so that
            callers never receive an empty path component.

    Returns:
        The cleaned name, or *fallback* if the cleaned name is empty.
    """
    allowed_punctuation = "._- ()"
    text = "" if name is None else str(name)
    cleaned = "".join(
        ch for ch in text if ch.isalnum() or ch in allowed_punctuation
    ).strip()
    # An empty component would make os.path.join() silently collapse a
    # directory level (or produce a file called " V1.0.zip"), so never
    # return one.
    return cleaned or fallback
def download_zip(entry):
    """Download the ZIP archive of one resource into the output tree.

    The archive is stored as
    ``<output_dir>/<Category>[/<Series>]/<title> V<version>.zip`` where every
    path component is passed through ``sanitize_filename``. If that file
    already exists the function returns immediately without any network
    access. Otherwise the archive is streamed to a temporary ``.part`` file
    and only renamed to its final name once the transfer has finished, so an
    interrupted download is never mistaken for a complete one on a later run.

    Download errors are reported on stdout and swallowed so that a batch run
    can continue with the next resource. After every download attempt
    (successful or not) the function sleeps for a random 1.5-4.0 seconds.

    Args:
        entry: Mapping describing one resource. Reads the keys ``"title"``
            (required), ``"download"`` (URL of the archive), ``"changelog"``
            (list whose first item carries ``"version"``) and
            ``"properties"`` (mapping with list-valued ``"Category"`` and
            ``"Series"``).

    Raises:
        KeyError: If *entry* has no ``"title"``.
        OSError: If the target directory cannot be created.
    """
    title = entry["title"]
    download_url = entry.get("download")
    if not download_url:
        # Nothing to fetch; report it instead of letting requests fail on None.
        print(f"❌ 下载失败: {title} - 缺少下载链接")
        return

    # The first changelog item supplies the version; default to "1.0" when
    # the changelog is missing, empty, or has no usable version.
    changelog = entry.get("changelog") or []
    version = (changelog[0].get("version") if changelog else None) or "1.0"
    # The version ends up in the file name, so it must not contain separators.
    version = sanitize_filename(str(version)) or "1.0"

    # Extract category and series; tolerate missing keys and empty lists.
    properties = entry.get("properties") or {}
    category = (properties.get("Category") or ["Uncategorized"])[0]
    series = (properties.get("Series") or [None])[0]

    # Build the directory structure: <output_dir>/<category>[/<series>]
    folder_path = os.path.join(output_dir, sanitize_filename(category))
    if series:
        folder_path = os.path.join(folder_path, sanitize_filename(series))
    os.makedirs(folder_path, exist_ok=True)

    # Build the file path
    filename = f"{sanitize_filename(title)} V{version}.zip"
    filepath = os.path.join(folder_path, filename)
    if os.path.exists(filepath):
        print(f"✅ 已存在,跳过: {filename}")
        return

    # Stream into a sibling temp file; it only becomes `filepath` on success.
    partial_path = filepath + ".part"
    try:
        print(f"⬇️ 开始下载: {filename}")
        with requests.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, filepath)
        print(f"✅ 下载完成: {filename}")
    except Exception as e:
        # Deliberately broad: one failed resource must not abort the batch.
        print(f"❌ 下载失败: {filename} - {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # The temp file was never created or is already gone.
            pass

    # Mimic human behaviour: wait a random interval between downloads.
    time.sleep(random.uniform(1.5, 4.0))
# === Start the batch download ===
# Zero-based index of the first resource to process. Everything before it is
# skipped without being inspected, which lets an interrupted run be resumed.
# NOTE(review): 156 looks like a leftover from a previous resume; set it to 0
# to process the whole list (already-downloaded files are skipped anyway).
RESUME_FROM_INDEX = 156

for idx, resource in enumerate(tqdm(resources, desc="处理资源")):
    if idx < RESUME_FROM_INDEX:
        continue
    url = resource.get("download")
    # The JSON may carry a null or missing download; only real ".zip" URLs
    # are fetched.
    if isinstance(url, str) and url.endswith(".zip"):
        download_zip(resource)
print("\n✅ 所有资源处理完成")