Open Source
This commit is contained in:
72
scrapper/2 asset_downloader.py
Normal file
72
scrapper/2 asset_downloader.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
import random
|
||||
from tqdm import tqdm
|
||||
|
||||
# === Paths ===
json_path = "kenney_data.json"  # JSON data file to read
output_dir = "kenney_assets"  # root directory for downloads

# === Load the JSON data ===
# Read once at start-up; the parsed value is iterated by the batch loop below.
with open(json_path, encoding="utf-8") as data_file:
    resources = json.load(data_file)
|
||||
|
||||
# === Utility functions ===
|
||||
|
||||
|
||||
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a path component.

    Keeps alphanumeric characters plus ``. _ - ( )`` and spaces, drops
    everything else (including path separators), and strips surrounding
    whitespace.

    A result made up only of dots (such as ``"."`` or ``".."``) is returned
    as ``"_"``: those names refer to the current/parent directory, so joining
    them onto a download folder would place files outside that folder.

    Args:
        name: Raw string to clean.

    Returns:
        The cleaned string; empty when no character of *name* is kept.
    """
    allowed_punctuation = "._- ()"
    cleaned = "".join(
        ch for ch in name if ch.isalnum() or ch in allowed_punctuation
    ).strip()
    # Non-empty but nothing left once dots are removed -> "." / ".." / "...".
    if cleaned and not cleaned.strip("."):
        return "_"
    return cleaned
|
||||
|
||||
|
||||
def download_zip(entry):
    """Download one resource's ZIP archive into the categorized output tree.

    The archive is stored as
    ``<output_dir>/<Category>[/<Series>]/<title> V<version>.zip``. If a file
    already exists at that path the resource is skipped without any request.

    Args:
        entry: Resource record (dict) from the JSON data file. Reads
            ``title``, ``download`` (URL), ``changelog`` (list of dicts; the
            first item supplies ``version``) and ``properties`` (dict with
            optional ``Category`` / ``Series`` lists).

    Returns:
        None. Progress and failures are reported with ``print``; download
        errors are caught so a batch run can continue with the next resource.

    Raises:
        KeyError: If *entry* has no ``title``.
    """
    title = entry["title"]
    download_url = entry.get("download")

    # Fall back to "1.0" when the changelog is missing, empty, or its first
    # item carries no version.
    changelog = entry.get("changelog") or [{}]
    version = changelog[0].get("version", "1.0")
    # The version comes from the data file too, so keep path separators and
    # other unsafe characters out of the file name.
    version = sanitize_filename(str(version))

    # Extract category and series; tolerate a missing dict or empty lists.
    properties = entry.get("properties") or {}
    category = (properties.get("Category") or ["Uncategorized"])[0]
    series = (properties.get("Series") or [None])[0]

    # Build the directory structure: <output_dir>/<category>[/<series>]
    folder_path = os.path.join(output_dir, sanitize_filename(category))
    if series:
        folder_path = os.path.join(folder_path, sanitize_filename(series))
    os.makedirs(folder_path, exist_ok=True)

    # Build the target file path.
    filename = f"{sanitize_filename(title)} V{version}.zip"
    filepath = os.path.join(folder_path, filename)

    if os.path.exists(filepath):
        print(f"✅ 已存在，跳过: {filename}")
        return

    # Stream into a temporary ".part" file and rename it only after the whole
    # body has been written. Writing straight to the final path would leave a
    # truncated archive behind after an interrupted transfer, and the
    # exists-check above would then skip it forever.
    partial_path = filepath + ".part"
    try:
        print(f"⬇️ 开始下载: {filename}")
        with requests.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, filepath)
        print(f"✅ 下载完成: {filename}")
    except Exception as e:
        # Deliberately broad: one failed resource must not abort the batch.
        print(f"❌ 下载失败: {filename} - {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing was written, or the partial file is already gone.
            pass

    # Simulate human behavior: wait a random interval before the next request.
    time.sleep(random.uniform(1.5, 4.0))
|
||||
|
||||
|
||||
# === Start the batch download ===
# Number of leading resources to skip.
# NOTE(review): 156 looks like a resume point left over from an earlier,
# interrupted run - confirm, and set to 0 to process every resource.
start_index = 156

for idx, resource in enumerate(tqdm(resources, desc="处理资源")):
    if idx < start_index:
        continue
    # Only direct links to .zip archives are downloaded. A missing or null
    # "download" value is skipped instead of failing on ``None.endswith``.
    download_url = resource.get("download")
    if isinstance(download_url, str) and download_url.endswith(".zip"):
        download_zip(resource)

print("\n✅ 所有资源处理完成")
|
||||
Reference in New Issue
Block a user