Open Source

This commit is contained in:
2025-04-22 09:11:40 +08:00
commit a4bf39a958
14 changed files with 1043 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
import os
import json
import requests
import time
import random
from urllib.parse import urlparse, unquote
from tqdm import tqdm
# ========== Configuration ==========
json_path = "kenney_data.json"  # Path to the JSON data file
output_root = "kenney_assets_images"  # Root directory under which downloads are stored
headers = {"User-Agent": "Mozilla/5.0"}  # HTTP headers carrying a browser-like User-Agent
# ========== Utility functions ==========
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a path component.

    Alphanumeric characters (including non-ASCII letters and digits) and the
    punctuation ``._- ()`` are kept; everything else is dropped and the
    surrounding whitespace is stripped.

    Args:
        name: The raw text to turn into a file or directory name.

    Returns:
        The cleaned name, or ``"unnamed"`` when nothing usable is left.
    """
    allowed_punctuation = "._- ()"
    cleaned = "".join(
        c for c in name if c.isalnum() or c in allowed_punctuation
    ).strip()
    # An empty result, or one made only of dots/spaces (".", ".."), is not a
    # usable path component: joined into a path it would collapse into, or
    # climb out of, the parent directory. Fall back to a fixed placeholder.
    if not cleaned.strip(". "):
        return "unnamed"
    return cleaned
def download_image(url, save_path):
    """Download *url* to *save_path* unless that file already exists.

    The body is streamed into a sibling ``<save_path>.part`` file and only
    renamed to *save_path* once the transfer has finished, so an interrupted
    download never leaves a truncated file that a later run would skip as
    "already exists". Failures are reported on stdout and not re-raised, so a
    single bad URL does not stop a batch.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; its directory must already exist.

    Returns:
        None.
    """
    if os.path.exists(save_path):
        print(f"✅ 已存在,跳过: {save_path}")
        return

    partial_path = f"{save_path}.part"
    try:
        # Send the module-level `headers` (browser-like User-Agent) and close
        # the connection deterministically via the context manager.
        with requests.get(
            url, headers=headers, stream=True, timeout=30
        ) as response:
            # Without this, a 404/500 error page would be saved as an image.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, save_path)
        print(f"✅ 下载完成: {save_path}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and move on.
        print(f"❌ 下载失败: {url} - {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up (the partial file was never created).
            pass
    # Random pause after every attempted download to imitate human browsing.
    time.sleep(random.uniform(1.5, 4.0))
# ========== Load JSON ==========
# The loop below treats the loaded value as an iterable of dict entries with
# a "title" key plus optional "properties" and "images" keys.
with open(json_path, "r", encoding="utf-8") as f:
    resources = json.load(f)

# ========== Batch processing ==========
for entry in tqdm(resources, desc="处理资源"):
    title = entry["title"]
    properties = entry.get("properties") or {}
    # `or` also covers a key that is present but holds an empty list, which
    # would otherwise raise IndexError on `[0]`.
    # NOTE(review): assumes Category/Series values are lists - confirm
    # against the JSON producer.
    category = (properties.get("Category") or ["Uncategorized"])[0]
    series = (properties.get("Series") or [None])[0]
    images = entry.get("images") or []

    # Build the target directory: Category/Series/Title/
    path = os.path.join(output_root, sanitize_filename(category))
    if series:
        path = os.path.join(path, sanitize_filename(series))
    path = os.path.join(path, sanitize_filename(title))
    os.makedirs(path, exist_ok=True)

    for img_url in images:
        # Decode percent-escapes (e.g. %20 => space) BEFORE taking the
        # basename, so an encoded separator such as %2F cannot smuggle extra
        # path components into the file name.
        img_name = os.path.basename(unquote(urlparse(img_url).path))
        if img_name in ("", ".", ".."):
            # No usable file name (e.g. the URL ends with "/"); joining it
            # would point at a directory instead of a file.
            print(f"⚠️ 无法确定文件名,跳过: {img_url}")
            continue
        img_path = os.path.join(path, img_name)
        download_image(img_url, img_path)

print("\n🎉 所有图片处理完成!")