Open Source

2025-04-22 09:11:40 +08:00
commit a4bf39a958
14 changed files with 1043 additions and 0 deletions
--- a/image_downloader.py
+++ b/image_downloader.py
@@ -0,0 +1,61 @@
+import os
+import json
+import requests
+import time
+import random
+from urllib.parse import urlparse, unquote
+from tqdm import tqdm
+
+# ========== Configuration ==========
+json_path = "kenney_data.json"         # path to the JSON data file
+output_root = "kenney_assets_images"   # root directory for stored images
+# Header dict carrying a browser-like User-Agent string.
+headers = {"User-Agent": "Mozilla/5.0"}
+
+# ========== Utility functions ==========
+
+
+def sanitize_filename(name):
+    """Reduce ``name`` to characters that are safe in one path component.
+
+    Alphanumeric characters (as judged by ``str.isalnum``, so non-ASCII
+    letters are kept) and the characters ``. _ - ( )`` plus the space are
+    preserved; everything else, including path separators, is dropped.
+    Surrounding whitespace is stripped from the result.
+
+    If nothing meaningful is left (the result is empty or consists only of
+    dots and spaces, e.g. ``"."`` or ``".."``), ``"_"`` is returned instead,
+    because joining such a value onto a path would point at the current or
+    parent directory rather than creating a new folder.
+    """
+    cleaned = "".join(c for c in name if c.isalnum() or c in "._- ()").strip()
+    if not cleaned.strip(". "):
+        return "_"
+    return cleaned
+
+
+def download_image(url, save_path):
+    """Download ``url`` to ``save_path`` unless that file already exists.
+
+    The body is streamed in 8 KiB chunks into ``save_path + ".part"`` and
+    only renamed to ``save_path`` once the transfer has finished, so an
+    interrupted download never leaves a truncated file that a later run
+    would mistake for a finished one. HTTP error statuses are treated as
+    failures instead of being written to disk as if they were images.
+
+    Failures are reported on stdout and swallowed so that one bad URL does
+    not abort a batch run. Nothing is returned.
+
+    Args:
+        url: Address of the image to fetch.
+        save_path: Destination file path; its directory must already exist.
+    """
+    if os.path.exists(save_path):
+        print(f"✅ 已存在，跳过: {save_path}")
+        return
+    tmp_path = save_path + ".part"
+    try:
+        # The context manager releases the connection even if writing fails.
+        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
+            response.raise_for_status()
+            with open(tmp_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        os.replace(tmp_path, save_path)
+        print(f"✅ 下载完成: {save_path}")
+    except Exception as e:
+        # Deliberately broad: this is the per-file boundary of a best-effort
+        # batch job, so any failure is reported and the run continues.
+        print(f"❌ 下载失败: {url} - {e}")
+        try:
+            os.remove(tmp_path)
+        except OSError:
+            # No partial file was created, or it cannot be removed; either
+            # way there is nothing more to clean up here.
+            pass
+    time.sleep(random.uniform(1.5, 4.0))  # pause to mimic human browsing
+
+
+# ========== Load JSON ==========
+with open(json_path, "r", encoding="utf-8") as f:
+    resources = json.load(f)
+
+# ========== Batch processing ==========
+for entry in tqdm(resources, desc="处理资源"):
+    title = entry["title"]
+    # `or` covers both a missing key and a present-but-empty value, either
+    # of which would otherwise raise on the lookup or on the [0] index.
+    properties = entry.get("properties") or {}
+    category = (properties.get("Category") or ["Uncategorized"])[0]
+    series = (properties.get("Series") or [None])[0]
+    images = entry.get("images") or []
+
+    # Build the target directory: Category/Series/Title/
+    path = os.path.join(output_root, sanitize_filename(category))
+    if series:
+        path = os.path.join(path, sanitize_filename(series))
+    path = os.path.join(path, sanitize_filename(title))
+    os.makedirs(path, exist_ok=True)
+
+    for img_url in images:
+        # Decode percent-escapes (e.g. %20 => space) BEFORE taking the last
+        # path segment, so an encoded separator such as %2F cannot smuggle
+        # extra directories or ".." into the save path.
+        decoded_path = unquote(urlparse(img_url).path).replace("\\", "/")
+        img_name = decoded_path.rsplit("/", 1)[-1]
+        if img_name in ("", ".", ".."):
+            # No usable file name; joining it would point at a directory,
+            # which would then be reported as "already exists".
+            print(f"⚠️ 无法解析文件名，跳过: {img_url}")
+            continue
+        img_path = os.path.join(path, img_name)
+        download_image(img_url, img_path)
+
+print("\n🎉 所有图片处理完成！")