refactor(core): replace JSON-based pipeline with MySQL database
This commit introduces a major architectural overhaul, migrating the data backend from a flat JSON file to a relational MySQL database.

- Replaced multiple scraping scripts with a unified `main.py` that handles crawling, parsing, and database synchronization.
- Introduced `mysql_helper.py` for robust database interaction with a connection pool.
- Added `queries.sql` defining the new database schema for assets, categories, tags, and changelogs.
- Removed all obsolete frontend code (v1, v2) and old scraping scripts.

This change provides a more scalable and maintainable foundation for managing asset data.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -50,3 +50,6 @@ pnpm-debug.log*
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.rar
|
||||
|
||||
# Output
|
||||
media
|
||||
1
all_asset_infos.json
Normal file
1
all_asset_infos.json
Normal file
File diff suppressed because one or more lines are too long
1
all_asset_infos_detailed.json
Normal file
1
all_asset_infos_detailed.json
Normal file
File diff suppressed because one or more lines are too long
@@ -1,67 +0,0 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <meta charset="UTF-8" />
  <title>Kenney Asset Gallery</title>
  <!-- lightGallery 2.8.3 core + zoom + thumbnail plugins, loaded from
       cdnjs with SRI integrity hashes -->
  <link
    rel="stylesheet"
    href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lightgallery.min.css"
    integrity="sha512-QMCloGTsG2vNSnHcsxYTapI6pFQNnUP6yNizuLL5Wh3ha6AraI6HrJ3ABBaw6SIUHqlSTPQDs/SydiR98oTeaQ=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  />
  <script
    src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/lightgallery.min.js"
    integrity="sha512-n02TbYimj64qb98ed5WwkNiSw/i9Xlvv4Ehvhg0jLp3qMAMWCYUHbOMbppZ0vimtyiyw9NqNqxUZC4hq86f4aQ=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  ></script>
  <link
    rel="stylesheet"
    href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lg-zoom.min.css"
    integrity="sha512-S/hU6dGSK3D7SRpCvRF/IEufIr6Ikgp5vDiJarhdeFGEnw36hWZ6gVBjnwBbzjA+NEP7D8Gdm+5LL1HEsyiB1w=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  />
  <script
    src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/plugins/zoom/lg-zoom.min.js"
    integrity="sha512-fwxc/NvaA3du4ZRE6J/Ilrqi2xwOB1QfHBR4neA+ha13/pkweiRfPgBiV4VbfAf/Vi3rXAXdQ3zexUJ1V2bWrg=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  ></script>
  <link
    rel="stylesheet"
    href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lg-thumbnail.min.css"
    integrity="sha512-rKuOh3xlF/027KUPuMok0ESsZ2zWPRzkniD3n5zZKCAtbiVkYw66DR4KtVAGf8dLPLr5DdyQs05BlSmEyXctkQ=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  />
  <script
    src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/plugins/thumbnail/lg-thumbnail.min.js"
    integrity="sha512-jZxB8WysJ6S6e4Hz5IZpAzR1WiflBl0hBxriHGlLkUN32T18+rD1aLNifa1KTll/zx8lIfWVP1NqEjHi/Khy5w=="
    crossorigin="anonymous"
    referrerpolicy="no-referrer"
  ></script>
  <link rel="stylesheet" href="style.css" />
</head>
<body>
  <h1>🎮 Kenney Asset Gallery</h1>

  <!-- Filter bar: category and tag dropdowns, populated by script.js -->
  <div id="filters">
    <label for="categoryFilter">选择分类:</label>
    <select name="categoryFilter" id="categoryFilter">
      <option value="all">📂 所有分类</option>
    </select>
    <label for="tagFilter">选择标签:</label>
    <select name="tagFilter" id="tagFilter">
      <option value="all">🏷️ 所有标签</option>
    </select>
  </div>

  <!-- Card grid filled by render() in script.js -->
  <div id="gallery"></div>

  <script src="script.js"></script>
</body>
</html>
|
||||
@@ -1,98 +0,0 @@
|
||||
// Full dataset, populated once by the fetch at the bottom of this file.
let allData = [];

/**
 * Normalize Windows-style path separators so paths work as URLs.
 * @param {string} str - path that may contain backslashes
 * @returns {string} the same path with every "\" turned into "/"
 */
function sanitize(str) {
  return str.split("\\").join("/");
}
|
||||
|
||||
/**
 * Fill the category and tag <select> elements from the dataset.
 * Collects the first Category entry and every Tag of each item,
 * de-duplicates them, and appends sorted <option>s.
 * @param {Array<Object>} data - asset records from the JSON feed
 */
function populateFilters(data) {
  const categories = new Set();
  const tags = new Set();

  for (const item of data) {
    categories.add(item.properties?.Category?.[0]);
    for (const tag of item.properties?.Tags || []) {
      tags.add(tag);
    }
  }

  // Append one <option> per distinct value, alphabetically sorted.
  const appendOptions = (selectId, values) => {
    const select = document.getElementById(selectId);
    for (const value of [...values].sort()) {
      const option = document.createElement("option");
      option.value = value;
      option.textContent = value;
      select.appendChild(option);
    }
  };

  appendOptions("categoryFilter", categories);
  appendOptions("tagFilter", tags);
}
|
||||
|
||||
/**
 * Rebuild the #gallery grid from `data`.
 * Each item becomes a card whose images form one lightGallery group
 * (first image visible, remaining images as hidden anchors), plus a
 * title/tags body and a download link; lightGallery is initialized
 * per card after insertion.
 * NOTE(review): card.innerHTML interpolates item fields unescaped —
 * acceptable for trusted local data, an XSS vector otherwise.
 * @param {Array<Object>} data - asset records to display
 */
function render(data) {
  const gallery = document.getElementById("gallery");
  gallery.innerHTML = ""; // clear previous cards

  data.forEach((item, index) => {
    const images = (item.images || []).map(sanitize);
    const tags = (item.properties?.Tags || []).join(', ');
    const category = item.properties?.Category?.[0] || 'Uncategorized';
    const downloadPath = sanitize(item.download);

    const card = document.createElement("div");
    card.className = "card";
    // data-* attributes mirror the filter state used by filterGallery()
    card.setAttribute("data-category", category);
    card.setAttribute("data-tags", tags);

    // Unique per-card id so each card gets its own lightGallery group.
    const galleryGroupId = `gallery-${index}`;
    card.innerHTML = `
      <div class="lg-gallery" id="${galleryGroupId}">
        <a href="${images[0]}" data-lg-size="1400-800">
          <img src="${images[0]}" alt="${item.title}">
        </a>
        ${images.slice(1).map(img => `
          <a href="${img}" data-lg-size="1400-800" style="display:none;"></a>
        `).join("")}
      </div>
      <div class="card-body">
        <div class="card-title">${item.title}</div>
        <div class="card-tags">Tags: ${tags}</div>
        <div class="card-footer">
          <a class="download-btn" href="${downloadPath}" download>⬇️ 下载资源</a>
        </div>
      </div>
    `;
    gallery.appendChild(card);

    // Initialize lightGallery for this card's image group
    lightGallery(document.getElementById(galleryGroupId), {
      selector: 'a',
      thumbnail: true,
      zoom: true
    });
  });
}
|
||||
|
||||
/**
 * Re-render the gallery using the currently selected category and tag.
 * The value "all" in either dropdown disables that criterion.
 */
function filterGallery() {
  const selectedCategory = document.getElementById("categoryFilter").value;
  const selectedTag = document.getElementById("tagFilter").value;

  const matches = (item) => {
    const categoryOk =
      selectedCategory === 'all' ||
      item.properties?.Category?.[0] === selectedCategory;
    const tagOk =
      selectedTag === 'all' ||
      (item.properties?.Tags || []).includes(selectedTag);
    return categoryOk && tagOk;
  };

  render(allData.filter(matches));
}
|
||||
|
||||
// Bootstrap: load the dataset, build the filter dropdowns, render all
// cards, then wire the dropdowns to re-filter on change.
fetch("data/kenney_data_local.json")
  .then(res => {
    // Surface HTTP errors instead of failing later inside res.json().
    if (!res.ok) throw new Error(`HTTP ${res.status} loading asset data`);
    return res.json();
  })
  .then(data => {
    allData = data;
    populateFilters(data);
    render(data);
    document.getElementById("categoryFilter").addEventListener("change", filterGallery);
    document.getElementById("tagFilter").addEventListener("change", filterGallery);
  })
  // BUGFIX: the chain had no rejection handler, so a missing/broken
  // data file failed silently with a blank page.
  .catch(err => console.error("Failed to load asset data:", err));
|
||||
@@ -1,82 +0,0 @@
|
||||
/* ---- Page shell ---- */
body {
  font-family: "Segoe UI", sans-serif;
  background-color: #f0f2f5;
  margin: 0;
  padding: 2rem;
  color: #333;
}

h1 {
  text-align: center;
  margin-bottom: 2rem;
  color: #444;
}

/* ---- Responsive card grid: as many 320px+ columns as fit ---- */
#gallery {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
  gap: 1.5rem;
}

/* ---- Asset cards ---- */
.card {
  background: white;
  border-radius: 12px;
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
  overflow: hidden;
  transition: transform 0.2s;
}

/* Subtle lift on hover */
.card:hover {
  transform: translateY(-5px);
}

.card img {
  width: 100%;
  height: 200px;
  object-fit: cover;
}

.card-body {
  padding: 1rem;
}

.card-title {
  font-size: 1.2rem;
  margin-bottom: 0.5rem;
}

.card-tags {
  font-size: 0.85rem;
  color: #666;
}

.card-footer {
  margin-top: 1rem;
}

/* ---- Download button ---- */
.download-btn {
  display: inline-block;
  padding: 0.4rem 0.8rem;
  background: #4caf50;
  color: white;
  border-radius: 6px;
  text-decoration: none;
  font-size: 0.9rem;
  transition: background 0.2s;
}

.download-btn:hover {
  background: #45a049;
}

/* ---- Category/tag filter bar ---- */
#filters {
  display: flex;
  justify-content: center;
  gap: 1rem;
  margin-bottom: 1.5rem;
}

select {
  padding: 0.5rem;
  font-size: 1rem;
}
|
||||
@@ -1,41 +0,0 @@
|
||||
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8" />
  <title>Kenney 资源库</title>
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- lightGallery 2.8.3 core + zoom + thumbnail from cdnjs (with SRI).
       NOTE(review): the same libraries appear to be loaded again from
       local libs/ at the bottom of <body> — confirm and drop one copy. -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lightgallery.min.css" integrity="sha512-QMCloGTsG2vNSnHcsxYTapI6pFQNnUP6yNizuLL5Wh3ha6AraI6HrJ3ABBaw6SIUHqlSTPQDs/SydiR98oTeaQ==" crossorigin="anonymous" referrerpolicy="no-referrer" />
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/lightgallery.min.js" integrity="sha512-n02TbYimj64qb98ed5WwkNiSw/i9Xlvv4Ehvhg0jLp3qMAMWCYUHbOMbppZ0vimtyiyw9NqNqxUZC4hq86f4aQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lg-zoom.min.css" integrity="sha512-S/hU6dGSK3D7SRpCvRF/IEufIr6Ikgp5vDiJarhdeFGEnw36hWZ6gVBjnwBbzjA+NEP7D8Gdm+5LL1HEsyiB1w==" crossorigin="anonymous" referrerpolicy="no-referrer" />
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/plugins/zoom/lg-zoom.min.js" integrity="sha512-fwxc/NvaA3du4ZRE6J/Ilrqi2xwOB1QfHBR4neA+ha13/pkweiRfPgBiV4VbfAf/Vi3rXAXdQ3zexUJ1V2bWrg==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/css/lg-thumbnail.min.css" integrity="sha512-rKuOh3xlF/027KUPuMok0ESsZ2zWPRzkniD3n5zZKCAtbiVkYw66DR4KtVAGf8dLPLr5DdyQs05BlSmEyXctkQ==" crossorigin="anonymous" referrerpolicy="no-referrer" />
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lightgallery/2.8.3/plugins/thumbnail/lg-thumbnail.min.js" integrity="sha512-jZxB8WysJ6S6e4Hz5IZpAzR1WiflBl0hBxriHGlLkUN32T18+rD1aLNifa1KTll/zx8lIfWVP1NqEjHi/Khy5w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
  <link rel="stylesheet" href="style.css" />
</head>
<body>
  <div class="container">
    <!-- Sidebar: search box + resource list (filled by renderList) -->
    <aside class="sidebar">
      <input type="text" id="searchInput" placeholder="🔍 搜索资源名称..." />
      <ul id="resourceList"></ul>
    </aside>

    <!-- Detail pane: hidden until a resource is selected -->
    <main class="content">
      <div id="details" class="hidden">
        <h2 id="detailTitle"></h2>
        <div id="detailTags"></div>
        <div id="galleryWrapper"></div>
        <div id="downloadBlock"></div>
        <div id="versionTableWrapper"></div>
      </div>
      <div id="placeholder" class="placeholder">
        ← 请选择左侧资源以查看详情
      </div>
    </main>
  </div>

  <script src="libs/lightgallery.min.js"></script>
  <script src="libs/lg-thumbnail.min.js"></script>
  <script src="libs/lg-zoom.min.js"></script>
  <script src="script.js"></script>
</body>
</html>
|
||||
@@ -1,108 +0,0 @@
|
||||
// Full dataset, populated once by the fetch at the bottom of this file.
let allData = [];
// <li> element of the currently highlighted resource, if any.
let currentActive = null;

/**
 * Replace every backslash with a forward slash so local Windows paths
 * are usable in URLs and attributes.
 * @param {string} str
 * @returns {string}
 */
function sanitize(str) {
  return str.split("\\").join("/");
}
|
||||
|
||||
/**
 * Rebuild the sidebar resource list from `data`.
 * Each item shows its first image as a thumbnail, its title and its
 * asset count, and opens the detail pane via showDetails() on click.
 * NOTE(review): li.innerHTML interpolates item fields unescaped.
 * @param {Array<Object>} data - asset records to list
 */
function renderList(data) {
  const list = document.getElementById("resourceList");
  list.innerHTML = "";

  data.forEach((item, index) => {
    const li = document.createElement("li");
    li.setAttribute("data-index", index);

    // Fall back to an empty src when the item has no images.
    const thumb = item.images?.[0] ? sanitize(item.images[0]) : "";
    const name = item.title;
    const assets = item.properties?.Assets || "未知";

    li.innerHTML = `
      <img src="${thumb}" class="thumb" alt="">
      <div>
        <div><strong>${name}</strong></div>
        <div style="font-size:0.85rem; color: #666;">素材量: ${assets}</div>
      </div>
    `;

    li.addEventListener("click", () => showDetails(item, li));
    list.appendChild(li);
  });
}
|
||||
|
||||
/**
 * Show the detail pane for `item` and highlight its list entry `li`.
 * Fills title/tags, the download link, the image gallery (initializing
 * lightGallery on it) and the version-history table.
 * NOTE(review): lightGallery is re-initialized on every click without
 * destroying the previous instance — possible listener leak; confirm.
 * @param {Object} item - selected asset record
 * @param {HTMLLIElement} li - its sidebar list element
 */
function showDetails(item, li) {
  // Move the "active" highlight to the clicked entry.
  if (currentActive) currentActive.classList.remove("active");
  currentActive = li;
  currentActive.classList.add("active");

  document.getElementById("placeholder").classList.add("hidden");
  document.getElementById("details").classList.remove("hidden");

  document.getElementById("detailTitle").textContent = item.title;
  document.getElementById("detailTags").textContent = `分类: ${
    item.properties?.Category?.[0] || "N/A"
  } | 标签: ${(item.properties?.Tags || []).join(", ")}`;

  // Download link
  const download = sanitize(item.download);
  document.getElementById(
    "downloadBlock"
  ).innerHTML = `<a class="download-btn" href="${download}" download>⬇️ 下载资源</a>`;

  // Image gallery: one anchor per image, then hand the container to
  // lightGallery for the lightbox behavior.
  const gallery = document.createElement("div");
  gallery.id = "gallery";
  item.images?.forEach((img) => {
    img = sanitize(img);
    const a = document.createElement("a");
    a.href = img;
    a.innerHTML = `<img src="${img}" alt="">`;
    gallery.appendChild(a);
  });
  const galleryWrapper = document.getElementById("galleryWrapper");
  galleryWrapper.innerHTML = "";
  galleryWrapper.appendChild(gallery);

  lightGallery(gallery, {
    selector: "a",
    thumbnail: true,
    zoom: true,
  });

  // Version history table (date / version / description), or empty
  // when the item has no changelog.
  const versionBlock = document.getElementById("versionTableWrapper");
  if (item.changelog?.length > 0) {
    let table = `<table><tr><th>日期</th><th>版本</th><th>描述</th></tr>`;
    item.changelog.forEach((row) => {
      table += `<tr><td>${row.date}</td><td>${row.version}</td><td>${
        row.description || ""
      }</td></tr>`;
    });
    table += `</table>`;
    versionBlock.innerHTML = table;
  } else {
    versionBlock.innerHTML = "";
  }
}
|
||||
|
||||
/**
 * Filter the sidebar list to titles containing the search box text
 * (case-insensitive, surrounding whitespace ignored).
 */
function handleSearch() {
  const query = document
    .getElementById("searchInput")
    .value.trim()
    .toLowerCase();
  const matches = allData.filter((item) =>
    item.title.toLowerCase().includes(query)
  );
  renderList(matches);
}
|
||||
|
||||
// Bootstrap: load the dataset, populate the sidebar, and filter the
// list live as the user types in the search box.
fetch("data/kenney_data_local.json")
  .then((res) => {
    // Surface HTTP errors instead of failing later inside res.json().
    if (!res.ok) throw new Error(`HTTP ${res.status} loading asset data`);
    return res.json();
  })
  .then((data) => {
    allData = data;
    renderList(data);
    document
      .getElementById("searchInput")
      .addEventListener("input", handleSearch);
  })
  // BUGFIX: the chain had no rejection handler, so a missing/broken
  // data file failed silently with an empty sidebar.
  .catch((err) => console.error("Failed to load asset data:", err));
|
||||
@@ -1,115 +0,0 @@
|
||||
/* ---- Page shell: full-height two-pane layout ---- */
body {
  margin: 0;
  font-family: "Segoe UI", sans-serif;
  background-color: #f0f2f5;
}

.container {
  display: flex;
  height: 100vh;
}

/* ---- Sidebar: fixed-width scrollable resource list ---- */
.sidebar {
  width: 320px;
  background: #fff;
  border-right: 1px solid #ddd;
  padding: 1rem;
  overflow-y: auto;
}

.sidebar input {
  width: 100%;
  padding: 0.5rem;
  margin-bottom: 1rem;
  font-size: 1rem;
}

.sidebar ul {
  list-style: none;
  padding: 0;
  margin: 0;
}

.sidebar li {
  padding: 0.5rem;
  margin-bottom: 0.5rem;
  cursor: pointer;
  border-radius: 6px;
  display: flex;
  align-items: center;
  gap: 1rem;
  transition: background 0.2s;
}

/* Hover and the selected entry share the same highlight */
.sidebar li:hover,
.sidebar li.active {
  background-color: #e6f7ff;
}

.sidebar img.thumb {
  width: 48px;
  height: 48px;
  object-fit: cover;
  border-radius: 4px;
}

/* ---- Detail pane ---- */
.content {
  flex: 1;
  padding: 2rem;
  overflow-y: auto;
}

.placeholder {
  font-size: 1.2rem;
  color: #999;
}

/* Toggled by script.js to switch placeholder <-> details */
.hidden {
  display: none;
}

#galleryWrapper {
  margin-top: 1rem;
}

#galleryWrapper a img {
  height: 120px;
  margin: 5px;
  object-fit: cover;
  border-radius: 4px;
}

#detailTags {
  margin-bottom: 1rem;
  color: #666;
  font-size: 0.9rem;
}

#downloadBlock {
  margin: 1rem 0;
}

.download-btn {
  padding: 0.5rem 1rem;
  background-color: #4caf50;
  color: white;
  border-radius: 6px;
  text-decoration: none;
  font-size: 0.95rem;
}

.download-btn:hover {
  background-color: #45a049;
}

/* ---- Version history table ---- */
#versionTableWrapper table {
  margin-top: 1rem;
  width: 100%;
  border-collapse: collapse;
}

#versionTableWrapper th,
#versionTableWrapper td {
  padding: 0.5rem;
  border: 1px solid #ccc;
}
|
||||
364
main.py
Normal file
364
main.py
Normal file
@@ -0,0 +1,364 @@
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import random
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime
|
||||
from mysql_helper import MySQLHelper
|
||||
from bs4 import BeautifulSoup
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
KENNEY_ASSET_URL = "https://www.kenney.nl/assets/"


def get_headers():
    """Build browser-like request headers with a random desktop UA.

    Returns:
        dict[str, str]: headers mimicking a normal browser visit
        (random User-Agent, Google referer, keep-alive, etc.).
    """
    # Common desktop browser user agents; extend as needed.
    user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36",

        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/17.3 Safari/605.1.15",

        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) "
        "Gecko/20100101 Firefox/128.0",
    )
    headers = {
        "User-Agent": random.choice(user_agents),
        "Accept": (
            "text/html,application/xhtml+xml,application/xml;"
            "q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
        ),
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://www.google.com/",
        "Upgrade-Insecure-Requests": "1",
    }
    return headers
|
||||
|
||||
class SmartCrawler:
    """requests.Session wrapper that sleeps a random delay before every
    GET and reuses one set of browser-like headers for its lifetime."""

    def __init__(self, delay_range=(1, 3)):
        # (min, max) seconds slept before each request.
        self.delay_range = delay_range
        self.headers = get_headers()
        self.session = requests.Session()

    def get(self, url):
        """Politely GET `url`: random pause first, then fetch."""
        time.sleep(random.uniform(*self.delay_range))
        return self.session.get(url, headers=self.headers)
|
||||
|
||||
def parse_date(date_str):
    """Convert a dd/mm/yyyy date string to ISO yyyy-mm-dd.

    Raises:
        ValueError: if `date_str` does not match dd/mm/yyyy.
    """
    parsed = datetime.strptime(date_str, "%d/%m/%Y")
    return parsed.strftime("%Y-%m-%d")
|
||||
|
||||
def get_total_pages() -> int:
    """Return the number of asset listing pages on kenney.nl.

    Reads the pagination's last-page button and parses the page number
    from its href (e.g. ".../assets/page:13" -> 13).

    Raises:
        ValueError: if the href does not end in a number.
        Exception: if the pagination button (or its href) is missing.
    """
    crawler = SmartCrawler()
    soup = BeautifulSoup(crawler.get(KENNEY_ASSET_URL).text, "lxml")

    selector = "#content > section > div > div.row.text-center.margin-top > div > ul > li:last-child > a"
    last_page_button = soup.select_one(selector)
    if last_page_button is None or "href" not in last_page_button.attrs:
        raise Exception("Last page button or its href attribute not found")

    # Expected element: <a class="icon" href=".../assets/page:13">;
    # the page number is everything after the final ':'.
    page_number = last_page_button["href"].split(':')[-1]
    if not page_number.isdigit():
        raise ValueError(f"Expected int in page number, got {page_number}")
    return int(page_number)
|
||||
|
||||
def get_all_asset_infos(total_pages: int) -> list[dict[str, str]]:
    """Scrape name/category/series/page_link for every asset pack.

    Walks all listing pages (sorted by release) and extracts the grid
    cells from each page.

    Args:
        total_pages: number of listing pages (see get_total_pages()).

    Returns:
        One dict per asset with keys "name", "category", "series" and
        "page_link"; any field that cannot be found is None.
    """
    result = []
    # Reuse one session for all pages instead of building a new
    # SmartCrawler (and new headers) on every iteration.
    crawler = SmartCrawler(delay_range=(1, 3))
    for page in trange(1, total_pages + 1, desc="Fetching all assets' page links"):
        asset_page_url = KENNEY_ASSET_URL + f"page:{page}?search=&sort=release"
        resp = crawler.get(asset_page_url)
        soup = BeautifulSoup(resp.text, "lxml")
        contents_div = soup.select_one("#content > section > div > div:nth-of-type(1)")
        if not contents_div:
            continue
        for item_div in contents_div.find_all("div", recursive=False):
            h2_tag = item_div.find("h2")
            a_tags = item_div.find_all("a")
            # BUGFIX: the guard used to inspect a_tags[0].attrs while
            # reading a_tags[1]["href"], which could raise IndexError
            # (single-link cells) or pass the wrong check.
            page_link = (
                a_tags[1]["href"]
                if len(a_tags) > 1 and "href" in a_tags[1].attrs
                else None
            )
            result.append({
                "name": h2_tag.text if h2_tag else None,
                "category": a_tags[2].text if len(a_tags) > 2 else None,
                "series": a_tags[3].text if len(a_tags) > 3 else None,
                "page_link": page_link,
            })
    return result
|
||||
|
||||
def get_asset_pack_info(asset: dict) -> None:
    """Fetch an asset pack's detail page and enrich `asset` in place.

    Adds keys: "tags", "changelog" (oldest first), "released_at",
    "updated_at" and "images" (cover first, then the gallery shots).

    Args:
        asset: record with at least "page_link" set.
    """
    crawler = SmartCrawler()
    resp = crawler.get(asset["page_link"])
    soup = BeautifulSoup(resp.text, "lxml")

    # ---- property table (Tags, Files, Features, ...) ----
    properties = {}
    prop_table = soup.select_one("#content > section > div > div > div.col-md-6.text-left > table:nth-of-type(1) > tbody")
    if prop_table:
        for row in prop_table.find_all("tr"):
            cols = row.find_all('td')
            if len(cols) == 2:
                key = cols[0].text.strip().rstrip(':')
                value_links = cols[1].find_all('a')
                # Linked cells (tags, categories) become lists; plain
                # cells stay strings.
                if value_links:
                    value = [a.text.strip() for a in value_links]
                else:
                    value = cols[1].text.strip()
                properties[key] = value
    asset["tags"] = properties.get("Tags", [])

    # ---- first .zip link on the page = current download ----
    zip_link = None
    for a_tag in soup.find_all("a", href=True):
        href = a_tag["href"]
        if href.endswith(".zip"):
            zip_link = "https://www.kenney.nl" + href if href.startswith("/") else href
            break

    # ---- changelog table (site lists newest first) ----
    changelog = []
    update_table = soup.select_one(
        '#content > section > div > div > div:nth-of-type(1) > table:nth-of-type(2) > tbody')
    if update_table:
        # BUGFIX: was properties["Features"], raising KeyError when a
        # pack has no Features row ("Files" was already guarded).
        features = properties.get("Features", [])
        for idx, row in enumerate(update_table.find_all('tr')):
            cols = row.find_all('td')
            if len(cols) == 2:
                date = cols[0].text.strip()
                spans = cols[1].find_all('span')
                version = spans[0].text.strip() if len(spans) >= 1 else ''
                description = spans[1].text.strip() if len(spans) >= 2 else ''
                version_info = {
                    'date': parse_date(date),
                    'version': version,
                    'description': description,
                    'files': 0,
                    'feat_animation': False,
                    'feat_variation': False,
                    'orig_file_link': None
                }
                if idx == 0:
                    # Only the latest version (first table row) carries
                    # the file count, feature flags and download link.
                    version_info["files"] = int(''.join(ch for ch in properties["Files"] if ch.isdigit())) if "Files" in properties else 0
                    version_info["feat_animation"] = "Animation" in features
                    version_info["feat_variation"] = "Variation" in features
                    version_info["orig_file_link"] = zip_link if zip_link else None
                changelog.append(version_info)
    changelog.reverse()  # store oldest first; latest ends at index -1
    asset["changelog"] = changelog
    # BUGFIX: released_at/updated_at were swapped (released_at took the
    # NEWEST entry), and both crashed with IndexError on an empty
    # changelog. Oldest entry = first release, newest = last update.
    asset["released_at"] = changelog[0]["date"] if changelog else None
    asset["updated_at"] = changelog[-1]["date"] if changelog else None

    # ---- images: cover first, then every gallery thumbnail ----
    images = []
    cover_img = soup.select_one(
        '#content > section > div > div > div:nth-of-type(2) > a > img')
    if cover_img and cover_img.get("src"):
        cover_url = cover_img["src"]
        if cover_url.startswith("/"):
            cover_url = "https://www.kenney.nl" + cover_url
        images.append(cover_url)
    gallery_divs = soup.select(
        '#content > section > div > div > div:nth-of-type(2) > div > div')
    for div in gallery_divs:
        img_tag = div.select_one("a > img")
        if img_tag and img_tag.get("src"):
            img_url = img_tag["src"]
            if img_url.startswith("/"):
                img_url = "https://www.kenney.nl" + img_url
            images.append(img_url)
    asset['images'] = images
|
||||
|
||||
def sync_table(
    db,
    table_name: str,
    column_name: str,
    items: set[str],
) -> dict[str, int]:
    """Ensure every value in `items` exists in `table_name`, then return
    a value -> id mapping read back from the database.

    Args:
        db: MySQLHelper-style object exposing fetch_all() and get_conn().
        table_name: target lookup table (trusted identifier only — it is
            interpolated into SQL).
        column_name: unique text column holding the values.
        items: values that must exist after this call.
    """
    existing = db.fetch_all(f"SELECT * FROM {table_name}")
    known = {row[column_name] for row in existing}

    # Insert whatever is missing; ids come from AUTO_INCREMENT.
    to_insert = sorted(items - known)
    if to_insert:
        insert_sql = f"INSERT INTO {table_name} ({column_name}) VALUES (%s)"
        with db.get_conn() as conn:
            cursor = conn.cursor()
            cursor.executemany(insert_sql, [(name,) for name in to_insert])
            conn.commit()
            cursor.close()

    # Re-read so the returned ids are authoritative.
    refreshed = db.fetch_all(f"SELECT * FROM {table_name}")
    return {row[column_name]: row["id"] for row in refreshed}
|
||||
|
||||
def build_id_map(db, table, name_field, values):
    """Sync the truthy entries of `values` into `table` and return the
    resulting name -> id map (thin wrapper over sync_table)."""
    non_empty = {value for value in values if value}
    return sync_table(db, table, name_field, non_empty)
|
||||
|
||||
def build_insert_sql(table: str, columns: list[str]):
    """Build a parameterized INSERT statement for `table`.

    Example:
        build_insert_sql("t", ["a", "b"])
        -> "INSERT INTO t (a, b) VALUES (%s, %s)"
    """
    placeholders = ', '.join('%s' for _ in columns)
    return f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})"
|
||||
|
||||
# Characters allowed in generated file/directory names.
allowed_path_pattern = re.compile(r'[^a-zA-Z0-9._-]')


def sanitize_path(path: str):
    """Turn free-form text into a filesystem-safe name.

    Lowercases each whitespace-separated word, strips characters outside
    [a-zA-Z0-9._-], and joins the words with underscores. Returns None
    for empty/falsy input.
    """
    if not path:
        return None
    words = (allowed_path_pattern.sub('', word.lower()) for word in path.split())
    return '_'.join(words)
|
||||
|
||||
def main() -> None:
    """Entry point: load scraped asset data, download each pack's latest
    archive into `media/`, then sync everything into MySQL.

    The live crawl/parse stages are currently commented out; this run
    resumes from the cached `all_asset_infos_detailed.json` snapshot.
    Note the `exit()` below — the database stage is currently
    unreachable (work in progress).
    """
    # -- Stage 1 (disabled): live crawl of kenney.nl --------------------
    # total_pages = get_total_pages()
    # all_asset_infos = get_all_asset_infos(total_pages)

    # # Let the oldest become first in the array to make sure it can be inserted into database first
    # all_asset_infos.reverse()

    # for asset in tqdm(all_asset_infos, "Fetching asset pack info"):
    #     get_asset_pack_info(asset)

    # Resume from the cached crawl result instead of re-crawling.
    all_asset_infos: list[dict] = json.load(open("all_asset_infos_detailed.json"))
    # -- one-off data-repair pass, kept for reference --------------------
    # for asset in all_asset_infos:
    #     asset["name"] = None if asset["name"] == "" else asset["name"]
    #     asset["category"] = None if asset["category"] == "" else asset["category"]
    #     asset["series"] = None if asset["series"] == "" else asset["series"]
    #     asset["page_link"] = None if asset["page_link"] == "" else asset["page_link"]
    #     # for log in asset["changelog"]:
    #     #     log['files'] = 0 if "files" not in log else log["files"]
    #     #     log['feat_animation'] = False if "feat_animation" not in log else log["feat_animation"]
    #     #     log['feat_variation'] = False if "feat_variation" not in log else log["feat_variation"]
    #     #     log['orig_file_link'] = asset["download"] if log["files"] != 0 else None
    #     #     asset.pop('download')
    # json.dump(all_asset_infos, open("all_asset_infos_detailed_fix.json", "w"))
    # exit()

    # -- Stage 2: download each pack's latest archive --------------------
    # Download file and save to database
    output_dir = "media"

    for asset in tqdm(all_asset_infos, "Downloading assets and images"):
        asset_name = sanitize_path(asset["name"])
        asset_category = sanitize_path(asset["category"])
        # changelog is stored oldest-first, so index -1 is the latest.
        asset_version = asset["changelog"][-1]["version"]
        asset_dir_path = os.path.join(output_dir, asset_category)
        asset_dir_path = os.path.join(asset_dir_path, asset_name)
        # Remembered for the asset_pack DB row below.
        asset["base_asset_path"] = asset_dir_path
        os.makedirs(asset_dir_path, exist_ok=True)
        filename = f"{asset_name} V{asset_version}.zip"
        filepath = os.path.join(asset_dir_path, filename)
        if os.path.exists(filepath):
            # NOTE(review): "(unknown)" in these messages looks like a
            # lost f-string placeholder (probably {filename}) — confirm
            # against the original source and restore.
            print(f"✅ (unknown) exists, skipping...")
        else:
            try:
                # Only the latest changelog entry carries a download link.
                download_url = asset["changelog"][-1]["orig_file_link"]
                with requests.get(download_url, headers=get_headers()) as resp:
                    resp.raise_for_status()
                    with open(filepath, "wb") as f:
                        for chunk in resp.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
                print(f"✅ (unknown) download completed.")
            except Exception as e:
                print(f"❌ Download failed: (unknown) - {e}")
        # Random pause between downloads to stay polite to the server.
        time.sleep(random.uniform(1.5, 4.0))
    # Download asset file
    # NOTE(review): everything below this exit() is currently
    # unreachable — remove it once the download stage is verified.
    exit()

    # -- Stage 3: push everything into MySQL -----------------------------
    # Initialize database
    # NOTE(review): hard-coded host/user/password in source — move to
    # environment variables or a config file before publishing.
    db = MySQLHelper(
        "10.147.20.103",
        "kenney-assets",
        "9a77caa2a5c705db7e8a93c6a3fbc46a",
        "kenney_assets"
    )

    # Lookup tables first, so foreign keys below can be resolved.
    category_id_map = build_id_map(db, "category", "name", (a["category"] for a in all_asset_infos))
    series_id_map = build_id_map(db, "series", "name", (a["series"] for a in all_asset_infos))
    tags_id_map = build_id_map(db, "tag", "name", (tag for a in all_asset_infos for tag in (a.get("tags") or [])))

    asset_pack_sql = build_insert_sql("asset_pack", ["name", "category_id", "series_id", "released_at", "updated_at", "base_asset_path", "orig_page_link"])
    asset_pack_values: list[tuple] = []
    # Attach resolved category/series ids to every asset row.
    for asset in all_asset_infos:
        asset["category_id"] = category_id_map.get(asset.get("category"))
        asset["series_id"] = series_id_map.get(asset.get("series"))
        asset_pack_values.append((
            asset["name"],
            asset["category_id"],
            asset["series_id"],
            asset["released_at"],
            asset["updated_at"],
            asset["base_asset_path"],
            asset["page_link"]
        ))
    # NOTE(review): bulk_insert is not visible in the mysql_helper.py
    # excerpt shown here — confirm it exists.
    db.bulk_insert(asset_pack_sql, asset_pack_values)

    # Map pack names back to their freshly assigned ids.
    asset_pack_records = db.fetch_all(f"SELECT id, name FROM asset_pack")
    asset_pack_id_map = {r["name"]: r["id"] for r in asset_pack_records}

    asset_pack_tag_sql = build_insert_sql("asset_pack_tag", ["asset_pack_id", "tag_id"])
    asset_pack_tag_values: list[tuple] = []

    update_log_sql = build_insert_sql("update_log", ["asset_pack_id", "released_date", "version", "description", "files_count", "feat_animations", "feat_variations", "orig_download_link"])
    update_log_values: list[tuple] = []

    asset_pack_image_sql = build_insert_sql("asset_pack_image", ["asset_pack_id", "orig_file_link"])
    asset_pack_image_values: list[tuple] = []

    # Collect child-table rows (tags, changelog entries, images).
    for asset in tqdm(all_asset_infos, "Preparing data to database"):
        asset_pack_id = asset_pack_id_map.get(asset.get("name"))
        tags = asset.get("tags")
        for tag in tags:
            asset_pack_tag_values.append((
                asset_pack_id,
                tags_id_map[tag]
            ))
        changelog = asset.get("changelog")
        for log in changelog:
            update_log_values.append((
                asset_pack_id,
                log["date"],
                log["version"],
                log["description"],
                log["files"],
                log["feat_animation"],
                log["feat_variation"],
                log["orig_file_link"]
            ))
        images = asset.get("images")
        for image in images:
            asset_pack_image_values.append((
                asset_pack_id,
                image
            ))

    # One connection/transaction for all three child tables.
    with db.get_conn() as conn:
        cursor = conn.cursor()
        cursor.executemany(asset_pack_tag_sql, asset_pack_tag_values)
        cursor.executemany(update_log_sql, update_log_values)
        cursor.executemany(asset_pack_image_sql, asset_pack_image_values)
        conn.commit()
        cursor.close()


if __name__ == "__main__":
    main()
|
||||
145
mysql_helper.py
Normal file
145
mysql_helper.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from mysql.connector import pooling, Error
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
import time
|
||||
|
||||
# ========== Logging setup ==========
# Module-wide logger used by MySQLHelper; INFO level with timestamps.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger("MySQLHelper")
|
||||
|
||||
class MySQLHelper:
    """Convenience wrapper around a mysql-connector connection pool.

    Provides pooled connections, retrying execute/fetch helpers, a bulk
    insert, and a commit/rollback transaction context manager.
    """

    def __init__(self, host, user, password, database, pool_size=5):
        """Create the MySQL connection pool.

        Raises mysql.connector.Error when the pool cannot be created.
        """
        try:
            self.pool = pooling.MySQLConnectionPool(
                pool_name="mypool",
                pool_size=pool_size,
                pool_reset_session=True,
                host=host,
                user=user,
                password=password,
                database=database,
                charset="utf8mb4"
            )
            logger.info("✅ MySQL 连接池已创建,大小=%s", pool_size)
        except Error as e:
            logger.error("❌ 创建连接池失败: %s", e)
            raise

    @contextmanager
    def get_conn(self):
        """Yield a pooled connection and return it to the pool afterwards."""
        conn = None
        try:
            conn = self.pool.get_connection()
            yield conn
        except Error as e:
            logger.error("数据库连接错误: %s", e)
            raise
        finally:
            if conn:
                conn.close()

    def execute(self, sql, params=None, commit=False, retry=3):
        """Run an INSERT/UPDATE/DELETE, retrying up to `retry` times.

        Returns the affected row count. Re-raises the last error after
        the final attempt.
        """
        for attempt in range(1, retry + 1):
            try:
                with self.get_conn() as conn:
                    cursor = conn.cursor()
                    try:
                        cursor.execute(sql, params or ())
                        if commit:
                            conn.commit()
                        return cursor.rowcount
                    finally:
                        # Bug fix: close the cursor even when execute() raises
                        cursor.close()
            except Error as e:
                logger.warning("执行 SQL 失败 (尝试 %s/%s): %s", attempt, retry, e)
                time.sleep(1)
                if attempt == retry:
                    raise

    def fetch_all(self, sql, params=None, retry=3):
        """Run a SELECT and return all rows as dicts, retrying on errors."""
        for attempt in range(1, retry + 1):
            try:
                with self.get_conn() as conn:
                    cursor = conn.cursor(dictionary=True)
                    try:
                        cursor.execute(sql, params or ())
                        return cursor.fetchall()
                    finally:
                        # Bug fix: close the cursor even when the query raises
                        cursor.close()
            except Error as e:
                logger.warning("查询失败 (尝试 %s/%s): %s", attempt, retry, e)
                time.sleep(1)
                if attempt == retry:
                    raise

    def fetch_one(self, sql, params=None, retry=3):
        """Run a SELECT and return the first row, or None when empty."""
        result = self.fetch_all(sql, params, retry)
        return result[0] if result else None

    def bulk_insert(self, sql: str, rows: list[tuple]):
        """Insert many rows with executemany and commit once."""
        with self.get_conn() as conn:
            cursor = conn.cursor()
            try:
                cursor.executemany(sql, rows)
                conn.commit()
            finally:
                cursor.close()

    @contextmanager
    def transaction(self):
        """Transaction context manager: commit on success, rollback on error.

        Usage:
            with db.transaction() as cursor:
                cursor.execute(...)
                cursor.execute(...)
        """
        with self.get_conn() as conn:
            # Bug fix: create the cursor before the try block; previously a
            # failing conn.cursor() left `cursor` unbound in the finally.
            cursor = conn.cursor()
            try:
                yield cursor
                conn.commit()
            except BaseException:
                # Was a bare `except:`; BaseException keeps identical semantics
                # (rollback also on KeyboardInterrupt/SystemExit) while being
                # explicit.
                conn.rollback()
                raise
            finally:
                cursor.close()
|
||||
|
||||
# ================= Usage example =================
if __name__ == "__main__":
    helper = MySQLHelper(
        host="localhost",
        user="root",
        password="123456",
        database="test_db",
        pool_size=5
    )

    # Insert a row
    helper.execute("INSERT INTO users(name, age) VALUES (%s, %s)", ("Alice", 25), commit=True)

    # Query rows
    adults = helper.fetch_all("SELECT * FROM users WHERE age > %s", (18,))
    logger.info("查询结果: %s", adults)

    # Transaction example: both statements commit or roll back together
    try:
        with helper.transaction() as txn_cursor:
            txn_cursor.execute("UPDATE users SET age = age + 1 WHERE name = %s", ("Alice",))
            txn_cursor.execute("INSERT INTO logs(message) VALUES (%s)", ("Alice age updated",))
    except Error as e:
        logger.error("事务失败: %s", e)
|
||||
73
queries.sql
Normal file
73
queries.sql
Normal file
@@ -0,0 +1,73 @@
|
||||
-- Schema for the Kenney asset-pack database.
-- FK checks are disabled so the tables can be dropped in any order,
-- then re-enabled before the schema is recreated.
SET FOREIGN_KEY_CHECKS = 0;
DROP TABLE IF EXISTS category;
DROP TABLE IF EXISTS series;
DROP TABLE IF EXISTS tag;
DROP TABLE IF EXISTS asset_pack;
DROP TABLE IF EXISTS asset_pack_image;
DROP TABLE IF EXISTS asset_pack_tag;
DROP TABLE IF EXISTS update_log;
SET FOREIGN_KEY_CHECKS = 1;

-- Lookup table of category names.
CREATE TABLE category (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100) NOT NULL UNIQUE
);

-- Lookup table of series names.
CREATE TABLE series (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100) NOT NULL UNIQUE
);

-- Lookup table of tag names.
CREATE TABLE tag (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100) NOT NULL UNIQUE
);

-- One row per asset pack; belongs to a category, optionally to a series.
CREATE TABLE asset_pack (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(255) NOT NULL UNIQUE,
    category_id INT NOT NULL,
    series_id INT,  -- nullable: not every pack belongs to a series
    discovered_at DATETIME DEFAULT CURRENT_TIMESTAMP,  -- presumably when the row was first created — confirm
    released_at DATE,
    updated_at DATE,
    base_asset_path VARCHAR(255),
    orig_page_link VARCHAR(255),
    FOREIGN KEY (category_id) REFERENCES category(id) ON DELETE RESTRICT ON UPDATE CASCADE,
    FOREIGN KEY (series_id) REFERENCES series(id) ON DELETE SET NULL ON UPDATE CASCADE,
    INDEX idx_name (name),
    INDEX idx_category (category_id),
    INDEX idx_series (series_id)
);

-- Preview images per pack; rows are removed with their pack (CASCADE).
CREATE TABLE asset_pack_image (
    id INT PRIMARY KEY AUTO_INCREMENT,
    asset_pack_id INT NOT NULL,
    image_file_name VARCHAR(255),
    orig_file_link VARCHAR(255),
    FOREIGN KEY (asset_pack_id) REFERENCES asset_pack(id) ON DELETE CASCADE,
    INDEX idx_asset_pack_id (asset_pack_id)
);

-- Many-to-many join between asset packs and tags.
CREATE TABLE asset_pack_tag (
    asset_pack_id INT NOT NULL,
    tag_id INT NOT NULL,
    PRIMARY KEY (asset_pack_id, tag_id),
    FOREIGN KEY (asset_pack_id) REFERENCES asset_pack(id) ON DELETE CASCADE,
    FOREIGN KEY (tag_id) REFERENCES tag(id) ON DELETE CASCADE
);

-- Release history: one row per changelog entry of a pack.
CREATE TABLE update_log (
    id INT PRIMARY KEY AUTO_INCREMENT,
    asset_pack_id INT NOT NULL,
    released_date DATE NOT NULL,
    version VARCHAR(20),
    description VARCHAR(500),
    files_count INT UNSIGNED DEFAULT 0,
    feat_animations TINYINT(1) DEFAULT 0,  -- boolean flag from the changelog data
    feat_variations TINYINT(1) DEFAULT 0,  -- boolean flag from the changelog data
    zip_file_name VARCHAR(255),
    orig_download_link VARCHAR(255),
    FOREIGN KEY (asset_pack_id) REFERENCES asset_pack(id) ON DELETE CASCADE,
    INDEX idx_released_date (released_date)
);
|
||||
@@ -1,11 +0,0 @@
|
||||
beautifulsoup4==4.13.4
|
||||
certifi==2025.1.31
|
||||
charset-normalizer==3.4.1
|
||||
colorama==0.4.6
|
||||
idna==3.10
|
||||
lxml==5.3.2
|
||||
requests==2.32.3
|
||||
soupsieve==2.6
|
||||
tqdm==4.67.1
|
||||
typing_extensions==4.13.2
|
||||
urllib3==2.4.0
|
||||
@@ -1,136 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
# Listing pages: https://www.kenney.nl/assets/page:1 .. page:13
base_url = "https://www.kenney.nl/assets/page:"
total_pages = 13
all_links = []  # collected asset-page URLs

# Minimal User-Agent so requests are not served a bot response
headers = {
    "User-Agent": "Mozilla/5.0"
}
|
||||
|
||||
|
||||
def parse_resource_page(url):
    """Scrape a single Kenney asset page.

    Returns a dict with keys:
      'title'      - page heading text, or 'N/A'
      'properties' - {key: str or [str, ...]} from the first info table
      'changelog'  - [{'date', 'version', 'description'}, ...]
      'download'   - absolute .zip URL, or "N/A" when none found
      'images'     - image URLs, cover image first
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")
    result = {}

    # Resource title
    title_tag = soup.select_one(
        '#content > section > div > div > div:nth-of-type(1) > h1')
    result['title'] = title_tag.text.strip() if title_tag else 'N/A'

    # Property table: two columns per row — "Key:" | value
    properties = {}
    prop_table = soup.select_one(
        '#content > section > div > div > div:nth-of-type(1) > table:nth-of-type(1) > tbody')
    if prop_table:
        for row in prop_table.find_all('tr'):
            cols = row.find_all('td')
            if len(cols) == 2:
                key = cols[0].text.strip().rstrip(':')
                value_links = cols[1].find_all('a')
                if value_links:
                    # Multi-valued cell (e.g. tags): keep link texts as a list
                    value = [a.text.strip() for a in value_links]
                else:
                    value = cols[1].text.strip()
                properties[key] = value
    result['properties'] = properties

    # Changelog table: date | <span>version</span> [<span>description</span>]
    changelog = []
    update_table = soup.select_one(
        '#content > section > div > div > div:nth-of-type(1) > table:nth-of-type(2) > tbody')
    if update_table:
        for row in update_table.find_all('tr'):
            cols = row.find_all('td')
            if len(cols) == 2:
                date = cols[0].text.strip()
                spans = cols[1].find_all('span')
                version = spans[0].text.strip() if len(spans) >= 1 else ''
                description = spans[1].text.strip() if len(spans) >= 2 else ''
                changelog.append({
                    'date': date,
                    'version': version,
                    'description': description
                })
    result['changelog'] = changelog

    # Download link: first anchor whose href ends in ".zip";
    # relative URLs are made absolute against www.kenney.nl
    zip_link = None
    for a_tag in soup.find_all("a", href=True):
        href = a_tag["href"]
        if href.endswith(".zip"):
            zip_link = "https://www.kenney.nl" + \
                href if href.startswith("/") else href
            break
    result['download'] = zip_link if zip_link else "N/A"

    # Image extraction
    images = []

    # Cover image
    cover_img = soup.select_one(
        '#content > section > div > div > div:nth-of-type(2) > a > img')
    if cover_img and cover_img.get("src"):
        cover_url = cover_img["src"]
        if cover_url.startswith("/"):
            cover_url = "https://www.kenney.nl" + cover_url
        images.append(cover_url)

    # Gallery images
    gallery_divs = soup.select(
        '#content > section > div > div > div:nth-of-type(2) > div > div')
    for div in gallery_divs:
        img_tag = div.select_one("a > img")
        if img_tag and img_tag.get("src"):
            img_url = img_tag["src"]
            if img_url.startswith("/"):
                img_url = "https://www.kenney.nl" + img_url
            images.append(img_url)

    result['images'] = images

    return result
|
||||
|
||||
|
||||
# Crawl every listing page and collect the asset-page links.
# for page in range(1, total_pages + 1):
for page in trange(1, total_pages + 1, desc="Fetching all assets' page links"):
    url = base_url + str(page)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")

    # Listing container: //*[@id="content"]/section/div/div[1]
    content_div = soup.select_one(
        "#content > section > div > div:nth-of-type(1)")

    if content_div:
        # Each direct child div is one asset card containing a link
        item_divs = content_div.find_all("div", recursive=False)

        for item_div in item_divs:
            a_tag = item_div.find("a")
            if a_tag and "href" in a_tag.attrs:
                link = a_tag["href"]
                full_link = link
                all_links.append(full_link)

print(f"总共提取到 {len(all_links)} 个链接 ✅")

# Persist the link list, one URL per line
with open("kenney_links.txt", "w", encoding="utf-8") as f:
    for link in all_links:
        f.write(link + "\n")

# Scrape each collected asset page
all_resource_data = []

# for link in all_links:
for link in tqdm(all_links, desc="Fetching all assets' data"):
    resource_data = parse_resource_page(link)
    all_resource_data.append(resource_data)

with open("kenney_data.json", "w", encoding="utf-8") as f:
    json.dump(all_resource_data, f, ensure_ascii=False, indent=4)

print("数据爬取完成 ✅")
|
||||
@@ -1,72 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
import random
|
||||
from tqdm import tqdm
|
||||
|
||||
# === Paths ===
json_path = "kenney_data.json"  # scraped metadata produced by the crawler
output_dir = "kenney_assets"    # root folder for downloaded zips

# === Load the scraped metadata ===
with open(json_path, "r", encoding="utf-8") as f:
    resources = json.load(f)
|
||||
|
||||
# === 工具函数 ===
|
||||
|
||||
|
||||
def sanitize_filename(name):
|
||||
return "".join(c for c in name if c.isalnum() or c in "._- ()").strip()
|
||||
|
||||
|
||||
def download_zip(entry):
    """Download one asset pack's zip into output_dir/<Category>[/<Series>]/.

    Skips the download when the target file already exists; always
    sleeps a random 1.5-4.0s afterwards to pace requests.
    """
    title = entry["title"]
    # Latest version comes from the first changelog entry; default "1.0"
    version = entry["changelog"][0]["version"] if entry["changelog"] else "1.0"
    download_url = entry.get("download")

    # Category and optional series determine the folder layout
    category = entry["properties"].get("Category", ["Uncategorized"])[0]
    series = entry["properties"].get("Series", [None])[0]

    # Build the directory structure
    folder_path = os.path.join(output_dir, sanitize_filename(category))
    if series:
        folder_path = os.path.join(folder_path, sanitize_filename(series))
    os.makedirs(folder_path, exist_ok=True)

    # Build the target file path
    filename = f"{sanitize_filename(title)} V{version}.zip"
    filepath = os.path.join(folder_path, filename)

    if os.path.exists(filepath):
        # NOTE(review): the literal "(unknown)" in these messages looks like
        # a lost f-string placeholder (probably the file path) — confirm.
        print(f"✅ 已存在,跳过: (unknown)")
        return

    try:
        print(f"⬇️ 开始下载: (unknown)")
        with requests.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f"✅ 下载完成: (unknown)")
    except Exception as e:
        print(f"❌ 下载失败: (unknown) - {e}")

    # Random wait to mimic a human visitor
    time.sleep(random.uniform(1.5, 4.0))
|
||||
|
||||
|
||||
# === Start batch download ===
# Skips the first 156 entries (presumably resuming an earlier run — confirm)
idx = 0
for resource in tqdm(resources, desc="处理资源"):
    if idx < 156:
        idx += 1
        continue
    if "download" in resource and resource["download"].endswith(".zip"):
        download_zip(resource)
    idx += 1

print("\n✅ 所有资源处理完成")
|
||||
@@ -1,61 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
import random
|
||||
from urllib.parse import urlparse, unquote
|
||||
from tqdm import tqdm
|
||||
|
||||
# ========== Configuration ==========
json_path = "kenney_data.json"        # scraped metadata path
output_root = "kenney_assets_images"  # root folder for downloaded images
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
|
||||
# ========== 工具函数 ==========
|
||||
|
||||
|
||||
def sanitize_filename(name):
|
||||
return "".join(c for c in name if c.isalnum() or c in "._- ()").strip()
|
||||
|
||||
|
||||
def download_image(url, save_path):
    """Download one image to save_path, skipping files that already exist.

    Failures are reported (not raised); every attempt is followed by a
    random 1.5-4.0s pause to pace requests.
    """
    if os.path.exists(save_path):
        print(f"✅ 已存在,跳过: {save_path}")
        return
    try:
        # Bug fix: check the HTTP status and close the response. The old
        # code saved error-page bodies (e.g. 404 HTML) as image files and
        # never released the connection.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(save_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"✅ 下载完成: {save_path}")
    except Exception as e:
        print(f"❌ 下载失败: {url} - {e}")
    time.sleep(random.uniform(1.5, 4.0))  # mimic a human visitor
|
||||
|
||||
|
||||
# ========== Load the scraped metadata ==========
with open(json_path, "r", encoding="utf-8") as f:
    resources = json.load(f)

# ========== Batch download ==========
for entry in tqdm(resources, desc="处理资源"):
    title = entry["title"]
    category = entry["properties"].get("Category", ["Uncategorized"])[0]
    series = entry["properties"].get("Series", [None])[0]
    images = entry.get("images", [])

    # Folder layout: Category/Series/Title/
    path = os.path.join(output_root, sanitize_filename(category))
    if series:
        path = os.path.join(path, sanitize_filename(series))
    path = os.path.join(path, sanitize_filename(title))
    os.makedirs(path, exist_ok=True)

    for img_url in images:
        parsed_url = urlparse(img_url)
        img_name = os.path.basename(parsed_url.path)
        img_name = unquote(img_name)  # decode URL escapes, e.g. %20 -> space
        img_path = os.path.join(path, img_name)
        download_image(img_url, img_path)

print("\n🎉 所有图片处理完成!")
|
||||
@@ -1,68 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
from urllib.parse import urlparse, unquote
|
||||
|
||||
# === 配置路径 ===
|
||||
input_json = "kenney_data.json"
|
||||
output_json = "kenney_data_local.json"
|
||||
|
||||
zip_root = "kenney_assets"
|
||||
img_root = "kenney_assets_images"
|
||||
|
||||
# === 工具函数 ===
|
||||
|
||||
|
||||
def sanitize_filename(name):
|
||||
return "".join(c for c in name if c.isalnum() or c in "._- ()").strip()
|
||||
|
||||
|
||||
def build_zip_path(entry):
    """Return the expected local zip path: zip_root/<Category>[/<Series>]/<Title> V<version>.zip."""
    props = entry["properties"]
    category = props.get("Category", ["Uncategorized"])[0]
    series = props.get("Series", [None])[0]
    changelog = entry["changelog"]
    version = changelog[0]["version"] if changelog else "1.0"

    parts = [zip_root, sanitize_filename(category)]
    if series:
        parts.append(sanitize_filename(series))
    parts.append(f"{sanitize_filename(entry['title'])} V{version}.zip")
    return os.path.join(*parts)
|
||||
|
||||
|
||||
def build_image_paths(entry):
    """Return local paths for entry's images under img_root/<Category>[/<Series>]/<Title>/."""
    props = entry["properties"]
    category = props.get("Category", ["Uncategorized"])[0]
    series = props.get("Series", [None])[0]

    segments = [img_root, sanitize_filename(category)]
    if series:
        segments.append(sanitize_filename(series))
    segments.append(sanitize_filename(entry["title"]))
    folder = os.path.join(*segments)

    return [
        os.path.join(folder, unquote(os.path.basename(urlparse(img_url).path)))
        for img_url in entry.get("images", [])
    ]
|
||||
|
||||
|
||||
# === Main processing: rewrite remote URLs to local file paths ===
with open(input_json, "r", encoding="utf-8") as f:
    data = json.load(f)

for entry in data:
    if "download" in entry and entry["download"].endswith(".zip"):
        zip_path = build_zip_path(entry)
        if os.path.exists(zip_path):
            entry["download"] = zip_path  # replace with the local path
    if "images" in entry and isinstance(entry["images"], list):
        entry["images"] = build_image_paths(entry)

# === Save the modified JSON ===
with open(output_json, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print("✅ 已更新 JSON:本地路径写入完毕!")
|
||||
33
思路.txt
33
思路.txt
@@ -1,33 +0,0 @@
|
||||
我想要爬取这个网页 https://www.kenney.nl/assets/page:1 下(共有 13 页)
|
||||
//*[@id="content"]/section/div/div[1] 这个元素(里边是 n 个 div 元素)
|
||||
我需要获取每个 Div 元素的
|
||||
//*[@id="content"]/section/div/div[1]/div[1]/div/a 这个 a 元素
|
||||
将它的链接保存成一个列表
|
||||
|
||||
然后依次访问这个列表中的所有页面,
|
||||
|
||||
CSV 格式如下:
|
||||
资源名称, 分类, 系列, 资源数, 标签(有多个),资源数量
|
||||
|
||||
访问这个界面,然后我需要获取它的所有信息:
|
||||
资源名称://*[@id="content"]/section/div/div/div[1]/h1
|
||||
|
||||
各种属性://*[@id="content"]/section/div/div/div[1]/table[1]/tbody
|
||||
上面这个表,他是个两列 N 行的表。
|
||||
表的左侧可以看作是 key: Category, Series, Assets, Variation(s), Tags, License 等……
|
||||
表的右侧是 Value,但它可能会有各种组成方式,下边是常见的:
|
||||
- 普通的 td > a
|
||||
- 只有 td
|
||||
- td 之后有多个 a(如 Tags)
|
||||
|
||||
更新记录://*[@id="content"]/section/div/div/div[1]/table[2]/tbody
|
||||
它也是个两列 N 行的表,
|
||||
左侧是更新日期 dd/mm/yyyy
|
||||
右侧是 td > 第一个 span 是版本,第二个 span 是更新内容,有时候没有第二个 span
|
||||
|
||||
继续增加 parse_resource_page(url) 函数,
|
||||
我需要获取它的封面图://*[@id="content"]/section/div/div/div[2]/a/img
|
||||
以及其他图片(如有)
|
||||
//*[@id="content"]/section/div/div/div[2]/div 这里子元素可能有 n 个 div,我需要拿到它的 img
|
||||
//*[@id="content"]/section/div/div/div[2]/div/div[1]/a/img
|
||||
封面图存了之后,也得加在图集的数组里。
|
||||
Reference in New Issue
Block a user