feat(output): separate code and docs into subdirectories

Group copied project files under a dedicated code/ directory.
Group the generated analysis, manifest, and overview files under docs/.
This commit is contained in:
2026-04-02 10:36:29 +08:00
parent 5ba2e3217a
commit eef3464257
4 changed files with 56 additions and 27 deletions

View File

@@ -12,6 +12,8 @@ from .scanner import ScanResult, crawl_dependencies, find_entry_file, read_text,
class PipelineReport:
project_root: Path
output_root: Path
code_root: Path
docs_root: Path
entry_file: Path
initial_files: list[Path]
final_files: list[Path]
@@ -25,6 +27,8 @@ class PipelineReport:
{
"project_root": self.project_root.as_posix(),
"output_root": self.output_root.as_posix(),
"code_root": self.code_root.as_posix(),
"docs_root": self.docs_root.as_posix(),
"entry_file": self.entry_file.as_posix(),
"initial_files": [item.as_posix() for item in self.initial_files],
"final_files": [item.as_posix() for item in self.final_files],
@@ -42,6 +46,8 @@ class ProjectPipeline:
def __init__(self, project_root: Path, output_root: Path, entry_name: str, force: bool = False) -> None:
self.project_root = project_root.resolve()
self.output_root = output_root.resolve()
self.code_root = self.output_root / "code"
self.docs_root = self.output_root / "docs"
self.entry_name = entry_name
self.force = force
@@ -56,8 +62,8 @@ class ProjectPipeline:
self._copy_files(initial_rel_files)
cleaned_files = self._clean_copied_xaml_files(initial_rel_files)
final_scan = crawl_dependencies(self.output_root, self.output_root / entry_rel)
final_rel_files = self._relative_files(final_scan, self.output_root)
final_scan = crawl_dependencies(self.code_root, self.code_root / entry_rel)
final_rel_files = self._relative_files(final_scan, self.code_root)
pruned_files = self._prune_unused_files(initial_rel_files, final_rel_files)
analysis_files = self._write_analysis(final_rel_files, analyzer)
@@ -66,6 +72,8 @@ class ProjectPipeline:
report = PipelineReport(
project_root=self.project_root,
output_root=self.output_root,
code_root=self.code_root,
docs_root=self.docs_root,
entry_file=entry_rel,
initial_files=initial_rel_files,
final_files=final_rel_files,
@@ -82,12 +90,13 @@ class ProjectPipeline:
if not self.force:
raise FileExistsError(f"Output directory already exists: {self.output_root}")
rmtree(self.output_root)
self.output_root.mkdir(parents=True, exist_ok=True)
self.code_root.mkdir(parents=True, exist_ok=True)
self.docs_root.mkdir(parents=True, exist_ok=True)
def _copy_files(self, relative_files: list[Path]) -> None:
for relative_path in relative_files:
source = self.project_root / relative_path
destination = self.output_root / relative_path
destination = self.code_root / relative_path
destination.parent.mkdir(parents=True, exist_ok=True)
copy2(source, destination)
@@ -96,7 +105,7 @@ class ProjectPipeline:
for relative_path in relative_files:
if relative_path.suffix.lower() != ".xaml":
continue
output_file = self.output_root / relative_path
output_file = self.code_root / relative_path
original = read_text(output_file)
updated = strip_comment_out_blocks(original)
if updated != original:
@@ -110,7 +119,7 @@ class ProjectPipeline:
for relative_path in initial_files:
if relative_path in final_set:
continue
target = self.output_root / relative_path
target = self.code_root / relative_path
if target.exists():
target.unlink()
pruned.append(relative_path)
@@ -119,7 +128,7 @@ class ProjectPipeline:
def _cleanup_empty_dirs(self) -> None:
directories = sorted(
[path for path in self.output_root.rglob("*") if path.is_dir()],
[path for path in self.code_root.rglob("*") if path.is_dir()],
key=lambda item: len(item.parts),
reverse=True,
)
@@ -134,17 +143,17 @@ class ProjectPipeline:
output_files: list[Path] = []
for relative_path in self._ordered_files(final_files):
content = read_text(self.output_root / relative_path)
content = read_text(self.code_root / relative_path)
analysis = analyzer.analyze(relative_path, content)
analysis_path = self.output_root / f"{relative_path.as_posix()}.analysis.md"
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
analysis_path.parent.mkdir(parents=True, exist_ok=True)
analysis_path.write_text(analysis, encoding="utf-8")
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
return output_files
def _write_report_files(self, report: PipelineReport) -> None:
(self.output_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
(self.output_root / "OVERVIEW.md").write_text(self._build_overview(report), encoding="utf-8")
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
(self.docs_root / "OVERVIEW.md").write_text(self._build_overview(report), encoding="utf-8")
def _build_overview(self, report: PipelineReport) -> str:
warnings = "\n".join(f"- {item}" for item in report.warnings) or "- 无"
@@ -155,6 +164,8 @@ class ProjectPipeline:
- Project Root: `{report.project_root.as_posix()}`
- Output Root: `{report.output_root.as_posix()}`
- Code Root: `{report.code_root.as_posix()}`
- Docs Root: `{report.docs_root.as_posix()}`
- Entry File: `{report.entry_file.as_posix()}`
- Initial Files: {len(report.initial_files)}
- Final Files: {len(report.final_files)}
@@ -167,18 +178,18 @@ class ProjectPipeline:
## Processing Logic
1. **Initial Scan**: 从入口文件开始递归扫描 `Invoke Workflow` 与 `Invoke VBA`,得到初始依赖集合。这个阶段会把 `Comment Out` 里的引用也一起算进去,因为此时还未清理失效代码。
2. **Copy to Workspace**: 将初始依赖集合完整复制到输出目录,保持相对目录结构不变,确保后续裁剪只发生在工作区副本中。
2. **Copy to Workspace**: 将初始依赖集合完整复制到 `code/` 目录,保持相对目录结构不变,确保后续裁剪只发生在代码副本中。
3. **Comment Cleanup**: 对已复制的 XAML 删除 `<ui:CommentOut>` 代码块,让后续扫描只看到当前真正生效的流程节点。
4. **Rescan After Cleanup**: 以清理后的入口文件重新递归扫描,得到清理后的实际依赖集合。
5. **Prune Unused Files**: 删除首次扫描能到达、但二次扫描已不可达的文件。这些文件通常来自被注释掉的工作流、VBA或清理后失效的调用链。
6. **Gemini Analysis**: 仅针对最终保留文件生成 `*.analysis.md`,让说明文档与实际可执行流程保持一致。
5. **Prune Unused Files**: 删除 `code/` 目录中首次扫描能到达、但二次扫描已不可达的文件。这些文件通常来自被注释掉的工作流、VBA或清理后失效的调用链。
6. **Gemini Analysis**: 仅针对最终保留文件在 `docs/` 目录生成 `*.analysis.md`,让说明文档与实际可执行流程保持一致。
## How To Read This Output
- `Initial Files`: 清理 `<ui:CommentOut>` 前扫描到的所有文件,代表“理论上被引用过”的集合。
- `Final Files`: 清理后仍可达的文件,代表“当前实际流程会用到”的集合。
- `Final Files`: 清理后 `code/` 目录中仍可达的文件,代表“当前实际流程会用到”的集合。
- `Cleaned XAML Files`: 被移除 `<ui:CommentOut>` 代码块的文件。
- `Pruned Files`: 只在失效分支中出现、已从输出目录删除的文件。
- `Analysis Files`: Gemini 生成的逐文件说明,重点解释该文件的流程、逻辑、数据与风险点。
- `Pruned Files`: 只在失效分支中出现、已从 `code/` 目录删除的文件。
- `Analysis Files`: `docs/` 目录下 Gemini 生成的逐文件说明,重点解释该文件的流程、逻辑、数据与风险点。
## Cleaned XAML Files
{cleaned}