feat(output): separate code and docs into subdirectories
Group copied project files under a dedicated code/ directory. Group generated analysis, manifest, and overview under docs/.
This commit is contained in:
13
README.md
13
README.md
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
`uipath-explainator` 会按 UiPath 项目入口 `main.xaml` 递归提取 `Invoke Workflow` 和 `Invoke VBA`,复制到指定工作区,删除 `<ui:CommentOut>` 里的无效代码,再重新扫描并裁掉不再使用的文件,最后逐个调用 Gemini 生成说明文档。
|
`uipath-explainator` 会按 UiPath 项目入口 `main.xaml` 递归提取 `Invoke Workflow` 和 `Invoke VBA`,复制到指定工作区,删除 `<ui:CommentOut>` 里的无效代码,再重新扫描并裁掉不再使用的文件,最后逐个调用 Gemini 生成说明文档。
|
||||||
|
|
||||||
|
输出会自动拆成独立的 `code/` 与 `docs/` 两个子目录,避免说明文档和项目文件混在一起。
|
||||||
|
|
||||||
## 安装
|
## 安装
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -26,7 +28,7 @@ GEMINI_MODEL=gemini-2.5-flash
|
|||||||
```bash
|
```bash
|
||||||
./.venv/bin/python -m uipath_explainator \
|
./.venv/bin/python -m uipath_explainator \
|
||||||
/path/to/uipath-project \
|
/path/to/uipath-project \
|
||||||
--output-dir workspace/project-copy \
|
--output-dir workspace/project-bundle \
|
||||||
--force
|
--force
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -40,11 +42,12 @@ GEMINI_MODEL=gemini-2.5-flash
|
|||||||
|
|
||||||
## 输出内容
|
## 输出内容
|
||||||
|
|
||||||
输出目录会保留原项目的相对目录结构,并额外生成:
|
输出目录现在是一个 bundle 目录,里面固定包含:
|
||||||
|
|
||||||
- `manifest.json`: 扫描、剪枝、告警和分析结果清单
|
- `code/`: 复制出来并经过清理、剪枝后的项目文件,保留原项目相对目录结构
|
||||||
- `OVERVIEW.md`: 总览说明书,除了文件数量,还会解释“初次扫描 -> 清理 Comment Out -> 二次扫描 -> 剪枝 -> Gemini 分析”的整条流水线逻辑
|
- `docs/manifest.json`: 扫描、剪枝、告警和分析结果清单
|
||||||
- `*.analysis.md`: 每个最终保留文件对应的 Gemini 咨询式说明,固定包含:
|
- `docs/OVERVIEW.md`: 总览说明书,除了文件数量,还会解释“初次扫描 -> 清理 Comment Out -> 二次扫描 -> 剪枝 -> Gemini 分析”的整条流水线逻辑
|
||||||
|
- `docs/*.analysis.md`: 每个最终保留文件对应的 Gemini 咨询式说明,固定包含:
|
||||||
- 文件定位:这个文件在整个流程中的角色、触发方式、核心作用
|
- 文件定位:这个文件在整个流程中的角色、触发方式、核心作用
|
||||||
- 流程拆解:按步骤说明它是怎么执行的
|
- 流程拆解:按步骤说明它是怎么执行的
|
||||||
- 关键逻辑:条件判断、调用关系、边界情况
|
- 关键逻辑:条件判断、调用关系、边界情况
|
||||||
|
|||||||
@@ -11,7 +11,12 @@ from .pipeline import ProjectPipeline
|
|||||||
def build_parser() -> ArgumentParser:
|
def build_parser() -> ArgumentParser:
|
||||||
parser = ArgumentParser(description="Extract and explain UiPath project dependencies.")
|
parser = ArgumentParser(description="Extract and explain UiPath project dependencies.")
|
||||||
parser.add_argument("project_dir", type=Path, help="UiPath project root directory")
|
parser.add_argument("project_dir", type=Path, help="UiPath project root directory")
|
||||||
parser.add_argument("--output-dir", type=Path, default=Path("workspace"), help="Copied project output directory")
|
parser.add_argument(
|
||||||
|
"--output-dir",
|
||||||
|
type=Path,
|
||||||
|
default=Path("workspace"),
|
||||||
|
help="Output bundle directory containing separate code/ and docs/ folders",
|
||||||
|
)
|
||||||
parser.add_argument("--entry", default="main.xaml", help="Entry XAML file name")
|
parser.add_argument("--entry", default="main.xaml", help="Entry XAML file name")
|
||||||
parser.add_argument("--env-file", type=Path, default=Path(".env"), help="Environment file for Gemini config")
|
parser.add_argument("--env-file", type=Path, default=Path(".env"), help="Environment file for Gemini config")
|
||||||
parser.add_argument("--model", help="Override GEMINI_MODEL")
|
parser.add_argument("--model", help="Override GEMINI_MODEL")
|
||||||
@@ -44,6 +49,8 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
report = pipeline.run(analyzer=analyzer)
|
report = pipeline.run(analyzer=analyzer)
|
||||||
|
|
||||||
print(f"Output written to: {report.output_root}")
|
print(f"Output written to: {report.output_root}")
|
||||||
|
print(f"Code written to: {report.code_root}")
|
||||||
|
print(f"Docs written to: {report.docs_root}")
|
||||||
print(f"Final files: {len(report.final_files)}")
|
print(f"Final files: {len(report.final_files)}")
|
||||||
print(f"Pruned files: {len(report.pruned_files)}")
|
print(f"Pruned files: {len(report.pruned_files)}")
|
||||||
print(f"Warnings: {len(report.warnings)}")
|
print(f"Warnings: {len(report.warnings)}")
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ from .scanner import ScanResult, crawl_dependencies, find_entry_file, read_text,
|
|||||||
class PipelineReport:
|
class PipelineReport:
|
||||||
project_root: Path
|
project_root: Path
|
||||||
output_root: Path
|
output_root: Path
|
||||||
|
code_root: Path
|
||||||
|
docs_root: Path
|
||||||
entry_file: Path
|
entry_file: Path
|
||||||
initial_files: list[Path]
|
initial_files: list[Path]
|
||||||
final_files: list[Path]
|
final_files: list[Path]
|
||||||
@@ -25,6 +27,8 @@ class PipelineReport:
|
|||||||
{
|
{
|
||||||
"project_root": self.project_root.as_posix(),
|
"project_root": self.project_root.as_posix(),
|
||||||
"output_root": self.output_root.as_posix(),
|
"output_root": self.output_root.as_posix(),
|
||||||
|
"code_root": self.code_root.as_posix(),
|
||||||
|
"docs_root": self.docs_root.as_posix(),
|
||||||
"entry_file": self.entry_file.as_posix(),
|
"entry_file": self.entry_file.as_posix(),
|
||||||
"initial_files": [item.as_posix() for item in self.initial_files],
|
"initial_files": [item.as_posix() for item in self.initial_files],
|
||||||
"final_files": [item.as_posix() for item in self.final_files],
|
"final_files": [item.as_posix() for item in self.final_files],
|
||||||
@@ -42,6 +46,8 @@ class ProjectPipeline:
|
|||||||
def __init__(self, project_root: Path, output_root: Path, entry_name: str, force: bool = False) -> None:
|
def __init__(self, project_root: Path, output_root: Path, entry_name: str, force: bool = False) -> None:
|
||||||
self.project_root = project_root.resolve()
|
self.project_root = project_root.resolve()
|
||||||
self.output_root = output_root.resolve()
|
self.output_root = output_root.resolve()
|
||||||
|
self.code_root = self.output_root / "code"
|
||||||
|
self.docs_root = self.output_root / "docs"
|
||||||
self.entry_name = entry_name
|
self.entry_name = entry_name
|
||||||
self.force = force
|
self.force = force
|
||||||
|
|
||||||
@@ -56,8 +62,8 @@ class ProjectPipeline:
|
|||||||
self._copy_files(initial_rel_files)
|
self._copy_files(initial_rel_files)
|
||||||
cleaned_files = self._clean_copied_xaml_files(initial_rel_files)
|
cleaned_files = self._clean_copied_xaml_files(initial_rel_files)
|
||||||
|
|
||||||
final_scan = crawl_dependencies(self.output_root, self.output_root / entry_rel)
|
final_scan = crawl_dependencies(self.code_root, self.code_root / entry_rel)
|
||||||
final_rel_files = self._relative_files(final_scan, self.output_root)
|
final_rel_files = self._relative_files(final_scan, self.code_root)
|
||||||
pruned_files = self._prune_unused_files(initial_rel_files, final_rel_files)
|
pruned_files = self._prune_unused_files(initial_rel_files, final_rel_files)
|
||||||
|
|
||||||
analysis_files = self._write_analysis(final_rel_files, analyzer)
|
analysis_files = self._write_analysis(final_rel_files, analyzer)
|
||||||
@@ -66,6 +72,8 @@ class ProjectPipeline:
|
|||||||
report = PipelineReport(
|
report = PipelineReport(
|
||||||
project_root=self.project_root,
|
project_root=self.project_root,
|
||||||
output_root=self.output_root,
|
output_root=self.output_root,
|
||||||
|
code_root=self.code_root,
|
||||||
|
docs_root=self.docs_root,
|
||||||
entry_file=entry_rel,
|
entry_file=entry_rel,
|
||||||
initial_files=initial_rel_files,
|
initial_files=initial_rel_files,
|
||||||
final_files=final_rel_files,
|
final_files=final_rel_files,
|
||||||
@@ -82,12 +90,13 @@ class ProjectPipeline:
|
|||||||
if not self.force:
|
if not self.force:
|
||||||
raise FileExistsError(f"Output directory already exists: {self.output_root}")
|
raise FileExistsError(f"Output directory already exists: {self.output_root}")
|
||||||
rmtree(self.output_root)
|
rmtree(self.output_root)
|
||||||
self.output_root.mkdir(parents=True, exist_ok=True)
|
self.code_root.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.docs_root.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
def _copy_files(self, relative_files: list[Path]) -> None:
|
def _copy_files(self, relative_files: list[Path]) -> None:
|
||||||
for relative_path in relative_files:
|
for relative_path in relative_files:
|
||||||
source = self.project_root / relative_path
|
source = self.project_root / relative_path
|
||||||
destination = self.output_root / relative_path
|
destination = self.code_root / relative_path
|
||||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||||
copy2(source, destination)
|
copy2(source, destination)
|
||||||
|
|
||||||
@@ -96,7 +105,7 @@ class ProjectPipeline:
|
|||||||
for relative_path in relative_files:
|
for relative_path in relative_files:
|
||||||
if relative_path.suffix.lower() != ".xaml":
|
if relative_path.suffix.lower() != ".xaml":
|
||||||
continue
|
continue
|
||||||
output_file = self.output_root / relative_path
|
output_file = self.code_root / relative_path
|
||||||
original = read_text(output_file)
|
original = read_text(output_file)
|
||||||
updated = strip_comment_out_blocks(original)
|
updated = strip_comment_out_blocks(original)
|
||||||
if updated != original:
|
if updated != original:
|
||||||
@@ -110,7 +119,7 @@ class ProjectPipeline:
|
|||||||
for relative_path in initial_files:
|
for relative_path in initial_files:
|
||||||
if relative_path in final_set:
|
if relative_path in final_set:
|
||||||
continue
|
continue
|
||||||
target = self.output_root / relative_path
|
target = self.code_root / relative_path
|
||||||
if target.exists():
|
if target.exists():
|
||||||
target.unlink()
|
target.unlink()
|
||||||
pruned.append(relative_path)
|
pruned.append(relative_path)
|
||||||
@@ -119,7 +128,7 @@ class ProjectPipeline:
|
|||||||
|
|
||||||
def _cleanup_empty_dirs(self) -> None:
|
def _cleanup_empty_dirs(self) -> None:
|
||||||
directories = sorted(
|
directories = sorted(
|
||||||
[path for path in self.output_root.rglob("*") if path.is_dir()],
|
[path for path in self.code_root.rglob("*") if path.is_dir()],
|
||||||
key=lambda item: len(item.parts),
|
key=lambda item: len(item.parts),
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)
|
)
|
||||||
@@ -134,17 +143,17 @@ class ProjectPipeline:
|
|||||||
|
|
||||||
output_files: list[Path] = []
|
output_files: list[Path] = []
|
||||||
for relative_path in self._ordered_files(final_files):
|
for relative_path in self._ordered_files(final_files):
|
||||||
content = read_text(self.output_root / relative_path)
|
content = read_text(self.code_root / relative_path)
|
||||||
analysis = analyzer.analyze(relative_path, content)
|
analysis = analyzer.analyze(relative_path, content)
|
||||||
analysis_path = self.output_root / f"{relative_path.as_posix()}.analysis.md"
|
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
|
||||||
analysis_path.parent.mkdir(parents=True, exist_ok=True)
|
analysis_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
analysis_path.write_text(analysis, encoding="utf-8")
|
analysis_path.write_text(analysis, encoding="utf-8")
|
||||||
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
|
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
|
||||||
return output_files
|
return output_files
|
||||||
|
|
||||||
def _write_report_files(self, report: PipelineReport) -> None:
|
def _write_report_files(self, report: PipelineReport) -> None:
|
||||||
(self.output_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
|
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
|
||||||
(self.output_root / "OVERVIEW.md").write_text(self._build_overview(report), encoding="utf-8")
|
(self.docs_root / "OVERVIEW.md").write_text(self._build_overview(report), encoding="utf-8")
|
||||||
|
|
||||||
def _build_overview(self, report: PipelineReport) -> str:
|
def _build_overview(self, report: PipelineReport) -> str:
|
||||||
warnings = "\n".join(f"- {item}" for item in report.warnings) or "- 无"
|
warnings = "\n".join(f"- {item}" for item in report.warnings) or "- 无"
|
||||||
@@ -155,6 +164,8 @@ class ProjectPipeline:
|
|||||||
|
|
||||||
- Project Root: `{report.project_root.as_posix()}`
|
- Project Root: `{report.project_root.as_posix()}`
|
||||||
- Output Root: `{report.output_root.as_posix()}`
|
- Output Root: `{report.output_root.as_posix()}`
|
||||||
|
- Code Root: `{report.code_root.as_posix()}`
|
||||||
|
- Docs Root: `{report.docs_root.as_posix()}`
|
||||||
- Entry File: `{report.entry_file.as_posix()}`
|
- Entry File: `{report.entry_file.as_posix()}`
|
||||||
- Initial Files: {len(report.initial_files)}
|
- Initial Files: {len(report.initial_files)}
|
||||||
- Final Files: {len(report.final_files)}
|
- Final Files: {len(report.final_files)}
|
||||||
@@ -167,18 +178,18 @@ class ProjectPipeline:
|
|||||||
|
|
||||||
## Processing Logic
|
## Processing Logic
|
||||||
1. **Initial Scan**: 从入口文件开始递归扫描 `Invoke Workflow` 与 `Invoke VBA`,得到初始依赖集合。这个阶段会把 `Comment Out` 里的引用也一起算进去,因为此时还未清理失效代码。
|
1. **Initial Scan**: 从入口文件开始递归扫描 `Invoke Workflow` 与 `Invoke VBA`,得到初始依赖集合。这个阶段会把 `Comment Out` 里的引用也一起算进去,因为此时还未清理失效代码。
|
||||||
2. **Copy to Workspace**: 将初始依赖集合完整复制到输出目录,保持相对目录结构不变,确保后续裁剪只发生在工作区副本中。
|
2. **Copy to Workspace**: 将初始依赖集合完整复制到 `code/` 目录,保持相对目录结构不变,确保后续裁剪只发生在代码副本中。
|
||||||
3. **Comment Cleanup**: 对已复制的 XAML 删除 `<ui:CommentOut>` 代码块,让后续扫描只看到当前真正生效的流程节点。
|
3. **Comment Cleanup**: 对已复制的 XAML 删除 `<ui:CommentOut>` 代码块,让后续扫描只看到当前真正生效的流程节点。
|
||||||
4. **Rescan After Cleanup**: 以清理后的入口文件重新递归扫描,得到清理后的实际依赖集合。
|
4. **Rescan After Cleanup**: 以清理后的入口文件重新递归扫描,得到清理后的实际依赖集合。
|
||||||
5. **Prune Unused Files**: 删除首次扫描能到达、但二次扫描已不可达的文件。这些文件通常来自被注释掉的工作流、VBA,或清理后失效的调用链。
|
5. **Prune Unused Files**: 删除 `code/` 目录中首次扫描能到达、但二次扫描已不可达的文件。这些文件通常来自被注释掉的工作流、VBA,或清理后失效的调用链。
|
||||||
6. **Gemini Analysis**: 仅针对最终保留文件生成 `*.analysis.md`,让说明文档与实际可执行流程保持一致。
|
6. **Gemini Analysis**: 仅针对最终保留文件在 `docs/` 目录生成 `*.analysis.md`,让说明文档与实际可执行流程保持一致。
|
||||||
|
|
||||||
## How To Read This Output
|
## How To Read This Output
|
||||||
- `Initial Files`: 清理 `<ui:CommentOut>` 前扫描到的所有文件,代表“理论上被引用过”的集合。
|
- `Initial Files`: 清理 `<ui:CommentOut>` 前扫描到的所有文件,代表“理论上被引用过”的集合。
|
||||||
- `Final Files`: 清理后仍可达的文件,代表“当前实际流程会用到”的集合。
|
- `Final Files`: 清理后 `code/` 目录中仍可达的文件,代表“当前实际流程会用到”的集合。
|
||||||
- `Cleaned XAML Files`: 被移除 `<ui:CommentOut>` 代码块的文件。
|
- `Cleaned XAML Files`: 被移除 `<ui:CommentOut>` 代码块的文件。
|
||||||
- `Pruned Files`: 只在失效分支中出现、已从输出目录删除的文件。
|
- `Pruned Files`: 只在失效分支中出现、已从 `code/` 目录删除的文件。
|
||||||
- `Analysis Files`: Gemini 生成的逐文件说明,重点解释该文件的流程、逻辑、数据与风险点。
|
- `Analysis Files`: `docs/` 目录下 Gemini 生成的逐文件说明,重点解释该文件的流程、逻辑、数据与风险点。
|
||||||
|
|
||||||
## Cleaned XAML Files
|
## Cleaned XAML Files
|
||||||
{cleaned}
|
{cleaned}
|
||||||
|
|||||||
@@ -83,6 +83,8 @@ class PipelineTests(unittest.TestCase):
|
|||||||
tmp_path = Path(tmp)
|
tmp_path = Path(tmp)
|
||||||
project_root = tmp_path / "project"
|
project_root = tmp_path / "project"
|
||||||
output_root = tmp_path / "workspace"
|
output_root = tmp_path / "workspace"
|
||||||
|
code_root = output_root / "code"
|
||||||
|
docs_root = output_root / "docs"
|
||||||
(project_root / "Flows").mkdir(parents=True)
|
(project_root / "Flows").mkdir(parents=True)
|
||||||
(project_root / "Flows" / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
|
(project_root / "Flows" / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
|
||||||
(project_root / "Flows" / "Old.xaml").write_text(OLD_XAML, encoding="utf-8")
|
(project_root / "Flows" / "Old.xaml").write_text(OLD_XAML, encoding="utf-8")
|
||||||
@@ -104,15 +106,21 @@ class PipelineTests(unittest.TestCase):
|
|||||||
self.assertIn("Scripts/Keep.bas", final_files)
|
self.assertIn("Scripts/Keep.bas", final_files)
|
||||||
self.assertNotIn("Flows/Old.xaml", final_files)
|
self.assertNotIn("Flows/Old.xaml", final_files)
|
||||||
self.assertNotIn("Scripts/Drop.bas", final_files)
|
self.assertNotIn("Scripts/Drop.bas", final_files)
|
||||||
self.assertFalse((output_root / "Flows" / "Old.xaml").exists())
|
self.assertEqual(report.code_root, code_root.resolve())
|
||||||
self.assertFalse((output_root / "Scripts" / "Drop.bas").exists())
|
self.assertEqual(report.docs_root, docs_root.resolve())
|
||||||
self.assertTrue((output_root / "Flows" / "Active.xaml.analysis.md").exists())
|
self.assertFalse((code_root / "Flows" / "Old.xaml").exists())
|
||||||
|
self.assertFalse((code_root / "Scripts" / "Drop.bas").exists())
|
||||||
|
self.assertTrue((code_root / "Flows" / "Active.xaml").exists())
|
||||||
|
self.assertTrue((docs_root / "Flows" / "Active.xaml.analysis.md").exists())
|
||||||
|
self.assertTrue((docs_root / "manifest.json").exists())
|
||||||
|
|
||||||
overview = (output_root / "OVERVIEW.md").read_text(encoding="utf-8")
|
overview = (docs_root / "OVERVIEW.md").read_text(encoding="utf-8")
|
||||||
self.assertIn("## Processing Logic", overview)
|
self.assertIn("## Processing Logic", overview)
|
||||||
self.assertIn("Initial Scan", overview)
|
self.assertIn("Initial Scan", overview)
|
||||||
self.assertIn("## How To Read This Output", overview)
|
self.assertIn("## How To Read This Output", overview)
|
||||||
self.assertIn("## Cleaned XAML Files", overview)
|
self.assertIn("## Cleaned XAML Files", overview)
|
||||||
|
self.assertIn("Code Root", overview)
|
||||||
|
self.assertIn("Docs Root", overview)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user