Files
uipath-explainator/src/uipath_explainator/pipeline.py
xiaomai 0bdebd5368 feat(logging): add configurable logging with file output support
Introduce --log-level and --log-file CLI arguments.
Add execution time tracking and detailed logs across all modules.
2026-04-02 10:40:39 +08:00

255 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from shutil import copy2, rmtree
import json
import logging
from time import perf_counter
from .scanner import ScanResult, crawl_dependencies, find_entry_file, read_text, strip_comment_out_blocks
# Module-level logger; handlers/levels are expected to be configured by the CLI
# entry point (e.g. via --log-level / --log-file), not by this module.
logger = logging.getLogger(__name__)
@dataclass(slots=True)
class PipelineReport:
project_root: Path
output_root: Path
code_root: Path
docs_root: Path
entry_file: Path
initial_files: list[Path]
final_files: list[Path]
pruned_files: list[Path]
cleaned_files: list[Path]
warnings: list[str]
analysis_files: list[Path]
def to_json(self) -> str:
return json.dumps(
{
"project_root": self.project_root.as_posix(),
"output_root": self.output_root.as_posix(),
"code_root": self.code_root.as_posix(),
"docs_root": self.docs_root.as_posix(),
"entry_file": self.entry_file.as_posix(),
"initial_files": [item.as_posix() for item in self.initial_files],
"final_files": [item.as_posix() for item in self.final_files],
"pruned_files": [item.as_posix() for item in self.pruned_files],
"cleaned_files": [item.as_posix() for item in self.cleaned_files],
"warnings": self.warnings,
"analysis_files": [item.as_posix() for item in self.analysis_files],
},
ensure_ascii=False,
indent=2,
)
class ProjectPipeline:
    """Orchestrates the copy → clean → rescan → prune → analyze pipeline.

    Starting from an entry workflow, the pipeline copies the project's
    dependency closure into an output workspace (``code/``), strips
    commented-out XAML blocks, rescans to discover which files are still
    reachable, deletes the rest, and optionally writes per-file analysis
    documents plus a manifest/overview into ``docs/``.
    """

    def __init__(self, project_root: Path, output_root: Path, entry_name: str, force: bool = False) -> None:
        # Resolve both roots up front so later relative_to() calls are stable
        # regardless of how the caller spelled the paths.
        self.project_root = project_root.resolve()
        self.output_root = output_root.resolve()
        self.code_root = self.output_root / "code"
        self.docs_root = self.output_root / "docs"
        self.entry_name = entry_name
        # When True, an existing output directory is deleted instead of erroring.
        self.force = force

    def run(self, analyzer=None) -> PipelineReport:
        """Execute the full pipeline and return a report of what was done.

        Args:
            analyzer: Optional object exposing ``analyze(relative_path, content)``
                returning markdown text; when None the analysis stage is skipped.

        Returns:
            PipelineReport with all file sets expressed relative to their roots.

        Raises:
            FileExistsError: If the output directory exists and ``force`` is False.
        """
        started = perf_counter()
        logger.info(
            "Starting pipeline: project_root=%s output_root=%s entry=%s analysis=%s force=%s",
            self.project_root,
            self.output_root,
            self.entry_name,
            analyzer is not None,
            self.force,
        )
        entry_file = find_entry_file(self.project_root, self.entry_name)
        entry_rel = entry_file.relative_to(self.project_root)
        logger.debug("Using entry file %s", entry_rel.as_posix())
        # Pass 1: scan the ORIGINAL project; CommentOut blocks are still present,
        # so this closure may include references that only live in dead code.
        initial_scan = crawl_dependencies(self.project_root, entry_file)
        initial_rel_files = self._relative_files(initial_scan, self.project_root)
        logger.info(
            "Initial scan complete: files=%d warnings=%d",
            len(initial_rel_files),
            len(initial_scan.warnings),
        )
        self._prepare_output_root()
        self._copy_files(initial_rel_files)
        cleaned_files = self._clean_copied_xaml_files(initial_rel_files)
        logger.info("Copied %d files and cleaned %d XAML files", len(initial_rel_files), len(cleaned_files))
        # Pass 2: rescan the cleaned COPY so references inside removed
        # CommentOut blocks no longer count as reachable.
        final_scan = crawl_dependencies(self.code_root, self.code_root / entry_rel)
        final_rel_files = self._relative_files(final_scan, self.code_root)
        pruned_files = self._prune_unused_files(initial_rel_files, final_rel_files)
        logger.info(
            "Final scan complete: files=%d warnings=%d pruned=%d",
            len(final_rel_files),
            len(final_scan.warnings),
            len(pruned_files),
        )
        analysis_files = self._write_analysis(final_rel_files, analyzer)
        # Surface warnings from both scan passes together in the report.
        warnings = initial_scan.warnings + final_scan.warnings
        report = PipelineReport(
            project_root=self.project_root,
            output_root=self.output_root,
            code_root=self.code_root,
            docs_root=self.docs_root,
            entry_file=entry_rel,
            initial_files=initial_rel_files,
            final_files=final_rel_files,
            pruned_files=pruned_files,
            cleaned_files=cleaned_files,
            warnings=warnings,
            analysis_files=analysis_files,
        )
        self._write_report_files(report)
        logger.info(
            "Pipeline completed in %.2fs: final_files=%d analysis_files=%d warnings=%d",
            perf_counter() - started,
            len(report.final_files),
            len(report.analysis_files),
            len(report.warnings),
        )
        return report

    def _prepare_output_root(self) -> None:
        """Create ``code/`` and ``docs/``; wipe a pre-existing output only when forced."""
        if self.output_root.exists():
            if not self.force:
                raise FileExistsError(f"Output directory already exists: {self.output_root}")
            logger.info("Removing existing output directory because force=True: %s", self.output_root)
            rmtree(self.output_root)
        self.code_root.mkdir(parents=True, exist_ok=True)
        self.docs_root.mkdir(parents=True, exist_ok=True)
        logger.debug("Prepared output directories: code=%s docs=%s", self.code_root, self.docs_root)

    def _copy_files(self, relative_files: list[Path]) -> None:
        """Copy each file into ``code/``, preserving the relative directory layout."""
        for relative_path in relative_files:
            source = self.project_root / relative_path
            destination = self.code_root / relative_path
            destination.parent.mkdir(parents=True, exist_ok=True)
            # copy2 preserves metadata (timestamps) alongside the contents.
            copy2(source, destination)
            logger.debug("Copied file: %s -> %s", source, destination)

    def _clean_copied_xaml_files(self, relative_files: list[Path]) -> list[Path]:
        """Strip CommentOut blocks from the copied XAML files.

        Returns:
            The relative paths of files that were actually modified.
        """
        cleaned: list[Path] = []
        for relative_path in relative_files:
            if relative_path.suffix.lower() != ".xaml":
                continue
            output_file = self.code_root / relative_path
            original = read_text(output_file)
            updated = strip_comment_out_blocks(original)
            # Only rewrite (and report) files whose content actually changed.
            if updated != original:
                output_file.write_text(updated, encoding="utf-8")
                cleaned.append(relative_path)
                logger.debug("Removed CommentOut blocks from %s", output_file)
        return cleaned

    def _prune_unused_files(self, initial_files: list[Path], final_files: list[Path]) -> list[Path]:
        """Delete files that were copied initially but are unreachable after cleanup.

        Returns:
            The relative paths of files removed from ``code/``.
        """
        final_set = set(final_files)
        pruned: list[Path] = []
        for relative_path in initial_files:
            if relative_path in final_set:
                continue
            target = self.code_root / relative_path
            if target.exists():
                target.unlink()
                pruned.append(relative_path)
                logger.debug("Pruned unreachable file: %s", target)
        self._cleanup_empty_dirs()
        return pruned

    def _cleanup_empty_dirs(self) -> None:
        """Remove directories left empty by pruning, deepest-first."""
        # Sorting by path depth (descending) guarantees children are visited
        # before their parents, so a parent emptied by a child's removal is
        # itself removed in the same pass.
        directories = sorted(
            [path for path in self.code_root.rglob("*") if path.is_dir()],
            key=lambda item: len(item.parts),
            reverse=True,
        )
        for directory in directories:
            if any(directory.iterdir()):
                continue
            directory.rmdir()
            logger.debug("Removed empty directory: %s", directory)

    def _write_analysis(self, final_files: list[Path], analyzer) -> list[Path]:
        """Write one ``*.analysis.md`` per surviving file under ``docs/``.

        Returns:
            The analysis file paths relative to ``docs/`` (empty when the
            analyzer is disabled).
        """
        if analyzer is None:
            logger.info("Skipping Gemini analysis because analyzer is disabled")
            return []
        output_files: list[Path] = []
        for relative_path in self._ordered_files(final_files):
            content = read_text(self.code_root / relative_path)
            analysis = analyzer.analyze(relative_path, content)
            # Mirror the code-relative path inside docs/, suffixed with .analysis.md.
            analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
            analysis_path.parent.mkdir(parents=True, exist_ok=True)
            analysis_path.write_text(analysis, encoding="utf-8")
            output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
            logger.debug("Wrote analysis file: %s", analysis_path)
        return output_files

    def _write_report_files(self, report: PipelineReport) -> None:
        """Write the machine-readable manifest and human-readable overview to ``docs/``."""
        (self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
        (self.docs_root / "OVERVIEW.md").write_text(self._build_overview(report), encoding="utf-8")
        logger.debug("Wrote report files to %s", self.docs_root)

    def _build_overview(self, report: PipelineReport) -> str:
        """Render the OVERVIEW.md markdown body from the report."""
        # Each list section falls back to a placeholder line when empty.
        warnings = "\n".join(f"- {item}" for item in report.warnings) or "- 无"
        cleaned = "\n".join(f"- {item.as_posix()}" for item in report.cleaned_files) or "- 无"
        pruned = "\n".join(f"- {item.as_posix()}" for item in report.pruned_files) or "- 无"
        analyses = "\n".join(f"- {item.as_posix()}" for item in report.analysis_files) or "- 未启用 Gemini 分析"
        return f"""# UiPath Explainator Overview
- Project Root: `{report.project_root.as_posix()}`
- Output Root: `{report.output_root.as_posix()}`
- Code Root: `{report.code_root.as_posix()}`
- Docs Root: `{report.docs_root.as_posix()}`
- Entry File: `{report.entry_file.as_posix()}`
- Initial Files: {len(report.initial_files)}
- Final Files: {len(report.final_files)}
- Cleaned XAML Files: {len(report.cleaned_files)}
- Pruned Files: {len(report.pruned_files)}
- Analysis Files: {len(report.analysis_files)}
## Final Files
{chr(10).join(f"- {item.as_posix()}" for item in report.final_files)}
## Processing Logic
1. **Initial Scan**: 从入口文件开始递归扫描 `Invoke Workflow` 与 `Invoke VBA`,得到初始依赖集合。这个阶段会把 `Comment Out` 里的引用也一起算进去,因为此时还未清理失效代码。
2. **Copy to Workspace**: 将初始依赖集合完整复制到 `code/` 目录,保持相对目录结构不变,确保后续裁剪只发生在代码副本中。
3. **Comment Cleanup**: 对已复制的 XAML 删除 `<ui:CommentOut>` 代码块,让后续扫描只看到当前真正生效的流程节点。
4. **Rescan After Cleanup**: 以清理后的入口文件重新递归扫描,得到清理后的实际依赖集合。
5. **Prune Unused Files**: 删除 `code/` 目录中首次扫描能到达、但二次扫描已不可达的文件。这些文件通常来自被注释掉的工作流、VBA或清理后失效的调用链。
6. **Gemini Analysis**: 仅针对最终保留文件在 `docs/` 目录生成 `*.analysis.md`,让说明文档与实际可执行流程保持一致。
## How To Read This Output
- `Initial Files`: 清理 `<ui:CommentOut>` 前扫描到的所有文件,代表“理论上被引用过”的集合。
- `Final Files`: 清理后 `code/` 目录中仍可达的文件,代表“当前实际流程会用到”的集合。
- `Cleaned XAML Files`: 被移除 `<ui:CommentOut>` 代码块的文件。
- `Pruned Files`: 只在失效分支中出现、已从 `code/` 目录删除的文件。
- `Analysis Files`: `docs/` 目录下 Gemini 生成的逐文件说明,重点解释该文件的流程、逻辑、数据与风险点。
## Cleaned XAML Files
{cleaned}
## Pruned Files
{pruned}
## Analysis Files
{analyses}
## Warnings
{warnings}
"""

    def _relative_files(self, scan: ScanResult, root: Path) -> list[Path]:
        """Return the scan's files relative to *root*, sorted for stable output."""
        return sorted(path.relative_to(root) for path in scan.files)

    def _ordered_files(self, paths: list[Path]) -> list[Path]:
        """Order files for analysis: all .xaml first, then others; alphabetical within each group."""
        # False sorts before True, so the `suffix != ".xaml"` flag puts XAML first.
        return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))