feat(analysis): prevent pipeline crash on single file analysis failure

Catch Gemini API errors (e.g., HTTP 429) and summarize upstream messages
Generate fallback markdown for failed files instead of aborting
Append analysis failures to pipeline warnings
This commit is contained in:
2026-04-02 10:59:04 +08:00
parent 0bdebd5368
commit c73767073e
4 changed files with 149 additions and 25 deletions

View File

@@ -14,6 +14,10 @@ UNKNOWN_TEXT = "无法从当前文件确定"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class AnalysisError(RuntimeError):
"""Raised when a single file analysis cannot be completed."""
@dataclass(slots=True) @dataclass(slots=True)
class GeminiAnalyzer: class GeminiAnalyzer:
settings: Settings settings: Settings
@@ -43,6 +47,7 @@ class GeminiAnalyzer:
started = perf_counter() started = perf_counter()
logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content)) logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content))
prompt = self._build_prompt(relative_path, content) prompt = self._build_prompt(relative_path, content)
try:
response = self._client.models.generate_content( response = self._client.models.generate_content(
model=self.settings.model, model=self.settings.model,
contents=prompt, contents=prompt,
@@ -59,15 +64,20 @@ class GeminiAnalyzer:
relative_path.as_posix(), relative_path.as_posix(),
len(response_text), len(response_text),
) )
try:
payload = json.loads(response_text) payload = json.loads(response_text)
except json.JSONDecodeError: except json.JSONDecodeError as exc:
logger.exception( logger.exception(
"Gemini returned invalid JSON for %s. Response snippet: %r", "Gemini returned invalid JSON for %s. Response snippet: %r",
relative_path.as_posix(), relative_path.as_posix(),
response_text[:500], response_text[:500],
) )
raise raise AnalysisError(
f"Gemini 返回了无法解析的 JSON无法生成该文件说明。原始错误: {exc}"
) from exc
except Exception as exc:
summary = self._summarize_error(exc)
logger.exception("Gemini analysis failed for %s: %s", relative_path.as_posix(), summary)
raise AnalysisError(summary) from exc
logger.info( logger.info(
"Gemini analysis completed for %s in %.2fs", "Gemini analysis completed for %s in %.2fs",
relative_path.as_posix(), relative_path.as_posix(),
@@ -75,6 +85,38 @@ class GeminiAnalyzer:
) )
return self._to_markdown(relative_path, payload) return self._to_markdown(relative_path, payload)
def _summarize_error(self, exc: Exception) -> str:
    """Build a human-readable (Chinese) summary for a failed Gemini call.

    Prefers the upstream API error message when the exception carries a
    parsed ``response_json`` payload; otherwise falls back to ``str(exc)``
    or, as a last resort, the exception class name.

    Fix: the HTTP-status messages previously contained unbalanced
    parentheses ("调用失败HTTP {status_code})"); restore the opening paren
    in both status-bearing messages.
    """
    status_code = getattr(exc, "status_code", None)
    response_json = getattr(exc, "response_json", None)
    upstream_message = self._extract_error_message(response_json)
    if status_code == 429:
        # Quota / rate limit: give the operator an actionable hint.
        detail = upstream_message or "Resource has been exhausted"
        return (
            "Gemini 配额或速率限制已触发(HTTP 429),当前文件说明未生成。"
            f"上游信息: {detail}。可稍后重试,或使用 --skip-analysis 仅导出代码与依赖。"
        )
    if status_code is not None:
        detail = upstream_message or str(exc).strip() or exc.__class__.__name__
        return f"Gemini 调用失败(HTTP {status_code})。上游信息: {detail}"
    detail = str(exc).strip()
    if detail:
        return f"Gemini 分析失败: {detail}"
    return f"Gemini 分析失败: {exc.__class__.__name__}"
def _extract_error_message(self, response_json: Any) -> str | None:
if not isinstance(response_json, dict):
return None
error = response_json.get("error")
if isinstance(error, dict):
message = error.get("message")
if isinstance(message, str) and message.strip():
return message.strip()
return None
def _response_schema(self) -> dict[str, Any]: def _response_schema(self) -> dict[str, Any]:
return { return {
"type": "OBJECT", "type": "OBJECT",

View File

@@ -93,8 +93,8 @@ class ProjectPipeline:
len(pruned_files), len(pruned_files),
) )
analysis_files = self._write_analysis(final_rel_files, analyzer) analysis_files, analysis_warnings = self._write_analysis(final_rel_files, analyzer)
warnings = initial_scan.warnings + final_scan.warnings warnings = initial_scan.warnings + final_scan.warnings + analysis_warnings
report = PipelineReport( report = PipelineReport(
project_root=self.project_root, project_root=self.project_root,
@@ -177,21 +177,28 @@ class ProjectPipeline:
directory.rmdir() directory.rmdir()
logger.debug("Removed empty directory: %s", directory) logger.debug("Removed empty directory: %s", directory)
def _write_analysis(self, final_files: list[Path], analyzer) -> list[Path]: def _write_analysis(self, final_files: list[Path], analyzer) -> tuple[list[Path], list[str]]:
if analyzer is None: if analyzer is None:
logger.info("Skipping Gemini analysis because analyzer is disabled") logger.info("Skipping Gemini analysis because analyzer is disabled")
return [] return [], []
output_files: list[Path] = [] output_files: list[Path] = []
warnings: list[str] = []
for relative_path in self._ordered_files(final_files): for relative_path in self._ordered_files(final_files):
content = read_text(self.code_root / relative_path) content = read_text(self.code_root / relative_path)
try:
analysis = analyzer.analyze(relative_path, content) analysis = analyzer.analyze(relative_path, content)
except Exception as exc:
warning = f"Analysis failed for {relative_path.as_posix()}: {self._format_analysis_error(exc)}"
warnings.append(warning)
logger.warning(warning)
analysis = self._build_failed_analysis(relative_path, exc)
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md" analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
analysis_path.parent.mkdir(parents=True, exist_ok=True) analysis_path.parent.mkdir(parents=True, exist_ok=True)
analysis_path.write_text(analysis, encoding="utf-8") analysis_path.write_text(analysis, encoding="utf-8")
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md")) output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
logger.debug("Wrote analysis file: %s", analysis_path) logger.debug("Wrote analysis file: %s", analysis_path)
return output_files return output_files, warnings
def _write_report_files(self, report: PipelineReport) -> None: def _write_report_files(self, report: PipelineReport) -> None:
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8") (self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
@@ -252,3 +259,23 @@ class ProjectPipeline:
def _ordered_files(self, paths: list[Path]) -> list[Path]: def _ordered_files(self, paths: list[Path]) -> list[Path]:
return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower())) return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))
def _build_failed_analysis(self, relative_path: Path, exc: Exception) -> str:
    """Render fallback markdown for *relative_path* when Gemini analysis failed.

    The pipeline writes this file instead of aborting, so downstream
    consumers still find an ``.analysis.md`` for every exported source file.

    Fix: the "状态" bullet was missing its label separator
    ("- 状态Gemini 分析失败"); add the colon and normalize colon spacing
    across the three bullets for consistency.
    """
    reason = self._format_analysis_error(exc)
    return "\n".join(
        [
            f"# {relative_path.as_posix()}",
            "",
            "## 分析状态",
            "- 状态: Gemini 分析失败,当前文件未生成结构化说明。",
            f"- 原因: {reason}",
            "- 建议: 稍后重试;如果当前只需要导出代码与依赖,可使用 `--skip-analysis`。",
            "",
        ]
    )
def _format_analysis_error(self, exc: Exception) -> str:
message = str(exc).strip()
if message:
return message
return exc.__class__.__name__

View File

@@ -14,7 +14,7 @@ fake_dotenv.load_dotenv = lambda *args, **kwargs: None
sys.modules.setdefault("dotenv", fake_dotenv) sys.modules.setdefault("dotenv", fake_dotenv)
from uipath_explainator.config import Settings from uipath_explainator.config import Settings
from uipath_explainator.gemini import GeminiAnalyzer from uipath_explainator.gemini import AnalysisError, GeminiAnalyzer
class FakeHttpOptions: class FakeHttpOptions:
@@ -159,6 +159,33 @@ class GeminiAnalyzerTests(unittest.TestCase):
self.assertIn("先讲这个文件在整个流程中的定位", prompt) self.assertIn("先讲这个文件在整个流程中的定位", prompt)
self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt) self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt)
def test_analyze_wraps_rate_limit_error_with_clear_message(self) -> None:
    """An HTTP 429 from the client must surface as AnalysisError with actionable text."""
    fake_types = SimpleNamespace(
        HttpOptions=FakeHttpOptions,
        GenerateContentConfig=FakeGenerateContentConfig,
    )
    fake_genai = ModuleType("google.genai")
    fake_genai.Client = FakeClient
    fake_genai.types = fake_types
    fake_google = ModuleType("google")
    fake_google.genai = fake_genai
    with patch.dict(sys.modules, {"google": fake_google, "google.genai": fake_genai}):
        settings = Settings(api_key="test-key", base_url=None, model="gemini-test")
        analyzer = GeminiAnalyzer(settings)
        quota_error = RuntimeError("quota exceeded")
        quota_error.status_code = 429
        quota_error.response_json = {
            "error": {"message": "Resource has been exhausted (e.g. check quota)."}
        }

        def _raise(**_kwargs):
            raise quota_error

        analyzer._client.models.generate_content = _raise
        with self.assertRaises(AnalysisError) as captured:
            analyzer.analyze(Path("main.xaml"), "<Sequence />")
    message = str(captured.exception)
    self.assertIn("HTTP 429", message)
    self.assertIn("Resource has been exhausted", message)
    self.assertIn("--skip-analysis", message)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -57,6 +57,13 @@ class StubAnalyzer:
return f"# {relative_path.as_posix()}\n\n{len(content)}" return f"# {relative_path.as_posix()}\n\n{len(content)}"
class FlakyAnalyzer:
    """Stub analyzer that fails for exactly one file, to exercise fallback paths."""

    def analyze(self, relative_path: Path, content: str) -> str:
        # Simulate a rate-limit failure for the one problematic workflow.
        if relative_path.name != "Active.xaml":
            return f"# {relative_path.as_posix()}\n\n{len(content)}"
        raise RuntimeError("HTTP 429 quota exhausted")
class PipelineTests(unittest.TestCase): class PipelineTests(unittest.TestCase):
def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None: def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None:
source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>" source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>"
@@ -145,6 +152,27 @@ class PipelineTests(unittest.TestCase):
self.assertIn("Final scan complete:", combined) self.assertIn("Final scan complete:", combined)
self.assertIn("Pipeline completed in", combined) self.assertIn("Pipeline completed in", combined)
def test_pipeline_keeps_running_when_single_analysis_fails(self) -> None:
    """One failing file must yield fallback markdown plus a warning, not a crash."""
    with TemporaryDirectory() as tmp:
        base = Path(tmp)
        project_root = base / "project"
        output_root = base / "workspace"
        docs_root = output_root / "docs"

        # Minimal project layout: one flow that fails analysis, one script
        # that succeeds, and the entry-point workflow.
        (project_root / "Flows").mkdir(parents=True)
        (project_root / "Flows" / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
        (project_root / "Scripts").mkdir()
        (project_root / "Scripts" / "Keep.bas").write_text("Sub Keep()\nEnd Sub", encoding="utf-8")
        (project_root / "main.xaml").write_text(MAIN_XAML, encoding="utf-8")

        pipeline = ProjectPipeline(project_root, output_root, "main.xaml", force=True)
        report = pipeline.run(FlakyAnalyzer())

        failed_doc = docs_root / "Flows" / "Active.xaml.analysis.md"
        self.assertTrue(failed_doc.exists())
        fallback = failed_doc.read_text(encoding="utf-8")
        self.assertIn("Gemini 分析失败", fallback)
        self.assertIn("HTTP 429 quota exhausted", fallback)
        self.assertTrue((docs_root / "Scripts" / "Keep.bas.analysis.md").exists())
        self.assertTrue(
            any("Analysis failed for Flows/Active.xaml" in item for item in report.warnings)
        )
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()