From c73767073eaa3eaefd9e63d3c9cf1338192fd26b Mon Sep 17 00:00:00 2001
From: xiaomai <xsbugh@gmail.com>
Date: Thu, 2 Apr 2026 10:59:04 +0800
Subject: [PATCH] feat(analysis): prevent pipeline crash on single file
 analysis failure

- Catch Gemini API errors (e.g., HTTP 429) and summarize upstream messages.
- Generate fallback markdown for failed files instead of aborting.
- Append analysis failures to pipeline warnings.
---
 src/uipath_explainator/gemini.py   | 78 +++++++++++++++++++++++-------
 src/uipath_explainator/pipeline.py | 39 ++++++++++++---
 tests/test_gemini.py               | 29 ++++++++++-
 tests/test_pipeline.py             | 28 +++++++++++
 4 files changed, 149 insertions(+), 25 deletions(-)

diff --git a/src/uipath_explainator/gemini.py b/src/uipath_explainator/gemini.py
index 6f968e6..4ec9614 100644
--- a/src/uipath_explainator/gemini.py
+++ b/src/uipath_explainator/gemini.py
@@ -14,6 +14,10 @@ UNKNOWN_TEXT = "无法从当前文件确定"
 logger = logging.getLogger(__name__)
 
 
+class AnalysisError(RuntimeError):
+    """Raised when analysis of a single file fails: the Gemini call raised or its response was not valid JSON."""
+
+
 @dataclass(slots=True)
 class GeminiAnalyzer:
     settings: Settings
@@ -43,31 +47,37 @@ class GeminiAnalyzer:
         started = perf_counter()
         logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content))
         prompt = self._build_prompt(relative_path, content)
-        response = self._client.models.generate_content(
-            model=self.settings.model,
-            contents=prompt,
-            config=self._types.GenerateContentConfig(
-                temperature=0.2,
-                response_mime_type="application/json",
-                response_schema=self._response_schema(),
-            ),
-        )
-
-        response_text = response.text or ""
-        logger.debug(
-            "Gemini response received for %s (%d chars)",
-            relative_path.as_posix(),
-            len(response_text),
-        )
         try:
+            response = self._client.models.generate_content(
+                model=self.settings.model,
+                contents=prompt,
+                config=self._types.GenerateContentConfig(
+                    temperature=0.2,
+                    response_mime_type="application/json",
+                    response_schema=self._response_schema(),
+                ),
+            )
+
+            response_text = response.text or ""
+            logger.debug(
+                "Gemini response received for %s (%d chars)",
+                relative_path.as_posix(),
+                len(response_text),
+            )
             payload = json.loads(response_text)
-        except json.JSONDecodeError:
+        except json.JSONDecodeError as exc:
             logger.exception(
                 "Gemini returned invalid JSON for %s. Response snippet: %r",
                 relative_path.as_posix(),
                 response_text[:500],
             )
-            raise
+            raise AnalysisError(
+                f"Gemini 返回了无法解析的 JSON，无法生成该文件说明。原始错误: {exc}"
+            ) from exc
+        except Exception as exc:
+            summary = self._summarize_error(exc)
+            logger.exception("Gemini analysis failed for %s: %s", relative_path.as_posix(), summary)
+            raise AnalysisError(summary) from exc
         logger.info(
             "Gemini analysis completed for %s in %.2fs",
             relative_path.as_posix(),
@@ -75,6 +85,38 @@ class GeminiAnalyzer:
         )
         return self._to_markdown(relative_path, payload)
 
+    def _summarize_error(self, exc: Exception) -> str:
+        """Return a user-facing summary of a failed Gemini call described by *exc*."""
+        # google-genai's APIError exposes `code`/`details`; `status_code`/`response_json` are still honoured first.
+        status_code = getattr(exc, "status_code", None)
+        if status_code is None and isinstance(getattr(exc, "code", None), int):
+            status_code = exc.code
+        response_json = getattr(exc, "response_json", None) or getattr(exc, "details", None)
+        upstream_message = self._extract_error_message(response_json)
+        fallback = str(exc).strip() or exc.__class__.__name__
+
+        if status_code == 429:
+            detail = upstream_message or "Resource has been exhausted"
+            return (
+                "Gemini 配额或速率限制已触发（HTTP 429），当前文件说明未生成。"
+                f"上游信息: {detail}。可稍后重试，或使用 --skip-analysis 仅导出代码与依赖。"
+            )
+        if status_code is not None:
+            return f"Gemini 调用失败（HTTP {status_code}）。上游信息: {upstream_message or fallback}"
+        # No HTTP status available: fall back to the exception text (or its class name when blank).
+        return f"Gemini 分析失败: {fallback}"
+
+    def _extract_error_message(self, response_json: Any) -> str | None:
+        """Return the stripped ``error.message`` text of a payload, or None when it is absent or blank."""
+        if not isinstance(response_json, dict):
+            return None
+        error_section = response_json.get("error")
+        if not isinstance(error_section, dict):
+            return None
+        text = error_section.get("message")
+        cleaned = text.strip() if isinstance(text, str) else ""
+        return cleaned or None
+
     def _response_schema(self) -> dict[str, Any]:
         return {
             "type": "OBJECT",
diff --git a/src/uipath_explainator/pipeline.py b/src/uipath_explainator/pipeline.py
index 34a68bb..c8432f4 100644
--- a/src/uipath_explainator/pipeline.py
+++ b/src/uipath_explainator/pipeline.py
@@ -93,8 +93,8 @@ class ProjectPipeline:
             len(pruned_files),
         )
 
-        analysis_files = self._write_analysis(final_rel_files, analyzer)
-        warnings = initial_scan.warnings + final_scan.warnings
+        analysis_files, analysis_warnings = self._write_analysis(final_rel_files, analyzer)
+        warnings = initial_scan.warnings + final_scan.warnings + analysis_warnings
 
         report = PipelineReport(
             project_root=self.project_root,
@@ -177,21 +177,28 @@ class ProjectPipeline:
             directory.rmdir()
             logger.debug("Removed empty directory: %s", directory)
 
-    def _write_analysis(self, final_files: list[Path], analyzer) -> list[Path]:
+    def _write_analysis(self, final_files: list[Path], analyzer) -> tuple[list[Path], list[str]]:
         if analyzer is None:
             logger.info("Skipping Gemini analysis because analyzer is disabled")
-            return []
+            return [], []
 
         output_files: list[Path] = []
+        warnings: list[str] = []
         for relative_path in self._ordered_files(final_files):
             content = read_text(self.code_root / relative_path)
-            analysis = analyzer.analyze(relative_path, content)
+            try:
+                analysis = analyzer.analyze(relative_path, content)
+            except Exception as exc:
+                warning = f"Analysis failed for {relative_path.as_posix()}: {self._format_analysis_error(exc)}"
+                warnings.append(warning)
+                logger.warning(warning)
+                analysis = self._build_failed_analysis(relative_path, exc)
             analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
             analysis_path.parent.mkdir(parents=True, exist_ok=True)
             analysis_path.write_text(analysis, encoding="utf-8")
             output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
             logger.debug("Wrote analysis file: %s", analysis_path)
-        return output_files
+        return output_files, warnings
 
     def _write_report_files(self, report: PipelineReport) -> None:
         (self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
@@ -252,3 +259,23 @@ class ProjectPipeline:
 
     def _ordered_files(self, paths: list[Path]) -> list[Path]:
         return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))
+
+    def _build_failed_analysis(self, relative_path: Path, exc: Exception) -> str:
+        """Render the placeholder markdown written when analysis of a file fails."""
+        failure_reason = self._format_analysis_error(exc)
+        heading = f"# {relative_path.as_posix()}"
+        body_lines = (
+            "## 分析状态",
+            "- 状态：Gemini 分析失败，当前文件未生成结构化说明。",
+            f"- 原因：{failure_reason}",
+            "- 建议：稍后重试；如果当前只需要导出代码与依赖，可使用 `--skip-analysis`。",
+        )
+        # Blank line after the heading and a single trailing newline at the end.
+        document = heading + "\n\n" + "\n".join(body_lines) + "\n"
+        return document
+
+    def _format_analysis_error(self, exc: Exception) -> str:
+        """Describe *exc* by its stripped message, or by its class name when the message is blank."""
+        text = str(exc).strip()
+        # An exception raised without arguments stringifies to "", so name the type instead.
+        return text or type(exc).__name__
diff --git a/tests/test_gemini.py b/tests/test_gemini.py
index 83ac7a4..b94441c 100644
--- a/tests/test_gemini.py
+++ b/tests/test_gemini.py
@@ -14,7 +14,7 @@ fake_dotenv.load_dotenv = lambda *args, **kwargs: None
 sys.modules.setdefault("dotenv", fake_dotenv)
 
 from uipath_explainator.config import Settings
-from uipath_explainator.gemini import GeminiAnalyzer
+from uipath_explainator.gemini import AnalysisError, GeminiAnalyzer
 
 
 class FakeHttpOptions:
@@ -159,6 +159,33 @@ class GeminiAnalyzerTests(unittest.TestCase):
         self.assertIn("先讲这个文件在整个流程中的定位", prompt)
         self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt)
 
+    def test_analyze_wraps_rate_limit_error_with_clear_message(self) -> None:
+        fake_types = SimpleNamespace(
+            HttpOptions=FakeHttpOptions,
+            GenerateContentConfig=FakeGenerateContentConfig,
+        )
+        fake_genai = ModuleType("google.genai")
+        fake_genai.Client = FakeClient
+        fake_genai.types = fake_types
+        # Stub for the parent "google" package; both module names are patched into sys.modules below.
+        fake_google = ModuleType("google")
+        fake_google.genai = fake_genai
+        # Construct the analyzer while sys.modules points at the fake SDK modules.
+        with patch.dict(sys.modules, {"google": fake_google, "google.genai": fake_genai}):
+            analyzer = GeminiAnalyzer(Settings(api_key="test-key", base_url=None, model="gemini-test"))
+        # Fake upstream failure: status 429 plus an error payload; the empty generator's .throw() makes the lambda raise.
+        error = RuntimeError("quota exceeded")
+        error.status_code = 429
+        error.response_json = {"error": {"message": "Resource has been exhausted (e.g. check quota)."}}
+        analyzer._client.models.generate_content = lambda **_: (_ for _ in ()).throw(error)
+        # analyze() is expected to surface the upstream error as AnalysisError.
+        with self.assertRaises(AnalysisError) as captured:
+            analyzer.analyze(Path("main.xaml"), "<Sequence />")
+        # The message should carry the HTTP status, the upstream text and the --skip-analysis hint.
+        self.assertIn("HTTP 429", str(captured.exception))
+        self.assertIn("Resource has been exhausted", str(captured.exception))
+        self.assertIn("--skip-analysis", str(captured.exception))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 25ee037..b773357 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -57,6 +57,13 @@ class StubAnalyzer:
         return f"# {relative_path.as_posix()}\n\n{len(content)}"
 
 
+class FlakyAnalyzer:  # Test double: analysis fails only for a file named Active.xaml.
+    def analyze(self, relative_path: Path, content: str) -> str:
+        if relative_path.name != "Active.xaml":
+            return f"# {relative_path.as_posix()}\n\n{len(content)}"
+        raise RuntimeError("HTTP 429 quota exhausted")
+
+
 class PipelineTests(unittest.TestCase):
     def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None:
         source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>"
@@ -145,6 +152,27 @@ class PipelineTests(unittest.TestCase):
             self.assertIn("Final scan complete:", combined)
             self.assertIn("Pipeline completed in", combined)
 
+    def test_pipeline_keeps_running_when_single_analysis_fails(self) -> None:
+        with TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            project_root = tmp_path / "project"
+            output_root = tmp_path / "workspace"
+            docs_root = output_root / "docs"
+            (project_root / "Flows").mkdir(parents=True)
+            (project_root / "Flows" / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
+            (project_root / "Scripts").mkdir()
+            (project_root / "Scripts" / "Keep.bas").write_text("Sub Keep()\nEnd Sub", encoding="utf-8")
+            (project_root / "main.xaml").write_text(MAIN_XAML, encoding="utf-8")
+            # FlakyAnalyzer raises only for Active.xaml; every other file gets a normal result.
+            report = ProjectPipeline(project_root, output_root, "main.xaml", force=True).run(FlakyAnalyzer())
+            # The failed file still gets a fallback document, Keep.bas gets its analysis, and a warning is recorded.
+            self.assertTrue((docs_root / "Flows" / "Active.xaml.analysis.md").exists())
+            fallback = (docs_root / "Flows" / "Active.xaml.analysis.md").read_text(encoding="utf-8")
+            self.assertIn("Gemini 分析失败", fallback)
+            self.assertIn("HTTP 429 quota exhausted", fallback)
+            self.assertTrue((docs_root / "Scripts" / "Keep.bas.analysis.md").exists())
+            self.assertTrue(any("Analysis failed for Flows/Active.xaml" in item for item in report.warnings))
+
 
 if __name__ == "__main__":
     unittest.main()