feat(analysis): prevent pipeline crash on single file analysis failure

Catch Gemini API errors (e.g., HTTP 429) and summarize upstream messages
Generate fallback markdown for failed files instead of aborting
Append analysis failures to pipeline warnings
This commit is contained in:
2026-04-02 10:59:04 +08:00
parent 0bdebd5368
commit c73767073e
4 changed files with 149 additions and 25 deletions

View File

@@ -14,6 +14,10 @@ UNKNOWN_TEXT = "无法从当前文件确定"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class AnalysisError(RuntimeError):
"""Raised when a single file analysis cannot be completed."""
@dataclass(slots=True) @dataclass(slots=True)
class GeminiAnalyzer: class GeminiAnalyzer:
settings: Settings settings: Settings
@@ -43,6 +47,7 @@ class GeminiAnalyzer:
started = perf_counter() started = perf_counter()
logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content)) logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content))
prompt = self._build_prompt(relative_path, content) prompt = self._build_prompt(relative_path, content)
try:
response = self._client.models.generate_content( response = self._client.models.generate_content(
model=self.settings.model, model=self.settings.model,
contents=prompt, contents=prompt,
@@ -59,15 +64,20 @@ class GeminiAnalyzer:
relative_path.as_posix(), relative_path.as_posix(),
len(response_text), len(response_text),
) )
try:
payload = json.loads(response_text) payload = json.loads(response_text)
except json.JSONDecodeError: except json.JSONDecodeError as exc:
logger.exception( logger.exception(
"Gemini returned invalid JSON for %s. Response snippet: %r", "Gemini returned invalid JSON for %s. Response snippet: %r",
relative_path.as_posix(), relative_path.as_posix(),
response_text[:500], response_text[:500],
) )
raise raise AnalysisError(
f"Gemini 返回了无法解析的 JSON无法生成该文件说明。原始错误: {exc}"
) from exc
except Exception as exc:
summary = self._summarize_error(exc)
logger.exception("Gemini analysis failed for %s: %s", relative_path.as_posix(), summary)
raise AnalysisError(summary) from exc
logger.info( logger.info(
"Gemini analysis completed for %s in %.2fs", "Gemini analysis completed for %s in %.2fs",
relative_path.as_posix(), relative_path.as_posix(),
@@ -75,6 +85,38 @@ class GeminiAnalyzer:
) )
return self._to_markdown(relative_path, payload) return self._to_markdown(relative_path, payload)
def _summarize_error(self, exc: Exception) -> str:
    """Build a human-readable (Chinese) summary for a failed Gemini call.

    Prefers the upstream API error message when the exception carries a
    parsed ``response_json`` payload; otherwise falls back to ``str(exc)``
    or, as a last resort, the exception class name.

    Fix: the HTTP-status messages previously contained unbalanced
    parentheses ("调用失败HTTP {status_code})"); restore the opening paren
    in both status-bearing messages.
    """
    status_code = getattr(exc, "status_code", None)
    response_json = getattr(exc, "response_json", None)
    upstream_message = self._extract_error_message(response_json)
    if status_code == 429:
        # Quota / rate limit: give the operator an actionable hint.
        detail = upstream_message or "Resource has been exhausted"
        return (
            "Gemini 配额或速率限制已触发(HTTP 429),当前文件说明未生成。"
            f"上游信息: {detail}。可稍后重试,或使用 --skip-analysis 仅导出代码与依赖。"
        )
    if status_code is not None:
        detail = upstream_message or str(exc).strip() or exc.__class__.__name__
        return f"Gemini 调用失败(HTTP {status_code})。上游信息: {detail}"
    detail = str(exc).strip()
    if detail:
        return f"Gemini 分析失败: {detail}"
    return f"Gemini 分析失败: {exc.__class__.__name__}"
def _extract_error_message(self, response_json: Any) -> str | None:
if not isinstance(response_json, dict):
return None
error = response_json.get("error")
if isinstance(error, dict):
message = error.get("message")
if isinstance(message, str) and message.strip():
return message.strip()
return None
def _response_schema(self) -> dict[str, Any]: def _response_schema(self) -> dict[str, Any]:
return { return {
"type": "OBJECT", "type": "OBJECT",

View File

@@ -93,8 +93,8 @@ class ProjectPipeline:
len(pruned_files), len(pruned_files),
) )
analysis_files = self._write_analysis(final_rel_files, analyzer) analysis_files, analysis_warnings = self._write_analysis(final_rel_files, analyzer)
warnings = initial_scan.warnings + final_scan.warnings warnings = initial_scan.warnings + final_scan.warnings + analysis_warnings
report = PipelineReport( report = PipelineReport(
project_root=self.project_root, project_root=self.project_root,
@@ -177,21 +177,28 @@ class ProjectPipeline:
directory.rmdir() directory.rmdir()
logger.debug("Removed empty directory: %s", directory) logger.debug("Removed empty directory: %s", directory)
def _write_analysis(self, final_files: list[Path], analyzer) -> list[Path]: def _write_analysis(self, final_files: list[Path], analyzer) -> tuple[list[Path], list[str]]:
if analyzer is None: if analyzer is None:
logger.info("Skipping Gemini analysis because analyzer is disabled") logger.info("Skipping Gemini analysis because analyzer is disabled")
return [] return [], []
output_files: list[Path] = [] output_files: list[Path] = []
warnings: list[str] = []
for relative_path in self._ordered_files(final_files): for relative_path in self._ordered_files(final_files):
content = read_text(self.code_root / relative_path) content = read_text(self.code_root / relative_path)
try:
analysis = analyzer.analyze(relative_path, content) analysis = analyzer.analyze(relative_path, content)
except Exception as exc:
warning = f"Analysis failed for {relative_path.as_posix()}: {self._format_analysis_error(exc)}"
warnings.append(warning)
logger.warning(warning)
analysis = self._build_failed_analysis(relative_path, exc)
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md" analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
analysis_path.parent.mkdir(parents=True, exist_ok=True) analysis_path.parent.mkdir(parents=True, exist_ok=True)
analysis_path.write_text(analysis, encoding="utf-8") analysis_path.write_text(analysis, encoding="utf-8")
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md")) output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
logger.debug("Wrote analysis file: %s", analysis_path) logger.debug("Wrote analysis file: %s", analysis_path)
return output_files return output_files, warnings
def _write_report_files(self, report: PipelineReport) -> None: def _write_report_files(self, report: PipelineReport) -> None:
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8") (self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
@@ -252,3 +259,23 @@ class ProjectPipeline:
def _ordered_files(self, paths: list[Path]) -> list[Path]: def _ordered_files(self, paths: list[Path]) -> list[Path]:
return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower())) return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))
def _build_failed_analysis(self, relative_path: Path, exc: Exception) -> str:
    """Render fallback markdown for *relative_path* when Gemini analysis failed.

    The pipeline writes this file instead of aborting, so downstream
    consumers still find an ``.analysis.md`` for every exported source file.

    Fix: the "状态" bullet was missing its label separator
    ("- 状态Gemini 分析失败"); add the colon and normalize colon spacing
    across the three bullets for consistency.
    """
    reason = self._format_analysis_error(exc)
    return "\n".join(
        [
            f"# {relative_path.as_posix()}",
            "",
            "## 分析状态",
            "- 状态: Gemini 分析失败,当前文件未生成结构化说明。",
            f"- 原因: {reason}",
            "- 建议: 稍后重试;如果当前只需要导出代码与依赖,可使用 `--skip-analysis`。",
            "",
        ]
    )
def _format_analysis_error(self, exc: Exception) -> str:
message = str(exc).strip()
if message:
return message
return exc.__class__.__name__

View File

@@ -14,7 +14,7 @@ fake_dotenv.load_dotenv = lambda *args, **kwargs: None
sys.modules.setdefault("dotenv", fake_dotenv) sys.modules.setdefault("dotenv", fake_dotenv)
from uipath_explainator.config import Settings from uipath_explainator.config import Settings
from uipath_explainator.gemini import GeminiAnalyzer from uipath_explainator.gemini import AnalysisError, GeminiAnalyzer
class FakeHttpOptions: class FakeHttpOptions:
@@ -159,6 +159,33 @@ class GeminiAnalyzerTests(unittest.TestCase):
self.assertIn("先讲这个文件在整个流程中的定位", prompt) self.assertIn("先讲这个文件在整个流程中的定位", prompt)
self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt) self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt)
def test_analyze_wraps_rate_limit_error_with_clear_message(self) -> None:
    """An HTTP 429 from the client must surface as AnalysisError with actionable text."""
    fake_types = SimpleNamespace(
        HttpOptions=FakeHttpOptions,
        GenerateContentConfig=FakeGenerateContentConfig,
    )
    fake_genai = ModuleType("google.genai")
    fake_genai.Client = FakeClient
    fake_genai.types = fake_types
    fake_google = ModuleType("google")
    fake_google.genai = fake_genai
    with patch.dict(sys.modules, {"google": fake_google, "google.genai": fake_genai}):
        settings = Settings(api_key="test-key", base_url=None, model="gemini-test")
        analyzer = GeminiAnalyzer(settings)
        quota_error = RuntimeError("quota exceeded")
        quota_error.status_code = 429
        quota_error.response_json = {
            "error": {"message": "Resource has been exhausted (e.g. check quota)."}
        }

        def _raise(**_kwargs):
            raise quota_error

        analyzer._client.models.generate_content = _raise
        with self.assertRaises(AnalysisError) as captured:
            analyzer.analyze(Path("main.xaml"), "<Sequence />")
    message = str(captured.exception)
    self.assertIn("HTTP 429", message)
    self.assertIn("Resource has been exhausted", message)
    self.assertIn("--skip-analysis", message)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -57,6 +57,13 @@ class StubAnalyzer:
return f"# {relative_path.as_posix()}\n\n{len(content)}" return f"# {relative_path.as_posix()}\n\n{len(content)}"
class FlakyAnalyzer:
    """Stub analyzer that fails for exactly one file, to exercise fallback paths."""

    def analyze(self, relative_path: Path, content: str) -> str:
        # Simulate a rate-limit failure for the one problematic workflow.
        if relative_path.name != "Active.xaml":
            return f"# {relative_path.as_posix()}\n\n{len(content)}"
        raise RuntimeError("HTTP 429 quota exhausted")
class PipelineTests(unittest.TestCase): class PipelineTests(unittest.TestCase):
def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None: def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None:
source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>" source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>"
@@ -145,6 +152,27 @@ class PipelineTests(unittest.TestCase):
self.assertIn("Final scan complete:", combined) self.assertIn("Final scan complete:", combined)
self.assertIn("Pipeline completed in", combined) self.assertIn("Pipeline completed in", combined)
def test_pipeline_keeps_running_when_single_analysis_fails(self) -> None:
    """One failing file must yield fallback markdown plus a warning, not a crash."""
    with TemporaryDirectory() as tmp:
        base = Path(tmp)
        project_root = base / "project"
        output_root = base / "workspace"
        docs_root = output_root / "docs"

        # Minimal project layout: one flow that fails analysis, one script
        # that succeeds, and the entry-point workflow.
        (project_root / "Flows").mkdir(parents=True)
        (project_root / "Flows" / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
        (project_root / "Scripts").mkdir()
        (project_root / "Scripts" / "Keep.bas").write_text("Sub Keep()\nEnd Sub", encoding="utf-8")
        (project_root / "main.xaml").write_text(MAIN_XAML, encoding="utf-8")

        pipeline = ProjectPipeline(project_root, output_root, "main.xaml", force=True)
        report = pipeline.run(FlakyAnalyzer())

        failed_doc = docs_root / "Flows" / "Active.xaml.analysis.md"
        self.assertTrue(failed_doc.exists())
        fallback = failed_doc.read_text(encoding="utf-8")
        self.assertIn("Gemini 分析失败", fallback)
        self.assertIn("HTTP 429 quota exhausted", fallback)
        self.assertTrue((docs_root / "Scripts" / "Keep.bas.analysis.md").exists())
        self.assertTrue(
            any("Analysis failed for Flows/Active.xaml" in item for item in report.warnings)
        )
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()