feat(analysis): prevent pipeline crash on single file analysis failure
Catch Gemini API errors (e.g., HTTP 429) and summarize upstream messages. Generate fallback markdown for failed files instead of aborting. Append analysis failures to pipeline warnings.
This commit is contained in:
@@ -14,6 +14,10 @@ UNKNOWN_TEXT = "无法从当前文件确定"
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class AnalysisError(RuntimeError):
    """Signal that the analysis of one individual file could not be finished.

    Subclasses ``RuntimeError`` so existing ``except RuntimeError`` handlers
    keep working; the exception message carries the human-readable reason.
    """
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class GeminiAnalyzer:
|
class GeminiAnalyzer:
|
||||||
settings: Settings
|
settings: Settings
|
||||||
@@ -43,6 +47,7 @@ class GeminiAnalyzer:
|
|||||||
started = perf_counter()
|
started = perf_counter()
|
||||||
logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content))
|
logger.info("Submitting Gemini analysis for %s (%d chars)", relative_path.as_posix(), len(content))
|
||||||
prompt = self._build_prompt(relative_path, content)
|
prompt = self._build_prompt(relative_path, content)
|
||||||
|
try:
|
||||||
response = self._client.models.generate_content(
|
response = self._client.models.generate_content(
|
||||||
model=self.settings.model,
|
model=self.settings.model,
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
@@ -59,15 +64,20 @@ class GeminiAnalyzer:
|
|||||||
relative_path.as_posix(),
|
relative_path.as_posix(),
|
||||||
len(response_text),
|
len(response_text),
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
payload = json.loads(response_text)
|
payload = json.loads(response_text)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError as exc:
|
||||||
logger.exception(
|
logger.exception(
|
||||||
"Gemini returned invalid JSON for %s. Response snippet: %r",
|
"Gemini returned invalid JSON for %s. Response snippet: %r",
|
||||||
relative_path.as_posix(),
|
relative_path.as_posix(),
|
||||||
response_text[:500],
|
response_text[:500],
|
||||||
)
|
)
|
||||||
raise
|
raise AnalysisError(
|
||||||
|
f"Gemini 返回了无法解析的 JSON,无法生成该文件说明。原始错误: {exc}"
|
||||||
|
) from exc
|
||||||
|
except Exception as exc:
|
||||||
|
summary = self._summarize_error(exc)
|
||||||
|
logger.exception("Gemini analysis failed for %s: %s", relative_path.as_posix(), summary)
|
||||||
|
raise AnalysisError(summary) from exc
|
||||||
logger.info(
|
logger.info(
|
||||||
"Gemini analysis completed for %s in %.2fs",
|
"Gemini analysis completed for %s in %.2fs",
|
||||||
relative_path.as_posix(),
|
relative_path.as_posix(),
|
||||||
@@ -75,6 +85,38 @@ class GeminiAnalyzer:
|
|||||||
)
|
)
|
||||||
return self._to_markdown(relative_path, payload)
|
return self._to_markdown(relative_path, payload)
|
||||||
|
|
||||||
|
def _summarize_error(self, exc: Exception) -> str:
|
||||||
|
status_code = getattr(exc, "status_code", None)
|
||||||
|
response_json = getattr(exc, "response_json", None)
|
||||||
|
upstream_message = self._extract_error_message(response_json)
|
||||||
|
|
||||||
|
if status_code == 429:
|
||||||
|
detail = upstream_message or "Resource has been exhausted"
|
||||||
|
return (
|
||||||
|
"Gemini 配额或速率限制已触发(HTTP 429),当前文件说明未生成。"
|
||||||
|
f"上游信息: {detail}。可稍后重试,或使用 --skip-analysis 仅导出代码与依赖。"
|
||||||
|
)
|
||||||
|
|
||||||
|
if status_code is not None:
|
||||||
|
detail = upstream_message or str(exc).strip() or exc.__class__.__name__
|
||||||
|
return f"Gemini 调用失败(HTTP {status_code})。上游信息: {detail}"
|
||||||
|
|
||||||
|
detail = str(exc).strip()
|
||||||
|
if detail:
|
||||||
|
return f"Gemini 分析失败: {detail}"
|
||||||
|
return f"Gemini 分析失败: {exc.__class__.__name__}"
|
||||||
|
|
||||||
|
def _extract_error_message(self, response_json: Any) -> str | None:
|
||||||
|
if not isinstance(response_json, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
error = response_json.get("error")
|
||||||
|
if isinstance(error, dict):
|
||||||
|
message = error.get("message")
|
||||||
|
if isinstance(message, str) and message.strip():
|
||||||
|
return message.strip()
|
||||||
|
return None
|
||||||
|
|
||||||
def _response_schema(self) -> dict[str, Any]:
|
def _response_schema(self) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"type": "OBJECT",
|
"type": "OBJECT",
|
||||||
|
|||||||
@@ -93,8 +93,8 @@ class ProjectPipeline:
|
|||||||
len(pruned_files),
|
len(pruned_files),
|
||||||
)
|
)
|
||||||
|
|
||||||
analysis_files = self._write_analysis(final_rel_files, analyzer)
|
analysis_files, analysis_warnings = self._write_analysis(final_rel_files, analyzer)
|
||||||
warnings = initial_scan.warnings + final_scan.warnings
|
warnings = initial_scan.warnings + final_scan.warnings + analysis_warnings
|
||||||
|
|
||||||
report = PipelineReport(
|
report = PipelineReport(
|
||||||
project_root=self.project_root,
|
project_root=self.project_root,
|
||||||
@@ -177,21 +177,28 @@ class ProjectPipeline:
|
|||||||
directory.rmdir()
|
directory.rmdir()
|
||||||
logger.debug("Removed empty directory: %s", directory)
|
logger.debug("Removed empty directory: %s", directory)
|
||||||
|
|
||||||
def _write_analysis(self, final_files: list[Path], analyzer) -> list[Path]:
|
def _write_analysis(self, final_files: list[Path], analyzer) -> tuple[list[Path], list[str]]:
|
||||||
if analyzer is None:
|
if analyzer is None:
|
||||||
logger.info("Skipping Gemini analysis because analyzer is disabled")
|
logger.info("Skipping Gemini analysis because analyzer is disabled")
|
||||||
return []
|
return [], []
|
||||||
|
|
||||||
output_files: list[Path] = []
|
output_files: list[Path] = []
|
||||||
|
warnings: list[str] = []
|
||||||
for relative_path in self._ordered_files(final_files):
|
for relative_path in self._ordered_files(final_files):
|
||||||
content = read_text(self.code_root / relative_path)
|
content = read_text(self.code_root / relative_path)
|
||||||
|
try:
|
||||||
analysis = analyzer.analyze(relative_path, content)
|
analysis = analyzer.analyze(relative_path, content)
|
||||||
|
except Exception as exc:
|
||||||
|
warning = f"Analysis failed for {relative_path.as_posix()}: {self._format_analysis_error(exc)}"
|
||||||
|
warnings.append(warning)
|
||||||
|
logger.warning(warning)
|
||||||
|
analysis = self._build_failed_analysis(relative_path, exc)
|
||||||
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
|
analysis_path = self.docs_root / f"{relative_path.as_posix()}.analysis.md"
|
||||||
analysis_path.parent.mkdir(parents=True, exist_ok=True)
|
analysis_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
analysis_path.write_text(analysis, encoding="utf-8")
|
analysis_path.write_text(analysis, encoding="utf-8")
|
||||||
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
|
output_files.append(Path(f"{relative_path.as_posix()}.analysis.md"))
|
||||||
logger.debug("Wrote analysis file: %s", analysis_path)
|
logger.debug("Wrote analysis file: %s", analysis_path)
|
||||||
return output_files
|
return output_files, warnings
|
||||||
|
|
||||||
def _write_report_files(self, report: PipelineReport) -> None:
|
def _write_report_files(self, report: PipelineReport) -> None:
|
||||||
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
|
(self.docs_root / "manifest.json").write_text(report.to_json(), encoding="utf-8")
|
||||||
@@ -252,3 +259,23 @@ class ProjectPipeline:
|
|||||||
|
|
||||||
def _ordered_files(self, paths: list[Path]) -> list[Path]:
|
def _ordered_files(self, paths: list[Path]) -> list[Path]:
|
||||||
return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))
|
return sorted(paths, key=lambda item: (item.suffix.lower() != ".xaml", item.as_posix().lower()))
|
||||||
|
|
||||||
|
def _build_failed_analysis(self, relative_path: Path, exc: Exception) -> str:
|
||||||
|
reason = self._format_analysis_error(exc)
|
||||||
|
return "\n".join(
|
||||||
|
[
|
||||||
|
f"# {relative_path.as_posix()}",
|
||||||
|
"",
|
||||||
|
"## 分析状态",
|
||||||
|
"- 状态:Gemini 分析失败,当前文件未生成结构化说明。",
|
||||||
|
f"- 原因:{reason}",
|
||||||
|
"- 建议:稍后重试;如果当前只需要导出代码与依赖,可使用 `--skip-analysis`。",
|
||||||
|
"",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def _format_analysis_error(self, exc: Exception) -> str:
|
||||||
|
message = str(exc).strip()
|
||||||
|
if message:
|
||||||
|
return message
|
||||||
|
return exc.__class__.__name__
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
|||||||
sys.modules.setdefault("dotenv", fake_dotenv)
|
sys.modules.setdefault("dotenv", fake_dotenv)
|
||||||
|
|
||||||
from uipath_explainator.config import Settings
|
from uipath_explainator.config import Settings
|
||||||
from uipath_explainator.gemini import GeminiAnalyzer
|
from uipath_explainator.gemini import AnalysisError, GeminiAnalyzer
|
||||||
|
|
||||||
|
|
||||||
class FakeHttpOptions:
|
class FakeHttpOptions:
|
||||||
@@ -159,6 +159,33 @@ class GeminiAnalyzerTests(unittest.TestCase):
|
|||||||
self.assertIn("先讲这个文件在整个流程中的定位", prompt)
|
self.assertIn("先讲这个文件在整个流程中的定位", prompt)
|
||||||
self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt)
|
self.assertIn("判断逻辑、调用链、输入输出、关键变量、外部依赖", prompt)
|
||||||
|
|
||||||
|
def test_analyze_wraps_rate_limit_error_with_clear_message(self) -> None:
    """A 429 raised by the client surfaces as AnalysisError with a clear message."""
    fake_genai = ModuleType("google.genai")
    fake_genai.Client = FakeClient
    fake_genai.types = SimpleNamespace(
        HttpOptions=FakeHttpOptions,
        GenerateContentConfig=FakeGenerateContentConfig,
    )
    fake_google = ModuleType("google")
    fake_google.genai = fake_genai
    patched_modules = {"google": fake_google, "google.genai": fake_genai}

    # NOTE(review): the original indentation is not recoverable from this
    # view; the whole scenario is kept inside the patch so the fake modules
    # stay installed for every step.
    with patch.dict(sys.modules, patched_modules):
        analyzer = GeminiAnalyzer(Settings(api_key="test-key", base_url=None, model="gemini-test"))

        error = RuntimeError("quota exceeded")
        error.status_code = 429
        error.response_json = {"error": {"message": "Resource has been exhausted (e.g. check quota)."}}

        def raise_rate_limit(**_):
            raise error

        analyzer._client.models.generate_content = raise_rate_limit

        with self.assertRaises(AnalysisError) as captured:
            analyzer.analyze(Path("main.xaml"), "<Sequence />")

        message = str(captured.exception)
        self.assertIn("HTTP 429", message)
        self.assertIn("Resource has been exhausted", message)
        self.assertIn("--skip-analysis", message)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -57,6 +57,13 @@ class StubAnalyzer:
|
|||||||
return f"# {relative_path.as_posix()}\n\n{len(content)}"
|
return f"# {relative_path.as_posix()}\n\n{len(content)}"
|
||||||
|
|
||||||
|
|
||||||
|
class FlakyAnalyzer:
    """Analyzer stub that fails for ``Active.xaml`` and succeeds for any other file."""

    def analyze(self, relative_path: Path, content: str) -> str:
        """Return a tiny markdown stub, or raise for the file named ``Active.xaml``.

        Raises:
            RuntimeError: When ``relative_path.name`` is ``"Active.xaml"``.
        """
        if relative_path.name != "Active.xaml":
            return f"# {relative_path.as_posix()}\n\n{len(content)}"
        raise RuntimeError("HTTP 429 quota exhausted")
|
||||||
|
|
||||||
|
|
||||||
class PipelineTests(unittest.TestCase):
|
class PipelineTests(unittest.TestCase):
|
||||||
def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None:
|
def test_strip_comment_out_blocks_removes_nested_blocks(self) -> None:
|
||||||
source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>"
|
source = "<root><ui:CommentOut><x/><ui:CommentOut><y/></ui:CommentOut></ui:CommentOut><z/></root>"
|
||||||
@@ -145,6 +152,27 @@ class PipelineTests(unittest.TestCase):
|
|||||||
self.assertIn("Final scan complete:", combined)
|
self.assertIn("Final scan complete:", combined)
|
||||||
self.assertIn("Pipeline completed in", combined)
|
self.assertIn("Pipeline completed in", combined)
|
||||||
|
|
||||||
|
def test_pipeline_keeps_running_when_single_analysis_fails(self) -> None:
    """One failing file yields a fallback document and a warning, not a crash."""
    with TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        project_root = workdir / "project"
        output_root = workdir / "workspace"
        docs_root = output_root / "docs"

        # Lay out a minimal project: an entry point, one workflow, one script.
        flows_dir = project_root / "Flows"
        flows_dir.mkdir(parents=True)
        (flows_dir / "Active.xaml").write_text(ACTIVE_XAML, encoding="utf-8")
        scripts_dir = project_root / "Scripts"
        scripts_dir.mkdir()
        (scripts_dir / "Keep.bas").write_text("Sub Keep()\nEnd Sub", encoding="utf-8")
        (project_root / "main.xaml").write_text(MAIN_XAML, encoding="utf-8")

        pipeline = ProjectPipeline(project_root, output_root, "main.xaml", force=True)
        report = pipeline.run(FlakyAnalyzer())

        failed_doc = docs_root / "Flows" / "Active.xaml.analysis.md"
        self.assertTrue(failed_doc.exists())
        fallback = failed_doc.read_text(encoding="utf-8")
        self.assertIn("Gemini 分析失败", fallback)
        self.assertIn("HTTP 429 quota exhausted", fallback)
        self.assertTrue((docs_root / "Scripts" / "Keep.bas.analysis.md").exists())
        self.assertTrue(any("Analysis failed for Flows/Active.xaml" in item for item in report.warnings))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user