Spaces:
Sleeping
Sleeping
Claude committed on
Fix CI : adapter les tests au 5e concurrent VLM introduit en Sprint 10
Browse files
- test_report.py : 3 assertions == 4 → == 5 (3 OCR + 1 pipeline LLM + 1 VLM zero-shot)
- test_sprint3_llm_pipelines.py : exclure gpt-4o-vision (zero-shot) des vérifications
"non-pipeline", car un concurrent VLM zero-shot EST correctement un pipeline
https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq
tests/test_report.py
CHANGED
|
@@ -32,8 +32,8 @@ class TestGenerateSampleBenchmark:
|
|
| 32 |
assert isinstance(sample_benchmark, BenchmarkResult)
|
| 33 |
|
| 34 |
def test_correct_engine_count(self, sample_benchmark):
|
| 35 |
-
# 3 moteurs OCR + 1 pipeline tesseract → gpt-4o
|
| 36 |
-
assert len(sample_benchmark.engine_reports) == 4
|
| 37 |
|
| 38 |
def test_correct_doc_count(self, sample_benchmark):
|
| 39 |
assert sample_benchmark.document_count == 3
|
|
@@ -89,8 +89,8 @@ class TestBuildReportData:
|
|
| 89 |
|
| 90 |
def test_engines_count(self, sample_benchmark):
|
| 91 |
data = _build_report_data(sample_benchmark, {})
|
| 92 |
-
# 3 moteurs OCR + 1 pipeline tesseract → gpt-4o
|
| 93 |
-
assert len(data["engines"]) == 4
|
| 94 |
|
| 95 |
def test_engine_fields(self, sample_benchmark):
|
| 96 |
data = _build_report_data(sample_benchmark, {})
|
|
@@ -221,7 +221,7 @@ class TestReportGenerator:
|
|
| 221 |
data = json.loads(match.group(1))
|
| 222 |
assert "engines" in data
|
| 223 |
assert "documents" in data
|
| 224 |
-
assert len(data["engines"]) == 4
|
| 225 |
|
| 226 |
|
| 227 |
# ---------------------------------------------------------------------------
|
|
|
|
| 32 |
assert isinstance(sample_benchmark, BenchmarkResult)
|
| 33 |
|
| 34 |
def test_correct_engine_count(self, sample_benchmark):
|
| 35 |
+
# 3 moteurs OCR + 1 pipeline tesseract → gpt-4o + 1 VLM zero-shot (Sprint 10)
|
| 36 |
+
assert len(sample_benchmark.engine_reports) == 5
|
| 37 |
|
| 38 |
def test_correct_doc_count(self, sample_benchmark):
|
| 39 |
assert sample_benchmark.document_count == 3
|
|
|
|
| 89 |
|
| 90 |
def test_engines_count(self, sample_benchmark):
|
| 91 |
data = _build_report_data(sample_benchmark, {})
|
| 92 |
+
# 3 moteurs OCR + 1 pipeline tesseract → gpt-4o + 1 VLM zero-shot (Sprint 10)
|
| 93 |
+
assert len(data["engines"]) == 5
|
| 94 |
|
| 95 |
def test_engine_fields(self, sample_benchmark):
|
| 96 |
data = _build_report_data(sample_benchmark, {})
|
|
|
|
| 221 |
data = json.loads(match.group(1))
|
| 222 |
assert "engines" in data
|
| 223 |
assert "documents" in data
|
| 224 |
+
assert len(data["engines"]) == 5 # 3 OCR + 1 pipeline LLM + 1 VLM zero-shot (Sprint 10)
|
| 225 |
|
| 226 |
|
| 227 |
# ---------------------------------------------------------------------------
|
tests/test_sprint3_llm_pipelines.py
CHANGED
|
@@ -372,8 +372,10 @@ class TestFixturesPipeline:
|
|
| 372 |
assert steps[1]["type"] == "llm"
|
| 373 |
|
| 374 |
def test_non_pipeline_reports_empty_pipeline_info(self, benchmark):
|
|
|
|
|
|
|
| 375 |
for report in benchmark.engine_reports:
|
| 376 |
-
if report.engine_name != "tesseract → gpt-4o":
|
| 377 |
assert not report.is_pipeline
|
| 378 |
assert report.pipeline_info == {}
|
| 379 |
|
|
@@ -401,8 +403,10 @@ class TestReportWithPipeline:
|
|
| 401 |
assert pipeline_e["is_pipeline"] is True
|
| 402 |
|
| 403 |
def test_non_pipeline_engines_not_flagged(self, report_data):
|
|
|
|
|
|
|
| 404 |
for e in report_data["engines"]:
|
| 405 |
-
if e["name"] != "tesseract → gpt-4o":
|
| 406 |
assert e["is_pipeline"] is False
|
| 407 |
|
| 408 |
def test_pipeline_has_over_normalization_in_info(self, report_data):
|
|
|
|
| 372 |
assert steps[1]["type"] == "llm"
|
| 373 |
|
| 374 |
def test_non_pipeline_reports_empty_pipeline_info(self, benchmark):
|
| 375 |
+
# Les concurrents pipeline (LLM ou VLM) ont un pipeline_info non vide
|
| 376 |
+
pipeline_engines = {"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"}
|
| 377 |
for report in benchmark.engine_reports:
|
| 378 |
+
if report.engine_name not in pipeline_engines:
|
| 379 |
assert not report.is_pipeline
|
| 380 |
assert report.pipeline_info == {}
|
| 381 |
|
|
|
|
| 403 |
assert pipeline_e["is_pipeline"] is True
|
| 404 |
|
| 405 |
def test_non_pipeline_engines_not_flagged(self, report_data):
|
| 406 |
+
# Les concurrents pipeline (LLM ou VLM zero-shot) sont correctement marqués is_pipeline=True
|
| 407 |
+
pipeline_engines = {"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"}
|
| 408 |
for e in report_data["engines"]:
|
| 409 |
+
if e["name"] not in pipeline_engines:
|
| 410 |
assert e["is_pipeline"] is False
|
| 411 |
|
| 412 |
def test_pipeline_has_over_normalization_in_info(self, report_data):
|