Claude committed on
Commit
644be36
·
unverified ·
1 Parent(s): 4b1dc89

Fix CI : adapter les tests au 5e concurrent VLM introduit en Sprint 10

Browse files

- test_report.py : 3 assertions == 4 → == 5 (3 OCR + 1 pipeline LLM + 1 VLM zero-shot)
- test_sprint3_llm_pipelines.py : exclure gpt-4o-vision (zero-shot) des vérifications
"non-pipeline", car un concurrent VLM zero-shot EST correctement un pipeline

https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq

tests/test_report.py CHANGED
@@ -32,8 +32,8 @@ class TestGenerateSampleBenchmark:
32
  assert isinstance(sample_benchmark, BenchmarkResult)
33
 
34
  def test_correct_engine_count(self, sample_benchmark):
35
- # 3 moteurs OCR + 1 pipeline tesseract → gpt-4o
36
- assert len(sample_benchmark.engine_reports) == 4
37
 
38
  def test_correct_doc_count(self, sample_benchmark):
39
  assert sample_benchmark.document_count == 3
@@ -89,8 +89,8 @@ class TestBuildReportData:
89
 
90
  def test_engines_count(self, sample_benchmark):
91
  data = _build_report_data(sample_benchmark, {})
92
- # 3 moteurs OCR + 1 pipeline tesseract → gpt-4o
93
- assert len(data["engines"]) == 4
94
 
95
  def test_engine_fields(self, sample_benchmark):
96
  data = _build_report_data(sample_benchmark, {})
@@ -221,7 +221,7 @@ class TestReportGenerator:
221
  data = json.loads(match.group(1))
222
  assert "engines" in data
223
  assert "documents" in data
224
- assert len(data["engines"]) == 4 # 3 OCR + 1 pipeline
225
 
226
 
227
  # ---------------------------------------------------------------------------
 
32
  assert isinstance(sample_benchmark, BenchmarkResult)
33
 
34
  def test_correct_engine_count(self, sample_benchmark):
35
+ # 3 moteurs OCR + 1 pipeline tesseract → gpt-4o + 1 VLM zero-shot (Sprint 10)
36
+ assert len(sample_benchmark.engine_reports) == 5
37
 
38
  def test_correct_doc_count(self, sample_benchmark):
39
  assert sample_benchmark.document_count == 3
 
89
 
90
  def test_engines_count(self, sample_benchmark):
91
  data = _build_report_data(sample_benchmark, {})
92
+ # 3 moteurs OCR + 1 pipeline tesseract → gpt-4o + 1 VLM zero-shot (Sprint 10)
93
+ assert len(data["engines"]) == 5
94
 
95
  def test_engine_fields(self, sample_benchmark):
96
  data = _build_report_data(sample_benchmark, {})
 
221
  data = json.loads(match.group(1))
222
  assert "engines" in data
223
  assert "documents" in data
224
+ assert len(data["engines"]) == 5 # 3 OCR + 1 pipeline LLM + 1 VLM zero-shot (Sprint 10)
225
 
226
 
227
  # ---------------------------------------------------------------------------
tests/test_sprint3_llm_pipelines.py CHANGED
@@ -372,8 +372,10 @@ class TestFixturesPipeline:
372
  assert steps[1]["type"] == "llm"
373
 
374
  def test_non_pipeline_reports_empty_pipeline_info(self, benchmark):
 
 
375
  for report in benchmark.engine_reports:
376
- if report.engine_name != "tesseract → gpt-4o":
377
  assert not report.is_pipeline
378
  assert report.pipeline_info == {}
379
 
@@ -401,8 +403,10 @@ class TestReportWithPipeline:
401
  assert pipeline_e["is_pipeline"] is True
402
 
403
  def test_non_pipeline_engines_not_flagged(self, report_data):
 
 
404
  for e in report_data["engines"]:
405
- if e["name"] != "tesseract → gpt-4o":
406
  assert e["is_pipeline"] is False
407
 
408
  def test_pipeline_has_over_normalization_in_info(self, report_data):
 
372
  assert steps[1]["type"] == "llm"
373
 
374
  def test_non_pipeline_reports_empty_pipeline_info(self, benchmark):
375
+ # Les concurrents pipeline (LLM ou VLM) ont un pipeline_info non vide
376
+ pipeline_engines = {"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"}
377
  for report in benchmark.engine_reports:
378
+ if report.engine_name not in pipeline_engines:
379
  assert not report.is_pipeline
380
  assert report.pipeline_info == {}
381
 
 
403
  assert pipeline_e["is_pipeline"] is True
404
 
405
  def test_non_pipeline_engines_not_flagged(self, report_data):
406
+ # Les concurrents pipeline (LLM ou VLM zero-shot) sont correctement marqués is_pipeline=True
407
+ pipeline_engines = {"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"}
408
  for e in report_data["engines"]:
409
+ if e["name"] not in pipeline_engines:
410
  assert e["is_pipeline"] is False
411
 
412
  def test_pipeline_has_over_normalization_in_info(self, report_data):