agentic-rl-main / tests /test_deplot_pipeline.py
Jack04810's picture
Add files using upload-large-folder tool
534c64f verified
Raw
History Blame Contribute Delete
4.46 kB
import json
import os
import sys
import tempfile
from unittest.mock import patch
import pytest
from PIL import Image
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data_utils.chart.deplot_pipeline import (
build_deplot_visual_fact,
enrich_entries_with_deplot,
format_deplot_for_teacher,
has_real_deplot,
is_deplot_placeholder,
load_deplot_cache,
placeholder_deplot_table,
save_deplot_cache,
)
from opsd_utils.privileged.providers import VisualFactsProvider
def test_is_deplot_placeholder():
ph = placeholder_deplot_table({"question": "q"})
assert is_deplot_placeholder(ph)
real = build_deplot_visual_fact({"question": "q"}, "A | 1\nB | 2")
assert not is_deplot_placeholder(real)
assert has_real_deplot(real)
def test_format_deplot_for_teacher():
real = build_deplot_visual_fact({"question": "q"}, "Year | Value\n2020 | 10")
assert format_deplot_for_teacher(real) == "Year | Value\n2020 | 10"
assert format_deplot_for_teacher(placeholder_deplot_table({"question": "q"})) == ""
assert format_deplot_for_teacher(None) == ""
assert format_deplot_for_teacher("") == ""
def test_visual_facts_provider_skips_placeholder_and_missing():
provider = VisualFactsProvider()
sample_ph = {
"visual_fact_deplot": placeholder_deplot_table({"question": "q"}),
"visual_fact_hint": "hint text",
}
suffix = provider.build_teacher_suffix(sample_ph)
assert "Visual Facts - Hint" in suffix
assert "Visual Facts - DePlot" not in suffix
sample_none = {"visual_fact_hint": "only hint"}
suffix2 = provider.build_teacher_suffix(sample_none)
assert "Visual Facts - DePlot" not in suffix2
assert "Visual Facts - Hint" in suffix2
def test_visual_facts_provider_real_deplot_table():
provider = VisualFactsProvider()
table = "Category | 2019 | 2020\nA | 1 | 2"
sample = {
"visual_fact_deplot": build_deplot_visual_fact({"question": "q"}, table),
}
suffix = provider.build_teacher_suffix(sample)
assert "Visual Facts - DePlot" in suffix
assert table in suffix
assert '"parsed_table"' not in suffix
def test_deplot_cache_roundtrip():
with tempfile.TemporaryDirectory() as tmp:
path = os.path.join(tmp, "cache.json")
save_deplot_cache(path, {"/a.png": "table text"})
loaded = load_deplot_cache(path)
assert loaded["/a.png"] == "table text"
def test_enrich_disabled_uses_placeholder():
entries = [{"question": "q1", "image": "missing.png"}]
stats = enrich_entries_with_deplot(entries, enabled=False)
assert stats["placeholder"] == 1
assert is_deplot_placeholder(entries[0]["visual_fact_deplot"])
def test_enrich_with_mock_runner():
with tempfile.TemporaryDirectory() as tmp:
img_path = os.path.join(tmp, "chart.png")
Image.new("RGB", (32, 32)).save(img_path)
cache_path = os.path.join(tmp, "deplot_cache.json")
entries = [{"question": "What?", "image": img_path}]
class _FakeRunner:
def load(self):
return True
def generate_batch_with_oom_retry(self, paths, batch_size=8):
return ["Col | Val\nA | 1" for _ in paths]
with patch("data_utils.chart.deplot_pipeline.DePlotRunner", return_value=_FakeRunner()):
stats = enrich_entries_with_deplot(
entries,
enabled=True,
cache_path=cache_path,
)
assert stats["real"] == 1
assert has_real_deplot(entries[0]["visual_fact_deplot"])
assert "Col | Val" in format_deplot_for_teacher(entries[0]["visual_fact_deplot"])
assert os.path.isfile(cache_path)
def test_build_script_disabled(tmp_path):
import subprocess
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
inp = tmp_path / "in.json"
out = tmp_path / "out.json"
inp.write_text(json.dumps([{"question": "q", "hint": "h"}]), encoding="utf-8")
subprocess.run(
[
sys.executable,
os.path.join(root, "scripts", "build_visual_facts_chartqa_deplot.py"),
"--input",
str(inp),
"--output",
str(out),
"--no-enabled",
],
check=True,
cwd=root,
)
data = json.loads(out.read_text(encoding="utf-8"))
assert is_deplot_placeholder(data[0]["visual_fact_deplot"])