# zeroshotGPU/tests/test_app.py
# Arjunvir Singh
# Frontend: zip + multi-file uploads, progress with stage labels, chunk detail tab
# 4e3af73
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
# Import the Space application defensively. If importing ``app`` raises
# RuntimeError, remember the message in APP_IMPORT_ERROR and leave
# ``space_app`` as None so each test can skip itself with that reason.
APP_IMPORT_ERROR = ""
try:
    import app as space_app
except RuntimeError as exc:
    space_app = None
    APP_IMPORT_ERROR = str(exc)
class _UploadedFile:
    """Minimal stand-in for an uploaded-file object; it only carries ``name``.

    The tests in this module pass instances to
    ``space_app.parse_uploaded_document`` with ``name`` set to a filesystem
    path string.
    """

    def __init__(self, name: str) -> None:
        self.name = name
class AppTests(unittest.TestCase):
    """Happy-path check of ``parse_uploaded_document`` for a single document."""

    def test_parse_uploaded_document_returns_artifact_validation(self):
        """Parse one small markdown upload and inspect the returned outputs.

        Checks the output width (11), the validation flags, the archive path,
        and the list of individually downloadable artifact files.
        """
        if space_app is None:
            self.skipTest(APP_IMPORT_ERROR)

        # Core artifacts every parse should produce.
        core_artifacts = (
            "parsed_document.json",
            "document.md",
            "chunks.jsonl",
            "artifact_manifest.json",
        )

        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "sample.md"
            source.write_text("# Report\n\nHello from the Space UI.\n", encoding="utf-8")
            upload = _UploadedFile(str(source))
            outputs = space_app.parse_uploaded_document(upload, "Default lightweight")

            self.assertEqual(len(outputs), 11)
            summary = outputs[1]
            validation = outputs[8]
            archive = outputs[9]
            per_artifact = outputs[10]

            self.assertTrue(summary["artifact_manifest_valid"])
            self.assertTrue(validation["valid"])
            self.assertTrue(Path(archive).exists())

            # Per-artifact downloads.
            self.assertIsInstance(per_artifact, list)
            self.assertGreater(len(per_artifact), 0)
            names = [Path(entry).name for entry in per_artifact]
            for required in core_artifacts:
                self.assertIn(required, names)

            # Each path actually exists on disk so Gradio can serve it.
            for path in per_artifact:
                self.assertTrue(Path(path).exists(), f"missing: {path}")

            # The archive zip is a separate artifact and must NOT appear in
            # the per-artifact list (zip is the bundled-everything view).
            self.assertNotIn(Path(archive).name, names)

            # Summary records the per-artifact count.
            self.assertEqual(summary["individual_artifact_count"], len(per_artifact))
class UploadGuardTests(unittest.TestCase):
    """Tests for the upload guards in ``space_app.parse_uploaded_document``.

    Covers the size limit, the page-count limit, a missing upload path, and
    the requirement that error paths return the same 11-element output tuple
    as the success path.
    """

    def setUp(self):
        # One shared guard instead of a copy in every test: skip the whole
        # class when the app module could not be imported.
        if space_app is None:
            self.skipTest(APP_IMPORT_ERROR)

    def test_oversized_upload_rejected_with_clear_message(self):
        """A file larger than the (patched) byte limit is rejected."""
        with tempfile.TemporaryDirectory() as tmp:
            input_path = Path(tmp) / "huge.md"
            # Roughly 4 KiB of content against a limit patched down to 1 KiB.
            input_path.write_text("# Big\n\n" + "x" * 4096, encoding="utf-8")
            with patch.object(space_app, "MAX_UPLOAD_BYTES", 1024):
                outputs = space_app.parse_uploaded_document(
                    _UploadedFile(str(input_path)), "Default lightweight"
                )
            summary = outputs[1]
            self.assertTrue(summary.get("rejected"))
            self.assertIn("MB", summary["error"])

    def test_high_page_count_rejected(self):
        """A profile reporting more pages than the (patched) limit is rejected."""
        with tempfile.TemporaryDirectory() as tmp:
            input_path = Path(tmp) / "doc.md"
            input_path.write_text("# Doc\n\nSomething small.\n", encoding="utf-8")

            class _FakeProfile:
                # Only the attribute this test needs: far above the limit of 50.
                page_count = 1000

            with patch.object(space_app, "MAX_PAGE_COUNT", 50), patch.object(
                space_app, "profile_document", return_value=_FakeProfile()
            ):
                outputs = space_app.parse_uploaded_document(
                    _UploadedFile(str(input_path)), "Default lightweight"
                )
            summary = outputs[1]
            self.assertTrue(summary.get("rejected"))
            self.assertIn("pages", summary["error"])

    def test_missing_upload_path_rejected(self):
        """An upload whose path does not exist is rejected as missing."""
        # Build the path inside a fresh temporary directory and never create
        # the file: portable across platforms and guaranteed not to exist,
        # unlike a hard-coded /tmp path.
        with tempfile.TemporaryDirectory() as tmp:
            missing_path = Path(tmp) / "zsgdp-does-not-exist.md"
            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(missing_path)), "Default lightweight"
            )
        summary = outputs[1]
        self.assertTrue(summary.get("rejected"))
        self.assertIn("missing", summary["error"].lower())

    def test_error_paths_return_full_tuple_width(self):
        """Error returns must have the same width as the success return."""
        # Drift guard: every return path (success + error) must yield 11
        # outputs so the Gradio click handler doesn't error on shape mismatch.

        # No upload at all.
        outputs = space_app.parse_uploaded_document(None, "Default lightweight")
        self.assertEqual(len(outputs), 11)
        self.assertEqual(outputs[10], [])

        # Missing-file rejection (path is never created, see above).
        with tempfile.TemporaryDirectory() as tmp:
            missing_path = Path(tmp) / "zsgdp-does-not-exist-xyz.md"
            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(missing_path)), "Default lightweight"
            )
        self.assertEqual(len(outputs), 11)
        self.assertEqual(outputs[10], [])

    def test_normal_upload_passes_guards(self):
        """A small, existing document is not flagged as rejected."""
        with tempfile.TemporaryDirectory() as tmp:
            input_path = Path(tmp) / "ok.md"
            input_path.write_text("# OK\n\nA normal document.\n", encoding="utf-8")
            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(input_path)), "Default lightweight"
            )
            summary = outputs[1]
            self.assertNotIn("rejected", summary)
class BatchAndZipUploadTests(unittest.TestCase):
    """Tests for zip uploads, multi-file uploads, and the chunk detail payload."""

    def setUp(self):
        # One shared guard instead of a copy in every test: skip the whole
        # class when the app module could not be imported.
        if space_app is None:
            self.skipTest(APP_IMPORT_ERROR)

    def test_zip_upload_extracts_and_parses_each_doc(self):
        """A zip of two markdown docs yields batch metadata for both."""
        import zipfile

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            # Build a small zip with two markdown docs.
            doc_a = tmp_path / "a.md"
            doc_a.write_text("# Doc A\n\nFirst.\n", encoding="utf-8")
            doc_b = tmp_path / "b.md"
            doc_b.write_text("# Doc B\n\nSecond.\n", encoding="utf-8")
            zip_path = tmp_path / "batch.zip"
            with zipfile.ZipFile(zip_path, "w") as zf:
                zf.write(doc_a, arcname="a.md")
                zf.write(doc_b, arcname="b.md")

            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(zip_path)), "Default lightweight"
            )

            # Tuple width unchanged.
            self.assertEqual(len(outputs), 11)
            summary = outputs[1]

            # Batch metadata recorded.
            self.assertIn("batch", summary)
            batch = summary["batch"]
            self.assertEqual(batch["input_count"], 2)
            self.assertEqual(batch["successful_count"], 2)
            self.assertEqual(batch["failed_count"], 0)
            self.assertEqual(len(batch["documents"]), 2)

            # Aggregate metrics populated.
            agg = batch["aggregate"]
            self.assertGreater(agg["total_chunks"], 0)
            self.assertGreater(agg["mean_quality_score"], 0.0)

    def test_multiple_files_uploaded_as_list(self):
        """A list of two uploads is handled as a batch of two inputs."""
        with tempfile.TemporaryDirectory() as tmp:
            doc1 = Path(tmp) / "one.md"
            doc1.write_text("# One\n\nFirst doc.\n", encoding="utf-8")
            doc2 = Path(tmp) / "two.md"
            doc2.write_text("# Two\n\nSecond doc.\n", encoding="utf-8")

            outputs = space_app.parse_uploaded_document(
                [_UploadedFile(str(doc1)), _UploadedFile(str(doc2))],
                "Default lightweight",
            )

            summary = outputs[1]
            self.assertIn("batch", summary)
            self.assertEqual(summary["batch"]["input_count"], 2)

    def test_zip_with_unsupported_files_filtered_out(self):
        """An unsupported member (.exe) in a zip is not counted as an input."""
        import zipfile

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            zip_path = tmp_path / "mixed.zip"
            doc_a = tmp_path / "first.md"
            doc_a.write_text("# First\n\nContent A.\n", encoding="utf-8")
            doc_b = tmp_path / "second.md"
            doc_b.write_text("# Second\n\nContent B.\n", encoding="utf-8")
            junk = tmp_path / "ignore.exe"
            junk.write_bytes(b"\x00\x01")
            with zipfile.ZipFile(zip_path, "w") as zf:
                zf.write(doc_a, arcname="first.md")
                zf.write(doc_b, arcname="second.md")
                zf.write(junk, arcname="ignore.exe")

            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(zip_path)), "Default lightweight"
            )

            summary = outputs[1]
            # The two .md files parsed; the .exe was filtered out before parsing.
            self.assertIn("batch", summary)
            self.assertEqual(summary["batch"]["input_count"], 2)
            self.assertEqual(summary["batch"]["successful_count"], 2)

    def test_chunk_detail_payload_present(self):
        """The chunking output carries a plan and a per-strategy detail block."""
        with tempfile.TemporaryDirectory() as tmp:
            doc = Path(tmp) / "rich.md"
            doc.write_text(
                "# Rich Doc\n\n"
                "First paragraph with some prose to chunk.\n\n"
                "Second paragraph with different content for variety.\n\n"
                "| A | B |\n| --- | --- |\n| 1 | 2 |\n",
                encoding="utf-8",
            )

            outputs = space_app.parse_uploaded_document(
                _UploadedFile(str(doc)), "Default lightweight"
            )

            chunking_payload = outputs[4]
            self.assertIn("plan", chunking_payload)
            self.assertIn("detail", chunking_payload)
            detail = chunking_payload["detail"]
            self.assertGreater(detail["total_chunks"], 0)
            self.assertIn("strategies", detail)

            # Each strategy block has the expected shape. Only the blocks are
            # inspected, so iterate the values rather than (name, block) pairs.
            for block in detail["strategies"].values():
                self.assertIn("count", block)
                self.assertIn("samples", block)
                self.assertIn("token_count_min", block)
# Run the suite with unittest's own runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()