Spaces:

ruslanmv
/

autoapp-builder

Sleeping

App Files Files Community

ruslanmv committed on Apr 1

Commit

bbda63d

verified ·

1 Parent(s): 2c304fc

fix: escape curly braces in chatbot template to prevent KeyError with .format()

Browse files

Files changed (9) hide show

.pytest_cache/.gitignore +2 -0
.pytest_cache/CACHEDIR.TAG +4 -0
.pytest_cache/README.md +8 -0
.pytest_cache/v/cache/lastfailed +1 -0
.pytest_cache/v/cache/nodeids +134 -0
app/codegen/gradio_generator.py +4 -4
tests/__init__.py +0 -0
tests/conftest.py +22 -0
tests/test_app.py +901 -0

.pytest_cache/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Created by pytest automatically.
2	+ *

.pytest_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1,4 @@

+Signature: 8a477f597d28d172789f06886806bc55
+# This file is a cache directory tag created by pytest.
+# For information about cache directory tags, see:
+#	https://bford.info/cachedir/spec.html

.pytest_cache/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+# pytest cache directory #
+This directory contains data from the pytest's cache plugin,
+which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
+**Do not** commit this to version control.
+See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.

.pytest_cache/v/cache/lastfailed ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

.pytest_cache/v/cache/nodeids ADDED Viewed

	@@ -0,0 +1,134 @@

+[
+  "tests/test_app.py::TestAppPlanner::test_app_name_contains_meaningful_words",
+  "tests/test_app.py::TestAppPlanner::test_app_name_falls_back_when_only_stop_words",
+  "tests/test_app.py::TestAppPlanner::test_app_name_is_slugified",
+  "tests/test_app.py::TestAppPlanner::test_chatbot_matches_chatbot_template",
+  "tests/test_app.py::TestAppPlanner::test_plan_components_is_list",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[app_name]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[app_type]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[components]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[description]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[model_task]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[original_prompt]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[sdk]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[template_key]",
+  "tests/test_app.py::TestAppPlanner::test_plan_has_required_key[title]",
+  "tests/test_app.py::TestAppPlanner::test_portfolio_matches_template",
+  "tests/test_app.py::TestAppPlanner::test_rest_api_matches_template",
+  "tests/test_app.py::TestAppPlanner::test_sdk_auto_selects_docker_for_api_service",
+  "tests/test_app.py::TestAppPlanner::test_sdk_auto_selects_gradio_for_chatbot",
+  "tests/test_app.py::TestAppPlanner::test_sdk_auto_selects_gradio_for_image_classifier",
+  "tests/test_app.py::TestAppPlanner::test_sdk_auto_selects_static_for_landing_page",
+  "tests/test_app.py::TestAppPlanner::test_sdk_defaults_to_gradio_when_no_signal",
+  "tests/test_app.py::TestAppPlanner::test_sdk_preference_overrides_auto",
+  "tests/test_app.py::TestAppPlanner::test_sentiment_matches_template",
+  "tests/test_app.py::TestAppPlanner::test_summarizer_matches_template",
+  "tests/test_app.py::TestCodeChecker::test_dangerous_pattern_warned",
+  "tests/test_app.py::TestCodeChecker::test_docker_missing_dockerfile_is_error",
+  "tests/test_app.py::TestCodeChecker::test_dockerfile_missing_cmd_is_error",
+  "tests/test_app.py::TestCodeChecker::test_dockerfile_missing_expose_is_warning",
+  "tests/test_app.py::TestCodeChecker::test_dockerfile_missing_from_is_error",
+  "tests/test_app.py::TestCodeChecker::test_empty_file_flagged",
+  "tests/test_app.py::TestCodeChecker::test_eval_warned",
+  "tests/test_app.py::TestCodeChecker::test_gradio_missing_app_py_is_error",
+  "tests/test_app.py::TestCodeChecker::test_html_missing_tags_warned",
+  "tests/test_app.py::TestCodeChecker::test_invalid_python_syntax_caught",
+  "tests/test_app.py::TestCodeChecker::test_overall_valid_when_no_errors",
+  "tests/test_app.py::TestCodeChecker::test_readme_missing_frontmatter_warned",
+  "tests/test_app.py::TestCodeChecker::test_static_missing_index_html_is_error",
+  "tests/test_app.py::TestCodeChecker::test_valid_dockerfile_passes",
+  "tests/test_app.py::TestCodeChecker::test_valid_html_passes",
+  "tests/test_app.py::TestCodeChecker::test_valid_python_passes",
+  "tests/test_app.py::TestDockerGenerator::test_app_py_is_valid_python",
+  "tests/test_app.py::TestDockerGenerator::test_docker_template_app_py_is_valid_python[generic_docker]",
+  "tests/test_app.py::TestDockerGenerator::test_docker_template_app_py_is_valid_python[rest_api]",
+  "tests/test_app.py::TestDockerGenerator::test_dockerfile_exposes_7860",
+  "tests/test_app.py::TestDockerGenerator::test_dockerfile_has_cmd",
+  "tests/test_app.py::TestDockerGenerator::test_dockerfile_has_from",
+  "tests/test_app.py::TestDockerGenerator::test_generate_produces_app_py",
+  "tests/test_app.py::TestDockerGenerator::test_generate_produces_dockerfile",
+  "tests/test_app.py::TestDockerGenerator::test_generate_produces_requirements",
+  "tests/test_app.py::TestDockerGenerator::test_generic_docker_fallback",
+  "tests/test_app.py::TestDockerGenerator::test_parse_files_empty_returns_empty",
+  "tests/test_app.py::TestDockerGenerator::test_parse_files_marker_format",
+  "tests/test_app.py::TestEndToEnd::test_chatbot_e2e",
+  "tests/test_app.py::TestEndToEnd::test_image_classifier_e2e",
+  "tests/test_app.py::TestEndToEnd::test_portfolio_e2e",
+  "tests/test_app.py::TestEndToEnd::test_rest_api_e2e",
+  "tests/test_app.py::TestEndToEnd::test_summarizer_small_model_e2e",
+  "tests/test_app.py::TestFastAPIApp::test_download_nonexistent_project_returns_404",
+  "tests/test_app.py::TestFastAPIApp::test_edit_nonexistent_project_returns_404",
+  "tests/test_app.py::TestFastAPIApp::test_get_file_nonexistent_project_returns_404",
+  "tests/test_app.py::TestFastAPIApp::test_home_contains_title",
+  "tests/test_app.py::TestFastAPIApp::test_home_returns_200",
+  "tests/test_app.py::TestFastAPIApp::test_post_generate_contains_file_content",
+  "tests/test_app.py::TestFastAPIApp::test_post_generate_returns_html",
+  "tests/test_app.py::TestGradioGenerator::test_extract_code_from_markdown_block",
+  "tests/test_app.py::TestGradioGenerator::test_extract_code_plain_python",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[chatbot]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[image_classifier]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[question_answering]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[sentiment_analyzer]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[text_generator]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[text_summarizer]",
+  "tests/test_app.py::TestGradioGenerator::test_fallback_template_produces_valid_python[translator]",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_chatbot",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_image_classifier",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_qa",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_sentiment",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_summarizer",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_text_gen",
+  "tests/test_app.py::TestGradioGenerator::test_generate_produces_valid_python_translator",
+  "tests/test_app.py::TestGradioGenerator::test_generated_code_contains_gradio_import",
+  "tests/test_app.py::TestGradioGenerator::test_generated_code_contains_launch",
+  "tests/test_app.py::TestGradioGenerator::test_generic_fallback_for_unknown_template",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[automatic-speech-recognition]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[image-classification]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[object-detection]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[question-answering]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[summarization]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[text-classification]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[text-generation]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[text-to-image]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[token-classification]",
+  "tests/test_app.py::TestModelRecommender::test_catalog_task_has_all_sizes[translation]",
+  "tests/test_app.py::TestModelRecommender::test_different_tasks_produce_different_models",
+  "tests/test_app.py::TestModelRecommender::test_get_primary_model_none_for_missing_task",
+  "tests/test_app.py::TestModelRecommender::test_get_primary_model_returns_string",
+  "tests/test_app.py::TestModelRecommender::test_invalid_size_falls_back_to_medium",
+  "tests/test_app.py::TestModelRecommender::test_large_models_recommend_gpu",
+  "tests/test_app.py::TestModelRecommender::test_large_size_returns_models",
+  "tests/test_app.py::TestModelRecommender::test_no_task_returns_empty",
+  "tests/test_app.py::TestModelRecommender::test_recommend_returns_list",
+  "tests/test_app.py::TestModelRecommender::test_recommended_models_have_id",
+  "tests/test_app.py::TestModelRecommender::test_recommended_models_have_required_fields",
+  "tests/test_app.py::TestModelRecommender::test_small_and_large_return_different_models",
+  "tests/test_app.py::TestModelRecommender::test_small_size_returns_models",
+  "tests/test_app.py::TestModelRecommender::test_summarization_vs_translation",
+  "tests/test_app.py::TestReadmeGenerator::test_docker_readme_mentions_fastapi",
+  "tests/test_app.py::TestReadmeGenerator::test_frontmatter_contains_sdk_docker",
+  "tests/test_app.py::TestReadmeGenerator::test_frontmatter_contains_sdk_gradio",
+  "tests/test_app.py::TestReadmeGenerator::test_frontmatter_contains_sdk_static",
+  "tests/test_app.py::TestReadmeGenerator::test_frontmatter_contains_title",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_contains_app_name_in_body",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_contains_description",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_contains_features_section",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_contains_model_reference",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_contains_tech_stack",
+  "tests/test_app.py::TestReadmeGenerator::test_readme_has_yaml_frontmatter",
+  "tests/test_app.py::TestReadmeGenerator::test_static_readme_mentions_html",
+  "tests/test_app.py::TestRepoGenerator::test_docker_repo_has_app_py",
+  "tests/test_app.py::TestRepoGenerator::test_docker_repo_has_dockerfile",
+  "tests/test_app.py::TestRepoGenerator::test_docker_repo_has_readme",
+  "tests/test_app.py::TestRepoGenerator::test_docker_repo_has_requirements",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_chart_component_includes_matplotlib",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_image_task_includes_pillow",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_repo_has_app_py",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_repo_has_gitignore",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_repo_has_readme",
+  "tests/test_app.py::TestRepoGenerator::test_gradio_repo_has_requirements_txt",
+  "tests/test_app.py::TestRepoGenerator::test_static_index_contains_title",
+  "tests/test_app.py::TestRepoGenerator::test_static_repo_has_index_html",
+  "tests/test_app.py::TestRepoGenerator::test_static_repo_has_readme",
+  "tests/test_app.py::TestRepoGenerator::test_static_repo_has_style_css"
+]

app/codegen/gradio_generator.py CHANGED Viewed

@@ -18,13 +18,13 @@ from huggingface_hub import InferenceClient
 client = InferenceClient("{model_id}")
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
         if bot_msg:
-            messages.append({"role": "assistant", "content": bot_msg})
-    messages.append({"role": "user", "content": message})
     response = ""
     for chunk in client.chat_completion(

 client = InferenceClient("{model_id}")
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    messages = [{{"role": "system", "content": system_message}}]
     for user_msg, bot_msg in history:
         if user_msg:
+            messages.append({{"role": "user", "content": user_msg}})
         if bot_msg:
+            messages.append({{"role": "assistant", "content": bot_msg}})
+    messages.append({{"role": "user", "content": message}})
     response = ""
     for chunk in client.chat_completion(

tests/__init__.py ADDED Viewed

File without changes

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,22 @@

+"""Shared fixtures for AutoApp Builder tests."""
+import sys
+from pathlib import Path
+import pytest
+# Ensure imports resolve from the hf/ directory.
+HF_ROOT = Path(__file__).resolve().parent.parent
+if str(HF_ROOT) not in sys.path:
+    sys.path.insert(0, str(HF_ROOT))
+@pytest.fixture()
+def client():
+    """Yield a synchronous Starlette ``TestClient`` bound to the FastAPI app.
+
+    The app is imported lazily, inside the fixture, so that a failure to
+    import ``app.main`` (or a missing ``starlette``) skips only the tests
+    that request this fixture instead of aborting collection of every test
+    module that shares this conftest.
+    """
+    testclient = pytest.importorskip("starlette.testclient")
+    try:
+        from app.main import app as fastapi_app
+    except Exception as exc:  # boundary: any import-time failure means "cannot run"
+        pytest.skip(f"FastAPI app could not be imported: {exc}")
+    # raise_server_exceptions=False makes unhandled server errors surface as
+    # HTTP 500 responses, so tests can assert on status codes.
+    with testclient.TestClient(fastapi_app, raise_server_exceptions=False) as c:
+        yield c

tests/test_app.py ADDED Viewed

	@@ -0,0 +1,901 @@

+"""
+Comprehensive unit tests for the AutoApp Builder Hugging Face Space.
+All tests run offline -- no external API calls are made.
+"""
+import ast
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+import pytest
+# ---------------------------------------------------------------------------
+# Path setup -- ensure ``app`` package is importable from the hf/ directory.
+# ---------------------------------------------------------------------------
+HF_ROOT = Path(__file__).resolve().parent.parent
+if str(HF_ROOT) not in sys.path:
+    sys.path.insert(0, str(HF_ROOT))
+from app.engine.app_planner import AppPlanner, APP_TEMPLATES
+from app.engine.model_recommender import ModelRecommender, MODEL_CATALOG
+from app.codegen.repo_generator import RepoGenerator
+from app.codegen.gradio_generator import GradioGenerator, GRADIO_TEMPLATES
+from app.codegen.docker_generator import DockerGenerator, DOCKER_TEMPLATES
+from app.codegen.readme_generator import ReadmeGenerator
+from app.validators.code_checker import CodeChecker
+def _can_run_fastapi_tests():
+    """Check if FastAPI integration tests can run in this environment.
+
+    Returns True only when the app can be imported, a TestClient can be
+    opened on it, and ``GET /`` answers with something other than HTTP 500.
+    Any exception raised along the way is treated as "cannot run".
+    """
+    try:
+        # Imported here (not at module top) so a missing dependency is
+        # caught by the except below rather than breaking module import.
+        from starlette.testclient import TestClient
+        from app.main import app
+        # raise_server_exceptions=False: server-side errors come back as a
+        # response instead of propagating, so the status check below sees them.
+        with TestClient(app, raise_server_exceptions=False) as c:
+            resp = c.get("/")
+            return resp.status_code != 500
+    except Exception:
+        # Deliberately broad: this is a best-effort capability probe.
+        return False
+# ===================================================================
+# 1. AppPlanner
+# ===================================================================
+class TestAppPlanner:
+    """Tests for AppPlanner.analyze().
+
+    Each test calls ``analyze(prompt, sdk_preference)`` on a fresh planner
+    and checks one aspect of the returned plan dict: the chosen SDK, the
+    generated app name, the presence of required keys, or the matched
+    template key.
+    """
+    def setup_method(self):
+        # A new planner per test keeps the tests independent of each other.
+        self.planner = AppPlanner()
+    # -- SDK auto-selection ------------------------------------------------
+    def test_sdk_auto_selects_gradio_for_chatbot(self):
+        plan = self.planner.analyze("Build a chatbot that answers questions", "auto")
+        assert plan["sdk"] == "gradio"
+    def test_sdk_auto_selects_gradio_for_image_classifier(self):
+        plan = self.planner.analyze(
+            "Build a Gradio app that classifies images using ResNet", "auto"
+        )
+        assert plan["sdk"] == "gradio"
+    def test_sdk_auto_selects_docker_for_api_service(self):
+        plan = self.planner.analyze(
+            "Build a REST API service with FastAPI endpoints", "auto"
+        )
+        assert plan["sdk"] == "docker"
+    def test_sdk_auto_selects_static_for_landing_page(self):
+        plan = self.planner.analyze(
+            "Create a beautiful static landing page for my portfolio", "auto"
+        )
+        assert plan["sdk"] == "static"
+    def test_sdk_preference_overrides_auto(self):
+        # An explicit preference wins even when the prompt mentions a chatbot.
+        plan = self.planner.analyze("Build a chatbot", "docker")
+        assert plan["sdk"] == "docker"
+    def test_sdk_defaults_to_gradio_when_no_signal(self):
+        plan = self.planner.analyze("Do something cool", "auto")
+        assert plan["sdk"] == "gradio"
+    # -- App name generation (slugified) -----------------------------------
+    def test_app_name_is_slugified(self):
+        plan = self.planner.analyze("Build an Image Classifier for dogs", "auto")
+        name = plan["app_name"]
+        assert " " not in name
+        assert name == name.lower()
+        # Should contain only lowercase alphanumeric and hyphens
+        assert all(c.isalnum() or c == "-" for c in name)
+    def test_app_name_contains_meaningful_words(self):
+        plan = self.planner.analyze("Create a sentiment analysis dashboard", "auto")
+        name = plan["app_name"]
+        # Should contain at least one meaningful keyword
+        assert any(kw in name for kw in ["sentiment", "analysis", "dashboard"])
+    def test_app_name_falls_back_when_only_stop_words(self):
+        plan = self.planner.analyze("Build a the an is are", "auto")
+        name = plan["app_name"]
+        # Should still produce a valid slug (fallback to "my-app")
+        assert len(name) > 0
+        # Apply the same character check as test_app_name_is_slugified. The
+        # previous check (`"-" in name or name.isalnum()`) accepted any
+        # string that merely contained a hyphen, slug or not.
+        assert all(c.isalnum() or c == "-" for c in name)
+    # -- Plan structure has required keys ----------------------------------
+    REQUIRED_KEYS = [
+        "sdk", "app_name", "app_type", "title", "description",
+        "components", "model_task", "template_key", "original_prompt",
+    ]
+    @pytest.mark.parametrize("key", REQUIRED_KEYS)
+    def test_plan_has_required_key(self, key):
+        plan = self.planner.analyze("Build a chatbot", "auto")
+        assert key in plan, f"Plan missing required key: {key}"
+    def test_plan_components_is_list(self):
+        plan = self.planner.analyze("Build a chatbot", "auto")
+        assert isinstance(plan["components"], list)
+        assert len(plan["components"]) > 0
+    # -- Template matching -------------------------------------------------
+    def test_chatbot_matches_chatbot_template(self):
+        plan = self.planner.analyze("Build a chatbot", "auto")
+        assert plan["template_key"] == "chatbot"
+    def test_summarizer_matches_template(self):
+        plan = self.planner.analyze("Build a text summarization tool", "auto")
+        assert plan["template_key"] == "text_summarizer"
+    def test_sentiment_matches_template(self):
+        plan = self.planner.analyze("Create a sentiment analysis app", "auto")
+        assert plan["template_key"] == "sentiment_analyzer"
+    def test_portfolio_matches_template(self):
+        plan = self.planner.analyze("Create a portfolio website to showcase my work", "auto")
+        assert plan["template_key"] == "portfolio"
+    def test_rest_api_matches_template(self):
+        plan = self.planner.analyze("Build a REST API server with FastAPI", "auto")
+        assert plan["template_key"] == "rest_api"
+# ===================================================================
+# 2. ModelRecommender
+# ===================================================================
+class TestModelRecommender:
+    """Tests for ModelRecommender."""
+    def setup_method(self):
+        # A new recommender per test keeps the tests independent of each other.
+        self.recommender = ModelRecommender()
+    # Build the smallest plan these tests pass in: only the "model_task" key.
+    def _make_plan(self, task: str) -> dict:
+        return {"model_task": task}
+    # -- Recommendations return valid model IDs ----------------------------
+    def test_recommend_returns_list(self):
+        models = self.recommender.recommend(self._make_plan("text-generation"))
+        assert isinstance(models, list)
+        assert len(models) > 0
+    def test_recommended_models_have_id(self):
+        models = self.recommender.recommend(self._make_plan("text-generation"))
+        for m in models:
+            assert "id" in m
+            assert isinstance(m["id"], str)
+            assert len(m["id"]) > 0
+    def test_recommended_models_have_required_fields(self):
+        models = self.recommender.recommend(self._make_plan("summarization"))
+        for m in models:
+            assert "id" in m
+            assert "desc" in m
+            assert "size" in m
+            assert "gpu_recommended" in m
+    # -- Different task types produce different models ---------------------
+    # Only the first recommendation of each task is compared.
+    def test_different_tasks_produce_different_models(self):
+        text_gen = self.recommender.recommend(self._make_plan("text-generation"))
+        img_cls = self.recommender.recommend(self._make_plan("image-classification"))
+        assert text_gen[0]["id"] != img_cls[0]["id"]
+    def test_summarization_vs_translation(self):
+        summ = self.recommender.recommend(self._make_plan("summarization"))
+        trans = self.recommender.recommend(self._make_plan("translation"))
+        assert summ[0]["id"] != trans[0]["id"]
+    # -- Model size filtering (small/medium/large) -------------------------
+    def test_small_size_returns_models(self):
+        models = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="small"
+        )
+        assert len(models) > 0
+    def test_large_size_returns_models(self):
+        models = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="large"
+        )
+        assert len(models) > 0
+    def test_small_and_large_return_different_models(self):
+        small = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="small"
+        )
+        large = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="large"
+        )
+        assert small[0]["id"] != large[0]["id"]
+    def test_invalid_size_falls_back_to_medium(self):
+        models = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="xxl"
+        )
+        medium = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="medium"
+        )
+        # Invalid size normalises to medium, so results should match
+        assert models[0]["id"] == medium[0]["id"]
+    def test_large_models_recommend_gpu(self):
+        models = self.recommender.recommend(
+            self._make_plan("text-generation"), model_size="large"
+        )
+        assert models[0]["gpu_recommended"] is True
+    # A plan whose model_task is None is expected to yield an empty list.
+    def test_no_task_returns_empty(self):
+        models = self.recommender.recommend({"model_task": None})
+        assert models == []
+    # -- get_primary_model -------------------------------------------------
+    def test_get_primary_model_returns_string(self):
+        model_id = self.recommender.get_primary_model(
+            self._make_plan("text-generation")
+        )
+        assert isinstance(model_id, str)
+        # Accepts an id containing "/" or a value starting with "models?".
+        # NOTE(review): the "models?" form is presumably a Hub search-query
+        # fallback -- confirm against ModelRecommender.get_primary_model.
+        assert "/" in model_id or model_id.startswith("models?")
+    def test_get_primary_model_none_for_missing_task(self):
+        model_id = self.recommender.get_primary_model({"model_task": None})
+        assert model_id is None
+    # -- All catalog tasks have all sizes ----------------------------------
+    # Parametrized over every key of MODEL_CATALOG at collection time.
+    @pytest.mark.parametrize("task", list(MODEL_CATALOG.keys()))
+    def test_catalog_task_has_all_sizes(self, task):
+        for size in ("small", "medium", "large"):
+            assert size in MODEL_CATALOG[task], (
+                f"MODEL_CATALOG['{task}'] missing size '{size}'"
+            )
+# ===================================================================
+# 3. RepoGenerator
+# ===================================================================
+class TestRepoGenerator:
+    """Tests for RepoGenerator (uses template fallbacks, no API)."""
+    def setup_method(self):
+        # A new generator per test keeps the tests independent of each other.
+        self.generator = RepoGenerator()
+    # Build a complete plan dict for the given SDK. "app_type" falls back to
+    # "custom" when template_key is falsy, and keyword overrides are applied
+    # last, so they replace any of the defaults below.
+    def _make_plan(self, sdk, template_key=None, **overrides):
+        plan = {
+            "sdk": sdk,
+            "app_name": "test-app",
+            "app_type": template_key or "custom",
+            "title": "Test App",
+            "description": "A test application",
+            "components": ["text_input", "text_output"],
+            "model_task": "text-generation",
+            "template_key": template_key,
+            "original_prompt": "build a test app",
+            "recommended_models": [
+                {"id": "Qwen/Qwen2.5-7B-Instruct", "desc": "Test", "size": "7B"}
+            ],
+            "extra_features": [],
+        }
+        plan.update(overrides)
+        return plan
+    # -- Gradio repo generation -------------------------------------------
+    # The tests below use the result of generate() as a mapping keyed by
+    # file name, with each file's text content as the value.
+    def test_gradio_repo_has_app_py(self):
+        plan = self._make_plan("gradio", "chatbot")
+        files = self.generator.generate(plan, "build a chatbot")
+        assert "app.py" in files
+    def test_gradio_repo_has_requirements_txt(self):
+        plan = self._make_plan("gradio", "chatbot")
+        files = self.generator.generate(plan, "build a chatbot")
+        assert "requirements.txt" in files
+        assert "gradio" in files["requirements.txt"]
+    def test_gradio_repo_has_readme(self):
+        plan = self._make_plan("gradio", "chatbot")
+        files = self.generator.generate(plan, "build a chatbot")
+        assert "README.md" in files
+    def test_gradio_repo_has_gitignore(self):
+        plan = self._make_plan("gradio", "chatbot")
+        files = self.generator.generate(plan, "build a chatbot")
+        assert ".gitignore" in files
+    # -- Docker repo generation -------------------------------------------
+    def test_docker_repo_has_dockerfile(self):
+        plan = self._make_plan("docker", "rest_api")
+        files = self.generator.generate(plan, "build a rest api")
+        assert "Dockerfile" in files
+    def test_docker_repo_has_app_py(self):
+        plan = self._make_plan("docker", "rest_api")
+        files = self.generator.generate(plan, "build a rest api")
+        assert "app.py" in files
+    def test_docker_repo_has_requirements(self):
+        plan = self._make_plan("docker", "rest_api")
+        files = self.generator.generate(plan, "build a rest api")
+        assert "requirements.txt" in files
+    def test_docker_repo_has_readme(self):
+        plan = self._make_plan("docker", "rest_api")
+        files = self.generator.generate(plan, "build a rest api")
+        assert "README.md" in files
+    # -- Static repo generation -------------------------------------------
+    # Static plans override model_task to None.
+    def test_static_repo_has_index_html(self):
+        plan = self._make_plan("static", "portfolio", model_task=None)
+        files = self.generator.generate(plan, "build a portfolio site")
+        assert "index.html" in files
+    def test_static_repo_has_style_css(self):
+        plan = self._make_plan("static", "portfolio", model_task=None)
+        files = self.generator.generate(plan, "build a portfolio site")
+        assert "style.css" in files
+    def test_static_repo_has_readme(self):
+        plan = self._make_plan("static", "portfolio", model_task=None)
+        files = self.generator.generate(plan, "build a portfolio site")
+        assert "README.md" in files
+    def test_static_index_contains_title(self):
+        plan = self._make_plan("static", "portfolio", title="My Portfolio", model_task=None)
+        files = self.generator.generate(plan, "portfolio site")
+        assert "My Portfolio" in files["index.html"]
+    # -- Gradio requirements include task-specific deps --------------------
+    def test_gradio_image_task_includes_pillow(self):
+        plan = self._make_plan("gradio", "image_classifier", model_task="image-classification")
+        files = self.generator.generate(plan, "image classifier")
+        assert "Pillow" in files["requirements.txt"]
+    def test_gradio_chart_component_includes_matplotlib(self):
+        # The "chart_output" component is the input that should pull in matplotlib.
+        plan = self._make_plan(
+            "gradio", "sentiment_analyzer",
+            model_task="text-classification",
+            components=["text_input", "chart_output"],
+        )
+        files = self.generator.generate(plan, "sentiment dashboard")
+        assert "matplotlib" in files["requirements.txt"]
+# ===================================================================
+# 4. GradioGenerator
+# ===================================================================
+class TestGradioGenerator:
+    """Tests for GradioGenerator (template fallback, no LLM calls)."""
+    def setup_method(self):
+        # A new generator per test keeps the tests independent of each other.
+        self.gen = GradioGenerator()
+    # Build a plan dict for the given template key; model_task defaults to
+    # "text-generation" and the components list is left empty.
+    def _make_plan(self, template_key, model_task="text-generation"):
+        return {
+            "template_key": template_key,
+            "title": "Test App",
+            "description": "A test",
+            "model_task": model_task,
+            "components": [],
+            "recommended_models": [
+                {"id": "Qwen/Qwen2.5-7B-Instruct", "desc": "Test", "size": "7B"}
+            ],
+            "extra_features": [],
+        }
+    # In the tests below a bare ast.parse(code) call is the assertion:
+    # it raises SyntaxError if the generated source is not valid Python.
+    def test_generate_produces_valid_python_chatbot(self):
+        plan = self._make_plan("chatbot")
+        code = self.gen.generate(plan, "build a chatbot")
+        # Must parse without syntax errors
+        ast.parse(code)
+    def test_generate_produces_valid_python_image_classifier(self):
+        plan = self._make_plan("image_classifier", "image-classification")
+        code = self.gen.generate(plan, "image classifier")
+        ast.parse(code)
+    def test_generate_produces_valid_python_summarizer(self):
+        plan = self._make_plan("text_summarizer", "summarization")
+        code = self.gen.generate(plan, "text summarizer")
+        ast.parse(code)
+    def test_generate_produces_valid_python_sentiment(self):
+        plan = self._make_plan("sentiment_analyzer", "text-classification")
+        code = self.gen.generate(plan, "sentiment tool")
+        ast.parse(code)
+    def test_generate_produces_valid_python_translator(self):
+        plan = self._make_plan("translator", "translation")
+        code = self.gen.generate(plan, "translator")
+        ast.parse(code)
+    def test_generate_produces_valid_python_qa(self):
+        plan = self._make_plan("question_answering", "question-answering")
+        code = self.gen.generate(plan, "question answering")
+        ast.parse(code)
+    def test_generate_produces_valid_python_text_gen(self):
+        plan = self._make_plan("text_generator")
+        code = self.gen.generate(plan, "text generator")
+        ast.parse(code)
+    # -- Fallback templates work without API -------------------------------
+    # Each template is rendered directly with str.format(). Literal braces
+    # in a template must be doubled ({{ }}), otherwise format() treats them
+    # as replacement fields and raises instead of rendering.
+    @pytest.mark.parametrize("key", list(GRADIO_TEMPLATES.keys()))
+    def test_fallback_template_produces_valid_python(self, key):
+        """Every built-in Gradio template must produce parseable Python."""
+        code = GRADIO_TEMPLATES[key].format(
+            model_id="test/model",
+            title="Test",
+            description="Test description",
+        )
+        ast.parse(code)
+    # A template_key of None exercises the generator's path for plans with
+    # no matching template.
+    def test_generic_fallback_for_unknown_template(self):
+        plan = self._make_plan(None)
+        code = self.gen.generate(plan, "something unknown")
+        ast.parse(code)
+    def test_generated_code_contains_gradio_import(self):
+        plan = self._make_plan("chatbot")
+        code = self.gen.generate(plan, "chatbot")
+        assert "import gradio" in code
+    def test_generated_code_contains_launch(self):
+        plan = self._make_plan("chatbot")
+        code = self.gen.generate(plan, "chatbot")
+        assert "demo.launch()" in code
+    # -- _extract_code helper ----------------------------------------------
+    # Fenced input: the code is kept and the ``` fence markers are removed.
+    def test_extract_code_from_markdown_block(self):
+        raw = "Here is the code:\n```python\nimport gradio as gr\nprint('hi')\n```\nDone."
+        code = self.gen._extract_code(raw)
+        assert "import gradio" in code
+        assert "```" not in code
+    # Unfenced input is expected to come back unchanged.
+    def test_extract_code_plain_python(self):
+        raw = "import gradio as gr\nprint('hi')"
+        code = self.gen._extract_code(raw)
+        assert code == raw
+# ===================================================================
+# 5. DockerGenerator
+# ===================================================================
+class TestDockerGenerator:
+    """Tests for DockerGenerator (template fallback, no LLM calls)."""
+    def setup_method(self):
+        self.gen = DockerGenerator()
+    def _make_plan(self, template_key="rest_api"):
+        return {
+            "template_key": template_key,
+            "title": "Test API",
+            "description": "A test API service",
+            "model_task": "text-generation",
+            "components": ["fastapi_app", "model_endpoint"],
+            "recommended_models": [
+                {"id": "Qwen/Qwen2.5-7B-Instruct", "desc": "Test", "size": "7B"}
+            ],
+            "extra_features": [],
+        }
+    def test_generate_produces_dockerfile(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "Dockerfile" in files
+    def test_generate_produces_app_py(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "app.py" in files
+    def test_generate_produces_requirements(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "requirements.txt" in files
+    def test_app_py_is_valid_python(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        ast.parse(files["app.py"])
+    def test_dockerfile_has_from(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "FROM" in files["Dockerfile"]
+    def test_dockerfile_exposes_7860(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "7860" in files["Dockerfile"]
+    def test_dockerfile_has_cmd(self):
+        files = self.gen.generate(self._make_plan(), "build an api")
+        assert "CMD" in files["Dockerfile"]
+    def test_generic_docker_fallback(self):
+        plan = self._make_plan("unknown_template_key")
+        files = self.gen.generate(plan, "build something")
+        assert "app.py" in files
+        assert "Dockerfile" in files
+        ast.parse(files["app.py"])
+    # -- All docker templates produce valid Python -------------------------
+    @pytest.mark.parametrize("template_name", list(DOCKER_TEMPLATES.keys()))
+    def test_docker_template_app_py_is_valid_python(self, template_name):
+        template = DOCKER_TEMPLATES[template_name]
+        code = template["app.py"].format(
+            model_id="test/model",
+            title="Test",
+            description="Test desc",
+        )
+        ast.parse(code)
+    # -- _parse_files helper -----------------------------------------------
+    def test_parse_files_marker_format(self):
+        text = (
+            "=== FILENAME: app.py ===\nprint('hello')\n"
+            "=== FILENAME: requirements.txt ===\nfastapi\n"
+        )
+        files = self.gen._parse_files(text)
+        assert "app.py" in files
+        assert "requirements.txt" in files
+    def test_parse_files_empty_returns_empty(self):
+        files = self.gen._parse_files("no files here")
+        assert files == {}
+# ===================================================================
+# 6. ReadmeGenerator
+# ===================================================================
+class TestReadmeGenerator:
+    """Tests for ReadmeGenerator."""
+    def setup_method(self):
+        self.gen = ReadmeGenerator()
+    def _make_plan(self, app_type="chatbot", app_name="test-chatbot"):
+        return {
+            "app_type": app_type,
+            "title": "Test Chatbot",
+            "description": "A test chatbot application",
+            "app_name": app_name,
+            "recommended_models": [
+                {"id": "Qwen/Qwen2.5-7B-Instruct", "desc": "Test", "size": "7B"}
+            ],
+            "components": ["chat_interface", "system_prompt_config"],
+        }
+    # -- YAML frontmatter --------------------------------------------------
+    def test_readme_has_yaml_frontmatter(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        assert readme.startswith("---")
+        # Should have opening and closing ---
+        parts = readme.split("---")
+        assert len(parts) >= 3  # before, frontmatter, after
+    def test_frontmatter_contains_sdk_gradio(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        frontmatter = readme.split("---")[1]
+        assert "sdk: gradio" in frontmatter
+    def test_frontmatter_contains_sdk_docker(self):
+        readme = self.gen.generate(self._make_plan("rest_api"), "docker")
+        frontmatter = readme.split("---")[1]
+        assert "sdk: docker" in frontmatter
+    def test_frontmatter_contains_sdk_static(self):
+        readme = self.gen.generate(self._make_plan("portfolio"), "static")
+        frontmatter = readme.split("---")[1]
+        assert "sdk: static" in frontmatter
+    def test_frontmatter_contains_title(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        frontmatter = readme.split("---")[1]
+        assert "Test Chatbot" in frontmatter
+    # -- README body -------------------------------------------------------
+    def test_readme_contains_app_name_in_body(self):
+        plan = self._make_plan(app_name="my-awesome-chatbot")
+        plan["title"] = "My Awesome Chatbot"
+        readme = self.gen.generate(plan, "gradio")
+        # The title (which comes from the plan) should appear in the body
+        assert "My Awesome Chatbot" in readme
+    def test_readme_contains_description(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        assert "A test chatbot application" in readme
+    def test_readme_contains_features_section(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        assert "## Features" in readme
+    def test_readme_contains_model_reference(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        assert "Qwen/Qwen2.5-7B-Instruct" in readme
+    def test_readme_contains_tech_stack(self):
+        readme = self.gen.generate(self._make_plan(), "gradio")
+        assert "## Tech Stack" in readme
+    def test_docker_readme_mentions_fastapi(self):
+        readme = self.gen.generate(self._make_plan("rest_api"), "docker")
+        assert "FastAPI" in readme
+    def test_static_readme_mentions_html(self):
+        readme = self.gen.generate(self._make_plan("portfolio"), "static")
+        assert "HTML" in readme
+# ===================================================================
+# 7. CodeChecker
+# ===================================================================
+class TestCodeChecker:
+    """Tests for CodeChecker."""
+    def setup_method(self):
+        self.checker = CodeChecker()
+    # -- Python syntax validation ------------------------------------------
+    def test_valid_python_passes(self):
+        files = {"app.py": "import os\nprint('hello')\n"}
+        result = self.checker.check(files, "gradio")
+        # Should have no python-specific errors (cross-file check may warn
+        # about missing requirements.txt etc.)
+        py_check = result["file_checks"]["app.py"]
+        assert py_check["valid"] is True
+        assert len(py_check["errors"]) == 0
+    def test_invalid_python_syntax_caught(self):
+        files = {"app.py": "def foo(\n    pass\n"}
+        result = self.checker.check(files, "gradio")
+        py_check = result["file_checks"]["app.py"]
+        assert py_check["valid"] is False
+        assert any("syntax error" in e.lower() for e in py_check["errors"])
+    def test_empty_file_flagged(self):
+        files = {"app.py": ""}
+        result = self.checker.check(files, "gradio")
+        py_check = result["file_checks"]["app.py"]
+        assert py_check["valid"] is False
+    def test_dangerous_pattern_warned(self):
+        files = {"app.py": "import os\nos.system('rm -rf /')\n"}
+        result = self.checker.check(files, "gradio")
+        py_check = result["file_checks"]["app.py"]
+        assert any("os.system" in w for w in py_check["warnings"])
+    def test_eval_warned(self):
+        files = {"app.py": "x = eval('1+2')\n"}
+        result = self.checker.check(files, "gradio")
+        py_check = result["file_checks"]["app.py"]
+        assert any("eval" in w for w in py_check["warnings"])
+    # -- Dockerfile validation ---------------------------------------------
+    def test_valid_dockerfile_passes(self):
+        dockerfile = (
+            "FROM python:3.11-slim\n"
+            "WORKDIR /app\n"
+            "COPY . .\n"
+            "EXPOSE 7860\n"
+            "CMD [\"python\", \"app.py\"]\n"
+        )
+        files = {"Dockerfile": dockerfile, "README.md": "---\ntest\n---\n"}
+        result = self.checker.check(files, "docker")
+        df_check = result["file_checks"]["Dockerfile"]
+        assert len(df_check["errors"]) == 0
+    def test_dockerfile_missing_from_is_error(self):
+        files = {"Dockerfile": "COPY . .\nCMD ['python']\n"}
+        result = self.checker.check(files, "docker")
+        df_check = result["file_checks"]["Dockerfile"]
+        assert any("FROM" in e for e in df_check["errors"])
+    def test_dockerfile_missing_cmd_is_error(self):
+        files = {"Dockerfile": "FROM python:3.11\nCOPY . .\n"}
+        result = self.checker.check(files, "docker")
+        df_check = result["file_checks"]["Dockerfile"]
+        assert any("CMD" in e or "ENTRYPOINT" in e for e in df_check["errors"])
+    def test_dockerfile_missing_expose_is_warning(self):
+        files = {"Dockerfile": "FROM python:3.11\nCMD ['python']\n"}
+        result = self.checker.check(files, "docker")
+        df_check = result["file_checks"]["Dockerfile"]
+        assert any("EXPOSE" in w for w in df_check["warnings"])
+    # -- Cross-file checks -------------------------------------------------
+    def test_gradio_missing_app_py_is_error(self):
+        files = {"README.md": "---\ntest\n---\n", "requirements.txt": "gradio\n"}
+        result = self.checker.check(files, "gradio")
+        assert result["valid"] is False
+        assert any("app.py" in e for e in result["errors"])
+    def test_docker_missing_dockerfile_is_error(self):
+        files = {"app.py": "print('hi')\n", "README.md": "---\ntest\n---\n"}
+        result = self.checker.check(files, "docker")
+        assert any("Dockerfile" in e for e in result["errors"])
+    def test_static_missing_index_html_is_error(self):
+        files = {"README.md": "---\ntest\n---\n", "style.css": "body{}\n"}
+        result = self.checker.check(files, "static")
+        assert any("index.html" in e for e in result["errors"])
+    def test_overall_valid_when_no_errors(self):
+        files = {
+            "app.py": "import gradio as gr\nprint('hi')\n",
+            "requirements.txt": "gradio>=5.0\n",
+            "README.md": "---\nsdk: gradio\n---\n# App\n",
+        }
+        result = self.checker.check(files, "gradio")
+        assert result["valid"] is True
+    # -- HTML validation ---------------------------------------------------
+    def test_valid_html_passes(self):
+        html = "<!DOCTYPE html><html><head></head><body></body></html>"
+        files = {"index.html": html}
+        result = self.checker.check(files, "static")
+        html_check = result["file_checks"]["index.html"]
+        assert len(html_check["errors"]) == 0
+    def test_html_missing_tags_warned(self):
+        files = {"index.html": "<div>Hello</div>"}
+        result = self.checker.check(files, "static")
+        html_check = result["file_checks"]["index.html"]
+        assert len(html_check["warnings"]) > 0
+    # -- README validation -------------------------------------------------
+    def test_readme_missing_frontmatter_warned(self):
+        files = {"README.md": "# My App\nNo frontmatter here.\n"}
+        result = self.checker.check(files, "gradio")
+        readme_check = result["file_checks"]["README.md"]
+        assert any("frontmatter" in w.lower() for w in readme_check["warnings"])
+# ===================================================================
+# 8. FastAPI App Integration Tests
+# ===================================================================
+@pytest.mark.skipif(
+    not _can_run_fastapi_tests(),
+    reason="Jinja2 version incompatible with Starlette TestClient in this environment",
+)
+class TestFastAPIApp:
+    """Integration tests for the FastAPI app endpoints."""
+    def test_home_returns_200(self, client):
+        resp = client.get("/")
+        assert resp.status_code == 200
+        assert "text/html" in resp.headers["content-type"]
+    def test_home_contains_title(self, client):
+        resp = client.get("/")
+        assert "AutoApp" in resp.text or "autoapp" in resp.text.lower() or "<html" in resp.text.lower()
+    def test_post_generate_returns_html(self, client):
+        """POST /generate with a chatbot prompt should return 200 HTML."""
+        resp = client.post(
+            "/generate",
+            data={
+                "prompt": "Build a chatbot that answers questions about science",
+                "sdk_preference": "auto",
+                "model_size": "medium",
+                "gpu_needed": "false",
+                "features": "",
+            },
+        )
+        assert resp.status_code == 200
+        assert "text/html" in resp.headers["content-type"]
+    def test_post_generate_contains_file_content(self, client):
+        """The generated result page should contain references to generated files."""
+        resp = client.post(
+            "/generate",
+            data={
+                "prompt": "Build a simple text summarizer",
+                "sdk_preference": "auto",
+                "model_size": "small",
+                "gpu_needed": "false",
+                "features": "",
+            },
+        )
+        assert resp.status_code == 200
+        # The result page should mention file names like app.py or README.md
+        body = resp.text.lower()
+        assert "app.py" in body or "readme" in body
+    def test_download_nonexistent_project_returns_404(self, client):
+        resp = client.get("/download/nonexistent")
+        assert resp.status_code == 404
+    def test_get_file_nonexistent_project_returns_404(self, client):
+        resp = client.get("/api/file/nonexistent/app.py")
+        assert resp.status_code == 404
+    def test_edit_nonexistent_project_returns_404(self, client):
+        resp = client.post(
+            "/edit",
+            data={"project_id": "nonexistent", "edit_prompt": "change colour"},
+        )
+        assert resp.status_code == 404
+# ===================================================================
+# 9. End-to-end generation (all SDK types, offline)
+# ===================================================================
+class TestEndToEnd:
+    """End-to-end tests exercising the full planner -> recommender -> generator
+    -> checker pipeline offline (no LLM calls)."""
+    def setup_method(self):
+        self.planner = AppPlanner()
+        self.recommender = ModelRecommender()
+        self.generator = RepoGenerator()
+        self.checker = CodeChecker()
+    def _run_pipeline(self, prompt, sdk_pref="auto", model_size="medium"):
+        plan = self.planner.analyze(prompt, sdk_pref)
+        models = self.recommender.recommend(plan, model_size)
+        plan["recommended_models"] = models
+        plan["extra_features"] = []
+        files = self.generator.generate(plan, prompt)
+        validation = self.checker.check(files, plan["sdk"])
+        return plan, files, validation
+    def test_chatbot_e2e(self):
+        plan, files, validation = self._run_pipeline("Build a chatbot")
+        assert plan["sdk"] == "gradio"
+        assert "app.py" in files
+        assert validation["valid"] is True
+    def test_image_classifier_e2e(self):
+        plan, files, validation = self._run_pipeline(
+            "Build a Gradio image classifier"
+        )
+        assert plan["sdk"] == "gradio"
+        assert "app.py" in files
+        ast.parse(files["app.py"])
+    def test_rest_api_e2e(self):
+        plan, files, validation = self._run_pipeline(
+            "Build a REST API with FastAPI endpoints"
+        )
+        assert plan["sdk"] == "docker"
+        assert "Dockerfile" in files
+        assert "app.py" in files
+        ast.parse(files["app.py"])
+    def test_portfolio_e2e(self):
+        plan, files, validation = self._run_pipeline(
+            "Create a portfolio website to showcase projects"
+        )
+        assert plan["sdk"] == "static"
+        assert "index.html" in files
+    def test_summarizer_small_model_e2e(self):
+        plan, files, validation = self._run_pipeline(
+            "Build a text summarization tool", model_size="small"
+        )
+        assert plan["sdk"] == "gradio"
+        assert "app.py" in files
+        assert validation["valid"] is True