Spaces:

LeafCat79
/

Image_Generator_for_HTML_Games

Sleeping

App Files Files Community

LeafCat79 committed 26 days ago

Commit

eed87b4

verified ·

1 Parent(s): e2b36e5

Use AI prompt and image models

Browse files

Files changed (1) hide show

app.py +129 -13

app.py CHANGED Viewed

@@ -99,6 +99,8 @@ class StylePlan:
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 HF_IMAGE_MODEL = os.environ.get("HF_IMAGE_MODEL", "black-forest-labs/FLUX.1-schnell")
 HF_IMAGE_ENDPOINT = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"
 def slugify(value: str) -> str:
@@ -191,8 +193,8 @@ def build_asset_prompt(role: str, prompt: str, style_hint: str) -> str:
     )
-def parse_assets(raw_roles: str, style_hint: str) -> list[AssetSpec]:
-    specs: list[AssetSpec] = []
     for line in raw_roles.splitlines():
         line = line.strip()
         if not line or line.startswith("#"):
@@ -204,14 +206,126 @@ def parse_assets(raw_roles: str, style_hint: str) -> list[AssetSpec]:
             role, prompt = line.split("=", 1)
         else:
             role, prompt = line, line
         role = role.strip()
         prompt = prompt.strip() or role
         slug = slugify(role)
         is_background = any(word in slug for word in ("background", "backdrop", "scene", "map", "level"))
         width, height = (800, 450) if is_background else (128, 128)
         filename = f"sprite_{slug}.png"
-        full_prompt = build_asset_prompt(role, prompt, style_hint)
         specs.append(AssetSpec(role=role, prompt=full_prompt, filename=filename, width=width, height=height))
     return specs
@@ -883,16 +997,17 @@ def hf_image_png(spec: AssetSpec, index: int, run_id: int) -> bytes | None:
         return None
-def generate_asset(spec: AssetSpec, index: int, run_id: int) -> tuple[str, str, str | None]:
     png_content = hf_image_png(spec, index, run_id)
-    source = "hf"
     if png_content is None:
         png_content = local_asset_png(spec, index, run_id)
         source = "local style fallback"
     return (
         png_bytes_to_data_uri(png_content),
         write_gallery_image(png_content, spec.role),
-        None if source == "hf" else source,
     )
@@ -1038,15 +1153,16 @@ def build_prompt_preview(specs: list[AssetSpec]) -> str:
     return "\n\n".join(f"{spec.role}:\n{spec.prompt}" for spec in specs)
-def build_model_report(rows: list[tuple[str, str]]) -> str:
-    return "\n".join(f"{role}: {source}" for role, source in rows)
 def generate_images_and_game(html_code: str, roles: str, style_hint: str):
     if not html_code.strip():
         return "", "Paste HTML game code first.", [], "", "", ""
-    specs = parse_assets(roles, style_hint or "pixel art style")
     if not specs:
         return html_code, "Add at least one asset role, like `player: brave knight`.", [], "", "", build_preview(html_code)
@@ -1057,16 +1173,16 @@ def generate_images_and_game(html_code: str, roles: str, style_hint: str):
     run_id = time.time_ns()
     for index, spec in enumerate(specs):
-        data_uri, gallery_path, error = generate_asset(spec, index, run_id)
         assets[spec.role] = data_uri
         gallery.append((gallery_path, f"{spec.role} -> {spec.filename}"))
-        model_rows.append((spec.role, HF_IMAGE_MODEL if error is None and HF_TOKEN else "local style fallback"))
         if error:
             errors.append(f"{spec.role}: fallback used ({error})")
     rewritten = embed_assets(html_code, assets, specs)
     using_hf = bool(HF_TOKEN)
-    source = f"HF image model `{HF_IMAGE_MODEL}`" if using_hf else "local style fallback"
     status = f"Generated and embedded {len(specs)} fresh asset(s) using {source}. Run {str(run_id)[-6:]}."
     if errors:
         status += "\n\n" + "\n".join(f"{item}: no HF image returned, used local style fallback" for item in [e.split(':', 1)[0] for e in errors])

 # Hugging Face access token, read from the environment; empty string when unset.
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 # Text-to-image model id; override with the HF_IMAGE_MODEL environment variable.
 HF_IMAGE_MODEL = os.environ.get("HF_IMAGE_MODEL", "black-forest-labs/FLUX.1-schnell")
 # Inference URL derived from the image model id above.
 HF_IMAGE_ENDPOINT = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"
 # Text model id used to write image prompts; override with HF_PROMPT_MODEL.
+HF_PROMPT_MODEL = os.environ.get("HF_PROMPT_MODEL", "Qwen/Qwen2.5-Coder-7B-Instruct")
 # Inference URL derived from the prompt model id above.
+HF_PROMPT_ENDPOINT = f"https://api-inference.huggingface.co/models/{HF_PROMPT_MODEL}"
 def slugify(value: str) -> str:
     )
+def parse_role_lines(raw_roles: str) -> list[tuple[str, str]]:
+    parsed: list[tuple[str, str]] = []
     for line in raw_roles.splitlines():
         line = line.strip()
         if not line or line.startswith("#"):
             role, prompt = line.split("=", 1)
         else:
             role, prompt = line, line
         role = role.strip()
         prompt = prompt.strip() or role
+        parsed.append((role, prompt))
+    return parsed
def infer_code_context(html_code: str) -> str:
    """Summarise the pasted HTML game as a short plain-text paragraph.

    Only the first 12,000 characters of ``html_code`` are inspected. The
    summary reports three things: quoted image filenames, gameplay mechanics
    guessed from keyword hits in the lower-cased source, and the dimensions
    declared on ``<canvas>`` tags.

    Args:
        html_code: Raw HTML/JavaScript source of the game.

    Returns:
        Three sentences joined by spaces. Each falls back to a placeholder
        ("none found", "not obvious", "none") when nothing matched.
    """
    text = html_code[:12000]
    lowered = text.lower()

    filenames = sorted(set(re.findall(r"['\"]([^'\"]+?\.(?:png|jpg|jpeg|webp|gif))['\"]", text, flags=re.I)))

    mechanic_keywords = {
        "top-down movement": ("arrowup", "arrowdown", "keys.has(\"w\")", "keys.has('w')"),
        "platformer": ("gravity", "grounded", "platform"),
        "shooting": ("bullet", "shoot", "projectile", "laser"),
        "enemies": ("enemy", "monster", "spawn"),
    }
    mechanics = [
        label
        for label, words in mechanic_keywords.items()
        if any(word in lowered for word in words)
    ]

    # Read width and height independently for each canvas tag. A single
    # pattern with two alternated groups only ever captures one of them per
    # tag and yields tuples like ('800', ''), which is useless in a prompt.
    canvas_sizes: list[str] = []
    for tag in re.findall(r"<canvas\b[^>]*", text, flags=re.I):
        dims = {
            name.lower(): value
            for name, value in re.findall(
                r"(?<![\w-])(width|height)\s*=\s*['\"]?(\d+)", tag, flags=re.I
            )
        }
        if dims:
            canvas_sizes.append(f"{dims.get('width', '?')}x{dims.get('height', '?')}")

    return (
        f"Referenced asset filenames: {', '.join(filenames[:24]) or 'none found'}. "
        f"Detected game mechanics: {', '.join(mechanics) or 'not obvious'}. "
        f"Canvas hints found: {', '.join(canvas_sizes[:4]) or 'none'}."
    )
def local_prompt_map(role_lines: list[tuple[str, str]], style_hint: str) -> dict[str, str]:
    """Build the locally generated image prompt for every requested role.

    Args:
        role_lines: ``(role, description)`` pairs.
        style_hint: Shared theme/style text passed on to ``build_asset_prompt``.

    Returns:
        A dict keyed by role. When a role appears more than once, the last
        occurrence wins.
    """
    prompts: dict[str, str] = {}
    for role_name, description in role_lines:
        prompts[role_name] = build_asset_prompt(role_name, description, style_hint)
    return prompts
+def extract_json_object(text: str) -> dict | None:
+    match = re.search(r"\{.*\}", text, flags=re.S)
+    if not match:
+        return None
+    try:
+        value = json.loads(match.group(0))
+    except Exception:
+        return None
+    return value if isinstance(value, dict) else None
def hf_prompt_json(html_code: str, role_lines: list[tuple[str, str]], style_hint: str) -> dict[str, str] | None:
    """Ask the hosted prompt model for one text-to-image prompt per role.

    The request is a POST to ``HF_PROMPT_ENDPOINT``, authorised with
    ``HF_TOKEN``, with a 90-second timeout. The instruction and user text are
    wrapped in ``<|im_start|>``/``<|im_end|>`` chat markers.

    Args:
        html_code: Game source, summarised through ``infer_code_context``.
        role_lines: ``(role, description)`` pairs to write prompts for.
        style_hint: Shared theme/style text included in the request.

    Returns:
        A dict mapping role to prompt. It contains only the roles for which
        the reply held a non-empty string, so it may be partial or empty.
        ``None`` when no token is configured, when the reply has no JSON
        object, or when the request or decoding fails for any reason.
    """
    if not HF_TOKEN:
        return None
    role_block = "\n".join(f"- {role}: {prompt}" for role, prompt in role_lines)
    instruction = (
        "You are a senior game art director and prompt engineer. Read the HTML game context, "
        "the requested asset roles, and the shared theme/style. Return ONLY a JSON object where "
        "each key is the exact role name and each value is one concise text-to-image prompt. "
        "Each prompt must specify: subject silhouette/shape, camera angle, art style, palette, "
        "transparent background for sprites/items, full scene for backgrounds, no text, no watermark. "
        "Make different roles visually distinct and suitable for embedding in an HTML game."
    )
    user_text = (
        f"HTML/game context summary: {infer_code_context(html_code)}\n\n"
        f"Shared theme/style: {style_hint}\n\n"
        f"Asset roles:\n{role_block}\n\n"
        "Return JSON only."
    )
    prompt = f"<|im_start|>system\n{instruction}<|im_end|>\n<|im_start|>user\n{user_text}<|im_end|>\n<|im_start|>assistant\n"
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 900,
            "temperature": 0.55,
            "top_p": 0.9,
            "return_full_text": False,
        },
        "options": {"wait_for_model": True},
    }
    request = Request(
        HF_PROMPT_ENDPOINT,
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    # Deliberately best-effort: any network, HTTP or decoding failure returns
    # None rather than raising.
    try:
        with urlopen(request, timeout=90) as response:
            raw = response.read().decode("utf-8", errors="replace")
        parsed = json.loads(raw)
        # The reply may be a list of generations, a single dict, or something else.
        if isinstance(parsed, list) and parsed:
            text = parsed[0].get("generated_text", "") if isinstance(parsed[0], dict) else str(parsed[0])
        elif isinstance(parsed, dict):
            text = parsed.get("generated_text", parsed.get("text", ""))
        else:
            text = str(parsed)
        obj = extract_json_object(text)
        if not obj:
            return None
        prompts: dict[str, str] = {}
        for role, _ in role_lines:
            value = obj.get(role)
            # Accept real strings only. Coercing with str() would turn a JSON
            # null into the prompt "None" and a nested object into its repr.
            if isinstance(value, str) and value.strip():
                prompts[role] = value.strip()
        return prompts
    except Exception:
        return None
def build_prompt_map(html_code: str, raw_roles: str, style_hint: str) -> tuple[list[tuple[str, str]], dict[str, str], str]:
    """Choose the image prompts for the requested roles.

    The AI-written prompts are used only when the remote model returned one
    for every role. Otherwise the locally built prompts are used for all
    roles, so a single run never mixes the two sources.

    Args:
        html_code: Game source handed to the prompt model for context.
        raw_roles: Raw multi-line role text as entered by the user.
        style_hint: Shared theme/style text.

    Returns:
        ``(role_lines, prompt_map, source_label)``. ``source_label`` is
        ``HF_PROMPT_MODEL`` when the AI prompts were used and
        ``"local prompt interpreter"`` otherwise.
    """
    role_lines = parse_role_lines(raw_roles)
    local_map = local_prompt_map(role_lines, style_hint)
    if not role_lines:
        # Nothing to write prompts for, so skip the remote request. The
        # result is the same one the fallback path would produce.
        return role_lines, local_map, "local prompt interpreter"
    ai_map = hf_prompt_json(html_code, role_lines, style_hint)
    if ai_map and all(role in ai_map for role, _ in role_lines):
        return role_lines, ai_map, HF_PROMPT_MODEL
    return role_lines, local_map, "local prompt interpreter"
def parse_assets(raw_roles: str, style_hint: str, prompt_map: dict[str, str] | None = None) -> list[AssetSpec]:
    """Turn the raw role text into one ``AssetSpec`` per role.

    Args:
        raw_roles: Raw multi-line role text, parsed by ``parse_role_lines``.
        style_hint: Shared theme/style text used when a prompt is built locally.
        prompt_map: Optional ready-made prompts keyed by role. A missing or
            empty entry falls back to ``build_asset_prompt``.

    Returns:
        Specs in input order. A role whose slug contains a background-like
        word is sized 800x450; every other role is sized 128x128.
    """
    background_words = ("background", "backdrop", "scene", "map", "level")
    ready_prompts = prompt_map or {}
    specs: list[AssetSpec] = []
    for role, prompt in parse_role_lines(raw_roles):
        slug = slugify(role)
        if any(word in slug for word in background_words):
            width, height = 800, 450
        else:
            width, height = 128, 128
        filename = f"sprite_{slug}.png"
        # Build the local prompt only when no ready-made one exists.
        full_prompt = ready_prompts.get(role) or build_asset_prompt(role, prompt, style_hint)
        specs.append(
            AssetSpec(role=role, prompt=full_prompt, filename=filename, width=width, height=height)
        )
    return specs
         return None
def generate_asset(spec: AssetSpec, index: int, run_id: int) -> tuple[str, str, str | None, str]:
    """Produce one asset image, falling back to the local generator.

    Args:
        spec: The asset to render.
        index: Position of the asset in the current run.
        run_id: Identifier of the current run.

    Returns:
        ``(data_uri, gallery_path, error, source)``. ``source`` is
        ``HF_IMAGE_MODEL`` when the hosted model returned an image and
        ``"local style fallback"`` otherwise. ``error`` is ``None`` in the
        first case and repeats ``source`` in the second.
    """
    png_content = hf_image_png(spec, index, run_id)
    if png_content is None:
        # No hosted image came back, so draw the asset locally.
        png_content = local_asset_png(spec, index, run_id)
        source = "local style fallback"
    else:
        source = HF_IMAGE_MODEL
    data_uri = png_bytes_to_data_uri(png_content)
    gallery_path = write_gallery_image(png_content, spec.role)
    error = None if source == HF_IMAGE_MODEL else source
    return data_uri, gallery_path, error, source
     return "\n\n".join(f"{spec.role}:\n{spec.prompt}" for spec in specs)
def build_model_report(rows: list[tuple[str, str, str]]) -> str:
    """Format one report line per asset naming the models that produced it.

    Args:
        rows: ``(role, prompt_model, image_model)`` triples.

    Returns:
        Newline-joined lines of the form ``role: prompt=...; image=...``, or
        an empty string when ``rows`` is empty.
    """
    report_lines = []
    for role, prompt_model, image_model in rows:
        report_lines.append(f"{role}: prompt={prompt_model}; image={image_model}")
    return "\n".join(report_lines)
 def generate_images_and_game(html_code: str, roles: str, style_hint: str):
     if not html_code.strip():
         return "", "Paste HTML game code first.", [], "", "", ""
+    role_lines, prompt_map, prompt_model = build_prompt_map(html_code, roles, style_hint or "pixel art style")
+    specs = parse_assets(roles, style_hint or "pixel art style", prompt_map)
     if not specs:
         return html_code, "Add at least one asset role, like `player: brave knight`.", [], "", "", build_preview(html_code)
     run_id = time.time_ns()
     for index, spec in enumerate(specs):
+        data_uri, gallery_path, error, image_model = generate_asset(spec, index, run_id)
         assets[spec.role] = data_uri
         gallery.append((gallery_path, f"{spec.role} -> {spec.filename}"))
+        model_rows.append((spec.role, prompt_model, image_model))
         if error:
             errors.append(f"{spec.role}: fallback used ({error})")
     rewritten = embed_assets(html_code, assets, specs)
     using_hf = bool(HF_TOKEN)
+    source = f"prompt `{prompt_model}` + image `{HF_IMAGE_MODEL}`" if using_hf else "local prompt interpreter + local style fallback"
     status = f"Generated and embedded {len(specs)} fresh asset(s) using {source}. Run {str(run_id)[-6:]}."
     if errors:
         status += "\n\n" + "\n".join(f"{item}: no HF image returned, used local style fallback" for item in [e.split(':', 1)[0] for e in errors])