Spaces:
Running
Running
| from __future__ import annotations | |
| import logging | |
| from typing import Any | |
| from velai.data_types import TextType | |
| from velai.dataflow.enums import NodeKind, PortDirection | |
| from velai.dataflow.nodes import NodeType | |
| from velai.dataflow.ports import PortSchema | |
| from velai.nodes.actions.node_action_decorator import as_action_name, node_action | |
| from velai.nodes.actions.node_action_models import AsyncNodeActionResultIterator, NodeActionArguments, NodeActionResult | |
| from velai.nodes.base_node import BaseNode, BaseNodeData | |
| from velai.nodes.base_node_renderable import T_BASE_NODE, BaseNodeRenderable | |
| from velai.services.registry import get_service_registry | |
| from velai.services.registry_utils import get_services_by_config | |
| from velai.services.text.TextGenerator import TextGenerationInput, TextGenerator | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type descriptor for the text data node: it declares no inputs and a single
# "text" output port carrying a TextType value.
TextDataNodeType = NodeType(
    kind=NodeKind.TEXT_DATA,
    display_name="Text",
    inputs=[],
    outputs=[
        PortSchema(
            name="text",
            dtype=TextType,
            direction=PortDirection.OUTPUT,
            tooltip="Text",
        ),
    ],
)
class TextDataNode(BaseNode[BaseNodeData]):
    """Constant-text node whose value is published on its output port.

    The text lives on the "text" output port; it is edited through the UI
    rather than computed from any input.
    """
class TextDataNodeRenderable(BaseNodeRenderable[TextDataNode]):
    """UI description and actions for :class:`TextDataNode`.

    Extends what the base renderable provides with a "text" textarea field
    and an "enhance" header button, and implements the action behind that
    button.
    """

    def get_fields(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the base fields followed by the "text" textarea field."""
        return [
            *super().get_fields(node),
            {
                "name": "text",
                "kind": "textarea",
                "label": "Text",
                "placeholder": "Enter text...",
            },
        ]

    def get_header_buttons(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the base header buttons followed by the "enhance" button.

        A new list is built (as in ``get_fields``) instead of appending to
        the list returned by the base class, so a list the base class might
        share or cache between calls is never mutated.
        """
        return [
            *super().get_header_buttons(node),
            {
                "name": "enhance",
                "icon": "text_increase",
                "tooltip": "Enhance",
                # The button is bound to the action by name.
                "action": as_action_name(self._on_enhance_text_action),
                "requiresContent": ["text"],
                "disableWhileProcessing": True,
            },
        ]

    @staticmethod
    def _build_enhance_prompt(user_text: Any) -> str:
        """Return the full enhancement prompt with ``user_text`` appended."""
        # TODO: improve prompt to correctly enhance text;
        # maybe also add multiple enhancement targets.
        return (
            "You are an expert prompt engineer for generative visual models, including image and video generation.\n"
            "Your task is to expand the user's input into a clear, detailed, and optimized generation prompt.\n\n"
            "MODALITY AWARENESS:\n"
            "- The output may be used for IMAGE generation or VIDEO generation.\n"
            "- If the user mentions motion, time, camera movement, transitions, or duration, optimize for video.\n"
            "- Otherwise, optimize for a single image.\n\n"
            "REFERENCE IMAGE HANDLING:\n"
            "- Only assume reference images exist if the user explicitly refers to them.\n"
            "- If the user does not clearly indicate reference images, assume none are provided.\n"
            "- When reference images are used, they are provided directly to the generation model.\n"
            "- You do NOT know what reference images look like.\n"
            "- You MUST NOT describe, infer, or invent any visual details of reference images.\n"
            '- Refer to them only abstractly, for example: "the provided image", "the provided car image", "the reference image".\n\n'
            "WHEN REFERENCE IMAGES ARE PRESENT:\n"
            "- Focus on transformation rules, relationships, and constraints.\n"
            "- Describe how elements are combined, placed, blended, animated, or stylized.\n"
            "- Specify which image provides dominant style, structure, or motion cues if relevant.\n"
            "- Ensure realism, consistency, scale matching, perspective coherence, lighting continuity, "
            "clean blending, and natural integration.\n\n"
            "WHEN NO REFERENCE IMAGES ARE PRESENT:\n"
            "- Write a complete, descriptive prompt suitable for text only generation.\n"
            "- Include subject, environment, artistic style, lighting, color palette, composition.\n"
            "- For video, also include motion, pacing, camera behavior, and temporal consistency.\n\n"
            "VIDEO SPECIFIC GUIDELINES (only if applicable):\n"
            "- Describe motion clearly and continuously over time.\n"
            "- Specify camera movement, subject motion, scene evolution, and visual continuity.\n"
            "- Avoid jump cuts unless explicitly requested.\n\n"
            "STRICT OUTPUT RULES:\n"
            "- Return ONLY the final prompt text.\n"
            "- Do NOT include explanations, headings, or markdown.\n"
            "- Do NOT mention that rules were followed.\n\n"
            f"User input:\n{user_text}"
        )

    async def _on_enhance_text_action(self, args: NodeActionArguments) -> AsyncNodeActionResultIterator:
        """Replace the node's text with a model-generated, expanded prompt.

        Yields a progress result first, then sends the current value of the
        "text" output to the default ``TextGenerator`` service, hands the
        generation result to the node, writes the generated text back to the
        output and finally yields a node-update result.

        Args:
            args: Action arguments; ``args.node`` is the node being edited.
        """
        yield NodeActionResult(progress_message="enhancing...")

        text_output = args.node.outputs["text"]

        # Resolve the configured default text-generation service and
        # instantiate it through the service registry.
        service_id = get_services_by_config(TextGenerator).default.get_service_id()
        text_service = get_service_registry().create(TextGenerator, service_id)

        prompt = self._build_enhance_prompt(text_output.value)
        result = await text_service.generate_async(TextGenerationInput(prompt=prompt))

        # Let the node process the raw result before its text is overwritten.
        await args.node.on_generation_result(result)
        text_output.value = result.text

        yield NodeActionResult.update_node()