from __future__ import annotations

import logging
from typing import Any

from velai.data_types import TextType
from velai.dataflow.enums import NodeKind, PortDirection
from velai.dataflow.nodes import NodeType
from velai.dataflow.ports import PortSchema
from velai.nodes.actions.node_action_decorator import as_action_name, node_action
from velai.nodes.actions.node_action_models import AsyncNodeActionResultIterator, NodeActionArguments, NodeActionResult
from velai.nodes.base_node import BaseNode, BaseNodeData
from velai.nodes.base_node_renderable import T_BASE_NODE, BaseNodeRenderable
from velai.services.registry import get_service_registry
from velai.services.registry_utils import get_services_by_config
from velai.services.text.TextGenerator import TextGenerationInput, TextGenerator

logger = logging.getLogger(__name__)

# Node type descriptor: no inputs, one text-typed output port named "text".
TextDataNodeType = NodeType(
    kind=NodeKind.TEXT_DATA,
    display_name="Text",
    inputs=[],
    outputs=[
        PortSchema(
            name="text",
            dtype=TextType,
            direction=PortDirection.OUTPUT,
            tooltip="Text",
        ),
    ],
)

# Static instruction preamble sent to the text generator by the "enhance"
# action. The user's current text is appended after it at call time.
# TODO: improve prompt to correctly enhance text;
# maybe also add multiple enhancement targets.
_ENHANCE_INSTRUCTIONS = (
    "You are an expert prompt engineer for generative visual models, including image and video generation.\n"
    "Your task is to expand the user's input into a clear, detailed, and optimized generation prompt.\n\n"
    "MODALITY AWARENESS:\n"
    "- The output may be used for IMAGE generation or VIDEO generation.\n"
    "- If the user mentions motion, time, camera movement, transitions, or duration, optimize for video.\n"
    "- Otherwise, optimize for a single image.\n\n"
    "REFERENCE IMAGE HANDLING:\n"
    "- Only assume reference images exist if the user explicitly refers to them.\n"
    "- If the user does not clearly indicate reference images, assume none are provided.\n"
    "- When reference images are used, they are provided directly to the generation model.\n"
    "- You do NOT know what reference images look like.\n"
    "- You MUST NOT describe, infer, or invent any visual details of reference images.\n"
    '- Refer to them only abstractly, for example: "the provided image", "the provided car image", "the reference image".\n\n'
    "WHEN REFERENCE IMAGES ARE PRESENT:\n"
    "- Focus on transformation rules, relationships, and constraints.\n"
    "- Describe how elements are combined, placed, blended, animated, or stylized.\n"
    "- Specify which image provides dominant style, structure, or motion cues if relevant.\n"
    "- Ensure realism, consistency, scale matching, perspective coherence, lighting continuity, "
    "clean blending, and natural integration.\n\n"
    "WHEN NO REFERENCE IMAGES ARE PRESENT:\n"
    "- Write a complete, descriptive prompt suitable for text only generation.\n"
    "- Include subject, environment, artistic style, lighting, color palette, composition.\n"
    "- For video, also include motion, pacing, camera behavior, and temporal consistency.\n\n"
    "VIDEO SPECIFIC GUIDELINES (only if applicable):\n"
    "- Describe motion clearly and continuously over time.\n"
    "- Specify camera movement, subject motion, scene evolution, and visual continuity.\n"
    "- Avoid jump cuts unless explicitly requested.\n\n"
    "STRICT OUTPUT RULES:\n"
    "- Return ONLY the final prompt text.\n"
    "- Do NOT include explanations, headings, or markdown.\n"
    "- Do NOT mention that rules were followed.\n\n"
)


class TextDataNode(BaseNode[BaseNodeData]):
    """Node exposing a constant text value through its output port.

    The value lives on the "text" output port and is edited through the UI.
    """


class TextDataNodeRenderable(BaseNodeRenderable[TextDataNode]):
    """UI description for ``TextDataNode``.

    Extends the inherited fields with a "text" textarea and the inherited
    header buttons with an "enhance" button bound to
    ``_on_enhance_text_action``.
    """

    def get_fields(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the inherited fields followed by the "text" textarea field."""
        fields = list(super().get_fields(node))
        fields.append(
            {
                "name": "text",
                "kind": "textarea",
                "label": "Text",
                "placeholder": "Enter text...",
            }
        )
        return fields

    def get_header_buttons(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the inherited header buttons with the "enhance" button appended.

        The button is appended to the list object returned by the parent
        implementation, and that same list is returned.
        """
        header_buttons = super().get_header_buttons(node)
        enhance_button: dict[str, Any] = {
            "name": "enhance",
            "icon": "text_increase",
            "tooltip": "Enhance",
            "action": as_action_name(self._on_enhance_text_action),
            "requiresContent": ["text"],
            "disableWhileProcessing": True,
        }
        header_buttons.append(enhance_button)
        return header_buttons

    @node_action
    async def _on_enhance_text_action(self, args: NodeActionArguments) -> AsyncNodeActionResultIterator:
        """Replace the node's text with a generated, expanded prompt.

        Steps, in order:
          1. Yield a progress result with the message "enhancing...".
          2. Create the ``TextGenerator`` service identified by the default
             entry of ``get_services_by_config(TextGenerator)``.
          3. Send the instruction preamble plus the current value of the
             "text" output port to ``generate_async``.
          4. Pass the result to ``args.node.on_generation_result`` and store
             ``result.text`` back on the "text" output port.
          5. Yield ``NodeActionResult.update_node()``.
        """
        yield NodeActionResult(progress_message="enhancing...")

        text_port = args.node.outputs["text"]

        # Resolve the default text-generation service and instantiate it.
        default_service_id = get_services_by_config(TextGenerator).default.get_service_id()
        generator = get_service_registry().create(TextGenerator, default_service_id)

        # The port value is read only now, after the service has been created.
        request = TextGenerationInput(prompt=_ENHANCE_INSTRUCTIONS + f"User input:\n{text_port.value}")
        generated = await generator.generate_async(request)

        await args.node.on_generation_result(generated)
        text_port.value = generated.text

        yield NodeActionResult.update_node()