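# Source: velai-workshop / velai/nodes/text_data.py
# Hosting-page residue kept for provenance: uploader avatar "kratadata's picture",
# commit message "Upload folder via script", revision 0f8b3a0 (verified).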
from __future__ import annotations
import logging
from typing import Any
from velai.data_types import TextType
from velai.dataflow.enums import NodeKind, PortDirection
from velai.dataflow.nodes import NodeType
from velai.dataflow.ports import PortSchema
from velai.nodes.actions.node_action_decorator import as_action_name, node_action
from velai.nodes.actions.node_action_models import AsyncNodeActionResultIterator, NodeActionArguments, NodeActionResult
from velai.nodes.base_node import BaseNode, BaseNodeData
from velai.nodes.base_node_renderable import T_BASE_NODE, BaseNodeRenderable
from velai.services.registry import get_service_registry
from velai.services.registry_utils import get_services_by_config
from velai.services.text.TextGenerator import TextGenerationInput, TextGenerator
logger = logging.getLogger(__name__)

# Node type descriptor for the text data node: it declares no input ports and
# a single output port named "text" carrying a TextType value.
TextDataNodeType = NodeType(
    kind=NodeKind.TEXT_DATA,
    display_name="Text",
    inputs=[],
    outputs=[
        PortSchema(
            name="text",
            dtype=TextType,
            direction=PortDirection.OUTPUT,
            tooltip="Text",
        ),
    ],
)
class TextDataNode(BaseNode[BaseNodeData]):
    """Constant-text node.

    The node's text lives on its "text" output port and is edited through
    the UI; the class adds no behavior of its own on top of ``BaseNode``.
    """
class TextDataNodeRenderable(BaseNodeRenderable[TextDataNode]):
    """UI description for ``TextDataNode``.

    Adds a "text" textarea field and an "enhance" header button. The button's
    action sends the node's current text to the default ``TextGenerator``
    service and writes the generated text back to the "text" output port.
    """

    def get_fields(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the base class fields followed by the "text" textarea field."""
        return [
            *super().get_fields(node),
            {
                "name": "text",
                "kind": "textarea",
                "label": "Text",
                "placeholder": "Enter text...",
            },
        ]

    def get_header_buttons(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
        """Return the base class header buttons followed by the "enhance" button.

        A new list is built (mirroring ``get_fields``) instead of appending to
        the list returned by the base class, so a list the base class might
        share or cache is never mutated from here.
        """
        return [
            *super().get_header_buttons(node),
            {
                "name": "enhance",
                "icon": "text_increase",
                "tooltip": "Enhance",
                "action": as_action_name(self._on_enhance_text_action),
                # NOTE(review): these two keys are interpreted by the frontend;
                # presumably they gate the button on non-empty "text" and on an
                # idle node -- confirm against the UI code.
                "requiresContent": ["text"],
                "disableWhileProcessing": True,
            },
        ]

    @staticmethod
    def _build_enhance_prompt(user_text: str) -> str:
        """Return the full enhancement prompt with *user_text* appended as the user input."""
        # todo: improve prompt to correctly enhance text
        # maybe also add multiple enhancement targets
        return (
            "You are an expert prompt engineer for generative visual models, including image and video generation.\n"
            "Your task is to expand the user's input into a clear, detailed, and optimized generation prompt.\n\n"
            "MODALITY AWARENESS:\n"
            "- The output may be used for IMAGE generation or VIDEO generation.\n"
            "- If the user mentions motion, time, camera movement, transitions, or duration, optimize for video.\n"
            "- Otherwise, optimize for a single image.\n\n"
            "REFERENCE IMAGE HANDLING:\n"
            "- Only assume reference images exist if the user explicitly refers to them.\n"
            "- If the user does not clearly indicate reference images, assume none are provided.\n"
            "- When reference images are used, they are provided directly to the generation model.\n"
            "- You do NOT know what reference images look like.\n"
            "- You MUST NOT describe, infer, or invent any visual details of reference images.\n"
            '- Refer to them only abstractly, for example: "the provided image", "the provided car image", "the reference image".\n\n'
            "WHEN REFERENCE IMAGES ARE PRESENT:\n"
            "- Focus on transformation rules, relationships, and constraints.\n"
            "- Describe how elements are combined, placed, blended, animated, or stylized.\n"
            "- Specify which image provides dominant style, structure, or motion cues if relevant.\n"
            "- Ensure realism, consistency, scale matching, perspective coherence, lighting continuity, "
            "clean blending, and natural integration.\n\n"
            "WHEN NO REFERENCE IMAGES ARE PRESENT:\n"
            "- Write a complete, descriptive prompt suitable for text only generation.\n"
            "- Include subject, environment, artistic style, lighting, color palette, composition.\n"
            "- For video, also include motion, pacing, camera behavior, and temporal consistency.\n\n"
            "VIDEO SPECIFIC GUIDELINES (only if applicable):\n"
            "- Describe motion clearly and continuously over time.\n"
            "- Specify camera movement, subject motion, scene evolution, and visual continuity.\n"
            "- Avoid jump cuts unless explicitly requested.\n\n"
            "STRICT OUTPUT RULES:\n"
            "- Return ONLY the final prompt text.\n"
            "- Do NOT include explanations, headings, or markdown.\n"
            "- Do NOT mention that rules were followed.\n\n"
            f"User input:\n{user_text}"
        )

    @node_action
    async def _on_enhance_text_action(self, args: NodeActionArguments) -> AsyncNodeActionResultIterator:
        """Replace the node's text with a model-enhanced version of it.

        Yields a progress result first and a node-update result last. When the
        current text is missing or blank, no generation is requested; when the
        service returns no text, the existing text is left untouched. In both
        cases the final node-update result is still yielded so the sequence of
        results matches the successful path.

        Args:
            args: Action arguments; ``args.node`` is the node whose "text"
                output is read and rewritten.
        """
        yield NodeActionResult(progress_message="enhancing...")
        text_output = args.node.outputs["text"]

        # Server-side guard: without it a missing value would be sent to the
        # model as the literal string "None".
        user_text = text_output.value
        if user_text is None or not str(user_text).strip():
            logger.warning("Enhance requested without any text; skipping generation.")
            yield NodeActionResult.update_node()
            return

        # run model
        service_info = get_services_by_config(TextGenerator)
        service_id = service_info.default.get_service_id()
        registry = get_service_registry()
        text_service = registry.create(TextGenerator, service_id)

        input_data = TextGenerationInput(prompt=self._build_enhance_prompt(user_text))
        result = await text_service.generate_async(input_data)
        await args.node.on_generation_result(result)

        # Never wipe the user's text with an empty generation result.
        if result.text:
            text_output.value = result.text
        else:
            logger.warning("Text enhancement returned no text; keeping the existing value.")
        yield NodeActionResult.update_node()