Spaces:

kratadata
/

velai-workshop

Running

App Files Files Community

velai-workshop / velai /nodes /text_data.py

kratadata

Upload folder via script

0f8b3a0 verified 23 days ago

raw

history blame contribute delete

5.58 kB

	from __future__ import annotations

	import logging
	from typing import Any

	from velai.data_types import TextType
	from velai.dataflow.enums import NodeKind, PortDirection
	from velai.dataflow.nodes import NodeType
	from velai.dataflow.ports import PortSchema
	from velai.nodes.actions.node_action_decorator import as_action_name, node_action
	from velai.nodes.actions.node_action_models import AsyncNodeActionResultIterator, NodeActionArguments, NodeActionResult
	from velai.nodes.base_node import BaseNode, BaseNodeData
	from velai.nodes.base_node_renderable import T_BASE_NODE, BaseNodeRenderable
	from velai.services.registry import get_service_registry
	from velai.services.registry_utils import get_services_by_config
	from velai.services.text.TextGenerator import TextGenerationInput, TextGenerator

	logger = logging.getLogger(__name__)

	# Type descriptor for the text node: it takes no inputs and declares a
	# single output port named "text" carrying a TextType value.
	TextDataNodeType = NodeType(
	    kind=NodeKind.TEXT_DATA,
	    display_name="Text",
	    inputs=[],
	    outputs=[
	        PortSchema(
	            name="text",
	            dtype=TextType,
	            direction=PortDirection.OUTPUT,
	            tooltip="Text",
	        ),
	    ],
	)


	class TextDataNode(BaseNode[BaseNodeData]):
	    """Constant-text node.

	    Holds a text value on its "text" output port; the value is edited
	    through the UI. The class adds no behavior of its own on top of
	    ``BaseNode``.
	    """


	class TextDataNodeRenderable(BaseNodeRenderable[TextDataNode]):
	    """UI description and actions for the text node.

	    Extends the base renderable with a "text" textarea field and an
	    "enhance" header button. The button triggers an action that sends the
	    current text to a ``TextGenerator`` service and writes the generated
	    text back to the node's "text" output.
	    """

	    def get_fields(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
	        """Return the base fields followed by the "text" textarea field."""
	        return [
	            *super().get_fields(node),
	            {
	                "name": "text",
	                "kind": "textarea",
	                "label": "Text",
	                "placeholder": "Enter text...",
	            },
	        ]

	    def get_header_buttons(self, node: T_BASE_NODE) -> list[dict[str, Any]]:
	        """Return the base header buttons followed by the "enhance" button.

	        A new list is built (mirroring ``get_fields``) instead of appending
	        to the list returned by the base class, so a list that the base
	        class might share or cache is never mutated.
	        """
	        return [
	            *super().get_header_buttons(node),
	            {
	                "name": "enhance",
	                "icon": "text_increase",
	                "tooltip": "Enhance",
	                "action": as_action_name(self._on_enhance_text_action),
	                "requiresContent": ["text"],
	                "disableWhileProcessing": True,
	            },
	        ]

	    @staticmethod
	    def _build_enhance_prompt(user_text: Any) -> str:
	        """Return the enhancement instructions with *user_text* appended."""
	        # todo: improve prompt to correctly enhance text
	        # maybe also add multiple enhancement targets
	        return (
	            "You are an expert prompt engineer for generative visual models, including image and video generation.\n"
	            "Your task is to expand the user's input into a clear, detailed, and optimized generation prompt.\n\n"
	            "MODALITY AWARENESS:\n"
	            "- The output may be used for IMAGE generation or VIDEO generation.\n"
	            "- If the user mentions motion, time, camera movement, transitions, or duration, optimize for video.\n"
	            "- Otherwise, optimize for a single image.\n\n"
	            "REFERENCE IMAGE HANDLING:\n"
	            "- Only assume reference images exist if the user explicitly refers to them.\n"
	            "- If the user does not clearly indicate reference images, assume none are provided.\n"
	            "- When reference images are used, they are provided directly to the generation model.\n"
	            "- You do NOT know what reference images look like.\n"
	            "- You MUST NOT describe, infer, or invent any visual details of reference images.\n"
	            '- Refer to them only abstractly, for example: "the provided image", "the provided car image", "the reference image".\n\n'
	            "WHEN REFERENCE IMAGES ARE PRESENT:\n"
	            "- Focus on transformation rules, relationships, and constraints.\n"
	            "- Describe how elements are combined, placed, blended, animated, or stylized.\n"
	            "- Specify which image provides dominant style, structure, or motion cues if relevant.\n"
	            "- Ensure realism, consistency, scale matching, perspective coherence, lighting continuity, "
	            "clean blending, and natural integration.\n\n"
	            "WHEN NO REFERENCE IMAGES ARE PRESENT:\n"
	            "- Write a complete, descriptive prompt suitable for text only generation.\n"
	            "- Include subject, environment, artistic style, lighting, color palette, composition.\n"
	            "- For video, also include motion, pacing, camera behavior, and temporal consistency.\n\n"
	            "VIDEO SPECIFIC GUIDELINES (only if applicable):\n"
	            "- Describe motion clearly and continuously over time.\n"
	            "- Specify camera movement, subject motion, scene evolution, and visual continuity.\n"
	            "- Avoid jump cuts unless explicitly requested.\n\n"
	            "STRICT OUTPUT RULES:\n"
	            "- Return ONLY the final prompt text.\n"
	            "- Do NOT include explanations, headings, or markdown.\n"
	            "- Do NOT mention that rules were followed.\n\n"
	            f"User input:\n{user_text}"
	        )

	    @node_action
	    async def _on_enhance_text_action(self, args: NodeActionArguments) -> AsyncNodeActionResultIterator:
	        """Replace the node's text with a model-enhanced version of it.

	        Yields a progress result first, then runs the text through the
	        ``TextGenerator`` service selected as ``default`` by
	        ``get_services_by_config``, forwards the raw result to
	        ``args.node.on_generation_result`` and finally yields a node update.

	        The stored text is left untouched when there is nothing to enhance
	        or when the service returns no usable text, so a failed generation
	        never erases what the user typed.
	        """
	        yield NodeActionResult(progress_message="enhancing...")

	        text_output = args.node.outputs["text"]
	        source_text = text_output.value

	        # The button declares "requiresContent", but that is a UI-side check;
	        # guard here too so "None"/blank input is never sent to the model.
	        if source_text is None or not str(source_text).strip():
	            logger.warning("Enhance requested without any text; skipping generation")
	            # NOTE(review): assumes a node update is the right way to let the
	            # UI leave its processing state when nothing changed - confirm.
	            yield NodeActionResult.update_node()
	            return

	        # run model
	        service_info = get_services_by_config(TextGenerator)
	        service_id = service_info.default.get_service_id()
	        text_service = get_service_registry().create(TextGenerator, service_id)

	        # Referenced through the class so the helper resolves regardless of
	        # how the action decorator binds ``self``.
	        prompt = TextDataNodeRenderable._build_enhance_prompt(source_text)
	        result = await text_service.generate_async(TextGenerationInput(prompt=prompt))

	        await args.node.on_generation_result(result)

	        enhanced_text = result.text
	        if enhanced_text is not None and str(enhanced_text).strip():
	            text_output.value = enhanced_text
	        else:
	            # Keep the existing text instead of overwriting it with nothing.
	            logger.warning("Text generation returned no text; keeping the original value")

	        yield NodeActionResult.update_node()