Spaces:

stacknet
/

stacknet-1-1-preview-demo

Running

hwonder committed on Feb 5

Commit

26cd782

1 Parent(s): 5a42256

Use MCP tool calls for image/video generation

- Add submit_tool_task method to client for MCP tool calls
- Update ImageService to use generate_image_5 and generate_image_edit_5
- Update VideoService to use generate_video_2 and generate_image_to_video_2

Files changed (3) hide show

src/api/client.py +63 -0
src/services/image.py +28 -21
src/services/video.py +16 -21

src/api/client.py CHANGED Viewed

@@ -62,6 +62,69 @@ class StackNetClient:
         self.timeout = timeout
         self._temp_dir = tempfile.mkdtemp(prefix="stacknet_")
     async def submit_media_task(
         self,
         action: MediaAction,

         self.timeout = timeout
         self._temp_dir = tempfile.mkdtemp(prefix="stacknet_")
+    async def submit_tool_task(
+        self,
+        tool_name: str,
+        parameters: dict,
+        server_name: str = "geoff",
+        on_progress: Optional[Callable[[float, str], None]] = None
+    ) -> TaskResult:
+        """
+        Submit an MCP tool task and wait for completion.
+        Args:
+            tool_name: The tool to invoke (e.g., generate_image_5)
+            parameters: Tool parameters
+            server_name: MCP server name (default: geoff)
+            on_progress: Callback for progress updates
+        Returns:
+            TaskResult with success status and output data
+        """
+        payload = {
+            "type": "mcp-tool",
+            "serverName": server_name,
+            "toolName": tool_name,
+            "stream": True,
+            "parameters": parameters
+        }
+        headers = {"Content-Type": "application/json"}
+        if self.api_key:
+            auth_header = self.api_key if self.api_key.startswith("Bearer ") else f"Bearer {self.api_key}"
+            headers["Authorization"] = auth_header
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            try:
+                async with client.stream(
+                    "POST",
+                    f"{self.base_url}/tasks",
+                    json=payload,
+                    headers=headers
+                ) as response:
+                    if response.status_code != 200:
+                        error_text = await response.aread()
+                        return TaskResult(
+                            success=False,
+                            data={},
+                            error=f"API request failed ({response.status_code}): {error_text.decode()[:200]}"
+                        )
+                    return await self._process_sse_stream(response, on_progress)
+            except httpx.TimeoutException:
+                return TaskResult(
+                    success=False,
+                    data={},
+                    error="Request timed out. The operation took too long."
+                )
+            except httpx.RequestError as e:
+                return TaskResult(
+                    success=False,
+                    data={},
+                    error=f"Network error: {str(e)}"
+                )
     async def submit_media_task(
         self,
         action: MediaAction,

src/services/image.py CHANGED Viewed

@@ -8,7 +8,7 @@ Abstracts all API complexity from the UI layer.
 from typing import Callable, Optional, List
 from dataclasses import dataclass
-from ..api.client import StackNetClient, MediaAction
 @dataclass
@@ -41,7 +41,7 @@ class ImageService:
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedImage]:
         """
-        Generate image from a text prompt.
         Args:
             prompt: Description of desired image
@@ -56,14 +56,24 @@ class ImageService:
         if style and style != "Photorealistic":
             full_prompt = f"{prompt}, {style.lower()} style"
-        options = {}
-        if aspect_ratio:
-            options["aspect_ratio"] = aspect_ratio
-        result = await self.client.submit_media_task(
-            action=MediaAction.ANALYZE_VISUAL,
-            prompt=full_prompt,
-            options=options if options else None,
             on_progress=on_progress
         )
@@ -80,7 +90,7 @@ class ImageService:
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedImage]:
         """
-        Edit/transform an existing image.
         Args:
             image_url: URL to source image
@@ -91,16 +101,13 @@ class ImageService:
         Returns:
             List of edited images
         """
-        options = {
-            "strength": strength,
-            "edit_mode": True
-        }
-        result = await self.client.submit_media_task(
-            action=MediaAction.ANALYZE_VISUAL,
-            media_url=image_url,
-            prompt=edit_prompt,
-            options=options,
             on_progress=on_progress
         )

 from typing import Callable, Optional, List
 from dataclasses import dataclass
+from ..api.client import StackNetClient
 @dataclass
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedImage]:
         """
+        Generate image from a text prompt using generate_image_5 tool.
         Args:
             prompt: Description of desired image
         if style and style != "Photorealistic":
             full_prompt = f"{prompt}, {style.lower()} style"
+        # Determine dimensions from aspect ratio
+        width, height = 1024, 1024
+        if aspect_ratio == "16:9":
+            width, height = 1280, 720
+        elif aspect_ratio == "9:16":
+            width, height = 720, 1280
+        elif aspect_ratio == "4:3":
+            width, height = 1024, 768
+        elif aspect_ratio == "3:4":
+            width, height = 768, 1024
+        result = await self.client.submit_tool_task(
+            tool_name="generate_image_5",
+            parameters={
+                "prompt": full_prompt,
+                "width": width,
+                "height": height
+            },
             on_progress=on_progress
         )
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedImage]:
         """
+        Edit/transform an existing image using generate_image_edit_5 tool.
         Args:
             image_url: URL to source image
         Returns:
             List of edited images
         """
+        result = await self.client.submit_tool_task(
+            tool_name="generate_image_edit_5",
+            parameters={
+                "prompt": edit_prompt,
+                "image_url": image_url,
+                "strength": strength
+            },
             on_progress=on_progress
         )

src/services/video.py CHANGED Viewed

@@ -8,7 +8,7 @@ Abstracts all API complexity from the UI layer.
 from typing import Callable, Optional, List
 from dataclasses import dataclass
-from ..api.client import StackNetClient, MediaAction
 @dataclass
@@ -41,7 +41,7 @@ class VideoService:
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedVideo]:
         """
-        Generate video from a text prompt.
         Args:
             prompt: Description of desired video
@@ -56,14 +56,12 @@ class VideoService:
         if style and style != "Cinematic":
             full_prompt = f"{prompt}, {style.lower()} style"
-        options = {
-            "duration": duration
-        }
-        result = await self.client.submit_media_task(
-            action=MediaAction.DESCRIBE_VIDEO,
-            prompt=full_prompt,
-            options=options,
             on_progress=on_progress
         )
@@ -80,7 +78,7 @@ class VideoService:
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedVideo]:
         """
-        Animate a static image into video.
         Args:
             image_url: URL to source image
@@ -91,16 +89,13 @@ class VideoService:
         Returns:
             List of animated videos
         """
-        options = {
-            "duration": duration,
-            "animate_mode": True
-        }
-        result = await self.client.submit_media_task(
-            action=MediaAction.DESCRIBE_VIDEO,
-            media_url=image_url,
-            prompt=motion_prompt,
-            options=options,
             on_progress=on_progress
         )

 from typing import Callable, Optional, List
 from dataclasses import dataclass
+from ..api.client import StackNetClient
 @dataclass
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedVideo]:
         """
+        Generate video from a text prompt using generate_video_2 tool.
         Args:
             prompt: Description of desired video
         if style and style != "Cinematic":
             full_prompt = f"{prompt}, {style.lower()} style"
+        result = await self.client.submit_tool_task(
+            tool_name="generate_video_2",
+            parameters={
+                "prompt": full_prompt,
+                "duration": duration
+            },
             on_progress=on_progress
         )
         on_progress: Optional[Callable[[float, str], None]] = None
     ) -> List[GeneratedVideo]:
         """
+        Animate a static image into video using generate_image_to_video_2 tool.
         Args:
             image_url: URL to source image
         Returns:
             List of animated videos
         """
+        result = await self.client.submit_tool_task(
+            tool_name="generate_image_to_video_2",
+            parameters={
+                "prompt": motion_prompt,
+                "image_url": image_url,
+                "duration": duration
+            },
             on_progress=on_progress
         )