Spaces:

ibyteohdear
/

agentLLM

Paused

App Files Files Community

ibyteohdear committed on Jan 25

Commit

52d433d

verified ·

1 Parent(s): da96fc3

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -67

app.py CHANGED Viewed

@@ -46,6 +46,74 @@ text_to_image_client = InferenceClient(
     api_key=token
 )
 @tool
 def nsfw_detection_tool(nsfw_detection_input:  Image.Image) -> str:
     """
@@ -54,7 +122,8 @@ def nsfw_detection_tool(nsfw_detection_input:  Image.Image) -> str:
         nsfw_detection_input (Image.Image): The image to check.
     Returns:
         str: Highest score result.
-    """
     try:
         tmp_path = pil_to_tempfile(nsfw_detection_input)
@@ -105,7 +174,6 @@ def image_tool(prompt: str) -> str:
     except Exception as e:
         image_output = None
-        print(f"Image generation failed: {e}")
         return f"Image generation failed: {e}"
 @tool
@@ -135,6 +203,7 @@ model = InferenceClientModel(
 agent = CodeAgent(
     model=model,
     tools=[
         image_tool,
         nsfw_detection_tool,
         search_tool,
@@ -150,6 +219,8 @@ agent.prompt_templates["system_prompt"] += """
     - search_tool(query: str) -> str
     - Search the web and return the most relevant results.
     - Used for sentiment analysis
     - image_tool(prompt: str) -> str
     - Generate an image from a text prompt, if successfull or not you will be notified by the return string.
     - nsfw_detection_tool(nsfw_detection_input: Image.Image) -> str
@@ -162,110 +233,154 @@ agent.prompt_templates["system_prompt"] += """
     - You must use this answer in final_answer
 """
-def run_agent(query, nsfw_detection_input):
     global image_output
     image_output = None
     yield None, "⏳ Jerry is thinking... please wait"
     try:
         response = agent.run(
             query if query else "",
-            additional_args={"nsfw_detection_input": nsfw_detection_input}
         )
-        yield image_output, str(response)
     except Exception as e:
-        yield None, f"��� Agent Error: {str(e)}"
 with gr.Blocks(title="Jerry AI Assistant") as demo:
     gr.Markdown("# 🤖 Jerry - Your AI Assistant")
     agent_response = gr.Textbox(
         label="Response",
         lines=5,
         interactive=False
     )
     with gr.Tab("💬 Chat"):
-        with gr.Row():
-            query_chat = gr.Textbox(
-                lines=3,
-                label="Ask me anything...",
-                placeholder="Generate an image of a cat, analyze its sentiment, etc.",
-                scale=4
-            )
-        with gr.Row():
-            run_chat_btn = gr.Button("🚀 Run", variant="primary", scale=1)
-            clear_chat_btn = gr.Button("🗑️ Clear", scale=0)
-        gr.Examples(
-            examples=[
-                "How do i cook a curry quickly",
-                "Analyze the sentiment: This is terrible service",
-                "Translate this text to English 读写汉字 - 学中文",
-            ],
-            inputs=[query_chat],
-            label="💡 Try these:"
         )
         hidden_image_chat = gr.Image(visible=False)
         run_chat_btn.click(
             fn=run_agent,
-            inputs=[query_chat, hidden_image_chat],
             outputs=[hidden_image_chat, agent_response]
         )
-    with gr.Tab("🎨 Image Tools"):
-        with gr.Row():
-            nsfw_detection_input = gr.Image(
-                label="Upload for NSFW check",
-                type="pil",
-                height=300
-            )
-            image_output = gr.Image(
-                label="Generated Image",
-                height=300
-            )
-        with gr.Row():
-            query_img = gr.Textbox(
-                lines=2,
-                label="Image generation prompt",
-                placeholder="A beautiful sunset over mountains..."
-            )
         with gr.Row():
-            check_nsfw_btn = gr.Button("🔍 Check NSFW")
-            run_img_btn = gr.Button("🎨 Generate Image", variant="primary")
-        gr.Examples(
-            examples=[
-                "A cyberpunk cat with neon glowing eyes",
-                "A serene Japanese garden with cherry blossoms",
-                "A futuristic city with flying cars at sunset",
-                "A magical forest with bioluminescent plants",
-                "A steampunk robot drinking tea in a Victorian parlor"
             ],
-            inputs=[query_img],
-            label="🎨 Try these prompts:"
         )
         hidden_text_img = gr.Textbox(visible=False)
         hidden_image_img = gr.Image(visible=False)
         check_nsfw_btn.click(
             fn=run_agent,
-            inputs=[hidden_text_img, nsfw_detection_input],
             outputs=[hidden_image_img, agent_response]
         )
         run_img_btn.click(
             fn=run_agent,
-            inputs=[query_img, hidden_image_img],
             outputs=[image_output, agent_response]
         )

     api_key=token
 )
def resize_and_crop(image, target_res=(832, 480)):
    """Scale ``image`` so it covers ``target_res``, then crop it to that size.

    The aspect ratio is preserved while scaling (LANCZOS resampling), so one
    side matches the target and the other overflows it. Horizontal overflow
    is trimmed evenly from both sides. Vertical overflow is trimmed evenly for
    landscape and square inputs; for portrait inputs (height > width) only a
    quarter of it is removed from the top, so the crop window sits nearer the
    top of the picture.

    Args:
        image: Object exposing ``size``, ``resize`` and ``crop`` the way they
            are used below (presumably a PIL image - confirm with callers).
        target_res: ``(width, height)`` of the result, in pixels.

    Returns:
        Whatever ``image.crop`` returns for the computed
        ``target_res``-sized box.
    """
    target_w, target_h = target_res
    src_w, src_h = image.size
    scale = max(target_w / src_w, target_h / src_h)

    # int() truncates, and float rounding in ``src * scale`` can leave the
    # limiting side one pixel short of the target. Clamp so the crop box
    # never extends past the resized image.
    scaled_w = max(target_w, int(src_w * scale))
    scaled_h = max(target_h, int(src_h * scale))
    resized = image.resize((scaled_w, scaled_h), Image.LANCZOS)

    left = (scaled_w - target_w) // 2
    overflow_h = scaled_h - target_h
    # Portrait sources keep more of the upper part of the frame.
    top = int(overflow_h * 0.25) if src_h > src_w else overflow_h // 2

    return resized.crop((left, top, left + target_w, top + target_h))
def aligned_num_frames(duration, fps=16):
    """Return the frame count for a clip, rounded down to the form ``4k + 1``.

    ``duration * fps`` is truncated to an integer and then rounded down to the
    nearest value that is one more than a multiple of four (1, 5, 9, ...).
    Presumably this is the alignment the video model expects - confirm
    against the model's documentation.

    Args:
        duration: Clip length in seconds.
        fps: Frames per second.

    Returns:
        int: Frame count, always at least 1. Zero, negative or very short
        durations yield 1 rather than a non-positive count.
    """
    # Clamp first: a raw count of 0 would otherwise give ((0 - 1) // 4) * 4 + 1 == -3.
    raw_frames = max(1, int(duration * fps))
    return ((raw_frames - 1) // 4) * 4 + 1
# Path of the most recently generated video file, or None when there is none
# (nothing generated yet, or the last attempt failed). Written by video_tool.
video_output = None


@tool
def video_tool(
    video_image_input: Image.Image,
    video_prompt: str,
    video_duration: float,
    video_steps: int,
    video_guidance: float,
    video_randomize: bool,
) -> str:
    """
    Generates a video from a starting image and a text prompt using Wan 2.1 via fal-ai.
    Args:
        video_image_input (Image.Image): The source image to be animated.
        video_prompt (str): A text description of the motion or scene to generate.
        video_duration (float): Length of the video in seconds.
        video_steps (int): The number of diffusion inference steps (higher is better quality).
        video_guidance (float): Classifier-free guidance scale for prompt adherence.
        video_randomize (bool): Whether to use a random seed for varied results.
    Returns:
        str: A confirmation message.
    """
    # Without this declaration the assignments below bind a function-local
    # name and the module-level result is never updated.
    global video_output
    try:
        if video_image_input is None:
            raise ValueError("no input image was provided")

        FPS = 16
        num_frames = aligned_num_frames(video_duration, FPS)
        # A fixed seed keeps results reproducible when randomisation is off.
        seed = random.randint(0, 1_000_000_000) if video_randomize else 42

        # NOTE(review): `client` is not defined in the visible part of the
        # file - confirm it is the inference client meant for video.
        video_bytes = client.image_to_video(
            # Cover-and-crop keeps the aspect ratio; a plain resize to
            # 832x480 would stretch non-matching images.
            image=resize_and_crop(video_image_input, (832, 480)),
            prompt=video_prompt,
            negative_prompt="low quality, deformed",
            num_frames=num_frames,
            num_inference_steps=int(video_steps),
            seed=seed,
            guidance_scale=float(video_guidance),
        )

        # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
        # returns a name and is deprecated because of the resulting race.
        # delete=False keeps the file on disk after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
            f.write(video_bytes)
            out = f.name

        gc.collect()
        video_output = out
        return "Video successfully generated and stored for Gradio UI."
    except Exception as e:
        # Report the failure as the tool's return string rather than raising.
        video_output = None
        return f"Video generation failed: {e}"
 @tool
 def nsfw_detection_tool(nsfw_detection_input:  Image.Image) -> str:
     """
         nsfw_detection_input (Image.Image): The image to check.
     Returns:
         str: Highest score result.
+    """
+    global video_output
     try:
         tmp_path = pil_to_tempfile(nsfw_detection_input)
     except Exception as e:
         image_output = None
         return f"Image generation failed: {e}"
 @tool
 agent = CodeAgent(
     model=model,
     tools=[
+        video_tool,
         image_tool,
         nsfw_detection_tool,
         search_tool,
     - search_tool(query: str) -> str
     - Search the web and return the most relevant results.
     - Used for sentiment analysis
+    - video_tool(video_image_input: Image.Imagem, video_prompt: str, video_duration: float, video_steps: int, video_guidance: float, video_randomize: bool) -> str
+    - Generate a video from a text prompt and an image input, if successfull or not you will be notified by the return string.
     - image_tool(prompt: str) -> str
     - Generate an image from a text prompt, if successfull or not you will be notified by the return string.
     - nsfw_detection_tool(nsfw_detection_input: Image.Image) -> str
     - You must use this answer in final_answer
 """
def run_agent(query, nsfw_detection_input, video_image_input, video_prompt, video_duration, video_steps, video_guidance, video_randomize):
    """Run the agent for one UI request and stream the results.

    This generator is the callback of every button below. Each value it
    yields is an ``(image, video, text)`` triple, so every handler wired to
    it must declare exactly three output components.

    Args:
        query: Free-text request for the agent; empty or None is sent as "".
        nsfw_detection_input: Image forwarded to the agent for the NSFW check.
        video_image_input: Start image forwarded for video generation.
        video_prompt: Prompt forwarded for video generation.
        video_duration: Video length setting forwarded to the agent.
        video_steps: Inference-step setting forwarded to the agent.
        video_guidance: Guidance setting forwarded to the agent.
        video_randomize: Seed-randomisation flag forwarded to the agent.

    Yields:
        tuple: First ``(None, None, <waiting message>)``, then either
        ``(image_output, video_output, <agent response>)`` or
        ``(None, None, <error message>)`` if the agent raised.
    """
    global image_output
    global video_output
    # Clear results left over from a previous request.
    image_output = None
    video_output = None

    # Must have the same arity as the final yields, otherwise Gradio rejects
    # the update for handlers with three outputs.
    yield None, None, "⏳ Jerry is thinking... please wait"
    try:
        response = agent.run(
            query if query else "",
            additional_args={
                "nsfw_detection_input": nsfw_detection_input,
                "video_image_input": video_image_input,
                "video_prompt": video_prompt,
                "video_duration": video_duration,
                "video_steps": video_steps,
                "video_guidance": video_guidance,
                "video_randomize": video_randomize,
            },
        )
        yield image_output, video_output, str(response)
    except Exception as e:
        yield None, None, f"❌ Agent Error: {str(e)}"


with gr.Blocks(title="Jerry AI Assistant") as demo:
    # Invisible placeholders that fill the run_agent arguments a given tab
    # does not use. They are created inside the Blocks context because
    # components created outside it are not registered with the app and
    # cannot be used by its event handlers.
    hidden_none_img = gr.Image(visible=False)
    hidden_none_txt = gr.Textbox(visible=False)
    hidden_none_float = gr.Number(visible=False, value=0)
    hidden_none_bool = gr.Checkbox(visible=False, value=False)
    # Invisible sink for the video slot of run_agent on tabs without a player.
    hidden_none_vid = gr.Video(visible=False)
    # Placeholder values for the six video_* arguments, in signature order.
    unused_video_inputs = [
        hidden_none_img,
        hidden_none_txt,
        hidden_none_float,
        hidden_none_float,
        hidden_none_float,
        hidden_none_bool,
    ]

    gr.Markdown("# 🤖 Jerry - Your AI Assistant")
    agent_response = gr.Textbox(
        label="Response",
        lines=5,
        interactive=False
    )
    with gr.Tab("💬 Chat"):
        query_chat = gr.Textbox(
            lines=3,
            label="Ask me anything...",
            placeholder="Generate an image of a cat, analyze sentiment, etc."
        )
        run_chat_btn = gr.Button("🚀 Run", variant="primary")
        clear_chat_btn = gr.Button("🗑️ Clear")
        hidden_image_chat = gr.Image(visible=False)
        run_chat_btn.click(
            fn=run_agent,
            inputs=[query_chat, hidden_none_img, *unused_video_inputs],
            outputs=[hidden_image_chat, hidden_none_vid, agent_response]
        )
        clear_chat_btn.click(
            lambda: ("", ""),
            outputs=[query_chat, agent_response]
        )
    with gr.Tab("🎨 Video Tools"):
        with gr.Row():
            with gr.Column():
                video_image_input = gr.Image(type="pil", label="Input Image")
                video_prompt = gr.Textbox(lines=3, label="Prompt")
                video_duration = gr.Slider(1, 4, value=4, step=0.1, label="Duration (s)")
                video_steps = gr.Slider(4, 20, value=20, step=1, label="Steps")
                video_guidance = gr.Slider(1.0, 6.0, value=3.0, step=0.1, label="Guidance")
                video_randomize = gr.Checkbox(value=True, label="Randomize Seed")
                gen_btn = gr.Button("🎬 Generate Video", variant="primary")
            with gr.Column():
                output_vid = gr.Video(label="Generated Video")
        # Dedicated sink for the image slot, so the shared input placeholder
        # hidden_none_img is never overwritten with a generated image.
        hidden_image_vid = gr.Image(visible=False)
        gen_btn.click(
            fn=run_agent,
            inputs=[
                hidden_none_txt,
                hidden_none_img,
                video_image_input,
                video_prompt,
                video_duration,
                video_steps,
                video_guidance,
                video_randomize,
            ],
            outputs=[hidden_image_vid, output_vid, agent_response]
        )
    with gr.Tab("🎨 Image Tools"):
        nsfw_detection_input = gr.Image(type="pil", label="Upload for NSFW Check")
        image_output = gr.Image(label="Generated Image")
        query_img = gr.Textbox(
            lines=2,
            label="Image generation prompt",
            placeholder="A cyberpunk cat with neon eyes..."
        )
        check_nsfw_btn = gr.Button("🔍 Check NSFW")
        run_img_btn = gr.Button("🎨 Generate Image", variant="primary")
        hidden_text_img = gr.Textbox(visible=False)
        hidden_image_img = gr.Image(visible=False)
        check_nsfw_btn.click(
            fn=run_agent,
            inputs=[hidden_text_img, nsfw_detection_input, *unused_video_inputs],
            outputs=[hidden_image_img, hidden_none_vid, agent_response]
        )
        run_img_btn.click(
            fn=run_agent,
            inputs=[query_img, hidden_none_img, *unused_video_inputs],
            outputs=[image_output, hidden_none_vid, agent_response]
        )