Spaces:

DarwinAnim8or
/

SmolLM3-img-gen

Paused

App Files Files Community

DarwinAnim8or committed on Jul 14

Commit

e2c3a04

verified ·

1 Parent(s): 99eb889

Create app.py

Browse files

Files changed (1) hide show

app.py +299 -0

app.py ADDED Viewed

	@@ -0,0 +1,299 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import requests
+import io
+from PIL import Image
+import re
+import json
+import xml.etree.ElementTree as ET
class SmolLMWithTools:
    """Chat wrapper around SmolLM3 with one callable tool: FLUX image generation.

    The instance owns the tokenizer and model loaded from ``self.checkpoint``
    and an optional Hugging Face API token used for the hosted FLUX endpoint.
    One conversation turn renders the messages together with the tool schema,
    generates a reply, looks for tool calls in that reply, executes them, and
    asks the model for a final answer that mentions the tool results.
    """

    # Upper bound in seconds for one image request. Without a timeout a stalled
    # endpoint would block the calling UI handler indefinitely.
    IMAGE_REQUEST_TIMEOUT = 120

    # <tool_call><invoke name="..."><parameter name="...">value</parameter></invoke></tool_call>
    _XML_TOOL_CALL_RE = re.compile(
        r'<tool_call>\s*<invoke name="([^"]+)">\s*<parameter name="([^"]+)">([^<]+)</parameter>\s*</invoke>\s*</tool_call>',
        re.DOTALL,
    )
    # <tool_call>{"name": ..., "arguments": {...}}</tool_call>; the tempered
    # dot keeps one match from spanning two separate tool_call blocks.
    _JSON_TOOL_CALL_RE = re.compile(
        r"<tool_call>\s*(\{(?:(?!</?tool_call>).)*\})\s*</tool_call>",
        re.DOTALL,
    )

    def __init__(self):
        """Load the SmolLM3 tokenizer and model and declare the tool schema."""
        self.checkpoint = "HuggingFaceTB/SmolLM3-3B"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading SmolLM3 on {self.device}...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.checkpoint)
        # Half precision on GPU, full precision on CPU.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.checkpoint,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
        ).to(self.device)

        # HF API setup for FLUX
        self.hf_token = None
        self.flux_api_url = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell"

        # Tool schema handed to the chat template on every generation.
        self.tools = [
            {
                "name": "generate_image",
                "description": "Generate an image using AI based on a text description. Use this when the user asks for images, pictures, drawings, or visual content.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {
                            "type": "string",
                            "description": "A detailed description of the image to generate. Be specific and descriptive."
                        }
                    },
                    "required": ["prompt"]
                }
            }
        ]
        print("Model loaded successfully!")

    def set_hf_token(self, token):
        """Store ``token`` as the default Hugging Face API token and return a status string."""
        self.hf_token = token
        return "✅ HF Token set successfully!"

    def generate_image_tool(self, prompt, hf_token=None):
        """Request an image for ``prompt`` from the FLUX endpoint.

        Args:
            prompt: Text description of the image.
            hf_token: Optional token for this call only; falls back to the
                token stored on the instance when omitted or empty.

        Returns:
            A dict with ``success`` and ``image`` keys, plus ``message`` on
            success or ``error`` on failure. ``image`` is a PIL image or None.
        """
        token = hf_token or self.hf_token
        if not token:
            return {"success": False, "error": "HF token not set", "image": None}
        if not isinstance(prompt, str) or not prompt.strip():
            # Nothing useful to send; avoid a pointless network round trip.
            return {"success": False, "error": "Empty image prompt", "image": None}

        headers = {"Authorization": f"Bearer {token}"}
        data = {"inputs": prompt}
        try:
            response = requests.post(
                self.flux_api_url,
                headers=headers,
                json=data,
                timeout=self.IMAGE_REQUEST_TIMEOUT,
            )
            if response.status_code == 200:
                image = Image.open(io.BytesIO(response.content))
                # Image.open is lazy; decode now so a corrupt payload is
                # reported here instead of later inside the UI.
                image.load()
                return {"success": True, "message": f"Successfully generated image: {prompt}", "image": image}
            if response.status_code == 503:
                return {"success": False, "error": "Model is loading, please try again", "image": None}
            return {"success": False, "error": f"API error: {response.status_code}", "image": None}
        except Exception as e:
            # Deliberately broad: a tool failure is reported back to the model
            # as text rather than aborting the conversation turn.
            return {"success": False, "error": str(e), "image": None}

    @staticmethod
    def _coerce_arguments(raw_arguments):
        """Return tool arguments as a dict, accepting a dict or a JSON-encoded string."""
        if isinstance(raw_arguments, str):
            try:
                raw_arguments = json.loads(raw_arguments)
            except json.JSONDecodeError:
                return {}
        return raw_arguments if isinstance(raw_arguments, dict) else {}

    def parse_tool_calls(self, text):
        """Extract tool calls from model output, in order of appearance.

        Two encodings inside ``<tool_call>...</tool_call>`` are recognised:
        the ``<invoke name=...><parameter name=...>`` XML form, and a JSON
        object with ``name`` and ``arguments`` (or ``parameters``) keys.

        NOTE(review): the JSON form is believed to be what SmolLM3 produces
        when tools are passed via ``xml_tools`` -- confirm against the model
        card.

        Returns:
            A list of ``{"name": str, "parameters": dict}`` dicts; empty when
            nothing parseable is found.
        """
        if not text:
            return []

        located = []  # (start offset, call) pairs so both formats interleave correctly
        for match in self._XML_TOOL_CALL_RE.finditer(text):
            tool_name, param_name, param_value = match.groups()
            located.append((match.start(), {
                "name": tool_name,
                "parameters": {param_name: param_value.strip()}
            }))

        for match in self._JSON_TOOL_CALL_RE.finditer(text):
            try:
                payload = json.loads(match.group(1))
            except json.JSONDecodeError:
                continue  # malformed call: skip it rather than fail the turn
            if not isinstance(payload, dict) or not isinstance(payload.get("name"), str):
                continue
            raw_arguments = payload.get("arguments", payload.get("parameters"))
            located.append((match.start(), {
                "name": payload["name"],
                "parameters": self._coerce_arguments(raw_arguments)
            }))

        located.sort(key=lambda item: item[0])
        return [call for _, call in located]

    def execute_tool_call(self, tool_call, hf_token=None):
        """Run one parsed tool call and return its result dict.

        Args:
            tool_call: A dict as produced by ``parse_tool_calls``.
            hf_token: Optional per-call token forwarded to the image tool.

        Returns:
            The tool's result dict. Unknown tools yield a failure dict with
            the same keys as an image failure, so callers can treat every
            result uniformly.
        """
        tool_name = tool_call["name"]
        parameters = tool_call.get("parameters") or {}
        if tool_name == "generate_image":
            return self.generate_image_tool(parameters.get("prompt", ""), hf_token=hf_token)
        return {"success": False, "error": f"Unknown tool: {tool_name}", "image": None}

    def chat_with_tools(self, messages):
        """Generate the assistant reply for ``messages`` with the tool schema attached.

        Returns:
            The newly generated text, or an ``"Error generating response: ..."``
            string if templating or generation raises.
        """
        try:
            # enable_thinking and xml_tools are forwarded to the chat template.
            inputs = self.tokenizer.apply_chat_template(
                messages,
                enable_thinking=False,
                xml_tools=self.tools,
                add_generation_prompt=True,
                tokenize=True,
                return_tensors="pt"
            )
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_new_tokens=1024,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Drop the prompt by token count. Slicing the decoded string by the
            # decoded prompt's length is wrong whenever decoding is not
            # prefix-stable (e.g. whitespace clean-up at the boundary).
            # NOTE(review): skip_special_tokens=True would also remove the
            # <tool_call> markers if the tokenizer flags them as special --
            # confirm against the tokenizer config.
            generated_ids = outputs[0][inputs.shape[-1]:]
            return self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def process_conversation(self, user_message, history, hf_token):
        """Handle one user turn, running any tool calls the model requests.

        Args:
            user_message: The new user text.
            history: List of ``[user, assistant]`` pairs; appended to in place.
                ``None`` is treated as an empty history.
            hf_token: Token from the UI. It is used for this turn's tool calls
                even when a different token was stored earlier, and it seeds
                the stored default if none is set yet.

        Returns:
            ``(history, "", generated_image)`` where the empty string clears
            the input box and ``generated_image`` is a PIL image or None.
        """
        if history is None:
            history = []
        if hf_token and not self.hf_token:
            self.set_hf_token(hf_token)

        # Rebuild the chat-format message list from the pair history.
        messages = []
        for turn in history:
            user_turn, assistant_turn = turn[0], turn[1]
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": user_message})

        assistant_response = self.chat_with_tools(messages)
        tool_calls = self.parse_tool_calls(assistant_response)
        generated_image = None
        final_response = assistant_response

        if tool_calls:
            tool_results = []
            for tool_call in tool_calls:
                result = self.execute_tool_call(tool_call, hf_token=hf_token)
                tool_results.append(result)
                if tool_call["name"] == "generate_image" and result.get("image"):
                    generated_image = result["image"]

            # Feed the outcome back as a follow-up user message and let the
            # model phrase the final answer.
            messages.append({"role": "assistant", "content": assistant_response})
            tool_summary = "\n".join(
                f"Tool {i+1} result: {result.get('message', result.get('error', 'Unknown result'))}"
                for i, result in enumerate(tool_results)
            )
            messages.append({"role": "user", "content": f"Tool execution results: {tool_summary}\n\nPlease respond to the user about the results."})
            final_response = self.chat_with_tools(messages)

        history.append([user_message, final_response])
        return history, "", generated_image
# Single shared chat backend; constructing it loads the model at import time.
chat_system = SmolLMWithTools()


def create_interface():
    """Assemble the Gradio Blocks UI (chat column + image column) and return it."""
    with gr.Blocks(title="SmolLM3 Tool Calling + FLUX", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🤖🛠️ SmolLM3 with Tool Calling + FLUX
        SmolLM3 can autonomously decide when to generate images based on your conversation!
        Just chat naturally - the model will call the image generation tool when appropriate.
        **Examples:**
        - "Can you create a picture of a sunset?"
        - "I need an image of a robot for my presentation"
        - "Draw me a fantasy landscape"
        - "Show me what a purple elephant would look like"
        """)

        with gr.Row():
            # Left: token field, conversation, message box and buttons.
            with gr.Column(scale=2):
                token_box = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    placeholder="Enter your HF token for image generation",
                    type="password"
                )
                chat_window = gr.Chatbot(
                    label="Chat with SmolLM3 (Tool Calling Enabled)",
                    height=500,
                    show_copy_button=True
                )
                message_box = gr.Textbox(
                    label="Message",
                    placeholder="Ask for anything - SmolLM3 will decide if it needs to generate an image...",
                    lines=3
                )
                with gr.Row():
                    send_button = gr.Button("Send 📤", variant="primary")
                    clear_button = gr.Button("Clear 🗑️")

            # Right: latest generated image and a short tool list.
            with gr.Column(scale=1):
                image_panel = gr.Image(
                    label="Generated Images",
                    height=500
                )
                gr.Markdown("""
                ### 🔧 Available Tools:
                - **generate_image**: Creates images from text descriptions
                The model decides autonomously when to use tools based on context!
                """)

        def respond(message, history, hf_token):
            """Run one chat turn; blank messages leave the history untouched."""
            if message.strip():
                return chat_system.process_conversation(message, history, hf_token)
            return history, "", None

        def reset_chat():
            """Empty the conversation and remove the displayed image."""
            return [], None

        # The Send button and the Enter key share the same handler and wiring.
        turn_inputs = [message_box, chat_window, token_box]
        turn_outputs = [chat_window, message_box, image_panel]
        for register in (send_button.click, message_box.submit):
            register(respond, inputs=turn_inputs, outputs=turn_outputs)

        clear_button.click(reset_chat, outputs=[chat_window, image_panel])

        gr.Markdown("""
        ### 📝 Setup Instructions:
        1. **Get HF Token**: Visit [HuggingFace Tokens](https://huggingface.co/settings/tokens)
        2. **Create Token**: Generate a token with "Read" permissions
        3. **Enter Token**: Paste it in the field above
        4. **Start Chatting**: Ask for anything - images, questions, explanations!
        ### 🧠 How it Works:
        - SmolLM3 analyzes your message
        - Decides if it needs to call tools
        - Generates appropriate tool calls
        - Executes the tools and responds with results
        **The AI is in full control of when and how to use tools!**
        """)
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces, port 7860,
    # without a public share link and with Gradio debug mode on.
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)