Spaces:

Nishan30
/

n8n-workflow-generator-app

Running

File size: 17,362 Bytes

"""
n8n Workflow Generator - Gradio Web Interface
Deploy this to Hugging Face Spaces
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import json
import re

# ==============================================================================
# CONFIGURATION
# ==============================================================================

MODEL_REPO = "Nishan30/n8n-workflow-generator"  # Update with your HF repo
BASE_MODEL = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

# ==============================================================================
# MODEL LOADING
# ==============================================================================

def load_model():
    """Load model once and cache it"""
    print("Loading model...")

    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    # Load LoRA adapter with error handling for unsupported parameters
    try:
        model = PeftModel.from_pretrained(base_model, MODEL_REPO)
    except TypeError as e:
        if "unexpected keyword argument" in str(e):
            print(f"⚠️ Warning: {e}")
            print("Attempting to load with filtered config...")

            # Download and modify config
            from huggingface_hub import hf_hub_download
            import tempfile
            import shutil

            config_path = hf_hub_download(repo_id=MODEL_REPO, filename="adapter_config.json")
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Remove unsupported parameters
            unsupported_params = ['alora_invocation_tokens', 'alora_invocation_token_ids']
            for param in unsupported_params:
                if param in config:
                    print(f"Removing unsupported parameter: {param}")
                    del config[param]

            # Save modified config to temp directory
            temp_dir = tempfile.mkdtemp()
            temp_config_path = f"{temp_dir}/adapter_config.json"
            with open(temp_config_path, 'w') as f:
                json.dump(config, f, indent=2)

            # Copy other adapter files
            for filename in ['adapter_model.safetensors', 'adapter_model.bin']:
                try:
                    src = hf_hub_download(repo_id=MODEL_REPO, filename=filename)
                    shutil.copy(src, f"{temp_dir}/{filename}")
                    break
                except:
                    continue

            # Load from temp directory
            model = PeftModel.from_pretrained(base_model, temp_dir)

            # Cleanup
            shutil.rmtree(temp_dir)
        else:
            raise

    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)

    print("Model loaded successfully!")
    return model, tokenizer

# Load model at startup (global variable for caching)
print("🔄 Loading model at startup...")
model, tokenizer = load_model()
print("✅ Model loaded and ready!")

# ==============================================================================
# CODE GENERATION
# ==============================================================================

def generate_workflow(prompt, temperature=0.5, max_tokens=1024):
    """Generate n8n workflow code from prompt"""

    if not prompt.strip():
        return "Please enter a workflow description.", None, None

    # IMPORTANT: Use the exact format the model was trained with
    formatted_prompt = f"""### System:
You are an expert n8n workflow generator. Given a user's request, you generate clean, functional TypeScript code using the @n8n-generator/core DSL.

Your output should:
- Only contain the code, no explanations
- Use the Workflow class from @n8n-generator/core
- Use workflow.add() to create nodes
- Use .to() or workflow.connect() for connections
- Be ready to compile directly to n8n JSON

### Instruction:
{prompt}

### Response:
"""

    # Debug: Print formatted prompt (first 500 chars)
    print(f"\n{'='*60}")
    print(f"User Prompt: {prompt}")
    print(f"Formatted Input (truncated):\n{formatted_prompt[:500]}...")
    print(f"{'='*60}\n")

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]
    print(f"Input tokens: {input_length}, Max new tokens: {max_tokens}")

    # Generate with parameters matching training
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=max(temperature, 0.1),
            do_sample=True,
            top_p=0.95,
            top_k=50,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Debug: Print generated text
    print(f"Generated text length: {len(generated_text)} chars")
    print(f"Generated text (first 500 chars):\n{generated_text[:500]}...\n")

    # Extract code from response (handle ### Response: format)
    code = extract_code_from_instruction_format(generated_text)

    # Convert to n8n JSON
    n8n_json = convert_to_n8n_json(code)

    # Create visualization
    visualization = create_visualization(n8n_json)

    return code, json.dumps(n8n_json, indent=2), visualization

def extract_code_from_instruction_format(text):
    """Extract TypeScript code from ### Response: format"""

    # Split by ### Response: and get the part after it
    try:
        response_part = text.split("### Response:")[-1].strip()
    except:
        response_part = text

    # Remove any subsequent ### markers (like ### Instruction:, ### System:)
    for stop_marker in ["### Instruction:", "### System:", "\n\n\n\n"]:
        if stop_marker in response_part:
            response_part = response_part.split(stop_marker)[0].strip()

    # Try to extract code from markdown blocks
    code_match = re.search(r'```(?:typescript|ts)?\n(.*?)```', response_part, re.DOTALL)
    if code_match:
        return code_match.group(1).strip()

    # Remove markdown code block markers if present
    response_part = re.sub(r'```(?:typescript|ts)?', '', response_part)

    return response_part.strip()

def extract_code(text):
    """Legacy extraction function - kept for compatibility"""
    return extract_code_from_instruction_format(text)

# ==============================================================================
# N8N JSON CONVERSION
# ==============================================================================

def parse_js_object(js_obj_str):
    """Convert JavaScript object notation to Python dict"""
    if not js_obj_str or js_obj_str.strip() == "{}":
        return {}

    try:
        # First try direct JSON parsing
        return json.loads(js_obj_str)
    except:
        pass

    try:
        # Convert JS object notation to JSON
        # Replace single quotes with double quotes
        json_str = js_obj_str.replace("'", '"')

        # Add quotes around unquoted keys (e.g., {path: "data"} -> {"path": "data"})
        json_str = re.sub(r'(\w+):', r'"\1":', json_str)

        # Parse the JSON
        return json.loads(json_str)
    except Exception as e:
        print(f"Warning: Could not parse parameters '{js_obj_str}': {e}")
        return {}

def convert_to_n8n_json(typescript_code):
    """Convert TypeScript DSL to n8n JSON format"""

    nodes = []
    connections = {}
    workflow_name = "Generated Workflow"

    # Extract workflow name
    name_match = re.search(r"new Workflow\(['\"](.*?)['\"]\)", typescript_code)
    if name_match:
        workflow_name = name_match.group(1)

    # Extract node definitions with improved parameter parsing
    node_pattern = r'const\s+(\w+)\s*=\s*workflow\.add\([\'"]([^\'\"]+)[\'"](?:,\s*(\{[^}]*\}))?\)'
    node_matches = re.finditer(node_pattern, typescript_code)

    node_map = {}  # variable name -> node id
    position_y = 250
    position_x = 300

    for i, match in enumerate(node_matches):
        var_name = match.group(1)
        node_type = match.group(2)
        params_str = match.group(3) if match.group(3) else "{}"

        # Convert JavaScript object notation to valid JSON
        parameters = parse_js_object(params_str)

        node_id = str(i)
        node_map[var_name] = node_id

        nodes.append({
            "id": node_id,
            "name": var_name,
            "type": node_type,
            "typeVersion": 1,
            "position": [position_x, position_y],
            "parameters": parameters
        })

        position_x += 300

    # Extract connections
    connection_pattern = r'(\w+)\.to\((\w+)\)'
    connection_matches = re.finditer(connection_pattern, typescript_code)

    for match in connection_matches:
        source_var = match.group(1)
        target_var = match.group(2)

        if source_var in node_map and target_var in node_map:
            source_id = node_map[source_var]
            target_id = node_map[target_var]

            # Find source node name
            source_node = next((n for n in nodes if n["id"] == source_id), None)
            if source_node:
                source_name = source_node["name"]

                if source_name not in connections:
                    connections[source_name] = {"main": [[]] }

                connections[source_name]["main"][0].append({
                    "node": target_var,
                    "type": "main",
                    "index": 0
                })

    return {
        "name": workflow_name,
        "nodes": nodes,
        "connections": connections,
        "active": False,
        "settings": {}
    }

# ==============================================================================
# VISUALIZATION
# ==============================================================================

def create_visualization(n8n_json):
    """Create HTML visualization of the workflow"""

    nodes = n8n_json.get("nodes", [])
    connections = n8n_json.get("connections", {})

    if not nodes:
        return "<div style='padding:20px;text-align:center;color:#666;'>No nodes found in workflow</div>"

    html = """
    <div style="font-family: Arial, sans-serif; padding: 20px; background: #f5f5f5; border-radius: 8px;">
        <h3 style="margin-top:0; color: #ff6d5a;">📊 Workflow Visualization</h3>
        <div style="display: flex; flex-direction: column; gap: 15px;">
    """

    # Display nodes
    for i, node in enumerate(nodes):
        node_name = node.get("name", f"Node{i}")
        node_type = node.get("type", "unknown").split(".")[-1]
        params = node.get("parameters", {})

        # Count outgoing connections
        outgoing = 0
        for source, conns in connections.items():
            if source == node_name:
                outgoing = len(conns.get("main", [[]])[0])

        # Node card
        html += f"""
        <div style="background: white; padding: 15px; border-radius: 8px; border-left: 4px solid #ff6d5a; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <div style="display: flex; justify-content: space-between; align-items: center;">
                <div>
                    <div style="font-weight: bold; font-size: 16px; color: #333;">{node_name}</div>
                    <div style="color: #666; font-size: 14px; margin-top: 4px;">
                        <code style="background: #f0f0f0; padding: 2px 6px; border-radius: 3px;">{node_type}</code>
                    </div>
                </div>
                <div style="text-align: right; color: #999; font-size: 12px;">
                    Node #{i+1}
                </div>
            </div>
        """

        # Show key parameters
        if params:
            html += "<div style='margin-top: 10px; font-size: 13px; color: #555;'>"
            html += "<strong>Parameters:</strong><br>"
            for key, value in list(params.items())[:3]:  # Show first 3 params
                value_str = str(value)[:50]
                html += f"&nbsp;&nbsp;• {key}: <code style='background:#f9f9f9;padding:1px 4px;'>{value_str}</code><br>"
            html += "</div>"

        # Show connections
        if outgoing > 0:
            html += f"<div style='margin-top: 8px; color: #4CAF50; font-size: 12px;'>→ {outgoing} connection(s)</div>"

        html += "</div>"

        # Show arrow between nodes
        if i < len(nodes) - 1:
            html += "<div style='text-align: center; color: #999; font-size: 20px;'>↓</div>"

    html += """
        </div>
        <div style="margin-top: 15px; padding: 10px; background: #e3f2fd; border-radius: 4px; font-size: 12px; color: #1976d2;">
            💡 <strong>Tip:</strong> Copy the n8n JSON and import it directly into your n8n instance!
        </div>
    </div>
    """

    return html

# ==============================================================================
# GRADIO INTERFACE
# ==============================================================================

def create_ui():
    """Create Gradio interface"""

    with gr.Blocks(title="n8n Workflow Generator", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🚀 n8n Workflow Generator

        Generate n8n workflows using natural language! Powered by fine-tuned **Qwen2.5-Coder-1.5B**.

        ### How to use:
        1. Describe your workflow in plain English
        2. Click "Generate Workflow"
        3. Copy the generated code or n8n JSON
        4. Import into your n8n instance
        """)

        with gr.Row():
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="Workflow Description",
                    placeholder="Example: Create a webhook that receives data, filters active users, and sends to Slack",
                    lines=3
                )

                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.5,
                        step=0.1,
                        label="Temperature (creativity)",
                        info="Lower = more consistent, Higher = more creative"
                    )
                    max_tokens = gr.Slider(
                        minimum=256,
                        maximum=2048,
                        value=1024,
                        step=128,
                        label="Max tokens",
                        info="Maximum length of generated code"
                    )

                generate_btn = gr.Button("🎯 Generate Workflow", variant="primary", size="lg")

                gr.Markdown("""
                ### 📝 Example Prompts:
                - *Create a webhook that sends data to Slack*
                - *Schedule that runs daily and backs up database to Google Drive*
                - *Webhook receives form data, validates email, saves to Airtable*
                - *Monitor RSS feed and post new items to Twitter*
                """)

            with gr.Column(scale=1):
                visualization_output = gr.HTML(label="Visual Workflow")

        with gr.Row():
            with gr.Column():
                code_output = gr.Code(
                    label="Generated TypeScript Code",
                    language="typescript",
                    lines=15
                )

            with gr.Column():
                json_output = gr.Code(
                    label="n8n JSON (import this into n8n)",
                    language="json",
                    lines=15
                )

        # Examples
        gr.Examples(
            examples=[
                ["Create a webhook that sends data to Slack"],
                ["Build a workflow that fetches GitHub issues and sends daily summary email"],
                ["Webhook receives order, if amount > $1000 send to priority queue, else standard processing"],
                ["Schedule that runs every Monday, fetches data from API, transforms it, and updates Google Sheets"],
                ["Monitor RSS feeds, remove duplicates, and post to Twitter"],
            ],
            inputs=prompt_input
        )

        # Event handler
        generate_btn.click(
            fn=generate_workflow,
            inputs=[prompt_input, temperature, max_tokens],
            outputs=[code_output, json_output, visualization_output]
        )

        gr.Markdown("""
        ---
        ### ℹ️ About

        This model achieved **92.4% accuracy** on diverse n8n workflow generation tasks.

        **Model:** Fine-tuned Qwen2.5-Coder-1.5B with LoRA
        **Training:** 247 curated workflow examples
        **Performance:** Production-ready quality

        [🤗 Model Card](https://huggingface.co/{}) | [📊 GitHub](https://github.com/yourusername/n8n-generator)
        """.format(MODEL_REPO))

    return demo

# ==============================================================================
# LAUNCH
# ==============================================================================

if __name__ == "__main__":
    demo = create_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )