Gaurav vashistha committed on
Commit
842ca78
Β·
1 Parent(s): c6137f0

Final Release: Updated README and Sync

Browse files
README.md CHANGED
@@ -1,10 +1,23 @@
1
- ---
2
- title: MerchFlow AI
3
- emoji: πŸš€
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- ---
9
- # MerchFlow AI
10
- An AI-powered merchandising agent.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MerchFlow AI - Open Source E-Commerce Agent
2
+
3
+ ![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)
4
+ ![Python](https://img.shields.io/badge/Made%20with-Python-blue.svg)
5
+
6
+ ## About
7
+ MerchFlow AI is a **Multi-Agent System built in Pure Python (No LangChain)** that automates product listing generation using Enterprise-Grade Computer Vision. It leverages advanced visual analysis to streamline e-commerce workflows.
8
+
9
+ ## Architecture
10
+ The system operates through a coordinated pipeline:
11
+ 1. **Input Image** β†’ Raw processing
12
+ 2. **Visual Analyst (Gemini 1.5)** β†’ Extracts visual features and metadata
13
+ 3. **Manager Agent** β†’ Orchestrates data flow and decision making
14
+ 4. **n8n Webhook (Self-Hosted)** β†’ Triggers downstream automation and integration
15
+
16
+ ## Open Source Notice
17
+ This project is open source under the MIT License. Feel free to fork and contribute.
18
+
19
+ ## Setup
20
+ To run this project, you need to configure the following Environment Variables:
21
+
22
+ * `GOOGLE_API_KEY`: Your Google Gemini API Key.
23
+ * `N8N_WEBHOOK_URL`: The URL for your self-hosted n8n webhook connector.
check_basic.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Smoke-test the configured HF_TOKEN by classifying a public image with resnet-50."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

# HF_TOKEN must be set in the environment or a local .env file.
api_key = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=api_key)

# Plain string: the original f-prefix had no placeholders, so it was unnecessary.
print("Testing token with microsoft/resnet-50")

try:
    # Pass the URL directly as the input (InferenceClient handles URLs for image tasks)
    result = client.image_classification(
        model="microsoft/resnet-50",
        image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    )
    print("Success:", result)
except Exception as e:
    print("Failed:", e)
check_gemini.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Diagnostic script: list usable Gemini models, then probe gemini-1.5-flash."""
import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()

# Accept either env-var spelling for the API key.
genai.configure(api_key=os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"))

print("Listing available Gemini models...")
try:
    usable = (
        entry.name
        for entry in genai.list_models()
        if 'generateContent' in entry.supported_generation_methods
    )
    for name in usable:
        print(name)
except Exception as err:
    print(f"List models failed: {err}")

model_name = "gemini-1.5-flash"
print(f"\nTesting model: {model_name}")

try:
    reply = genai.GenerativeModel(model_name).generate_content("Hello, can you see this?")
    print("Response:", reply.text)
except Exception as err:
    print(f"Test failed: {err}")
check_gemini_clean.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Probe a shortlist of Gemini 2.0 model ids and stop at the first one that responds."""
import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()

# Either env-var spelling of the key is accepted.
genai.configure(api_key=os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"))

# Candidate ids, tried in order; the loop exits on the first success.
candidates = [
    "gemini-2.0-flash",
    "gemini-2.0-flash-exp",
    "models/gemini-2.0-flash",
]

for model_name in candidates:
    print(f"\nTesting model: {model_name}")
    try:
        response = genai.GenerativeModel(model_name).generate_content("Hello")
    except Exception as e:
        print(f"❌ Failed with {model_name}: {e}")
    else:
        print(f"βœ… Success with {model_name}: {response.text}")
        break
check_groq_models.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""List every model id visible to the configured Groq API key."""
import os
from groq import Groq
from dotenv import load_dotenv

load_dotenv()

try:
    listing = Groq(api_key=os.getenv("GROQ_API_KEY")).models.list()
    print("Available Models:")
    for entry in listing.data:
        print(f"- {entry.id}")
except Exception as err:
    print(f"Error listing models: {err}")
check_groq_vision.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Probe Groq's Llama 3.2 vision model with a public image URL.

Requires GROQ_API_KEY in the environment or a local .env file.
"""
import os
from groq import Groq
from dotenv import load_dotenv

load_dotenv()

# NOTE: removed unused `import base64` from the original.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
model = "llama-3.2-11b-vision-preview"

print(f"Testing Groq Vision model: {model}")

# Test 1: Image URL
print("\n--- Test 1: Image URL ---")
try:
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                # Multimodal content: one text part plus one image-URL part.
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=False,
        stop=None,
    )
    print("Response:", completion.choices[0].message.content)
except Exception as e:
    print("Groq Vision failed:", e)
check_idefics.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Try HuggingFaceM4/idefics2-8b through the HF chat-completions client."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
model = "HuggingFaceM4/idefics2-8b"

print(f"Testing model: {model}")

# Test 1: Image URL
print("\n--- Test 1: Image URL ---")
try:
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    # One user turn carrying an image part followed by a text part.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": image_url}},
            {"type": "text", "text": "What is in this image?"},
        ],
    }
    completion = client.chat.completions.create(
        model=model,
        messages=[user_turn],
        max_tokens=100,
    )
    print("Response:", completion.choices[0].message.content)
except Exception as err:
    print("Image URL failed:", err)
check_idefics_raw.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Raw HTTP probes against the HF router for idefics2-8b.

Fixes vs. original: removed unused `import json`; added request timeouts so a
stalled endpoint cannot hang the script indefinitely.
"""
import os
import requests
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("HF_TOKEN")
model = "HuggingFaceM4/idefics2-8b"
url = f"https://router.huggingface.co/models/{model}"

headers = {"Authorization": f"Bearer {api_key}"}

# Seconds before giving up on a request.
TIMEOUT = 60

print(f"Testing URL: {url}")

# Test A: Simple text inputs
print("\n--- Test A: Simple Text ---")
response = requests.post(url, headers=headers, json={"inputs": "Hello"}, timeout=TIMEOUT)
print(f"Status: {response.status_code}")
print("Response:", response.text)

# Test B: Formatted inputs (Standard for some VLM APIs)
# Often they accept { "inputs": "User: ...", "parameters": ... }
print("\n--- Test B: Formatted Prompt ---")
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
prompt = f"User: ![]({image_url}) Describe this image.<end_of_utterance>\nAssistant:"
response = requests.post(
    url,
    headers=headers,
    json={"inputs": prompt, "parameters": {"max_new_tokens": 50}},
    timeout=TIMEOUT,
)
print(f"Status: {response.status_code}")
print("Response:", response.text)
check_idefics_v2.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Drive idefics2-8b via text_generation using its markdown-image prompt format."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
model = "HuggingFaceM4/idefics2-8b"

print(f"Testing model: {model}")

image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"

# Idefics2 prompt format:
# User: ![](<image_url>) <text><end_of_utterance>\nAssistant:
prompt = f"User: ![]({image_url}) Describe this image.<end_of_utterance>\nAssistant:"

print("\n--- Testing with text_generation and specific prompt ---")
print(f"Prompt: {prompt}")

try:
    # text_generation covers models that don't expose a chat endpoint.
    generated = client.text_generation(
        prompt=prompt,
        model=model,
        max_new_tokens=100,
    )
except Exception as err:
    print("Failed:", err)
else:
    print("Response:", generated)
check_idefics_v3.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fallback probes for idefics2-8b: the captioning endpoint, then plain text generation."""
import os
import traceback
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
model = "HuggingFaceM4/idefics2-8b"
car_image = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"

print(f"Testing model: {model}")

print("\n--- Test 1: Image to Text (Captioning) ---")
try:
    # May succeed if the API routes this model as a captioning task.
    caption = client.image_to_text(car_image, model=model)
    print("Response:", caption)
except Exception:
    traceback.print_exc()

print("\n--- Test 2: Text Generation (Simple) ---")
try:
    text = client.text_generation("describe a car", model=model, max_new_tokens=50)
    print("Response:", text)
except Exception:
    traceback.print_exc()
check_llama.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Check meta-llama/Llama-3.2-11B-Vision-Instruct via HF chat completions."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=token)
model = "meta-llama/Llama-3.2-11B-Vision-Instruct"

print(f"Testing model: {model}")

# Test 1: Image URL (Llama Vision)
print("\n--- Test 1: Image URL ---")
try:
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    # Image part first, question second, in a single user turn.
    content_parts = [
        {"type": "image_url", "image_url": {"url": image_url}},
        {"type": "text", "text": "What is in this image?"},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": content_parts}],
        max_tokens=100,
    )
    print("Response:", completion.choices[0].message.content)
except Exception as err:
    print("Image URL failed:", err)
check_llava.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Check llava-hf/llava-1.5-7b-hf via HF chat completions with an image URL."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)
model = "llava-hf/llava-1.5-7b-hf"

print(f"Testing model: {model}")

# Test 1: Image URL
print("\n--- Test 1: Image URL ---")
try:
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    request_messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": "What is in this image?"},
            ],
        },
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=request_messages,
        max_tokens=100,
    )
    print("Response:", completion.choices[0].message.content)
except Exception as err:
    print("Image URL failed:", err)
check_models_list.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Print every Gemini model that supports generateContent."""
import google.generativeai as genai
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
    print("Listing available models...")
    names = [
        info.name
        for info in genai.list_models()
        if 'generateContent' in info.supported_generation_methods
    ]
    for name in names:
        print(name)
else:
    print("❌ API Key not found")
check_qwen.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Two probes of Qwen2-VL-7B-Instruct over HF chat completions: text-only, then image."""
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
model = "Qwen/Qwen2-VL-7B-Instruct"

print(f"Testing model: {model}")


def _ask(messages):
    """Send one chat request to the model and return the first choice's text."""
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=100,
    )
    return completion.choices[0].message.content


# Test 1: Text only
print("\n--- Test 1: Text Only ---")
try:
    print("Response:", _ask([{"role": "user", "content": "Hello, are you working?"}]))
except Exception as err:
    print("Text only failed:", err)

# Test 2: Image (using a public URL to avoid base64 issues first)
print("\n--- Test 2: Image URL ---")
try:
    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
    multimodal_turn = {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": image_url}},
            {"type": "text", "text": "What is in this image?"},
        ],
    }
    print("Response:", _ask([multimodal_turn]))
except Exception as err:
    print("Image URL failed:", err)
check_qwen_raw.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Raw HTTP probes of the HF router for Qwen2-VL: text, VQA, then OpenAI-style chat.

Fixes vs. original: removed unused `import json`; added request timeouts so a
stalled endpoint cannot hang the script indefinitely.
"""
import os
import requests
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("HF_TOKEN")
model = "Qwen/Qwen2-VL-7B-Instruct"
# Update URL to router
url = f"https://router.huggingface.co/models/{model}"

headers = {"Authorization": f"Bearer {api_key}"}

# Seconds before giving up on a request.
TIMEOUT = 60

print(f"Testing URL: {url}")

# Test 1: Simple text generation payload (inputs string)
data_text = {
    "inputs": "Hello",
    "parameters": {"max_new_tokens": 50},
}
print("\n--- Test 1: Text Generation (inputs string) ---")
response = requests.post(url, headers=headers, json=data_text, timeout=TIMEOUT)
print(f"Status: {response.status_code}")
print("Response:", response.text)

# Test 2: VQA format
data_vqa = {
    "inputs": {
        "image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true",
        "question": "What is in this image?",
    }
}
print("\n--- Test 2: VQA Format ---")
response = requests.post(url, headers=headers, json=data_vqa, timeout=TIMEOUT)
print(f"Status: {response.status_code}")
print("Response:", response.text)

# Test 3: Chat Completions API (OpenAI style)
url_chat = f"https://router.huggingface.co/models/{model}/v1/chat/completions"
print(f"\nTesting URL: {url_chat}")
data_chat = {
    "model": model,  # Sometimes required in body
    "messages": [
        {"role": "user", "content": "Hello"},
    ],
    "max_tokens": 50,
}
print("\n--- Test 3: Chat Completion ---")
response = requests.post(url_chat, headers=headers, json=data_chat, timeout=TIMEOUT)
print(f"Status: {response.status_code}")
print("Response:", response.text)
scan_vision_models.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Probe a list of candidate vision models on the HF router and report their status.

Fixes vs. original: removed unused `import json`; added a per-request timeout so
one dead endpoint cannot stall the whole scan.
"""
import os
import requests
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("HF_TOKEN")
headers = {"Authorization": f"Bearer {api_key}"}

candidates = [
    "HuggingFaceM4/idefics2-8b",
    "HuggingFaceM4/idefics2-8b-chatty",
    "llava-hf/llava-1.5-7b-hf",
    "llava-hf/llava-v1.6-mistral-7b-hf",
    "microsoft/Phi-3-vision-128k-instruct",
    "NousResearch/Nous-Hermes-2-Vision-Alpha",
    "OpenGVLab/InternVL-Chat-V1-5",
    "Qwen/Qwen2.5-VL-7B-Instruct",
    "google/paligemma-3b-mix-224",
]

print("Scanning for working Serverless Vision Models...\n")

for model in candidates:
    url = f"https://router.huggingface.co/models/{model}"
    print(f"Testing: {model}")
    try:
        # Simple probe payload; timeout keeps one dead endpoint from stalling the scan.
        response = requests.post(url, headers=headers, json={"inputs": "Hello"}, timeout=30)
        if response.status_code == 200:
            print(f"βœ… WORKS! {model} (Status: 200)")
            print(f"Response: {response.text[:100]}...")
        elif response.status_code == 400:
            # 400 might mean it Exists but input format is wrong (which is good!)
            print(f"⚠️ EXISTS but 400 (Bad Request): {model}")
            print(f"Response: {response.text[:100]}...")
        elif response.status_code == 404:
            print(f"❌ 404 Not Found: {model}")
        else:
            print(f"❌ Error {response.status_code}: {model}")
    except Exception as e:
        print(f"❌ Exception: {e}")
    print("-" * 30)
start_n8n.ps1 ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Restart helper: clear any stuck Node.js processes, then launch n8n with a public tunnel.

# Force-kill all node processes to free n8n's ports; SilentlyContinue suppresses
# the error raised when no node process is running.
Write-Host "πŸš€ Killing stuck Node.js processes..." -ForegroundColor Yellow
Stop-Process -Name "node" -Force -ErrorAction SilentlyContinue
Write-Host "βœ… Ports cleared." -ForegroundColor Green

Write-Host "πŸš€ Starting n8n with Tunnel..." -ForegroundColor Yellow
# This starts n8n and keeps the window open
npx n8n start --tunnel
test_gemini_fix.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Smoke-test the VisualAnalyst agent end to end and validate its JSON output shape.

Fix vs. original: removed unused `import json`.
"""
import os
from agents.visual_analyst import VisualAnalyst

def test_gemini_visual_analyst():
    """Run VisualAnalyst on a local image and check the required result keys."""
    print("πŸš€ Starting Visual Analyst Test (Gemini 1.5 Flash)...")

    # 1. Initialize
    try:
        agent = VisualAnalyst()
    except Exception as e:
        print(f"❌ Failed to initialize VisualAnalyst: {e}")
        return

    # 2. Define Image Path: prefer 'test_image.jpg', fall back to 'screen.jpg',
    # and bail out if neither exists.
    image_path = "test_image.jpg"
    if not os.path.exists(image_path):
        print(f"⚠️ {image_path} not found. Using 'screen.jpg' if available or skipping.")
        image_path = "screen.jpg"
        if not os.path.exists(image_path):
            print("❌ No test image found.")
            return

    print(f"πŸ“Έ Analyzing image: {image_path}")

    # 3. Analyze
    try:
        result = agent.analyze_image(image_path)
        print("βœ… Raw Result:", result)

        # 4. Verify Structure
        required_keys = ["main_color", "product_type", "design_style", "visual_features"]
        missing = [k for k in required_keys if k not in result]

        if missing:
            print(f"❌ Missing keys in JSON: {missing}")
        else:
            print("βœ… JSON Structure Validated")
            print("🎨 Main Color:", result.get("main_color"))
            print("πŸ“¦ Product Type:", result.get("product_type"))

    except Exception as e:
        print(f"❌ Analysis failed with exception: {e}")

if __name__ == "__main__":
    test_gemini_visual_analyst()
test_vision_hf.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run the project's VisualAnalyst on a local test image and verify its output keys."""
import os
import sys
from dotenv import load_dotenv

# Load env variables (for HF_TOKEN)
load_dotenv()

# Add project root to path so we can import agents
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from agents.visual_analyst import VisualAnalyst

def test_hf_vision():
    """Analyze test_image.jpg and confirm the expected keys are present."""
    image_path = "test_image.jpg"

    if not os.path.exists(image_path):
        print(f"❌ '{image_path}' not found. Please provide a test image.")
        return

    print(f"πŸ” Testing Hugging Face Qwen2-VL Vision Analyst with {image_path}...")

    try:
        analyst = VisualAnalyst()
        analysis = analyst.analyze_image(image_path)

        print("\nβœ… Analysis Result:")
        print(analysis)

        expected = ("main_color", "product_type", "design_style", "visual_features")
        absent = [key for key in expected if key not in analysis]

        if absent:
            print(f"\n❌ Missing keys in response: {absent}")
        else:
            print("\nπŸŽ‰ Output format verified!")

    except Exception as err:
        print(f"\n❌ Test Failed: {err}")

if __name__ == "__main__":
    test_hf_vision()