Spaces:
Running
Running
| import asyncio | |
| import os | |
| import uuid | |
| from prisma import Prisma | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.http.models import Distance, VectorParams, PointStruct | |
| from PIL import Image | |
| import requests | |
| import io | |
| import torch | |
| from transformers import CLIPProcessor, CLIPModel | |
| # Configuration | |
| QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333") | |
| COLLECTION_NAME = "booth_items" | |
| HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"} | |
| # Sample Data | |
| SAMPLE_ITEMS = [ | |
| { | |
| "title": "幽狐族の娘「桔梗」専用【3D衣装モデル】Royal Dress", | |
| "price": 2000, | |
| "shopName": "Mame-Shop", | |
| "boothUrl": "https://booth.pm/ja/items/1234567", | |
| "thumbnailUrl": "https://images.booth.pm/c/cc495213-9799-4d69-90bc-2c70034a7429/18a29a43-6c7e-4b72-9e8d-8a5840d892d1/thumbnail_600x600.png" | |
| }, | |
| { | |
| "title": "【萌専用】ゴスロリメイド服", | |
| "price": 1800, | |
| "shopName": "Alice-Atelier", | |
| "boothUrl": "https://booth.pm/ja/items/2345678", | |
| "thumbnailUrl": "https://images.booth.pm/c/7951d3b4-4b52-4e8a-8a58-8a8b1c1d1e1f/1a2b3c4d-5e6f-7a8b-9c0d-1e1f2a3b4c5d/thumbnail_600x600.png" | |
| } | |
| ] | |
| async def seed(): | |
| prisma = Prisma() | |
| await prisma.connect() | |
| # Local mode: no server needed | |
| qdrant = QdrantClient(path="qdrant_local") | |
| # Initialize CLIP model for embedding generation | |
| print("Loading CLIP model...") | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device) | |
| processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") | |
| # Ensure Qdrant collection | |
| print(f"Ensuring Qdrant collection: {COLLECTION_NAME}") | |
| collections = qdrant.get_collections() | |
| if not any(c.name == COLLECTION_NAME for c in collections.collections): | |
| qdrant.create_collection( | |
| collection_name=COLLECTION_NAME, | |
| vectors_config=VectorParams(size=512, distance=Distance.COSINE), | |
| ) | |
| for item in SAMPLE_ITEMS: | |
| print(f"Processing: {item['title']}") | |
| # 1. Download image and generate embedding | |
| try: | |
| response = requests.get(item['thumbnailUrl'], headers=HEADERS, timeout=10) | |
| response.raise_for_status() | |
| image = Image.open(io.BytesIO(response.content)).convert("RGB") | |
| inputs = processor(images=image, return_tensors="pt").to(device) | |
| with torch.no_grad(): | |
| outputs = model.get_image_features(**inputs) | |
| # Robustly handle different CLIP output formats | |
| if hasattr(outputs, "image_embeds"): | |
| features = outputs.image_embeds | |
| else: | |
| features = outputs | |
| # Normalize and convert to list | |
| features = features / features.norm(p=2, dim=-1, keepdim=True) | |
| vector = features.cpu().numpy()[0].tolist() | |
| # 2. Save to PostgreSQL via Prisma | |
| # First, ensure shop exists | |
| shop = await prisma.shop.upsert( | |
| where={'url': f"https://{item['shopName'].lower()}.booth.pm"}, | |
| data={ | |
| 'create': { | |
| 'name': item['shopName'], | |
| 'url': f"https://{item['shopName'].lower()}.booth.pm" | |
| }, | |
| 'update': {'name': item['shopName']} | |
| } | |
| ) | |
| # Create product | |
| product = await prisma.product.create( | |
| data={ | |
| 'shopId': shop.id, | |
| 'title': item['title'], | |
| 'price': item['price'], | |
| 'thumbnailUrl': item['thumbnailUrl'] | |
| } | |
| ) | |
| # 3. Save to Qdrant | |
| vector_id = str(uuid.uuid4()) | |
| qdrant.upsert( | |
| collection_name=COLLECTION_NAME, | |
| points=[ | |
| PointStruct( | |
| id=vector_id, | |
| vector=vector, | |
| payload={ | |
| "productId": product.id, | |
| "title": item['title'], | |
| "price": item['price'], | |
| "shopName": item['shopName'], | |
| "boothUrl": item['boothUrl'], | |
| "thumbnailUrl": item['thumbnailUrl'] | |
| } | |
| ) | |
| ] | |
| ) | |
| # Link vectorId back to DB image if we were storing images specifically | |
| # For MVP, we use the vector payload for display | |
| print(f"Successfully seeded: {item['title']}") | |
| except Exception as e: | |
| print(f"Error seeding {item['title']}: {e}") | |
| await prisma.disconnect() | |
| print("Seeding complete.") | |
| if __name__ == "__main__": | |
| asyncio.run(seed()) | |