# github-actions
# Deploy to HF (clean history with LFS)
# a06f06c
import asyncio
import os
import uuid
from prisma import Prisma
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from PIL import Image
import requests
import io
import torch
from transformers import CLIPProcessor, CLIPModel
# Configuration
# Qdrant server endpoint, overridable through the QDRANT_URL environment
# variable. NOTE: seed() in this file opens Qdrant in local on-disk mode and
# does not read this value.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
# Name of the Qdrant collection that receives the item embeddings.
COLLECTION_NAME = "booth_items"
# Browser-like request headers sent when downloading thumbnails.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

# Sample Data
# Each entry describes one item: display title, price, shop name, the item's
# page URL and the URL of the thumbnail image used for embedding.
SAMPLE_ITEMS = [
    {
        "title": "幽狐族の娘「桔梗」専用【3D衣装モデル】Royal Dress",
        "price": 2000,
        "shopName": "Mame-Shop",
        "boothUrl": "https://booth.pm/ja/items/1234567",
        "thumbnailUrl": "https://images.booth.pm/c/cc495213-9799-4d69-90bc-2c70034a7429/18a29a43-6c7e-4b72-9e8d-8a5840d892d1/thumbnail_600x600.png",
    },
    {
        "title": "【萌専用】ゴスロリメイド服",
        "price": 1800,
        "shopName": "Alice-Atelier",
        "boothUrl": "https://booth.pm/ja/items/2345678",
        "thumbnailUrl": "https://images.booth.pm/c/7951d3b4-4b52-4e8a-8a58-8a8b1c1d1e1f/1a2b3c4d-5e6f-7a8b-9c0d-1e1f2a3b4c5d/thumbnail_600x600.png",
    },
]
def _ensure_collection(qdrant):
    """Create the ``COLLECTION_NAME`` collection in *qdrant* if it is missing.

    A new collection is configured for 512-dimensional vectors compared by
    cosine distance. An already existing collection is left untouched.
    """
    existing = qdrant.get_collections().collections
    if any(c.name == COLLECTION_NAME for c in existing):
        return
    qdrant.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=512, distance=Distance.COSINE),
    )


def _embed_image(url, model, processor, device):
    """Download the image at *url* and return its normalized CLIP embedding.

    Args:
        url: Address of the image to download.
        model: Loaded ``CLIPModel`` already moved to *device*.
        processor: ``CLIPProcessor`` matching *model*.
        device: Torch device string the inputs are moved to.

    Returns:
        The L2-normalized image embedding as a plain list of floats.

    Raises:
        requests.RequestException: If the download fails, times out (10 s)
            or the server answers with an error status.
    """
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content)).convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.get_image_features(**inputs)

    # Robustly handle different CLIP output formats: use the wrapped
    # ``image_embeds`` when present, otherwise treat the result as the tensor.
    features = outputs.image_embeds if hasattr(outputs, "image_embeds") else outputs

    # Normalize to unit length and convert the single batch row to a list.
    features = features / features.norm(p=2, dim=-1, keepdim=True)
    return features.cpu().numpy()[0].tolist()


async def seed():
    """Seed the database and the local Qdrant store with ``SAMPLE_ITEMS``.

    For every sample item the thumbnail is downloaded and embedded with CLIP,
    the shop is upserted and a product row is created through Prisma, and the
    embedding is stored in Qdrant with the item fields as payload.

    A failure while processing one item is reported and the loop moves on to
    the next item. The Prisma connection and the Qdrant client are always
    released, even when setup (model loading, collection creation) raises.

    NOTE: ``requests`` and the Qdrant local client are blocking calls; that is
    acceptable here because this coroutine is the only task being run.
    """
    prisma = Prisma()
    await prisma.connect()
    try:
        # Local mode: no server needed
        qdrant = QdrantClient(path="qdrant_local")
        try:
            # Initialize CLIP model for embedding generation
            print("Loading CLIP model...")
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
            processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

            # Ensure Qdrant collection
            print(f"Ensuring Qdrant collection: {COLLECTION_NAME}")
            _ensure_collection(qdrant)

            for item in SAMPLE_ITEMS:
                print(f"Processing: {item['title']}")
                try:
                    # 1. Download image and generate embedding
                    vector = _embed_image(item['thumbnailUrl'], model, processor, device)

                    # 2. Save to PostgreSQL via Prisma
                    # First, ensure shop exists (keyed by its derived URL)
                    shop_url = f"https://{item['shopName'].lower()}.booth.pm"
                    shop = await prisma.shop.upsert(
                        where={'url': shop_url},
                        data={
                            'create': {
                                'name': item['shopName'],
                                'url': shop_url,
                            },
                            'update': {'name': item['shopName']},
                        },
                    )

                    # Create product
                    product = await prisma.product.create(
                        data={
                            'shopId': shop.id,
                            'title': item['title'],
                            'price': item['price'],
                            'thumbnailUrl': item['thumbnailUrl'],
                        }
                    )

                    # 3. Save to Qdrant
                    vector_id = str(uuid.uuid4())
                    qdrant.upsert(
                        collection_name=COLLECTION_NAME,
                        points=[
                            PointStruct(
                                id=vector_id,
                                vector=vector,
                                payload={
                                    "productId": product.id,
                                    "title": item['title'],
                                    "price": item['price'],
                                    "shopName": item['shopName'],
                                    "boothUrl": item['boothUrl'],
                                    "thumbnailUrl": item['thumbnailUrl'],
                                },
                            )
                        ],
                    )
                    # Link vectorId back to DB image if we were storing images specifically
                    # For MVP, we use the vector payload for display
                    print(f"Successfully seeded: {item['title']}")
                except Exception as e:
                    # Deliberate best-effort: one bad item must not abort the run.
                    print(f"Error seeding {item['title']}: {e}")
        finally:
            # Release the local Qdrant client even if setup or seeding raised.
            qdrant.close()
    finally:
        # Always release the database connection, including on setup errors.
        await prisma.disconnect()
    print("Seeding complete.")
if __name__ == "__main__":
    # Script entry point: drive the async seeding routine to completion.
    asyncio.run(seed())