# NOTE(review): scrape artifact removed — the original capture began with a
# "Spaces: Running Running" status header (Hugging Face Spaces UI residue),
# which is not part of the program.
import os
import uuid
import torch
from PIL import Image
import requests
import io
from transformers import CLIPProcessor, CLIPModel
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct

# Configuration
# Name of the Qdrant collection the sample items are seeded into.
COLLECTION_NAME = "booth_items"
# Browser-like User-Agent so the image host does not reject the download request.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}

# Sample Data
# Two BOOTH-style catalog entries (title / price / shop / item URL / thumbnail URL)
# used as fixture data; the thumbnail URLs point at placeholder images.
SAMPLE_ITEMS = [
    {
        "title": "幽狐族の娘「桔梗」専用【3D衣装モデル】Royal Dress",
        "price": 2000,
        "shopName": "Mame-Shop",
        "boothUrl": "https://booth.pm/ja/items/1234567",
        "thumbnailUrl": "https://picsum.photos/seed/royal_dress/600/600"
    },
    {
        "title": "【萌専用】ゴスロリメイド服",
        "price": 1800,
        "shopName": "Alice-Atelier",
        "boothUrl": "https://booth.pm/ja/items/2345678",
        "thumbnailUrl": "https://picsum.photos/seed/maid_goth/600/600"
    }
]
def _ensure_collection(qdrant):
    """Create COLLECTION_NAME (512-dim cosine vectors) if it does not exist yet."""
    collections = qdrant.get_collections()
    if not any(c.name == COLLECTION_NAME for c in collections.collections):
        print(f"Creating collection: {COLLECTION_NAME}")
        qdrant.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=512, distance=Distance.COSINE),
        )


def _embed_image(model, processor, device, url):
    """Download the image at *url* and return its L2-normalized CLIP embedding.

    Returns a plain list of floats suitable for a Qdrant point vector.
    Raises requests.HTTPError (via raise_for_status) on a failed download.
    """
    response = requests.get(url, headers=HEADERS, timeout=15)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content)).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.get_image_features(**inputs)
    # get_image_features normally returns a plain tensor, but other
    # transformers versions may wrap it in an output object — unwrap robustly.
    if hasattr(outputs, "image_embeds"):
        features = outputs.image_embeds
    elif hasattr(outputs, "pooler_output"):
        features = outputs.pooler_output
    else:
        features = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
    # Cosine distance expects unit-length vectors; normalize explicitly.
    features = features / features.norm(p=2, dim=-1, keepdim=True)
    return features.cpu().numpy()[0].tolist()


def seed_qdrant_only():
    """Seed SAMPLE_ITEMS into a local (embedded) Qdrant store using CLIP image embeddings.

    Best-effort: a failure on any single item is logged and the remaining
    items are still processed. No value is returned.
    """
    print("--- [DEBUG] Starting Simplified Seeding (Qdrant Only) ---")
    # Initialize CLIP
    print("Loading CLIP model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    # Initialize Qdrant Local — file-backed embedded mode, no server required.
    qdrant = QdrantClient(path="qdrant_local")
    _ensure_collection(qdrant)
    for item in SAMPLE_ITEMS:
        print(f"Processing: {item['title']}")
        try:
            # 1+2. Download the thumbnail and compute its embedding.
            vector = _embed_image(model, processor, device, item['thumbnailUrl'])
            # 3. Save to Qdrant with the full item metadata as payload.
            qdrant.upsert(
                collection_name=COLLECTION_NAME,
                points=[
                    PointStruct(
                        id=str(uuid.uuid4()),
                        vector=vector,
                        payload={
                            "title": item['title'],
                            "price": item['price'],
                            "shopName": item['shopName'],
                            "boothUrl": item['boothUrl'],
                            "thumbnailUrl": item['thumbnailUrl']
                        }
                    )
                ]
            )
            # (was an f-string with no placeholders — plain string is equivalent)
            print(" -> Successfully seeded into Qdrant payload")
        except Exception as e:
            # Deliberate best-effort catch-all: log and continue with the next item.
            print(f" -> ERROR seeding {item['title']}: {e}")
    print("--- [DEBUG] Seeding Complete ---")
# Entry point: run the seeding pass only when executed as a script,
# so importing this module stays side-effect free.
if __name__ == "__main__":
    seed_qdrant_only()