# precalcular_modelos_b16.py
"""Precompute CLIP ViT-B/16 text embeddings for vehicle brand+model labels.

Reads brand/model pairs from ``modelos.xlsx``, encodes each "Marca Modelo"
string with OpenCLIP's ViT-B-16 (OpenAI weights), L2-normalizes the
embeddings, and saves them with their labels to
``text_embeddings_modelos_b16.pt``.
"""
import torch
import open_clip
import pandas as pd

# One label per row: brand + model only (e.g. "Ford Fiesta").
# .astype(str) guards against non-string cells (Excel often yields numbers),
# which would otherwise make the `+` concatenation raise TypeError.
df = pd.read_excel("modelos.xlsx")
textos = (df["Marca"].astype(str) + " " + df["Modelo"].astype(str)).tolist()

MODEL_NAME = "ViT-B-16"
PRETRAINED = "openai"

model, _, _ = open_clip.create_model_and_transforms(MODEL_NAME, pretrained=PRETRAINED)
tokenizer = open_clip.get_tokenizer(MODEL_NAME)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference mode: disables dropout / freezes batch-norm stats

with torch.no_grad():
    text_inputs = tokenizer(textos).to(device)  # token-id tensor on GPU or CPU
    text_features = model.encode_text(text_inputs)
    # L2-normalize so cosine similarity later reduces to a dot product.
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

# Store on CPU so the file loads on machines without a GPU.
torch.save(
    {"embeddings": text_features.cpu(), "labels": textos},
    "text_embeddings_modelos_b16.pt",
)
print("Embeddings de modelos guardados en 'text_embeddings_modelos_b16.pt'")