Spaces:
Paused
Paused
| # clip_utils.py | |
| import logging | |
| import os | |
| from typing import Union, List | |
| import cn_clip.clip as clip | |
| import torch | |
| from PIL import Image | |
| from cn_clip.clip import load_from_name | |
| from config import MODELS_PATH | |
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model name, overridable via the MODEL_NAME_CN environment variable
MODEL_NAME_CN = os.environ.get('MODEL_NAME_CN', 'ViT-B-16')

# Prefer GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Lazily-initialised model state; populated by init_clip_model()
model = None
preprocess = None
def init_clip_model():
    """Load the Chinese-CLIP model and its preprocessing pipeline into module globals.

    Populates the module-level ``model`` and ``preprocess`` globals and puts
    the model into eval mode.

    Returns:
        bool: True if the model loaded successfully, False otherwise.
    """
    global model, preprocess
    try:
        model, preprocess = load_from_name(MODEL_NAME_CN, device=device, download_root=MODELS_PATH)
        model.eval()  # inference mode: disable dropout / batch-norm updates
        # Lazy %-style args: the message is only formatted if the level is enabled
        logger.info("CLIP model initialized successfully, dimension: %s", model.visual.output_dim)
        return True
    except Exception as e:
        logger.error("CLIP model initialization failed: %s", e)
        return False
def is_clip_available():
    """Report whether the CLIP model and preprocessor have been loaded."""
    components = (model, preprocess)
    return all(component is not None for component in components)
def encode_image(image_path: str) -> torch.Tensor:
    """Encode an image file into a unit-normalised CLIP feature vector.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        torch.Tensor: L2-normalised feature tensor on CPU (one row per image).

    Raises:
        RuntimeError: If the CLIP model has not been initialised.
    """
    if not is_clip_available():
        raise RuntimeError("CLIP模型未初始化")
    # Fix: Image.open leaves the file handle open; the context manager closes it
    # promptly instead of relying on garbage collection.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image_tensor = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        features = model.encode_image(image_tensor)
        # Normalise so cosine similarity reduces to a dot product
        features = features / features.norm(p=2, dim=-1, keepdim=True)
    return features.cpu()
def encode_text(text: Union[str, List[str]]) -> torch.Tensor:
    """Encode one or more text strings into unit-normalised CLIP feature vectors.

    Args:
        text: A single string or a list of strings to encode.

    Returns:
        torch.Tensor: L2-normalised feature tensor on CPU (one row per input).

    Raises:
        RuntimeError: If the CLIP model has not been initialised.
    """
    if not is_clip_available():
        raise RuntimeError("CLIP模型未初始化")
    # Always tokenize a batch, even for a single string
    if isinstance(text, str):
        batch = [text]
    else:
        batch = text
    tokens = clip.tokenize(batch).to(device)
    with torch.no_grad():
        encoded = model.encode_text(tokens)
        encoded = encoded / encoded.norm(p=2, dim=-1, keepdim=True)
    return encoded.cpu()