afdx2 committed on
Commit
347833f
·
verified ·
1 Parent(s): 5f29f90

Upload 5 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile
FROM python:3.10-slim

# No pip cache, no .pyc files, unbuffered stdout/stderr (container-friendly logs).
ENV PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Minimal system deps (build-essential for wheels that compile, git for VCS installs).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential git && rm -rf /var/lib/apt/lists/*

WORKDIR /app
# Copy requirements first so dependency install is cached across code-only rebuilds.
COPY requirements.txt /app/
RUN pip install --upgrade pip && pip install -r requirements.txt

# Copy everything (includes your .pt embedding files)
COPY . /app

# Hugging Face exposes the port in $PORT; we use that value (default 7860).
# NOTE(review): CMD imports "app:app", i.e. it expects app.py defining `app`;
# this upload contains server1.py — confirm the module name in the repo.
CMD sh -c 'uvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}'
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Python dependencies installed by the Dockerfile (unpinned — latest at build time).
fastapi
uvicorn[standard]
torch
torchvision
open-clip-torch
Pillow
server1.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py
# comments without accents / without n-tilde (original note, translated)
# NOTE(review): this file is uploaded as server1.py, but the Dockerfile CMD
# runs "uvicorn app:app" — confirm the module is actually named app.py.

import os, io
from typing import Optional
import torch
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from PIL import Image, UnidentifiedImageError
import open_clip
from torchvision import transforms as T

# Basic limits: pin PyTorch/BLAS to a single thread — presumably to fit small
# shared-CPU quotas (e.g. free HF Spaces hardware); verify for your host.
torch.set_num_threads(1)
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

# GPU with fp16 when available, otherwise CPU with fp32.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

# Embedding checkpoint filenames (in the same repo); overridable via env vars.
MODEL_EMB_PATH = os.getenv("MODEL_EMB_PATH", "text_embeddings_modelos_h14.pt")
VERS_EMB_PATH = os.getenv("VERS_EMB_PATH", "text_embeddings_h14.pt")

app = FastAPI(title="CLIP H14 Vehicle API")
+
27
# ============== CLIP model ==============
# Loaded at import time: the first start downloads/loads the ViT-H-14
# LAION-2B weights (several GB), so cold starts are slow.
clip_model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-H-14", pretrained="laion2b_s32b_b79k"
)
clip_model = clip_model.to(device=DEVICE, dtype=DTYPE).eval()
# Inference only: freeze all parameters.
for p in clip_model.parameters():
    p.requires_grad = False

# Reuse the normalization constants from the stock preprocess pipeline, but
# replace its resize/center-crop with a plain 224x224 bicubic resize
# (NOTE(review): this changes aspect ratio vs. the stock CLIP preprocessing —
# presumably intentional; confirm it matches how the text embeddings were built).
normalize = next(t for t in preprocess.transforms if isinstance(t, T.Normalize))
transform = T.Compose([
    T.Resize((224, 224), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=normalize.mean, std=normalize.std),
])
+
42
+ # ============== embeddings ==============
43
+ def _ensure_label_list(x):
44
+ if isinstance(x, (list, tuple)):
45
+ return list(x)
46
+ if hasattr(x, "tolist"):
47
+ return [str(s) for s in x.tolist()]
48
+ return [str(s) for s in x]
49
+
50
def _load_embeddings(path: str):
    """Load a text-embedding checkpoint; return (labels, L2-normalized embeddings).

    The checkpoint is a dict with "labels" and "embeddings". Embeddings stay
    on CPU and are cast to the module-wide DTYPE so later matmuls against
    image features (always DTYPE) never mix float16/float32 — the original
    hard-coded float16, which makes `float32 @ float16` raise on the CPU
    (fp32) inference path. On CUDA, DTYPE is float16, so behavior there is
    unchanged.
    """
    # NOTE(review): torch.load unpickles arbitrary objects — only load trusted
    # checkpoints (weights_only=True would be safer if these files contain
    # just tensors and label lists).
    ckpt = torch.load(path, map_location="cpu")
    labels = _ensure_label_list(ckpt["labels"])
    embeds = ckpt["embeddings"].to("cpu", dtype=DTYPE)
    # L2-normalize rows so a dot product equals cosine similarity.
    embeds = embeds / embeds.norm(dim=-1, keepdim=True)
    return labels, embeds
+
57
# Loaded once at startup; labels are the human-readable strings the API returns.
model_labels, model_embeddings = _load_embeddings(MODEL_EMB_PATH)     # "Brand Model"
version_labels, version_embeddings = _load_embeddings(VERS_EMB_PATH)  # "Brand Model Version"
+
60
# ============== inference ==============
@torch.inference_mode()
def _encode_image(img_tensor: torch.Tensor) -> torch.Tensor:
    """Encode a preprocessed image batch; return L2-normalized CLIP features.

    On CUDA the forward pass runs under autocast at DTYPE (fp16); on CPU it
    runs as-is in fp32.
    """
    if DEVICE == "cuda":
        # torch.cuda.amp.autocast(...) is deprecated; torch.autocast with an
        # explicit device_type is the supported spelling and behaves the same.
        with torch.autocast(device_type="cuda", dtype=DTYPE):
            feats = clip_model.encode_image(img_tensor)
    else:
        feats = clip_model.encode_image(img_tensor)
    return feats / feats.norm(dim=-1, keepdim=True)
+
70
def _predict_top(text_feats_dev: torch.Tensor, text_labels: list[str], image_tensor: torch.Tensor, topk: int = 1):
    """Rank *text_labels* against one image and return the top-k matches.

    Each match is {"label": ..., "confidence": ...}, where confidence is the
    softmax probability as a percentage rounded to two decimals.
    """
    image_features = _encode_image(image_tensor)
    # Standard CLIP scoring: scaled cosine similarities -> softmax over labels.
    logits = 100.0 * image_features @ text_feats_dev.T
    probs = logits.softmax(dim=-1)[0]
    scores, indices = torch.topk(probs, k=topk)
    results = []
    for score, index in zip(scores, indices):
        results.append({
            "label": text_labels[index],
            "confidence": round(float(score) * 100.0, 2),
        })
    return results
+
76
def process_image_bytes(image_bytes: bytes):
    """Two-stage vehicle classification from raw image bytes.

    Stage 1 picks the best "Brand Model" label over all model embeddings;
    stage 2 re-ranks only the version labels whose text starts with that
    model string. Returns a dict with Spanish keys
    (marca/modelo/version/confianza_*) — they are part of the API response
    contract, do not rename.

    Raises UnidentifiedImageError for empty/tiny payloads or anything PIL
    cannot decode.
    """
    # Reject obviously-bogus payloads before handing them to PIL.
    if not image_bytes or len(image_bytes) < 128:
        raise UnidentifiedImageError("imagen invalida")

    img = Image.open(io.BytesIO(image_bytes))
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Batch of one on the model's device/dtype (224x224 after `transform`).
    img_tensor = transform(img).unsqueeze(0).to(device=DEVICE, dtype=DTYPE)

    # step 1: top-1 model ("Brand Model")
    model_feats_dev = model_embeddings.to(DEVICE) if DEVICE == "cuda" else model_embeddings
    top_model = _predict_top(model_feats_dev, model_labels, img_tensor, topk=1)[0]
    modelo_full = top_model["label"]; conf_m = top_model["confidence"]

    # Split "Brand Model ..." into brand (first token) and the remainder.
    partes = modelo_full.split(" ", 1)
    marca = partes[0] if len(partes) >= 1 else ""  # NOTE: split() always yields >= 1 part
    modelo = partes[1] if len(partes) == 2 else ""

    # step 2: keep only version labels prefixed by the predicted model string
    matches = [(lab, idx) for idx, lab in enumerate(version_labels) if lab.startswith(modelo_full)]
    if not matches:
        # No version candidates for this model: report the model with an empty version.
        return {
            "marca": marca.upper(), "modelo": modelo.title(),
            "version": "", "confianza_modelo": conf_m, "confianza_version": 0.0
        }

    idxs = [i for _, i in matches]
    labels_sub = [lab for lab, _ in matches]
    # Fancy-indexing keeps only the matching embedding rows.
    embeds_sub = version_embeddings[idxs].to(DEVICE) if DEVICE == "cuda" else version_embeddings[idxs]

    # step 3: top-1 version among the filtered candidates
    top_ver = _predict_top(embeds_sub, labels_sub, img_tensor, topk=1)[0]
    raw = top_ver["label"]; conf_v = top_ver["confidence"]

    # Strip the "Brand Model " prefix, then keep only the first token of the
    # remainder as the version name.
    prefix = modelo_full + " "
    ver = raw[len(prefix):] if raw.startswith(prefix) else raw
    ver = ver.split(" ")[0]
    # Below 25% softmax confidence, report the version as unidentified
    # (Spanish message is part of the API contract — do not translate).
    if conf_v < 25.0:
        ver = "Version no identificada con suficiente confianza"

    return {
        "marca": marca.upper(),
        "modelo": modelo.title(),
        "version": ver.title() if ver else "",
        "confianza_modelo": conf_m,
        "confianza_version": conf_v
    }
+
125
# ============== endpoints ==============
@app.get("/")
def root():
    """Health check: reports liveness and the active compute device."""
    payload = {"status": "ok", "device": DEVICE}
    return payload
+
130
@app.post("/predict")
async def predict(front: UploadFile = File(...), back: Optional[UploadFile] = File(None)):
    """Classify a vehicle from the *front* image.

    *back* is accepted for API compatibility but only drained and otherwise
    ignored. On any failure the legacy contract is preserved: HTTP 200 with
    a body of {"code": 404, "data": {}}.
    """
    import logging  # local import: keeps the module import block untouched
    try:
        front_bytes = await front.read()
        if back is not None:
            _ = await back.read()
        result = process_image_bytes(front_bytes)
        return JSONResponse(content={"code": 200, "data": result})
    except Exception:
        # The original swallowed every error silently, which hides real bugs.
        # Keep the exact response contract but record the traceback.
        logging.getLogger(__name__).exception("predict failed")
        return JSONResponse(content={"code": 404, "data": {}}, status_code=200)
text_embeddings_h14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2840ca4c8c94783b39b166e8144f7fe084dc91d9aae38f1042c9732b385bf2
3
+ size 2346749
text_embeddings_modelos_h14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f6be76db1d261daf5ca4e29e491cee866100101c81cb0829da9e2c7b4afd6c
3
+ size 843829