yossss2 committed on
Commit
cd058ce
·
verified ·
1 Parent(s): 69b70a3

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. app.py +139 -0
  3. requirements (1).txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# System dependencies for OpenCV.
# --no-install-recommends keeps optional "recommended" packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies are installed before the application code is copied.
# NOTE(review): this commit uploads the file as "requirements (1).txt";
# confirm a file named exactly "requirements.txt" exists in the build context.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# HuggingFace Spaces uses port 7860
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TUNILip+ — HuggingFace Spaces (Docker SDK)
3
+ Même pipeline que main.py, adapté pour HF Spaces.
4
+ 2GB RAM gratuit — suffisant pour VideoMAE (86M params ~330MB)
5
+ """
6
+
7
+ from fastapi import FastAPI, UploadFile, File, HTTPException
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import JSONResponse
10
+ import numpy as np
11
+ import cv2
12
+ import torch
13
+ import tempfile
14
+ import os
15
+ import logging
16
+ from contextlib import asynccontextmanager
17
+
18
# Logging: INFO level on the root logger, plus a named logger for this service.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("tunilip")

# Checkpoint identifier and number of frames sampled per clip.
VMAE_MODEL_ID = "MCG-NJU/videomae-base"
NUM_FRAMES = 16

# Filled in by the application lifespan handler; None until then.
vmae_processor = vmae_model = DEVICE = None
26
+
27
+
28
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the VideoMAE processor and model at startup; log at shutdown.

    On success the module-level ``vmae_processor``, ``vmae_model`` and
    ``DEVICE`` are populated. A loading failure is logged with its traceback
    instead of being raised, so the application still starts; in that case
    the processor and model globals are left untouched.
    """
    global vmae_processor, vmae_model, DEVICE
    logger.info(f"⏳ Chargement {VMAE_MODEL_ID} …")
    try:
        from transformers import VideoMAEModel, VideoMAEImageProcessor

        DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f" Device : {DEVICE}")

        # Build everything in locals first so a failure partway through
        # (download, device transfer, ...) never leaves a half-initialised
        # model visible to the request handlers.
        processor = VideoMAEImageProcessor.from_pretrained(VMAE_MODEL_ID)
        model = VideoMAEModel.from_pretrained(VMAE_MODEL_ID)
        model.eval()
        model = model.to(DEVICE)
        for p in model.parameters():
            p.requires_grad = False

        # Publish only once the model is fully ready.
        vmae_processor, vmae_model = processor, model
        logger.info(f"✅ VideoMAE chargé sur {DEVICE}")
    except Exception as e:
        # Keep the service up, but record the full traceback for diagnosis.
        logger.error(f"❌ Erreur chargement VideoMAE : {e}", exc_info=True)
    yield
    logger.info("Shutdown")
47
+
48
+
49
# ASGI application; startup/shutdown work is delegated to `lifespan`.
app = FastAPI(lifespan=lifespan, title="TUNILip+ Feature Extractor")

# CORS middleware configured with wildcard origins, methods and headers.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
57
+
58
+
59
def extract_frames_224(video_path: str, num_frames: int = NUM_FRAMES):
    """Sample evenly spaced frames from a video as 224x224 RGB arrays.

    Frame indices are spread uniformly from the first to the last frame
    reported by the container. Frames that fail to decode are skipped and
    the result is padded with black frames up to ``num_frames``.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames to return.

    Returns:
        A list of ``num_frames`` uint8 arrays of shape (224, 224, 3).

    Raises:
        ValueError: If the video cannot be opened, reports no frames, or
            none of the sampled frames can be decoded.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            raise ValueError(f"Impossible d'ouvrir : {video_path}")
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Reject negative frame counts as well as zero.
        if total <= 0:
            raise ValueError("Vidéo vide")
        indices = np.linspace(0, total - 1, num_frames, dtype=int)
        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frame = cv2.resize(frame, (224, 224))
                # OpenCV decodes to BGR; convert to RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
    finally:
        # Always free the capture handle, even when a frame operation raises.
        cap.release()

    # An all-black clip would silently produce meaningless features.
    if num_frames > 0 and not frames:
        raise ValueError("Aucune image décodable dans la vidéo")

    # Pad partially decoded clips with black frames.
    while len(frames) < num_frames:
        frames.append(np.zeros((224, 224, 3), dtype=np.uint8))
    return frames[:num_frames]
80
+
81
+
82
@torch.no_grad()
def extract_videomae_features(video_path: str) -> np.ndarray:
    """Run VideoMAE on a video and return spatially averaged token features.

    Args:
        video_path: Path of the video file to process.

    Returns:
        A float32 array reshaped to 8 groups of 196 tokens of width 768 and
        averaged over the 196-token axis, i.e. shape (8, 768).

    Raises:
        RuntimeError: If the model or processor has not been loaded.
    """
    if vmae_model is None or vmae_processor is None:
        raise RuntimeError("VideoMAE non chargé")

    clip = extract_frames_224(video_path, NUM_FRAMES)
    encoded = vmae_processor(clip, return_tensors="pt")
    batch = {name: tensor.to(DEVICE) for name, tensor in encoded.items()}

    # Drop the batch axis and move to NumPy; expected (1568, 768) per the
    # original author's note.
    tokens = vmae_model(**batch).last_hidden_state.squeeze(0).cpu().numpy()

    n_temporal, n_spatial = 8, 196
    grouped = tokens[: n_temporal * n_spatial].reshape(n_temporal, n_spatial, 768)
    # Average over the 196-token axis -> (8, 768).
    pooled = grouped.mean(axis=1)
    return pooled.astype(np.float32)
95
+
96
+
97
@app.get("/health")
def health():
    """Return service status, whether the model is loaded, and the device."""
    model_loaded = vmae_model is not None
    device_label = "unknown" if not DEVICE else str(DEVICE)
    return {"status": "ok", "model_ready": model_loaded, "device": device_label}
104
+
105
+
106
@app.post("/extract-features")
async def extract_features(video: UploadFile = File(...)):
    """Extract VideoMAE features from an uploaded video.

    The upload is spooled to a temporary file in fixed-size chunks, so the
    whole video is never held in memory, and that file is removed whatever
    the outcome.

    Args:
        video: The uploaded video file.

    Returns:
        A JSON response with ``features`` (nested lists), ``shape`` and
        ``model_id``.

    Raises:
        HTTPException: 503 on ``RuntimeError`` (e.g. model not loaded),
            422 on ``ValueError`` (e.g. unreadable video), 500 otherwise.
    """
    suffix = os.path.splitext(video.filename or "video.mp4")[-1] or ".mp4"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path first so cleanup runs even if the copy fails.
            tmp_path = tmp.name
            while True:
                chunk = await video.read(1024 * 1024)
                if not chunk:
                    break
                tmp.write(chunk)
        # The file is closed (and flushed) here, before it is read back.
        features = extract_videomae_features(tmp_path)
        return JSONResponse({
            "features": features.tolist(),
            "shape": list(features.shape),
            "model_id": VMAE_MODEL_ID,
        })
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Erreur : {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # A cleanup failure must not replace the real response/error.
                logger.warning(
                    "Suppression du fichier temporaire impossible : %s", tmp_path
                )
129
+
130
+
131
@app.get("/")
def root():
    """Return a small payload naming the service."""
    service_info = {"service": "TUNILip+ VideoMAE Feature Extractor"}
    return service_info
134
+
135
+
136
# HuggingFace Spaces launches uvicorn on port 7860
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app=app, port=7860, host="0.0.0.0")
requirements (1).txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.0
3
+ python-multipart>=0.0.9
4
+ transformers>=4.44.2
5
+ torch>=2.9.0
6
+ torchvision>=0.19.0
7
+ opencv-python-headless>=4.10.0
8
+ numpy>=1.26.0
9
+ huggingface-hub>=0.24.0