Fred808 committed on
Commit
faec853
·
verified ·
1 Parent(s): e43d20b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -0
app.py CHANGED
@@ -7,6 +7,9 @@ from io import BytesIO
7
  import base64
8
  from transformers import AutoProcessor, AutoModelForCausalLM
9
  import os
 
 
 
10
 
11
  # Attempt to install flash-attn
12
  try:
@@ -132,6 +135,185 @@ def describe_image_from_url(image_url, model_choice):
132
  except Exception as e:
133
  return {"error": f"Error processing image: {str(e)}"}
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # Description for the interface
136
  description = "> Select the model to use for generating the image description. 'Base' is smaller and faster, while 'Large' is more accurate but slower."
137
  if device == "cpu":
 
7
  import base64
8
  from transformers import AutoProcessor, AutoModelForCausalLM
9
  import os
10
+ import threading
11
+ import time
12
+ import urllib.parse
13
 
14
  # Attempt to install flash-attn
15
  try:
 
135
  except Exception as e:
136
  return {"error": f"Error processing image: {str(e)}"}
137
 
138
+
139
+ # ---- Background captioning worker -------------------------------------------------
140
+ # This worker will start in a daemon thread before Gradio launches. It polls the
141
+ # image middleware on IMAGE_SERVER_BASE, downloads frames, captions them using
142
+ # the already-loaded Florence models, posts results to DATA_COLLECTION_BASE/submit,
143
+ # then releases frames and courses. It uses blocking requests so it runs in a
144
+ # separate thread and will not interfere with the UI thread.
145
+
146
# Endpoints and identity used by the background worker; each value can be
# overridden through the environment variable named in the lookup.
IMAGE_SERVER_BASE = os.environ.get("IMAGE_SERVER_BASE", "https://fred808-vssee.hf.space")
DATA_COLLECTION_BASE = os.environ.get("DATA_COLLECTION_BASE", "https://fred808-flow.hf.space")
# Sent as ``requester_id`` on every middleware call; defaults to a per-process id.
REQUESTER_ID = os.environ.get("FLO_REQUESTER_ID", f"florence-2-{os.getpid()}")
# "Florence-2-base" selects the base model; any other value selects the large one.
MODEL_CHOICE = os.environ.get("FLO_MODEL_CHOICE", "Florence-2-base")
150
+
151
+
152
def _build_download_url(course: str, video: str, frame: str) -> str:
    """Return the image-server download URL for one frame.

    The frame is addressed by a ``frame:<course>/<video>/<frame>`` identifier
    passed in the ``file`` query parameter. Both query values are
    percent-encoded with no character treated as safe.
    """
    base = IMAGE_SERVER_BASE.rstrip('/')
    encoded_course = urllib.parse.quote(course, safe='')
    encoded_file = urllib.parse.quote(f"frame:{course}/{video}/{frame}", safe='')
    return f"{base}/download?course={encoded_course}&file={encoded_file}"
155
+
156
+
157
def _download_bytes(url: str, timeout: int = 30):
    """Fetch ``url`` and return ``(body_bytes, content_type_header)``.

    Any failure (connection error, timeout, non-success HTTP status) is
    logged and reported to the caller as ``(None, None)`` instead of raising.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        payload = response.content
        mime_type = response.headers.get('content-type')
        return payload, mime_type
    except Exception as err:
        print(f"[BACKGROUND] download failed {url}: {err}")
        return None, None
165
+
166
+
167
def _post_submit(caption: str, image_name: str, course: str, image_url: str, image_bytes: bytes):
    """POST a caption and its image to ``DATA_COLLECTION_BASE``'s ``/submit``.

    The image bytes are sent as the ``image`` file part; the caption and its
    metadata are sent as ordinary form fields.

    Returns ``(status_code, body)`` where ``body`` is the decoded JSON
    response, or the raw response text when the body is not valid JSON.
    Returns ``(None, None)`` if the request itself fails.
    """
    endpoint = f"{DATA_COLLECTION_BASE.rstrip('/')}/submit"
    form_fields = {
        'caption': caption,
        'image_name': image_name,
        'course': course,
        'image_url': image_url,
    }
    upload = {'image': (image_name, image_bytes, 'application/octet-stream')}
    try:
        response = requests.post(endpoint, data=form_fields, files=upload, timeout=30)
        status = response.status_code
        try:
            body = response.json()
        except Exception:
            # Not JSON: hand back the raw text so the caller can still log it.
            body = response.text
        return status, body
    except Exception as err:
        print(f"[BACKGROUND] submit POST failed: {err}")
        return None, None
180
+
181
+
182
def _release_frame(course: str, video: str, frame: str):
    """Tell the image middleware this requester is done with one frame.

    Best-effort: any failure is logged and swallowed so the caller's loop
    keeps running. A non-success HTTP status is treated as a failure too
    and logged, rather than being silently ignored.
    """
    try:
        quoted_path = "/".join(
            urllib.parse.quote(part, safe='') for part in (course, video, frame)
        )
        release_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/release/frame/{quoted_path}"
        response = requests.post(release_url, params={"requester_id": REQUESTER_ID}, timeout=10)
        # Surface 4xx/5xx through the except branch below so a rejected
        # release leaves a trace in the log.
        response.raise_for_status()
    except Exception as e:
        print(f"[BACKGROUND] release frame failed: {e}")
188
+
189
+
190
def _release_course(course: str):
    """Tell the image middleware this requester is done with ``course``.

    Best-effort: any failure is logged and swallowed. A non-success HTTP
    status is treated as a failure too and logged, rather than being
    silently ignored.
    """
    try:
        release_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/release/course/{urllib.parse.quote(course, safe='')}"
        response = requests.post(release_url, params={"requester_id": REQUESTER_ID}, timeout=10)
        # Surface 4xx/5xx through the except branch below so a rejected
        # release leaves a trace in the log.
        response.raise_for_status()
    except Exception as e:
        print(f"[BACKGROUND] release course failed: {e}")
196
+
197
+
198
def _active_model_and_processor():
    """Return the ``(model, processor)`` pair selected by ``MODEL_CHOICE``."""
    if MODEL_CHOICE == "Florence-2-base":
        return vision_language_model_base, vision_language_processor_base
    return vision_language_model_large, vision_language_processor_large


def _wait_for_model(timeout_s: int = 120) -> bool:
    """Poll once per second until the selected model and processor are set.

    Returns ``True`` as soon as both are non-``None``, ``False`` if they are
    still missing after ``timeout_s`` polls.
    """
    for _ in range(timeout_s):
        model, processor = _active_model_and_processor()
        if model is not None and processor is not None:
            return True
        time.sleep(1)
    return False


def _caption_frame(course: str, img_json: dict) -> None:
    """Download, caption, submit and release the frame described by ``img_json``.

    ``img_json`` must carry truthy ``video``, ``frame`` and ``file_id``
    entries; otherwise it is logged and skipped. On any per-frame failure
    (download, decode, captioning) the frame is released and skipped.
    """
    video = img_json.get('video')
    frame = img_json.get('frame')
    file_id = img_json.get('file_id')
    if not (video and frame and file_id):
        print(f"[BACKGROUND] unexpected image entry: {img_json}")
        time.sleep(0.5)
        return

    download_url = _build_download_url(course, video, frame)
    print(f"[BACKGROUND] downloading {download_url}")
    img_bytes, _content_type = _download_bytes(download_url)
    if not img_bytes:
        print(f"[BACKGROUND] failed to download image, releasing frame {file_id}")
        _release_frame(course, video, frame)
        time.sleep(1)
        return

    try:
        pil_img = Image.open(BytesIO(img_bytes)).convert('RGB')
    except Exception as e:
        print(f"[BACKGROUND] failed to open image bytes: {e}")
        _release_frame(course, video, frame)
        time.sleep(1)
        return

    model, processor = _active_model_and_processor()
    try:
        # process_image_description is defined elsewhere in this file;
        # it is called as (model, processor, image).
        caption = process_image_description(model, processor, pil_img)
    except Exception as e:
        # Do not submit an empty caption for a frame that was never
        # captioned; treat it like the download/decode failures above.
        print(f"[BACKGROUND] captioning failed: {e}")
        _release_frame(course, video, frame)
        time.sleep(1)
        return

    status, _resp = _post_submit(caption, frame, course, download_url, img_bytes)
    print(f"[BACKGROUND] submitted caption for {frame}: status={status}")

    _release_frame(course, video, frame)
    time.sleep(0.2)


def _process_course(course: str) -> None:
    """Caption frames of ``course`` until the middleware answers 404."""
    # Built once, outside the retry loop: a failure to build the URL is a
    # programming/data error and must not be retried forever as if it were
    # a transient network problem.
    img_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/next/image/{urllib.parse.quote(course, safe='')}"
    while True:
        try:
            rimg = requests.get(img_url, params={"requester_id": REQUESTER_ID}, timeout=15)
            if rimg.status_code == 404:
                print(f"[BACKGROUND] no images for course {course}")
                return
            rimg.raise_for_status()
            img_json = rimg.json()
        except Exception as e:
            # Transient fetch errors are retried indefinitely, once a second.
            print(f"[BACKGROUND] failed to get next image: {e}")
            time.sleep(1)
            continue

        _caption_frame(course, img_json)


def background_worker():
    """Poll the image middleware forever, captioning frames course by course.

    Waits up to 120 seconds for the model selected by ``MODEL_CHOICE`` to be
    loaded and returns if it never appears. Otherwise loops endlessly:
    acquire the next course, caption its frames, release the course. The
    course is released even when processing raises unexpectedly.
    Intended to run on its own thread because every call here blocks.
    """
    print("[BACKGROUND] Worker waiting for model to be available...")
    if not _wait_for_model():
        print("[BACKGROUND] Model not available after timeout; background worker exiting.")
        return

    print("[BACKGROUND] Model loaded; starting polling loop")

    next_course_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/next/course"
    while True:
        try:
            # Acquire next course
            try:
                r = requests.get(next_course_url, params={"requester_id": REQUESTER_ID}, timeout=15)
                if r.status_code == 404:
                    # Nothing to do right now; poll again shortly.
                    time.sleep(3)
                    continue
                r.raise_for_status()
                course_json = r.json()
            except Exception as e:
                print(f"[BACKGROUND] failed to get next course: {e}")
                time.sleep(3)
                continue

            course = course_json.get('course_id') or course_json.get('course')
            if not course:
                print(f"[BACKGROUND] invalid course response: {course_json}")
                time.sleep(2)
                continue
            # The id is spliced into URL paths; urllib.parse.quote rejects
            # non-string values such as a numeric course_id.
            course = str(course)

            print(f"[BACKGROUND] processing course: {course}")

            try:
                _process_course(course)
            finally:
                # Always hand the course back, even on an unexpected error,
                # so it does not stay claimed by this requester.
                _release_course(course)
            time.sleep(1)

        except Exception as e:
            print(f"[BACKGROUND] unexpected loop error: {e}")
            time.sleep(5)
310
+
311
+ # Start background worker thread (daemon) so it doesn't block shutdown
312
def _start_worker_thread():
    """Launch ``background_worker`` on a daemon thread and return immediately."""
    # daemon=True: the thread is not waited for when the interpreter exits.
    threading.Thread(target=background_worker, daemon=True).start()
315
+
316
+
317
  # Description for the interface
318
  description = "> Select the model to use for generating the image description. 'Base' is smaller and faster, while 'Large' is more accurate but slower."
319
  if device == "cpu":