Spaces:

Fred808
/

PIL3.22

Paused

App Files Files Community

Fred808 committed on Oct 20, 2025

Commit

faec853

verified ·

1 Parent(s): e43d20b

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -0

app.py CHANGED Viewed

@@ -7,6 +7,9 @@ from io import BytesIO
 import base64
 from transformers import AutoProcessor, AutoModelForCausalLM
 import os
 # Attempt to install flash-attn
 try:
@@ -132,6 +135,185 @@ def describe_image_from_url(image_url, model_choice):
     except Exception as e:
         return {"error": f"Error processing image: {str(e)}"}
 # Description for the interface
 description = "> Select the model to use for generating the image description. 'Base' is smaller and faster, while 'Large' is more accurate but slower."
 if device == "cpu":

 import base64
 from transformers import AutoProcessor, AutoModelForCausalLM
 import os
+import threading
+import time
+import urllib.parse
 # Attempt to install flash-attn
 try:
     except Exception as e:
         return {"error": f"Error processing image: {str(e)}"}
+# ---- Background captioning worker -------------------------------------------------
+# The worker (background_worker, launched on a daemon thread by
+# _start_worker_thread) polls the image middleware at IMAGE_SERVER_BASE for
+# courses and frames, downloads each frame, captions it with the already-loaded
+# Florence model, POSTs the result to <DATA_COLLECTION_BASE>/submit, then
+# releases the frame and, once a course has no more frames, the course.
+# All HTTP calls are blocking, which is why the work runs in its own thread
+# instead of on the UI thread.
+# NOTE(review): the call to _start_worker_thread() is not visible in this hunk;
+# confirm it is invoked before the Gradio app launches.
+# Base URL of the image middleware; override with the IMAGE_SERVER_BASE env var.
+IMAGE_SERVER_BASE = os.getenv("IMAGE_SERVER_BASE", "https://fred808-vssee.hf.space")
+# Base URL of the service that receives captions; override with DATA_COLLECTION_BASE.
+DATA_COLLECTION_BASE = os.getenv("DATA_COLLECTION_BASE", "https://fred808-flow.hf.space")
+# Value sent as `requester_id` on every middleware call; defaults to a per-process id.
+REQUESTER_ID = os.getenv("FLO_REQUESTER_ID", f"florence-2-{os.getpid()}")
+# Model used by the worker: "Florence-2-base" selects the base model/processor,
+# any other value selects the large pair.
+MODEL_CHOICE = os.getenv("FLO_MODEL_CHOICE", "Florence-2-base")
+def _build_download_url(course: str, video: str, frame: str) -> str:
+    file_param = f"frame:{course}/{video}/{frame}"
+    return f"{IMAGE_SERVER_BASE.rstrip('/')}/download?course={urllib.parse.quote(course, safe='')}&file={urllib.parse.quote(file_param, safe='') }"
+def _download_bytes(url: str, timeout: int = 30):
+    try:
+        r = requests.get(url, timeout=timeout)
+        r.raise_for_status()
+        return r.content, r.headers.get('content-type')
+    except Exception as e:
+        print(f"[BACKGROUND] download failed {url}: {e}")
+        return None, None
+def _post_submit(caption: str, image_name: str, course: str, image_url: str, image_bytes: bytes):
+    submit_url = f"{DATA_COLLECTION_BASE.rstrip('/')}/submit"
+    files = {'image': (image_name, image_bytes, 'application/octet-stream')}
+    data = {'caption': caption, 'image_name': image_name, 'course': course, 'image_url': image_url}
+    try:
+        r = requests.post(submit_url, data=data, files=files, timeout=30)
+        try:
+            return r.status_code, r.json()
+        except Exception:
+            return r.status_code, r.text
+    except Exception as e:
+        print(f"[BACKGROUND] submit POST failed: {e}")
+        return None, None
+def _release_frame(course: str, video: str, frame: str):
+    try:
+        release_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/release/frame/{urllib.parse.quote(course, safe='')}/{urllib.parse.quote(video, safe='')}/{urllib.parse.quote(frame, safe='')}"
+        requests.post(release_url, params={"requester_id": REQUESTER_ID}, timeout=10)
+    except Exception as e:
+        print(f"[BACKGROUND] release frame failed: {e}")
+def _release_course(course: str):
+    try:
+        release_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/release/course/{urllib.parse.quote(course, safe='')}"
+        requests.post(release_url, params={"requester_id": REQUESTER_ID}, timeout=10)
+    except Exception as e:
+        print(f"[BACKGROUND] release course failed: {e}")
+def background_worker():
+    print("[BACKGROUND] Worker waiting for model to be available...")
+    # wait for model(s) to load (respect existing loading logic)
+    waited = 0
+    while waited < 120:
+        if MODEL_CHOICE == "Florence-2-base":
+            if vision_language_model_base is not None and vision_language_processor_base is not None:
+                break
+        else:
+            if vision_language_model_large is not None and vision_language_processor_large is not None:
+                break
+        time.sleep(1)
+        waited += 1
+    if waited >= 120:
+        print("[BACKGROUND] Model not available after timeout; background worker exiting.")
+        return
+    print("[BACKGROUND] Model loaded; starting polling loop")
+    while True:
+        try:
+            # Acquire next course
+            try:
+                r = requests.get(f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/next/course", params={"requester_id": REQUESTER_ID}, timeout=15)
+                if r.status_code == 404:
+                    time.sleep(3)
+                    continue
+                r.raise_for_status()
+                course_json = r.json()
+            except Exception as e:
+                print(f"[BACKGROUND] failed to get next course: {e}")
+                time.sleep(3)
+                continue
+            course = course_json.get('course_id') or course_json.get('course')
+            if not course:
+                print(f"[BACKGROUND] invalid course response: {course_json}")
+                time.sleep(2)
+                continue
+            print(f"[BACKGROUND] processing course: {course}")
+            # Pull images until none left
+            while True:
+                try:
+                    img_url = f"{IMAGE_SERVER_BASE.rstrip('/')}/middleware/next/image/{urllib.parse.quote(course, safe='')}"
+                    rimg = requests.get(img_url, params={"requester_id": REQUESTER_ID}, timeout=15)
+                    if rimg.status_code == 404:
+                        print(f"[BACKGROUND] no images for course {course}")
+                        break
+                    rimg.raise_for_status()
+                    img_json = rimg.json()
+                except Exception as e:
+                    print(f"[BACKGROUND] failed to get next image: {e}")
+                    time.sleep(1)
+                    continue
+                video = img_json.get('video')
+                frame = img_json.get('frame')
+                file_id = img_json.get('file_id')
+                if not (video and frame and file_id):
+                    print(f"[BACKGROUND] unexpected image entry: {img_json}")
+                    time.sleep(0.5)
+                    continue
+                download_url = _build_download_url(course, video, frame)
+                print(f"[BACKGROUND] downloading {download_url}")
+                img_bytes, content_type = _download_bytes(download_url)
+                if not img_bytes:
+                    print(f"[BACKGROUND] failed to download image, releasing frame {file_id}")
+                    _release_frame(course, video, frame)
+                    time.sleep(1)
+                    continue
+                try:
+                    pil_img = Image.open(BytesIO(img_bytes)).convert('RGB')
+                except Exception as e:
+                    print(f"[BACKGROUND] failed to open image bytes: {e}")
+                    _release_frame(course, video, frame)
+                    time.sleep(1)
+                    continue
+                # Choose model and processor according to MODEL_CHOICE
+                if MODEL_CHOICE == "Florence-2-base":
+                    model = vision_language_model_base
+                    processor = vision_language_processor_base
+                else:
+                    model = vision_language_model_large
+                    processor = vision_language_processor_large
+                caption = ""
+                try:
+                    # Reuse existing processing function: process_image_description(model, processor, image)
+                    caption = process_image_description(model, processor, pil_img)
+                except Exception as e:
+                    print(f"[BACKGROUND] captioning failed: {e}")
+                status, resp = _post_submit(caption, frame, course, download_url, img_bytes)
+                print(f"[BACKGROUND] submitted caption for {frame}: status={status}")
+                # release frame
+                _release_frame(course, video, frame)
+                time.sleep(0.2)
+            # release course
+            _release_course(course)
+            time.sleep(1)
+        except Exception as e:
+            print(f"[BACKGROUND] unexpected loop error: {e}")
+            time.sleep(5)
+# Start background worker thread (daemon) so it doesn't block shutdown
+def _start_worker_thread():
+    t = threading.Thread(target=background_worker, daemon=True)
+    t.start()
 # Description for the interface
 description = "> Select the model to use for generating the image description. 'Base' is smaller and faster, while 'Large' is more accurate but slower."
 if device == "cpu":