Spaces:

Fred808
/

PIL2

Paused

App Files Files Community

Fred808 committed on Jul 16, 2025

Commit

03901aa

verified ·

1 Parent(s): 92cce7f

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -11

app.py CHANGED Viewed

@@ -6,10 +6,11 @@ from PIL import Image
 from transformers import AutoProcessor, AutoModelForCausalLM
 # ===== CONFIG =====
-VIDEO_PATH = "How.mp4"  # Set to your local video file
-FRAMES_DIR = "extracted"
-FPS = 3
-DEVICE = "cpu"  # Force CPU to avoid NCCL GPU issue
 # ===== Ensure Output Directory =====
 def ensure_dir(path):
@@ -50,14 +51,23 @@ def extract_frames(video_path, output_dir, fps=3):
     return frame_paths
 # ===== Load Florence-2 Base Model =====
-print("[INFO] Loading Florence-2-base model on CPU")
 processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True, attn_implementation="eager").to(DEVICE).eval()
 # ===== Analyze a Frame =====
 def analyze_frame(image_path):
     image = Image.open(image_path).convert("RGB")
-    inputs = processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         generated_ids = model.generate(
             input_ids=inputs["input_ids"],
@@ -70,18 +80,21 @@ def analyze_frame(image_path):
     result = processor.post_process_generation(
         generated_text,
         task="<MORE_DETAILED_CAPTION>",
-        image_size=(image.width, image.height)
     )
     return result["<MORE_DETAILED_CAPTION>"]
 # ===== Main Execution =====
 if __name__ == "__main__":
     frame_list = extract_frames(VIDEO_PATH, FRAMES_DIR, FPS)
-    print(f"[INFO] {len(frame_list)} frames extracted.")
     for idx, frame_path in enumerate(frame_list):
         print(f"\n[FRAME {idx+1}] Analyzing: {frame_path}")
         caption = analyze_frame(frame_path)
         print(f"[RESULT] {caption}")
     import uvicorn
-    port = int(os.getenv("PORT", 7860))  # Spaces set PORT env var
-    uvicorn.run("app:app", host="0.0.0.0", port=port)

 from transformers import AutoProcessor, AutoModelForCausalLM
 # ===== CONFIG =====
+VIDEO_PATH = "How.mp4"               # Local video file in root
+FRAMES_DIR = "extracted"            # Where frames are stored
+FPS = 3                             # Frames to extract per second
+DEVICE = "cpu"                      # Use CPU for compatibility
+RESIZE_DIM = (512, 512)             # Resize images to this resolution
 # ===== Ensure Output Directory =====
 def ensure_dir(path):
     return frame_paths
 # ===== Load Florence-2 Base Model =====
+print("[INFO] Loading Florence-2-base model on CPU...")
 processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Florence-2-base",
+    trust_remote_code=True,
+    attn_implementation="eager"
+).to(DEVICE).eval()
 # ===== Analyze a Frame =====
 def analyze_frame(image_path):
     image = Image.open(image_path).convert("RGB")
+    image = image.resize(RESIZE_DIM, Image.BILINEAR)  # Resize for speed
+    inputs = processor(
+        text="<MORE_DETAILED_CAPTION>",
+        images=image,
+        return_tensors="pt"
+    ).to(DEVICE)
     with torch.no_grad():
         generated_ids = model.generate(
             input_ids=inputs["input_ids"],
     result = processor.post_process_generation(
         generated_text,
         task="<MORE_DETAILED_CAPTION>",
+        image_size=RESIZE_DIM
     )
     return result["<MORE_DETAILED_CAPTION>"]
 # ===== Main Execution =====
 if __name__ == "__main__":
     frame_list = extract_frames(VIDEO_PATH, FRAMES_DIR, FPS)
+    print(f"[INFO] Extracted {len(frame_list)} frames.")
     for idx, frame_path in enumerate(frame_list):
         print(f"\n[FRAME {idx+1}] Analyzing: {frame_path}")
         caption = analyze_frame(frame_path)
         print(f"[RESULT] {caption}")
+    # Optional: Start a dummy Uvicorn server (if you want to expand into an API later)
     import uvicorn
+    port = int(os.getenv("PORT", 7860))  # for Gradio Spaces compatibility
+    uvicorn.run("main:app", host="0.0.0.0", port=port) if os.getenv("RUN_SERVER") else None