Spaces:

Fred808
/

FSERV2

Paused

App Files Files Community

Fred808 committed on Oct 11, 2025

Commit

c14f018

verified ·

1 Parent(s): 9706dce

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -65

app.py CHANGED Viewed

@@ -1,16 +1,11 @@
 import os
-import json
 import time
-from typing import Dict
 from PIL import Image
-from io import BytesIO
 import torch
-from transformers import AutoModelForVision2Seq, AutoProcessor
-from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import JSONResponse
-import uvicorn
-# Configure PyTorch settings
 torch.backends.cuda.enable_flash_sdp(False)
 torch.backends.cuda.enable_math_sdp(True)
 torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -20,9 +15,6 @@ torch.backends.cuda.enable_mem_efficient_sdp(True)
 MODEL_ID = "microsoft/Florence-2-large"
 DEVICE = "cpu"  # Using CPU instead of GPU
-# Create FastAPI app
-app = FastAPI(title="Florence-2 Image Captioning API")
 # Florence-2 Model (will be loaded once)
 model = None
 processor = None
@@ -39,22 +31,12 @@ def load_florence_model():
         try:
             log_message("[*] Loading Florence-2 model and processor...")
-            # Load model and processor with specific configuration
-            processor = AutoProcessor.from_pretrained(
-                MODEL_ID,
-                trust_remote_code=True,
-                revision="9a515b7",  # Pin to a specific version
-            )
-            model = AutoModelForVision2Seq.from_pretrained(
-                MODEL_ID,
-                trust_remote_code=True,
-                revision="9a515b7",  # Pin to a specific version
-                torch_dtype=torch.float32,
-            ).to(DEVICE)
             model.eval()
-            log_message("[ ] Florence-2 loaded and ready.")
         except Exception as e:
             log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
             raise
@@ -62,7 +44,7 @@ def load_florence_model():
 def caption_image(image: Image.Image) -> str:
     """Generate detailed caption for an image using Florence-2"""
     if model is None or processor is None:
-        return "Model not loaded."
     task_prompt = "<MORE_DETAILED_CAPTION>"
     prompt = task_prompt
@@ -89,52 +71,49 @@ def caption_image(image: Image.Image) -> str:
             )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
         return generated_text
     except Exception as e:
-        log_message(f"[!] Caption generation failed: {e}")
-        return "Captioning error."
-@app.on_event("startup")
-async def startup_event():
-    """Load model on startup"""
-    load_florence_model()
-@app.post("/caption")
-async def create_caption(file: UploadFile = File(...)) -> Dict:
-    """
-    API endpoint to receive an image and return its caption
-    """
     try:
-        log_message(f"[API] Received image: {file.filename}")
-        # Read and validate image
-        contents = await file.read()
-        image = Image.open(BytesIO(contents)).convert("RGB")
-        # Generate caption
-        log_message(f"[API] Generating caption for {file.filename}")
-        caption = caption_image(image)
-        log_message(f"[API] Caption generated for {file.filename}: {caption[:100]}...")
-        return {
-            "status": "success",
-            "filename": file.filename,
-            "caption": caption
-        }
     except Exception as e:
         error_msg = f"Error processing image: {str(e)}"
         log_message(f"[ERROR] {error_msg}")
-        return JSONResponse(
-            status_code=500,
-            content={
-                "status": "error",
-                "message": error_msg
-            }
-        )
 if __name__ == "__main__":
-    log_message("Starting Florence-2 Vision Analysis API Server")
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 import os
 import time
 from PIL import Image
 import torch
+from transformers import AutoModelForCausalLM, AutoProcessor
+import gradio as gr
+# Disable flash scaled-dot-product attention; keep the math and memory-efficient backends enabled
 torch.backends.cuda.enable_flash_sdp(False)
 torch.backends.cuda.enable_math_sdp(True)
 torch.backends.cuda.enable_mem_efficient_sdp(True)
 MODEL_ID = "microsoft/Florence-2-large"
 DEVICE = "cpu"  # Using CPU instead of GPU
 # Florence-2 Model (will be loaded once)
 model = None
 processor = None
         try:
             log_message("[*] Loading Florence-2 model and processor...")
+            # Load model on CPU
+            model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(DEVICE)
             model.eval()
+            processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+            log_message("[ ] Florence-2 loaded and ready on CPU")
         except Exception as e:
             log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
             raise
 def caption_image(image: Image.Image) -> str:
     """Generate detailed caption for an image using Florence-2"""
     if model is None or processor is None:
+        load_florence_model()
     task_prompt = "<MORE_DETAILED_CAPTION>"
     prompt = task_prompt
             )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+        log_message(f"[SUCCESS] Generated caption: {generated_text[:100]}...")
         return generated_text
     except Exception as e:
+        error_msg = f"[!] Caption generation failed: {e}"
+        log_message(error_msg)
+        return error_msg
+def process_image(input_image):
+    """Process image for Gradio interface"""
+    if input_image is None:
+        return "No image provided"
     try:
+        # Convert to PIL Image if needed
+        if not isinstance(input_image, Image.Image):
+            input_image = Image.fromarray(input_image)
+        log_message("[INFO] Processing new image...")
+        caption = caption_image(input_image)
+        return caption
     except Exception as e:
         error_msg = f"Error processing image: {str(e)}"
         log_message(f"[ERROR] {error_msg}")
+        return error_msg
+# Create Gradio interface
+demo = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.Textbox(label="Generated Caption", lines=3),
+    title="Florence-2 Image Captioning",
+    description="Upload an image to get a detailed caption generated by Florence-2 model.",
+    examples=[
+        ["example1.jpg"],
+        ["example2.jpg"]
+    ],
+    cache_examples=True,
+    theme=gr.themes.Soft()
+)
 if __name__ == "__main__":
+    log_message("Starting Florence-2 Gradio Server")
+    # Listen on all interfaces at port 7860; share=True is not passed, so no public share link is requested
+    demo.launch(server_name="0.0.0.0", server_port=7860)