Spaces:

Fred808
/

FSERV2

Paused

App Files Community

Fred808 committed on Oct 11, 2025

Commit

133b29b

verified ·

1 Parent(s): c14f018

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -44

app.py CHANGED Viewed

@@ -1,11 +1,16 @@
 import os
 import time
 from PIL import Image
 import torch
-from transformers import AutoModelForCausalLM, AutoProcessor
-import gradio as gr
-# Disable SDPA if not supported
 torch.backends.cuda.enable_flash_sdp(False)
 torch.backends.cuda.enable_math_sdp(True)
 torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -15,6 +20,9 @@ torch.backends.cuda.enable_mem_efficient_sdp(True)
 MODEL_ID = "microsoft/Florence-2-large"
 DEVICE = "cpu"  # Using CPU instead of GPU
 # Florence-2 Model (will be loaded once)
 model = None
 processor = None
@@ -31,12 +39,20 @@ def load_florence_model():
         try:
             log_message("[*] Loading Florence-2 model and processor...")
-            # Load model on CPU
-            model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(DEVICE)
-            model.eval()
-            processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-            log_message("[ ] Florence-2 loaded and ready on CPU")
         except Exception as e:
             log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
             raise
@@ -44,7 +60,7 @@ def load_florence_model():
 def caption_image(image: Image.Image) -> str:
     """Generate detailed caption for an image using Florence-2"""
     if model is None or processor is None:
-        load_florence_model()
     task_prompt = "<MORE_DETAILED_CAPTION>"
     prompt = task_prompt
@@ -71,49 +87,52 @@ def caption_image(image: Image.Image) -> str:
             )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-        log_message(f"[SUCCESS] Generated caption: {generated_text[:100]}...")
         return generated_text
     except Exception as e:
-        error_msg = f"[!] Caption generation failed: {e}"
-        log_message(error_msg)
-        return error_msg
-def process_image(input_image):
-    """Process image for Gradio interface"""
-    if input_image is None:
-        return "No image provided"
     try:
-        # Convert to PIL Image if needed
-        if not isinstance(input_image, Image.Image):
-            input_image = Image.fromarray(input_image)
-        log_message("[INFO] Processing new image...")
-        caption = caption_image(input_image)
-        return caption
     except Exception as e:
         error_msg = f"Error processing image: {str(e)}"
         log_message(f"[ERROR] {error_msg}")
-        return error_msg
-# Create Gradio interface
-demo = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=gr.Textbox(label="Generated Caption", lines=3),
-    title="Florence-2 Image Captioning",
-    description="Upload an image to get a detailed caption generated by Florence-2 model.",
-    examples=[
-        ["example1.jpg"],
-        ["example2.jpg"]
-    ],
-    cache_examples=True,
-    theme=gr.themes.Soft()
-)
 if __name__ == "__main__":
-    log_message("Starting Florence-2 Gradio Server")
-    # Launch with share=True to get a public URL
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import json
 import time
+from typing import Dict
 from PIL import Image
+from io import BytesIO
 import torch
+from transformers import AutoModelForVision2Seq, AutoProcessor
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import JSONResponse
+import uvicorn
+# Configure PyTorch settings
 torch.backends.cuda.enable_flash_sdp(False)
 torch.backends.cuda.enable_math_sdp(True)
 torch.backends.cuda.enable_mem_efficient_sdp(True)
 MODEL_ID = "microsoft/Florence-2-large"
 DEVICE = "cpu"  # Using CPU instead of GPU
+# Create FastAPI app
+app = FastAPI(title="Florence-2 Image Captioning API")
 # Florence-2 Model (will be loaded once)
 model = None
 processor = None
         try:
             log_message("[*] Loading Florence-2 model and processor...")
+            # Load model and processor
+            processor = AutoProcessor.from_pretrained(
+                MODEL_ID,
+                trust_remote_code=True
+            )
+            model = AutoModelForVision2Seq.from_pretrained(
+                MODEL_ID,
+                trust_remote_code=True,
+                torch_dtype=torch.float32
+            ).to(DEVICE)
+            model.eval()
+            log_message("[ ] Florence-2 loaded and ready.")
         except Exception as e:
             log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
             raise
 def caption_image(image: Image.Image) -> str:
     """Generate detailed caption for an image using Florence-2"""
     if model is None or processor is None:
+        return "Model not loaded."
     task_prompt = "<MORE_DETAILED_CAPTION>"
     prompt = task_prompt
             )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
         return generated_text
     except Exception as e:
+        log_message(f"[!] Caption generation failed: {e}")
+        return "Captioning error."
+@app.on_event("startup")
+async def startup_event():
+    """Load model on startup"""
+    load_florence_model()
+@app.post("/caption")
+async def create_caption(file: UploadFile = File(...)) -> Dict:
+    """
+    API endpoint to receive an image and return its caption
+    """
     try:
+        log_message(f"[API] Received image: {file.filename}")
+        # Read and validate image
+        contents = await file.read()
+        image = Image.open(BytesIO(contents)).convert("RGB")
+        # Generate caption
+        log_message(f"[API] Generating caption for {file.filename}")
+        caption = caption_image(image)
+        log_message(f"[API] Caption generated for {file.filename}: {caption[:100]}...")
+        return {
+            "status": "success",
+            "filename": file.filename,
+            "caption": caption
+        }
     except Exception as e:
         error_msg = f"Error processing image: {str(e)}"
         log_message(f"[ERROR] {error_msg}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "status": "error",
+                "message": error_msg
+            }
+        )
 if __name__ == "__main__":
+    log_message("Starting Florence-2 Vision Analysis API Server")
+    uvicorn.run(app, host="0.0.0.0", port=8000)