Fred808 committed on
Commit
133b29b
·
verified ·
1 Parent(s): c14f018

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -44
app.py CHANGED
@@ -1,11 +1,16 @@
1
  import os
 
2
  import time
 
3
  from PIL import Image
 
4
  import torch
5
- from transformers import AutoModelForCausalLM, AutoProcessor
6
- import gradio as gr
 
 
7
 
8
- # Disable SDPA if not supported
9
  torch.backends.cuda.enable_flash_sdp(False)
10
  torch.backends.cuda.enable_math_sdp(True)
11
  torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -15,6 +20,9 @@ torch.backends.cuda.enable_mem_efficient_sdp(True)
15
  MODEL_ID = "microsoft/Florence-2-large"
16
  DEVICE = "cpu" # Using CPU instead of GPU
17
 
 
 
 
18
  # Florence-2 Model (will be loaded once)
19
  model = None
20
  processor = None
@@ -31,12 +39,20 @@ def load_florence_model():
31
  try:
32
  log_message("[*] Loading Florence-2 model and processor...")
33
 
34
- # Load model on CPU
35
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(DEVICE)
36
- model.eval()
 
 
 
 
 
 
 
 
37
 
38
- processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
39
- log_message("[ ] Florence-2 loaded and ready on CPU")
40
  except Exception as e:
41
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
42
  raise
@@ -44,7 +60,7 @@ def load_florence_model():
44
  def caption_image(image: Image.Image) -> str:
45
  """Generate detailed caption for an image using Florence-2"""
46
  if model is None or processor is None:
47
- load_florence_model()
48
 
49
  task_prompt = "<MORE_DETAILED_CAPTION>"
50
  prompt = task_prompt
@@ -71,49 +87,52 @@ def caption_image(image: Image.Image) -> str:
71
  )
72
 
73
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
74
- log_message(f"[SUCCESS] Generated caption: {generated_text[:100]}...")
75
  return generated_text
76
 
77
  except Exception as e:
78
- error_msg = f"[!] Caption generation failed: {e}"
79
- log_message(error_msg)
80
- return error_msg
81
 
82
- def process_image(input_image):
83
- """Process image for Gradio interface"""
84
- if input_image is None:
85
- return "No image provided"
86
-
 
 
 
 
 
87
  try:
88
- # Convert to PIL Image if needed
89
- if not isinstance(input_image, Image.Image):
90
- input_image = Image.fromarray(input_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- log_message("[INFO] Processing new image...")
93
- caption = caption_image(input_image)
94
- return caption
95
-
96
  except Exception as e:
97
  error_msg = f"Error processing image: {str(e)}"
98
  log_message(f"[ERROR] {error_msg}")
99
- return error_msg
100
-
101
- # Create Gradio interface
102
- demo = gr.Interface(
103
- fn=process_image,
104
- inputs=gr.Image(type="pil", label="Upload Image"),
105
- outputs=gr.Textbox(label="Generated Caption", lines=3),
106
- title="Florence-2 Image Captioning",
107
- description="Upload an image to get a detailed caption generated by Florence-2 model.",
108
- examples=[
109
- ["example1.jpg"],
110
- ["example2.jpg"]
111
- ],
112
- cache_examples=True,
113
- theme=gr.themes.Soft()
114
- )
115
 
116
  if __name__ == "__main__":
117
- log_message("Starting Florence-2 Gradio Server")
118
- # Launch with share=True to get a public URL
119
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
+ import json
3
  import time
4
+ from typing import Dict
5
  from PIL import Image
6
+ from io import BytesIO
7
  import torch
8
+ from transformers import AutoModelForVision2Seq, AutoProcessor
9
+ from fastapi import FastAPI, File, UploadFile
10
+ from fastapi.responses import JSONResponse
11
+ import uvicorn
12
 
13
# Configure PyTorch settings
# Flash SDPA is explicitly disabled and the math / memory-efficient
# scaled-dot-product-attention kernels enabled instead — presumably because
# flash SDPA is unavailable in this CPU-only environment (TODO confirm).
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_math_sdp(True)
torch.backends.cuda.enable_mem_efficient_sdp(True)

# Hugging Face model identifier and target device for inference.
MODEL_ID = "microsoft/Florence-2-large"
DEVICE = "cpu" # Using CPU instead of GPU

# Create FastAPI app
app = FastAPI(title="Florence-2 Image Captioning API")

# Florence-2 Model (will be loaded once)
# Both stay None until load_florence_model() populates them at startup.
model = None
processor = None
 
39
  try:
40
  log_message("[*] Loading Florence-2 model and processor...")
41
 
42
+ # Load model and processor
43
+ processor = AutoProcessor.from_pretrained(
44
+ MODEL_ID,
45
+ trust_remote_code=True
46
+ )
47
+
48
+ model = AutoModelForVision2Seq.from_pretrained(
49
+ MODEL_ID,
50
+ trust_remote_code=True,
51
+ torch_dtype=torch.float32
52
+ ).to(DEVICE)
53
 
54
+ model.eval()
55
+ log_message("[ ] Florence-2 loaded and ready.")
56
  except Exception as e:
57
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
58
  raise
 
60
  def caption_image(image: Image.Image) -> str:
61
  """Generate detailed caption for an image using Florence-2"""
62
  if model is None or processor is None:
63
+ return "Model not loaded."
64
 
65
  task_prompt = "<MORE_DETAILED_CAPTION>"
66
  prompt = task_prompt
 
87
  )
88
 
89
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
 
90
  return generated_text
91
 
92
  except Exception as e:
93
+ log_message(f"[!] Caption generation failed: {e}")
94
+ return "Captioning error."
 
95
 
96
@app.on_event("startup")
async def startup_event():
    """Eagerly initialize the Florence-2 model when the server boots."""
    # Loading here, rather than lazily on first request, means the first
    # /caption call does not pay the (slow) model-load latency.
    load_florence_model()
100
+
101
@app.post("/caption")
async def create_caption(file: UploadFile = File(...)) -> Dict:
    """
    API endpoint to receive an image and return its caption.

    On success returns a dict with keys ``status`` ("success"),
    ``filename`` and ``caption``.  A corrupt or non-image upload yields
    HTTP 400; any other failure yields HTTP 500 with an error payload.
    """
    try:
        log_message(f"[API] Received image: {file.filename}")

        # Read the whole upload into memory.
        contents = await file.read()

        # Decode in its own narrow try so that a corrupt / non-image
        # upload is reported as a client error (400) instead of being
        # swallowed by the broad handler below and returned as a 500.
        try:
            image = Image.open(BytesIO(contents)).convert("RGB")
        except Exception as e:
            log_message(f"[ERROR] Invalid image upload: {e}")
            return JSONResponse(
                status_code=400,
                content={
                    "status": "error",
                    "message": f"Invalid image file: {e}",
                },
            )

        # Generate caption
        log_message(f"[API] Generating caption for {file.filename}")
        caption = caption_image(image)

        log_message(f"[API] Caption generated for {file.filename}: {caption[:100]}...")

        return {
            "status": "success",
            "filename": file.filename,
            "caption": caption
        }

    except Exception as e:
        # Catch-all boundary: log and surface a structured 500 so the
        # client always gets JSON rather than a bare traceback.
        error_msg = f"Error processing image: {str(e)}"
        log_message(f"[ERROR] {error_msg}")
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "message": error_msg
            }
        )
 
 
 
 
 
 
 
 
 
135
 
136
if __name__ == "__main__":
    # Direct execution: announce startup, then serve the FastAPI app with
    # uvicorn, bound on all interfaces so the host is reachable externally.
    log_message("Starting Florence-2 Vision Analysis API Server")
    uvicorn.run(app, host="0.0.0.0", port=8000)