Fred808 committed on
Commit
c14f018
·
verified ·
1 Parent(s): 9706dce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -65
app.py CHANGED
@@ -1,16 +1,11 @@
1
  import os
2
- import json
3
  import time
4
- from typing import Dict
5
  from PIL import Image
6
- from io import BytesIO
7
  import torch
8
- from transformers import AutoModelForVision2Seq, AutoProcessor
9
- from fastapi import FastAPI, File, UploadFile
10
- from fastapi.responses import JSONResponse
11
- import uvicorn
12
 
13
- # Configure PyTorch settings
14
  torch.backends.cuda.enable_flash_sdp(False)
15
  torch.backends.cuda.enable_math_sdp(True)
16
  torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -20,9 +15,6 @@ torch.backends.cuda.enable_mem_efficient_sdp(True)
20
  MODEL_ID = "microsoft/Florence-2-large"
21
  DEVICE = "cpu" # Using CPU instead of GPU
22
 
23
- # Create FastAPI app
24
- app = FastAPI(title="Florence-2 Image Captioning API")
25
-
26
  # Florence-2 Model (will be loaded once)
27
  model = None
28
  processor = None
@@ -39,22 +31,12 @@ def load_florence_model():
39
  try:
40
  log_message("[*] Loading Florence-2 model and processor...")
41
 
42
- # Load model and processor with specific configuration
43
- processor = AutoProcessor.from_pretrained(
44
- MODEL_ID,
45
- trust_remote_code=True,
46
- revision="9a515b7", # Pin to a specific version
47
- )
48
-
49
- model = AutoModelForVision2Seq.from_pretrained(
50
- MODEL_ID,
51
- trust_remote_code=True,
52
- revision="9a515b7", # Pin to a specific version
53
- torch_dtype=torch.float32,
54
- ).to(DEVICE)
55
-
56
  model.eval()
57
- log_message("[ ] Florence-2 loaded and ready.")
 
 
58
  except Exception as e:
59
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
60
  raise
@@ -62,7 +44,7 @@ def load_florence_model():
62
  def caption_image(image: Image.Image) -> str:
63
  """Generate detailed caption for an image using Florence-2"""
64
  if model is None or processor is None:
65
- return "Model not loaded."
66
 
67
  task_prompt = "<MORE_DETAILED_CAPTION>"
68
  prompt = task_prompt
@@ -89,52 +71,49 @@ def caption_image(image: Image.Image) -> str:
89
  )
90
 
91
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
 
92
  return generated_text
93
 
94
  except Exception as e:
95
- log_message(f"[!] Caption generation failed: {e}")
96
- return "Captioning error."
97
-
98
- @app.on_event("startup")
99
- async def startup_event():
100
- """Load model on startup"""
101
- load_florence_model()
102
 
103
- @app.post("/caption")
104
- async def create_caption(file: UploadFile = File(...)) -> Dict:
105
- """
106
- API endpoint to receive an image and return its caption
107
- """
108
  try:
109
- log_message(f"[API] Received image: {file.filename}")
110
-
111
- # Read and validate image
112
- contents = await file.read()
113
- image = Image.open(BytesIO(contents)).convert("RGB")
114
-
115
- # Generate caption
116
- log_message(f"[API] Generating caption for {file.filename}")
117
- caption = caption_image(image)
118
-
119
- log_message(f"[API] Caption generated for {file.filename}: {caption[:100]}...")
120
-
121
- return {
122
- "status": "success",
123
- "filename": file.filename,
124
- "caption": caption
125
- }
126
 
 
 
 
 
127
  except Exception as e:
128
  error_msg = f"Error processing image: {str(e)}"
129
  log_message(f"[ERROR] {error_msg}")
130
- return JSONResponse(
131
- status_code=500,
132
- content={
133
- "status": "error",
134
- "message": error_msg
135
- }
136
- )
 
 
 
 
 
 
 
 
 
137
 
138
  if __name__ == "__main__":
139
- log_message("Starting Florence-2 Vision Analysis API Server")
140
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
  import os
 
2
  import time
 
3
  from PIL import Image
 
4
  import torch
5
+ from transformers import AutoModelForCausalLM, AutoProcessor
6
+ import gradio as gr
 
 
7
 
8
+ # Disable the flash SDPA kernel; keep the math and memory-efficient kernels enabled
9
  torch.backends.cuda.enable_flash_sdp(False)
10
  torch.backends.cuda.enable_math_sdp(True)
11
  torch.backends.cuda.enable_mem_efficient_sdp(True)
 
15
  MODEL_ID = "microsoft/Florence-2-large"
16
  DEVICE = "cpu" # Using CPU instead of GPU
17
 
 
 
 
18
  # Florence-2 Model (will be loaded once)
19
  model = None
20
  processor = None
 
31
  try:
32
  log_message("[*] Loading Florence-2 model and processor...")
33
 
34
+ # Load model on CPU
35
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
36
  model.eval()
37
+
38
+ processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
39
+ log_message("[ ] Florence-2 loaded and ready on CPU")
40
  except Exception as e:
41
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
42
  raise
 
44
  def caption_image(image: Image.Image) -> str:
45
  """Generate detailed caption for an image using Florence-2"""
46
  if model is None or processor is None:
47
+ load_florence_model()
48
 
49
  task_prompt = "<MORE_DETAILED_CAPTION>"
50
  prompt = task_prompt
 
71
  )
72
 
73
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
74
+ log_message(f"[SUCCESS] Generated caption: {generated_text[:100]}...")
75
  return generated_text
76
 
77
  except Exception as e:
78
+ error_msg = f"[!] Caption generation failed: {e}"
79
+ log_message(error_msg)
80
+ return error_msg
 
 
 
 
81
 
82
def process_image(input_image):
    """Caption an image submitted through the Gradio interface.

    Args:
        input_image: The uploaded image. A ``PIL.Image.Image`` is used as is;
            any other non-``None`` value is handed to ``Image.fromarray``.
            ``None`` means nothing was uploaded.

    Returns:
        str: The text returned by ``caption_image``; ``"No image provided"``
        when ``input_image`` is ``None``; or an
        ``"Error processing image: ..."`` message if conversion or
        captioning raises.
    """
    if input_image is None:
        return "No image provided"

    try:
        # Convert to PIL Image if needed
        if not isinstance(input_image, Image.Image):
            input_image = Image.fromarray(input_image)

        # Normalise to three-channel RGB so RGBA / grayscale / palette uploads
        # are handed to the captioner in a single, predictable mode.
        # NOTE(review): presumably the Florence-2 processor expects RGB input;
        # confirm against the model card.
        if input_image.mode != "RGB":
            input_image = input_image.convert("RGB")

        log_message("[INFO] Processing new image...")
        return caption_image(input_image)

    except Exception as e:
        # UI boundary: report the failure as text instead of raising, so the
        # caption textbox shows the error.
        error_msg = f"Error processing image: {str(e)}"
        log_message(f"[ERROR] {error_msg}")
        return error_msg
100
+
101
# Create Gradio interface
# Offer only the example images that actually exist in the working directory.
# NOTE(review): with cache_examples=True Gradio pre-computes the output for
# every example, so a missing file would presumably surface as a start-up
# failure; confirm against the installed Gradio version.
_example_files = [
    [path]
    for path in ("example1.jpg", "example2.jpg")
    if os.path.exists(path)
]

demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Caption", lines=3),
    title="Florence-2 Image Captioning",
    description="Upload an image to get a detailed caption generated by Florence-2 model.",
    # No examples section at all when none of the files are present.
    examples=_example_files or None,
    # Caching is only meaningful when there is at least one example.
    cache_examples=bool(_example_files),
    theme=gr.themes.Soft(),
)
115
 
116
if __name__ == "__main__":
    log_message("Starting Florence-2 Gradio Server")
    # Serve on all network interfaces, port 7860.
    # NOTE: share=True is not passed here, so this call does not request a
    # public Gradio share link.
    demo.launch(server_name="0.0.0.0", server_port=7860)