Fred808 committed on
Commit
b87f1bf
·
verified ·
1 Parent(s): d5e0925

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -79
app.py CHANGED
@@ -29,25 +29,15 @@ except subprocess.CalledProcessError as e:
29
  # Determine the device to use
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
 
32
- # Load the base model and processor
33
  try:
34
- vision_language_model_base = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
35
- vision_language_processor_base = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
36
- print("✓ Base model loaded successfully")
37
  except Exception as e:
38
- print(f"Error loading base model: {e}")
39
- vision_language_model_base = None
40
- vision_language_processor_base = None
41
-
42
- # Load the large model and processor
43
- try:
44
- vision_language_model_large = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True).to(device).eval()
45
- vision_language_processor_large = AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True)
46
- print("✓ Large model loaded successfully")
47
- except Exception as e:
48
- print(f"Error loading large model: {e}")
49
- vision_language_model_large = None
50
- vision_language_processor_large = None
51
 
52
  def load_image_from_url(image_url):
53
  """Load an image from a URL."""
@@ -88,47 +78,32 @@ def describe_image(uploaded_image, model_choice):
88
  if uploaded_image is None:
89
  return "Please upload an image."
90
 
91
- if model_choice == "Florence-2-base":
92
- if vision_language_model_base is None:
93
- return "Base model failed to load."
94
- model = vision_language_model_base
95
- processor = vision_language_processor_base
96
- elif model_choice == "Florence-2-large":
97
- if vision_language_model_large is None:
98
- return "Large model failed to load."
99
- model = vision_language_model_large
100
- processor = vision_language_processor_large
101
- else:
102
- return "Invalid model choice."
103
 
104
  try:
105
  return process_image_description(model, processor, uploaded_image)
106
  except Exception as e:
107
  return f"Error generating caption: {str(e)}"
108
 
109
- def describe_image_from_url(image_url, model_choice):
110
  """Generate description from image URL."""
111
  try:
112
  if not image_url:
113
  return {"error": "image_url is required"}
114
 
115
- if model_choice not in ["Florence-2-base", "Florence-2-large"]:
116
- return {"error": "Invalid model choice. Use 'Florence-2-base' or 'Florence-2-large'"}
117
-
118
  # Load image from URL
119
  image = load_image_from_url(image_url)
120
 
121
- # Select model and processor
122
- if model_choice == "Florence-2-base":
123
- if vision_language_model_base is None:
124
- return {"error": "Base model not available"}
125
- model = vision_language_model_base
126
- processor = vision_language_processor_base
127
- else:
128
- if vision_language_model_large is None:
129
- return {"error": "Large model not available"}
130
- model = vision_language_model_large
131
- processor = vision_language_processor_large
132
 
133
  # Generate caption
134
  caption = process_image_description(model, processor, image)
@@ -147,7 +122,7 @@ def describe_image_from_url(image_url, model_choice):
147
  IMAGE_SERVER_BASE = os.getenv("IMAGE_SERVER_BASE", " ")
148
  DATA_COLLECTION_BASE = os.getenv("DATA_COLLECTION_BASE", "https://fred808-flow.hf.space")
149
  REQUESTER_ID = os.getenv("FLO_REQUESTER_ID", f"florence-2-{os.getpid()}")
150
- MODEL_CHOICE = os.getenv("FLO_MODEL_CHOICE", "Florence-2-base")
151
 
152
 
153
  def sanitize_name(name: str, max_len: int = 200) -> str:
@@ -259,9 +234,7 @@ def background_worker():
259
  # Wait for model to be ready
260
  waited = 0
261
  while waited < 120:
262
- if MODEL_CHOICE == "Florence-2-base" and vision_language_model_base:
263
- break
264
- elif MODEL_CHOICE == "Florence-2-large" and vision_language_model_large:
265
  break
266
  time.sleep(1)
267
  waited += 1
@@ -344,12 +317,8 @@ def background_worker():
344
  try:
345
  pil_img = Image.open(BytesIO(img_bytes)).convert('RGB')
346
 
347
- if MODEL_CHOICE == "Florence-2-base":
348
- model = vision_language_model_base
349
- processor = vision_language_processor_base
350
- else:
351
- model = vision_language_model_large
352
- processor = vision_language_processor_large
353
 
354
  print(f"[BACKGROUND] Generating caption for {filename}")
355
  caption = process_image_description(model, processor, pil_img)
@@ -410,8 +379,8 @@ async def root():
410
  return {
411
  "name": "Florence-2 Image Captioning Server",
412
  "status": "running",
413
- "model_base": vision_language_model_base is not None,
414
- "model_large": vision_language_model_large is not None,
415
  "device": device
416
  }
417
 
@@ -419,21 +388,12 @@ async def root():
419
  async def health():
420
  return {
421
  "status": "healthy",
422
- "model_base": vision_language_model_base is not None,
423
- "model_large": vision_language_model_large is not None,
424
  "device": device,
425
  "model_choice": MODEL_CHOICE
426
  }
427
 
428
- # Start background worker thread (daemon) so it doesn't block shutdown
429
- def _start_worker_thread():
430
- t = threading.Thread(target=background_worker, daemon=True)
431
- t.start()
432
-
433
- # Start background worker when FastAPI starts
434
- @app.on_event("startup")
435
- async def startup_event():
436
- _start_worker_thread()
437
 
438
 
439
  @app.get("/analyze")
@@ -459,7 +419,6 @@ async def analyze_get(image_url: str = None, model_choice: str = None):
459
  async def analyze_post(file: UploadFile = File(None), model_choice: str = Form(None)):
460
  """Analyze an uploaded image (multipart/form-data). Returns caption JSON."""
461
  try:
462
- mc = model_choice or MODEL_CHOICE
463
  if file is None:
464
  raise HTTPException(status_code=400, detail="file is required")
465
 
@@ -469,17 +428,11 @@ async def analyze_post(file: UploadFile = File(None), model_choice: str = Form(N
469
  except Exception as e:
470
  raise HTTPException(status_code=400, detail=f"Failed to read uploaded image: {e}")
471
 
472
- # Choose model
473
- if mc == "Florence-2-large":
474
- if vision_language_model_large is None:
475
- raise HTTPException(status_code=503, detail="Base model not loaded")
476
- model = vision_language_model_large
477
- processor = vision_language_processor_large
478
- else:
479
- if vision_language_model_large is None:
480
- raise HTTPException(status_code=503, detail="Large model not loaded")
481
- model = vision_language_model_large
482
- processor = vision_language_processor_large
483
 
484
  caption = process_image_description(model, processor, pil_img)
485
  return JSONResponse(content={"success": True, "caption": caption})
 
29
  # Determine the device to use
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
 
32
+ # Load Florence-2-large model and processor
33
  try:
34
+ vision_language_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True).to(device).eval()
35
+ vision_language_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True)
36
+ print("✓ Florence-2-large model loaded successfully")
37
  except Exception as e:
38
+ print(f"Error loading Florence-2-large model: {e}")
39
+ vision_language_model = None
40
+ vision_language_processor = None
 
 
 
 
 
 
 
 
 
 
41
 
42
  def load_image_from_url(image_url):
43
  """Load an image from a URL."""
 
78
  if uploaded_image is None:
79
  return "Please upload an image."
80
 
81
+ if vision_language_model is None:
82
+ return "Florence-2-large model failed to load."
83
+
84
+ model = vision_language_model
85
+ processor = vision_language_processor
 
 
 
 
 
 
 
86
 
87
  try:
88
  return process_image_description(model, processor, uploaded_image)
89
  except Exception as e:
90
  return f"Error generating caption: {str(e)}"
91
 
92
+ def describe_image_from_url(image_url, model_choice=None):
93
  """Generate description from image URL."""
94
  try:
95
  if not image_url:
96
  return {"error": "image_url is required"}
97
 
98
+ if vision_language_model is None:
99
+ return {"error": "Florence-2-large model not available"}
100
+
101
  # Load image from URL
102
  image = load_image_from_url(image_url)
103
 
104
+ # Use the loaded large model
105
+ model = vision_language_model
106
+ processor = vision_language_processor
 
 
 
 
 
 
 
 
107
 
108
  # Generate caption
109
  caption = process_image_description(model, processor, image)
 
122
  IMAGE_SERVER_BASE = os.getenv("IMAGE_SERVER_BASE", " ")
123
  DATA_COLLECTION_BASE = os.getenv("DATA_COLLECTION_BASE", "https://fred808-flow.hf.space")
124
  REQUESTER_ID = os.getenv("FLO_REQUESTER_ID", f"florence-2-{os.getpid()}")
125
+ MODEL_CHOICE = "Florence-2-large" # Always use large model
126
 
127
 
128
  def sanitize_name(name: str, max_len: int = 200) -> str:
 
234
  # Wait for model to be ready
235
  waited = 0
236
  while waited < 120:
237
+ if vision_language_model is not None:
 
 
238
  break
239
  time.sleep(1)
240
  waited += 1
 
317
  try:
318
  pil_img = Image.open(BytesIO(img_bytes)).convert('RGB')
319
 
320
+ model = vision_language_model
321
+ processor = vision_language_processor
 
 
 
 
322
 
323
  print(f"[BACKGROUND] Generating caption for {filename}")
324
  caption = process_image_description(model, processor, pil_img)
 
379
  return {
380
  "name": "Florence-2 Image Captioning Server",
381
  "status": "running",
382
+ "model": "Florence-2-large",
383
+ "model_loaded": vision_language_model is not None,
384
  "device": device
385
  }
386
 
 
388
  async def health():
389
  return {
390
  "status": "healthy",
391
+ "model": "Florence-2-large",
392
+ "model_loaded": vision_language_model is not None,
393
  "device": device,
394
  "model_choice": MODEL_CHOICE
395
  }
396
 
 
 
 
 
 
 
 
 
 
397
 
398
 
399
  @app.get("/analyze")
 
419
  async def analyze_post(file: UploadFile = File(None), model_choice: str = Form(None)):
420
  """Analyze an uploaded image (multipart/form-data). Returns caption JSON."""
421
  try:
 
422
  if file is None:
423
  raise HTTPException(status_code=400, detail="file is required")
424
 
 
428
  except Exception as e:
429
  raise HTTPException(status_code=400, detail=f"Failed to read uploaded image: {e}")
430
 
431
+ if vision_language_model is None:
432
+ raise HTTPException(status_code=503, detail="Florence-2-large model not loaded")
433
+
434
+ model = vision_language_model
435
+ processor = vision_language_processor
 
 
 
 
 
 
436
 
437
  caption = process_image_description(model, processor, pil_img)
438
  return JSONResponse(content={"success": True, "caption": caption})