Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from PIL import Image
|
|
| 5 |
from io import BytesIO
|
| 6 |
from fastapi import FastAPI, HTTPException
|
| 7 |
from pydantic import BaseModel, HttpUrl
|
| 8 |
-
from transformers import AutoProcessor,
|
| 9 |
import uvicorn
|
| 10 |
|
| 11 |
# ===== CONFIG =====
|
|
@@ -35,10 +35,9 @@ print("[INFO] Loading Florence-2 model on CPU...")
|
|
| 35 |
try:
|
| 36 |
MODEL_ID = "microsoft/Florence-2-large"
|
| 37 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
| 38 |
-
model =
|
| 39 |
MODEL_ID,
|
| 40 |
-
trust_remote_code=True
|
| 41 |
-
torch_dtype=torch.float32
|
| 42 |
).to(DEVICE).eval()
|
| 43 |
print("[INFO] Model loaded successfully!")
|
| 44 |
except Exception as e:
|
|
@@ -88,9 +87,7 @@ def analyze_image(image: Image.Image, task: str = "<MORE_DETAILED_CAPTION>") ->
|
|
| 88 |
inputs = processor(
|
| 89 |
text=task,
|
| 90 |
images=image,
|
| 91 |
-
return_tensors="pt"
|
| 92 |
-
padding=True,
|
| 93 |
-
truncation=True
|
| 94 |
).to(DEVICE)
|
| 95 |
|
| 96 |
# Generate caption
|
|
@@ -98,27 +95,15 @@ def analyze_image(image: Image.Image, task: str = "<MORE_DETAILED_CAPTION>") ->
|
|
| 98 |
generated_ids = model.generate(
|
| 99 |
input_ids=inputs["input_ids"],
|
| 100 |
pixel_values=inputs["pixel_values"],
|
| 101 |
-
max_new_tokens=1024,
|
| 102 |
num_beams=3,
|
| 103 |
-
do_sample=False
|
| 104 |
-
repetition_penalty=1.2 # Helps avoid repetitive outputs
|
| 105 |
)
|
| 106 |
|
| 107 |
-
# Decode
|
| 108 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 109 |
print(f"[DEBUG] Generated text: {generated_text}")
|
| 110 |
-
|
| 111 |
-
generated_text,
|
| 112 |
-
task=task,
|
| 113 |
-
image_size=RESIZE_DIM
|
| 114 |
-
)
|
| 115 |
-
|
| 116 |
-
if result is None:
|
| 117 |
-
print("[ERROR] Post-processing returned None. The model may not have generated a valid output for the given task.")
|
| 118 |
-
raise ValueError("Post-processing returned None. The model may not have generated a valid output for the given task.")
|
| 119 |
-
|
| 120 |
-
print(f"[DEBUG] Post-processed result: {result}")
|
| 121 |
-
return result.get(task, "No caption generated.")
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
print(f"[ERROR] Exception in analyze_image: {e}")
|
|
|
|
| 5 |
from io import BytesIO
|
| 6 |
from fastapi import FastAPI, HTTPException
|
| 7 |
from pydantic import BaseModel, HttpUrl
|
| 8 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 9 |
import uvicorn
|
| 10 |
|
| 11 |
# ===== CONFIG =====
|
|
|
|
| 35 |
try:
|
| 36 |
MODEL_ID = "microsoft/Florence-2-large"
|
| 37 |
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
|
| 38 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 39 |
MODEL_ID,
|
| 40 |
+
trust_remote_code=True
|
|
|
|
| 41 |
).to(DEVICE).eval()
|
| 42 |
print("[INFO] Model loaded successfully!")
|
| 43 |
except Exception as e:
|
|
|
|
| 87 |
inputs = processor(
|
| 88 |
text=task,
|
| 89 |
images=image,
|
| 90 |
+
return_tensors="pt"
|
|
|
|
|
|
|
| 91 |
).to(DEVICE)
|
| 92 |
|
| 93 |
# Generate caption
|
|
|
|
| 95 |
generated_ids = model.generate(
|
| 96 |
input_ids=inputs["input_ids"],
|
| 97 |
pixel_values=inputs["pixel_values"],
|
| 98 |
+
max_new_tokens=1024,
|
| 99 |
num_beams=3,
|
| 100 |
+
do_sample=False
|
|
|
|
| 101 |
)
|
| 102 |
|
| 103 |
+
# Decode
|
| 104 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 105 |
print(f"[DEBUG] Generated text: {generated_text}")
|
| 106 |
+
return generated_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
except Exception as e:
|
| 109 |
print(f"[ERROR] Exception in analyze_image: {e}")
|