# NOTE(review): removed non-Python extraction artifacts (file-size banner,
# git blame hashes, line-number dump) that prevented this file from parsing.
# handler.py — added the new imports
import os
import io, base64, requests
from typing import Any, Dict, Optional
from PIL import Image, ImageDraw
from ultralytics import YOLO
# ==== Gemini ====
from google import genai
from google.genai import types
def _to_ints(xyxy):
x1, y1, x2, y2 = xyxy
return int(x1), int(y1), int(x2), int(y2)
def _annotate_to_b64(img: Image.Image, boxes_out):
    """Draw a red rectangle for every detection onto *img* and return the
    result as a base64-encoded PNG string; return None when there are no boxes.

    The caller is expected to pass a copy if the original image must be kept
    unmodified — drawing happens in place.
    """
    if not boxes_out:
        return None
    pen = ImageDraw.Draw(img)
    for box in boxes_out:
        corners = [(box["x1"], box["y1"]), (box["x2"], box["y2"])]
        pen.rectangle(corners, outline=(255, 0, 0), width=3)
    out = io.BytesIO()
    img.save(out, format="PNG")
    return base64.b64encode(out.getvalue()).decode("utf-8")
def _ask_gemini_with_image(image_bytes: bytes, meta: dict) -> str:
api_key = os.getenv("API_KEY", "")
if not api_key:
return ""
# Dùng model PRO qua biến môi trường, default dùng model pro
model_name = os.getenv("MODEL_NAME", "gemini-2.0-pro")
client = genai.Client(api_key=api_key)
context_lines = [
f"YOLO is_diseased: {meta.get('is_diseased')}",
f"YOLO max_confidence: {meta.get('max_confidence')}",
f"YOLO num_detections: {meta.get('num_detections')}",
f"Image size: {meta.get('image_width')}x{meta.get('image_height')}",
f"threshold_conf: {meta.get('threshold_conf')}",
]
for i, b in enumerate(meta.get("boxes", []), 1):
context_lines.append(
f"Box#{i}: ({b['x1']},{b['y1']},{b['x2']},{b['y2']}), conf={b['conf']}"
)
system_instruction = (
"You are a plant pathology assistant for Brassica (bok choy). "
"Analyze the annotated image (rectangles show suspicious regions). "
"Be concise and avoid over-diagnosis beyond visible evidence. "
"Also, re-evaluate based on your own understanding and not 100% on the data provided."
)
user_prompt = (
"Given the annotated image of bok choy leaves, determine whether disease signs are present.\n"
"If sick, describe the visible symptoms and suggest possible illnesses with a brief explanation. Also answer in one sentence\n"
"Context from detector:\n" + "\n".join(context_lines) + "\n"
"Finally transform result to vietnamese. Only reply result vietnamese (not include english)"
"Please answer clearly with just one sentence."
)
try:
resp = client.models.generate_content(
model=model_name,
contents=[
system_instruction,
types.Part.from_bytes(data=image_bytes, mime_type="image/png"),
user_prompt,
],
)
return (resp.text or "").strip()
except Exception:
return ""
class EndpointHandler:
    """Inference endpoint: run YOLO disease detection on a bok-choy image and,
    when detections exist, ask Gemini for a short Vietnamese assessment of the
    annotated result.
    """

    def __init__(self, path: str = ""):
        # Weights are expected at <path>/best.pt (inference-endpoint layout).
        self.model = YOLO(f"{path}/best.pt")

    @staticmethod
    def _resolve_conf(data: Any, default: float = 0.5) -> float:
        """Read the confidence threshold from the payload root or from
        data['inputs']; fall back to *default* when absent."""
        if isinstance(data, dict):
            if "conf" in data:
                return float(data["conf"])
            inputs = data.get("inputs")
            if isinstance(inputs, dict) and "conf" in inputs:
                return float(inputs["conf"])
        return default

    def _load_image(self, data: Dict) -> Image.Image:
        """Decode the request payload into an RGB PIL image.

        Accepts 'image_url' or 'image_base64' (optionally nested under
        'inputs'), or raw bytes. Raises ValueError for anything else and
        requests.HTTPError for a failed download.
        """
        # BUGFIX: check for dict before using the `in` operator — on a raw
        # bytes payload `"inputs" in data` raised TypeError, making the
        # raw-bytes branch below unreachable.
        if isinstance(data, dict) and isinstance(data.get("inputs"), dict):
            data = data["inputs"]
        if isinstance(data, dict) and data.get("image_url"):
            resp = requests.get(data["image_url"], timeout=15)
            # Fail fast on HTTP errors instead of feeding an error page to PIL.
            resp.raise_for_status()
            return Image.open(io.BytesIO(resp.content)).convert("RGB")
        if isinstance(data, dict) and data.get("image_base64"):
            img_bytes = base64.b64decode(data["image_base64"])
            return Image.open(io.BytesIO(img_bytes)).convert("RGB")
        if isinstance(data, (bytes, bytearray)):
            return Image.open(io.BytesIO(data)).convert("RGB")
        raise ValueError("No image provided. Use 'image_url' or 'image_base64' or raw bytes.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run detection on the payload image and return the result schema."""
        # Threshold defaults to 0.5 when the request does not supply one.
        conf = self._resolve_conf(data)

        # 1) Decode the image.
        img = self._load_image(data)
        W, H = img.width, img.height

        # 2) Predict.
        r = self.model.predict(img, conf=conf, verbose=False)[0]

        # 3) Build boxes (schema mirrors schemas.py).
        boxes_out = []
        max_conf = 0.0
        if r.boxes is not None:
            for b in r.boxes:
                x1, y1, x2, y2 = _to_ints(b.xyxy[0].tolist())
                confv = float(b.conf[0])
                boxes_out.append({
                    "cls": "diseased",
                    "conf": confv,
                    "x1": x1, "y1": y1, "x2": x2, "y2": y2,
                })
                max_conf = max(max_conf, confv)
        is_diseased = bool(boxes_out)

        # 4) Annotated preview (base64 PNG), only when something was found.
        annotated_b64: Optional[str] = _annotate_to_b64(img.copy(), boxes_out) if is_diseased else None

        # 5) Ask Gemini (PRO model) when diseased and an annotated image exists.
        gemini_text = ""
        if is_diseased and annotated_b64:
            meta = {
                "is_diseased": is_diseased,
                "max_confidence": max_conf,
                "num_detections": len(boxes_out),
                "image_width": W,
                "image_height": H,
                "threshold_conf": conf,
                "boxes": boxes_out,
            }
            try:
                gemini_text = _ask_gemini_with_image(base64.b64decode(annotated_b64), meta)
            except Exception:
                # Best-effort: the detector output is still returned.
                gemini_text = ""
        return {
            "is_diseased": is_diseased,
            "max_confidence": max_conf,
            "num_detections": len(boxes_out),
            "image_width": W,
            "image_height": H,
            "threshold_conf": conf,
            "boxes": boxes_out,
            "prediction_text": gemini_text,
        }