# NOTE(review): removed non-Python extraction artifacts (file-size banner,
# git blame hashes, line-number dump) that prevented this file from parsing.
# handler.py — added the new imports
import os
import io, base64, requests
from typing import Any, Dict, Optional
from PIL import Image, ImageDraw
from ultralytics import YOLO
# ==== Gemini ====
from google import genai
from google.genai import types
def _to_ints(xyxy):
x1, y1, x2, y2 = xyxy
return int(x1), int(y1), int(x2), int(y2)
def _annotate_to_b64(img: Image.Image, boxes_out):
    """Draw a red rectangle for every detection onto *img* and return the
    result as a base64-encoded PNG string; return None when there are no boxes.

    The caller is expected to pass a copy if the original image must be kept
    unmodified — drawing happens in place.
    """
    if not boxes_out:
        return None
    pen = ImageDraw.Draw(img)
    for box in boxes_out:
        corners = [(box["x1"], box["y1"]), (box["x2"], box["y2"])]
        pen.rectangle(corners, outline=(255, 0, 0), width=3)
    out = io.BytesIO()
    img.save(out, format="PNG")
    return base64.b64encode(out.getvalue()).decode("utf-8")
def _ask_gemini_with_image(image_bytes: bytes, meta: dict) -> str:
api_key = os.getenv("API_KEY", "")
if not api_key:
return ""
# Dùng model PRO qua biến môi trường, default dùng model pro
model_name = os.getenv("MODEL_NAME", "gemini-2.0-pro")
client = genai.Client(api_key=api_key)
context_lines = [
f"YOLO is_diseased: {meta.get('is_diseased')}",
f"YOLO max_confidence: {meta.get('max_confidence')}",
f"YOLO num_detections: {meta.get('num_detections')}",
f"Image size: {meta.get('image_width')}x{meta.get('image_height')}",
f"threshold_conf: {meta.get('threshold_conf')}",
]
for i, b in enumerate(meta.get("boxes", []), 1):
context_lines.append(
f"Box#{i}: ({b['x1']},{b['y1']},{b['x2']},{b['y2']}), conf={b['conf']}"
)
system_instruction = (
"You are a plant pathology assistant for Brassica (bok choy). "
"Analyze the annotated image (rectangles show suspicious regions). "
"Be concise and avoid over-diagnosis beyond visible evidence. "
"Also, re-evaluate based on your own understanding and not 100% on the data provided."
)
user_prompt = (
"Given the annotated image of bok choy leaves, determine whether disease signs are present.\n"
"If sick, describe the visible symptoms and suggest possible illnesses with a brief explanation. Also answer in one sentence\n"
"Context from detector:\n" + "\n".join(context_lines) + "\n"
"Finally transform result to vietnamese. Only reply result vietnamese (not include english)"
"Please answer clearly with just one sentence."
)
try:
resp = client.models.generate_content(
model=model_name,
contents=[
system_instruction,
types.Part.from_bytes(data=image_bytes, mime_type="image/png"),
user_prompt,
],
)
return (resp.text or "").strip()
except Exception:
return ""
class EndpointHandler:
    """Inference endpoint: run YOLO disease detection on a bok-choy image and,
    when detections exist, ask Gemini for a short Vietnamese assessment of the
    annotated result.
    """

    def __init__(self, path: str = ""):
        # Weights are expected at <path>/best.pt (inference-endpoint layout).
        self.model = YOLO(f"{path}/best.pt")

    @staticmethod
    def _resolve_conf(data: Any, default: float = 0.5) -> float:
        """Read the confidence threshold from the payload root or from
        data['inputs']; fall back to *default* when absent."""
        if isinstance(data, dict):
            if "conf" in data:
                return float(data["conf"])
            inputs = data.get("inputs")
            if isinstance(inputs, dict) and "conf" in inputs:
                return float(inputs["conf"])
        return default

    def _load_image(self, data: Dict) -> Image.Image:
        """Decode the request payload into an RGB PIL image.

        Accepts 'image_url' or 'image_base64' (optionally nested under
        'inputs'), or raw bytes. Raises ValueError for anything else and
        requests.HTTPError for a failed download.
        """
        # BUGFIX: check for dict before using the `in` operator — on a raw
        # bytes payload `"inputs" in data` raised TypeError, making the
        # raw-bytes branch below unreachable.
        if isinstance(data, dict) and isinstance(data.get("inputs"), dict):
            data = data["inputs"]
        if isinstance(data, dict) and data.get("image_url"):
            resp = requests.get(data["image_url"], timeout=15)
            # Fail fast on HTTP errors instead of feeding an error page to PIL.
            resp.raise_for_status()
            return Image.open(io.BytesIO(resp.content)).convert("RGB")
        if isinstance(data, dict) and data.get("image_base64"):
            img_bytes = base64.b64decode(data["image_base64"])
            return Image.open(io.BytesIO(img_bytes)).convert("RGB")
        if isinstance(data, (bytes, bytearray)):
            return Image.open(io.BytesIO(data)).convert("RGB")
        raise ValueError("No image provided. Use 'image_url' or 'image_base64' or raw bytes.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run detection on the payload image and return the result schema."""
        # Threshold defaults to 0.5 when the request does not supply one.
        conf = self._resolve_conf(data)

        # 1) Decode the image.
        img = self._load_image(data)
        W, H = img.width, img.height

        # 2) Predict.
        r = self.model.predict(img, conf=conf, verbose=False)[0]

        # 3) Build boxes (schema mirrors schemas.py).
        boxes_out = []
        max_conf = 0.0
        if r.boxes is not None:
            for b in r.boxes:
                x1, y1, x2, y2 = _to_ints(b.xyxy[0].tolist())
                confv = float(b.conf[0])
                boxes_out.append({
                    "cls": "diseased",
                    "conf": confv,
                    "x1": x1, "y1": y1, "x2": x2, "y2": y2,
                })
                max_conf = max(max_conf, confv)
        is_diseased = bool(boxes_out)

        # 4) Annotated preview (base64 PNG), only when something was found.
        annotated_b64: Optional[str] = _annotate_to_b64(img.copy(), boxes_out) if is_diseased else None

        # 5) Ask Gemini (PRO model) when diseased and an annotated image exists.
        gemini_text = ""
        if is_diseased and annotated_b64:
            meta = {
                "is_diseased": is_diseased,
                "max_confidence": max_conf,
                "num_detections": len(boxes_out),
                "image_width": W,
                "image_height": H,
                "threshold_conf": conf,
                "boxes": boxes_out,
            }
            try:
                gemini_text = _ask_gemini_with_image(base64.b64decode(annotated_b64), meta)
            except Exception:
                # Best-effort: the detector output is still returned.
                gemini_text = ""
        return {
            "is_diseased": is_diseased,
            "max_confidence": max_conf,
            "num_detections": len(boxes_out),
            "image_width": W,
            "image_height": H,
            "threshold_conf": conf,
            "boxes": boxes_out,
            "prediction_text": gemini_text,
        }