Spaces:

Wills17
/

Fridge2Dish

Running

App Files Files Community

Wills17 committed on Nov 28, 2025

Commit

c1d85a4

verified ·

1 Parent(s): 15e9a5a

Update FastAPI_app.py

Browse files

Files changed (1) hide show

FastAPI_app.py +86 -8

FastAPI_app.py CHANGED Viewed

@@ -20,11 +20,11 @@ from fastapi.middleware.cors import CORSMiddleware
 import torch
 import tensorflow as tf
 import google.generativeai as genai
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # Ingredient model (load once)
-MODEL_PATH = "models/ingredient_model.h5"
 if not os.path.exists(MODEL_PATH):
     raise FileNotFoundError(f"Ingredient model not found at {MODEL_PATH}")
@@ -53,7 +53,38 @@ def timeout_handler(signum, frame):
 _lock = threading.Lock()
 _tokenizer = None
 _model = None
 def load_Qwen():
     global _tokenizer, _model
     if _model is not None:
@@ -70,8 +101,7 @@ def load_Qwen():
             return _tokenizer, _model
         except TimeoutError:
-            print("\n🔴 [Fallback] Qwen load timed out.")
-            raise RuntimeError("\n🔴 Model load failed.")
 def generate_recipe_qwen(ingredient_names):
@@ -126,7 +156,7 @@ def infer_image(pil_image):
     img = pil_image.resize((224, 224))
     arr = np.expand_dims(np.array(img) / 255.0, axis=0)
     preds = MODEL.predict(arr)[0]
-    top_idxs = np.argsort(preds)[::-1][:5]
     ingredients = []
     for i in top_idxs:
         ingredients.append({"name": CLASS_NAMES[i].capitalize(), "confidence": float(preds[i])})
@@ -136,12 +166,60 @@ def infer_image(pil_image):
     return ingredients
 # initialize FastAPI app
 app = FastAPI(
     title="Fridge2Dish",
     description="Upload an image → Detect ingredients → Generate recipes",
-    version="3.0.0"
 )
 # static and templates
@@ -173,9 +251,9 @@ async def detect_ingredients(file: UploadFile = File(...)):
     img_bytes = await file.read()
     pil_img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    ingredients = infer_image(pil_img)
     end = time.time()
-    print(f"Detected ingredients: {ingredients} (⌛ Took {end-start:.2f}s)")
     return {"ingredients": ingredients}

 import torch
 import tensorflow as tf
 import google.generativeai as genai
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
 # Ingredient model (load once)
+MODEL_PATH = "models/ingredient_model_2.h5"
 if not os.path.exists(MODEL_PATH):
     raise FileNotFoundError(f"Ingredient model not found at {MODEL_PATH}")
 _lock = threading.Lock()
 _tokenizer = None
 _model = None
+_florence_processor = None
+_florence_model = None
+_florence_lock = threading.Lock()
+# Florence2 detection first time function
def load_florence2():
    """Load the Florence-2 processor and model once and return the cached pair.

    The first caller performs the load while holding ``_florence_lock``; later
    callers get the objects stored in the module-level ``_florence_processor``
    and ``_florence_model`` globals.

    Returns:
        tuple: ``(processor, model)`` for ``microsoft/Florence-2-base``.

    Raises:
        RuntimeError: if loading raises ``TimeoutError`` (the original
            exception is attached as ``__cause__``).
    """
    global _florence_processor, _florence_model
    # Fast path: skip the lock entirely once the model has been published.
    if _florence_model is not None:
        return _florence_processor, _florence_model
    with _florence_lock:
        # Re-check under the lock: another thread may have finished loading
        # while this one was waiting.
        if _florence_model is not None:
            return _florence_processor, _florence_model

        # Half precision is only used when a GPU is present; on CPU-only
        # hosts float16 kernels are missing or very slow for several ops.
        use_cuda = torch.cuda.is_available()
        device = "cuda" if use_cuda else "cpu"
        dtype = torch.float16 if use_cuda else torch.float32

        try:
            print("\n🔵 Loading Florence-2 for accurate detection...")
            processor = AutoProcessor.from_pretrained(
                "microsoft/Florence-2-base",
                trust_remote_code=True,
            )
            # Explicit .to(device) follows the Florence-2 model card and does
            # not require the model class to support device_map="auto".
            model = AutoModelForCausalLM.from_pretrained(
                "microsoft/Florence-2-base",
                torch_dtype=dtype,
                trust_remote_code=True,
            ).to(device)
        except TimeoutError as err:
            raise RuntimeError("\n🔴 [Fallback] Florence load timed out.") from err

        # Publish only after both objects loaded; the model is assigned last
        # because the unlocked fast path keys on `_florence_model`.
        _florence_processor = processor
        _florence_model = model
        print("\n🟢 Florence-2 ready!\n")
        return _florence_processor, _florence_model
+# Qwen fallback first time function
 def load_Qwen():
     global _tokenizer, _model
     if _model is not None:
             return _tokenizer, _model
         except TimeoutError:
+            raise RuntimeError("\n🔴 [Fallback] Qwen load timed out.")
 def generate_recipe_qwen(ingredient_names):
     img = pil_image.resize((224, 224))
     arr = np.expand_dims(np.array(img) / 255.0, axis=0)
     preds = MODEL.predict(arr)[0]
+    top_idxs = np.argsort(preds)[::-1][:3]
     ingredients = []
     for i in top_idxs:
         ingredients.append({"name": CLASS_NAMES[i].capitalize(), "confidence": float(preds[i])})
     return ingredients
+# Florence2 infer function
def infer_image2(pil_image):
    """Detect objects in an image with Florence-2 and return them as ingredients.

    Runs the ``<OD>`` (object detection) task and converts the detected labels
    into the ingredient format used by the API.

    Args:
        pil_image: image object exposing ``width`` and ``height`` that the
            Florence-2 processor accepts (the caller passes an RGB PIL image).

    Returns:
        list[dict]: up to 5 entries shaped ``{"name": str, "confidence": float}``
        with unique names in detection order. Florence-2 does not emit scores,
        so ``confidence`` is a fixed heuristic: 0.95 when more than one object
        was detected, otherwise 0.70. When nothing is detected the result is
        ``[{"name": "Unknown", "confidence": 0.0}]``.
    """
    processor, model = load_florence2()
    prompt = "<OD>"  # Florence-2 task token for detecting all objects

    # Move the inputs to the model's device and cast float tensors to its
    # dtype; otherwise a float16 / GPU model rejects float32 CPU pixel values.
    inputs = processor(text=prompt, images=pil_image, return_tensors="pt").to(
        model.device, model.dtype
    )

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=100,
            do_sample=False,
            num_beams=3,
        )

    # Special tokens must be kept: the <loc_*> tokens are what
    # post_process_generation uses to split the text into labelled boxes.
    generated_text = processor.batch_decode(outputs, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        generated_text,
        task=prompt,
        image_size=(pil_image.width, pil_image.height),
    )

    # The "<OD>" result is a dict of parallel lists
    # ({"bboxes": [...], "labels": [...]}), not a list of per-object dicts.
    detection = parsed.get(prompt) or {}
    labels = detection.get("labels") or [] if isinstance(detection, dict) else []

    conf = 0.95 if len(labels) > 1 else 0.70  # Simple heuristic, not a model score

    ingredients = []
    seen = set()
    for label in labels:
        name = str(label).strip()
        key = name.casefold()
        # Several boxes can share one label (e.g. three bananas); keep a single
        # entry per ingredient so duplicates do not crowd out other items.
        if not name or key in seen:
            continue
        seen.add(key)
        ingredients.append({"name": name.capitalize(), "confidence": conf})
        if len(ingredients) == 5:
            break

    if not ingredients:
        return [{"name": "Unknown", "confidence": 0.0}]
    return ingredients
 # initialize FastAPI app
 app = FastAPI(
     title="Fridge2Dish",
     description="Upload an image → Detect ingredients → Generate recipes",
+    version="4.0.0"
 )
 # static and templates
     img_bytes = await file.read()
     pil_img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+    ingredients = infer_image2(pil_img)
     end = time.time()
+    print(f"Top 3 Detected ingredients: {ingredients} (⌛ Took {end-start:.2f}s)")
     return {"ingredients": ingredients}