Wills17 committed
Commit d148f3b · verified · 1 Parent(s): 896c9d4

Update FastAPI_app.py

Files changed (1)
  1. FastAPI_app.py +144 -187
FastAPI_app.py CHANGED
@@ -1,4 +1,5 @@
 # FastAPI application for Fridge2Dish
+# Fallback: OpenChef-3B-v2 (GGUF) via llama-cpp-python
 
 # import libraries
 import os
@@ -20,166 +21,32 @@ from fastapi.middleware.cors import CORSMiddleware
 import tensorflow as tf
 import google.generativeai as genai
 
-# Transformers libraries (Gemma local fallback)
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-import torch
-
-
-# Gemma model download status
-GEMMA_STATUS = {
-    "downloading": False,
-    "completed": False,
-    "error": None
-}
-
-
-# create persistent storage for the Gemma-2b-it model
-LOCAL_GEMMA_DIR = "/data/gemma-2b-it"
-GEMMA_MODEL_NAME = "google/gemma-2b-it"
-
-# Load ingredients model
+# llama-cpp-python for GGUF fallback
+try:
+    from llama_cpp import Llama
+except Exception as e:
+    Llama = None
+    print("Warning: llama_cpp not available. Install llama-cpp-python to use the local OpenChef fallback.", e)
+
+
+# -----------------------------
+# CONFIG - adjust this path
+# -----------------------------
+# Set LOCAL_GGUF_PATH to the path of the OpenChef-3B-v2 GGUF file that you've
+# uploaded into the repo/persistent storage. Example:
+# LOCAL_GGUF_PATH = "/data/OpenChef-3B-v2.Q4_K_M.gguf"
+#
+# Developer note: replace the value below with the actual uploaded file path.
+LOCAL_GGUF_PATH = "models/OpenChef-3B-v2.Q4_K_M.gguf"
+# -----------------------------
+
+
+# Ingredient model (load once)
 MODEL_PATH = "models/ingredient_model.h5"
+if not os.path.exists(MODEL_PATH):
+    raise FileNotFoundError(f"Ingredient model not found at {MODEL_PATH}")
 
-# Protect loading the large local Gemma model by locking.
-_local_lock = threading.Lock()
-_local_generator = None
-
-
-# load or download (as applicable) the Gemma model
-def load_or_download_gemma():
-    global _local_generator, GEMMA_STATUS
-    if _local_generator is not None:
-        return _local_generator
-
-    with _local_lock:
-        if _local_generator is not None:
-            return _local_generator
-
-        os.makedirs(LOCAL_GEMMA_DIR, exist_ok=True)
-
-        try:
-            # Mark download start
-            if not os.listdir(LOCAL_GEMMA_DIR):
-                GEMMA_STATUS["downloading"] = True
-                GEMMA_STATUS["completed"] = False
-                GEMMA_STATUS["error"] = None
-                print("\n🟡 Downloading Gemma-2b-it from Hugging Face (first run)...")
-
-                tokenizer = AutoTokenizer.from_pretrained(GEMMA_MODEL_NAME)
-                model = AutoModelForCausalLM.from_pretrained(GEMMA_MODEL_NAME)
-
-                print("\n🟢 Saving Gemma model to persistent storage…")
-                tokenizer.save_pretrained(LOCAL_GEMMA_DIR)
-                model.save_pretrained(LOCAL_GEMMA_DIR)
-
-            else:
-                print("\n🔵 Loading Gemma from local cache…")
-                tokenizer = AutoTokenizer.from_pretrained(LOCAL_GEMMA_DIR)
-                model = AutoModelForCausalLM.from_pretrained(LOCAL_GEMMA_DIR)
-
-            GEMMA_STATUS["downloading"] = False
-            GEMMA_STATUS["completed"] = True
-
-        except Exception as e:
-            GEMMA_STATUS["downloading"] = False
-            GEMMA_STATUS["completed"] = False
-            GEMMA_STATUS["error"] = str(e)
-            raise e
-
-        # Choose device: GPU if available, otherwise CPU
-        device = 0 if torch.cuda.is_available() else -1
-        print(f"\n[Gemma] creating pipeline (device={device}) -- this may take a moment")
-
-        _local_generator = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            device=device,
-            # cap returned tokens to keep responses small
-            max_new_tokens=300,
-            do_sample=True,
-            top_p=0.95,
-            temperature=0.7
-        )
-
-        print("\n\n✅ Gemma ready for generation.")
-        return _local_generator
-
-
-# improve LM output by cleaning
-def _clean_generated_text(text: str) -> str:
-    """
-    Basic cleaning of the LM output:
-    - remove obvious leading garbage,
-    - remove repeated lines,
-    - trim long tails after a natural stopping point.
-    """
-    if not text:
-        return ""
-
-    # If the model echoes the prompt, cut at 'Recipe', '### Ingredients', or similar markers
-    markers = ["### Ingredients", "### Steps", "Ingredients:", "Steps:", "Recipe"]
-    for m in markers:
-        if m in text:
-            # keep starting at the marker if there is garbage before it
-            try:
-                idx = text.index(m)
-                text = text[idx:]
-                break
-            except ValueError:
-                pass
-
-    # Deduplicate repeated consecutive lines
-    out_lines = []
-    prev = None
-    for line in text.splitlines():
-        s = line.rstrip()
-        if s and s == prev:
-            continue
-        out_lines.append(line)
-        prev = s
-
-    cleaned = "\n".join(out_lines).strip()
-    # Trim at a long trailing repeated token if present
-    if len(cleaned) > 2000:
-        cleaned = cleaned[:2000].rsplit("\n", 1)[0]
-
-    return cleaned
-
-
-# generate recipe using local Gemma
-def generate_recipe_local_gemma(ingredient_names):
-    """
-    Use the local Gemma pipeline to generate a well-formatted recipe in Markdown.
-    """
-    gen = load_or_download_gemma()
-
-    prompt = (
-        "You are a professional chef and recipe writer. Create a concise, well-formatted recipe in Markdown "
-        f"using ONLY the following ingredients: {', '.join(ingredient_names)}.\n\n"
-        "Requirements:\n"
-        "- Start with the recipe title on one line.\n"
-        "- One-sentence description.\n"
-        "- Then a '### Ingredients' section with bullet points and approximate quantities.\n"
-        "- Then a '### Steps' section with 6-8 numbered steps.\n"
-        "- Keep it concise, no filler, no disclaimers, and end after the steps.\n\n"
-        "Output only the recipe in Markdown.\n\nRecipe:\n"
-    )
-
-    out = gen(prompt, do_sample=True, temperature=0.7, top_p=0.95, max_new_tokens=300, num_return_sequences=1)
-    generated = out[0].get("generated_text", "")
-
-    # If the model reprints the prompt, drop the leading prompt part:
-    if "Recipe:" in generated:
-        generated = generated.split("Recipe:", 1)[1].strip()
-    cleaned = _clean_generated_text(generated)
-    return cleaned
-
-
-# Ingredient detection model loading
 MODEL = tf.keras.models.load_model(MODEL_PATH)
 
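The config block above assumes the OpenChef-3B-v2 GGUF file has already been uploaded into the repo or persistent storage. If the file instead lives in a Hugging Face model repo, a one-time fetch at startup could resolve LOCAL_GGUF_PATH automatically. A minimal sketch, assuming huggingface_hub is installed; the repo_id and filename below are placeholders, not a confirmed repo:

    import os
    from huggingface_hub import hf_hub_download

    LOCAL_GGUF_PATH = "models/OpenChef-3B-v2.Q4_K_M.gguf"

    if not os.path.exists(LOCAL_GGUF_PATH):
        # One-time download into the directory LOCAL_GGUF_PATH points at.
        hf_hub_download(
            repo_id="your-org/OpenChef-3B-v2-GGUF",    # placeholder repo id
            filename="OpenChef-3B-v2.Q4_K_M.gguf",
            local_dir=os.path.dirname(LOCAL_GGUF_PATH),
        )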
@@ -203,16 +70,111 @@ def infer_image(pil_image):
     img = pil_image.resize((224, 224))
     arr = np.expand_dims(np.array(img) / 255.0, axis=0)
     preds = MODEL.predict(arr)[0]
-    # Top 3 predictions
-    top_idxs = np.argsort(preds)[::-1][:3]
+    top_idxs = np.argsort(preds)[::-1][:5]
     ingredients = []
     for i in top_idxs:
-        ingredients.append({"name": CLASS_NAMES[i].capitalize(), "confidence": float(preds[i])})
+        ingredients.append({
+            "name": CLASS_NAMES[i].capitalize(),
+            "confidence": float(preds[i])
+        })
     if not ingredients:
         return [{"name": "Unknown", "confidence": 0.0}]
     return ingredients
 
 
+# Protect loading by locking.
+_llama_lock = threading.Lock()
+_llama_model = None
+
+
+def load_local_openchef():
+    """Load the OpenChef GGUF via llama-cpp-python. Thread-safe and cached."""
+    global _llama_model
+    if _llama_model is not None:
+        return _llama_model
+
+    if Llama is None:
+        raise RuntimeError("llama_cpp is not installed. Install 'llama-cpp-python' to use the local OpenChef fallback.")
+
+    with _llama_lock:
+        if _llama_model is not None:
+            return _llama_model
+
+        if not os.path.exists(LOCAL_GGUF_PATH):
+            # be explicit about the missing model
+            raise FileNotFoundError(
+                f"Local OpenChef GGUF not found at {LOCAL_GGUF_PATH}. "
+                "Place the .gguf file there or update LOCAL_GGUF_PATH."
+            )
+
+        # instantiate; adjust n_ctx if needed
+        print(f"[openchef] Loading GGUF model from {LOCAL_GGUF_PATH} ...")
+        _llama_model = Llama(model_path=LOCAL_GGUF_PATH, n_ctx=2048)
+        print("[openchef] Loaded.")
+        return _llama_model
+
+
+def generate_recipe_local_openchef(ingredient_names: list, max_tokens: int = 512, temperature: float = 0.7):
+    """
+    Generate a markdown recipe using the local OpenChef (GGUF).
+    Returns plain text (markdown).
+    """
+    llama = load_local_openchef()
+
+    # clean ingredient list string
+    ing_str = ", ".join(ingredient_names)
+
+    prompt = f"""You are a concise AI chef. Use ONLY these ingredients: {ing_str}
+
+Rules:
+- Title on one line.
+- One-sentence description.
+- "### Ingredients" followed by a bullet list with approximate quantities.
+- "### Steps" followed by 6-8 numbered concise steps.
+- Optionally a "Tip:" line at the end.
+- No extra commentary, no apologies. Return only the recipe in markdown.
+
+Recipe:
+"""
+
+    # llama-cpp-python returns a dict with 'choices' or raw text depending on version.
+    # Use a completion with stop tokens to keep the output concise.
+    try:
+        resp = llama.create_completion(
+            prompt=prompt,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=0.95,
+            stop=["\n\n\n"]
+        )
+    except TypeError:
+        # older/newer llama-cpp-python API differences
+        resp = llama(prompt, max_tokens=max_tokens, temperature=temperature)
+
+    # extract text
+    # resp may be dict-like: {'choices': [{'text': '...'}], ...}
+    text = ""
+    try:
+        if isinstance(resp, dict) and "choices" in resp:
+            # new style
+            text = resp["choices"][0].get("text", "").strip()
+        elif hasattr(resp, "choices"):
+            text = resp.choices[0].text.strip()
+        elif isinstance(resp, str):
+            text = resp.strip()
+        else:
+            # fallback: str conversion
+            text = str(resp).strip()
+    except Exception:
+        text = str(resp).strip()
+
+    # sanity clean: if the model repeated the prompt, strip it
+    if text.startswith("Recipe:"):
+        text = text.split("Recipe:", 1)[1].strip()
+
+    return text
+
+
 # initialize FastAPI app
 app = FastAPI(
     title="Fridge2Dish",
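The fallback path added in the hunk above can be exercised without starting the server. A minimal smoke test, assuming the GGUF file is in place and this file imports as the module FastAPI_app (note the import also loads the ingredient .h5 model and builds the app, so those files must exist too):

    # Standalone check of the local OpenChef fallback.
    from FastAPI_app import generate_recipe_local_openchef

    recipe = generate_recipe_local_openchef(["Tomato", "Egg", "Onion"])
    print(recipe)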
 
@@ -237,19 +199,6 @@ app.add_middleware(
 
 # ROUTES
 
-# Gemma model download status tracking
-@app.get("/model-status")
-def model_status():
-    """
-    Reports whether the Gemma fallback model is downloaded, downloading, or errored.
-    """
-    return {
-        "downloading": GEMMA_STATUS["downloading"],
-        "completed": GEMMA_STATUS["completed"],
-        "error": GEMMA_STATUS["error"]
-    }
-
-
 # Home Route
 @app.get("/", response_class=HTMLResponse)
 def home(request: Request):
@@ -267,28 +216,27 @@ async def upload_image(
         if not file.filename.lower().endswith((".jpg", ".jpeg", ".png")):
             raise HTTPException(status_code=400, detail="Invalid image format.")
 
-        # read image
+        # load image
         img_bytes = await file.read()
         pil_img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
 
         # detect ingredients
         start = time.time()
         ingredients = infer_image(pil_img)
-        dur = time.time() - start
-        print(f"Detected ingredients: {ingredients} (took {dur:.2f}s)")
+        end = time.time()
+        print(f"Detected ingredients: {ingredients} (took {end-start:.2f}s)")
 
-        ingredient_names = [it["name"] for it in ingredients]
+        ingredient_names = [i["name"] for i in ingredients]
 
         recipe_text = None
-        api_key = user_api_key.strip()
+        api_key = (user_api_key or "").strip()
 
-        # Try server Gemini if api_key provided
         if api_key:
+            # try Gemini first
             try:
-                # Try Gemini first...
                 genai.configure(api_key=api_key)
                 model = genai.GenerativeModel("gemini-2.5-flash")
-
                 prompt = f"""
                 You are an AI chef. Create a short recipe using only: {', '.join(ingredient_names)}.
                 Include:
@@ -299,30 +247,39 @@
                 - Optional fun tips or variations
                 Return results in markdown format.
                 """
-
+
+                print("Trying Gemini...")
                 response = model.generate_content(prompt)
                 recipe_text = response.text.strip()
                 print("\nGemini succeeded.")
 
-            except Exception as e_gem:
-                # Log and fall back to local Gemma
-                print("Gemini failed or threw an exception; falling back to local Gemma:", e_gem)
-                recipe_text = generate_recipe_local_gemma(ingredient_names)
+            except Exception as e_gemini:
+                print("\nGemini failed:", e_gemini)
+                # fall back to local OpenChef
+                try:
+                    recipe_text = generate_recipe_local_openchef(ingredient_names)
+                except Exception as e_local:
+                    print("\nLocal OpenChef failed:", e_local)
+                    raise e_local
 
         else:
-            # No API key -> local Gemma
-            print("\nNo API key provided -> Using local Gemma fallback.")
-            recipe_text = generate_recipe_local_gemma(ingredient_names)
+            # no API key: use the local OpenChef fallback
+            try:
+                print("\nNo API key provided -> Using local OpenChef fallback.")
+                recipe_text = generate_recipe_local_openchef(ingredient_names)
+            except Exception as e_local:
+                print("Local OpenChef failed:", e_local)
+                raise e_local
 
-        # Return structured response (ingredients keep confidence)
         return {"ingredients": ingredients, "recipe": recipe_text}
 
     except HTTPException:
+        # re-raise known HTTP errors
        raise
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Server Error: {str(e)}")
 
 
 # Health check
 @app.get("/health")
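A quick end-to-end check of the updated endpoints, as a sketch. The upload route's decorator is outside this diff, so the "/upload" path and the user_api_key form field below are assumptions; match them to the actual @app.post(...) route in the file:

    import requests

    base = "http://localhost:8000"
    print(requests.get(f"{base}/health").json())

    with open("fridge.jpg", "rb") as f:
        resp = requests.post(
            f"{base}/upload",                              # assumed path
            files={"file": ("fridge.jpg", f, "image/jpeg")},
            data={"user_api_key": ""},                     # empty -> local OpenChef fallback
        )
    print(resp.json()["recipe"])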