Update src/streamlit_app.py
src/streamlit_app.py  (+23 -22)  CHANGED
This commit swaps the captioner from BLIP (`Salesforce/blip-image-captioning-base`, previously loaded with `BlipForConditionalGeneration` and moved to the device with `.to(device)`) to BLIP-2 (`Salesforce/blip2-opt-2.7b`, loaded with `Blip2Processor` and `Blip2ForConditionalGeneration`). The `PIL` and `torchvision.transforms` imports move above the `transformers` import block, the sidebar model info is updated to list the ConvNeXt classifier, the BLIP-2 captioner, and the expected output, and the section comments are rewritten. Inside `load_models()`, the device is now chosen with `torch.cuda.is_available()`, and BLIP-2 is loaded in `float16` with `device_map="auto"` when a GPU is available, falling back to `float32` on CPU. In the inference path, the subheader changes from "via BLIP" to "via BLIP-2", the prompt is passed to the processor as a `text=` keyword instead of positionally, and generation is capped at `max_new_tokens=100`.
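Because `load_models()` passes `device_map="auto"` on the GPU path, the Space also needs the `accelerate` package there, in addition to the libraries imported below. A minimal `requirements.txt` sketch inferred from those imports (the package list is an assumption, not read from the repo, and versions are deliberately left unpinned):

streamlit
torch
torchvision
transformers
accelerate
pillow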
Updated src/streamlit_app.py, shown hunk by hunk (unchanged lines between hunks are omitted).

Lines 1-24:

import streamlit as st
import torch
import os
from PIL import Image
import torchvision.transforms as transforms
from transformers import (
    ConvNextForImageClassification,
    Blip2Processor,
    Blip2ForConditionalGeneration
)

# Streamlit setup
st.set_page_config(page_title="🍽️ Food Nutrition Estimator", page_icon="🥗", layout="centered")

# Environment setup
hf_token = os.getenv("HF_TOKEN")
cache_dir = "/tmp/cache"
os.makedirs(cache_dir, exist_ok=True)
os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir

# Manual transform for ConvNeXt
manual_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(196),
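The first hunk cuts off inside `manual_transform`; the remaining transform lines (25-29) are unchanged and not shown here. For orientation only, a typical ConvNeXt preprocessing tail converts to a tensor and normalizes with ImageNet statistics; the values below are an assumption, not the repo's code:

# Hypothetical completion of manual_transform (not part of this diff)
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])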
Lines 30-57:

# Sidebar Info
st.sidebar.header("Model Info")
st.sidebar.markdown("""
- 🤖 **Classifier**: ConvNeXt (`shingguy1/food-calorie-convnext`)
- 🧠 **Captioner**: BLIP-2 (`Salesforce/blip2-opt-2.7b`)
- 📋 **Output**: Nutrition facts and calorie descriptions
""")

# Load models
@st.cache_resource
def load_models():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_convnext = ConvNextForImageClassification.from_pretrained(
        "shingguy1/food-calorie-convnext", cache_dir=cache_dir, token=hf_token
    ).to(device)

    blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b", cache_dir=cache_dir)
    blip_model = Blip2ForConditionalGeneration.from_pretrained(
        "Salesforce/blip2-opt-2.7b",
        cache_dir=cache_dir,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None
    )

    return model_convnext, blip_processor, blip_model, device

model_convnext, blip_processor, blip_model, device = load_models()
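One loading detail worth noting: on the GPU path the BLIP-2 weights are in `float16`, while the processor returns `float32` pixel values, so the inputs generally need to be cast to the model's dtype before `generate` is called. A sketch of the adjusted call further down, assuming the fp16/GPU path (not the committed code):

    # Cast processor outputs to the model's dtype on GPU (sketch)
    inputs = blip_processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)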
Lines 59-62:

# Upload image
uploaded_file = st.file_uploader("Upload a food image (jpg/png)...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
Lines 64-70:

    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # ConvNeXt classification
    input_tensor = manual_transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model_convnext(pixel_values=input_tensor)
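Line 71 falls between hunks, but the next hunk reads `pred_idx`, so the omitted line presumably turns the classifier logits into a class index. A typical way to do that, as a sketch rather than the exact committed line:

    # Derive the predicted class index from the ConvNeXt logits (sketch)
    pred_idx = outputs.logits.argmax(-1).item()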
Lines 72-84:

    pred_label = model_convnext.config.id2label[pred_idx]
    st.success(f"🍴 Predicted Food: **{pred_label}**")

    # BLIP-2 generation
    st.subheader("🧾 Nutritional Facts (via BLIP-2)")
    prompt = f"Describe the nutritional facts and calories of {pred_label}"
    inputs = blip_processor(image, text=prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=100)

    caption = blip_processor.decode(output[0], skip_special_tokens=True)
    st.info(caption)
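A closing deployment note, stated as an assumption rather than something this commit changes: `Salesforce/blip2-opt-2.7b` has several billion parameters, so it needs on the order of 15 GB of RAM in `float32` on a CPU-only Space and several GB of VRAM even in `float16`. If memory is tight, one common alternative is 8-bit loading, which requires a GPU and the `bitsandbytes` package:

# Hypothetical 8-bit loading variant (not part of this commit)
from transformers import BitsAndBytesConfig

blip_model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    cache_dir=cache_dir,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto"
)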