shingguy1 committed on
Commit
9129b6f
·
verified ·
1 Parent(s): 94cfed3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -37
src/streamlit_app.py CHANGED
@@ -5,36 +5,35 @@ from PIL import Image
5
  import torchvision.transforms as transforms
6
  from transformers import (
7
  ConvNextForImageClassification,
8
- Blip2Processor,
9
- Blip2ForConditionalGeneration
10
  )
11
 
12
- # Set Streamlit page config
13
  st.set_page_config(page_title="🍽️ Food Nutrition Estimator", page_icon="🥗", layout="centered")
 
 
14
 
15
- # Use Hugging Face token (for private models if needed)
16
  hf_token = os.getenv("HF_TOKEN")
17
-
18
- # Set Hugging Face cache directory
19
  cache_dir = "/tmp/cache"
20
  os.makedirs(cache_dir, exist_ok=True)
21
  os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
22
 
23
- # Manual transform to match ConvNeXt's preprocessor config
24
  manual_transform = transforms.Compose([
25
  transforms.Resize(224),
26
- transforms.CenterCrop(196), # crop_pct: 0.875 * 224
27
  transforms.ToTensor(),
28
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
29
  transforms.ConvertImageDtype(torch.float32)
30
  ])
31
 
32
- # Sidebar information
33
  st.sidebar.header("Models Used")
34
  st.sidebar.markdown("""
35
- - 🤖 **Classifier**: `shingguy1/food-calorie-convnext` (ConvNeXt)
36
- - 🧠 **Captioner**: `Salesforce/blip2-flan-t5-xl` (BLIP-2)
37
- - 📝 **Description**: Automatically generates nutritional facts based on food image
38
  """)
39
 
40
  # Load models
@@ -42,31 +41,27 @@ st.sidebar.markdown("""
42
  def load_models():
43
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
44
 
45
- # Load ConvNeXt
46
  model_convnext = ConvNextForImageClassification.from_pretrained(
47
- "shingguy1/food-calorie-convnext", cache_dir=cache_dir, token=hf_token
 
 
48
  ).to(device)
49
 
50
- # Load BLIP-2
51
- blip_processor = Blip2Processor.from_pretrained(
52
- "Salesforce/blip2-flan-t5-xl", cache_dir=cache_dir
53
- )
54
- blip_model = Blip2ForConditionalGeneration.from_pretrained(
55
- "Salesforce/blip2-flan-t5-xl",
56
  cache_dir=cache_dir,
57
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
58
- device_map="auto" if torch.cuda.is_available() else None
59
  )
60
 
61
- return model_convnext, blip_processor, blip_model, device
62
-
63
- model_convnext, blip_processor, blip_model, device = load_models()
64
 
65
- # Main interface
66
- st.title("🍽️ Food Nutrition Estimator")
67
- st.markdown("Upload a food image and get a nutrition description generated by AI!")
68
 
69
- # File uploader
70
  uploaded_file = st.file_uploader("Upload a food image...", type=["jpg", "jpeg", "png"])
71
 
72
  if uploaded_file is not None:
@@ -74,7 +69,7 @@ if uploaded_file is not None:
74
  image = Image.open(uploaded_file).convert("RGB")
75
  st.image(image, caption="Uploaded Image", use_column_width=True)
76
 
77
- # Classification with ConvNeXt
78
  input_tensor = manual_transform(image).unsqueeze(0).to(device)
79
  with torch.no_grad():
80
  outputs = model_convnext(pixel_values=input_tensor)
@@ -82,15 +77,16 @@ if uploaded_file is not None:
82
  pred_label = model_convnext.config.id2label[pred_idx]
83
  st.success(f"🍴 Predicted Food: **{pred_label}**")
84
 
85
- # Caption generation with BLIP-2
86
- st.subheader("🧾 Nutritional Facts (via BLIP-2)")
87
- prompt = f"Describe the nutritional facts and calories of {pred_label}"
88
- inputs = blip_processor(image, text=prompt, return_tensors="pt").to(device)
89
 
 
90
  with torch.no_grad():
91
- output = blip_model.generate(**inputs, max_new_tokens=100)
 
92
 
93
- caption = blip_processor.decode(output[0], skip_special_tokens=True)
94
  st.info(caption)
95
 
96
  except Exception as e:
@@ -98,4 +94,4 @@ if uploaded_file is not None:
98
 
99
  # Footer
100
  st.markdown("---")
101
- st.markdown("Built with ❤️ using Streamlit and Hugging Face by **shingguy1**")
 
5
  import torchvision.transforms as transforms
6
  from transformers import (
7
  ConvNextForImageClassification,
8
+ AutoTokenizer,
9
+ AutoModelForCausalLM
10
  )
11
 
12
+ # Set Streamlit UI
13
  st.set_page_config(page_title="🍽️ Food Nutrition Estimator", page_icon="🥗", layout="centered")
14
+ st.title("🍽️ Food Nutrition Estimator")
15
+ st.markdown("Upload a food image and get nutritional information generated by AI!")
16
 
17
+ # Environment & cache setup
18
  hf_token = os.getenv("HF_TOKEN")
 
 
19
  cache_dir = "/tmp/cache"
20
  os.makedirs(cache_dir, exist_ok=True)
21
  os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
22
 
23
+ # Transform for ConvNeXt
24
  manual_transform = transforms.Compose([
25
  transforms.Resize(224),
26
+ transforms.CenterCrop(196),
27
  transforms.ToTensor(),
28
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
29
  transforms.ConvertImageDtype(torch.float32)
30
  ])
31
 
32
+ # Sidebar info
33
  st.sidebar.header("Models Used")
34
  st.sidebar.markdown("""
35
+ - 🖼️ **Image Classifier**: `shingguy1/food-calorie-convnext`
36
+ - 💬 **Text Generator**: `TinyLlama/TinyLlama-1.1B-Chat-v1.0`
 
37
  """)
38
 
39
  # Load models
 
41
  def load_models():
42
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
43
 
44
+ # ConvNeXt for classification
45
  model_convnext = ConvNextForImageClassification.from_pretrained(
46
+ "shingguy1/food-calorie-convnext",
47
+ cache_dir=cache_dir,
48
+ token=hf_token
49
  ).to(device)
50
 
51
+ # TinyLlama for nutritional facts
52
+ tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", cache_dir=cache_dir)
53
+ model_llm = AutoModelForCausalLM.from_pretrained(
54
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
 
 
55
  cache_dir=cache_dir,
56
+ torch_dtype=torch.float32,
57
+ device_map="auto"
58
  )
59
 
60
+ return model_convnext, tokenizer, model_llm, device
 
 
61
 
62
+ model_convnext, tokenizer, model_llm, device = load_models()
 
 
63
 
64
+ # Upload image
65
  uploaded_file = st.file_uploader("Upload a food image...", type=["jpg", "jpeg", "png"])
66
 
67
  if uploaded_file is not None:
 
69
  image = Image.open(uploaded_file).convert("RGB")
70
  st.image(image, caption="Uploaded Image", use_column_width=True)
71
 
72
+ # Predict with ConvNeXt
73
  input_tensor = manual_transform(image).unsqueeze(0).to(device)
74
  with torch.no_grad():
75
  outputs = model_convnext(pixel_values=input_tensor)
 
77
  pred_label = model_convnext.config.id2label[pred_idx]
78
  st.success(f"🍴 Predicted Food: **{pred_label}**")
79
 
80
+ # Generate nutrition caption using TinyLlama
81
+ prompt = f"Give the calories, macros, and nutritional facts of a {pred_label}."
82
+ st.subheader("🧾 Nutrition Information")
83
+ st.write(f"🤖 Prompt: `{prompt}`")
84
 
85
+ input_ids = tokenizer(prompt, return_tensors="pt").to(model_llm.device)
86
  with torch.no_grad():
87
+ output = model_llm.generate(**input_ids, max_new_tokens=100)
88
+ caption = tokenizer.decode(output[0], skip_special_tokens=True)
89
 
 
90
  st.info(caption)
91
 
92
  except Exception as e:
 
94
 
95
  # Footer
96
  st.markdown("---")
97
+ st.markdown("Built with ❤️ using Streamlit and Hugging Face by **shingguy1**")