Spaces:

shingguy1
/

Calorie_Estimator

Sleeping

App Files Community

shingguy1 committed on May 18, 2025

Commit

50e8acf

verified ·

1 Parent(s): c5c8acf

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +39 -33

src/streamlit_app.py CHANGED Viewed

@@ -16,7 +16,7 @@ st.set_page_config(
     layout="centered"
 )
 st.title("🍽️ Food Nutrition Estimator")
-st.markdown("Upload a food image and get nutritional information generated by AI!")
 # 2. Environment & cache
 hf_token = os.getenv("HF_TOKEN", None)
@@ -28,7 +28,7 @@ os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
 manual_transform = transforms.Compose([
     transforms.Resize(256),
     transforms.CenterCrop(224),
-    transforms.Lambda(lambda img: img.convert("RGB")),  # ensure 3 channels
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225]),
@@ -39,7 +39,7 @@ manual_transform = transforms.Compose([
 st.sidebar.header("Models Used")
 st.sidebar.markdown("""
 - 🖼️ **Image Classifier**: `shingguy1/fine_tuned_vit`
-- 💬 **Text Generator**: `TinyLlama/TinyLlama-1.1B-Chat-v1.0`
 """)
 # 5. Load models (cached)
@@ -47,35 +47,37 @@ st.sidebar.markdown("""
 def load_models():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # ViT for classification
     model_vit = ViTForImageClassification.from_pretrained(
         "shingguy1/fine_tuned_vit",
         cache_dir=cache_dir,
         use_auth_token=hf_token
     ).to(device)
-    # TinyLlama for nutrition text
-    tokenizer = AutoTokenizer.from_pretrained(
-        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        cache_dir=cache_dir
     )
     model_llm = AutoModelForCausalLM.from_pretrained(
-        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         cache_dir=cache_dir,
-        torch_dtype=torch.float32,
         device_map="auto"
     )
-    return model_vit, tokenizer, model_llm, device
-model_vit, tokenizer, model_llm, device = load_models()
 # 6. Image uploader
 uploaded_file = st.file_uploader("Upload a food image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
     try:
-        # Load & display image
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Image", use_column_width=True)
@@ -87,42 +89,46 @@ if uploaded_file is not None:
         pred_label = model_vit.config.id2label[pred_idx]
         st.success(f"🍴 Predicted Food: **{pred_label}**")
-        # Prepare LLM prompt
         prompt = (
-            "Provide a concise nutritional overview for a tacos. "
-            "Include serving size, calories, protein, carbs, fat, "
-            "main ingredients, cooking method, and one substitution. "
-            "Answer only the overview—do not repeat this instruction."
         )
         st.subheader("🧾 Nutrition Information")
-        st.write(f"🤖 Prompt to LLM:\n\n{prompt}")
-        # Tokenize & move to device
-        inputs = tokenizer(prompt, return_tensors="pt")
         inputs = {k: v.to(model_llm.device) for k, v in inputs.items()}
         input_len = inputs["input_ids"].shape[1]
-        # Generate with constraints
         outputs = model_llm.generate(
             **inputs,
             max_length=input_len + 150,
-            do_sample=True,
-            temperature=0.8,
             top_p=0.9,
             no_repeat_ngram_size=2,
             early_stopping=True,
-            pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id
         )
-        # Decode generated tokens only
-        gen_ids = outputs[0][input_len:]
-        caption = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
-        if caption:
-            st.info(caption)
         else:
-            st.error("⚠️ The LLM failed to generate any text.")
     except Exception as e:
         st.error(f"Something went wrong: {e}")

     layout="centered"
 )
 st.title("🍽️ Food Nutrition Estimator")
+st.markdown("Upload a food image and get a nutritional overview generated by an instruction‐tuned LLM!")
 # 2. Environment & cache
 hf_token = os.getenv("HF_TOKEN", None)
 manual_transform = transforms.Compose([
     transforms.Resize(256),
     transforms.CenterCrop(224),
+    transforms.Lambda(lambda img: img.convert("RGB")),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225]),
 st.sidebar.header("Models Used")
 st.sidebar.markdown("""
 - 🖼️ **Image Classifier**: `shingguy1/fine_tuned_vit`
+- 💬 **Text Generator**: `tiiuae/falcon-7b-instruct`
 """)
 # 5. Load models (cached)
 def load_models():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # ViT classifier
     model_vit = ViTForImageClassification.from_pretrained(
         "shingguy1/fine_tuned_vit",
         cache_dir=cache_dir,
         use_auth_token=hf_token
     ).to(device)
+    # Falcon‐7B Instruct LLM
+    tokenizer_llm = AutoTokenizer.from_pretrained(
+        "tiiuae/falcon-7b-instruct",
+        cache_dir=cache_dir,
+        use_auth_token=hf_token
     )
     model_llm = AutoModelForCausalLM.from_pretrained(
+        "tiiuae/falcon-7b-instruct",
         cache_dir=cache_dir,
+        use_auth_token=hf_token,
+        torch_dtype=torch.float16,
         device_map="auto"
     )
+    return model_vit, tokenizer_llm, model_llm, device
+model_vit, tokenizer_llm, model_llm, device = load_models()
 # 6. Image uploader
 uploaded_file = st.file_uploader("Upload a food image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
     try:
+        # Display image
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Image", use_column_width=True)
         pred_label = model_vit.config.id2label[pred_idx]
         st.success(f"🍴 Predicted Food: **{pred_label}**")
+        # Build a single, unified instruction prompt
         prompt = (
+            "### Instruction\n"
+            f"Provide a concise nutritional overview for a {pred_label}, including:\n"
+            "- Serving size (exact measurements & ingestion guidelines)\n"
+            "- Calories\n"
+            "- Protein, carbohydrates, and fat\n"
+            "- Main ingredients\n"
+            "- Cooking method\n"
+            "- One healthy substitution\n"
+            "### Response"
         )
         st.subheader("🧾 Nutrition Information")
+        st.write(f"🤖 Prompt sent to LLM:\n\n{prompt}")
+        # Tokenize & generate
+        inputs = tokenizer_llm(prompt, return_tensors="pt")
         inputs = {k: v.to(model_llm.device) for k, v in inputs.items()}
         input_len = inputs["input_ids"].shape[1]
         outputs = model_llm.generate(
             **inputs,
             max_length=input_len + 150,
+            temperature=0.7,
             top_p=0.9,
+            do_sample=True,
             no_repeat_ngram_size=2,
             early_stopping=True,
+            pad_token_id=tokenizer_llm.eos_token_id,
+            eos_token_id=tokenizer_llm.eos_token_id
         )
+        # Decode and strip prompt
+        full = tokenizer_llm.decode(outputs[0], skip_special_tokens=True).strip()
+        if full.startswith("### Response"):
+            caption = full.split("### Response", 1)[1].strip()
         else:
+            caption = full[input_len:].strip()
+        st.info(caption or "⚠️ The LLM did not generate any text.")
     except Exception as e:
         st.error(f"Something went wrong: {e}")