Maham930 committed on
Commit
e4866f8
·
verified ·
1 Parent(s): eb666b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -99
app.py CHANGED
@@ -1,124 +1,77 @@
1
  import streamlit as st
2
- import torch
3
  from PIL import Image
4
- import os
5
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
 
6
 
7
- # --------------------------------------------------
8
- # STREAMLIT CONFIG (MUST BE FIRST STREAMLIT CALL)
9
- # --------------------------------------------------
10
- st.set_page_config(
11
- page_title="Multimodal Image Understanding AI",
12
- layout="centered"
 
 
 
 
 
 
13
  )
14
 
15
- st.write("πŸš€ App is starting...") # Debug indicator
16
-
17
- # --------------------------------------------------
18
- # MODEL CONFIG (SAFE FOR HF SPACES)
19
- # --------------------------------------------------
20
- MODEL_NAME = "TeichAI/Qwen3-4B-Instruct-2507-Gemini-3-Pro-Preview-Distill-GGUF" # NOT XL
21
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
22
 
23
- HF_TOKEN = os.getenv("HF_TOKEN") # Optional but recommended
24
-
25
- # --------------------------------------------------
26
- # LOAD MODEL (STREAMLIT-SAFE)
27
- # --------------------------------------------------
28
- @st.cache_resource(show_spinner="πŸ”„ Loading AI model (first time only)...")
29
  def load_model():
30
- processor = Blip2Processor.from_pretrained(
31
- MODEL_NAME,
32
- token=HF_TOKEN
33
- )
34
-
35
  model = Blip2ForConditionalGeneration.from_pretrained(
36
  MODEL_NAME,
37
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
38
- device_map="auto" if DEVICE == "cuda" else None,
39
  token=HF_TOKEN
40
  )
41
-
42
  model.eval()
43
  return processor, model
44
 
45
-
46
  processor, model = load_model()
47
 
48
- # --------------------------------------------------
49
- # HELPER FUNCTION
50
- # --------------------------------------------------
51
- def ask_model(prompt, image):
52
- inputs = processor(
53
- images=image,
54
- text=prompt,
55
- return_tensors="pt"
56
- ).to(DEVICE)
57
 
58
- with torch.no_grad():
59
- output = model.generate(
60
- **inputs,
61
- max_new_tokens=150
62
- )
63
-
64
- return processor.decode(output[0], skip_special_tokens=True)
65
-
66
- # --------------------------------------------------
67
- # UI
68
- # --------------------------------------------------
69
- st.title("πŸ“Έ Multimodal Image Understanding & Storytelling AI")
70
- st.markdown(
71
- """
72
- Upload an image and the AI will generate:
73
- - A factual caption
74
- - A descriptive summary
75
- - Detected objects
76
- - Emotional tone
77
- - A short story
78
- """
79
- )
80
-
81
- image_file = st.file_uploader(
82
- "Upload an image",
83
- type=["jpg", "jpeg", "png"]
84
- )
85
-
86
- # --------------------------------------------------
87
- # IMAGE PROCESSING
88
- # --------------------------------------------------
89
- if image_file:
90
  image = Image.open(image_file).convert("RGB")
91
- st.image(image, caption="Uploaded Image", use_column_width=True)
92
-
93
- with st.spinner("🧠 Analyzing image..."):
94
- caption = ask_model(
95
- "Describe this image in one factual sentence.",
96
- image
97
- )
98
 
99
- summary = ask_model(
100
- "Give a concise 3–5 line descriptive summary of this image.",
101
- image
102
- )
103
 
104
- objects = ask_model(
105
- "List the main objects and entities visible in this image.",
106
- image
107
- )
 
 
 
108
 
109
- emotion = ask_model(
110
- "What emotional tone or mood does this image convey?",
111
- image
112
- )
113
-
114
- story = ask_model(
115
- "Write a short fictional story (5–10 lines) inspired by this image.",
116
- image
117
- )
118
-
119
- # --------------------------------------------------
120
- # OUTPUT
121
- # --------------------------------------------------
122
  st.subheader("πŸ“ Caption")
123
  st.write(caption)
124
 
@@ -135,4 +88,4 @@ if image_file:
135
  st.write(story)
136
 
137
  else:
138
- st.info("⬆️ Upload an image to begin.")
 
1
  import streamlit as st
 
2
  from PIL import Image
3
+ import torch
4
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
5
+ import os
6
 
7
# -----------------------
# Streamlit page configuration and static UI header
# -----------------------
st.set_page_config(
    page_title="Multimodal Image Understanding AI",
    layout="centered",
)
st.title("📸 Multimodal Image Understanding & Storytelling AI")
st.markdown(
    "\n".join(
        [
            "Upload an image or use live camera, and get:",
            "- Caption",
            "- Summary",
            "- Detected objects",
            "- Emotion/mood",
            "- Short story inspired by the image",
        ]
    )
)

# -----------------------
# Model settings
# -----------------------
MODEL_NAME = "Salesforce/blip2-flan-t5-large"  # BLIP-2 with a Flan-T5-large language head
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
HF_TOKEN = os.getenv("HF_TOKEN")  # Add HF_TOKEN as secret in Spaces (recommended)
27
 
28
# -----------------------
# Model loading (cached once per Streamlit session)
# -----------------------
@st.cache_resource(show_spinner="🔄 Loading AI model, please wait...")
def load_model():
    """Load the BLIP-2 processor and model and cache them across reruns.

    Returns:
        tuple: (Blip2Processor, Blip2ForConditionalGeneration) with the
        model switched to eval mode.
    """
    # use_fast=False pins the slow processor implementation; the token is
    # only required for gated/private checkpoints.
    processor = Blip2Processor.from_pretrained(MODEL_NAME, use_fast=False, token=HF_TOKEN)

    on_gpu = DEVICE == "cuda"
    model = Blip2ForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        # Half precision and automatic device placement only on GPU.
        torch_dtype=torch.float16 if on_gpu else torch.float32,
        device_map="auto" if on_gpu else None,
        token=HF_TOKEN,
    )
    model.eval()  # inference only — disables dropout etc.
    return processor, model
39
 
 
40
# Materialize the cached processor/model pair for this session.
processor, model = load_model()

# -----------------------
# Image input
# -----------------------
image_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
camera_image = st.camera_input("Or take a live picture")

# A fresh camera shot takes priority over a previously uploaded file.
source = camera_image if camera_image else image_file
image = Image.open(source).convert("RGB") if source else None
 
 
 
 
 
 
 
53
 
54
+ if image:
55
+ st.image(image, caption="Your Image", use_column_width=True)
 
 
56
 
57
# -----------------------
# Helper function
# -----------------------
def ask_model(prompt):
    """Ask the BLIP-2 model one question about the currently selected image.

    Args:
        prompt: Natural-language instruction/question for the model.

    Returns:
        The decoded text response with special tokens stripped.
    """
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(DEVICE)
    # Inference only: torch.no_grad() avoids building an autograd graph and
    # wasting memory — this guard existed in the previous revision of this
    # file and was dropped in the rewrite.
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=150)
    return processor.decode(out[0], skip_special_tokens=True)
64
 
65
# Run all five analyses while a single spinner is displayed.
with st.spinner("🧠 Analyzing image..."):
    _prompts = (
        "Describe this image in one factual sentence.",
        "Give a concise 3–5 line descriptive summary of this image.",
        "List the main objects and entities visible in this image.",
        "Detect the emotional tone or mood of this image (happy, calm, tense, etc.).",
        "Write a short story (5–10 lines) inspired by this image.",
    )
    caption, summary, objects, emotion, story = (ask_model(p) for p in _prompts)
71
+
72
+ # -----------------------
73
+ # Output
74
+ # -----------------------
 
 
 
75
  st.subheader("πŸ“ Caption")
76
  st.write(caption)
77
 
 
88
  st.write(story)
89
 
90
  else:
91
+ st.info("⬆️ Upload an image or use the camera above to begin.")