Spaces:

Maham930
/

Aqib_prototype

Runtime error

App Files Community

Maham930 committed on Jan 26

Commit

bd901c8

verified ·

1 Parent(s): 2ba5249

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -24

app.py CHANGED Viewed

@@ -1,54 +1,138 @@
 import streamlit as st
-from PIL import Image
 import torch
 from transformers import Blip2Processor, Blip2ForConditionalGeneration
-st.set_page_config(page_title="Multimodal Image Understanding AI")
-@st.cache_resource
 def load_model():
     processor = Blip2Processor.from_pretrained(
-        "Salesforce/blip2-flan-t5-xl"
     )
     model = Blip2ForConditionalGeneration.from_pretrained(
-        "Salesforce/blip2-flan-t5-xl",
-        torch_dtype=torch.float16,
-        device_map="auto"
     )
     return processor, model
 processor, model = load_model()
-st.title("📸 Multimodal Image Understanding & Storytelling")
-image = st.file_uploader("Upload an image", type=["jpg", "png"])
-def ask(prompt, image):
-    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda")
-    out = model.generate(**inputs, max_new_tokens=200)
-    return processor.decode(out[0], skip_special_tokens=True)
-if image:
-    img = Image.open(image).convert("RGB")
-    st.image(img, use_column_width=True)
-    caption = ask("Describe this image in one factual sentence.", img)
-    summary = ask("Give a 3–5 line descriptive summary of this image.", img)
-    objects = ask("List the main objects and entities visible in this image.", img)
-    emotion = ask("What emotional tone or mood does this image convey?", img)
-    story = ask("Write a short fictional story (5–10 lines) inspired by this image.", img)
     st.subheader("📝 Caption")
     st.write(caption)
     st.subheader("📄 Summary")
     st.write(summary)
-    st.subheader("📦 Objects Detected")
     st.write(objects)
-    st.subheader("😊 Emotion / Mood")
     st.write(emotion)
-    st.subheader("📖 Story")
     st.write(story)

 import streamlit as st
 import torch
+from PIL import Image
+import os
 from transformers import Blip2Processor, Blip2ForConditionalGeneration
+# --------------------------------------------------
+# STREAMLIT CONFIG (MUST BE FIRST STREAMLIT CALL)
+# --------------------------------------------------
+st.set_page_config(
+    page_title="Multimodal Image Understanding AI",
+    layout="centered"
+)
+st.write("🚀 App is starting...")  # Debug indicator
+# --------------------------------------------------
+# MODEL CONFIG (SAFE FOR HF SPACES)
+# --------------------------------------------------
+MODEL_NAME = "Salesforce/blip2-flan-t5-large"  # NOT XL
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+HF_TOKEN = os.getenv("HF_TOKEN")  # Optional but recommended
+# --------------------------------------------------
+# LOAD MODEL (STREAMLIT-SAFE)
+# --------------------------------------------------
+@st.cache_resource(show_spinner="🔄 Loading AI model (first time only)...")
 def load_model():
     processor = Blip2Processor.from_pretrained(
+        MODEL_NAME,
+        token=HF_TOKEN
     )
     model = Blip2ForConditionalGeneration.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+        device_map="auto" if DEVICE == "cuda" else None,
+        token=HF_TOKEN
     )
+    model.eval()
     return processor, model
 processor, model = load_model()
+# --------------------------------------------------
+# HELPER FUNCTION
+# --------------------------------------------------
+def ask_model(prompt, image):
+    inputs = processor(
+        images=image,
+        text=prompt,
+        return_tensors="pt"
+    ).to(DEVICE)
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=150
+        )
+    return processor.decode(output[0], skip_special_tokens=True)
+# --------------------------------------------------
+# UI
+# --------------------------------------------------
+st.title("📸 Multimodal Image Understanding & Storytelling AI")
+st.markdown(
+    """
+Upload an image and the AI will generate:
+- A factual caption
+- A descriptive summary
+- Detected objects
+- Emotional tone
+- A short story
+"""
+)
+image_file = st.file_uploader(
+    "Upload an image",
+    type=["jpg", "jpeg", "png"]
+)
+# --------------------------------------------------
+# IMAGE PROCESSING
+# --------------------------------------------------
+if image_file:
+    image = Image.open(image_file).convert("RGB")
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    with st.spinner("🧠 Analyzing image..."):
+        caption = ask_model(
+            "Describe this image in one factual sentence.",
+            image
+        )
+        summary = ask_model(
+            "Give a concise 3–5 line descriptive summary of this image.",
+            image
+        )
+        objects = ask_model(
+            "List the main objects and entities visible in this image.",
+            image
+        )
+        emotion = ask_model(
+            "What emotional tone or mood does this image convey?",
+            image
+        )
+        story = ask_model(
+            "Write a short fictional story (5–10 lines) inspired by this image.",
+            image
+        )
+    # --------------------------------------------------
+    # OUTPUT
+    # --------------------------------------------------
     st.subheader("📝 Caption")
     st.write(caption)
     st.subheader("📄 Summary")
     st.write(summary)
+    st.subheader("📦 Detected Objects")
     st.write(objects)
+    st.subheader("😊 Emotional Tone")
     st.write(emotion)
+    st.subheader("📖 Short Story")
     st.write(story)
+else:
+    st.info("⬆️ Upload an image to begin.")