Spaces:

daniloedu
/

GemmaTest

Sleeping

App Files Files Community

daniloedu committed on Aug 5, 2025

Commit

9ffd0bd

verified ·

1 Parent(s): 5b561ab

Update to call the Gemma model

Browse files

Files changed (1) hide show

src/streamlit_app.py +170 -34

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,176 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+from transformers import AutoProcessor, AutoModelForImageTextToText
+from PIL import Image
+import torch
+import io
+# Page-level settings: browser-tab title and icon, and the wide layout
+# that the two-column input/output UI in main() is rendered in.
+st.set_page_config(
+    page_title="Gemma-3n E4B Vision-Language Model",
+    page_icon="🤖",
+    layout="wide",
+)
+@st.cache_resource
+def _load_model_cached():
+    """Load the Gemma-3n processor and model once per server process.
+
+    Raises whatever ``from_pretrained`` raises. Errors are deliberately NOT
+    handled here: ``st.cache_resource`` stores the function's return value,
+    so returning a ``(None, None)`` failure marker from the cached function
+    would pin the failure until the cache is cleared. A raised exception is
+    not stored, so the next rerun retries the load.
+    """
+    model_id = "google/gemma-3n-E4B-it"
+    use_cuda = torch.cuda.is_available()
+    processor = AutoProcessor.from_pretrained(model_id)
+    model = AutoModelForImageTextToText.from_pretrained(
+        model_id,
+        # Half precision on GPU, full precision on CPU.
+        torch_dtype=torch.float16 if use_cuda else torch.float32,
+        device_map="auto" if use_cuda else "cpu",
+    )
+    return processor, model
+def load_model():
+    """Return ``(processor, model)``, or ``(None, None)`` if loading fails.
+
+    Successful loads are cached by ``_load_model_cached``. On failure the
+    error is shown in the Streamlit UI and ``(None, None)`` is returned so
+    the caller can stop rendering; because the failure itself is not cached,
+    fixing the setup (e.g. logging in to Hugging Face) and rerunning the app
+    triggers a fresh load attempt.
+    """
+    try:
+        return _load_model_cached()
+    except Exception as e:
+        # Broad catch is intentional: this is the UI boundary, and download,
+        # authentication and out-of-memory errors should all be reported
+        # to the user rather than crash the script.
+        st.error(f"Error loading model: {str(e)}")
+        st.error("Make sure you have access to the model and are logged in to HuggingFace.")
+        return None, None
+def generate_response(processor, model, image, text_prompt, max_tokens=100):
+    """Generate the model's answer to ``text_prompt`` about ``image``.
+
+    Args:
+        processor: Processor used to build the chat-template inputs and to
+            decode the generated tokens.
+        model: Model whose ``generate`` method produces the answer; inputs
+            are moved to ``model.device``.
+        image: Image to analyse. PIL images that are not in RGB mode are
+            converted to RGB first; any other value is passed through
+            unchanged to the chat template.
+        text_prompt: The user's question about the image.
+        max_tokens: Upper bound on newly generated tokens
+            (passed as ``max_new_tokens``).
+
+    Returns:
+        The decoded text of the newly generated tokens only (the prompt is
+        sliced off). If anything fails, a string of the form
+        ``"Error generating response: <message>"`` is returned instead of
+        raising, so the caller can display it directly.
+    """
+    try:
+        # The uploader accepts PNG/GIF/BMP, which may be RGBA or palette
+        # images; normalise to 3-channel RGB before handing to the processor.
+        if hasattr(image, "convert") and getattr(image, "mode", "RGB") != "RGB":
+            image = image.convert("RGB")
+        # Single-turn conversation: one user message holding image + text.
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": text_prompt},
+                ],
+            }
+        ]
+        # Tokenize via the chat template and move tensors to the model's device.
+        inputs = processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(model.device)
+        prompt_length = inputs["input_ids"].shape[-1]
+        # Sampling is enabled, so repeated calls may give different answers.
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=max_tokens,
+                do_sample=True,
+                temperature=0.7,
+                pad_token_id=processor.tokenizer.eos_token_id,
+            )
+        # generate() returns prompt + completion; keep only the completion.
+        return processor.decode(
+            outputs[0][prompt_length:],
+            skip_special_tokens=True,
+        )
+    except Exception as e:
+        # Errors are returned as text (not raised) because main() writes the
+        # return value straight into the output column.
+        return f"Error generating response: {str(e)}"
+def main():
+    """Render the Streamlit page: setup help, inputs, model output, examples.
+
+    Flow: show sidebar setup instructions, load the model via
+    ``load_model()`` (stopping early if it returns ``None``), then render an
+    input column (image upload, question, token limit, button) and an output
+    column (image preview and the model's response).
+    """
+    st.title("🤖 Gemma-3n E4B Vision-Language Model")
+    st.markdown("Upload an image and ask questions about it!")
+    # The sidebar only displays authentication instructions; no login check
+    # is performed here. Load failures are reported by load_model().
+    st.sidebar.markdown("### 📋 Setup Instructions")
+    st.sidebar.markdown("""
+    1. Make sure you have access to the gated model
+    2. Login to HuggingFace using your token:
+       ```bash
+       huggingface-cli login
+       ```
+    3. Or set your token as an environment variable:
+       ```bash
+       export HF_TOKEN=your_token_here
+       ```
+    """)
+    # Load model (cached after the first successful load)
+    with st.spinner("Loading model... This may take a few minutes on first run."):
+        processor, model = load_model()
+    if processor is None or model is None:
+        st.error("Failed to load model. Please check your setup and try again.")
+        return
+    st.success("Model loaded successfully!")
+    # Two equal-width columns: inputs on the left, output on the right
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.subheader("📤 Input")
+        # Image upload
+        uploaded_file = st.file_uploader(
+            "Choose an image...",
+            type=['png', 'jpg', 'jpeg', 'gif', 'bmp'],
+            help="Upload an image to analyze"
+        )
+        # Text input
+        text_prompt = st.text_area(
+            "Ask a question about the image:",
+            placeholder="What do you see in this image?",
+            height=100
+        )
+        # Generation parameters
+        max_tokens = st.slider(
+            "Max tokens to generate:",
+            min_value=10,
+            max_value=200,
+            value=100,
+            help="Maximum number of tokens to generate"
+        )
+        # Generate button
+        generate_btn = st.button("🚀 Generate Response", type="primary")
+    with col2:
+        st.subheader("📤 Output")
+        if uploaded_file is not None:
+            try:
+                image = Image.open(uploaded_file)
+            except OSError as e:
+                # PIL signals unreadable/corrupt files with OSError
+                # subclasses; report it instead of crashing the page.
+                image = None
+                st.error(f"Could not read the uploaded image: {e}")
+            if image is not None:
+                # NOTE(review): use_column_width is reported as deprecated in
+                # recent Streamlit releases - confirm against the pinned
+                # version before switching to its replacement.
+                st.image(image, caption="Uploaded image", use_column_width=True)
+                # Generate response when button is clicked
+                if generate_btn:
+                    if not text_prompt.strip():
+                        st.warning("Please enter a question about the image.")
+                    else:
+                        with st.spinner("Generating response..."):
+                            response = generate_response(
+                                processor, model, image, text_prompt, max_tokens
+                            )
+                        st.subheader("🤖 Model Response:")
+                        st.write(response)
+        else:
+            st.info("👆 Please upload an image to get started")
+    # Example section
+    st.markdown("---")
+    st.subheader("💡 Example Questions to Try:")
+    st.markdown("""
+    - What objects do you see in this image?
+    - Describe the scene in detail
+    - What colors are present in the image?
+    - What is the main subject of this image?
+    - Can you identify any text in this image?
+    """)
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        "Built with ❤️ using [Streamlit](https://streamlit.io) and "
+        "[Hugging Face Transformers](https://huggingface.co/transformers/)"
+    )
+# Script entry point: render the app only when this file is executed as the
+# main module, not when it is imported.
+if __name__ == "__main__":
+    main()