daniloedu committed on
Commit
9ffd0bd
·
verified ·
1 Parent(s): 5b561ab

Update to call the Gemma model

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +170 -34
src/streamlit_app.py CHANGED
@@ -1,40 +1,176 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ from transformers import AutoProcessor, AutoModelForImageTextToText
3
+ from PIL import Image
4
+ import torch
5
+ import io
6
 
7
# Configure the browser tab (title/icon) and use the full page width.
# The icon must be a single valid emoji; the previous value was a mis-decoded
# UTF-8 byte sequence of the robot emoji.
st.set_page_config(
    page_title="Gemma-3n E4B Vision-Language Model",
    page_icon="🤖",
    layout="wide",
)
13
 
14
_MODEL_ID = "google/gemma-3n-E4B-it"


@st.cache_resource(show_spinner=False)
def _load_model_cached():
    """Load and cache the processor and model; raise on failure.

    Only successful loads are stored by ``st.cache_resource`` because an
    exception propagates out of the cached function instead of being
    returned as a value.
    """
    if torch.cuda.is_available():
        # Prefer bfloat16 where the GPU supports it; fall back to float16
        # (the original behaviour) otherwise.
        # NOTE(review): bfloat16 is the dtype used in the published Gemma
        # examples -- confirm on the target hardware.
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        device_map = "auto"
    else:
        dtype = torch.float32
        device_map = "cpu"

    processor = AutoProcessor.from_pretrained(_MODEL_ID)
    model = AutoModelForImageTextToText.from_pretrained(
        _MODEL_ID,
        torch_dtype=dtype,
        device_map=device_map,
    )
    return processor, model


def load_model():
    """Return ``(processor, model)`` for the Gemma model, or ``(None, None)``.

    The heavy loading work is delegated to a cached helper. Error handling
    lives here, outside the cache, so that a failed attempt (for example a
    missing HuggingFace login) is reported to the user but is NOT cached:
    the next rerun of the script tries to load the model again.

    Returns:
        tuple: ``(processor, model)`` on success, ``(None, None)`` if loading
        raised any exception (the error is shown with ``st.error``).
    """
    try:
        return _load_model_cached()
    except Exception as e:  # UI boundary: report the problem instead of crashing
        st.error(f"Error loading model: {str(e)}")
        st.error("Make sure you have access to the model and are logged in to HuggingFace.")
        return None, None
29
 
30
def generate_response(processor, model, image, text_prompt, max_tokens=100):
    """Generate a text answer about ``image`` for ``text_prompt``.

    Args:
        processor: Object providing ``apply_chat_template``, ``decode`` and
            ``tokenizer.eos_token_id`` (the processor returned by
            ``load_model``).
        model: Object providing ``generate``, ``device`` and ``dtype``.
        image: The image to describe (a PIL image in this app).
        text_prompt: The user's question about the image.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        str: The decoded newly generated tokens (the prompt is stripped), or
        a message starting with ``"Error generating response: "`` if any
        step raised an exception.
    """
    try:
        # Palette/alpha images (e.g. GIF, some PNG) are normalised to RGB.
        # NOTE(review): the processor may already do this; the conversion is
        # harmless if so.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        # Single-turn chat: one user message carrying the image and the text.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": text_prompt},
                ],
            }
        ]

        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Move to the model's device AND cast floating-point tensors (the
        # image pixels) to the model's dtype; otherwise a half-precision
        # model receives float32 pixel values.
        inputs = inputs.to(model.device, dtype=model.dtype)
        prompt_length = inputs["input_ids"].shape[-1]

        # Inference only: no autograd bookkeeping needed.
        with torch.inference_mode():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=0.7,
                pad_token_id=processor.tokenizer.eos_token_id,
            )

        # The output sequence starts with the prompt tokens; keep only the
        # continuation produced by the model.
        return processor.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        )

    except Exception as e:  # UI boundary: surface the failure as text
        return f"Error generating response: {str(e)}"
73
 
74
def _open_uploaded_image(uploaded_file):
    """Open an uploaded file as a PIL image; return None (and show an error) if unreadable."""
    try:
        image = Image.open(uploaded_file)
        image.load()  # force decoding now so corrupt files fail here
        return image
    except (OSError, ValueError) as e:
        st.error(f"Could not read the uploaded image: {e}")
        return None


def main():
    """Render the app: sidebar instructions, input widgets and model output.

    The model is loaded via ``load_model``; if that fails the function shows
    an error and returns early. Otherwise the left column collects an image,
    a question and the token limit, and the right column shows the image and,
    after the button is pressed, the model's answer.
    """
    st.title("🤖 Gemma-3n E4B Vision-Language Model")
    st.markdown("Upload an image and ask questions about it!")

    # Sidebar: static setup instructions only (no authentication check is
    # performed here).
    st.sidebar.markdown("### 📋 Setup Instructions")
    st.sidebar.markdown(
        "1. Make sure you have access to the gated model\n"
        "2. Login to HuggingFace using your token:\n"
        "   ```bash\n"
        "   huggingface-cli login\n"
        "   ```\n"
        "3. Or set your token as an environment variable:\n"
        "   ```bash\n"
        "   export HUGGINGFACE_HUB_TOKEN=your_token_here\n"
        "   ```\n"
    )

    # Load model
    with st.spinner("Loading model... This may take a few minutes on first run."):
        processor, model = load_model()

    if processor is None or model is None:
        st.error("Failed to load model. Please check your setup and try again.")
        return

    st.success("Model loaded successfully!")

    # Two equal-width columns: inputs on the left, results on the right.
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📤 Input")

        uploaded_file = st.file_uploader(
            "Choose an image...",
            type=["png", "jpg", "jpeg", "gif", "bmp"],
            help="Upload an image to analyze",
        )

        text_prompt = st.text_area(
            "Ask a question about the image:",
            placeholder="What do you see in this image?",
            height=100,
        )

        max_tokens = st.slider(
            "Max tokens to generate:",
            min_value=10,
            max_value=200,
            value=100,
            help="Maximum number of tokens to generate",
        )

        generate_btn = st.button("🚀 Generate Response", type="primary")

    with col2:
        st.subheader("📤 Output")

        if uploaded_file is None:
            st.info("👆 Please upload an image to get started")
        else:
            image = _open_uploaded_image(uploaded_file)
            if image is not None:
                # `use_column_width` is deprecated in Streamlit; the
                # container-width flag is its documented replacement.
                st.image(image, caption="Uploaded image", use_container_width=True)

                if generate_btn:
                    if not text_prompt.strip():
                        st.warning("Please enter a question about the image.")
                    else:
                        with st.spinner("Generating response..."):
                            response = generate_response(
                                processor, model, image, text_prompt, max_tokens
                            )

                        # generate_response reports failures as a string with
                        # this prefix; show those as errors, not as an answer.
                        if response.startswith("Error generating response:"):
                            st.error(response)
                        else:
                            st.subheader("🤖 Model Response:")
                            st.write(response)

    # Example section
    st.markdown("---")
    st.subheader("💡 Example Questions to Try:")
    st.markdown(
        "- What objects do you see in this image?\n"
        "- Describe the scene in detail\n"
        "- What colors are present in the image?\n"
        "- What is the main subject of this image?\n"
        "- Can you identify any text in this image?\n"
    )

    # Footer
    st.markdown("---")
    st.markdown(
        "Built with ❤️ using [Streamlit](https://streamlit.io) and "
        "[Hugging Face Transformers](https://huggingface.co/transformers/)"
    )
174
 
175
# Script entry point: build the UI only when this file is executed directly
# (as `streamlit run` does), not when it is imported as a module.
if __name__ == "__main__":
    main()