daniloedu commited on
Commit
20a8ad7
·
verified ·
1 Parent(s): 5996fc9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +26 -36
src/streamlit_app.py CHANGED
@@ -1,8 +1,12 @@
1
  import streamlit as st
2
- from transformers import AutoProcessor, AutoModelForImageTextToText
3
  from PIL import Image
4
  import torch
5
- import io
 
 
 
 
6
 
7
  # Set page config
8
  st.set_page_config(
@@ -13,21 +17,22 @@ st.set_page_config(
13
 
14
  @st.cache_resource
15
  def load_model():
16
- """Load the model and processor with caching"""
17
  try:
18
- processor = AutoProcessor.from_pretrained("google/gemma-3n-E4B-it")
19
- model = AutoModelForImageTextToText.from_pretrained(
20
- "google/gemma-3n-E4B-it",
 
21
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
22
  device_map="auto" if torch.cuda.is_available() else "cpu"
23
  )
24
- return processor, model
25
  except Exception as e:
26
  st.error(f"Error loading model: {str(e)}")
27
  st.error("Make sure you have access to the model and are logged in to HuggingFace.")
28
- return None, None
29
 
30
- def generate_response(processor, model, image, text_prompt, max_tokens=100):
31
  """Generate response from the model"""
32
  try:
33
  # Prepare messages in the expected format
@@ -41,32 +46,17 @@ def generate_response(processor, model, image, text_prompt, max_tokens=100):
41
  }
42
  ]
43
 
44
- # Process inputs
45
- inputs = processor.apply_chat_template(
46
- messages,
47
- add_generation_prompt=True,
48
- tokenize=True,
49
- return_dict=True,
50
- return_tensors="pt",
51
- ).to(model.device)
52
-
53
- # Generate response
54
- with torch.no_grad():
55
- outputs = model.generate(
56
- **inputs,
57
- max_new_tokens=max_tokens,
58
- do_sample=True,
59
- temperature=0.7,
60
- pad_token_id=processor.tokenizer.eos_token_id
61
- )
62
 
63
- # Decode response
64
- response = processor.decode(
65
- outputs[0][inputs["input_ids"].shape[-1]:],
66
- skip_special_tokens=True
67
- )
 
68
 
69
- return response
70
 
71
  except Exception as e:
72
  return f"Error generating response: {str(e)}"
@@ -91,9 +81,9 @@ def main():
91
 
92
  # Load model
93
  with st.spinner("Loading model... This may take a few minutes on first run."):
94
- processor, model = load_model()
95
 
96
- if processor is None or model is None:
97
  st.error("Failed to load model. Please check your setup and try again.")
98
  return
99
 
@@ -146,7 +136,7 @@ def main():
146
  else:
147
  with st.spinner("Generating response..."):
148
  response = generate_response(
149
- processor, model, image, text_prompt, max_tokens
150
  )
151
 
152
  st.subheader("🤖 Model Response:")
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
  from PIL import Image
4
  import torch
5
+ import os
6
+
7
+ # Set cache directory to avoid permission issues
8
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
9
+ os.environ["HF_HOME"] = "/tmp/hf_home"
10
 
11
  # Set page config
12
  st.set_page_config(
 
17
 
18
@st.cache_resource
def load_model():
    """Build and cache the Gemma image-text-to-text pipeline.

    Returns the ready-to-use ``transformers`` pipeline, or ``None`` when
    construction fails (the error is surfaced to the Streamlit UI instead
    of raising, so the app can show a friendly message and keep running).
    """
    # Pick precision/placement based on hardware: half precision on GPU,
    # full float32 on CPU where fp16 is poorly supported.
    has_cuda = torch.cuda.is_available()
    dtype = torch.float16 if has_cuda else torch.float32
    placement = "auto" if has_cuda else "cpu"

    try:
        # The high-level pipeline API avoids wiring processor + model by hand.
        return pipeline(
            "image-text-to-text",
            model="google/gemma-3n-E4B-it",
            torch_dtype=dtype,
            device_map=placement,
        )
    except Exception as e:
        # Boundary-level handler: report to the UI rather than crash the app.
        st.error(f"Error loading model: {str(e)}")
        st.error("Make sure you have access to the model and are logged in to HuggingFace.")
        return None
34
 
35
+ def generate_response(pipe, image, text_prompt, max_tokens=100):
36
  """Generate response from the model"""
37
  try:
38
  # Prepare messages in the expected format
 
46
  }
47
  ]
48
 
49
+ # Generate response using pipeline
50
+ response = pipe(messages, max_new_tokens=max_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ # Extract text from response
53
+ if isinstance(response, list) and len(response) > 0:
54
+ if isinstance(response[0], dict) and 'generated_text' in response[0]:
55
+ return response[0]['generated_text']
56
+ elif isinstance(response[0], str):
57
+ return response[0]
58
 
59
+ return str(response)
60
 
61
  except Exception as e:
62
  return f"Error generating response: {str(e)}"
 
81
 
82
  # Load model
83
  with st.spinner("Loading model... This may take a few minutes on first run."):
84
+ pipe = load_model()
85
 
86
+ if pipe is None:
87
  st.error("Failed to load model. Please check your setup and try again.")
88
  return
89
 
 
136
  else:
137
  with st.spinner("Generating response..."):
138
  response = generate_response(
139
+ pipe, image, text_prompt, max_tokens
140
  )
141
 
142
  st.subheader("🤖 Model Response:")