Upload 3 files
- ap.py +46 -0
- readme.md +16 -0
- requirements.txt +4 -0
ap.py
ADDED
@@ -0,0 +1,46 @@
+import streamlit as st
+from PIL import Image
+import requests
+import torch
+from transformers import MllamaForConditionalGeneration, AutoProcessor
+
+# Load the model and processor once; st.cache_resource keeps them across reruns
+@st.cache_resource
+def load_model_and_processor():
+    model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+    model = MllamaForConditionalGeneration.from_pretrained(
+        model_id, torch_dtype=torch.bfloat16, device_map="auto"
+    )
+    processor = AutoProcessor.from_pretrained(model_id)
+    return model, processor
+
+model, processor = load_model_and_processor()
+
+# Title and instructions
+st.title("Llama 3.2 Vision-Instruct")
+st.write("Upload an image and ask a question about it. The model will analyze the image and provide an answer.")
+
+# File uploader for the image
+uploaded_file = st.file_uploader("Upload an Image (JPG/PNG)", type=["jpg", "png", "jpeg"])
+
+# Text input for the question
+user_question = st.text_input("Enter your question about the image:")
+
+# Process and respond
+if uploaded_file and user_question:
+    # Display the uploaded image (converted to RGB so RGBA PNGs don't trip the processor)
+    image = Image.open(uploaded_file).convert("RGB")
+    st.image(image, caption="Uploaded Image", use_container_width=True)
+
+    # Prepare input for the model: the chat template inserts the image placeholder token
+    messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": user_question}]}]
+    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
+
+    # Generate the response and decode only the newly generated tokens
+    with st.spinner("Processing..."):
+        output = model.generate(**inputs, max_new_tokens=30)
+        response = processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+
+    # Display the response
+    st.write(f"**Model's Response:** {response}")
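For sanity-checking the model flow without the Streamlit layer, a minimal standalone sketch of the same inference path is below. The file name, the `example.jpg` path, and the sample question are placeholders I've introduced, and access to the gated weights is assumed.

```python
# smoke_test.py -- hypothetical standalone check of the same inference flow
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_id)

# Placeholder image and question -- swap in your own
image = Image.open("example.jpg").convert("RGB")
question = "What is shown in this image?"

# Same chat-template flow as ap.py: the template injects the image
# placeholder token, and the processor pairs it with the actual pixels.
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": question},
]}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, prompt, add_special_tokens=False, return_tensors="pt").to(model.device)

output = model.generate(**inputs, max_new_tokens=100)
# Decode only the tokens generated after the prompt
print(processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```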
readme.md
ADDED
@@ -0,0 +1,16 @@
+# Llama 3.2 Vision-Instruct Space
+
+This application uses the Llama-3.2-11B-Vision-Instruct model to answer questions about images.
+Simply upload an image and ask your question in the input field!
+
+## How It Works
+- Upload an image in JPG or PNG format.
+- Enter your question in the text input box.
+- The model analyzes the image and provides a response.
+
+## Requirements
+- Python 3.8 or later
+- See `requirements.txt` for dependencies.
+
+## License
+This application uses the Llama-3.2-11B-Vision-Instruct model under the Llama 3.2 Community License.
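Not covered in the README: to run the app locally you would typically install the dependencies with `pip install -r requirements.txt` and launch it with `streamlit run ap.py`. The weights are gated on the Hugging Face Hub, so you also need to accept the Llama 3.2 Community License on the model page and authenticate first (e.g. `huggingface-cli login`).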
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+torch
+transformers>=4.45.0
+Pillow
+streamlit
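Two caveats on this list: the `transformers>=4.45.0` floor matches the release in which Mllama support landed, and `device_map="auto"` in ap.py additionally relies on the `accelerate` package, which is not listed here and would need to be installed alongside these dependencies.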