WaysAheadGlobal committed on
Commit 621bb5d · verified · 1 Parent(s): b48f2c7

Update app.py

Files changed (1)
  1. app.py +17 -49
app.py CHANGED
@@ -1,63 +1,31 @@
  # app.py
  
  import streamlit as st
+ from transformers import pipeline
  from PIL import Image
- import torch
+ import requests
  
- # ✅ Local TinyLLaVA from real LLaVA repo
- from tinyllava.model.builder import load_pretrained_model
- from tinyllava.utils import disable_torch_init
- from tinyllava.mm_utils import (
-     process_images,
-     tokenizer_image_token,
-     get_model_name_from_path
- )
- 
- # Disable torch default init for faster startup
- disable_torch_init()
- 
- # Load TinyLLaVA 3.1B (best small version)
- MODEL_PATH = "bczhou/TinyLLaVA-3.1B"
+ st.set_page_config(page_title="TinyLLaVA (Streamlit)", layout="centered")
+ st.title("🦙 TinyLLaVA — Vision-Language Q&A")
  
- # Loads tokenizer, model, image processor, context length
- tokenizer, model, image_processor, context_len = load_pretrained_model(
-     model_path=MODEL_PATH,
-     model_base=None,
-     model_name="TinyLLaVA-3.1B"
+ pipe = pipeline(
+     task="image-to-text",
+     model="bczhou/tiny-llava-v1-hf",
+     trust_remote_code=True,
+     device_map="cpu"
  )
  
- device = torch.device("cpu")
- model.to(device)
- 
- # Streamlit UI
- st.set_page_config(page_title="TinyLLaVA 3.1B (Streamlit)", layout="centered")
- st.title("🦙 TinyLLaVA 3.1B — Vision-Language Q&A")
- 
- uploaded_file = st.file_uploader("📷 Upload an image", type=["jpg", "png", "jpeg"])
- prompt = st.text_input("💬 Ask a question about the image:")
+ uploaded_file = st.file_uploader("📷 Upload an image", type=["jpg","png","jpeg"])
+ prompt = st.text_input("💬 Ask a question about the image:", value="What is happening?")
  
- if uploaded_file is not None and prompt:
+ if uploaded_file and prompt:
      image = Image.open(uploaded_file).convert("RGB")
+     st.image(image, caption="Uploaded Image", use_column_width=True)
  
-     # Process image
-     image_tensor = process_images([image], image_processor, model.config)
-     image_tensor = image_tensor.to(device)
- 
-     # Build prompt with image tokens
-     prompt_text = tokenizer_image_token(prompt, tokenizer, context_len)
-     inputs = tokenizer([prompt_text])
-     input_ids = torch.tensor(inputs.input_ids).unsqueeze(0).to(device)
- 
-     # Generate
+     query = f"USER: <image>\n{prompt}\nASSISTANT:"
      with st.spinner("Generating answer..."):
-         output_ids = model.generate(
-             input_ids,
-             images=image_tensor,
-             do_sample=True,
-             temperature=0.2,
-             max_new_tokens=200
-         )
-         out_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+         result = pipe(image, prompt=query)
+         answer = result[0]["generated_text"]
  
      st.subheader("📝 Answer:")
-     st.write(out_text)
+     st.write(answer)
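
For a quick check of the new pipeline path outside Streamlit, here is a minimal sketch. It assumes `transformers` and `Pillow` are installed; the filename `test.jpg` and the `max_new_tokens=200` cap (carried over from the version this commit removes) are illustrative, not part of the commit:

# smoke_test.py: hypothetical standalone check, not part of this commit
from PIL import Image
from transformers import pipeline

# Same model and task as app.py; CPU-only, as in the Space
pipe = pipeline(
    task="image-to-text",
    model="bczhou/tiny-llava-v1-hf",
    device_map="cpu"
)

image = Image.open("test.jpg").convert("RGB")  # placeholder path

# LLaVA-style chat template; <image> marks where the image features are spliced in
query = "USER: <image>\nWhat is happening?\nASSISTANT:"

# The image-to-text pipeline takes the image first; the prompt goes in as a keyword argument
result = pipe(image, prompt=query, generate_kwargs={"max_new_tokens": 200})
print(result[0]["generated_text"])

The app itself is launched with `streamlit run app.py`.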