WaysAheadGlobal committed on
Commit 621bb5d · verified · 1 Parent(s): b48f2c7

Update app.py

Files changed (1)
  1. app.py +17 -49
app.py CHANGED
@@ -1,63 +1,31 @@
  # app.py
  
  import streamlit as st
+ from transformers import pipeline
  from PIL import Image
- import torch
+ import requests
  
- # ✅ Local TinyLLaVA from real LLaVA repo
- from tinyllava.model.builder import load_pretrained_model
- from tinyllava.utils import disable_torch_init
- from tinyllava.mm_utils import (
-     process_images,
-     tokenizer_image_token,
-     get_model_name_from_path
- )
- 
- # Disable torch default init for faster startup
- disable_torch_init()
- 
- # Load TinyLLaVA 3.1B (best small version)
- MODEL_PATH = "bczhou/TinyLLaVA-3.1B"
+ st.set_page_config(page_title="TinyLLaVA (Streamlit)", layout="centered")
+ st.title("🦙 TinyLLaVA — Vision-Language Q&A")
  
- # Loads tokenizer, model, image processor, context length
- tokenizer, model, image_processor, context_len = load_pretrained_model(
-     model_path=MODEL_PATH,
-     model_base=None,
-     model_name="TinyLLaVA-3.1B"
+ pipe = pipeline(
+     task="image-to-text",
+     model="bczhou/tiny-llava-v1-hf",
+     trust_remote_code=True,
+     device_map="cpu"
  )
  
- device = torch.device("cpu")
- model.to(device)
- 
- # Streamlit UI
- st.set_page_config(page_title="TinyLLaVA 3.1B (Streamlit)", layout="centered")
- st.title("🦙 TinyLLaVA 3.1B — Vision-Language Q&A")
- 
- uploaded_file = st.file_uploader("📷 Upload an image", type=["jpg", "png", "jpeg"])
- prompt = st.text_input("💬 Ask a question about the image:")
+ uploaded_file = st.file_uploader("📷 Upload an image", type=["jpg","png","jpeg"])
+ prompt = st.text_input("💬 Ask a question about the image:", value="What is happening?")
  
- if uploaded_file is not None and prompt:
+ if uploaded_file and prompt:
      image = Image.open(uploaded_file).convert("RGB")
+     st.image(image, caption="Uploaded Image", use_column_width=True)
  
-     # Process image
-     image_tensor = process_images([image], image_processor, model.config)
-     image_tensor = image_tensor.to(device)
- 
-     # Build prompt with image tokens
-     prompt_text = tokenizer_image_token(prompt, tokenizer, context_len)
-     inputs = tokenizer([prompt_text])
-     input_ids = torch.tensor(inputs.input_ids).unsqueeze(0).to(device)
- 
-     # Generate
+     query = f"USER: <image>\n{prompt}\nASSISTANT:"
      with st.spinner("Generating answer..."):
-         output_ids = model.generate(
-             input_ids,
-             images=image_tensor,
-             do_sample=True,
-             temperature=0.2,
-             max_new_tokens=200
-         )
-         out_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+         result = pipe(image, prompt=query)
+         answer = result[0]["generated_text"]
  
      st.subheader("📝 Answer:")
-     st.write(out_text)
+     st.write(answer)
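
For a quick check of the new pipeline path outside Streamlit, here is a minimal sketch. It assumes `transformers` and `Pillow` are installed; the filename `test.jpg` and the `max_new_tokens=200` cap (carried over from the version this commit removes) are illustrative, not part of the commit:

# smoke_test.py: hypothetical standalone check, not part of this commit
from PIL import Image
from transformers import pipeline

# Same model and task as app.py; CPU-only, as in the Space
pipe = pipeline(
    task="image-to-text",
    model="bczhou/tiny-llava-v1-hf",
    device_map="cpu"
)

image = Image.open("test.jpg").convert("RGB")  # placeholder path

# LLaVA-style chat template; <image> marks where the image features are spliced in
query = "USER: <image>\nWhat is happening?\nASSISTANT:"

# The image-to-text pipeline takes the image first; the prompt goes in as a keyword argument
result = pipe(image, prompt=query, generate_kwargs={"max_new_tokens": 200})
print(result[0]["generated_text"])

The app itself is launched with `streamlit run app.py`.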