Spaces:
Build error
Build error
| # app.py | |
| import gradio as gr | |
| from tinyllava.model.builder import load_pretrained_model | |
| from tinyllava.utils import disable_torch_init | |
| from tinyllava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path | |
| import torch | |
| from PIL import Image | |
# --- Disable unnecessary torch init ---
# Called before the model is built, as in the upstream inference scripts.
disable_torch_init()

# --- Load TinyLLaVA 3.1B ---
model_path = "bczhou/TinyLLaVA-3.1B"  # official HF ID

# The app runs on CPU only, so the loader and every tensor target this device.
device = torch.device("cpu")

tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,  # If you have a base model, point it here; else leave as is
    # Derive the name from the path so the two can never drift apart.
    model_name=get_model_name_from_path(model_path),
    # NOTE(review): the upstream loader defaults to device="cuda" and moves
    # the vision tower there, which fails on a CPU-only host - confirm the
    # keyword against the installed tinyllava version.
    device=str(device),
)

# The loader produces half-precision weights by default; several CPU kernels
# do not support float16, so run the whole model in float32 instead.
model.to(device=device, dtype=torch.float32)
# --- Gradio handler ---
def describe_image(image, prompt):
    """Answer a question about an uploaded image with TinyLLaVA.

    Args:
        image: The uploaded image as a NumPy array (the Gradio input is
            declared with ``type="numpy"``), or ``None`` if nothing was
            uploaded.
        prompt: The user's question. An empty or missing question falls back
            to a generic "describe" request.

    Returns:
        The decoded model answer as a string.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Gradio passes None when the image slot is empty; fail with a message
        # the UI can display instead of crashing inside PIL.
        raise gr.Error("Please upload an image first.")

    question = (prompt or "").strip() or "Describe this image."
    # tokenizer_image_token() splits the prompt on the literal "<image>"
    # placeholder and inserts the image sentinel id there; without it the
    # model never sees the image features.
    if "<image>" not in question:
        question = "<image>\n" + question
    # NOTE(review): the upstream eval script additionally wraps the question
    # in a conversation template (conv mode "phi") - consider doing the same.

    # TinyLLaVA wants PIL
    pil_image = Image.fromarray(image)
    image_tensor = process_images([pil_image], image_processor, model.config)
    # Match the model's device *and* dtype so the vision tower accepts it.
    image_tensor = image_tensor.to(device=device, dtype=model.dtype)

    # tokenizer_image_token() already returns token ids (1-D with
    # return_tensors="pt"); it must not be fed back through the tokenizer.
    # Its third positional parameter is the image token index, not the
    # context length, so the default is used here.
    input_ids = (
        tokenizer_image_token(question, tokenizer, return_tensors="pt")
        .unsqueeze(0)
        .to(device)
    )

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=True,
            temperature=0.2,
            max_new_tokens=200,
        )

    # Some versions echo the prompt (including the negative image sentinel id,
    # which cannot be decoded) in front of the answer; drop it when present.
    prompt_len = input_ids.shape[1]
    if output_ids.shape[1] >= prompt_len and torch.equal(
        output_ids[:, :prompt_len], input_ids
    ):
        output_ids = output_ids[:, prompt_len:]

    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
# --- Gradio UI ---
# One image input and one free-text question go in; the model's answer comes
# back as plain text.
iface = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="numpy", label="Image"),
        gr.Textbox(label="Your question", placeholder="What's happening in this image?"),
    ],
    outputs=gr.Textbox(label="TinyLLaVA Answer"),
    # The title had been garbled by a wrong text decoding; the emoji and the
    # dash are restored here.
    title="🦙 TinyLLaVA-3.1B — Vision-Language Q&A",
    description="A lightweight LLaVA variant that runs on CPU Spaces. Upload an image, ask a question.",
)

if __name__ == "__main__":
    iface.launch()