Ishgan committed on
Commit
8c1f0af
·
verified ·
1 Parent(s): 0aa5f77

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -0
app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import LlavaProcessor, LlavaForConditionalGeneration
3
+ import gradio as gr
4
+ from PIL import Image
5
+
6
# Load the LLaVA model and its processor once at import time so every
# request served by the UI reuses the same weights.
# NOTE: the Transformers-format LLaVA-1.5 checkpoint is published with an
# "-hf" suffix; the un-suffixed id does not resolve on the Hub.
model_id = "llava-hf/llava-1.5-7b-hf"
processor = LlavaProcessor.from_pretrained(model_id)
# float16 halves the memory footprint; device_map="auto" lets the loader
# decide where the weights are placed.
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
10
+
11
+ # Function to generate captions
12
def generate_caption(image, prompt="Describe this image."):
    """Generate a text response for *image* with the module-level LLaVA model.

    Args:
        image: Image to describe. The Gradio UI in this file supplies a PIL
            image (``gr.Image(type="pil")``).
        prompt: Instruction for the model. A blank value falls back to
            ``"Describe this image."``. Text that already contains the
            ``<image>`` placeholder is passed to the processor unchanged.

    Returns:
        The generated text only, without the echoed prompt.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        raise ValueError("An image is required to generate a caption.")

    # Gradio sends "" for an untouched textbox, which would otherwise bypass
    # the default argument and produce an empty instruction.
    instruction = (prompt or "").strip() or "Describe this image."

    # LLaVA-1.5 expects an <image> placeholder inside its USER/ASSISTANT chat
    # template; a caller-provided template is respected as-is.
    if "<image>" in instruction:
        text = instruction
    else:
        text = f"USER: <image>\n{instruction} ASSISTANT:"

    inputs = processor(images=image, text=text, return_tensors="pt")
    # Follow the device chosen by device_map="auto" instead of assuming CUDA,
    # and cast floating-point inputs (pixel values) to the model's dtype.
    inputs = inputs.to(model.device, model.dtype)

    output = model.generate(**inputs, max_new_tokens=50)

    # generate() returns prompt tokens followed by the continuation; keep
    # only the newly generated part so the caption does not repeat the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = output[:, prompt_length:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
16
+
17
# Gradio UI: an image upload plus a prompt box, wired to generate_caption.
demo = gr.Interface(
    fn=generate_caption,
    inputs=[
        gr.Image(type="pil", label="Image"),
        # Pre-fill the prompt so an untouched textbox submits a usable
        # instruction rather than an empty string.
        gr.Textbox(value="Describe this image.", label="Prompt"),
    ],
    outputs="text",
)
demo.launch()