gopalagra committed on
Commit
739fb9a
·
verified ·
1 Parent(s): 880b908

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -2
app.py CHANGED
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
import torch
from PIL import Image

# Load small LLaVA model.
# NOTE(review): the repo id "LLaVA/LLaVA-7B-llm-small" does not look like a
# standard Hub id (official checkpoints live under e.g. "llava-hf/...") —
# confirm the model actually exists before deploying.
MODEL_ID = "LLaVA/LLaVA-7B-llm-small"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",  # automatically place on GPU if one is available
)


def generate_caption(image):
    """Generate a caption for *image* using the loaded LLaVA model.

    Args:
        image: Either a ``PIL.Image.Image`` (as supplied by the Gradio
            ``Image(type="pil")`` input) or a filesystem path string.

    Returns:
        str: The decoded caption text.
    """
    # Accept a path string as well as a PIL image, for programmatic callers.
    if isinstance(image, str):
        image = Image.open(image).convert("RGB")

    # Prepare model inputs on the same device as the model.
    # NOTE(review): LLaVA-style models usually also expect a text prompt
    # alongside the image — verify the processor's expected call signature
    # for this checkpoint.
    inputs = processor(images=image, return_tensors="pt").to(model.device)

    # Generate and decode the caption.
    outputs = model.generate(**inputs, max_new_tokens=50)
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return caption


# Gradio Interface
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="LLaVA Image Captioning",
)

interface.launch()