Spaces:

gopalagra
/

blind-image-captioning

Sleeping

gopalagra committed on Sep 3

Commit

739fb9a

verified ·

1 Parent(s): 880b908

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -74,9 +74,46 @@ import torch
 from PIL import Image
 # Load small LLaVA model
-processor = AutoProcessor.from_pretrained("LLaVA/LLaVA-7B-small")
 model = AutoModelForCausalLM.from_pretrained(
-    "LLaVA/LLaVA-7B-small",
     torch_dtype=torch.float16,
     device_map="auto"  # Automatically use GPU if available
 )

 from PIL import Image
 # Load small LLaVA model
+processor = AutoProcessor.from_pretrained("import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
+import torch
+from PIL import Image
+# Load small LLaVA model
+processor = AutoProcessor.from_pretrained("LLaVA/LLaVA-7B-llm-small")
+model = AutoModelForCausalLM.from_pretrained(
+    "LLaVA/LLaVA-7B-llm-small",
+    torch_dtype=torch.float16,
+    device_map="auto"  # Automatically use GPU if available
+)
+def generate_caption(image):
+    # Convert to PIL if needed
+    if isinstance(image, str):
+        image = Image.open(image).convert("RGB")
+    # Prepare inputs
+    inputs = processor(images=image, return_tensors="pt").to(model.device)
+    # Generate output
+    outputs = model.generate(**inputs, max_new_tokens=50)
+    # Decode result
+    caption = processor.decode(outputs[0], skip_special_tokens=True)
+    return caption
+# Gradio Interface
+interface = gr.Interface(
+    fn=generate_caption,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="LLaVA Image Captioning"
+)
+interface.launch()
+")
 model = AutoModelForCausalLM.from_pretrained(
+    "LLaVA/LLaVA-7B-llm-small",
     torch_dtype=torch.float16,
     device_map="auto"  # Automatically use GPU if available
 )