Spaces:

gopalagra
/

blind-image-captioning

Sleeping

gopalagra committed on Sep 3

Commit

0576f19

verified ·

1 Parent(s): 739fb9a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -74,15 +74,9 @@ import torch
 from PIL import Image
 # Load small LLaVA model
-processor = AutoProcessor.from_pretrained("import gradio as gr
-from transformers import AutoProcessor, AutoModelForCausalLM
-import torch
-from PIL import Image
-# Load small LLaVA model
-processor = AutoProcessor.from_pretrained("LLaVA/LLaVA-7B-llm-small")
 model = AutoModelForCausalLM.from_pretrained(
-    "LLaVA/LLaVA-7B-llm-small",
     torch_dtype=torch.float16,
     device_map="auto"  # Automatically use GPU if available
 )
@@ -111,36 +105,6 @@ interface = gr.Interface(
 )
 interface.launch()
-")
-model = AutoModelForCausalLM.from_pretrained(
-    "LLaVA/LLaVA-7B-llm-small",
-    torch_dtype=torch.float16,
-    device_map="auto"  # Automatically use GPU if available
-)
-def generate_caption(image):
-    # Convert to PIL if needed
-    if isinstance(image, str):
-        image = Image.open(image).convert("RGB")
-    # Prepare inputs
-    inputs = processor(images=image, return_tensors="pt").to(model.device)
-    # Generate output
-    outputs = model.generate(**inputs, max_new_tokens=50)
-    # Decode result
-    caption = processor.decode(outputs[0], skip_special_tokens=True)
-    return caption
-# Gradio Interface
-interface = gr.Interface(
-    fn=generate_caption,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Textbox(label="Generated Caption"),
-    title="LLaVA Image Captioning"
-)
-interface.launch()

 from PIL import Image
 # Load small LLaVA model
+processor = AutoProcessor.from_pretrained("llava/LLaVA-7B-llm-small")
 model = AutoModelForCausalLM.from_pretrained(
+    "llava/LLaVA-7B-llm-small",
     torch_dtype=torch.float16,
     device_map="auto"  # Automatically use GPU if available
 )
 )
 interface.launch()