import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
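
# Load the LLaVA 1.5 7B checkpoint once at startup; fp16 keeps the model
# within typical GPU VRAM budgets, with an fp32 fallback for CPU-only hosts.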
model_id = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto",
)
def chat_with_llava(question, history, image):
    """Answer a question about the uploaded palm image.

    gr.ChatInterface calls fn(message, history, *additional_inputs), so the
    image arrives as an extra argument and the reply is returned as a string.
    """
    if image is None or not question.strip():
        return "Please provide both a palm image and a question."
    # Format the multimodal prompt: one user turn with text plus an image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image"},
            ],
        }
    ]
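    # apply_chat_template renders the conversation into LLaVA 1.5's prompt
    # format (roughly "USER: <image>\n{question} ASSISTANT:")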
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    # Encode inputs; casting floating-point tensors to the model's dtype
    # avoids a float32/float16 mismatch on fp16 GPUs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(
        model.device, model.dtype
    )
    outputs = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens, slicing off the echoed prompt
    answer = processor.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return answer.strip()
# gr.ChatInterface provides its own chat textbox; the palm image is wired in
# as an additional input that is passed through to chat_with_llava.
chat_interface = gr.ChatInterface(
    fn=chat_with_llava,
    additional_inputs=[gr.Image(type="pil", label="Palm Image")],
    title="🖐️ AI Palm Reader",
    description=(
        "Upload your palm image and ask a question—LLaVA will respond with "
        "a palmistry-style reading."
    ),
)
chat_interface.launch()
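
# A minimal sketch of exercising the inference path directly, outside the UI
# (the filename "palm_example.jpg" is hypothetical, assuming such an image
# exists locally):
#
#   img = Image.open("palm_example.jpg")
#   print(chat_with_llava("What does my life line suggest?", [], img))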