```python
import gradio as gr
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

model_id = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto",
)

def chat_with_llava(image, question, history):
    # Take history as an explicit argument instead of a mutable default
    # (history=[] would be shared across calls); it can be None on the
    # first turn, so normalize it here.
    history = history or []
    if image is None or not question.strip():
        history.append([question, "Please provide both an image and a question."])
        return history
    # Format the multimodal prompt with the model's chat template
    # (for LLaVA-1.5 this renders to "USER: <image>\n{question} ASSISTANT:").
    conversation = [
        {"role": "user", "content": [
            {"type": "text", "text": question},
            {"type": "image"},
        ]}
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    # Encode inputs; cast floating-point tensors to the model's dtype so
    # fp16 weights on GPU don't receive fp32 pixel values.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(
        model.device, model.dtype
    )
    outputs = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens, not the echoed prompt.
    answer = processor.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    ).strip()
    history.append([question, answer])
    return history

# gr.ChatInterface expects fn(message, history) and has no `inputs`
# parameter, so build the image + chat layout explicitly with Blocks.
with gr.Blocks(title="🖐️ AI Palm Reader") as demo:
    gr.Markdown(
        "Upload your palm image and ask a question—LLaVA will respond "
        "with a palmistry-style reading."
    )
    palm_image = gr.Image(type="pil", label="Palm Image")
    chatbot = gr.Chatbot(label="Reading")
    question_box = gr.Textbox(label="Your Question")
    question_box.submit(chat_with_llava, [palm_image, question_box, chatbot], chatbot)

demo.launch()
```
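Running this as a Space also means declaring the app's dependencies. Below is a minimal `requirements.txt` sketch: the entries follow from the imports and from `device_map="auto"` (which requires `accelerate`), but leaving them unpinned is an assumption rather than a tested constraint, and `processor.apply_chat_template` in particular needs a recent `transformers` release.

```
gradio
transformers
torch
accelerate
```

Note that on CPU-only hardware the float32 fallback needs roughly 28 GB of RAM for the weights alone (7B parameters × 4 bytes), so GPU hardware is the realistic target for this model.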