# Spaces:
# Sleeping
# Sleeping
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
# =========================
# Model config
# =========================
# Hugging Face Hub repository holding the model weights and its custom code.
MODEL_ID = "vikhyatk/moondream2"
# Passed as `revision=` to both from_pretrained() calls.
# NOTE(review): None leaves the revision unpinned while the loaders run with
# trust_remote_code=True; consider pinning a specific revision so the
# executed remote code cannot change between runs.
REVISION = None
# Target passed to model.to().
DEVICE = "cpu"
# =========================
# Load model
# =========================
# Both objects are created once at import time and shared by every request.
# trust_remote_code=True lets the repository's own Python code run locally.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
).to(DEVICE)
# Switch to inference mode; the app never trains the model.
model.eval()
print("Model loaded successfully!")
# =========================
# Inference function
# =========================
def understand_image(image: "Image.Image | None", prompt: str) -> str:
    """Answer a free-form question about an uploaded image.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
            It must provide ``convert("RGB")`` (the UI supplies a PIL image).
        prompt: The question to ask about the image.

    Returns:
        The model's answer. When an input is missing or inference fails, a
        user-facing message is returned instead of raising, so the UI always
        has text to display.
    """
    # NOTE(review): the emoji below were reconstructed from mis-encoded
    # text; confirm the glyphs match what was intended.
    if image is None:
        return "❌ Please upload an image."
    if not prompt or not prompt.strip():
        return "❌ Please enter a question."

    try:
        image = image.convert("RGB")
        print(f"Processing question: {prompt}")

        # Moondream2's own API: encode the image first, then ask the question
        # against the resulting embeddings.
        with torch.no_grad():
            image_embeds = model.encode_image(image)
            answer = model.answer_question(
                image_embeds=image_embeds,
                question=prompt,
                tokenizer=tokenizer,
            )

        print(f"Answer generated: {answer}")
        return answer
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        error_msg = str(e)
        print(f"Error occurred: {error_msg}")

        # Add debug info: the first 20 public attribute names of the model.
        available_methods = [
            method for method in dir(model) if not method.startswith("_")
        ]
        return (
            f"❌ Error: {error_msg}\n\n"
            f"📋 Available model methods:\n{', '.join(available_methods[:20])}"
        )
# =========================
# Gradio UI
# =========================
# Layout: inputs (image, question, button) on the left, the answer on the
# right, and clickable example questions underneath.
# NOTE(review): the emoji in these labels were reconstructed from mis-encoded
# text; confirm the glyphs match the intended design.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌙 Moondream2 Image Understanding")
    gr.Markdown(
        "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
    )

    with gr.Row():
        with gr.Column():
            # type="pil" hands the handler a PIL image, which it converts to RGB.
            image_input = gr.Image(type="pil", label="📸 Upload Image")
            text_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="What do you see in this image?",
                value="Describe this image in detail.",
                lines=2,
            )
            btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")

        with gr.Column():
            output = gr.Textbox(
                label="💬 Answer",
                lines=10,
                placeholder="The AI's response will appear here...",
            )

    gr.Markdown("### 💡 Example Questions:")
    # Each example fills only the question box; the image is left untouched.
    gr.Examples(
        examples=[
            ["Describe what you see in this image."],
            ["What objects are in this image?"],
            ["What is the main subject?"],
            ["What colors are most prominent?"],
            ["Is this indoors or outdoors?"],
            ["How many people are in the image?"],
        ],
        inputs=text_input,
        label="Click to use these questions",
    )

    # Run the same handler from the button click and from the textbox's
    # submit event.
    for trigger in (btn.click, text_input.submit):
        trigger(
            understand_image,
            inputs=[image_input, text_input],
            outputs=output,
        )
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()