Spaces:

stpete2
/

image_understand

Sleeping

App Files Files Community

image_understand / app.py

stpete2

Update app.py

b812f5d verified 16 days ago

raw

history blame contribute delete

3.63 kB

	import gradio as gr
	import torch
	from PIL import Image
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# =========================
	# Model config
	# =========================
	MODEL_ID = "vikhyatk/moondream2"  # Hub repository the tokenizer and model are loaded from
	REVISION = None  # no specific revision is pinned
	DEVICE = "cpu"  # device the model is moved to after loading

	# =========================
	# Load model
	# =========================
	# NOTE(review): trust_remote_code=True runs modelling code shipped in the
	# Hub repository; with REVISION left as None, upstream changes to that code
	# are presumably picked up on the next start — consider pinning a revision.
	print("Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(
	    MODEL_ID, revision=REVISION, trust_remote_code=True
	)

	print("Loading model...")
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    revision=REVISION,
	    trust_remote_code=True,
	    torch_dtype=torch.float32,
	    low_cpu_mem_usage=True,
	)
	model = model.to(DEVICE)

	# Switch to evaluation mode; this script only runs inference.
	model.eval()
	print("Model loaded successfully!")

	# =========================
	# Inference function
	# =========================
	def understand_image(image, prompt):
	    """Answer a question about an image using the loaded Moondream2 model.

	    Args:
	        image: Image to analyse, or ``None`` when nothing was uploaded.
	            It must provide ``convert("RGB")`` (the UI passes a PIL image).
	        prompt: Question text entered by the user.

	    Returns:
	        The model's answer, or a message starting with "❌" when the image
	        or question is missing or when inference raises an exception.
	    """
	    if image is None:
	        return "❌ Please upload an image."

	    # Rejects None, the empty string and whitespace-only questions.
	    if not prompt or not prompt.strip():
	        return "❌ Please enter a question."

	    try:
	        image = image.convert("RGB")

	        print(f"Processing question: {prompt}")

	        # Moondream2 API: encode the image first, then ask the question.
	        # Gradient tracking is disabled because this is inference only.
	        with torch.no_grad():
	            # Encode the image.
	            image_embeds = model.encode_image(image)

	            # Answer the question.
	            answer = model.answer_question(
	                image_embeds=image_embeds,
	                question=prompt,
	                tokenizer=tokenizer,
	            )

	        print(f"Answer generated: {answer}")
	        return answer

	    except Exception as e:
	        # UI boundary: report the failure in the output box rather than
	        # letting the exception propagate to Gradio.
	        error_msg = str(e)
	        print(f"Error occurred: {error_msg}")

	        # Debug aid: append the first 20 public attribute names of the model.
	        available_methods = [name for name in dir(model) if not name.startswith('_')]
	        return f"❌ Error: {error_msg}\n\n🔍 Available model methods:\n{', '.join(available_methods[:20])}"

	# =========================
	# Gradio UI
	# =========================
	# Single-page UI: image and question inputs on the left, answer on the right.
	# NOTE(review): the nesting below was reconstructed from flattened source;
	# placing the examples and event wiring directly under Blocks is assumed —
	# confirm against the original layout.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 🌓 Moondream2 Image Understanding")
	    gr.Markdown(
	        "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
	    )

	    with gr.Row():
	        with gr.Column():
	            # type="pil" hands the handler a PIL image.
	            image_input = gr.Image(type="pil", label="📸 Upload Image")
	            text_input = gr.Textbox(
	                label="❓ Your Question",
	                placeholder="What do you see in this image?",
	                value="Describe this image in detail.",
	                lines=2,
	            )
	            btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")

	        with gr.Column():
	            output = gr.Textbox(
	                label="💬 Answer",
	                lines=10,
	                placeholder="The AI's response will appear here...",
	            )

	    gr.Markdown("### 💡 Example Questions:")
	    # Each example row supplies a value for the question textbox only.
	    gr.Examples(
	        examples=[
	            ["Describe what you see in this image."],
	            ["What objects are in this image?"],
	            ["What is the main subject?"],
	            ["What colors are most prominent?"],
	            ["Is this indoors or outdoors?"],
	            ["How many people are in the image?"],
	        ],
	        inputs=text_input,
	        label="Click to use these questions",
	    )

	    # Both the button and submitting the textbox run the same handler.
	    btn.click(
	        understand_image,
	        inputs=[image_input, text_input],
	        outputs=output,
	    )

	    text_input.submit(
	        understand_image,
	        inputs=[image_input, text_input],
	        outputs=output,
	    )

	# Start the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()