Spaces:

Dangindev
/

vietmeagent

Sleeping

App Files Files Community

vietmeagent / app.py

Dangindev

Upload app.py with huggingface_hub

cd3928f verified 7 months ago

raw

history blame contribute delete

6.79 kB

	import gradio as gr
	import os
	from PIL import Image
	import tempfile

	# Production API key
	os.environ["GEMINI_API_KEY"] = "AIzaSyCgatP7izHkaBn6im8AfXq0Ufmb0Fr-7dc"

	# Import VietMEAgent
	try:
	from core.viet_meagent import VietMEAgent
	agent = VietMEAgent()
	DEMO_MODE = False
	print("✅ VietMEAgent initialized successfully!")
	except Exception as e:
	print(f"❌ VietMEAgent initialization failed: {e}")
	DEMO_MODE = True

	def process_image(image, question):
	"""Process image and question through VietMEAgent"""
	if DEMO_MODE:
	return {
	"answer": "System initialization failed. Please check API key configuration.",
	"explanation": "VietMEAgent could not be initialized. Please ensure GEMINI_API_KEY is set correctly.",
	"cultural_objects": ["error"],
	"confidence": 0.0
	}

	try:
	# Save uploaded image temporarily
	with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
	image.save(tmp.name)

	# Process with VietMEAgent
	result = agent.generate_vietnamese_vqa(tmp.name, question)

	# Clean up
	os.unlink(tmp.name)

	if result.get("questions"):
	first_q = result["questions"][0]
	return {
	"answer": first_q.get("answer", "Không có câu trả lời"),
	"explanation": first_q.get("explanation", "Không có giải thích"),
	"cultural_objects": result.get("cultural_objects", []),
	"confidence": first_q.get("confidence", 0.0)
	}
	else:
	return {
	"answer": "Không thể xử lý hình ảnh",
	"explanation": "Có lỗi xảy ra trong quá trình xử lý",
	"cultural_objects": [],
	"confidence": 0.0
	}
	except Exception as e:
	return {
	"answer": f"Lỗi: {str(e)}",
	"explanation": "Vui lòng thử lại hoặc kiểm tra kết nối mạng",
	"cultural_objects": [],
	"confidence": 0.0
	}

	def main_interface(image, question):
	"""Main Gradio interface"""
	if not question.strip():
	question = "Đây là gì?"

	result = process_image(image, question)

	return (
	result["answer"],
	result["explanation"],
	f"Đối tượng văn hóa: {', '.join(result['cultural_objects']) if result['cultural_objects'] else 'Không phát hiện'}",
	f"Độ tin cậy: {result['confidence']:.2f}"
	)

	# Create Gradio interface
	with gr.Blocks(
	title="🏛️ VietMEAgent - Vietnamese Cultural VQA",
	theme=gr.themes.Soft(),
	css="""
	.gradio-container {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	}
	.gr-button-primary {
	background: linear-gradient(45deg, #ff6b6b, #ffa500);
	border: none;
	}
	"""
	) as demo:
	gr.Markdown("""
	# 🏛️ VietMEAgent: Vietnamese Multimodal Explanation Agent

	🇻🇳 Hệ thống AI trả lời câu hỏi về hình ảnh văn hóa Việt Nam

	Được phát triển dựa trên nghiên cứu FS-MEVQA (ACM MM 2024) với khả năng:
	- 🔍 Phân tích hình ảnh văn hóa Việt Nam
	- 💬 Trả lời câu hỏi bằng tiếng Việt
	- 🎯 Giải thích có bối cảnh văn hóa
	- 🏛️ Nhận diện 50+ đối tượng văn hóa truyền thống

	---
	""")

	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("### 📤 Input")
	image_input = gr.Image(
	type="pil",
	label="🖼️ Hình ảnh văn hóa Việt Nam",
	height=300
	)
	question_input = gr.Textbox(
	label="❓ Câu hỏi (Tiếng Việt)",
	placeholder="Đây là gì? / Món ăn này có ý nghĩa gì trong văn hóa Việt Nam?",
	value="Đây là gì?",
	lines=2
	)
	submit_btn = gr.Button("🔍 Phân tích Cultural VQA", variant="primary", size="lg")

	with gr.Column(scale=1):
	gr.Markdown("### 📊 Results")
	answer_output = gr.Textbox(
	label="🎯 Câu trả lời",
	lines=3,
	interactive=False
	)
	explanation_output = gr.Textbox(
	label="📝 Giải thích văn hóa",
	lines=5,
	interactive=False
	)
	cultural_objects_output = gr.Textbox(
	label="🏛️ Đối tượng văn hóa phát hiện",
	lines=2,
	interactive=False
	)
	confidence_output = gr.Textbox(
	label="📊 Độ tin cậy",
	lines=1,
	interactive=False
	)

	submit_btn.click(
	fn=main_interface,
	inputs=[image_input, question_input],
	outputs=[answer_output, explanation_output, cultural_objects_output, confidence_output]
	)

	with gr.Row():
	gr.Examples(
	examples=[
	["data_sample/images/am_thuc/000040.jpg", "Món ăn này có nguồn gốc từ đâu?"],
	["data_sample/images/kien_truc/000042.jpg", "Kiến trúc này thể hiện phong cách gì?"],
	["data_sample/images/phong_canh/000040.jpg", "Đây là danh lam thắng cảnh nào của Việt Nam?"]
	],
	inputs=[image_input, question_input]
	)

	gr.Markdown("""
	## 📚 Categories được hỗ trợ:

	\| 🍜 Ẩm thực \| 🏛️ Kiến trúc \| 👘 Trang phục \| 🎭 Lễ hội \|
	\|---\|---\|---\|---\|
	\| Phở, bánh mì, chè \| Chùa, đình, nhà sàn \| Áo dài, nón lá \| Tết, Trung thu \|
	\| 🎨 Thủ công \| 🎵 Nhạc cụ \| 🏞️ Phong cảnh \| 🎪 Dân gian \|
	\| Gốm sứ, thêu \| Đàn bầu, trống \| Vịnh Hạ Long \| Múa lân, rối nước \|

	## 🔧 Technical Details:
	- Models: CLIP, BLIP-VQA, Gemini, EasyOCR
	- Languages: Vietnamese + English
	- Architecture: Few-shot multimodal reasoning
	- Cultural KB: 459+ Vietnamese cultural objects

	---

	📖 Research: Based on FS-MEVQA paper (ACM MM 2024)
	👨‍💻 Developer: [@Dangindev](https://huggingface.co/Dangindev)
	🔗 GitHub: Coming soon...
	""")

	if __name__ == "__main__":
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=True
	)