# conduct-regulation / app_simple.py
# Author: Sangmin — "Update app_simple.py" (commit b07ff96, verified)
import json
import ast
from PIL import Image, ImageDraw, ImageFont
from openai import OpenAI
import os
import base64
import gradio as gr
import tempfile
def plot_bounding_boxes(image, bounding_boxes):
    """Draw model-predicted bounding boxes and text labels onto an image.

    Args:
        image: A PIL.Image.Image instance or a filesystem path to an image.
        bounding_boxes: Model response text expected to contain a JSON list of
            objects with "bbox_2d" ([x1, y1, x2, y2]) and "text_content" keys,
            optionally wrapped in a ```json fenced code block.

    Returns:
        A new PIL image with green boxes/labels drawn; the input image is not
        mutated. If parsing fails, the unannotated copy is returned.
    """
    if isinstance(image, str):
        img = Image.open(image)
    else:
        img = image.copy()
    width, height = img.size
    draw = ImageDraw.Draw(img)

    # Strip a markdown ```json ... ``` fence if the model wrapped its output.
    lines = bounding_boxes.splitlines()
    for i, line in enumerate(lines):
        if line == "```json":
            bounding_boxes = "\n".join(lines[i + 1:])
            bounding_boxes = bounding_boxes.split("```")[0]
            break

    # Try to load a font with CJK support, falling back through common paths.
    font = None
    font_paths = [
        # Noto CJK fonts (installed via packages.txt)
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto-cjk/NotoSansCJK-Regular.ttc",
        # Fallback fonts
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    ]
    for font_path in font_paths:
        try:
            font = ImageFont.truetype(font_path, size=14)
            break
        except OSError:
            # Font file missing or unreadable — try the next candidate.
            continue
    if font is None:
        font = ImageFont.load_default()

    # Parse and plot. The prompt asks for JSON, so try json.loads first
    # (ast.literal_eval cannot handle JSON's true/false/null); fall back to
    # literal_eval for Python-literal-style output (e.g. single quotes).
    try:
        try:
            bbox_list = json.loads(bounding_boxes)
        except json.JSONDecodeError:
            bbox_list = ast.literal_eval(bounding_boxes)
        if not isinstance(bbox_list, list):
            bbox_list = [bbox_list]
        for bbox in bbox_list:
            coords = bbox.get("bbox_2d", [])
            text = bbox.get("text_content", "")
            if len(coords) < 4:
                continue
            x1, y1, x2, y2 = coords[0], coords[1], coords[2], coords[3]
            # Normalize corner order so (x1, y1) is the top-left.
            if x1 > x2:
                x1, x2 = x2, x1
            if y1 > y2:
                y1, y2 = y2, y1
            # Clamp coordinates to the image bounds.
            x1 = max(0, min(x1, width - 1))
            y1 = max(0, min(y1, height - 1))
            x2 = max(0, min(x2, width - 1))
            y2 = max(0, min(y2, height - 1))
            # Draw the box.
            draw.rectangle(((x1, y1), (x2, y2)), outline='green', width=2)
            # Draw the text label just below the box when available.
            if text and font:
                text_x = x1 + 2
                text_y = y2 + 2
                draw.text((text_x, text_y), text, fill='green', font=font)
    except Exception as e:
        # Best-effort: log and return the unannotated copy rather than crash
        # the UI on malformed model output.
        print(f"Error plotting boxes: {e}")
    return img
def process_image(image, prompt):
    """Send the uploaded image plus prompt to the Qwen VL API and annotate.

    Args:
        image: PIL image from the Gradio input (None when nothing uploaded).
        prompt: Instruction text forwarded to the vision-language model.

    Returns:
        (annotated_image, response_text) on success, or (None, error_message)
        on failure — this two-tuple shape matches the two Gradio outputs.
    """
    if image is None:
        return None, "Please upload an image"

    # API key comes from the Space's secret store (never hard-coded).
    api_key = os.environ.get("QWEN_API_KEY")
    if not api_key:
        return None, "API key not configured. Please set QWEN_API_KEY in Space secrets."

    # Fixed model for this Space.
    model = "qwen2.5-vl-7b-instruct"

    temp_path = None
    try:
        # Save the image to a temp file so it can be re-read as raw JPEG bytes.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            image.save(tmp.name, format='JPEG', quality=95)
            temp_path = tmp.name

        # Base64-encode the JPEG for the data-URL message payload.
        with open(temp_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        # Call the OpenAI-compatible DashScope endpoint.
        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
        )
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are a helpful assistant."}]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    },
                    {"type": "text", "text": prompt}
                ]
            }
        ]
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )
        response = completion.choices[0].message.content

        # Draw the detected boxes onto a copy of the original image.
        annotated_image = plot_bounding_boxes(image, response)
        return annotated_image, response
    except Exception as e:
        return None, f"Error: {str(e)}"
    finally:
        # Always delete the temp file, even when the API call fails — the
        # original only cleaned up on the success path, leaking on error.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
# Create interface: two-column layout — image + prompt on the left,
# annotated result + raw JSON on the right.
with gr.Blocks(title="安全なう - 行為規制") as demo:
    # Simple text-based logo (inline-styled HTML rendered via Markdown).
    gr.Markdown(
        """
<h2 style='color: #15803d; font-family: monospace; margin: 10px 0;'>
◆ <span style='background: #15803d; color: white; padding: 2px 6px;'>ANZEN</span><span style='color: #15803d;'>NOW</span>
</h2>
"""
    )
    gr.Markdown("# 行為規制違反の検出")
    gr.Markdown("画像をアップロードしますと、行為規制違反の検出を行います。")
    with gr.Row():
        with gr.Column():
            # Input side: image upload plus an editable detection prompt.
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                value="Spotting all the text in the image with line-level, and output in JSON format.",
                label="Prompt",
                lines=2
            )
            submit_btn = gr.Button("🔍 行為規制違反事項を検出", variant="primary", size="lg")
        with gr.Column():
            # Output side: annotated image and the model's raw JSON response.
            output_image = gr.Image(label="Annotated Result")
            output_text = gr.Textbox(label="JSON Output", lines=12)
    # Wire the button to process_image; outputs map to (image, text).
    submit_btn.click(
        fn=process_image,
        inputs=[image_input, prompt_input],
        outputs=[output_image, output_text]
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)