# conduct-regulation / app_simple.py
# Author: Sangmin — "Update app_simple.py" (commit b07ff96, verified)
import json
import ast
from PIL import Image, ImageDraw, ImageFont
from openai import OpenAI
import os
import base64
import gradio as gr
import tempfile
def plot_bounding_boxes(image, bounding_boxes):
    """Draw model-predicted bounding boxes and text labels onto an image.

    Args:
        image: A PIL.Image.Image instance or a filesystem path to an image.
        bounding_boxes: Model response text expected to contain a JSON list of
            objects with "bbox_2d" ([x1, y1, x2, y2]) and "text_content" keys,
            optionally wrapped in a ```json fenced code block.

    Returns:
        A new PIL image with green boxes/labels drawn; the input image is not
        mutated. If parsing fails, the unannotated copy is returned.
    """
    if isinstance(image, str):
        img = Image.open(image)
    else:
        img = image.copy()
    width, height = img.size
    draw = ImageDraw.Draw(img)

    # Strip a markdown ```json ... ``` fence if the model wrapped its output.
    lines = bounding_boxes.splitlines()
    for i, line in enumerate(lines):
        if line == "```json":
            bounding_boxes = "\n".join(lines[i + 1:])
            bounding_boxes = bounding_boxes.split("```")[0]
            break

    # Try to load a font with CJK support, falling back through common paths.
    font = None
    font_paths = [
        # Noto CJK fonts (installed via packages.txt)
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto-cjk/NotoSansCJK-Regular.ttc",
        # Fallback fonts
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    ]
    for font_path in font_paths:
        try:
            font = ImageFont.truetype(font_path, size=14)
            break
        except OSError:
            # Font file missing or unreadable — try the next candidate.
            continue
    if font is None:
        font = ImageFont.load_default()

    # Parse and plot. The prompt asks for JSON, so try json.loads first
    # (ast.literal_eval cannot handle JSON's true/false/null); fall back to
    # literal_eval for Python-literal-style output (e.g. single quotes).
    try:
        try:
            bbox_list = json.loads(bounding_boxes)
        except json.JSONDecodeError:
            bbox_list = ast.literal_eval(bounding_boxes)
        if not isinstance(bbox_list, list):
            bbox_list = [bbox_list]
        for bbox in bbox_list:
            coords = bbox.get("bbox_2d", [])
            text = bbox.get("text_content", "")
            if len(coords) < 4:
                continue
            x1, y1, x2, y2 = coords[0], coords[1], coords[2], coords[3]
            # Normalize corner order so (x1, y1) is the top-left.
            if x1 > x2:
                x1, x2 = x2, x1
            if y1 > y2:
                y1, y2 = y2, y1
            # Clamp coordinates to the image bounds.
            x1 = max(0, min(x1, width - 1))
            y1 = max(0, min(y1, height - 1))
            x2 = max(0, min(x2, width - 1))
            y2 = max(0, min(y2, height - 1))
            # Draw the box.
            draw.rectangle(((x1, y1), (x2, y2)), outline='green', width=2)
            # Draw the text label just below the box when available.
            if text and font:
                text_x = x1 + 2
                text_y = y2 + 2
                draw.text((text_x, text_y), text, fill='green', font=font)
    except Exception as e:
        # Best-effort: log and return the unannotated copy rather than crash
        # the UI on malformed model output.
        print(f"Error plotting boxes: {e}")
    return img
def process_image(image, prompt):
    """Send the uploaded image plus prompt to the Qwen VL API and annotate.

    Args:
        image: PIL image from the Gradio input (None when nothing uploaded).
        prompt: Instruction text forwarded to the vision-language model.

    Returns:
        (annotated_image, response_text) on success, or (None, error_message)
        on failure — this two-tuple shape matches the two Gradio outputs.
    """
    if image is None:
        return None, "Please upload an image"

    # API key comes from the Space's secret store (never hard-coded).
    api_key = os.environ.get("QWEN_API_KEY")
    if not api_key:
        return None, "API key not configured. Please set QWEN_API_KEY in Space secrets."

    # Fixed model for this Space.
    model = "qwen2.5-vl-7b-instruct"

    temp_path = None
    try:
        # Save the image to a temp file so it can be re-read as raw JPEG bytes.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            image.save(tmp.name, format='JPEG', quality=95)
            temp_path = tmp.name

        # Base64-encode the JPEG for the data-URL message payload.
        with open(temp_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        # Call the OpenAI-compatible DashScope endpoint.
        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
        )
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are a helpful assistant."}]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    },
                    {"type": "text", "text": prompt}
                ]
            }
        ]
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )
        response = completion.choices[0].message.content

        # Draw the detected boxes onto a copy of the original image.
        annotated_image = plot_bounding_boxes(image, response)
        return annotated_image, response
    except Exception as e:
        return None, f"Error: {str(e)}"
    finally:
        # Always delete the temp file, even when the API call fails — the
        # original only cleaned up on the success path, leaking on error.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
# Create interface: two-column layout — image + prompt on the left,
# annotated result + raw JSON on the right.
with gr.Blocks(title="安全なう - 行為規制") as demo:
    # Simple text-based logo (inline-styled HTML rendered via Markdown).
    gr.Markdown(
        """
<h2 style='color: #15803d; font-family: monospace; margin: 10px 0;'>
◆ <span style='background: #15803d; color: white; padding: 2px 6px;'>ANZEN</span><span style='color: #15803d;'>NOW</span>
</h2>
"""
    )
    gr.Markdown("# 行為規制違反の検出")
    gr.Markdown("画像をアップロードしますと、行為規制違反の検出を行います。")
    with gr.Row():
        with gr.Column():
            # Input side: image upload plus an editable detection prompt.
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                value="Spotting all the text in the image with line-level, and output in JSON format.",
                label="Prompt",
                lines=2
            )
            submit_btn = gr.Button("🔍 行為規制違反事項を検出", variant="primary", size="lg")
        with gr.Column():
            # Output side: annotated image and the model's raw JSON response.
            output_image = gr.Image(label="Annotated Result")
            output_text = gr.Textbox(label="JSON Output", lines=12)
    # Wire the button to process_image; outputs map to (image, text).
    submit_btn.click(
        fn=process_image,
        inputs=[image_input, prompt_input],
        outputs=[output_image, output_text]
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)