Spaces:
Sleeping
Sleeping
import ast
import base64
import io
import json
import os
import tempfile

import gradio as gr
from openai import OpenAI
from PIL import Image, ImageDraw, ImageFont
def _load_label_font():
    """Return a 14pt truetype font with CJK support when available, else PIL's default."""
    font_paths = [
        # Noto CJK fonts (installed via packages.txt)
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto-cjk/NotoSansCJK-Regular.ttc",
        # Fallback fonts
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    ]
    for font_path in font_paths:
        try:
            return ImageFont.truetype(font_path, size=14)
        except OSError:
            # Font file missing/unreadable on this host — try the next candidate.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt etc.)
            continue
    return ImageFont.load_default()


def plot_bounding_boxes(image, bounding_boxes):
    """Draw model-predicted bounding boxes (with optional labels) onto an image.

    Args:
        image: A PIL.Image.Image, or a path to an image file.
        bounding_boxes: Model response text containing a list of objects shaped
            like {"bbox_2d": [x1, y1, x2, y2], "text_content": "..."},
            optionally wrapped in a ```json fenced code block.

    Returns:
        A new PIL.Image.Image with green boxes/labels drawn; the input image is
        never modified in place. On a malformed payload the un-annotated copy
        is returned (best effort).
    """
    if isinstance(image, str):
        img = Image.open(image)
    else:
        img = image.copy()
    width, height = img.size
    draw = ImageDraw.Draw(img)

    # Strip an optional ```json ... ``` fence around the payload.
    lines = bounding_boxes.splitlines()
    for i, line in enumerate(lines):
        if line == "```json":
            bounding_boxes = "\n".join(lines[i + 1:])
            bounding_boxes = bounding_boxes.split("```")[0]
            break

    font = _load_label_font()

    # Parse and plot.
    try:
        # The model is asked for JSON, so parse JSON first: ast.literal_eval
        # rejects valid JSON tokens `true`/`false`/`null`. Keep literal_eval
        # as a fallback for Python-style payloads (single quotes, etc.).
        try:
            bbox_list = json.loads(bounding_boxes)
        except ValueError:
            bbox_list = ast.literal_eval(bounding_boxes)
        if not isinstance(bbox_list, list):
            bbox_list = [bbox_list]
        for bbox in bbox_list:
            coords = bbox.get("bbox_2d", [])
            text = bbox.get("text_content", "")
            if len(coords) < 4:
                continue
            x1, y1, x2, y2 = coords[0], coords[1], coords[2], coords[3]
            # Normalize corner order so (x1, y1) is the top-left corner.
            if x1 > x2:
                x1, x2 = x2, x1
            if y1 > y2:
                y1, y2 = y2, y1
            # Clamp to image bounds.
            x1 = max(0, min(x1, width - 1))
            y1 = max(0, min(y1, height - 1))
            x2 = max(0, min(x2, width - 1))
            y2 = max(0, min(y2, height - 1))
            # Draw box.
            draw.rectangle(((x1, y1), (x2, y2)), outline='green', width=2)
            # Draw the label just below the box's bottom-left corner.
            # (The old `text and font` test was dead: font is never None
            # after load_default().)
            if text:
                draw.text((x1 + 2, y2 + 2), text, fill='green', font=font)
    except Exception as e:
        # Best effort: a malformed model response still yields the base image.
        print(f"Error plotting boxes: {e}")
    return img
def process_image(image, prompt):
    """Send an image + prompt to the Qwen-VL API and return the annotated result.

    Args:
        image: PIL.Image.Image uploaded via Gradio, or None.
        prompt: Instruction text forwarded to the model.

    Returns:
        (annotated_image, response_text) on success, or (None, error_message)
        when input/config is invalid or the API call fails.
    """
    if image is None:
        return None, "Please upload an image"

    # Get API key from environment variable (HF Secret)
    api_key = os.environ.get("QWEN_API_KEY")
    if not api_key:
        return None, "API key not configured. Please set QWEN_API_KEY in Space secrets."

    # Fixed model
    model = "qwen2.5-vl-7b-instruct"
    try:
        # Encode the image entirely in memory. The previous version wrote a
        # NamedTemporaryFile(delete=False) and only unlinked it on the success
        # path, leaking a temp file every time the API call raised.
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG', quality=95)
        base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

        # Call API (DashScope OpenAI-compatible endpoint).
        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
        )
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are a helpful assistant."}]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    },
                    {"type": "text", "text": prompt}
                ]
            }
        ]
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )
        response = completion.choices[0].message.content

        # Overlay any detected boxes from the model response onto the image.
        annotated_image = plot_bounding_boxes(image, response)
        return annotated_image, response
    except Exception as e:
        # Surface the failure in the UI instead of crashing the Space.
        return None, f"Error: {str(e)}"
# --- Gradio interface -------------------------------------------------------
_LOGO_HTML = """
<h2 style='color: #15803d; font-family: monospace; margin: 10px 0;'>
◆ <span style='background: #15803d; color: white; padding: 2px 6px;'>ANZEN</span><span style='color: #15803d;'>NOW</span>
</h2>
"""
_DEFAULT_PROMPT = "Spotting all the text in the image with line-level, and output in JSON format."

with gr.Blocks(title="安全なう - 行為規制") as demo:
    # Text-based logo banner, then the app title and a short usage note.
    gr.Markdown(_LOGO_HTML)
    gr.Markdown("# 行為規制違反の検出")
    gr.Markdown("画像をアップロードしますと、行為規制違反の検出を行います。")

    with gr.Row():
        # Left column: inputs (image upload, editable prompt, submit button).
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                value=_DEFAULT_PROMPT,
                label="Prompt",
                lines=2
            )
            submit_btn = gr.Button("🔍 行為規制違反事項を検出", variant="primary", size="lg")
        # Right column: outputs (annotated image and raw model JSON).
        with gr.Column():
            output_image = gr.Image(label="Annotated Result")
            output_text = gr.Textbox(label="JSON Output", lines=12)

    submit_btn.click(
        fn=process_image,
        inputs=[image_input, prompt_input],
        outputs=[output_image, output_text]
    )

if __name__ == "__main__":
    # Bind on all interfaces on the port HF Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)