"""Gradio tool that warps a side-on photo of a book into a flat front view.

The user uploads an image, clicks four corner points (top-left, top-right,
bottom-left, bottom-right, in that order), and the app applies an OpenCV
perspective transform, previews the result and offers it for download.

User-facing strings are Traditional Chinese (zh-TW).
"""
import os
import tempfile
from datetime import datetime

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Fixed scratch directory for generated files; a file saved under the same
# name overwrites the previous one.
TEMP_DIR = os.path.join(tempfile.gettempdir(), "gradio_side2front")
os.makedirs(TEMP_DIR, exist_ok=True)

# Corner names in the order the user is asked to click them
# (top-left, top-right, bottom-left, bottom-right).
CORNER_LABELS = ['左上', '右上', '左下', '右下']
# ASCII stand-ins used on the image when the CJK-capable font is unavailable.
ASCII_CORNER_LABELS = ['TL', 'TR', 'BL', 'BR']
# Font file expected to provide Chinese glyphs; must be resolvable by Pillow.
LABEL_FONT_FILE = "GenSekiGothic-B.ttc"


def manual_perspective_transform(image, points, output_size=(300, 400)):
    """Warp the quadrilateral described by *points* onto an upright rectangle.

    Args:
        image: Source image as a numpy array.
        points: Four ``[x, y]`` pairs ordered top-left, top-right,
            bottom-left, bottom-right.
        output_size: ``(width, height)`` of the output image in pixels.
            Defaults to the original fixed size of 300x400.

    Returns:
        The warped image, or ``None`` when *points* does not hold exactly
        four entries.
    """
    if len(points) != 4:
        return None

    out_w, out_h = output_size
    src = np.array(points, dtype=np.float32)
    # Destination corners follow the same order as the clicked points.
    dst = np.float32([[0, 0], [out_w, 0], [0, out_h], [out_w, out_h]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))


def format_selected_points(points):
    """Return the selected points as ``label[x, y]`` items joined by ``、``.

    Only as many points as there are corner labels are included.
    """
    return "、".join(
        f"{label}[{point[0]}, {point[1]}]"
        for label, point in zip(CORNER_LABELS, points)
    )


def _load_label_font(font_size):
    """Return ``(font, supports_cjk)`` for drawing the corner labels."""
    try:
        return ImageFont.truetype(LABEL_FONT_FILE, font_size), True
    except OSError:
        # Pillow's built-in default font is not expected to cover Chinese
        # text, so the caller switches to ASCII labels in this case.
        return ImageFont.load_default(), False


def _annotate_points(image, points):
    """Return a copy of *image* with each point drawn as a labelled red dot.

    NOTE(review): assumes *image* is an 8-bit RGB array, which is what a
    Gradio ``Image(type="numpy")`` component is expected to deliver.
    """
    height, width = image.shape[:2]
    min_dim = min(width, height)
    # Scale markers with the image: 0.5% / 2% of the shorter side, with
    # minimums of 10 px (radius) and 16 px (font size).
    circle_radius = max(10, int(min_dim * 0.005))
    font_size = max(16, int(min_dim * 0.02))

    annotated = image.copy()
    for point in points:
        cv2.circle(
            annotated,
            (int(point[0]), int(point[1])),
            circle_radius,
            (255, 0, 0),  # red in RGB channel order
            -1,
        )

    # Text is drawn with Pillow because OpenCV's built-in fonts cannot render
    # Chinese characters.
    pil_image = Image.fromarray(annotated)
    draw = ImageDraw.Draw(pil_image)
    font, supports_cjk = _load_label_font(font_size)
    labels = CORNER_LABELS if supports_cjk else ASCII_CORNER_LABELS

    for i, point in enumerate(points):
        label = labels[i] if i < len(labels) else f"Point{i+1}"
        # Offset the text from the dot so the two do not overlap.
        text_position = (
            point[0] + 10 + circle_radius,
            point[1] - 5 - circle_radius,
        )
        draw.text(text_position, label, font=font, fill=(255, 0, 0))

    return np.array(pil_image)


def update_coordinates(original_image, evt: gr.SelectData, points):
    """Record a clicked point and redraw the annotated preview.

    Args:
        original_image: The unmodified uploaded image, or ``None`` if the
            image has been cleared.
        evt: Gradio select event; ``evt.index`` holds the clicked ``(x, y)``.
        points: Points selected so far (list of ``[x, y]``).

    Returns:
        ``(points, coordinates_text, status_text, annotated_image)``.
        Once four points are stored, further clicks are ignored.
    """
    if original_image is None:
        # The image was cleared: reset the points and both messages.
        return [], "已重置初始化,請上傳新圖片", "已重置初始化,請上傳新圖片", None

    # Work on a copy so the caller's list is never mutated in place.
    points = list(points or [])
    if len(points) < 4:
        points.append([evt.index[0], evt.index[1]])

    formatted_message = (
        f"已選擇的點({len(points)}/4):{format_selected_points(points)}"
    )
    annotated = _annotate_points(original_image, points)
    return points, formatted_message, "", annotated


def reset_points(original_image):
    """Clear all selected points.

    Returns:
        ``(points, coordinates_text, status_text, annotated_image)``; the
        preview reverts to the untouched original image when one is loaded.
    """
    if original_image is None:
        return [], "已重置所有點", "已重置所有點", None
    return [], "", "已重置所有點", original_image


def _default_filename():
    """Return a timestamp-based base name for the output file."""
    return f"processedimg_{datetime.now().strftime('%Y%m%d_%H%M%S')}"


def process_image(image, points, custom_filename, file_format):
    """Generate the front view and save it for download.

    Args:
        image: Uploaded image as a numpy array, or ``None``.
        points: The four selected corner points.
        custom_filename: Optional base name for the saved file.
        file_format: File extension including the dot (e.g. ``".png"``).

    Returns:
        ``(result_image, file_path, file_path, status_text)``. On any
        validation or processing failure the first three items are ``None``
        and the status text describes the problem.
    """
    # Validate inputs.
    if image is None:
        return None, None, None, "請先上傳圖片。"
    if len(points) != 4:
        return None, None, None, "請選擇準確的4個點(左上、右上、左下、右下)。"

    height, width = image.shape[:2]
    if height < 300 or width < 300:
        return None, None, None, "圖片尺寸太小。請上傳至少 300x300 像素的圖片。"

    try:
        result = manual_perspective_transform(image, points)
        if result is None:
            return None, None, None, "轉換失敗,請重試。"

        if not custom_filename:
            custom_filename = _default_filename()
        # Keep only alphanumerics, '-' and '_' so the name is safe to use
        # as a path component.
        custom_filename = "".join(
            c for c in custom_filename if c.isalnum() or c in ('-', '_')
        )
        if not custom_filename:
            # Everything was stripped: fall back to the timestamp name.
            custom_filename = _default_filename()

        # Save into the fixed scratch directory, overwriting any older file
        # with the same name. The array is converted from RGB to BGR first.
        out_path = os.path.join(TEMP_DIR, custom_filename + file_format)
        written = cv2.imwrite(out_path, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
        if not written:
            # A falsy return from imwrite means nothing was saved; do not
            # hand a non-existent path to the download component.
            return None, None, None, "處理過程發生錯誤:無法寫入輸出檔案"

        return result, out_path, out_path, "處理成功!"
    except Exception as e:
        # UI boundary: surface the error in the status box instead of
        # letting the callback raise.
        return None, None, None, f"處理過程發生錯誤:{str(e)}"


def handle_image_change(img):
    """Handle upload/clear of the input image.

    Returns:
        ``(original_image, points, coordinates_text, status_text,
        annotated_image)`` with the point selection reset.
    """
    if img is not None:
        return img, [], "", "已上傳新圖片,請標示四個角落點", img
    return None, [], "已清除圖片,請上傳新圖", "已清除圖片,請上傳新圖", None


# Gradio interface setup.
with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.yellow, secondary_hue=gr.themes.colors.red)) as interface:
    # Title and description.
    gr.HTML("""

OpenCV工具:書本側面轉正面視圖

使用者選擇圖檔輸入方式:上傳檔案或是透過已複製到剪貼簿的圖片,平台進行圖片拉平渲染。

""")

    with gr.Row():
        with gr.Column():
            # Input area.
            gr.Markdown("## 上傳側面圖片檔案(至少300x300像素的清晰圖片)")
            gr.Markdown("#### 請於下方圖區以+字點擊四個角落點(請依序定位:左上、右上、左下、右下)")
            image_input = gr.Image(label="圖檔標示區", type="numpy", interactive=True)
            points_tracker = gr.State([])

        with gr.Column():
            gr.Markdown("## 角落點視覺化參考")
            gr.Markdown("#### 已選取的點示意")
            annotated_image = gr.Image(label="視覺化圖區", type="numpy", interactive=False)

    with gr.Row():
        with gr.Column():
            coords_output = gr.Textbox(label="已選擇的點位置", interactive=False)
            reset_button = gr.Button("重置選擇的點")
            original_image = gr.State()  # holds the untouched uploaded image

    gr.HTML("""
""")

    with gr.Row():
        with gr.Column():
            # Output settings.
            gr.Markdown("## 輸出檔案設定")
            custom_filename = gr.Textbox(
                label="自定義檔案名稱(可選)",
                placeholder="輸入檔名或留空使用時間戳",
                value=""
            )
            file_format = gr.Dropdown(
                label="檔案格式",
                choices=[".png", ".jpg"],
                value=".png"
            )
            process_button = gr.Button("生成正面視圖", variant="primary")
            gr.Markdown("## 狀態提示")
            status_output = gr.Textbox(label="狀態訊息", interactive=False)

        with gr.Column():
            gr.Markdown("## 正面視圖結果")
            output_preview = gr.Image(label="預覽區")
            # File component used to download the generated file.
            download_file = gr.File(label="檔案完成下載區")

    # Event bindings.
    image_input.select(
        update_coordinates,
        inputs=[original_image, points_tracker],
        outputs=[points_tracker, coords_output, status_output, annotated_image]
    )

    # When the image is uploaded or cleared, refresh the stored original and
    # the annotated preview, and reset the point selection.
    image_input.change(
        handle_image_change,
        inputs=image_input,
        outputs=[original_image, points_tracker, coords_output, status_output, annotated_image]
    )

    reset_button.click(
        reset_points,
        inputs=[original_image],
        outputs=[points_tracker, coords_output, status_output, annotated_image]
    )

    process_button.click(
        process_image,
        inputs=[
            image_input,
            points_tracker,
            custom_filename,
            file_format
        ],
        outputs=[
            output_preview,
            download_file,
            download_file,  # same path sent to the File component a second time, matching process_image's 4-tuple
            status_output
        ]
    )

if __name__ == "__main__":
    interface.launch()