"""Slide Fixer: OCR slide images, erase the detected text, export an editable PPTX.

Pipeline: EasyOCR finds text boxes -> the user toggles / merges / adds boxes in
a Gradio click editor -> the active boxes are inpainted away (LaMa or OpenCV)
-> every image becomes one slide whose background is the cleaned image, with
editable text boxes placed over the original text positions.
"""
import io
import os
import tempfile
import traceback

import gradio as gr
import cv2
import numpy as np
import easyocr
from PIL import Image, ImageDraw
import torch
from torch.hub import download_url_to_file

# python-pptx is optional at import time; PPTX generation fails later without it.
try:
    from pptx import Presentation
    from pptx.util import Pt
    from pptx.dml.color import RGBColor
except ImportError:
    print("⚠️ python-pptx is not installed. Please run: pip install python-pptx")

# Slide dimensions are kept within 1 in .. 56 in (72 pt .. 4032 pt), the range
# PowerPoint accepts for a slide size.
MIN_SLIDE_PT = 72
MAX_SLIDE_PT = 4032


# ==============================================================================
# 1. LaMa inpainting model wrapper (works on both CPU and GPU)
# ==============================================================================
class SafeLama:
    """Callable wrapper around the TorchScript "big-lama" inpainting model."""

    # Height/width are padded up to a multiple of this value before inference.
    PAD_MODULO = 8

    def __init__(self):
        """Download the model on first use and load it onto the best device.

        Raises:
            Exception: re-raised unchanged when the TorchScript model cannot
                be loaded.
        """
        # Prefer the GPU when available, otherwise fall back to the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"LaMa is running on: {self.device}")

        # Model file download location and local cache path.
        self.model_url = "https://github.com/sanster/models/releases/download/add_big_lama/big-lama.pt"
        self.model_path = os.path.join(os.path.expanduser("~"), ".cache", "big-lama.pt")

        if not os.path.exists(self.model_path):
            print(f"Downloading LaMa model to {self.model_path}...")
            os.makedirs(os.path.dirname(self.model_path), exist_ok=True)
            download_url_to_file(self.model_url, self.model_path)

        # map_location places the weights on the selected device.
        try:
            self.model = torch.jit.load(self.model_path, map_location=self.device)
            self.model.eval()
            self.model.to(self.device)
            print("LaMa model loaded successfully.")
        except Exception as e:
            print(f"Fatal Error loading LaMa model: {e}")
            raise

    def __call__(self, image: Image.Image, mask: Image.Image) -> Image.Image:
        """Inpaint the masked region of *image*.

        Args:
            image: PIL image; converted to RGB before inference.
            mask: PIL image (L or RGB) where 0 = keep and 255 = remove.

        Returns:
            An RGB PIL image with the same size as *image*.
        """
        # Pre-processing: to numpy, scaled to [0, 1].
        img_np = np.array(image.convert("RGB")).astype(np.float32) / 255.0
        mask_np = np.array(mask.convert("L")).astype(np.float32) / 255.0

        # Pad bottom/right so both sides are multiples of PAD_MODULO; the
        # padding is cropped off again after inference.
        height, width = img_np.shape[:2]
        pad_h = -height % self.PAD_MODULO
        pad_w = -width % self.PAD_MODULO
        if pad_h or pad_w:
            img_np = np.pad(img_np, ((0, pad_h), (0, pad_w), (0, 0)), mode="symmetric")
            mask_np = np.pad(mask_np, ((0, pad_h), (0, pad_w)), mode="symmetric")

        # To tensors: image -> (1, 3, H, W), mask -> (1, 1, H, W).
        img_t = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(self.device)
        mask_t = torch.from_numpy(mask_np).unsqueeze(0).unsqueeze(0).to(self.device)
        mask_t = (mask_t > 0.5).float()  # binarize

        with torch.no_grad():
            output = self.model(img_t, mask_t)

        # Post-processing: back to an HxWx3 uint8 image, dropping the padding.
        result = output[0].permute(1, 2, 0).detach().cpu().numpy()
        result = result[:height, :width]
        result = np.clip(result * 255, 0, 255).astype(np.uint8)
        return Image.fromarray(result)


# ==============================================================================
# 2. Core logic (OCR, preview drawing, PPTX generation)
# ==============================================================================
class SlideCleanerCore:
    """OCR detection, preview rendering, mask building, inpainting and slide export."""

    def __init__(self):
        """Create the EasyOCR reader and try to load LaMa.

        A LaMa load failure is tolerated on purpose: ``lama_model`` stays
        ``None`` and ``inpaint_image`` falls back to OpenCV.
        """
        print("Initializing EasyOCR...")
        use_gpu = torch.cuda.is_available()
        self.reader = easyocr.Reader(['ja', 'en'], gpu=use_gpu)

        self.lama_model = None
        try:
            print("Loading LaMa wrapper...")
            self.lama_model = SafeLama()
        except Exception as e:
            print(f"LaMa load failed: {e}")

    def detect_text_initial(self, image_np):
        """Run OCR and return the editable list of box states.

        Returns:
            A list of dicts ``{'bbox': [x1, y1, x2, y2], 'text': str,
            'active': True}``; every detected box starts as an erase target.
        """
        print("Running OCR detection...")
        results = self.reader.readtext(image_np)

        box_states = []
        for (bbox, text, _prob) in results:
            (tl, tr, br, bl) = bbox
            # Axis-aligned bounds as plain Python ints.
            x1 = int(min(tl[0], bl[0]))
            y1 = int(min(tl[1], tr[1]))
            x2 = int(max(tr[0], br[0]))
            y2 = int(max(bl[1], br[1]))
            box_states.append({
                'bbox': [x1, y1, x2, y2],
                'text': text,
                'active': True,  # initial state: erase target
            })
        return box_states

    def draw_preview(self, image_np, box_states, temp_point=None, highlight_idx=None):
        """Render the preview image with box overlays.

        - Active boxes: red frame (erase target).
        - The active box at ``highlight_idx`` (pending merge source): cyan frame.
        - ``temp_point`` (start point of a manual box): yellow dot.

        Returns:
            An RGB PIL image.
        """
        pil_img = Image.fromarray(image_np).convert("RGBA")
        overlay = Image.new("RGBA", pil_img.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(overlay)

        for i, item in enumerate(box_states):
            if not item['active']:
                continue
            x1, y1, x2, y2 = item['bbox']

            if highlight_idx is not None and i == highlight_idx:
                fill_color = (0, 255, 255, 150)  # translucent cyan
                outline_color = "cyan"
            else:
                fill_color = (255, 0, 0, 100)  # translucent red
                outline_color = "red"

            draw.rectangle([x1, y1, x2, y2], fill=fill_color, outline=outline_color, width=2)

        # Start-point marker while a manual box is being drawn.
        if temp_point:
            tx, ty = temp_point
            r = 5
            draw.ellipse((tx - r, ty - r, tx + r, ty + r), fill="yellow", outline="black")

        return Image.alpha_composite(pil_img, overlay).convert("RGB")

    def create_mask_from_states(self, image_shape, box_states, dilation=10):
        """Build the inpainting mask (255 = remove) from the active boxes.

        Args:
            image_shape: shape of the image; only the first two entries
                (height, width) are used.
            box_states: list of box state dicts.
            dilation: side of the square dilation kernel in pixels. May be a
                float (e.g. from a UI slider); it is truncated to int.
        """
        h, w = image_shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)

        for item in box_states:
            if item['active']:
                x1, y1, x2, y2 = (int(v) for v in item['bbox'])
                cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)

        # np.ones needs an integer shape, so coerce slider floats here.
        kernel_size = int(dilation)
        if kernel_size > 0:
            kernel = np.ones((kernel_size, kernel_size), np.uint8)
            mask = cv2.dilate(mask, kernel, iterations=1)

        return mask

    def inpaint_image(self, image_np, mask_np, method):
        """Erase the masked region with LaMa, or with OpenCV as the fallback.

        OpenCV (Telea) is used when *method* is not ``"LaMa"`` or when the
        LaMa model failed to load. An all-zero mask short-circuits to a copy
        of the input, since there is nothing to erase.
        """
        if not mask_np.any():
            return image_np.copy()

        if method == "LaMa" and self.lama_model is not None:
            pil_img = Image.fromarray(image_np)
            pil_mask = Image.fromarray(mask_np)
            return np.array(self.lama_model(pil_img, pil_mask))

        # OpenCV fallback
        return cv2.inpaint(image_np, mask_np, 3, cv2.INPAINT_TELEA)

    def add_slide_to_prs(self, prs, original_img_np, clean_img_np, box_states):
        """Append one slide: cleaned image as background plus one text box per active box.

        Errors are reported with ``print`` and swallowed (best effort): a bad
        box is skipped, and a failure at slide level leaves *prs* without the
        remaining content instead of aborting the whole export.
        """
        try:
            slide = prs.slides.add_slide(prs.slide_layouts[6])  # blank layout

            img_h, img_w = (int(v) for v in original_img_np.shape[:2])

            # Background picture: python-pptx accepts a file-like object, so
            # the PNG is kept in memory and no temp file can be leaked.
            png_buffer = io.BytesIO()
            Image.fromarray(clean_img_np).save(png_buffer, format="PNG")
            png_buffer.seek(0)
            slide.shapes.add_picture(png_buffer, 0, 0,
                                     width=prs.slide_width, height=prs.slide_height)

            # Slide size in EMU.
            slide_width_emu = prs.slide_width
            slide_height_emu = prs.slide_height

            for item in box_states:
                if not item['active']:
                    continue
                try:
                    # Force numpy scalars to plain Python ints.
                    x1, y1, x2, y2 = [int(v) for v in item['bbox']]

                    # Skip degenerate boxes.
                    if x2 <= x1 or y2 <= y1:
                        continue

                    # Position relative to the image (0.0 - 1.0).
                    rel_x = x1 / img_w
                    rel_y = y1 / img_h
                    rel_w = (x2 - x1) / img_w
                    rel_h = (y2 - y1) / img_h

                    # Convert to slide coordinates (EMU).
                    left = int(slide_width_emu * rel_x)
                    top = int(slide_height_emu * rel_y)
                    width = int(slide_width_emu * rel_w)
                    height = int(slide_height_emu * rel_h)

                    textbox = slide.shapes.add_textbox(left, top, width, height)
                    tf = textbox.text_frame
                    tf.word_wrap = True

                    p = tf.paragraphs[0]
                    p.text = str(item['text'])

                    # Font size: about 70% of the box height, at least 8 pt
                    # (1 pt = 12700 EMU).
                    font_size_emu = slide_height_emu * rel_h * 0.7
                    p.font.size = Pt(max(8, font_size_emu / 12700))

                    # Text colour is fixed to black (#000000).
                    p.font.color.rgb = RGBColor(0, 0, 0)
                    p.font.name = "Meiryo"  # font with Japanese glyphs

                except Exception as e_box:
                    print(f"Skipping text box due to error: {e_box}")
                    continue

        except Exception as e_slide:
            print(f"Error adding slide: {e_slide}")


# Module-level instance shared by all handlers (loads the OCR / LaMa models).
core = SlideCleanerCore()


# ==============================================================================
# 3. UI logic (event handlers)
# ==============================================================================
def _uploaded_path(file_obj):
    """Return the filesystem path of an upload given as a str or a tempfile-like object."""
    return getattr(file_obj, "name", file_obj)


def _load_rgb_array(path):
    """Read the image at *path* as an RGB array, closing the file handle."""
    with Image.open(path) as img:
        return np.array(img.convert("RGB"))


def _find_box_at(boxes, x, y):
    """Return the index of the first box containing point (x, y), or -1."""
    for i, item in enumerate(boxes):
        x1, y1, x2, y2 = item['bbox']
        if x1 <= x <= x2 and y1 <= y <= y2:
            return i
    return -1


def _merge_boxes(box_a, box_b):
    """Return a new active box spanning both inputs, upper box's text first."""
    ax1, ay1, ax2, ay2 = box_a['bbox']
    bx1, by1, bx2, by2 = box_b['bbox']
    if ay1 < by1:
        new_text = str(box_a['text']) + " " + str(box_b['text'])
    else:
        new_text = str(box_b['text']) + " " + str(box_a['text'])
    return {
        'bbox': [min(ax1, bx1), min(ay1, by1), max(ax2, bx2), max(ay2, by2)],
        'text': new_text,
        'active': True,
    }


def _fit_slide_size_pt(width_px, height_px):
    """Map pixel dimensions to slide points (1 px -> 1 pt), scaled into the allowed range.

    The aspect ratio is preserved; sizes already within
    ``MIN_SLIDE_PT..MAX_SLIDE_PT`` are returned unchanged.
    """
    longest = max(width_px, height_px)
    shortest = min(width_px, height_px)
    scale = 1.0
    if longest > MAX_SLIDE_PT:
        scale = MAX_SLIDE_PT / longest
    elif shortest < MIN_SLIDE_PT:
        # Scale up, but never past the upper limit on the long side.
        scale = min(MIN_SLIDE_PT / shortest, MAX_SLIDE_PT / longest)
    return width_px * scale, height_px * scale


def on_files_upload(files):
    """Initialise the batch after an upload and show the first image.

    Returns:
        ``(preview, batch_data, index, boxes, status_message)``.
    """
    if not files:
        return None, [], 0, [], "No files selected."

    # Gradio passes either tempfile-like objects or plain path strings,
    # depending on version / ``type`` setting.
    file_paths = [_uploaded_path(f) for f in files]

    # To save memory only the path and the box metadata are kept per image,
    # not the pixel data.
    batch_data = [{'path': p, 'boxes': None} for p in file_paths]

    # Load the first image and run OCR for the initial view.
    try:
        first_np = _load_rgb_array(file_paths[0])
        first_boxes = core.detect_text_initial(first_np)
        batch_data[0]['boxes'] = first_boxes
        preview = core.draw_preview(first_np, first_boxes)
    except Exception as e:
        return None, [], 0, [], f"Error loading image: {e}"

    msg = f"Loaded {len(files)} images. Showing 1/{len(files)}."
    return preview, batch_data, 0, first_boxes, msg


def load_image_at_index(batch_data, index):
    """Load the image at *index*, running OCR if it has no cached boxes.

    Returns:
        ``(preview, boxes, image_array)``, or ``(None, None, None)`` when the
        index is out of range or loading fails.
    """
    if not batch_data or index < 0 or index >= len(batch_data):
        return None, None, None

    entry = batch_data[index]
    try:
        img_np = _load_rgb_array(entry['path'])

        # Run OCR only when there is no cached result.
        if entry['boxes'] is None:
            entry['boxes'] = core.detect_text_initial(img_np)
        boxes = entry['boxes']

        preview = core.draw_preview(img_np, boxes)
        return preview, boxes, img_np
    except Exception as e:
        print(f"Load error: {e}")
        return None, None, None


def navigate(direction, batch_data, current_index, current_boxes_state):
    """Handle Prev/Next: save the current edits, then show the neighbouring image.

    Returns:
        ``(preview, batch_data, new_index, new_boxes, status_message)``.
    """
    if not batch_data:
        return None, batch_data, 0, [], "No Data"

    # Persist the edits made to the current image.
    batch_data[current_index]['boxes'] = current_boxes_state

    # Clamp to the valid index range.
    new_index = min(max(current_index + direction, 0), len(batch_data) - 1)

    preview, new_boxes, _ = load_image_at_index(batch_data, new_index)
    position = f"{new_index+1}/{len(batch_data)}"
    if new_boxes is None:
        # Keep the box state a list so later clicks do not crash on None.
        return None, batch_data, new_index, [], f"Failed to load image {position}"

    return preview, batch_data, new_index, new_boxes, f"Image {position}"


def canvas_click(batch_data, current_index, current_boxes_state, drawing_point,
                 merge_src_idx, mode, evt: gr.SelectData):
    """Dispatch a click on the preview according to the selected edit mode.

    Returns:
        ``(preview, batch_data, boxes, drawing_point, merge_src_idx,
        status_message)`` - six values, matching the wired outputs.
    """
    if not batch_data:
        return None, batch_data, current_boxes_state, drawing_point, merge_src_idx, "No Data"

    if current_boxes_state is None:
        current_boxes_state = []
    # Drop a merge source that no longer points at an existing box.
    if merge_src_idx is not None and not 0 <= merge_src_idx < len(current_boxes_state):
        merge_src_idx = None

    img_np = _load_rgb_array(batch_data[current_index]['path'])
    click_x, click_y = evt.index
    msg = ""

    # ------------------------------------------------------------------
    # A. Toggle mode (switch a box between erase / keep)
    # ------------------------------------------------------------------
    if mode.startswith("Toggle"):
        hit_idx = _find_box_at(current_boxes_state, click_x, click_y)
        if hit_idx != -1:
            box = current_boxes_state[hit_idx]
            box['active'] = not box['active']
            msg = "Toggled box state."
        else:
            msg = "No box at the clicked position."

        # Reset the state of the other modes.
        drawing_point = None
        merge_src_idx = None

    # ------------------------------------------------------------------
    # B. Merge mode (combine two boxes into one)
    # ------------------------------------------------------------------
    elif mode.startswith("Merge"):
        clicked_idx = _find_box_at(current_boxes_state, click_x, click_y)

        if clicked_idx == -1:
            # Clicked on empty space -> cancel.
            merge_src_idx = None
            msg = "Canceled."
        elif merge_src_idx is None:
            # First box selected.
            merge_src_idx = clicked_idx
            msg = "Select 2nd box to merge."
        elif merge_src_idx == clicked_idx:
            # Same box clicked twice -> cancel.
            merge_src_idx = None
            msg = "Merge canceled (Same box)."
        else:
            # Second box selected -> merge.
            new_box = _merge_boxes(current_boxes_state[merge_src_idx],
                                   current_boxes_state[clicked_idx])

            # Remove the two originals, highest index first so the other
            # index stays valid.
            for idx in sorted((merge_src_idx, clicked_idx), reverse=True):
                current_boxes_state.pop(idx)

            current_boxes_state.append(new_box)
            merge_src_idx = None
            msg = "Merged successfully!"

        drawing_point = None

    # ------------------------------------------------------------------
    # C. Add manual box mode
    # ------------------------------------------------------------------
    else:
        merge_src_idx = None

        if drawing_point is None:
            # First click: remember the start point.
            drawing_point = (click_x, click_y)
            msg = "Start point set. Click End point."
        else:
            # Second click: end point -> create the box.
            sx, sy = drawing_point
            x1, x2 = int(min(sx, click_x)), int(max(sx, click_x))
            y1, y2 = int(min(sy, click_y)), int(max(sy, click_y))

            # Ignore boxes that are too small.
            if (x2 - x1) > 5 and (y2 - y1) > 5:
                current_boxes_state.append({'bbox': [x1, y1, x2, y2], 'text': "", 'active': True})
                msg = "Manual Box added."
            else:
                msg = "Box too small."

            drawing_point = None

    # Store the edits and redraw; a pending merge source is highlighted.
    batch_data[current_index]['boxes'] = current_boxes_state
    preview = core.draw_preview(img_np, current_boxes_state,
                                temp_point=drawing_point, highlight_idx=merge_src_idx)

    return preview, batch_data, current_boxes_state, drawing_point, merge_src_idx, msg


def generate_final_pptx_batch(batch_data, dilation, method, progress=gr.Progress()):
    """Build one PPTX containing a cleaned, editable slide per image.

    Returns:
        ``(status_message, pptx_path)``; ``pptx_path`` is ``None`` on failure.
    """
    if not batch_data:
        return "No data", None

    try:
        prs = Presentation()

        # The first image decides the slide size.
        first_path = batch_data[0]['path']
        if not os.path.exists(first_path):
            return "Error: Image file not found.", None

        with Image.open(first_path) as img0:
            width_px, height_px = img0.size
        slide_w_pt, slide_h_pt = _fit_slide_size_pt(width_px, height_px)
        prs.slide_width = Pt(slide_w_pt)
        prs.slide_height = Pt(slide_h_pt)

        slides_added = 0
        for data in progress.tqdm(batch_data, desc="Generating Slides"):
            if not os.path.exists(data['path']):
                continue

            img_np = _load_rgb_array(data['path'])

            # Run OCR for images the user never opened.
            boxes = data['boxes']
            if boxes is None:
                boxes = core.detect_text_initial(img_np)

            # Mask generation and inpainting.
            mask = core.create_mask_from_states(img_np.shape, boxes, dilation)
            clean_img = core.inpaint_image(img_np, mask, method)

            core.add_slide_to_prs(prs, img_np, clean_img, boxes)
            slides_added += 1

        # A per-run temp directory keeps concurrent sessions from overwriting
        # each other's result while preserving the download file name.
        out_dir = tempfile.mkdtemp(prefix="slide_fixer_")
        out_path = os.path.join(out_dir, "final_presentation.pptx")
        prs.save(out_path)

        return f"Completed! {slides_added} slides merged.", out_path

    except Exception as e:
        traceback.print_exc()
        return f"Error during PPTX generation: {e}", None


# ==============================================================================
# 4. Gradio UI
# ==============================================================================

# Theme
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="slate",
    neutral_hue="slate",
    font=["Meiryo", "sans-serif"]
).set(
    body_background_fill="#0b0f19",
    block_background_fill="#111827",
    block_border_color="#374151",
    input_background_fill="#1f2937",
    button_primary_background_fill="#4f46e5",
    body_text_color="#f3f4f6",
    block_label_text_color="#f3f4f6",
    block_title_text_color="#f3f4f6"
)

css = """
footer {visibility: hidden}
.gradio-container {min-height: 100vh;}
"""

with gr.Blocks(theme=theme, css=css, title="Slide Fixer Pro Max") as demo:
    gr.Markdown("# 🚀 Slide Fixer Pro Max")
    gr.Markdown("画像をアップロード -> 確認・編集 -> **文字が消えた編集可能なPPTX** を生成!")

    # ----- State -----
    batch_data_state = gr.State([])       # paths and OCR data of all images
    current_idx_state = gr.State(0)       # index of the image being shown
    current_boxes_state = gr.State([])    # box list of the image being shown
    draw_point_state = gr.State(None)     # manual-add mode: start point
    merge_src_idx_state = gr.State(None)  # merge mode: index of the first box

    # ----- Layout -----
    with gr.Row():
        files_input = gr.File(file_count="multiple", label="1. Upload Images (Multiple Support)")

    with gr.Row():
        # Left column: image preview and click editor
        with gr.Column(scale=2):
            canvas = gr.Image(label="Preview & Click Editor", interactive=False)

        # Right column: control panel
        with gr.Column(scale=1):
            info_text = gr.Textbox(label="Status", value="Waiting for upload...")

            # Paging
            with gr.Row():
                prev_btn = gr.Button("<< Prev Image")
                next_btn = gr.Button("Next Image >>")

            gr.Markdown("### 🛠️ Edit Mode")
            edit_mode = gr.Radio(
                [
                    "Toggle (Click to Remove/Keep)",
                    "Merge Boxes (Click 2 Boxes)",
                    "Add Manual Box (Click Start/End)"
                ],
                value="Toggle (Click to Remove/Keep)",
                label="Mode Select"
            )

            gr.Markdown("""
            - **Toggle**: 赤枠をクリックで ON/OFF 切り替え。
            - **Merge**: 離れた枠を2つクリックして合体。
            - **Add**: 認識されなかった文字を手動で囲む。
            """)

            gr.Markdown("---")

            # Settings
            dilation_sld = gr.Slider(0, 30, 10, label="Dilation (Mask Expansion)")
            method_radio = gr.Radio(["LaMa", "OpenCV"], value="LaMa", label="Inpaint Method")

            # Generate button
            gen_btn = gr.Button("✨ Generate Merged PPTX (All Black Text)", variant="primary", size="lg")
            dl_file = gr.File(label="Download Result")

    # ----- Events -----
    # Changing the displayed image also clears the pending manual-box start
    # point and merge source, which refer to the previous image's boxes.
    image_change_outputs = [
        canvas, batch_data_state, current_idx_state, current_boxes_state, info_text,
        draw_point_state, merge_src_idx_state,
    ]

    # 1. Image upload
    files_input.upload(
        fn=lambda files: (*on_files_upload(files), None, None),
        inputs=[files_input],
        outputs=image_change_outputs
    )

    # 2. Previous image
    prev_btn.click(
        fn=lambda b, i, c: (*navigate(-1, b, i, c), None, None),
        inputs=[batch_data_state, current_idx_state, current_boxes_state],
        outputs=image_change_outputs
    )

    # 3. Next image
    next_btn.click(
        fn=lambda b, i, c: (*navigate(1, b, i, c), None, None),
        inputs=[batch_data_state, current_idx_state, current_boxes_state],
        outputs=image_change_outputs
    )

    # 4. Click on the image (edit action)
    canvas.select(
        fn=canvas_click,
        inputs=[
            batch_data_state, current_idx_state, current_boxes_state,
            draw_point_state, merge_src_idx_state, edit_mode
        ],
        outputs=[
            canvas, batch_data_state, current_boxes_state,
            draw_point_state, merge_src_idx_state, info_text
        ]
    )

    # 5. PPTX generation
    gen_btn.click(
        fn=generate_final_pptx_batch,
        inputs=[batch_data_state, dilation_sld, method_radio],
        outputs=[info_text, dl_file]
    )

if __name__ == "__main__":
    # Settings for running on HuggingFace Spaces
    demo.queue().launch(share=False)