Spaces:

fumiyaaa
/

dokoCame

Sleeping

Fumiya Imazato Claude Opus 4.5 committed on Dec 6, 2025

Commit

1eff75e

1 Parent(s): 251cf10

Fix: PaddleOCR 3.x use_gpu + overlay camera switch + auto analyze

- Remove use_gpu arg (deprecated in PaddleOCR 3.x)
- Add camera switch button as overlay on video
- Enable streaming=True for auto analysis
- Use webcam.stream() instead of manual button
- Update TROUBLESHOOTING.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (3) hide show

TROUBLESHOOTING.md +50 -0
app.py +106 -118
core/ocr_engine.py +7 -5

TROUBLESHOOTING.md CHANGED Viewed

@@ -323,3 +323,53 @@ const initRearCamera = async () => {
 };
 initRearCamera();
 ```

 };
 initRearCamera();
 ```
+## PaddleOCR 3.x で use_gpu が廃止
+### 問題
+```
+ValueError: Unknown argument: use_gpu
+```
+### 原因
+- PaddleOCR 3.x で `use_gpu` 引数も廃止された
+- `show_log`, `enable_mkldnn`, `cpu_threads` に続いて `use_gpu` も削除
+### 解決策
+```python
+# 修正前
+self._ocr = PaddleOCR(
+    use_angle_cls=True,
+    lang=self.lang,
+    use_gpu=self.use_gpu,
+)
+# 修正後（PaddleOCR 3.x）
+self._ocr = PaddleOCR(
+    use_angle_cls=True,
+    lang=self.lang,
+)
+```
+### 教訓
+このプロジェクトでは、PaddleOCR 3.x の初期化時に以下の引数のみを渡す（廃止された引数は渡さない）：
+- `use_angle_cls`
+- `lang`
+## カメラ切り替えボタンをオーバーレイ表示
+### 問題
+- カメラ切り替えボタンが撮影画面の外にあり使いづらい
+### 解決策
+JavaScriptでボタンを動的に追加：
+```javascript
+const btn = document.createElement('button');
+btn.className = 'camera-switch-overlay';
+btn.innerHTML = '🔄 内/外';
+btn.style.cssText = 'position:absolute;top:10px;right:10px;z-index:1000;...';
+imageContainer.appendChild(btn);
+```
+MutationObserverでDOMの変更を監視し、ボタンが消えたら再追加。

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ GPSに頼らず位置を特定するサービス
 """
 import time
-import asyncio
 from typing import Optional
 import numpy as np
 import gradio as gr
@@ -47,17 +46,7 @@ class DokoCameApp:
         self._hint_lon: float = 139.7671
     def process_frame(self, frame: np.ndarray) -> dict:
-        """
-        フレームを処理
-        Returns:
-            {
-                "ocr_texts": [...],
-                "landmarks": [...],
-                "location_status": "...",
-                "result": AggregatedResult or None
-            }
-        """
         if frame is None:
             return self._empty_result()
@@ -74,17 +63,21 @@ class DokoCameApp:
         # OCR処理
         if sample.should_ocr:
             raw_texts = self.ocr_engine.detect_text_only(frame)
             ocr_texts = [clean_ocr_text(t) for t in raw_texts if t]
             self._latest_ocr_texts = ocr_texts
         # VLM処理
         if sample.should_vlm and self.vlm_analyzer.is_available:
             try:
                 analysis = self.vlm_analyzer.analyze(frame)
                 if analysis.success:
                     self._latest_analysis = analysis
                     vlm_keywords = self.vlm_analyzer.get_search_keywords(analysis)
             except Exception as e:
                 print(f"VLM error: {e}")
@@ -138,27 +131,18 @@ class DokoCameApp:
         self._latest_ocr_texts = []
         self._latest_analysis = None
-    def set_hint_location(self, lat: float, lon: float):
-        """ヒント座標を設定"""
-        self._hint_lat = lat
-        self._hint_lon = lon
 # グローバルアプリインスタンス
 app = DokoCameApp()
 def process_webcam(frame):
-    """Webcam入力を処理（Gradio Image入力用）"""
-    print(f"[DEBUG] process_webcam called, frame type: {type(frame)}")
     if frame is None:
-        print("[DEBUG] frame is None")
-        return None, "カメラを起動してください", ""
-    print(f"[DEBUG] frame shape: {frame.shape if hasattr(frame, 'shape') else 'no shape'}")
     result = app.process_frame(frame)
-    print(f"[DEBUG] process_frame result: {result}")
     # OCRテキストをフォーマット
     ocr_display = ""
@@ -183,47 +167,102 @@ def process_webcam(frame):
         location_display += f"\n\n座標: {r.estimated_lat:.6f}, {r.estimated_lon:.6f}"
         location_display += f"\n信頼度: {r.confidence:.1%}"
-    return frame, location_display, info_display
 def reset_state():
     """状態リセット"""
     app.reset()
-    return "リセットしました"
 def create_ui():
     """Gradio UIを作成"""
-    # ページ読み込み時に外カメラをデフォルトにするJavaScript
-    init_camera_js = """
     () => {
-        // Gradioのカメラが起動したら外カメラに切り替える
         const initRearCamera = async () => {
-            // 少し待ってからカメラを探す
             await new Promise(r => setTimeout(r, 2000));
             const video = document.querySelector('video');
-            if (!video) {
-                console.log('Video element not found, retrying...');
-                setTimeout(initRearCamera, 1000);
-                return;
-            }
-            // カメラが起動するまで待つ
-            if (!video.srcObject) {
-                console.log('Camera not started yet, retrying...');
                 setTimeout(initRearCamera, 1000);
                 return;
             }
             try {
                 const tracks = video.srcObject.getVideoTracks();
-                if (tracks.length > 0) {
-                    tracks.forEach(track => track.stop());
-                }
-                // 外カメラを要求
                 const stream = await navigator.mediaDevices.getUserMedia({
                     video: { facingMode: { ideal: 'environment' } }
                 });
@@ -234,39 +273,43 @@ def create_ui():
             }
         };
-        // ページ読み込み後に実行
         initRearCamera();
     }
     """
     with gr.Blocks(
         title="どこカメ - リアルタイム位置特定",
         theme=gr.themes.Soft(),
-        js=init_camera_js,
     ) as demo:
         gr.Markdown(
             """
             # 📍 どこカメ (dokoCame)
-            ### かざすだけで、視界がそのまま住所になる
             """
         )
         with gr.Row():
             with gr.Column(scale=2):
-                # カメラ入力（streaming無効にして手動キャプチャ）
                 webcam = gr.Image(
                     sources=["webcam"],
-                    streaming=False,
-                    label="📷 カメラで撮影してから解析ボタンを押してください",
                     mirror_webcam=False,
                 )
-                with gr.Row():
-                    # 解析ボタン
-                    analyze_btn = gr.Button("🔍 解析する", variant="primary", size="lg")
-                    # カメラ切り替えボタン
-                    switch_camera_btn = gr.Button("🔄 内/外カメラ切替", variant="secondary")
             with gr.Column(scale=1):
                 # 位置情報表示
                 location_output = gr.Textbox(
@@ -278,91 +321,36 @@ def create_ui():
                 # 検出情報表示
                 info_output = gr.Textbox(
                     label="🔍 検出情報",
-                    lines=10,
                     interactive=False,
                 )
                 # リセットボタン
                 reset_btn = gr.Button("🔄 リセット", variant="secondary")
-        # カメラ切り替え用JavaScript
-        switch_camera_js = """
-        async () => {
-            // 現在のカメラを取得
-            const video = document.querySelector('video');
-            if (!video || !video.srcObject) {
-                alert('カメラを先に起動してください');
-                return;
-            }
-            const tracks = video.srcObject.getVideoTracks();
-            if (tracks.length === 0) return;
-            const currentSettings = tracks[0].getSettings();
-            const currentFacing = currentSettings.facingMode || 'user';
-            // 反対のカメラに切り替え
-            const newFacing = currentFacing === 'user' ? 'environment' : 'user';
-            try {
-                // 古いトラックを停止
-                tracks.forEach(track => track.stop());
-                // 新しいカメラを取得
-                const newStream = await navigator.mediaDevices.getUserMedia({
-                    video: { facingMode: { exact: newFacing } }
-                });
-                video.srcObject = newStream;
-                console.log('カメラ切り替え成功:', newFacing);
-            } catch (err) {
-                console.error('カメラ切り替え失敗:', err);
-                // exactが失敗した場合、idealで試す
-                try {
-                    const newStream = await navigator.mediaDevices.getUserMedia({
-                        video: { facingMode: { ideal: newFacing } }
-                    });
-                    video.srcObject = newStream;
-                } catch (err2) {
-                    alert('カメラの切り替えに失敗しました。このデバイスでは切り替えできない可能性があります。');
-                }
-            }
-        }
-        """
-        # イベントハンドラ
-        analyze_btn.click(
             fn=process_webcam,
             inputs=[webcam],
-            outputs=[webcam, location_output, info_output],
-        )
-        switch_camera_btn.click(
-            fn=None,
-            inputs=[],
-            outputs=[],
-            js=switch_camera_js,
         )
         reset_btn.click(
             fn=reset_state,
-            outputs=[location_output],
         )
         gr.Markdown(
             """
             ---
             ### 使い方
-            1. カメラを許可して起動（自動で外カメラが選択されます）
-            2. 必要に応じて**「内/外カメラ切替」**で切り替え
-            3. 周囲の看板や店舗が見える位置で**カメラUI内の撮影ボタン**を押して写真を撮る
-            4. **「解析する」**ボタンを押して位置を特定
-            5. 複数回解析すると精度が上がります
             ### 注意事項
             - GPSは使用していません（映像のみで位置を推定）
             - コンビニ、飲食店、駅などが見えると精度が上がります
-            - **先に写真を撮ってから解析ボタンを押してください**
             """
         )

 """
 import time
 from typing import Optional
 import numpy as np
 import gradio as gr
         self._hint_lon: float = 139.7671
     def process_frame(self, frame: np.ndarray) -> dict:
+        """フレームを処理"""
         if frame is None:
             return self._empty_result()
         # OCR処理
         if sample.should_ocr:
+            print("[DEBUG] Running OCR...")
             raw_texts = self.ocr_engine.detect_text_only(frame)
             ocr_texts = [clean_ocr_text(t) for t in raw_texts if t]
             self._latest_ocr_texts = ocr_texts
+            print(f"[DEBUG] OCR detected: {ocr_texts}")
         # VLM処理
         if sample.should_vlm and self.vlm_analyzer.is_available:
             try:
+                print("[DEBUG] Running VLM...")
                 analysis = self.vlm_analyzer.analyze(frame)
                 if analysis.success:
                     self._latest_analysis = analysis
                     vlm_keywords = self.vlm_analyzer.get_search_keywords(analysis)
+                    print(f"[DEBUG] VLM keywords: {vlm_keywords}")
             except Exception as e:
                 print(f"VLM error: {e}")
         self._latest_ocr_texts = []
         self._latest_analysis = None
 # グローバルアプリインスタンス
 app = DokoCameApp()
 def process_webcam(frame):
+    """Webcam入力を処理"""
     if frame is None:
+        return "カメラを起動してください", ""
+    print(f"[DEBUG] process_webcam called, frame shape: {frame.shape}")
     result = app.process_frame(frame)
     # OCRテキストをフォーマット
     ocr_display = ""
         location_display += f"\n\n座標: {r.estimated_lat:.6f}, {r.estimated_lon:.6f}"
         location_display += f"\n信頼度: {r.confidence:.1%}"
+    return location_display, info_display
 def reset_state():
     """状態リセット"""
     app.reset()
+    return "リセットしました", ""
 def create_ui():
     """Gradio UIを作成"""
+    # カスタムCSS - カメラ切り替えボタンをオーバーレイ表示
+    custom_css = """
+    .camera-container {
+        position: relative;
+    }
+    .camera-switch-btn {
+        position: absolute;
+        top: 10px;
+        right: 10px;
+        z-index: 100;
+        background: rgba(0,0,0,0.7) !important;
+        color: white !important;
+        border: none !important;
+        padding: 8px 12px !important;
+        border-radius: 20px !important;
+        font-size: 14px !important;
+    }
+    .camera-switch-btn:hover {
+        background: rgba(0,0,0,0.9) !important;
+    }
+    """
+    # 初期化JavaScript（外カメラデフォルト + カメラ切り替えボタン追加）
+    init_js = """
     () => {
+        // カメラ切り替えボタンを追加
+        const addSwitchButton = () => {
+            const imageContainer = document.querySelector('.image-container, [data-testid="image"]');
+            if (!imageContainer) {
+                setTimeout(addSwitchButton, 500);
+                return;
+            }
+            // 既にボタンがあれば追加しない
+            if (document.querySelector('.camera-switch-overlay')) return;
+            const btn = document.createElement('button');
+            btn.className = 'camera-switch-overlay';
+            btn.innerHTML = '🔄 内/外';
+            btn.style.cssText = 'position:absolute;top:10px;right:10px;z-index:1000;background:rgba(0,0,0,0.7);color:white;border:none;padding:10px 15px;border-radius:25px;font-size:16px;cursor:pointer;';
+            btn.onclick = async () => {
+                const video = document.querySelector('video');
+                if (!video || !video.srcObject) {
+                    alert('カメラを先に起動してください');
+                    return;
+                }
+                const tracks = video.srcObject.getVideoTracks();
+                if (tracks.length === 0) return;
+                const currentFacing = tracks[0].getSettings().facingMode || 'user';
+                const newFacing = currentFacing === 'user' ? 'environment' : 'user';
+                try {
+                    tracks.forEach(track => track.stop());
+                    const newStream = await navigator.mediaDevices.getUserMedia({
+                        video: { facingMode: { ideal: newFacing } }
+                    });
+                    video.srcObject = newStream;
+                    btn.innerHTML = newFacing === 'environment' ? '🔄 外カメラ' : '🔄 内カメラ';
+                } catch (err) {
+                    console.error('カメラ切り替え失敗:', err);
+                }
+            };
+            imageContainer.style.position = 'relative';
+            imageContainer.appendChild(btn);
+        };
+        // 外カメラをデフォルトに
         const initRearCamera = async () => {
             await new Promise(r => setTimeout(r, 2000));
             const video = document.querySelector('video');
+            if (!video || !video.srcObject) {
                 setTimeout(initRearCamera, 1000);
                 return;
             }
             try {
                 const tracks = video.srcObject.getVideoTracks();
+                tracks.forEach(track => track.stop());
                 const stream = await navigator.mediaDevices.getUserMedia({
                     video: { facingMode: { ideal: 'environment' } }
                 });
             }
         };
+        addSwitchButton();
         initRearCamera();
+        // 監視して再追加
+        const observer = new MutationObserver(() => {
+            if (!document.querySelector('.camera-switch-overlay')) {
+                addSwitchButton();
+            }
+        });
+        observer.observe(document.body, { childList: true, subtree: true });
     }
     """
     with gr.Blocks(
         title="どこカメ - リアルタイム位置特定",
         theme=gr.themes.Soft(),
+        css=custom_css,
+        js=init_js,
     ) as demo:
         gr.Markdown(
             """
             # 📍 どこカメ (dokoCame)
+            **かざすだけで、視界がそのまま住所になる**
             """
         )
         with gr.Row():
             with gr.Column(scale=2):
+                # カメラ入力（streaming=Trueで自動解析）
                 webcam = gr.Image(
                     sources=["webcam"],
+                    streaming=True,
+                    label="カメラ映像",
                     mirror_webcam=False,
+                    elem_classes=["camera-container"],
                 )
             with gr.Column(scale=1):
                 # 位置情報表示
                 location_output = gr.Textbox(
                 # 検出情報表示
                 info_output = gr.Textbox(
                     label="🔍 検出情報",
+                    lines=8,
                     interactive=False,
                 )
                 # リセットボタン
                 reset_btn = gr.Button("🔄 リセット", variant="secondary")
+        # 自動解析（streaming=Trueで画像が変わるたびに呼ばれる）
+        webcam.stream(
             fn=process_webcam,
             inputs=[webcam],
+            outputs=[location_output, info_output],
         )
         reset_btn.click(
             fn=reset_state,
+            outputs=[location_output, info_output],
         )
         gr.Markdown(
             """
             ---
             ### 使い方
+            1. カメラを許可（自動で外カメラが選択されます）
+            2. 右上の**「🔄 内/外」**ボタンでカメラ切り替え可能
+            3. 周囲の看板や店舗を映すと**自動で解析**されます
             ### 注意事項
             - GPSは使用していません（映像のみで位置を推定）
             - コンビニ、飲食店、駅などが見えると精度が上がります
             """
         )

core/ocr_engine.py CHANGED Viewed

@@ -20,14 +20,12 @@ class OCREngine:
     日本語テキスト抽出に最適化
     """
-    def __init__(self, lang: str = "japan", use_gpu: bool = False):
         """
         Args:
             lang: 言語設定 ("japan", "en", "ch" など)
-            use_gpu: GPU使用フラグ（Hugging Face Free TierではFalse）
         """
         self.lang = lang
-        self.use_gpu = use_gpu
         self._ocr = None
         self._initialized = False
@@ -39,16 +37,20 @@ class OCREngine:
         try:
             from paddleocr import PaddleOCR
-            # PaddleOCR 3.x では引数が変更されている
             self._ocr = PaddleOCR(
                 use_angle_cls=True,
                 lang=self.lang,
-                use_gpu=self.use_gpu,
             )
             self._initialized = True
         except ImportError:
             print("Warning: PaddleOCR not installed. OCR will not work.")
             self._initialized = False
     def detect(self, frame: np.ndarray) -> List[OCRResult]:
         """

     日本語テキスト抽出に最適化
     """
+    def __init__(self, lang: str = "japan"):
         """
         Args:
             lang: 言語設定 ("japan", "en", "ch" など)
         """
         self.lang = lang
         self._ocr = None
         self._initialized = False
         try:
             from paddleocr import PaddleOCR
+            # PaddleOCR 3.x では use_gpu, show_log 等が廃止
+            # lang と use_angle_cls のみ使用
             self._ocr = PaddleOCR(
                 use_angle_cls=True,
                 lang=self.lang,
             )
             self._initialized = True
+            print("[OCR] PaddleOCR initialized successfully")
         except ImportError:
             print("Warning: PaddleOCR not installed. OCR will not work.")
             self._initialized = False
+        except Exception as e:
+            print(f"Warning: PaddleOCR init error: {e}")
+            self._initialized = False
     def detect(self, frame: np.ndarray) -> List[OCRResult]:
         """