Add FastAPI word tree server
Browse files- app.py +129 -0
- package/__pycache__/ai.cpython-310.pyc +0 -0
- package/__pycache__/config.cpython-310.pyc +0 -0
- package/__pycache__/path_manager.cpython-310.pyc +0 -0
- package/__pycache__/rust_adapter.cpython-310.pyc +0 -0
- package/__pycache__/word_counter.cpython-310.pyc +0 -0
- package/__pycache__/word_processor.cpython-310.pyc +0 -0
- package/ai.py +156 -0
- package/config.py +157 -0
- package/path_manager.py +151 -0
- package/rust_adapter.py +72 -0
- package/word_counter.py +203 -0
- package/word_processor.py +519 -0
- requirements.txt +23 -0
app.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
FastAPI 版 LLMView Word Tree サーバー
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import threading
|
| 7 |
+
from typing import List, Dict, Any, Optional
|
| 8 |
+
|
| 9 |
+
from fastapi import FastAPI, HTTPException
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
from package.path_manager import get_path_manager
|
| 14 |
+
except ImportError:
|
| 15 |
+
from path_manager import get_path_manager # type: ignore
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
path_manager = get_path_manager()
|
| 19 |
+
path_manager.setup_sys_path()
|
| 20 |
+
|
| 21 |
+
adapter = None
|
| 22 |
+
status_message = "モデル初期化中..."
|
| 23 |
+
status_lock = threading.Lock()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class WordTreeRequest(BaseModel):
    """Request body for POST /build_word_tree.

    Field descriptions are runtime strings surfaced in the OpenAPI schema,
    so they are intentionally left in Japanese.
    """

    # Prompt used for generation (required).
    prompt_text: str = Field(..., description="生成に使用するプロンプト")
    # Optional root text to grow the tree from.
    root_text: str = Field("", description="任意のルートテキスト")
    # Number of candidates to fetch per step (1-50).
    top_k: int = Field(5, ge=1, le=50, description="取得する候補数")
    # Maximum search depth (1-50).
    max_depth: int = Field(10, ge=1, le=50, description="探索深さ")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class WordTreeResponse(BaseModel):
    """One completed word-tree candidate: its text and probability."""

    text: str
    probability: float
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _set_status(message: str) -> None:
    """Atomically replace the global status string.

    Guarded by ``status_lock`` so readers (e.g. /health) never observe a
    torn update from the background initializer thread.
    """
    global status_message
    status_lock.acquire()
    try:
        status_message = message
    finally:
        status_lock.release()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def initialize_model() -> None:
    """Initialize the RustAdapter singleton and its underlying model.

    Runs on a background thread at startup. Progress and failures are
    reported through the shared status message (via ``_set_status``) rather
    than raised, so the server can serve /health while loading.
    """
    global adapter
    try:
        _set_status("モデルを読み込み中です...")
        # Imported lazily so the heavy model stack is not pulled in at
        # module import time.
        from package.rust_adapter import RustAdapter

        model_path = path_manager.get_model_path()
        adapter = RustAdapter.get_instance(model_path)
        _set_status("モデル準備完了")
    except Exception as exc:  # pragma: no cover
        # Record the failure for /health instead of crashing the process.
        _set_status(f"モデル初期化に失敗しました: {exc}")
        import traceback

        traceback.print_exc()
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Space 起動時にバックグラウンドで初期化
|
| 62 |
+
threading.Thread(target=initialize_model, daemon=True).start()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
app = FastAPI(
|
| 66 |
+
title="LLMView Word Tree API",
|
| 67 |
+
description="LLMView の単語ツリー構築 API。/build_word_tree にPOSTしてください。",
|
| 68 |
+
version="1.0.0",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@app.get("/")
|
| 73 |
+
def root() -> Dict[str, str]:
|
| 74 |
+
"""簡易案内"""
|
| 75 |
+
return {
|
| 76 |
+
"message": "LLMView Word Tree API",
|
| 77 |
+
"status_endpoint": "/health",
|
| 78 |
+
"build_endpoint": "/build_word_tree",
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@app.get("/health")
|
| 83 |
+
def health() -> Dict[str, Any]:
|
| 84 |
+
"""状態確認"""
|
| 85 |
+
with status_lock:
|
| 86 |
+
current_status = status_message
|
| 87 |
+
return {
|
| 88 |
+
"model_loaded": adapter is not None,
|
| 89 |
+
"status": current_status,
|
| 90 |
+
"model_path": path_manager.get_model_path(),
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@app.post("/build_word_tree", response_model=List[WordTreeResponse])
|
| 95 |
+
def build_word_tree(payload: WordTreeRequest) -> List[WordTreeResponse]:
|
| 96 |
+
"""単語ツリーを構築"""
|
| 97 |
+
if not payload.prompt_text.strip():
|
| 98 |
+
raise HTTPException(status_code=400, detail="prompt_text を入力してください。")
|
| 99 |
+
|
| 100 |
+
if adapter is None:
|
| 101 |
+
raise HTTPException(
|
| 102 |
+
status_code=503, detail=f"モデル準備中です: {status_message}"
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
try:
|
| 106 |
+
results = adapter.build_word_tree(
|
| 107 |
+
prompt_text=payload.prompt_text,
|
| 108 |
+
root_text=payload.root_text,
|
| 109 |
+
top_k=payload.top_k,
|
| 110 |
+
max_depth=payload.max_depth,
|
| 111 |
+
)
|
| 112 |
+
if not results:
|
| 113 |
+
raise HTTPException(status_code=500, detail="候補を生成できませんでした。")
|
| 114 |
+
return results
|
| 115 |
+
except HTTPException:
|
| 116 |
+
raise
|
| 117 |
+
except Exception as exc:
|
| 118 |
+
import traceback
|
| 119 |
+
|
| 120 |
+
traceback.print_exc()
|
| 121 |
+
raise HTTPException(status_code=500, detail=f"内部エラー: {exc}") from exc
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
import uvicorn
|
| 126 |
+
|
| 127 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 128 |
+
|
| 129 |
+
|
package/__pycache__/ai.cpython-310.pyc
ADDED
|
Binary file (5.28 kB). View file
|
|
|
package/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (4.93 kB). View file
|
|
|
package/__pycache__/path_manager.cpython-310.pyc
ADDED
|
Binary file (4.76 kB). View file
|
|
|
package/__pycache__/rust_adapter.cpython-310.pyc
ADDED
|
Binary file (3.2 kB). View file
|
|
|
package/__pycache__/word_counter.cpython-310.pyc
ADDED
|
Binary file (5.42 kB). View file
|
|
|
package/__pycache__/word_processor.cpython-310.pyc
ADDED
|
Binary file (15.2 kB). View file
|
|
|
package/ai.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Tuple, Any, Optional
|
| 2 |
+
import os
|
| 3 |
+
from config import Config
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AI:
    """Resident next-token probability provider backed by llama-cpp-python.

    Instances are cached per model path, so constructing ``AI`` repeatedly
    with the same path returns the same loaded model (resident singleton).
    """

    # Cache of instances keyed by model path (kept resident).
    _instances = {}

    def __new__(cls, model_path: str = None):
        """Return the cached instance for the resolved model path."""
        path = model_path or Config.get_default_model_path()

        if path not in cls._instances:
            cls._instances[path] = super().__new__(cls)
            cls._instances[path]._initialized = False

        return cls._instances[path]

    def __init__(self, model_path: str = None):
        """Load the model and initialize (runs only once per path).

        Args:
            model_path: path to the model file (None uses the default path).

        Raises:
            ValueError: when the model could not be loaded.
        """
        if hasattr(self, '_initialized') and self._initialized:
            return

        self.model_path = model_path or Config.get_default_model_path()
        self.model = self._load_model(self.model_path)
        self._initialized = True

        if self.model is None:
            raise ValueError(f"モデルのロードに失敗しました: {self.model_path}")

    @classmethod
    def get_model(cls, model_path: str = None) -> 'AI':
        """Return a model instance from the resident cache."""
        return cls(model_path)

    @classmethod
    def clear_cache(cls):
        """Clear the instance cache (development/testing only)."""
        cls._instances.clear()

    def _load_model(self, model_path: str) -> Optional[Any]:
        """Load the GGUF model via llama-cpp-python; return None on failure."""
        try:
            if not model_path or not os.path.exists(model_path):
                return None

            # Load via llama-cpp-python.
            try:
                from llama_cpp import Llama
                llm = Llama(
                    model_path=model_path,
                    n_ctx=2048,
                    logits_all=True,
                    n_gpu_layers=-1,  # offload all layers to GPU when available
                    verbose=False,
                )
                return llm
            except Exception as e:
                print(f"llama-cpp-pythonでのロードに失敗: {e}")
                return None

        except Exception as e:
            print(f"モデルロードエラー: {e}")
            return None

    def get_token_probabilities(self, text: str, k: int = 5) -> List[Tuple[str, float]]:
        """Return the top-k next tokens and their probabilities for ``text``.

        Args:
            text: input text.
            k: number of tokens to return.

        Returns:
            List[Tuple[str, float]]: (token, probability) pairs sorted by
            probability descending and renormalized to sum to 1. Empty list
            when the model is unavailable or any step fails.
        """
        if self.model is None:
            return []

        try:
            # Use llama-cpp-python's create_completion with logprobs.
            if hasattr(self.model, "create_completion"):
                resp = self.model.create_completion(
                    prompt=text,
                    max_tokens=1,
                    logprobs=k,
                    temperature=0.0,
                    echo=False,
                )

                # Extract candidate tokens and their logprobs from the response.
                items: List[Tuple[str, float]] = []
                choice = resp.get("choices", [{}])[0]
                lp = choice.get("logprobs", {})
                top = lp.get("top_logprobs", [])

                if top and isinstance(top[0], dict):
                    cand_dict = top[0]
                    tokens = list(cand_dict.keys())
                    logprobs = [cand_dict[t] for t in tokens]

                    # Convert logprobs to probabilities.
                    probs = self._softmax_from_logprobs(logprobs)

                    for token, prob in zip(tokens, probs):
                        items.append((token, float(prob)))

                # Keep the top-k by probability.
                items = sorted(items, key=lambda x: x[1], reverse=True)[:k]

                # Renormalize so the returned probabilities sum to 1.
                if items:
                    total_prob = sum(prob for _, prob in items)
                    if total_prob > 0:
                        normalized_items: List[Tuple[str, float]] = []
                        for token, prob in items:
                            normalized_prob = prob / total_prob
                            normalized_items.append((token, normalized_prob))
                        return normalized_items

                return items
            else:
                # BUGFIX: the original message contained mojibake
                # ("create_completion���ソッド"); restored "メソッド".
                print("モデルがcreate_completionメソッドをサポートしていません")
                return []

        except Exception as e:
            print(f"トークン確率取得エラー: {e}")
            return []

    def _softmax_from_logprobs(self, logprobs: List[float]) -> List[float]:
        """Convert logprobs to probabilities with a numerically stable softmax."""
        if not logprobs:
            return []

        # Subtract the max for numerical stability.
        max_logprob = max(logprobs)
        exp_logprobs = [exp(logprob - max_logprob) for logprob in logprobs]
        sum_exp = sum(exp_logprobs)

        if sum_exp == 0:
            return [0.0] * len(logprobs)

        return [exp_logprob / sum_exp for exp_logprob in exp_logprobs]
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def exp(x: float) -> float:
    """Return e**x via math.exp.

    Thin wrapper kept for this module's existing callers. BUGFIX: the
    original docstring falsely described this as an "approximation"; it
    delegates exactly to math.exp. The import is local because this module
    does not import ``math`` at the top level.
    """
    import math
    return math.exp(x)
|
package/config.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
設定ファイル - パス設定専用(Tauriアプリ用)
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
from typing import Dict, Any
|
| 8 |
+
from path_manager import get_path_manager
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Config:
    """Configuration manager (Tauri app): MeCab/Sudachi/model path settings."""

    # Runtime-environment detection.
    @classmethod
    def _get_base_path(cls) -> str:
        """Return the base path (PyInstaller-aware)."""
        if getattr(sys, 'frozen', False):
            # Built with PyInstaller: paths are relative to the executable.
            return os.path.dirname(sys.executable)
        else:
            # Development environment: project root is two levels up.
            return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # MeCab settings (legacy; used when needed). Homebrew defaults — macOS only.
    MECAB_CONFIG_PATH = "/opt/homebrew/etc/mecabrc"
    MECAB_DICT_PATH = "/opt/homebrew/lib/mecab/dic/ipadic"

    # AI model settings (Tauri app).
    # Defaults to the user's Documents folder when no env var overrides it.
    DEFAULT_MODEL_PATH = os.path.expanduser("~/Documents/models/llama-3.2-3b-instruct-q4_k_m.gguf")

    # fugashi settings (MeCab/IPA). Sudachi is used directly via SudachiPy;
    # the sudachidict_core path below is informational only (status/debug).
    try:
        import importlib.util
        spec = importlib.util.find_spec("sudachidict_core")
        if spec and spec.origin:
            # spec.origin is the package's __init__.py; the dict dir is its parent.
            import os as _os
            _pkg_dir = _os.path.dirname(spec.origin)
            SUDACHI_DICT_PATH = _pkg_dir
        else:
            SUDACHI_DICT_PATH = ""
    except Exception:
        SUDACHI_DICT_PATH = ""

    # fugashi always uses the MeCab config (IPA dictionary).
    FUGASHI_ARGS = f"-r {MECAB_CONFIG_PATH}"

    @classmethod
    def get_mecab_config_path(cls) -> str:
        """Return the MeCab config file path."""
        return cls.MECAB_CONFIG_PATH

    @classmethod
    def get_mecab_dict_path(cls) -> str:
        """Return the MeCab dictionary path."""
        return cls.MECAB_DICT_PATH

    @classmethod
    def get_fugashi_args(cls) -> str:
        """Return the argument string passed to fugashi."""
        return cls.FUGASHI_ARGS

    @classmethod
    def get_default_model_path(cls) -> str:
        """Return the default model path (delegates to PathManager)."""
        return get_path_manager().get_model_path()

    @classmethod
    def get_package_path(cls) -> str:
        """Return the package path (delegates to PathManager)."""
        return get_path_manager().get_package_path()

    @classmethod
    def validate_paths(cls) -> Dict[str, bool]:
        """Report whether each configured path exists on disk."""
        return {
            "mecab_config": os.path.exists(cls.MECAB_CONFIG_PATH),
            "mecab_dict": os.path.exists(cls.MECAB_DICT_PATH),
            "default_model": os.path.exists(cls.DEFAULT_MODEL_PATH)
        }

    @classmethod
    def print_status(cls):
        """Print the current settings followed by a path-existence report."""
        print("=== 設定状況 ===")
        print(f"MeCab設定ファイル: {cls.MECAB_CONFIG_PATH}")
        print(f"MeCab辞書: {cls.MECAB_DICT_PATH}")
        print(f"デフォルトモデル: {cls.DEFAULT_MODEL_PATH}")
        print(f"fugashi引数: {cls.FUGASHI_ARGS}")
        print(f"Sudachi辞書パス検出: {getattr(cls, 'SUDACHI_DICT_PATH', '')}")

        print("\n=== パス存在確認 ===")
        status = cls.validate_paths()
        for name, exists in status.items():
            status_text = "✓" if exists else "✗"
            print(f"{name}: {status_text}")
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# 環境変数での上書き対応
|
| 103 |
+
def load_config_from_env():
    """Override Config class attributes from environment variables.

    Called once at module import time (below). Unset variables leave the
    existing defaults in place.
    """
    Config.MECAB_CONFIG_PATH = os.getenv("MECAB_CONFIG_PATH", Config.MECAB_CONFIG_PATH)
    Config.MECAB_DICT_PATH = os.getenv("MECAB_DICT_PATH", Config.MECAB_DICT_PATH)
    Config.DEFAULT_MODEL_PATH = os.getenv("DEFAULT_MODEL_PATH", Config.DEFAULT_MODEL_PATH)
    # SUDACHI_DICT_PATH is recorded only (SudachiPy resolves its own dict).
    sudachi_env = os.getenv("SUDACHI_DICT_PATH", getattr(Config, "SUDACHI_DICT_PATH", ""))
    if sudachi_env:
        Config.SUDACHI_DICT_PATH = sudachi_env
    # fugashi always follows the (possibly overridden) MeCab config.
    Config.FUGASHI_ARGS = f"-r {Config.MECAB_CONFIG_PATH}"
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# 環境変数から設定を読み込み
|
| 117 |
+
load_config_from_env()
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# テスト関数
|
| 121 |
+
def test_config():
    """Manual smoke test: print config status, then exercise fugashi and AI."""
    print("=== Configテスト ===")

    # Show current settings and path-existence checks.
    Config.print_status()

    # fugashi tokenization check.
    try:
        import fugashi
        print(f"\n=== fugashiテスト ===")
        tagger = fugashi.GenericTagger(Config.get_fugashi_args())
        test_text = "こんにちは世界"
        tokens = tagger(test_text)
        print(f"テストテキスト: '{test_text}'")
        print(f"形態素: {[token.surface for token in tokens]}")
        print("✓ fugashi動作確認完了")
    except Exception as e:
        print(f"✗ fugashiテスト失敗: {e}")

    # AI class check (only when the default model file exists on disk).
    try:
        from ai import AI
        print(f"\n=== AIクラステスト ===")
        if os.path.exists(Config.get_default_model_path()):
            ai = AI(Config.get_default_model_path())
            tokens = ai.get_token_probabilities("こんにちは", k=3)
            print(f"テスト結果: {tokens}")
            print("✓ AIクラス動作確認完了")
        else:
            print("✗ デフォルトモデルが見つかりません")
    except Exception as e:
        print(f"✗ AIクラステスト失敗: {e}")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
if __name__ == "__main__":
|
| 157 |
+
test_config()
|
package/path_manager.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
パス管理モジュール
|
| 4 |
+
PyInstaller対応とサーバー起動の両方に対応
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Optional
|
| 11 |
+
|
| 12 |
+
class PathManager:
    """Path manager (singleton).

    Resolves base/package/model paths for both PyInstaller builds and the
    development environment.
    """

    _instance: Optional["PathManager"] = None

    def __new__(cls):
        # Classic singleton: create once, mark uninitialized for __init__.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every PathManager() call; guard so setup runs once.
        if self._initialized:
            return

        self._initialized = True
        self._setup_paths()

    def _setup_paths(self):
        """Initialize path settings based on the runtime environment."""
        # Detect PyInstaller vs development environment.
        if getattr(sys, 'frozen', False):
            # PyInstaller build: files are unpacked under sys._MEIPASS.
            self.is_pyinstaller = True
            self.base_path = Path(sys._MEIPASS)
            self.package_path = self.base_path / 'package'
            self.python_executable = sys.executable
        else:
            # Development: project root is this file's grandparent directory.
            self.is_pyinstaller = False
            self.base_path = Path(__file__).parent.parent
            self.package_path = self.base_path / 'package'
            self.python_executable = sys.executable

        # Resolve the model path.
        self.model_path = self._get_model_path()

        # Log the resolved paths for debugging.
        print(f"[PATH] PyInstaller: {self.is_pyinstaller}")
        print(f"[PATH] Base path: {self.base_path}")
        print(f"[PATH] Package path: {self.package_path}")
        print(f"[PATH] Model path: {self.model_path}")

    def _get_model_path(self) -> str:
        """Resolve the model file path (env var > bundle layout > defaults)."""
        # Environment variable takes precedence (only when the file exists).
        env_model_path = os.getenv('LLM_MODEL_PATH')
        if env_model_path and os.path.exists(env_model_path):
            return env_model_path

        # Prefer locations relative to the distributed bundle.
        try:
            # Executable directory (PyInstaller: where the built server binary lives).
            executable_dir = Path(os.path.dirname(sys.executable)) if getattr(sys, 'frozen', False) else Path(__file__).parent.parent
            candidate_relative_paths = [
                # Distribution layout: <root>/models/<file> (exe assumed in <root>/python/).
                (executable_dir.parent / 'models' / 'llama-3.2-3b-instruct-q4_k_m.gguf'),
                # models/ directory next to the executable.
                (executable_dir / 'models' / 'llama-3.2-3b-instruct-q4_k_m.gguf'),
                # Under the MEIPASS temporary extraction directory.
                (Path(getattr(sys, '_MEIPASS', str(self.base_path))) / 'models' / 'llama-3.2-3b-instruct-q4_k_m.gguf'),
            ]
            for rel in candidate_relative_paths:
                rel_str = str(rel)
                if os.path.exists(rel_str):
                    return rel_str
        except Exception:
            pass

        # Well-known default locations, checked in order.
        default_paths = [
            os.path.expanduser("~/Documents/GitHub/LLMV_app_frontend/src-tauri/python/models/llama-3.2-3b-instruct-q4_k_m.gguf"),
            os.path.expanduser("~/Documents/models/llama-3.2-3b-instruct-q4_k_m.gguf"),
            os.path.expanduser("~/models/llama-3.2-3b-instruct-q4_k_m.gguf"),
            "/opt/models/llama-3.2-3b-instruct-q4_k_m.gguf"
        ]

        for path in default_paths:
            if os.path.exists(path):
                return path

        # Nothing found: return the first default (may not exist on disk).
        return default_paths[0]

    def get_package_path(self) -> str:
        """Return the package directory as a string."""
        return str(self.package_path)

    def get_model_path(self) -> str:
        """Return the resolved model path."""
        return self.model_path

    def get_python_executable(self) -> str:
        """Return the Python executable path."""
        return self.python_executable

    def setup_sys_path(self):
        """Prepend the package path (and a dev-only local path) to sys.path."""
        package_path_str = self.get_package_path()

        # Add the package path.
        if package_path_str not in sys.path:
            sys.path.insert(0, package_path_str)

        # Development only: hard-coded local checkout path.
        # NOTE(review): machine-specific path; consider making configurable.
        if not self.is_pyinstaller:
            conda_package_path = '/Users/wataru/Documents/AppProject/LLMView_Server/package'
            if os.path.exists(conda_package_path) and conda_package_path not in sys.path:
                sys.path.insert(0, conda_package_path)

        print(f"[PATH] sys.path configured: {len(sys.path)} paths")

    def get_server_config(self) -> dict:
        """Return server settings (Flask-style keyword dict)."""
        return {
            'host': '127.0.0.1',
            'port': 5000,
            'debug': False,
            'use_reloader': False,
            'threaded': True
        }

    def get_pyinstaller_spec_path(self) -> str:
        """Return the PyInstaller spec file path."""
        return str(self.base_path / 'tauri_python_server.spec')

    def get_dist_path(self) -> str:
        """Return the build output directory path."""
        return str(self.base_path / 'dist')

    def get_executable_name(self) -> str:
        """Return the built executable's name."""
        return 'tauri_python_server'
|
| 145 |
+
|
| 146 |
+
# グローバルインスタンス
|
| 147 |
+
path_manager = PathManager()
|
| 148 |
+
|
| 149 |
+
def get_path_manager() -> PathManager:
    """Return the module-level PathManager singleton instance."""
    return path_manager
|
package/rust_adapter.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
from threading import Lock
|
| 3 |
+
|
| 4 |
+
from word_processor import WordDeterminer, WordPiece
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class RustAdapter:
    """
    Adapter intended to be called from Rust.
    - Expensive components (WordDeterminer, the AI model) are created once
      and kept resident.
    - Methods expose the build operations.
    - Return values are plain dict/list structures for easy serialization.
    """

    _instance: Optional["RustAdapter"] = None
    _lock: Lock = Lock()

    def __init__(self, model_path: Optional[str] = None):
        # WordDeterminer (internally uses the Sudachi mode-C WordCounter).
        self.determiner = WordDeterminer()

        # The AI model comes from the shared cache;
        # model_path=None selects the default model.
        from ai import AI

        self.model = AI.get_model(model_path)

    @classmethod
    def get_instance(cls, model_path: Optional[str] = None) -> "RustAdapter":
        """Return the singleton; model_path is honored on first call only."""
        if cls._instance is not None:
            return cls._instance
        # Re-check under the lock so concurrent first calls create only one.
        with cls._lock:
            if cls._instance is None:
                cls._instance = RustAdapter(model_path)
            return cls._instance

    # ===== Public API =====
    def build_word_tree(self, prompt_text: str, root_text: str = "", top_k: int = 5, max_depth: int = 10) -> List[Dict[str, Any]]:
        """
        Build the word tree and return the completed pieces as dicts.
        Each element: { text: str, probability: float }
        """
        pieces: List[WordPiece] = self.determiner.build_word_tree(
            prompt_text=prompt_text,
            root_text=root_text,
            model=self.model,
            top_k=top_k,
            max_depth=max_depth,
        )
        return [
            {"text": p.get_full_word(), "probability": float(p.probability)}
            for p in pieces
        ]

    def build_chat_prompt(self, user_content: str, system_content: str = "あなたは親切で役に立つAIアシスタントです。") -> str:
        """Return the chat prompt string."""
        return self.determiner.build_chat_prompt(user_content, system_content)

    def count_words(self, text: str) -> int:
        """Sudachi(C)-based word count.

        NOTE(review): reaches into the determiner's private ``_count_words``;
        consider exposing a public method on WordDeterminer.
        """
        return self.determiner._count_words(text)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Quick manual smoke test.
if __name__ == "__main__":
    adapter = RustAdapter.get_instance()
    prompt = "電球を作ったのは誰?"
    results = adapter.build_word_tree(prompt_text=prompt, root_text="", top_k=5, max_depth=5)
    print("=== RustAdapter 確認 ===")
    for i, r in enumerate(results, 1):
        print(f"{i}. {r['text']} ({r['probability']:.4f})")
|
package/word_counter.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
import fugashi
|
| 3 |
+
from config import Config
|
| 4 |
+
|
| 5 |
+
try:
|
| 6 |
+
# SudachiPy があれば直接利用してモードCを使用
|
| 7 |
+
from sudachipy import dictionary as sudachi_dictionary
|
| 8 |
+
from sudachipy import tokenizer as sudachi_tokenizer
|
| 9 |
+
_SUDACHI_AVAILABLE = True
|
| 10 |
+
except Exception:
|
| 11 |
+
_SUDACHI_AVAILABLE = False
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class WordCounter:
    """Count words in Japanese text.

    Backend selection at construction time: an explicitly supplied
    tokenizer, else SudachiPy (SplitMode.C) when importable, else fugashi
    (MeCab/IPA) as a fallback.
    """

    def __init__(self, tokenizer: Any = None):
        """
        Initialize the counter.

        Args:
            tokenizer: callable fugashi-style tokenizer; when None, SudachiPy
                (SplitMode.C) is preferred, falling back to fugashi.
        """
        # Priority: explicit tokenizer > SudachiPy > fugashi (GenericTagger).
        self._use_sudachi = False
        self._sudachi_mode = None
        if tokenizer is not None:
            self.tokenizer = tokenizer
        elif _SUDACHI_AVAILABLE:
            # SudachiPy finds its bundled dictionary (sudachidict_core)
            # automatically; no external config needed. Use SplitMode.C.
            self._use_sudachi = True
            self.tokenizer = sudachi_dictionary.Dictionary().create()
            self._sudachi_mode = sudachi_tokenizer.Tokenizer.SplitMode.C
        else:
            # fugashi (MeCab) fallback.
            self.tokenizer = fugashi.GenericTagger(Config.get_fugashi_args())

    def count_words(self, text: str) -> int:
        """
        Count the words in ``text``.

        Args:
            text: text to count.

        Returns:
            int: morpheme count (whitespace-token count on tokenizer failure).
        """
        if not text:
            return 0

        try:
            # Tokenize with whichever backend was selected at construction.
            if self._use_sudachi:
                tokens = self.tokenizer.tokenize(text, self._sudachi_mode)
            else:
                tokens = self.tokenizer(text)
            return len(tokens)
        except Exception as e:
            # BUGFIX: message no longer claims "fugashi" — either backend
            # (SudachiPy or fugashi) may have raised here.
            print(f"単語数カウントエラー: {e}")
            # Fallback: split on whitespace.
            return len(text.split())

    def is_word_boundary(self, text: str, position: int) -> bool:
        """
        Decide whether ``position`` falls on a word boundary.

        Args:
            text: text to inspect.
            position: index (negative counts from the end; -1 is the last char).

        Returns:
            bool: True when the position is on a token boundary.
        """
        if not text:
            return True

        # Normalize negative indices.
        if position < 0:
            position = len(text) + position

        if position >= len(text):
            return True

        try:
            # Tokenize and collect surface forms from the active backend.
            if self._use_sudachi:
                tokens = self.tokenizer.tokenize(text, self._sudachi_mode)
                surfaces = [m.surface() for m in tokens]
            else:
                tokens = self.tokenizer(text)
                surfaces = [m.surface for m in tokens]

            # Walk token spans; a position strictly inside a token is not a
            # boundary, a position exactly at a token's end is.
            current_pos = 0
            for surface in surfaces:
                token_length = len(surface)
                if current_pos <= position < current_pos + token_length:
                    return False
                if position == current_pos + token_length:
                    return True
                current_pos += token_length

            return True

        except Exception as e:
            # BUGFIX: message no longer claims "fugashi" (see count_words).
            print(f"境界判定エラー: {e}")
            # Fallback: treat whitespace characters as boundaries.
            return position < len(text) and text[position].isspace()
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Manual test routine (runs only when this module is executed directly).
def test_word_counter():
    """Smoke test for WordCounter: word counting, boundary detection, and a
    Sudachi(SplitMode.C) vs fugashi(IPA) segmentation comparison.

    Prints results to stdout; requires fugashi (and optionally SudachiPy).
    """
    print("=== WordCounterテスト ===")

    try:
        counter = WordCounter()

        # Basic word-count test on incrementally growing strings.
        print("単語数カウントテスト:")
        test_texts = [
            "私はエ",
            "私はエジ",
            "私はエジソ",
            "私はエジソン",
            "私はエジソンで"
        ]

        for text in test_texts:
            word_count = counter.count_words(text)
            print(f"  '{text}' → {word_count}語")

        # Word-boundary test over every position of a sample sentence.
        print("\n単語境界テスト:")
        test_text = "私はエジソンで"
        print(f"  '{test_text}' の境界判定:")

        for i in range(len(test_text) + 1):
            is_boundary = counter.is_word_boundary(test_text, i)
            print(f"    位置{i}: {is_boundary}")

        # Negative-index test (positions counted from the end of the string).
        print("\n負のインデックステスト:")
        print(f"  '{test_text}' の負のインデックス境界判定:")
        for i in range(-len(test_text), 1):
            is_boundary = counter.is_word_boundary(test_text, i)
            print(f"    位置{i}: {is_boundary}")

        # Segmentation comparison: Sudachi(C) vs fugashi(IPA).
        print("\n分割比較: Sudachi(C) vs fugashi(IPA)")
        compare_texts = [
            "私はエジソンで有名な科学者です。",
            "電球を作ったのは誰?",
            "自然言語処理は面白い。",
            "電球を"
        ]

        # Set up each tokenizer independently of WordCounter's choice.
        sudachi_ok = False
        sudachi_tok = None
        sudachi_mode = None
        try:
            if _SUDACHI_AVAILABLE:
                sudachi_tok = sudachi_dictionary.Dictionary().create()
                sudachi_mode = sudachi_tokenizer.Tokenizer.SplitMode.C
                sudachi_ok = True
        except Exception:
            sudachi_ok = False

        ipa_tagger = fugashi.GenericTagger(Config.get_fugashi_args())

        for text in compare_texts:
            print(f"\n--- テキスト: {text}")
            # fugashi (IPA dictionary)
            try:
                ipa_tokens = ipa_tagger(text)
                ipa_surfaces = [t.surface for t in ipa_tokens]
                print(f"fugashi(IPA) {len(ipa_surfaces)}語: {' | '.join(ipa_surfaces)}")
            except Exception as e:
                print(f"fugashi(IPA) 解析失敗: {e}")

            # SudachiPy (SplitMode.C)
            if sudachi_ok:
                try:
                    s_tokens = sudachi_tok.tokenize(text, sudachi_mode)
                    s_surfaces = [m.surface() for m in s_tokens]
                    print(f"Sudachi(C) {len(s_surfaces)}語: {' | '.join(s_surfaces)}")
                except Exception as e:
                    print(f"Sudachi(C) 解析失敗: {e}")
            else:
                print("Sudachi(C) は利用不可(モジュール未検出)")

        print("\nテスト完了")

    except ImportError:
        print("fugashiがインストールされていません。pip install fugashi でインストールしてください。")
    except Exception as e:
        print(f"テストエラー: {e}")


if __name__ == "__main__":
    test_word_counter()
|
package/word_processor.py
ADDED
|
@@ -0,0 +1,519 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Tuple, Any, Optional
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from enum import Enum
|
| 4 |
+
import os
|
| 5 |
+
import math
|
| 6 |
+
from word_counter import WordCounter
|
| 7 |
+
from config import Config
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class WordState(Enum):
    """Lifecycle state of a candidate word during streaming determination."""
    INCOMPLETE = "incomplete"  # word is still being extended
    COMPLETE = "complete"      # word boundary confirmed; candidate is done
    TRIGGER = "trigger"        # trigger (start of the next word)
|
| 15 |
+
|
| 16 |
+
class KList:
    """Bounded best-k list ordered by descending ``probability``.

    Holds at most ``num`` items. Items are any objects exposing a
    ``probability`` attribute and a ``get_full_text()`` method; candidates
    with identical full text are merged by summing their probabilities.
    """

    def __init__(self, num: int):
        # Maximum number of candidates retained after every mutation.
        self.num = num
        self.list: List[Any] = []

    def check_k(self) -> None:
        """Re-sort by probability (descending) and truncate to the top ``num``.

        The original code branched on the list length, but both branches
        sorted and truncation is a no-op for a short list, so a single
        unconditional path is equivalent.
        """
        self.list.sort(key=lambda item: item.probability, reverse=True)
        self.list = self.list[:self.num]

    def add(self, piece_word: Any) -> None:
        """Insert a candidate, merging probability into a duplicate if present."""
        new_text = piece_word.get_full_text()
        for existing_piece in self.list:
            if existing_piece.get_full_text() == new_text:
                # Same surface text: accumulate probability instead of
                # keeping two copies, then restore the order invariant.
                existing_piece.probability += piece_word.probability
                self.check_k()
                return

        # No duplicate: append and restore the order/size invariant.
        self.list.append(piece_word)
        self.check_k()

    def pop(self) -> Any:
        """Remove and return the highest-probability candidate.

        Raises:
            IndexError: if the list is empty.
        """
        if self.list:
            return self.list.pop(0)
        raise IndexError("List is empty")

    def empty(self) -> bool:
        """Return True when no candidates remain."""
        return len(self.list) == 0
|
| 50 |
+
|
| 51 |
+
@dataclass
class WordPiece:
    """A piece (fragment) of a word in the candidate tree.

    Pieces form a tree: the root holds the prompt/prefix text, and each
    child appends one token. Probabilities are cumulative along the path.
    """
    text: str                                              # surface text of this piece
    probability: float                                     # cumulative path probability
    next_tokens: Optional[List[Tuple[str, float]]] = None  # cached next-token candidates
    parent: Optional['WordPiece'] = None                   # parent piece (None for root)
    children: Optional[List['WordPiece']] = None           # child pieces (filled lazily)

    def __post_init__(self):
        # Avoid the shared-mutable-default pitfall: each instance gets its
        # own list. (Annotation fixed to Optional — the default is None.)
        if self.children is None:
            self.children = []

    def _texts_to_root(self) -> List[str]:
        """Collect non-empty piece texts from this node up to the root (leaf first)."""
        texts = []
        node = self
        while node is not None:
            if node.text:
                texts.append(node.text)
            node = node.parent
        return texts

    def get_full_text(self) -> str:
        """Return the concatenated text from the root down to this piece."""
        return "".join(reversed(self._texts_to_root()))

    def get_full_word(self) -> str:
        """Return the word from the root's children down to this piece.

        The root-most non-empty text (normally the root's prompt prefix)
        is excluded from the concatenation.
        """
        return "".join(reversed(self._texts_to_root()[:-1]))

    def add_child(self, text: str, probability: float, next_tokens: Optional[List[Tuple[str, float]]] = None) -> 'WordPiece':
        """Create a child piece, attach it to this node, and return it."""
        child = WordPiece(
            text=text,
            probability=probability,
            next_tokens=next_tokens,
            parent=self,
        )
        self.children.append(child)
        return child

    def is_leaf(self) -> bool:
        """Return True when this piece has no children."""
        return len(self.children) == 0

    def get_depth(self) -> int:
        """Return the number of edges between this piece and the root."""
        depth = 0
        node = self.parent
        while node is not None:
            depth += 1
            node = node.parent
        return depth
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class WordDeterminer:
    """Word determination system (real-time algorithm for streaming output).

    Decides, token by token, whether the text generated so far forms a
    complete word, by probing the model's next-token distribution.
    """

    def __init__(self, word_counter: WordCounter = None):
        """
        Initialize.

        Args:
            word_counter: WordCounter instance (a default one is created
                when None).
        """
        self.word_counter = word_counter or WordCounter()

    def is_boundary_char(self, char: str) -> bool:
        """Return True if *char* is a boundary character (whitespace or punctuation)."""
        if not char:
            return False

        # Whitespace is always a boundary.
        if char.isspace():
            return True

        # ASCII and Japanese punctuation treated as boundaries.
        punctuation = ",,..。!?!?:;;、\n\t"
        return char in punctuation

    def is_word_boundary(self, text: str, position: int) -> bool:
        """
        Delegate word-boundary detection to the WordCounter.

        Args:
            text: Text to inspect.
            position: Index to test (negative values count from the end).

        Returns:
            bool: True when *position* falls on a word boundary.
        """
        return self.word_counter.is_word_boundary(text, position)

    def check_word_completion(self, piece: WordPiece, root_count: int, model: Any = None) -> Tuple[WordState, Optional[Any]]:
        """
        Real-time word-determination check for streaming output.

        Strategy: weight each candidate next token by its probability and
        complete the word only when a large majority of the probability
        mass indicates a boundary (the word count would jump).

        Note: the original implementation also computed an entropy score
        and a low-ratio branch, but both fed an unconditional INCOMPLETE
        return — that dead code has been removed without behavior change.

        Args:
            piece: Piece to check.
            root_count: Word count of the root text.
            model: LLM model, used to fetch next tokens when not cached.

        Returns:
            Tuple[WordState, Optional[Any]]: (state, payload); payload is
            always None in the current implementation.
        """
        full_text = piece.get_full_text()

        # Lazily obtain next-token candidates from the model.
        if not piece.next_tokens:
            if model:
                piece.next_tokens = self._get_next_tokens_from_model(model, full_text)
            else:
                return (WordState.COMPLETE, None)

        if not piece.next_tokens:
            return (WordState.COMPLETE, None)

        # Sort by probability, defensively.
        sorted_tokens = sorted(piece.next_tokens, key=lambda x: x[1], reverse=True)

        # Bracket handling: an opening bracket keeps the word open, a closing
        # bracket closes it immediately. Guarded against an empty word, which
        # would previously have raised IndexError on [-1].
        word = piece.get_full_word()
        if word:
            if word[-1] in ["(","「","(","【","〈","《","[","{","⦅","《","[","{","⦅","《","[","{","⦅","《","[","{","⦅","《","[","{","⦅"]:
                return (WordState.INCOMPLETE, None)
            if word[-1] in [")","]","}","》","〉","》","]","}","⦆","》","]","}","⦆","》","]","}","⦆","》","]","}","⦆","》","]","}","⦆"]:
                return (WordState.COMPLETE, None)

        # Inspect the behaviour of every candidate token.
        # (The original `max(1, len(...))` slice was a no-op: the empty case
        # returned above, so all tokens are always inspected.)
        tokens = sorted_tokens

        boundary_prob = 0.0  # probability mass of boundary-indicating tokens
        total = sum(prob for _, prob in tokens)

        for token, prob in tokens:
            test_text = full_text + token
            # Count as a boundary only when appending the token adds MORE
            # than one word (raises cohesion of the produced chunks).
            if self._count_words(test_text) > root_count + 1:
                boundary_prob += prob

        # Decision: complete only when boundary mass clearly dominates.
        if total > 0 and boundary_prob / total > 0.85:
            return (WordState.COMPLETE, None)

        # Default: keep extending the word.
        return (WordState.INCOMPLETE, None)

    def _count_words(self, text: str) -> int:
        """
        Count the words in *text* via the WordCounter.

        Args:
            text: Text to count.

        Returns:
            int: Word (morpheme) count.
        """
        return self.word_counter.count_words(text)

    def _get_next_tokens_from_model(self, model: Any, text: str, top_k: int = 5) -> List[Tuple[str, float]]:
        """
        Fetch next-token candidates from the resident AI model.

        Args:
            model: LLM model (a path string or a model object).
            text: Input text.
            top_k: Number of candidates to fetch.

        Returns:
            List[Tuple[str, float]]: (token, probability) pairs; empty list
            on any failure.
        """
        try:
            # Imported lazily to avoid a hard dependency at module load time.
            from ai import AI

            # Resolve the model path.
            if isinstance(model, str):
                model_path = model
            elif hasattr(model, 'model_path'):
                model_path = model.model_path
            else:
                # Fall back to the default model.
                model_path = None

            # Use the resident AI model.
            ai_model = AI.get_model(model_path)
            return ai_model.get_token_probabilities(text, top_k)

        except Exception as e:
            print(f"モデルからのトークン取得に失敗: {e}")

        return []

    def expand_piece(self, piece: WordPiece, model: Any = None) -> List[WordPiece]:
        """
        Expand a piece into child pieces, one per candidate next token.

        Cached ``next_tokens`` are preferred; otherwise the model is queried
        once and the result is cached on the piece.

        Args:
            piece: Piece to expand.
            model: LLM model (required when the piece has no cached tokens).

        Returns:
            List[WordPiece]: newly created children (may be empty).
        """
        full_text = piece.get_full_text()

        if piece.next_tokens:
            tokens = piece.next_tokens
        elif model:
            tokens = self._get_next_tokens_from_model(model, full_text)
            if tokens:
                piece.next_tokens = tokens
        else:
            print(f"[WORD_PROCESSOR_STREAMING] No model provided for expansion")
            tokens = []

        children = []
        for token, prob in tokens:
            # Skip empty-string tokens: they would never advance the text.
            if not token:
                continue
            children.append(piece.add_child(token, piece.probability * prob))
        return children

    def build_word_tree(self, prompt_text: str, root_text: str, model: Any, top_k: int = 5, max_depth: int = 10) -> List[WordPiece]:
        """
        Build the word tree via best-first search over token continuations.

        Args:
            prompt_text: User prompt (formatted into a chat prompt).
            root_text: Text already generated (appended to the root).
            model: LLM model.
            top_k: Number of completed candidates to return.
            max_depth: Maximum depth. NOTE(review): currently NOT enforced —
                the depth check was disabled upstream; parameter kept for
                interface compatibility.

        Returns:
            List[WordPiece]: completed word pieces with probabilities
            normalized to sum to 1.
        """
        # Root piece holds the full chat prompt plus the generated prefix.
        root = WordPiece(text=self.build_chat_prompt(prompt_text) + root_text, probability=1.0)

        # Bounded candidate list ordered by probability.
        candidates = KList(2 * top_k)
        completed = []
        iteration = 0
        max_iterations = 1000  # hard cap to guarantee termination

        for child in self.expand_piece(root, model):
            candidates.add(child)

        # Loop-invariant: the root text never changes, so count it once
        # (previously recomputed every iteration).
        root_count = self._count_words(root.get_full_text())

        while not candidates.empty() and iteration < max_iterations and len(completed) < top_k:
            iteration += 1

            # Take the highest-probability candidate.
            current = candidates.pop()

            # Check whether the candidate forms a complete word.
            state, payload = self.check_word_completion(current, root_count, model)

            if state == WordState.COMPLETE:
                completed.append(current)
            elif state == WordState.INCOMPLETE:
                children = self.expand_piece(current, model)
                if len(children) == 0:
                    # No children could be generated: treat as complete to
                    # avoid an infinite loop.
                    print(f"[WORD_PROCESSOR_STREAMING] No children generated for '{current.get_full_text()}', marking as COMPLETE")
                    completed.append(current)
                else:
                    for child in children:
                        candidates.add(child)

        # Normalize probabilities over the completed set.
        total_prob = sum(p.probability for p in completed)
        if total_prob > 0:
            for piece in completed:
                piece.probability = piece.probability / total_prob

        return completed[:top_k]

    def build_chat_prompt(self, user_content: str,
                          system_content: str = "あなたは親切で役に立つAIアシスタントです。簡潔な回答をしてください") -> str:
        """
        Build a Llama-3-style chat prompt.

        If *user_content* already contains chat headers (pre-formatted on
        the Rust side), it is returned unchanged. Otherwise it is wrapped
        in the legacy single-turn format for backward compatibility.
        """
        # Already formatted upstream: pass through untouched.
        if "<|start_header_id|>" in user_content or "<|eot_id|>" in user_content:
            return user_content

        # Legacy path: wrap a single user message.
        prompt_text = (
            f"<|begin_of_text|>"
            f"<|start_header_id|>system<|end_header_id|>\n"
            f"{system_content}\n<|eot_id|>"
            f"<|start_header_id|>user<|end_header_id|>\n"
            f"{user_content}\n<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n"
        )

        # Strip every leading BOS: llama-cpp adds one automatically, and a
        # duplicated <|begin_of_text|> would corrupt tokenization.
        BOS = "<|begin_of_text|>"
        s = prompt_text.lstrip()
        while s.startswith(BOS):
            s = s[len(BOS):]
        return s
|
| 420 |
+
|
| 421 |
+
if __name__ == "__main__":
    """WordDeterminerのテスト(ストリーミング版)"""
    # Manual smoke test for WordDeterminer (streaming version).
    print("=== WordDeterminerテスト(ストリーミング版) ===")

    try:
        # Initialize WordDeterminer (builds a WordCounter internally, which
        # requires SudachiPy or fugashi to be installed).
        determiner = WordDeterminer()

        # Prompt setup.
        prompt_text = "電球を作ったのは誰?"
        root_text = ""

        print(f"プロンプト: '{prompt_text}'")
        print(f"ルートテキスト: '{root_text}'")

        print("\nAIモデルテスト:")
        prompt_text = "電球を作ったのは誰?"
        root_text = "電球を作ったのは候補1:トマス"
        try:
            from ai import AI

            # Obtain the resident model.
            model = AI.get_model()
            print(f"モデル取得成功: {type(model)}")

            # Token-probability retrieval test.
            test_text = prompt_text
            tokens = model.get_token_probabilities(test_text, k=5)
            print(f"トークン確率 ({test_text}): {tokens}")

            # Word-tree construction test.
            print("\n単語ツリー構築テスト:")
            completed_pieces = determiner.build_word_tree(
                prompt_text=prompt_text,
                root_text=root_text,
                model=model,
                top_k=3,
                max_depth=5
            )

            print(f"完成したピース数: {len(completed_pieces)}")
            for i, piece in enumerate(completed_pieces):
                full_text = piece.get_full_text()
                print(f"  ピース{i+1}: '{full_text}' (確率: {piece.probability:.4f})")

        except Exception as e:
            print(f"AIモデルテスト失敗: {e}")

        # Word-count test.
        # NOTE(review): 'model' below is only bound if the try block above
        # succeeded; if it failed, this raises NameError, which is swallowed
        # by the outer except — confirm this is intended.
        print("\n単語数カウントテスト:")
        test_texts = [
            "電球",
            "電球を作った",
            "電球を作ったのは",
            "電球を作ったのは誰",
            "電球を作ったのは誰?"
        ]

        for text in test_texts:
            word_count = determiner._count_words(text)
            tokens = determiner._get_next_tokens_from_model(model, text)
            print(f"  '{text}' → {word_count}語: {tokens}")

        # Word-finalization test: a word is reported as "finalized" when
        # appending characters increases the morpheme count.
        print("\n単語確定テスト:")
        test_sequence = ["電球", "電球を", "電球を作", "電球を作った", "電球を作ったの", "電球を作ったのは"]
        prev_count = 0

        for text in test_sequence:
            current_count = determiner._count_words(text)
            if current_count > prev_count:
                print(f"  '{text}' → {current_count}語 (確定!)")
                prev_count = current_count
            else:
                print(f"  '{text}' → {current_count}語 (継続)")

        # Boundary-character test.
        print("\n境界文字テスト:")
        test_chars = [" ", "?", "、", "。", "a", "1"]
        for char in test_chars:
            is_boundary = determiner.is_boundary_char(char)
            print(f"  '{char}': {is_boundary}")

        # Piece-construction test (pure data-structure check, no model).
        print("\nピース作成テスト:")
        root = WordPiece(text="電球", probability=1.0)
        child1 = root.add_child("を", 0.6)
        child2 = root.add_child("の", 0.3)

        print(f"ルートテキスト: {root.get_full_text()}")
        print(f"子1テキスト: {child1.get_full_text()}")
        print(f"子2テキスト: {child2.get_full_text()}")

        print("\nテスト完了")

    except ImportError as e:
        print(f"必要なライブラリがインストールされていません: {e}")
    except Exception as e:
        print(f"テストエラー: {e}")
|
requirements.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Tauriアプリ用Python依存関係
|
| 2 |
+
|
| 3 |
+
# Web API
|
| 4 |
+
flask>=2.0.0
|
| 5 |
+
flask-cors>=3.0.0
|
| 6 |
+
|
| 7 |
+
# 形態素解析
|
| 8 |
+
fugashi>=1.3.0
|
| 9 |
+
sudachipy>=0.6.7
|
| 10 |
+
sudachidict-core>=20240125
|
| 11 |
+
|
| 12 |
+
# AI/LLM
|
| 13 |
+
llama-cpp-python>=0.2.0
|
| 14 |
+
|
| 15 |
+
# UI
|
| 16 |
+
gradio>=4.38.0
|
| 17 |
+
|
| 18 |
+
# API
|
| 19 |
+
fastapi>=0.111.0
|
| 20 |
+
uvicorn>=0.30.0
|
| 21 |
+
|
| 22 |
+
# その他
|
| 23 |
+
typing-extensions>=4.0.0
|