Spaces:

WatNeru
/

LLMView_multi_model

Sleeping

App Files Files Community

LLMView_multi_model / package /adapter.py

WatNeru

first commit

0447f30 4 months ago

raw

history blame contribute delete

3.35 kB

	"""
	ModelAdapter - マルチモデル対応アダプター
	新しいAIインターフェース（BaseAI）に対応
	"""
	from typing import Any, Dict, List, Optional
	from .word_processor import WordDeterminer, WordPiece
	from .ai.base import BaseAI


	class ModelAdapter:
	    """Multi-model adapter built around the ``BaseAI`` interface.

	    - Holds the components that are costly to initialise (the
	      ``WordDeterminer`` and the AI model) so they are set up only once.
	    - Exposes the build operations as methods.
	    - Returns plain ``dict``/``list`` structures that are easy to serialise.
	    """

	    def __init__(self, ai_model: BaseAI):
	        """Create the adapter.

	        Args:
	            ai_model: A model instance implementing ``BaseAI``.
	        """
	        # Per the original author's note, WordDeterminer internally uses a
	        # WordCounter running Sudachi in C mode.
	        self.determiner = WordDeterminer()

	        # Keep the caller-supplied model; it is not copied or wrapped.
	        self.model = ai_model

	    def _clean_text(self, text: str) -> str:
	        """Strip control, invisible and replacement characters for final output.

	        Tab, line feed and carriage return are preserved; every other
	        character must pass ``str.isprintable()`` and must not be U+FFFD.
	        Leading and trailing whitespace is removed from the result.

	        NOTE(review): ``str.isprintable()`` is also false for non-ASCII
	        separators such as U+3000 (full-width space), so those are dropped
	        as well - confirm that this is intended for Japanese text.

	        Args:
	            text: The string to clean; a falsy value yields ``""``.

	        Returns:
	            The cleaned, stripped string.
	        """
	        if not text:
	            return ""

	        # Control codes allowed through: tab (0x09), LF (0x0A), CR (0x0D).
	        allowed_controls = (0x09, 0x0A, 0x0D)
	        # U+FFFD is printable and therefore needs an explicit reject. The
	        # zero-width characters (ZWSP, ZWNJ, ZWJ, BOM) already fail
	        # isprintable(); they are listed so the intent stays visible.
	        rejected = ("\uFFFD", "\u200B", "\u200C", "\u200D", "\uFEFF")

	        survivors = [
	            ch
	            for ch in text
	            if ord(ch) in allowed_controls
	            or (ch.isprintable() and ch not in rejected)
	        ]
	        return "".join(survivors).strip()

	    def build_word_tree(
	        self,
	        prompt_text: str,
	        root_text: str = "",
	        top_k: int = 5,
	        max_depth: int = 10
	    ) -> List[Dict[str, Any]]:
	        """Build the word tree and return the completed pieces as dicts.

	        The work is delegated to ``self.determiner.build_word_tree`` with
	        ``self.model`` as the model; each returned piece is then flattened.

	        Args:
	            prompt_text: Forwarded to the determiner as ``prompt_text``.
	            root_text: Forwarded to the determiner as ``root_text``.
	            top_k: Forwarded to the determiner as ``top_k``.
	            max_depth: Forwarded to the determiner as ``max_depth``.

	        Returns:
	            A list of ``{"text": str, "probability": float}`` entries, one
	            per piece, in the order the determiner produced them.
	        """
	        completed = self.determiner.build_word_tree(
	            prompt_text=prompt_text,
	            root_text=root_text,
	            model=self.model,
	            top_k=top_k,
	            max_depth=max_depth,
	        )

	        serialised: List[Dict[str, Any]] = []
	        for piece in completed:
	            entry = {
	                "text": self._clean_text(piece.get_full_word()),
	                "probability": float(piece.probability),
	            }
	            serialised.append(entry)
	        return serialised

	    def build_chat_prompt(
	        self,
	        user_content: str,
	        system_content: str = "あなたは親切で役に立つAIアシスタントです。"
	    ) -> str:
	        """Return the chat prompt string produced by the underlying model.

	        Args:
	            user_content: The user message, passed through positionally.
	            system_content: The system message, passed through positionally.

	        Returns:
	            Whatever ``self.model.build_chat_prompt`` returns.
	        """
	        prompt = self.model.build_chat_prompt(user_content, system_content)
	        return prompt

	    def count_words(self, text: str) -> int:
	        """Count the words in ``text`` via the determiner (Sudachi C based).

	        Delegates to the determiner's ``_count_words`` helper.
	        """
	        word_total = self.determiner._count_words(text)
	        return word_total