|
|
|
|
|
""" |
|
|
Coordinate Auto-Estimation Module |
|
|
|
|
|
AIを使って知識タイルの6次元座標を自動推定します。 |
|
|
座標: [x, y, z, c, g, v] |
|
|
- medical_space [x, y, z]: ドメイン固有の3次元空間 |
|
|
- meta_space [c, g, v]: Certainty, Granularity, Verification |
|
|
""" |
|
|
|
|
|
import logging |
|
|
import json |
|
|
from typing import List, Dict, Any, Optional |
|
|
import asyncio |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class CoordinateEstimator:
    """Estimate 6-dimensional knowledge-tile coordinates with an LLM.

    A coordinate is the list ``[x, y, z, c, g, v]``:

    * ``x, y, z`` - domain-specific axes; their meaning per domain is held
      in ``self.domain_schemas``.
    * ``c, g, v`` - meta axes: Certainty, Granularity, Verification.

    Every component is expected to lie in ``[0.0, 1.0]``.
    """

    def __init__(self):
        """Initialise the estimator with the built-in domain schemas."""
        self.domain_schemas = self._load_domain_schemas()

    def _load_domain_schemas(self) -> Dict[str, Dict[str, str]]:
        """Return the axis definitions of every built-in domain.

        The definitions are hard-coded here; the intent noted by the
        original author is to load them from a configuration file later.

        Returns:
            Mapping of domain id to a dict with the keys ``"x"``, ``"y"``
            and ``"z"``, each holding a human-readable axis description
            that is embedded verbatim into the estimation prompt.
        """
        return {
            "medical": {
                "x": "Anatomical location (0.0=nervous system, 0.5=cardiovascular, 1.0=digestive)",
                "y": "Pathological classification (0.0=infectious, 0.5=metabolic, 1.0=trauma)",
                "z": "Treatment level (0.0=prevention, 0.5=diagnosis, 1.0=treatment)"
            },
            "general": {
                "x": "Knowledge category (0.0=science, 0.5=technology, 1.0=humanities)",
                "y": "Complexity level (0.0=basic, 0.5=intermediate, 1.0=advanced)",
                "z": "Application scope (0.0=theoretical, 0.5=practical, 1.0=applied)"
            },
            "legal": {
                "x": "Legal field (0.0=civil, 0.5=criminal, 1.0=commercial)",
                "y": "Court level (0.0=district, 0.5=high, 1.0=supreme)",
                "z": "Era (0.0=classical, 0.5=modern, 1.0=contemporary)"
            },
            "technology": {
                "x": "Technology domain (0.0=hardware, 0.5=software, 1.0=network)",
                "y": "Maturity (0.0=emerging, 0.5=established, 1.0=legacy)",
                "z": "Scale (0.0=personal, 0.5=enterprise, 1.0=global)"
            }
        }

    async def estimate_coordinates(
        self,
        prompt: str,
        response: str,
        domain_id: str,
        llm_inference_func,
        use_reasoning: bool = True
    ) -> Dict[str, Any]:
        """Estimate the 6-dimensional coordinates of one question/answer pair.

        Args:
            prompt: The user's question.
            response: The AI's answer.
            domain_id: Domain id; unknown ids use the ``"general"`` schema.
            llm_inference_func: Async callable that takes the estimation
                prompt (``str``) and returns the LLM's text response.
            use_reasoning: Whether to ask the LLM to explain its reasoning.

        Returns:
            A dict shaped like::

                {
                    "coordinates": [x, y, z, c, g, v],
                    "reasoning": "why these values were chosen",
                    "confidence": 0.85
                }

            When the LLM call fails, its output cannot be parsed, or the
            parsed coordinates are invalid, the result of
            ``_get_default_coordinates`` is returned instead; this method
            does not propagate those failures.
        """
        domain_schema = self.get_domain_schema(domain_id)
        estimation_prompt = self._build_estimation_prompt(
            prompt, response, domain_id, domain_schema, use_reasoning
        )

        try:
            llm_response = await llm_inference_func(estimation_prompt)
            result = self._parse_llm_response(llm_response)
        except Exception as e:
            # Boundary handler: estimation is best-effort, so any LLM or
            # parsing failure degrades to the default coordinates.
            logger.error("Coordinate estimation failed: %s", e)
            return self._get_default_coordinates(domain_id)

        if not self._validate_coordinates(result["coordinates"]):
            logger.error("Invalid coordinates: %s", result["coordinates"])
            return self._get_default_coordinates(domain_id)

        logger.info(
            "Estimated coordinates for domain '%s': %s",
            domain_id, result["coordinates"]
        )
        return result

    def _build_estimation_prompt(
        self,
        prompt: str,
        response: str,
        domain_id: str,
        domain_schema: Dict[str, str],
        use_reasoning: bool
    ) -> str:
        """Build the prompt that asks the LLM for the coordinates.

        Args:
            prompt: The user's question, embedded verbatim.
            response: The AI's answer, embedded verbatim.
            domain_id: Domain name shown to the LLM.
            domain_schema: Axis descriptions; must contain ``"x"``, ``"y"``
                and ``"z"`` (a missing key raises ``KeyError``).
            use_reasoning: If true, request a ``"reasoning"`` field in the
                JSON answer; otherwise request coordinates and confidence
                only.

        Returns:
            The complete prompt text.
        """
        base_prompt = f"""You are an expert in knowledge space mapping and coordinate estimation.

Your task is to estimate the 6-dimensional coordinates that best represent the following knowledge in the domain of "{domain_id}".

**Coordinate System:**

1. **Domain-specific space [x, y, z]** (each 0.0-1.0):
   - x-axis: {domain_schema['x']}
   - y-axis: {domain_schema['y']}
   - z-axis: {domain_schema['z']}

2. **Meta-information space [c, g, v]** (each 0.0-1.0):
   - c (Certainty): How certain/verified is this knowledge?
     * 0.0 = hypothesis, speculation
     * 0.5 = established theory, widely accepted
     * 1.0 = proven fact, empirically verified

   - g (Granularity): How detailed/specific is this knowledge?
     * 0.0 = high-level overview, general concept
     * 0.5 = detailed explanation
     * 1.0 = highly specialized, expert-level detail

   - v (Verification): What is the verification status?
     * 0.0 = unverified, no sources
     * 0.5 = expert-reviewed, single source
     * 1.0 = peer-reviewed, multiple sources confirmed

**Knowledge to estimate:**

Question: {prompt}

Answer: {response}

**Instructions:**
"""

        # The JSON examples live in plain (non f-) strings so that their
        # literal braces need no escaping.
        if use_reasoning:
            base_prompt += """
1. First, analyze the knowledge and explain your reasoning for each coordinate.
2. Then, output the final coordinates.

Format your response as JSON:
{
  "reasoning": "Your detailed reasoning here...",
  "coordinates": [x, y, z, c, g, v],
  "confidence": 0.85
}
"""
        else:
            base_prompt += """
Output ONLY the coordinates as a JSON object:
{
  "coordinates": [x, y, z, c, g, v],
  "confidence": 0.85
}
"""

        base_prompt += """
**Important:**
- All coordinates must be between 0.0 and 1.0
- Use 2 decimal places (e.g., 0.75)
- confidence should reflect how confident you are in this estimation (0.0-1.0)
"""

        return base_prompt

    @staticmethod
    def _extract_json_text(llm_response: str) -> str:
        """Return the text most likely to be the JSON payload.

        Prefers the content of a ```` ```json ```` fence, then of any
        ```` ``` ```` fence, then the whole response. A fence that is
        opened but never closed extends to the end of the response.
        """
        for fence in ("```json", "```"):
            start = llm_response.find(fence)
            if start == -1:
                continue
            start += len(fence)
            end = llm_response.find("```", start)
            if end == -1:
                # Unterminated fence: slicing with -1 would drop the last
                # character (typically the closing brace).
                end = len(llm_response)
            return llm_response[start:end].strip()
        return llm_response.strip()

    @staticmethod
    def _find_embedded_result(text: str) -> Optional[Dict[str, Any]]:
        """Find a JSON object with a ``"coordinates"`` key inside free text.

        Tries to decode a JSON value at every ``{`` in ``text`` and returns
        the first decoded dict that contains ``"coordinates"``, or ``None``
        when there is none.
        """
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and "coordinates" in candidate:
                return candidate
            start = text.find("{", start + 1)
        return None

    def _parse_llm_response(self, llm_response: str) -> Dict[str, Any]:
        """Extract the estimation result from the raw LLM response.

        Parsing is attempted in this order:

        1. The fenced (or whole) response decoded as JSON.
        2. A JSON object containing ``"coordinates"`` embedded anywhere in
           the response (handles surrounding prose).
        3. ``_fallback_parse``: a bare six-number array found by regex.

        Missing ``"reasoning"`` / ``"confidence"`` fields are filled with
        ``"No reasoning provided"`` / ``0.5``.

        Args:
            llm_response: Raw text returned by the LLM.

        Returns:
            Dict with at least ``"coordinates"``, ``"reasoning"`` and
            ``"confidence"``. The coordinates are not validated here.

        Raises:
            ValueError: If a JSON object was decoded but has no
                ``"coordinates"`` field, or no coordinates can be found.
        """
        json_str = self._extract_json_text(llm_response)

        try:
            result = json.loads(json_str)
        except json.JSONDecodeError as e:
            result = self._find_embedded_result(llm_response)
            if result is None:
                logger.error("JSON parse error: %s", e)
                logger.debug("LLM response: %s", llm_response)
                return self._fallback_parse(llm_response)

        if not isinstance(result, dict):
            # Valid JSON that is not an object (e.g. a bare array): let the
            # array-based fallback try to pick the coordinates out.
            logger.debug("LLM response is not a JSON object: %s", llm_response)
            return self._fallback_parse(llm_response)

        if "coordinates" not in result:
            raise ValueError("Missing 'coordinates' field")

        result.setdefault("reasoning", "No reasoning provided")
        result.setdefault("confidence", 0.5)
        return result

    def _fallback_parse(self, llm_response: str) -> Dict[str, Any]:
        """Recover coordinates when the response is not usable JSON.

        Searches for the first bracketed list of exactly six unsigned
        decimal numbers, e.g. ``[0.1, 0.2, 0.3, 0.4, 0.5, 0.6]``.

        Args:
            llm_response: Raw text returned by the LLM.

        Returns:
            Dict with the parsed ``"coordinates"``, a fixed ``"reasoning"``
            note and ``"confidence"`` 0.5.

        Raises:
            ValueError: If no such array is present.
        """
        import re

        # One well-formed unsigned decimal ("1", "1.", "0.25", ".5"); unlike
        # a loose [0-9.]+ it cannot match "1.2.3" or a lone ".".
        number = r'(\d+(?:\.\d*)?|\.\d+)'
        pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 6) + r'\s*\]'
        match = re.search(pattern, llm_response)

        if match:
            return {
                "coordinates": [float(value) for value in match.groups()],
                "reasoning": "Parsed from array notation",
                "confidence": 0.5
            }

        raise ValueError("Could not parse coordinates from LLM response")

    def _validate_coordinates(self, coordinates: List[float]) -> bool:
        """Check that ``coordinates`` is a list of six numbers in [0.0, 1.0].

        Booleans are rejected even though ``bool`` is a subclass of ``int``.
        NaN and infinities fail the range check.

        Returns:
            ``True`` if valid, ``False`` otherwise (the reason is logged,
            except for the non-list case).
        """
        if not isinstance(coordinates, list):
            return False

        if len(coordinates) != 6:
            logger.error("Expected 6 coordinates, got %s", len(coordinates))
            return False

        for i, coord in enumerate(coordinates):
            if isinstance(coord, bool) or not isinstance(coord, (int, float)):
                logger.error("Coordinate %s is not a number: %s", i, coord)
                return False

            if not (0.0 <= coord <= 1.0):
                logger.error(
                    "Coordinate %s out of range [0.0, 1.0]: %s", i, coord
                )
                return False

        return True

    def _get_default_coordinates(self, domain_id: str) -> Dict[str, Any]:
        """Return the neutral result used when estimation fails.

        Args:
            domain_id: Domain id, used only in the warning log message.

        Returns:
            A new dict with all six coordinates at 0.5 and confidence 0.3.
        """
        logger.warning("Using default coordinates for domain '%s'", domain_id)

        return {
            "coordinates": [0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
            "reasoning": "Default coordinates (estimation failed)",
            "confidence": 0.3
        }

    async def estimate_batch(
        self,
        knowledge_items: List[Dict[str, str]],
        llm_inference_func,
        max_concurrent: int = 3
    ) -> List[Dict[str, Any]]:
        """Estimate coordinates for several knowledge items concurrently.

        Args:
            knowledge_items: ``[{"prompt": "...", "response": "...",
                "domain_id": "..."}, ...]``; ``"domain_id"`` is optional and
                defaults to ``"general"``.
            llm_inference_func: Async LLM inference callable, as for
                ``estimate_coordinates``.
            max_concurrent: Maximum number of estimations in flight at
                once; must be at least 1.

        Returns:
            One result per item, in the same order as ``knowledge_items``.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1 (a semaphore
                of 0 would make every task wait forever).
            KeyError: If an item lacks ``"prompt"`` or ``"response"``.
        """
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def estimate_with_semaphore(item):
            async with semaphore:
                return await self.estimate_coordinates(
                    prompt=item["prompt"],
                    response=item["response"],
                    domain_id=item.get("domain_id", "general"),
                    llm_inference_func=llm_inference_func
                )

        tasks = [estimate_with_semaphore(item) for item in knowledge_items]
        results = await asyncio.gather(*tasks)

        return results

    def get_domain_schema(self, domain_id: str) -> Dict[str, str]:
        """Return the axis schema for ``domain_id`` (e.g. for UI display).

        Unknown domain ids fall back to the ``"general"`` schema. The
        returned dict is the stored object itself, not a copy.
        """
        return self.domain_schemas.get(domain_id, self.domain_schemas["general"])

    def add_domain_schema(self, domain_id: str, schema: Dict[str, str]):
        """Register (or replace) the axis schema of a domain.

        Args:
            domain_id: Domain id to register the schema under.
            schema: Axis descriptions; must contain ``"x"``, ``"y"``, ``"z"``.

        Raises:
            ValueError: If any of the three axis keys is missing.
        """
        if not all(key in schema for key in ("x", "y", "z")):
            raise ValueError("Schema must contain 'x', 'y', 'z' definitions")

        self.domain_schemas[domain_id] = schema
        logger.info("Added domain schema for '%s'", domain_id)

    def interpolate_coordinates(
        self,
        coord1: List[float],
        coord2: List[float],
        weight: float = 0.5
    ) -> List[float]:
        """Linearly interpolate between two 6-dimensional coordinates.

        Intended for estimating the coordinates of similar knowledge.

        Args:
            coord1: First coordinate.
            coord2: Second coordinate.
            weight: Interpolation weight (0.0 = ``coord1``, 1.0 =
                ``coord2``). It is not clamped, so values outside
                ``[0.0, 1.0]`` extrapolate.

        Returns:
            The interpolated coordinate as a new list.

        Raises:
            ValueError: If either input does not have exactly six elements.
        """
        if len(coord1) != 6 or len(coord2) != 6:
            raise ValueError("Both coordinates must be 6-dimensional")

        return [
            a * (1 - weight) + b * weight
            for a, b in zip(coord1, coord2)
        ]
|
|
|