ryoshimu
committed on
Commit
·
668c519
1
Parent(s):
3d8412f
commit
Browse files
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +15 -11
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -8,11 +8,13 @@ import requests
|
|
| 8 |
|
| 9 |
|
| 10 |
class ToolCallError(RuntimeError):
|
| 11 |
-
"""
|
| 12 |
|
| 13 |
|
| 14 |
@dataclass
|
| 15 |
class StorageHit:
|
|
|
|
|
|
|
| 16 |
file_id: Optional[str]
|
| 17 |
file_name: Optional[str]
|
| 18 |
chunk_id: Optional[str]
|
|
@@ -24,9 +26,10 @@ VECTOR_STORE_ID_RE = re.compile(r"vs_[a-zA-Z0-9]{8,}")
|
|
| 24 |
|
| 25 |
|
| 26 |
class OpenAIStorageClient:
|
| 27 |
-
"""
|
| 28 |
|
| 29 |
def __init__(self, api_key: str, base_url: Optional[str] = None, timeout: float = 15.0):
|
|
|
|
| 30 |
if not api_key:
|
| 31 |
raise ValueError("api_key is required")
|
| 32 |
self.api_key = api_key
|
|
@@ -35,6 +38,7 @@ class OpenAIStorageClient:
|
|
| 35 |
self.timeout = timeout
|
| 36 |
|
| 37 |
def _post(self, path: str, payload: Dict) -> Dict:
|
|
|
|
| 38 |
url = f"{self.base_url}{path}"
|
| 39 |
headers = {
|
| 40 |
"Authorization": f"Bearer {self.api_key}",
|
|
@@ -60,6 +64,7 @@ class OpenAIStorageClient:
|
|
| 60 |
filters: Optional[Dict] = None,
|
| 61 |
retries: int = 1,
|
| 62 |
) -> List[StorageHit]:
|
|
|
|
| 63 |
payload: Dict[str, object] = {"query": query, "top_k": top_k}
|
| 64 |
if filters:
|
| 65 |
payload["filters"] = filters
|
|
@@ -92,6 +97,7 @@ class OpenAIStorageClient:
|
|
| 92 |
file_id: Optional[str] = None,
|
| 93 |
retries: int = 1,
|
| 94 |
) -> Optional[str]:
|
|
|
|
| 95 |
if not chunk_id and not file_id:
|
| 96 |
raise ValueError("Either chunk_id or file_id must be provided.")
|
| 97 |
|
|
@@ -114,7 +120,7 @@ class OpenAIStorageClient:
|
|
| 114 |
if attempts > retries:
|
| 115 |
raise
|
| 116 |
def get_vector_store_ids_from_env() -> List[str]:
|
| 117 |
-
"""
|
| 118 |
raw = os.getenv("VECTOR_STORE_ID", "").strip()
|
| 119 |
if not raw:
|
| 120 |
return []
|
|
@@ -129,7 +135,7 @@ def get_vector_store_ids_from_env() -> List[str]:
|
|
| 129 |
|
| 130 |
|
| 131 |
def generate_search_queries(question: str) -> List[str]:
|
| 132 |
-
"""
|
| 133 |
normalized = question.strip()
|
| 134 |
if not normalized:
|
| 135 |
return []
|
|
@@ -151,7 +157,7 @@ def generate_search_queries(question: str) -> List[str]:
|
|
| 151 |
|
| 152 |
|
| 153 |
def deduplicate_hits(hits_by_query: Iterable[Tuple[str, List[StorageHit]]]) -> List[StorageHit]:
|
| 154 |
-
"""
|
| 155 |
seen_keys = set()
|
| 156 |
merged: List[StorageHit] = []
|
| 157 |
for _, hits in hits_by_query:
|
|
@@ -165,7 +171,7 @@ def deduplicate_hits(hits_by_query: Iterable[Tuple[str, List[StorageHit]]]) -> L
|
|
| 165 |
|
| 166 |
|
| 167 |
def build_excerpt(text: str, limit: int = 280) -> str:
|
| 168 |
-
"""
|
| 169 |
collapsed = re.sub(r"\s+", " ", text).strip()
|
| 170 |
if len(collapsed) <= limit:
|
| 171 |
return collapsed
|
|
@@ -173,7 +179,7 @@ def build_excerpt(text: str, limit: int = 280) -> str:
|
|
| 173 |
|
| 174 |
|
| 175 |
def produce_answer(question: str, top_k: int) -> str:
|
| 176 |
-
"""
|
| 177 |
question = (question or "").strip()
|
| 178 |
if not question:
|
| 179 |
return "質問が入力されていません。"
|
|
@@ -239,7 +245,7 @@ def produce_answer(question: str, top_k: int) -> str:
|
|
| 239 |
|
| 240 |
|
| 241 |
def respond(question: str, top_k: int = 5) -> str:
|
| 242 |
-
"""
|
| 243 |
try:
|
| 244 |
return produce_answer(question, top_k=int(top_k))
|
| 245 |
except Exception: # pragma: no cover - defensive fallback for UI
|
|
@@ -249,9 +255,7 @@ def respond(question: str, top_k: int = 5) -> str:
|
|
| 249 |
with gr.Blocks() as demo:
|
| 250 |
gr.Markdown(
|
| 251 |
"""
|
| 252 |
-
## MCP Storage
|
| 253 |
-
OpenAI Storage (Files / Vector Store) に登録されたデータを検索し、根拠付きで回答します。
|
| 254 |
-
OPENAI_API_KEY と VECTOR_STORE_ID を環境変数に設定してからご利用ください。
|
| 255 |
"""
|
| 256 |
)
|
| 257 |
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class ToolCallError(RuntimeError):
|
| 11 |
+
"""リトライを行ってもツール呼び出しが失敗した場合に発生する例外。"""
|
| 12 |
|
| 13 |
|
| 14 |
@dataclass
|
| 15 |
class StorageHit:
|
| 16 |
+
"""storage_search が返すヒット1件分の情報を保持する構造体。"""
|
| 17 |
+
|
| 18 |
file_id: Optional[str]
|
| 19 |
file_name: Optional[str]
|
| 20 |
chunk_id: Optional[str]
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
class OpenAIStorageClient:
|
| 29 |
+
"""OpenAI Storage の検索・取得APIを呼び出す最小限のクライアント。"""
|
| 30 |
|
| 31 |
def __init__(self, api_key: str, base_url: Optional[str] = None, timeout: float = 15.0):
|
| 32 |
+
"""APIキーと各種設定を初期化する。"""
|
| 33 |
if not api_key:
|
| 34 |
raise ValueError("api_key is required")
|
| 35 |
self.api_key = api_key
|
|
|
|
| 38 |
self.timeout = timeout
|
| 39 |
|
| 40 |
def _post(self, path: str, payload: Dict) -> Dict:
|
| 41 |
+
"""与えられたパスにPOSTし、JSONレスポンスを辞書で返す。"""
|
| 42 |
url = f"{self.base_url}{path}"
|
| 43 |
headers = {
|
| 44 |
"Authorization": f"Bearer {self.api_key}",
|
|
|
|
| 64 |
filters: Optional[Dict] = None,
|
| 65 |
retries: int = 1,
|
| 66 |
) -> List[StorageHit]:
|
| 67 |
+
"""storage_search エンドポイントを呼び出してヒットを整形する。"""
|
| 68 |
payload: Dict[str, object] = {"query": query, "top_k": top_k}
|
| 69 |
if filters:
|
| 70 |
payload["filters"] = filters
|
|
|
|
| 97 |
file_id: Optional[str] = None,
|
| 98 |
retries: int = 1,
|
| 99 |
) -> Optional[str]:
|
| 100 |
+
"""storage_get エンドポイントからチャンクまたはファイルを取得する。"""
|
| 101 |
if not chunk_id and not file_id:
|
| 102 |
raise ValueError("Either chunk_id or file_id must be provided.")
|
| 103 |
|
|
|
|
| 120 |
if attempts > retries:
|
| 121 |
raise
|
| 122 |
def get_vector_store_ids_from_env() -> List[str]:
|
| 123 |
+
"""環境変数 VECTOR_STORE_ID からベクターストアIDのリストを抽出する。"""
|
| 124 |
raw = os.getenv("VECTOR_STORE_ID", "").strip()
|
| 125 |
if not raw:
|
| 126 |
return []
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
def generate_search_queries(question: str) -> List[str]:
|
| 138 |
+
"""自然言語の質問から最大3件の検索クエリを生成する。"""
|
| 139 |
normalized = question.strip()
|
| 140 |
if not normalized:
|
| 141 |
return []
|
|
|
|
| 157 |
|
| 158 |
|
| 159 |
def deduplicate_hits(hits_by_query: Iterable[Tuple[str, List[StorageHit]]]) -> List[StorageHit]:
|
| 160 |
+
"""クエリごとのヒットを重複除去しながら結合する。"""
|
| 161 |
seen_keys = set()
|
| 162 |
merged: List[StorageHit] = []
|
| 163 |
for _, hits in hits_by_query:
|
|
|
|
| 171 |
|
| 172 |
|
| 173 |
def build_excerpt(text: str, limit: int = 280) -> str:
|
| 174 |
+
"""テキストを指定文字数以内に要約した抜粋を返す。"""
|
| 175 |
collapsed = re.sub(r"\s+", " ", text).strip()
|
| 176 |
if len(collapsed) <= limit:
|
| 177 |
return collapsed
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
def produce_answer(question: str, top_k: int) -> str:
|
| 182 |
+
"""質問に応じて検索を実行し、仕様に沿った回答テキストを生成する。"""
|
| 183 |
question = (question or "").strip()
|
| 184 |
if not question:
|
| 185 |
return "質問が入力されていません。"
|
|
|
|
| 245 |
|
| 246 |
|
| 247 |
def respond(question: str, top_k: int = 5) -> str:
|
| 248 |
+
"""Gradio UI から呼び出されるエントリーポイント。"""
|
| 249 |
try:
|
| 250 |
return produce_answer(question, top_k=int(top_k))
|
| 251 |
except Exception: # pragma: no cover - defensive fallback for UI
|
|
|
|
| 255 |
with gr.Blocks() as demo:
|
| 256 |
gr.Markdown(
|
| 257 |
"""
|
| 258 |
+
## MCP Storage
|
|
|
|
|
|
|
| 259 |
"""
|
| 260 |
)
|
| 261 |
|