ryoshimu commited on
Commit
a204c7a
·
1 Parent(s): 408311c
Files changed (3) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +260 -0
  3. requirements.txt +3 -0
__pycache__/app.cpython-313.pyc ADDED
Binary file (11.6 kB). View file
 
app.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Iterable, List, Optional, Tuple
5
+
6
+ import gradio as gr
7
+ import requests
8
+
9
+
10
class ToolCallError(RuntimeError):
    """Error signalling that a storage tool call failed after exhausting retries."""
12
+
13
+
14
+ @dataclass
15
+ class StorageHit:
16
+ file_id: Optional[str]
17
+ file_name: Optional[str]
18
+ chunk_id: Optional[str]
19
+ score: Optional[float]
20
+ snippet: Optional[str]
21
+
22
+
23
class OpenAIStorageClient:
    """Minimal client for the OpenAI Storage search/get tool endpoints.

    Wraps ``POST /tools/storage_search`` and ``POST /tools/storage_get`` with
    bearer-token auth, a per-request timeout, and a small retry budget that
    is shared by both calls via :meth:`_post_with_retry`.
    """

    def __init__(self, api_key: str, base_url: Optional[str] = None, timeout: float = 15.0):
        """Create a client.

        Args:
            api_key: API key; must be non-empty.
            base_url: API root; defaults to ``$OPENAI_API_BASE`` or the
                public ``https://api.openai.com/v1`` endpoint.
            timeout: Per-request timeout in seconds.

        Raises:
            ValueError: If ``api_key`` is falsy.
        """
        if not api_key:
            raise ValueError("api_key is required")
        self.api_key = api_key
        base = base_url or os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
        # Normalize so path concatenation in _post never produces "//".
        self.base_url = base.rstrip("/")
        self.timeout = timeout

    def _post(self, path: str, payload: Dict) -> Dict:
        """POST ``payload`` as JSON to ``path`` and return the decoded body.

        Raises:
            ToolCallError: On transport errors, HTTP status >= 400, or a
                non-JSON response body.
        """
        url = f"{self.base_url}{path}"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        try:
            response = requests.post(url, json=payload, headers=headers, timeout=self.timeout)
        except requests.RequestException as exc:
            raise ToolCallError(str(exc)) from exc

        if response.status_code >= 400:
            raise ToolCallError(f"Tool call failed with status {response.status_code}: {response.text}")

        try:
            return response.json()
        except ValueError as exc:
            raise ToolCallError("Tool call returned non-JSON response") from exc

    def _post_with_retry(self, path: str, payload: Dict, retries: int) -> Dict:
        """Call :meth:`_post`, retrying up to ``retries`` additional times.

        Extracted so storage_search/storage_get share one retry loop instead
        of duplicating it.  The final ToolCallError is re-raised unchanged.
        """
        attempts = 0
        while True:
            try:
                return self._post(path, payload)
            except ToolCallError:
                attempts += 1
                if attempts > retries:
                    raise

    def storage_search(
        self,
        query: str,
        top_k: int = 5,
        filters: Optional[Dict] = None,
        retries: int = 1,
    ) -> "List[StorageHit]":
        """Search storage and return the parsed hits (possibly empty).

        Args:
            query: Search query text.
            top_k: Maximum number of hits requested from the backend.
            filters: Optional backend-side filter object.
            retries: Extra attempts allowed after the first failure.

        Raises:
            ToolCallError: After the retry budget is exhausted.
        """
        payload: Dict[str, object] = {"query": query, "top_k": top_k}
        if filters:
            payload["filters"] = filters

        raw = self._post_with_retry("/tools/storage_search", payload, retries)
        # Missing keys simply become None on the dataclass.
        return [
            StorageHit(
                file_id=item.get("file_id"),
                file_name=item.get("file_name"),
                chunk_id=item.get("chunk_id"),
                score=item.get("score"),
                snippet=item.get("snippet"),
            )
            for item in raw.get("hits", [])
        ]

    def storage_get(
        self,
        *,
        chunk_id: Optional[str] = None,
        file_id: Optional[str] = None,
        retries: int = 1,
    ) -> Optional[str]:
        """Fetch raw content for a chunk or a whole file.

        Returns:
            The ``content`` string, or None when the endpoint responds
            without a string content field.

        Raises:
            ValueError: If neither identifier is given.
            ToolCallError: After the retry budget is exhausted.
        """
        if not chunk_id and not file_id:
            raise ValueError("Either chunk_id or file_id must be provided.")

        payload: Dict[str, object] = {}
        if chunk_id:
            payload["chunk_id"] = chunk_id
        if file_id:
            payload["file_id"] = file_id

        raw = self._post_with_retry("/tools/storage_get", payload, retries)
        content = raw.get("content")
        return content if isinstance(content, str) else None
113
+
114
+
115
def generate_search_queries(question: str) -> List[str]:
    """Generate up to three search queries following the spec.

    The raw question always comes first; an ASCII-token-only variant and a
    punctuation-stripped variant are appended only when they add something
    new.

    Args:
        question: Free-form user question (may mix Japanese and English).

    Returns:
        At most three distinct query strings; an empty list for blank input.
    """
    normalized = question.strip()
    if not normalized:
        return []

    queries: List[str] = [normalized]

    # English-ish variant: keep only alphanumeric runs (model names, codes).
    ascii_tokens = re.findall(r"[A-Za-z0-9]+", normalized)
    if ascii_tokens:
        english_query = " ".join(ascii_tokens)
        if english_query and english_query.lower() != normalized.lower():
            queries.append(english_query)

    # Japanese variant: replace common CJK punctuation with spaces.
    # (The original character class contained a duplicated fullwidth
    # semicolon; the duplicate is removed — no behavior change.)
    japanese_clean = re.sub(r"[、。；，]", " ", normalized)
    japanese_clean = re.sub(r"\s+", " ", japanese_clean).strip()
    if japanese_clean and japanese_clean not in queries:
        queries.append(japanese_clean)

    return queries[:3]
135
+
136
+
137
def deduplicate_hits(hits_by_query: Iterable[Tuple[str, List[StorageHit]]]) -> List[StorageHit]:
    """Merge and deduplicate hits by chunk/file IDs while preserving ordering.

    The first occurrence of each (chunk_id, file_id) pair wins; the query
    text paired with each hit list is ignored.
    """
    merged: List[StorageHit] = []
    seen_identities = set()
    for _query, query_hits in hits_by_query:
        for candidate in query_hits:
            identity = (candidate.chunk_id, candidate.file_id)
            if identity not in seen_identities:
                seen_identities.add(identity)
                merged.append(candidate)
    return merged
149
+
150
+
151
def build_excerpt(text: str, limit: int = 280) -> str:
    """Return a compact excerpt trimmed to the requested limit.

    Whitespace runs are collapsed to single spaces; text longer than
    ``limit`` is cut and suffixed with an ellipsis.
    """
    compact = re.sub(r"\s+", " ", text).strip()
    if len(compact) > limit:
        # Reserve one character for the ellipsis marker.
        return compact[: limit - 1] + "…"
    return compact
157
+
158
+
159
def produce_answer(question: str, top_k: int) -> str:
    """Run the full MCP responder pipeline and return the answer text.

    Validates the input and API key, searches storage with up to three
    query variants, fetches content for the top merged hits, and formats
    cited evidence lines.  All failure modes map to fixed user-facing
    Japanese messages.
    """
    normalized_question = (question or "").strip()
    if not normalized_question:
        return "質問が入力されていません。"

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return "OPENAI_API_KEY が設定されていません。環境変数を確認してください。"

    client = OpenAIStorageClient(api_key=api_key)

    search_queries = generate_search_queries(normalized_question)
    if not search_queries:
        return "該当データがありません。"

    try:
        # Collect (query, hits) pairs, keeping only queries that matched.
        hits_by_query = []
        for search_query in search_queries:
            query_hits = client.storage_search(query=search_query, top_k=top_k)
            if query_hits:
                hits_by_query.append((search_query, query_hits))

        if not hits_by_query:
            return "該当データがありません。"

        evidence_lines: List[str] = []
        for hit in deduplicate_hits(hits_by_query)[:3]:
            content: Optional[str] = None
            try:
                if hit.chunk_id:
                    content = client.storage_get(chunk_id=hit.chunk_id)
                if not content and hit.file_id:
                    content = client.storage_get(file_id=hit.file_id)
            except ToolCallError:
                # Retry budget exhausted inside the client -> user-facing error.
                return "システムエラーが発生しました。時間をおいて再度お試しください。"

            source_text = content or hit.snippet
            if not source_text:
                continue

            excerpt = build_excerpt(source_text)
            if not excerpt:
                continue

            citation = f"{hit.file_name or (hit.file_id or 'unknown')}#{hit.chunk_id or 'chunk'}"
            evidence_lines.append(f"- {excerpt} [{citation}]")

        if not evidence_lines:
            return "該当データがありません。"
        return "\n".join(evidence_lines)

    except ToolCallError:
        return "システムエラーが発生しました。時間をおいて再度お試しください。"
218
+
219
+
220
def respond(question: str, top_k: int = 5) -> str:
    """Wrapper for the Gradio UI.

    Coerces top_k to int and shields the UI from any unexpected exception
    by returning a fixed error message instead of raising.
    """
    try:
        answer = produce_answer(question, top_k=int(top_k))
    except Exception:  # pragma: no cover - defensive fallback for UI
        return "システムエラーが発生しました。時間をおいて再度お試しください。"
    return answer
226
+
227
+
228
# ---------------------------------------------------------------------------
# Gradio UI: question textbox + top_k slider -> respond() -> answer textbox.
# Built at import time; launched only when the file is run directly.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Header text shown above the controls (Japanese UI).
    gr.Markdown(
        """
        ## MCP Storage レスポンダ
        OpenAI Storage (Files / Vector Store) に登録されたデータを検索し、根拠付きで回答します。
        OPENAI_API_KEY を環境変数に設定してからご利用ください。
        """
    )

    with gr.Row():
        # Free-form user question.
        question_box = gr.Textbox(
            label="質問",
            placeholder="ここに質問を入力してください（例: 製品Aの対応OSは？）",
            lines=4,
        )
    with gr.Row():
        # Number of search hits requested per query (1-10).
        top_k_slider = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="検索件数 (top_k)",
        )
    with gr.Row():
        output_box = gr.Textbox(label="回答", lines=10)
    with gr.Row():
        submit_button = gr.Button("検索")

    # Wire the button: (question, top_k) -> respond() -> answer box.
    submit_button.click(respond, inputs=[question_box, top_k_slider], outputs=output_box)


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.49.1
2
+ openai>=1.51.0
3
+ requests>=2.32.3