aiqtech committed on
Commit
8c1ebc3
·
verified ·
1 Parent(s): 0fa7d12

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +894 -0
app-backup.py ADDED
@@ -0,0 +1,894 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ⚡ Speed-Optimized Multi-Agent RAG System for Complex Questions
3
+ 병렬 처리, 스마트 캐싱, 동적 파이프라인으로 복잡한 질문도 빠르게 처리
4
+ """
5
+
6
+ import os
7
+ import json
8
+ import time
9
+ import asyncio
10
+ import hashlib
11
+ from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
12
+ from datetime import datetime, timedelta
13
+ from enum import Enum
14
+ from collections import deque
15
+ import threading
16
+ import queue
17
+ from concurrent.futures import ThreadPoolExecutor, as_completed
18
+ import aiohttp
19
+
20
+ import requests
21
+ import gradio as gr
22
+ from pydantic import BaseModel, Field
23
+ from dotenv import load_dotenv
24
+
25
+ # ν™˜κ²½λ³€μˆ˜ λ‘œλ“œ
26
+ load_dotenv()
27
+
28
+
29
+ # ============================================================================
30
+ # 데이터 λͺ¨λΈ μ •μ˜
31
+ # ============================================================================
32
+
33
class AgentRole(Enum):
    """Roles an agent can take in the multi-agent pipeline.

    The values are the lowercase role names; the pipeline uses the members
    as keys of its per-role system-prompt table.
    """

    SUPERVISOR = "supervisor"
    CREATIVE = "creative"
    CRITIC = "critic"
    FINALIZER = "finalizer"
39
+
40
+
41
class ExecutionMode(Enum):
    """Execution strategies for running the agents."""

    PARALLEL = "parallel"  # parallel processing
    SEQUENTIAL = "sequential"  # sequential processing
    HYBRID = "hybrid"  # hybrid of the two
46
+
47
+
48
class Message(BaseModel):
    """A single chat message: who said it, what was said, and optionally when."""

    role: str
    content: str
    # Optional creation time; left as None when the caller does not supply one.
    timestamp: Optional[datetime] = None
52
+
53
+
54
class AgentResponse(BaseModel):
    """Result produced by one agent: its role, text, and timing information."""

    role: AgentRole
    content: str
    # Time spent producing the response (unit not shown here; presumably seconds).
    processing_time: float
    # Free-form extra data; None when nothing is attached.
    metadata: Optional[Dict] = None
59
+
60
+
61
+ # ============================================================================
62
+ # 슀마트 캐싱 μ‹œμŠ€ν…œ
63
+ # ============================================================================
64
+
65
class SmartCache:
    """In-memory response cache with TTL expiry and keyword-based reasoning patterns.

    Entries are keyed by the MD5 hex digest of the query text. Three parallel
    dicts (``cache``, ``timestamps``, ``access_count``) share those keys.
    When the cache is full, expired entries are purged first and then the
    least-frequently-used entry is evicted.
    """

    def __init__(self, max_size: int = 100, ttl_hours: int = 24):
        """Create an empty cache.

        Args:
            max_size: Maximum number of entries; ``0`` or less disables storing.
            ttl_hours: Lifetime of an entry in hours.
        """
        self.cache = {}
        self.access_count = {}
        self.timestamps = {}
        self.max_size = max_size
        self.ttl = timedelta(hours=ttl_hours)
        self.reasoning_patterns = self._init_reasoning_patterns()

    def _init_reasoning_patterns(self) -> Dict:
        """Return the built-in reasoning patterns (answer outline + trigger keywords)."""
        return {
            "analysis": {
                "structure": ["ν˜„ν™© 뢄석", "핡심 μš”μΈ", "영ν–₯ 평가", "μ „λž΅ μ œμ•ˆ"],
                "keywords": ["뢄석", "평가", "영ν–₯", "μ „λž΅"],
            },
            "comparison": {
                "structure": ["λŒ€μƒ μ •μ˜", "비ꡐ κΈ°μ€€", "μž₯단점 뢄석", "κ²°λ‘ "],
                "keywords": ["비ꡐ", "차이", "μž₯단점", "vs"],
            },
            "creative": {
                "structure": ["문제 μ •μ˜", "창의적 μ ‘κ·Ό", "κ΅¬ν˜„ 방법", "μ˜ˆμƒ 효과"],
                "keywords": ["창의적", "ν˜μ‹ μ ", "μƒˆλ‘œμš΄", "아이디어"],
            },
            "technical": {
                "structure": ["기술 κ°œμš”", "핡심 원리", "κ΅¬ν˜„ 상세", "μ‹€μš© μ˜ˆμ‹œ"],
                "keywords": ["기술", "κ΅¬ν˜„", "μ½”λ“œ", "μ‹œμŠ€ν…œ"],
            },
        }

    def get_query_hash(self, query: str) -> str:
        """Return the cache key for ``query`` (MD5 hex digest; not a security use)."""
        return hashlib.md5(query.encode()).hexdigest()

    def _evict(self, key: str) -> None:
        """Remove ``key`` from all three bookkeeping dicts, tolerating gaps."""
        self.cache.pop(key, None)
        self.timestamps.pop(key, None)
        self.access_count.pop(key, None)

    def get(self, query: str) -> Optional[Dict]:
        """Return the cached response for ``query``, or ``None`` on miss/expiry.

        A hit increments the entry's access counter; an expired entry is
        removed as a side effect.
        """
        key = self.get_query_hash(query)
        if key not in self.cache:
            return None

        if datetime.now() - self.timestamps[key] < self.ttl:
            self.access_count[key] += 1
            return self.cache[key]

        # Entry outlived its TTL: drop it and report a miss.
        self._evict(key)
        return None

    def set(self, query: str, response: Dict):
        """Store ``response`` under ``query``, evicting only when a new key needs room.

        Overwriting an existing key never evicts anything. The timestamp and
        access counter of the stored key are reset.
        """
        if self.max_size <= 0:
            # A non-positive capacity means there is nowhere to store entries.
            return

        key = self.get_query_hash(query)

        if key not in self.cache and len(self.cache) >= self.max_size:
            # Prefer dropping entries that could never be served again.
            now = datetime.now()
            expired = [k for k, stamp in self.timestamps.items() if now - stamp >= self.ttl]
            for stale_key in expired:
                self._evict(stale_key)

            # Still full: evict the least-frequently-used entry.
            if len(self.cache) >= self.max_size:
                least_used = min(self.access_count, key=self.access_count.get)
                self._evict(least_used)

        self.cache[key] = response
        self.timestamps[key] = datetime.now()
        self.access_count[key] = 1

    def get_reasoning_pattern(self, query: str) -> Optional[Dict]:
        """Return the first pattern whose keyword occurs in the lowercased query.

        Returns:
            ``{"type": <pattern name>, "structure": <outline list>}`` or
            ``None`` when no keyword matches.
        """
        query_lower = query.lower()

        for pattern_type, pattern_data in self.reasoning_patterns.items():
            if any(keyword in query_lower for keyword in pattern_data["keywords"]):
                return {
                    "type": pattern_type,
                    "structure": pattern_data["structure"],
                }

        return None
146
+
147
+
148
+ # ============================================================================
149
+ # 병렬 처리 μ΅œμ ν™” Brave Search
150
+ # ============================================================================
151
+
152
class AsyncBraveSearch:
    """Asynchronous client for the Brave web-search endpoint.

    Search is best-effort: any transport, timeout, or decoding problem yields
    an empty result list instead of an exception.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Store the API key, falling back to ``BRAVE_SEARCH_API_KEY`` from the environment."""
        self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY")
        self.base_url = "https://api.search.brave.com/res/v1/web/search"

    @staticmethod
    def _parse_results(data: Any, count: int) -> List[Dict]:
        """Extract up to ``count`` normalized hits from a decoded API payload.

        Missing or malformed sections (``web`` absent or null, ``results`` not
        a list, non-dict items) are skipped rather than raising.
        """
        if not isinstance(data, dict):
            return []
        web = data.get("web")
        items = web.get("results") if isinstance(web, dict) else None
        if not isinstance(items, list):
            return []

        return [
            {
                "title": item.get("title", ""),
                "url": item.get("url", ""),
                "description": item.get("description", ""),
                "age": item.get("age", ""),
            }
            for item in items[:count]
            if isinstance(item, dict)
        ]

    async def search_async(self, query: str, count: int = 5) -> List[Dict]:
        """Run a web search and return a list of ``title/url/description/age`` dicts.

        Args:
            query: Search text.
            count: Maximum number of results; coerced to ``int`` because UI
                sliders may hand over a float.

        Returns:
            The parsed hits, or ``[]`` when no API key is configured, the
            server does not answer 200, or the request fails.
        """
        if not self.api_key:
            return []

        count = int(count)

        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": self.api_key,
        }

        params = {
            "q": query,
            "count": count,
            # Query values must be str/int/float; a Python bool is rejected
            # by the URL builder, so the flag is sent as text.
            "text_decorations": "false",
            "search_lang": "ko",
            "country": "KR",
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    self.base_url,
                    headers=headers,
                    params=params,
                    timeout=aiohttp.ClientTimeout(total=5),
                ) as response:
                    if response.status != 200:
                        return []
                    data = await response.json()
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            # Network failure, timeout, or undecodable body: search is optional,
            # so degrade to "no results". Cancellation is deliberately not caught.
            return []

        return self._parse_results(data, count)
203
+
204
+
205
+ # ============================================================================
206
+ # μ΅œμ ν™”λœ Fireworks ν΄λΌμ΄μ–ΈνŠΈ
207
+ # ============================================================================
208
+
209
class OptimizedFireworksClient:
    """Streaming chat-completions client for the Fireworks inference API."""

    def __init__(self, api_key: Optional[str] = None):
        """Configure endpoint, headers, and model.

        Raises:
            ValueError: If no key is passed and ``FIREWORKS_API_KEY`` is unset.
        """
        self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
        if not self.api_key:
            raise ValueError("FIREWORKS_API_KEY is required!")

        self.base_url = "https://api.fireworks.ai/inference/v1/chat/completions"
        self.headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

        # A single fixed model is used for every request.
        self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"

    @staticmethod
    def _parse_sse_line(raw_line: bytes) -> Tuple[bool, Optional[str]]:
        """Decode one server-sent-event line.

        Returns:
            ``(done, text)`` where ``done`` is True for the ``[DONE]`` marker
            and ``text`` is the non-empty content delta carried by the line,
            or ``None`` when the line carries no text (comments, role-only or
            null deltas, malformed JSON).
        """
        line = raw_line.decode("utf-8", errors="replace").strip()
        if not line.startswith("data: "):
            return False, None

        payload = line[6:]
        if payload == "[DONE]":
            return True, None

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            return False, None

        choices = data.get("choices") if isinstance(data, dict) else None
        if not choices or not isinstance(choices[0], dict):
            return False, None

        delta = choices[0].get("delta") or {}
        text = delta.get("content") if isinstance(delta, dict) else None
        # Only real text is forwarded; a null content would break string
        # concatenation in the consumers.
        if isinstance(text, str) and text:
            return False, text
        return False, None

    async def chat_stream_async(
        self,
        messages: List[Dict],
        **kwargs
    ) -> AsyncGenerator[str, None]:
        """Stream the model's reply as text fragments.

        Args:
            messages: Chat messages in ``{"role", "content"}`` form.
            **kwargs: Optional ``max_tokens``, ``temperature``, ``top_p``,
                ``top_k`` overrides.

        Yields:
            Content fragments in arrival order. Failures are reported in-band
            as a single error string rather than raised.
        """
        payload = {
            "model": self.model,
            "messages": messages,
            "max_tokens": kwargs.get("max_tokens", 2000),
            "temperature": kwargs.get("temperature", 0.7),
            "top_p": kwargs.get("top_p", 1.0),
            "top_k": kwargs.get("top_k", 40),
            "stream": True,
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.base_url,
                    headers={**self.headers, "Accept": "text/event-stream"},
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=30),
                ) as response:
                    if response.status != 200:
                        # An error body is not an event stream; surface it
                        # instead of ending with an empty reply.
                        detail = (await response.text())[:200]
                        yield f"였λ₯˜: HTTP {response.status} {detail}"
                        return

                    async for line in response.content:
                        done, text = self._parse_sse_line(line)
                        if done:
                            break
                        if text is not None:
                            yield text
        except Exception as e:
            yield f"였λ₯˜: {str(e)}"
268
+
269
+
270
+ # ============================================================================
271
+ # κ²½λŸ‰ν™”λœ μΆ”λ‘  체인
272
+ # ============================================================================
273
+
274
class LightweightReasoningChain:
    """Template lookup that maps a query type to a fixed reasoning outline."""

    # Template returned when the requested query type is unknown.
    _FALLBACK_KEY = "problem_solving"

    def __init__(self):
        """Build the template table: each entry has ``steps`` and a ``prompt``."""
        problem_solving = {
            "steps": ["문제 λΆ„ν•΄", "핡심 μš”μΈ", "ν•΄κ²° λ°©μ•ˆ", "κ΅¬ν˜„ μ „λž΅"],
            "prompt": "μ²΄κ³„μ μœΌλ‘œ λ‹¨κ³„λ³„λ‘œ λΆ„μ„ν•˜κ³  해결책을 μ œμ‹œν•˜μ„Έμš”.",
        }
        creative_thinking = {
            "steps": ["κΈ°μ‘΄ μ ‘κ·Ό", "창의적 λŒ€μ•ˆ", "ν˜μ‹  포인트", "μ‹€ν–‰ 방법"],
            "prompt": "κΈ°μ‘΄ 방식을 λ„˜μ–΄μ„  창의적이고 ν˜μ‹ μ μΈ 접근을 μ œμ‹œν•˜μ„Έμš”.",
        }
        critical_analysis = {
            "steps": ["ν˜„ν™© 평가", "강점/약점", "기회/μœ„ν˜‘", "κ°œμ„  λ°©ν–₯"],
            "prompt": "λΉ„νŒμ  κ΄€μ μ—μ„œ μ² μ €νžˆ λΆ„μ„ν•˜κ³  κ°œμ„ μ μ„ λ„μΆœν•˜μ„Έμš”.",
        }
        self.templates = {
            "problem_solving": problem_solving,
            "creative_thinking": creative_thinking,
            "critical_analysis": critical_analysis,
        }

    def get_reasoning_structure(self, query_type: str) -> Dict:
        """Return the template for ``query_type``, or the problem-solving one if unknown."""
        try:
            return self.templates[query_type]
        except KeyError:
            return self.templates[self._FALLBACK_KEY]
297
+
298
+
299
+ # ============================================================================
300
+ # μ‘°κΈ° μ’…λ£Œ λ©”μ»€λ‹ˆμ¦˜
301
+ # ============================================================================
302
+
303
class QualityChecker:
    """Heuristic scorer that decides whether a draft answer is good enough."""

    def __init__(self, min_quality: float = 0.75):
        """Store the pass threshold and the per-metric weights."""
        self.min_quality = min_quality
        self.quality_metrics = {
            "length": 0.2,
            "structure": 0.3,
            "completeness": 0.3,
            "clarity": 0.2,
        }

    def evaluate_response(self, response: str, query: str) -> Tuple[float, bool]:
        """Score ``response`` against ``query``.

        Returns:
            ``(total_score, should_continue)`` where ``total_score`` is the
            weighted sum of the four metrics and ``should_continue`` is True
            when the score is below ``min_quality``.
        """
        structure_markers = ["1.", "2.", "β€’", "-", "첫째", "λ‘˜μ§Έ", "κ²°λ‘ ", "μš”μ•½"]
        markers_found = len([marker for marker in structure_markers if marker in response])

        asked_words = set(query.split())
        answered_words = set(response.split())
        shared_words = asked_words & answered_words

        sentences = response.split('.')
        words_per_sentence = sum(len(part.split()) for part in sentences) / max(len(sentences), 1)

        scores = {
            # Length saturates at 1000 characters.
            "length": min(len(response) / 1000, 1.0),
            # Fraction of the known list/summary markers present in the text.
            "structure": markers_found / len(structure_markers),
            # Fraction of whitespace-separated query words repeated in the answer.
            "completeness": len(shared_words) / max(len(asked_words), 1),
            # Average words per '.'-separated piece, saturating at 20.
            "clarity": min(words_per_sentence / 20, 1.0),
        }

        total_score = sum(
            scores[name] * weight
            for name, weight in self.quality_metrics.items()
        )

        return total_score, total_score < self.min_quality
345
+
346
+
347
+ # ============================================================================
348
+ # 슀트리밍 μ΅œμ ν™”
349
+ # ============================================================================
350
+
351
class OptimizedStreaming:
    """Coalesces small stream fragments into larger chunks before yielding them."""

    def __init__(self, chunk_size: int = 100, flush_interval: float = 0.1):
        """Configure the flush thresholds.

        Args:
            chunk_size: Flush once this many characters are buffered.
            flush_interval: Flush once this many seconds passed since the last flush.
        """
        self.chunk_size = chunk_size
        self.flush_interval = flush_interval
        # Retained for backward compatibility only; per-stream state lives in
        # locals of buffer_and_yield so that streams cannot contaminate each other.
        self.buffer = ""
        self.last_flush = time.time()

    async def buffer_and_yield(
        self,
        stream: AsyncGenerator[str, None]
    ) -> AsyncGenerator[str, None]:
        """Re-chunk ``stream``.

        Each call keeps its own buffer and flush clock, so a stream that
        fails or is abandoned midway leaves nothing behind for the next one,
        and one instance can serve several streams.

        Yields:
            Concatenated fragments, flushed when either threshold is reached,
            followed by whatever remains when the source ends.
        """
        pending = []
        pending_len = 0
        # Monotonic clock: interval measurement must not jump with wall-clock changes.
        last_flush = time.monotonic()

        async for chunk in stream:
            pending.append(chunk)
            pending_len += len(chunk)
            now = time.monotonic()

            if pending_len >= self.chunk_size or now - last_flush >= self.flush_interval:
                yield "".join(pending)
                pending = []
                pending_len = 0
                last_flush = now

        # Flush the tail that never reached a threshold.
        if pending_len:
            yield "".join(pending)
380
+
381
+
382
+ # ============================================================================
383
+ # 톡합 μ΅œμ ν™” λ©€ν‹° μ—μ΄μ „νŠΈ μ‹œμŠ€ν…œ
384
+ # ============================================================================
385
+
386
class SpeedOptimizedMultiAgentSystem:
    """Multi-agent answer pipeline: supervisor, creative writer, critic, finalizer.

    ``parallel_process_agents`` drives the stages and streams
    ``(answer_text, progress_markdown)`` tuples to the caller. Finished
    answers are stored in a ``SmartCache`` keyed by the query text.
    """

    def __init__(self):
        """Instantiate the collaborating components with their defaults."""
        self.llm = OptimizedFireworksClient()
        self.search = AsyncBraveSearch()
        self.cache = SmartCache()
        self.reasoning = LightweightReasoningChain()
        self.quality_checker = QualityChecker()
        self.streaming = OptimizedStreaming()

        # Compact system prompts, one per agent role.
        self.compact_prompts = self._init_compact_prompts()

        # Thread pool kept as part of the object's attributes; no method of
        # this class submits work to it.
        self.executor = ThreadPoolExecutor(max_workers=4)

    def _init_compact_prompts(self) -> Dict:
        """Return the terse system prompt for each ``AgentRole``."""
        return {
            AgentRole.SUPERVISOR: """[κ°λ…μž-ꡬ쑰섀계]
μ¦‰μ‹œλΆ„μ„: ν•΅μ‹¬μ˜λ„+ν•„μš”μ •λ³΄+닡변ꡬ쑰
좜λ ₯: 5개 ν•΅μ‹¬ν¬μΈνŠΈ(각 1λ¬Έμž₯)
좔둠체계 λͺ…μ‹œ""",

            AgentRole.CREATIVE: """[μ°½μ˜μ„±μƒμ„±μž]
μž…λ ₯ꡬ쑰 따라 창의적 ν™•μž₯
μ‹€μš©μ˜ˆμ‹œ+ν˜μ‹ μ ‘κ·Ό+ꡬ체쑰언
λΆˆν•„μš”μ„€λͺ… 제거""",

            AgentRole.CRITIC: """[λΉ„ν‰μž-검증]
신속검토: μ •ν™•μ„±/논리성/μ‹€μš©μ„±
κ°œμ„ ν¬μΈνŠΈ 3개만
각 2λ¬Έμž₯ 이내""",

            AgentRole.FINALIZER: """[μ΅œμ’…ν†΅ν•©]
λͺ¨λ“ μ˜κ²¬ μ’…ν•©β†’μ΅œμ λ‹΅λ³€
λͺ…확ꡬ쑰+μ‹€μš©μ •λ³΄+μ°½μ˜κ· ν˜•
핡심먼저+μƒμ„ΈλŠ”ν›„μˆœμœ„"""
        }

    async def parallel_process_agents(
        self,
        query: str,
        search_results: List[Dict],
        show_progress: bool = True
    ) -> AsyncGenerator[Tuple[str, str], None]:
        """Run the agent pipeline for ``query`` and stream progress.

        Args:
            query: The user's question; also the cache key.
            search_results: Hits to summarize into the prompts (top three used).
            show_progress: When True, intermediate progress updates are yielded.

        Yields:
            ``(answer_so_far, progress_markdown)`` tuples. A cache hit yields
            exactly one tuple. Any exception inside the pipeline is reported
            as a final tuple whose first element is an error message.
        """
        start_time = time.time()
        search_context = self._format_search_results(search_results)
        accumulated_response = ""
        agent_thoughts = ""
        # Handle of the background critic run, if one gets started.
        critic_task = None

        # Serve from cache when possible.
        cached = self.cache.get(query)
        if cached:
            yield cached["response"], "✨ μΊμ‹œμ—μ„œ μ¦‰μ‹œ λ‘œλ“œ"
            return

        # Pick a reasoning pattern from the query's keywords (may be None).
        reasoning_pattern = self.cache.get_reasoning_pattern(query)

        try:
            # === Stage 1: supervisor outlines the answer ===
            if show_progress:
                agent_thoughts = "### πŸš€ 병렬 처리 μ‹œμž‘\n"
                agent_thoughts += "πŸ‘” κ°λ…μž 뢄석 + πŸ” μΆ”κ°€ 검색 λ™μ‹œ μ§„ν–‰...\n\n"
                yield accumulated_response, agent_thoughts

            supervisor_prompt = f"""
질문: {query}
검색결과: {search_context}
μΆ”λ‘ νŒ¨ν„΄: {reasoning_pattern}
μ¦‰μ‹œ 핡심ꡬ쑰 5개 μ œμ‹œ"""

            supervisor_response = ""
            supervisor_task = self.llm.chat_stream_async(
                messages=[
                    {"role": "system", "content": self.compact_prompts[AgentRole.SUPERVISOR]},
                    {"role": "user", "content": supervisor_prompt}
                ],
                temperature=0.3,
                max_tokens=500
            )

            # Stream the supervisor output through the chunk buffer.
            async for chunk in self.streaming.buffer_and_yield(supervisor_task):
                supervisor_response += chunk
                if show_progress and len(supervisor_response) < 300:
                    agent_thoughts = f"### πŸ‘” κ°λ…μž 뢄석\n{supervisor_response[:300]}...\n\n"
                    yield accumulated_response, agent_thoughts

            # === Stage 2: creative draft, with the critic started early ===
            if show_progress:
                agent_thoughts += "### 🎨 μ°½μ˜μ„± μƒμ„±μž + πŸ” λΉ„ν‰μž μ€€λΉ„...\n\n"
                yield accumulated_response, agent_thoughts

            creative_prompt = f"""
질문: {query}
κ°λ…μžκ΅¬μ‘°: {supervisor_response}
검색결과: {search_context}
창의적+μ‹€μš©μ  λ‹΅λ³€ μ¦‰μ‹œμƒμ„±"""

            creative_response = ""
            creative_partial = ""  # partial draft handed to the critic
            critic_started = False
            critic_response = ""

            creative_task = self.llm.chat_stream_async(
                messages=[
                    {"role": "system", "content": self.compact_prompts[AgentRole.CREATIVE]},
                    {"role": "user", "content": creative_prompt}
                ],
                temperature=0.8,
                max_tokens=1500
            )

            async for chunk in self.streaming.buffer_and_yield(creative_task):
                creative_response += chunk
                creative_partial += chunk

                # Once the draft passes 500 characters, launch the critic on
                # what exists so far so it overlaps with the rest of the draft.
                if len(creative_partial) > 500 and not critic_started:
                    critic_started = True

                    critic_prompt = f"""
μ›λ³Έμ§ˆλ¬Έ: {query}
μ°½μ˜μ„±λ‹΅λ³€(일뢀): {creative_partial}
μ‹ μ†κ²€ν† β†’κ°œμ„ μ 3개"""

                    critic_task = asyncio.create_task(
                        self._run_critic_async(critic_prompt)
                    )

                if show_progress:
                    display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
                    agent_thoughts = f"### 🎨 μ°½μ˜μ„± μƒμ„±μž\n{display_creative}\n\n"
                    yield accumulated_response, agent_thoughts

            # Wait for the critic if it was started.
            if critic_started:
                critic_response = await critic_task

                if show_progress:
                    agent_thoughts += f"### πŸ” λΉ„ν‰μž κ²€ν† \n{critic_response[:200]}...\n\n"
                    yield accumulated_response, agent_thoughts

            # === Stage 3: quality check and early exit ===
            quality_score, need_more = self.quality_checker.evaluate_response(
                creative_response, query
            )

            if not need_more and quality_score > 0.85:
                # The draft already scores high enough: return it as the answer.
                accumulated_response = creative_response

                if show_progress:
                    agent_thoughts += f"### βœ… ν’ˆμ§ˆ μΆ©μ‘± (점수: {quality_score:.2f})\nμ‘°κΈ° μ™„λ£Œ!\n"

                self.cache.set(query, {
                    "response": accumulated_response,
                    "timestamp": datetime.now()
                })

                yield accumulated_response, agent_thoughts
                return

            # === Stage 4: final integration (streamed) ===
            if show_progress:
                agent_thoughts += "### βœ… μ΅œμ’… 톡합 쀑...\n\n"
                yield accumulated_response, agent_thoughts

            final_prompt = f"""
질문: {query}
μ°½μ˜μ„±λ‹΅λ³€: {creative_response}
λΉ„ν‰ν”Όλ“œλ°±: {critic_response}
κ°λ…μžκ΅¬μ‘°: {supervisor_response}
μ΅œμ’…ν†΅ν•©β†’μ™„λ²½λ‹΅λ³€"""

            final_task = self.llm.chat_stream_async(
                messages=[
                    {"role": "system", "content": self.compact_prompts[AgentRole.FINALIZER]},
                    {"role": "user", "content": final_prompt}
                ],
                temperature=0.5,
                max_tokens=2500
            )

            accumulated_response = ""
            async for chunk in self.streaming.buffer_and_yield(final_task):
                accumulated_response += chunk
                yield accumulated_response, agent_thoughts

            # Append the elapsed processing time to the answer.
            processing_time = time.time() - start_time
            accumulated_response += f"\n\n---\n⚑ 처리 μ‹œκ°„: {processing_time:.1f}초"

            self.cache.set(query, {
                "response": accumulated_response,
                "timestamp": datetime.now()
            })

            yield accumulated_response, agent_thoughts

        except Exception as e:
            error_msg = f"❌ 였λ₯˜ λ°œμƒ: {str(e)}"
            yield error_msg, agent_thoughts
        finally:
            # Do not leave the background critic running if the pipeline
            # failed or the consumer stopped iterating early.
            if critic_task is not None and not critic_task.done():
                critic_task.cancel()

    async def _run_critic_async(self, prompt: str) -> str:
        """Collect the critic's full reply for ``prompt``.

        Returns the concatenated stream, or a fixed fallback message if the
        request raises. Cancellation is not swallowed.
        """
        try:
            response = ""
            async for chunk in self.llm.chat_stream_async(
                messages=[
                    {"role": "system", "content": self.compact_prompts[AgentRole.CRITIC]},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=500
            ):
                response += chunk
            return response
        except Exception:
            return "비평 처리 쀑 였λ₯˜"

    def _format_search_results(self, results: List[Dict]) -> str:
        """Compress the top three hits into one ``[n]title:description | ...`` line.

        Titles are cut to 50 characters and descriptions to 100; missing or
        ``None`` fields are treated as empty text. An empty ``results`` list
        yields a fixed placeholder string.
        """
        if not results:
            return "κ²€μƒ‰κ²°κ³Όμ—†μŒ"

        formatted = [
            f"[{i}]{(r.get('title') or '')[:50]}:{(r.get('description') or '')[:100]}"
            for i, r in enumerate(results[:3], 1)  # top three only
        ]

        return " | ".join(formatted)
629
+
630
+
631
+ # ============================================================================
632
+ # Gradio UI (μ΅œμ ν™” 버전)
633
+ # ============================================================================
634
+
635
def create_optimized_gradio_interface():
    """Build the Gradio Blocks UI around a ``SpeedOptimizedMultiAgentSystem``.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """

    # One shared pipeline instance serves every request of this UI.
    system = SpeedOptimizedMultiAgentSystem()

    def run_async_function(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        If no event loop is running in the current thread the coroutine is
        executed with ``asyncio.run``. If one is running, the coroutine is
        executed on a fresh loop in a helper thread, since a running loop
        cannot be re-entered. Exceptions raised by the coroutine propagate
        unchanged; the coroutine is never started twice.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running here, so it is safe to start one.
            return asyncio.run(coro)

        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(asyncio.run, coro).result()

    def process_query_optimized(
        message: str,
        history: List[Dict],
        use_search: bool,
        show_agent_thoughts: bool,
        search_count: int
    ):
        """Handle one chat submission (synchronous generator for Gradio).

        Yields:
            ``(chat_history, agent_thoughts_markdown, search_sources_markdown)``
            tuples matching the three bound output components.
        """
        if not message:
            yield history, "", ""
            return

        try:
            search_results = []
            search_display = ""

            if use_search:
                # Show a placeholder reply while the search is running.
                history_with_message = history + [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": "⚑ 고속 처리 쀑..."}
                ]
                yield history_with_message, "", ""

                # The slider may deliver a float; the search expects an int.
                search_results = run_async_function(
                    system.search.search_async(message, count=int(search_count))
                )

                if search_results:
                    search_display = "## πŸ“š μ°Έκ³  자료\n\n"
                    for i, result in enumerate(search_results[:3], 1):
                        search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
                        search_display += f" {result['description'][:100]}...\n\n"

            current_history = history + [{"role": "user", "content": message}]

            # NOTE: every pipeline update is collected first and replayed
            # afterwards, so the UI receives the updates only once the whole
            # pipeline has finished.
            async def collect_responses():
                responses = []
                async for response, thoughts in system.parallel_process_agents(
                    query=message,
                    search_results=search_results,
                    show_progress=show_agent_thoughts
                ):
                    responses.append((response, thoughts))
                return responses

            all_responses = run_async_function(collect_responses())

            for response, thoughts in all_responses:
                updated_history = current_history + [
                    {"role": "assistant", "content": response}
                ]
                yield updated_history, thoughts, search_display

        except Exception as e:
            error_history = history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": f"❌ 였λ₯˜: {str(e)}"}
            ]
            yield error_history, "", ""

    # UI layout.
    with gr.Blocks(
        title="⚑ Speed-Optimized Multi-Agent System",
        theme=gr.themes.Soft(),
        css="""
.gradio-container {
max-width: 1400px !important;
margin: auto !important;
}
"""
    ) as demo:
        gr.Markdown("""
# ⚑ 고속 Multi-Agent RAG System
### λ³΅μž‘ν•œ μ§ˆλ¬Έλ„ 5초 이내 처리 λͺ©ν‘œ

**μ΅œμ ν™” 기술:**
- πŸš€ 병렬 처리: μ—μ΄μ „νŠΈ λ™μ‹œ μ‹€ν–‰
- πŸ’Ύ 슀마트 캐싱: 자주 λ¬»λŠ” νŒ¨ν„΄ μ¦‰μ‹œ 응닡
- ⚑ 슀트리밍 버퍼: λ„€νŠΈμ›Œν¬ μ΅œμ ν™”
- 🎯 μ‘°κΈ° μ’…λ£Œ: ν’ˆμ§ˆ μΆ©μ‘± μ‹œ μ¦‰μ‹œ μ™„λ£Œ
""")

        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    label="πŸ’¬ λŒ€ν™”",
                    type="messages"
                )

                msg = gr.Textbox(
                    label="λ³΅μž‘ν•œ 질문 μž…λ ₯",
                    placeholder="뢄석, μ „λž΅, 창의적 해결이 ν•„μš”ν•œ λ³΅μž‘ν•œ μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”...",
                    lines=3
                )

                with gr.Row():
                    submit = gr.Button("⚑ 고속 처리", variant="primary")
                    clear = gr.Button("πŸ”„ μ΄ˆκΈ°ν™”")

                with gr.Accordion("πŸ€– μ—μ΄μ „νŠΈ 처리 κ³Όμ •", open=False):
                    agent_thoughts = gr.Markdown()

                with gr.Accordion("πŸ“š 검색 μ†ŒμŠ€", open=False):
                    search_sources = gr.Markdown()

            with gr.Column(scale=1):
                gr.Markdown("### βš™οΈ μ„€μ •")

                use_search = gr.Checkbox(
                    label="πŸ” μ›Ή 검색 μ‚¬μš©",
                    value=True
                )

                show_agent_thoughts = gr.Checkbox(
                    label="🧠 처리 κ³Όμ • ν‘œμ‹œ",
                    value=True
                )

                search_count = gr.Slider(
                    minimum=3,
                    maximum=10,
                    value=5,
                    step=1,
                    label="검색 κ²°κ³Ό 수"
                )

                gr.Markdown("""
### ⚑ μ΅œμ ν™” μƒνƒœ

**ν™œμ„±ν™”λœ μ΅œμ ν™”:**
- βœ… 병렬 처리
- βœ… 슀마트 캐싱
- βœ… 버퍼 슀트리밍
- βœ… μ‘°κΈ° μ’…λ£Œ
- βœ… μ••μΆ• ν”„λ‘¬ν”„νŠΈ

**μ˜ˆμƒ 처리 μ‹œκ°„:**
- μΊμ‹œ 히트: < 1초
- 일반 질문: 3-5초
- λ³΅μž‘ν•œ 질문: 5-8초
""")

        # Example questions that fill the input box.
        gr.Examples(
            examples=[
                "AI 기술이 ν–₯ν›„ 10λ…„κ°„ ν•œκ΅­ κ²½μ œμ— λ―ΈμΉ  영ν–₯을 λ‹€κ°λ„λ‘œ λΆ„μ„ν•˜κ³  λŒ€μ‘ μ „λž΅μ„ μ œμ‹œν•΄μ€˜",
                "μŠ€νƒ€νŠΈμ—…μ΄ λŒ€κΈ°μ—…κ³Ό κ²½μŸν•˜κΈ° μœ„ν•œ ν˜μ‹ μ μΈ μ „λž΅μ„ λ‹¨κ³„λ³„λ‘œ μˆ˜λ¦½ν•΄μ€˜",
                "κΈ°ν›„λ³€ν™” λŒ€μ‘μ„ μœ„ν•œ 창의적인 λΉ„μ¦ˆλ‹ˆμŠ€ λͺ¨λΈ 5κ°€μ§€λ₯Ό ꡬ체적으둜 μ„€κ³„ν•΄μ€˜",
                "μ–‘μžμ»΄ν“¨ν„°κ°€ ν˜„μž¬ μ•”ν˜Έν™” 체계에 λ―ΈμΉ  영ν–₯κ³Ό λŒ€μ•ˆμ„ 기술적으둜 λΆ„μ„ν•΄μ€˜",
                "λ©”νƒ€λ²„μŠ€ μ‹œλŒ€μ˜ ꡐ윑 ν˜μ‹  λ°©μ•ˆμ„ μ‹€μ œ κ΅¬ν˜„ κ°€λŠ₯ν•œ μˆ˜μ€€μœΌλ‘œ μ œμ•ˆν•΄μ€˜"
            ],
            inputs=msg
        )

        # Event wiring: both the button and Enter run the handler, then clear the input.
        submit.click(
            process_query_optimized,
            inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
            outputs=[chatbot, agent_thoughts, search_sources]
        ).then(
            lambda: "",
            None,
            msg
        )

        msg.submit(
            process_query_optimized,
            inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
            outputs=[chatbot, agent_thoughts, search_sources]
        ).then(
            lambda: "",
            None,
            msg
        )

        clear.click(
            lambda: ([], "", ""),
            None,
            [chatbot, agent_thoughts, search_sources]
        )

    return demo
855
+
856
+
857
+ # ============================================================================
858
+ # 메인 μ‹€ν–‰
859
+ # ============================================================================
860
+
861
if __name__ == "__main__":
    # Startup banner.
    print("""
╔══════════════════════════════════════════════════════════════╗
β•‘ ⚑ Speed-Optimized Multi-Agent RAG System ⚑ β•‘
β•‘ β•‘
β•‘ λ³΅μž‘ν•œ μ§ˆλ¬Έλ„ 5초 이내 μ²˜λ¦¬ν•˜λŠ” 고속 AI μ‹œμŠ€ν…œ β•‘
β•‘ β•‘
β•‘ μ΅œμ ν™” 기술: β•‘
β•‘ β€’ 병렬 처리 νŒŒμ΄ν”„λΌμΈ β•‘
β•‘ β€’ 슀마트 캐싱 μ‹œμŠ€ν…œ β•‘
β•‘ β€’ 슀트리밍 버퍼 μ΅œμ ν™” β•‘
β•‘ β€’ ν’ˆμ§ˆ 기반 μ‘°κΈ° μ’…λ£Œ β•‘
β•‘ β€’ μ••μΆ• ν”„λ‘¬ν”„νŠΈ μ—”μ§€λ‹ˆμ–΄λ§ β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
""")

    # Warn about each API key that is missing from the environment.
    for env_name in ("FIREWORKS_API_KEY", "BRAVE_SEARCH_API_KEY"):
        if not os.getenv(env_name):
            print(f"\n⚠️ {env_name}κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

    # Build the Gradio app.
    demo = create_optimized_gradio_interface()

    # SPACE_ID being set is treated as "running on Hugging Face Spaces".
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}

    if os.getenv("SPACE_ID") is not None:
        print("\nπŸ€— Hugging Face Spacesμ—μ„œ μ΅œμ ν™” λͺ¨λ“œλ‘œ μ‹€ν–‰ 쀑...")
    else:
        print("\nπŸ’» 둜컬 ν™˜κ²½μ—μ„œ μ΅œμ ν™” λͺ¨λ“œλ‘œ μ‹€ν–‰ 쀑...")
        # Local runs explicitly disable the public share link.
        launch_options["share"] = False

    demo.launch(**launch_options)