ZBro7 committed on
Commit 7976e22 · verified · 1 Parent(s): fb3d2df

Update router.py

Files changed (1): router.py +80 -18
router.py CHANGED
@@ -1,3 +1,7 @@
+import asyncio
+import time
+import requests
+
 from llm_clients import (
     call_llama,
     call_gemini,
@@ -9,16 +13,46 @@ from memory import save_message, load_memory
 from search_tool import search_web
 from rag_engine import rag_response
 
-import requests
 
 
-# 🔥 Image microservice endpoint
+# =====================================
+# CONFIG
+# =====================================
+
 IMAGE_SPACE_URL = "https://your-image-space.hf.space/generate"
 
-# 🔥 Simple in-memory cache
+CACHE_TTL_SECONDS = 300  # 5 minutes
+
 response_cache = {}
 
 
+# =====================================
+# CACHE HELPERS
+# =====================================
+
+def get_cached_response(cache_key):
+    entry = response_cache.get(cache_key)
+
+    if not entry:
+        return None
+
+    if time.time() > entry["expires_at"]:
+        del response_cache[cache_key]
+        return None
+
+    return entry["response"]
+
+
+def set_cache(cache_key, response):
+    response_cache[cache_key] = {
+        "response": response,
+        "expires_at": time.time() + CACHE_TTL_SECONDS
+    }
+
+
+# =====================================
+# MESSAGE BUILDER
+# =====================================
+
 def build_messages(system_prompt, memory, user_prompt):
 
     messages = []
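
The cache rewrite is the core of this hunk: each entry now carries an expires_at timestamp and get_cached_response evicts lazily, on the next read after expiry. Keys that are never read again simply accumulate until the process restarts. A minimal standalone sketch of the round trip (the 2-second TTL here is shortened purely to show expiry; the diff uses 300):

import time

CACHE_TTL_SECONDS = 2  # demo value; the diff uses 300
response_cache = {}

def set_cache(cache_key, response):
    response_cache[cache_key] = {
        "response": response,
        "expires_at": time.time() + CACHE_TTL_SECONDS,
    }

def get_cached_response(cache_key):
    entry = response_cache.get(cache_key)
    if not entry:
        return None
    if time.time() > entry["expires_at"]:
        del response_cache[cache_key]  # lazy eviction on read
        return None
    return entry["response"]

set_cache("user1:hello", "Hi!")
print(get_cached_response("user1:hello"))  # -> Hi!
time.sleep(CACHE_TTL_SECONDS + 1)
print(get_cached_response("user1:hello"))  # -> None (entry expired)
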
@@ -32,6 +66,10 @@ def build_messages(system_prompt, memory, user_prompt):
     return messages
 
 
+# =====================================
+# IMAGE SERVICE
+# =====================================
+
 def call_image_microservice(prompt):
 
     try:
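
Only the section header is new here; the body of call_image_microservice is elided by the diff apart from the try: line and the {"error": "Image service unavailable"} fallback visible in the next hunk. For orientation, a function of this shape usually looks roughly like the sketch below; the payload field and the 60-second timeout are assumptions, not taken from the diff:

import requests

IMAGE_SPACE_URL = "https://your-image-space.hf.space/generate"

def call_image_microservice(prompt):
    try:
        # Hypothetical request body; the real field names are not shown in the diff.
        resp = requests.post(IMAGE_SPACE_URL, json={"prompt": prompt}, timeout=60)
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException:
        return {"error": "Image service unavailable"}
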
@@ -45,6 +83,22 @@ def call_image_microservice(prompt):
         return {"error": "Image service unavailable"}
 
 
+# =====================================
+# ASYNC LLM WRAPPERS
+# =====================================
+
+async def async_llama(messages):
+    return await asyncio.to_thread(call_llama, messages)
+
+
+async def async_gemini(messages):
+    return await asyncio.to_thread(call_gemini, messages)
+
+
+# =====================================
+# MAIN ROUTER
+# =====================================
+
 def route_request(prompt, user_id):
 
     cache_key = f"{user_id}:{prompt}"
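
The new wrappers rely on asyncio.to_thread (Python 3.9+), which runs a blocking function in a worker thread and hands back an awaitable, so the synchronous call_llama and call_gemini clients can run concurrently without being rewritten. A self-contained sketch of the pattern with stand-in blocking functions:

import asyncio
import time

def blocking_client(name, seconds):
    # Stand-in for a synchronous LLM client such as call_llama.
    time.sleep(seconds)
    return f"{name} done"

async def main():
    # Both blocking calls run in worker threads at the same time,
    # so this takes about 1 second rather than 2.
    results = await asyncio.gather(
        asyncio.to_thread(blocking_client, "llama", 1),
        asyncio.to_thread(blocking_client, "gemini", 1),
    )
    print(results)  # -> ['llama done', 'gemini done']

asyncio.run(main())
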
@@ -52,8 +106,9 @@ def route_request(prompt, user_id):
     # ==========================
     # CACHE CHECK
     # ==========================
-    if cache_key in response_cache:
-        return {"response": response_cache[cache_key]}
+    cached = get_cached_response(cache_key)
+    if cached:
+        return {"response": cached}
 
     # ==========================
     # IMAGE COMMAND
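
One small review note on this check: `if cached:` treats any falsy cached value, such as an empty string, as a miss and regenerates it. Since get_cached_response returns None specifically on a miss or expiry, an explicit None test would be the stricter form:

    cached = get_cached_response(cache_key)
    if cached is not None:  # distinguishes a real miss from a cached falsy value
        return {"response": cached}
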
@@ -66,9 +121,8 @@
     # RAG QUICK RESPONSE
     # ==========================
     rag_answer = rag_response(prompt)
-
     if rag_answer:
-        response_cache[cache_key] = rag_answer
+        set_cache(cache_key, rag_answer)
         return {"response": rag_answer}
 
@@ -77,25 +131,25 @@
     memory = load_memory(user_id)
 
     # ==========================
-    # AI CLASSIFICATION
+    # CLASSIFY
     # ==========================
     classification = classify_prompt(prompt)
 
     intent = classification.get("intent", "chat")
     needs_search = classification.get("needs_search", False)
 
-    system_prompt = "You are ZXAI, a powerful advanced AI assistant."
+    system_prompt = "You are ZXAI, an advanced AI assistant."
 
     # ==========================
     # GREETING FAST PATH
     # ==========================
     if intent == "greeting":
-        response = "Hello 👋 I am ZXAI. How can I assist you today?"
+        response = "Hello 👋 I am ZXAI. How can I help you today?"
 
         save_message(user_id, "user", prompt)
         save_message(user_id, "assistant", response)
 
-        response_cache[cache_key] = response
+        set_cache(cache_key, response)
         return {"response": response}
 
     # ==========================
@@ -109,11 +163,11 @@
         save_message(user_id, "user", prompt)
         save_message(user_id, "assistant", response)
 
-        response_cache[cache_key] = response
+        set_cache(cache_key, response)
         return {"response": response}
 
     # ==========================
-    # LIVE DATA / SEARCH
+    # LIVE DATA (Parallel LLM)
     # ==========================
     if intent == "live_data" or needs_search:
 
@@ -131,17 +185,25 @@ Use web data if helpful.
 
         messages = build_messages(system_prompt, memory, enriched_prompt)
 
-        llama_answer = call_llama(messages)
-        gemini_answer = call_gemini(messages)
+        async def run_parallel():
+            llama_task = asyncio.create_task(async_llama(messages))
+            gemini_task = asyncio.create_task(async_gemini(messages))
+
+            llama_answer = await llama_task
+            gemini_answer = await gemini_task
+
+            return llama_answer, gemini_answer
 
-        winner = judge_answers(llama_answer, gemini_answer)
+        llama_answer, gemini_answer = asyncio.run(run_parallel())
+
+        winner = judge_answers(llama_answer, gemini_answer)
         final_answer = gemini_answer if winner == 2 else llama_answer
 
         save_message(user_id, "user", prompt)
         save_message(user_id, "assistant", final_answer)
 
-        response_cache[cache_key] = final_answer
+        set_cache(cache_key, final_answer)
+
         return {"response": final_answer}
 
     # ==========================
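
Two notes on this hunk. First, asyncio.run spins up a fresh event loop, which is fine while route_request stays synchronous, but it will raise a RuntimeError if route_request is ever invoked from a context that already has a running loop (an async FastAPI endpoint, for instance). Second, because async_llama and async_gemini are coroutine functions, run_parallel can be phrased more compactly with asyncio.gather; within the same function context the equivalent would read:

        async def run_parallel():
            # gather schedules both coroutines and waits for both results,
            # equivalent to the create_task / await pairs in the diff.
            return await asyncio.gather(
                async_llama(messages),
                async_gemini(messages),
            )

        llama_answer, gemini_answer = asyncio.run(run_parallel())
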
@@ -154,6 +216,6 @@ Use web data if helpful.
     save_message(user_id, "user", prompt)
     save_message(user_id, "assistant", response)
 
-    response_cache[cache_key] = response
+    set_cache(cache_key, response)
 
     return {"response": response}
 