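"""Async request router for the ZXAI assistant.

Routes each user prompt to an image microservice, a RAG lookup, web-search
enrichment, or an LLM backend (LLaMA / Gemini), with a small in-memory TTL
cache in front. The llm_clients, memory, search_tool, and rag_engine modules
are project-local imports.
"""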
import asyncio
import time
import requests
from llm_clients import (
call_llama,
call_gemini,
classify_prompt,
judge_answers
)
from memory import save_message, load_memory
from search_tool import search_web
from rag_engine import rag_response
# =====================================
# CONFIG
# =====================================
IMAGE_SPACE_URL = "https://your-image-space.hf.space/generate"
CACHE_TTL_SECONDS = 300 # 5 minutes
response_cache = {}
# =====================================
# CACHE HELPERS
# =====================================
def get_cached_response(cache_key):
    entry = response_cache.get(cache_key)
    if not entry:
        return None
    if time.time() > entry["expires_at"]:
        del response_cache[cache_key]
        return None
    return entry["response"]


def set_cache(cache_key, response):
    response_cache[cache_key] = {
        "response": response,
        "expires_at": time.time() + CACHE_TTL_SECONDS
    }
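# NOTE: this cache is per-process and unbounded; stale entries are evicted
# lazily, only when the same key is read again after its TTL has expired.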
# =====================================
# MESSAGE BUILDER
# =====================================
def build_messages(system_prompt, memory, user_prompt):
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(memory)
    messages.append({"role": "user", "content": user_prompt})
    return messages
# =====================================
# IMAGE SERVICE (Async Safe)
# =====================================
async def call_image_microservice(prompt):
    try:
        # requests is blocking, so run the call in a worker thread to keep
        # the event loop responsive.
        return await asyncio.to_thread(
            lambda: requests.post(
                IMAGE_SPACE_URL,
                json={"prompt": prompt},
                timeout=60
            ).json()
        )
    except Exception:
        return {"error": "Image service unavailable"}
# =====================================
# ASYNC LLM WRAPPERS
# =====================================
async def async_llama(messages):
    return await asyncio.to_thread(call_llama, messages)


async def async_gemini(messages):
    return await asyncio.to_thread(call_gemini, messages)
# =====================================
# MAIN ROUTER
# =====================================
async def route_request(prompt, user_id):
    cache_key = f"{user_id}:{prompt}"

    # ==========================
    # CACHE CHECK
    # ==========================
    cached = get_cached_response(cache_key)
    if cached:
        return {"response": cached}

    # ==========================
    # IMAGE COMMAND
    # ==========================
    if prompt.startswith("/image"):
        # Strip only the leading command, not every "/image" occurrence.
        clean_prompt = prompt[len("/image"):].strip()
        return await call_image_microservice(clean_prompt)
    # ==========================
    # RAG QUICK RESPONSE
    # ==========================
    rag_answer = rag_response(prompt)
    if rag_answer:
        set_cache(cache_key, rag_answer)
        return {"response": rag_answer}

    # ==========================
    # LOAD MEMORY
    # ==========================
    memory = load_memory(user_id)

    # ==========================
    # CLASSIFY
    # ==========================
    classification = classify_prompt(prompt)
    intent = classification.get("intent", "chat")
    needs_search = classification.get("needs_search", False)
    system_prompt = "You are ZXAI, an advanced AI assistant."
    # ==========================
    # GREETING FAST PATH
    # ==========================
    if intent == "greeting":
        response = "Hello 👋 I am ZXAI. How can I help you today?"
        save_message(user_id, "user", prompt)
        save_message(user_id, "assistant", response)
        set_cache(cache_key, response)
        return {"response": response}
    # ==========================
    # REASONING → GEMINI
    # ==========================
    if intent == "reasoning":
        messages = build_messages(system_prompt, memory, prompt)
        response = await async_gemini(messages)
        save_message(user_id, "user", prompt)
        save_message(user_id, "assistant", response)
        set_cache(cache_key, response)
        return {"response": response}
    # ==========================
    # LIVE DATA (Parallel LLM)
    # ==========================
    if intent == "live_data" or needs_search:
        web_data = search_web(prompt)
        enriched_prompt = (
            f"User Question:\n{prompt}\n\n"
            f"Web Data:\n{web_data}\n\n"
            "Use web data if helpful."
        )
        messages = build_messages(system_prompt, memory, enriched_prompt)
        # Query both models concurrently, then let the judge pick a winner.
        llama_answer, gemini_answer = await asyncio.gather(
            async_llama(messages),
            async_gemini(messages)
        )
        winner = judge_answers(llama_answer, gemini_answer)
        final_answer = gemini_answer if winner == 2 else llama_answer
        save_message(user_id, "user", prompt)
        save_message(user_id, "assistant", final_answer)
        set_cache(cache_key, final_answer)
        return {"response": final_answer}
    # ==========================
    # DEFAULT CHAT → LLAMA
    # ==========================
    messages = build_messages(system_prompt, memory, prompt)
    response = await async_llama(messages)
    save_message(user_id, "user", prompt)
    save_message(user_id, "assistant", response)
    set_cache(cache_key, response)
    return {"response": response}
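# =====================================
# LOCAL SMOKE TEST (illustrative)
# =====================================
# A minimal sketch of how route_request might be exercised locally; the
# prompt and user_id below are made-up values, and the call assumes the
# project-local modules (llm_clients, memory, search_tool, rag_engine)
# are configured and importable.
if __name__ == "__main__":
    result = asyncio.run(route_request("Hello there!", user_id="demo-user"))
    print(result)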