Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,8 +12,12 @@ API_KEY = os.getenv("GROQ_API_KEY")
|
|
| 12 |
# In-memory chat history storage
|
| 13 |
chat_history: List[Dict[str, str]] = []
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def groq_with_memory(message: str, topic: str = "general") -> tuple:
|
| 16 |
-
"""Groq API call with
|
| 17 |
|
| 18 |
if not API_KEY:
|
| 19 |
return "❌ No API Key found", ""
|
|
@@ -35,14 +39,30 @@ def groq_with_memory(message: str, topic: str = "general") -> tuple:
|
|
| 35 |
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 36 |
})
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
payload = {
|
| 43 |
"model": "gemma2-9b-it",
|
| 44 |
"messages": messages,
|
| 45 |
-
"max_tokens":
|
| 46 |
"temperature": 0.7
|
| 47 |
}
|
| 48 |
|
|
@@ -266,15 +286,28 @@ with gr.Blocks(
|
|
| 266 |
if not chat_history:
|
| 267 |
return "❌ No conversation context yet"
|
| 268 |
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
-
for msg in
|
| 273 |
role_icon = "👤" if msg["role"] == "user" else "🤖"
|
| 274 |
context_text += f"{role_icon} **{msg['timestamp']}** [{msg.get('topic', 'general')}]\n"
|
| 275 |
-
context_text += f"{msg['content'][:
|
| 276 |
|
| 277 |
-
context_text += f"\n💡 *AI
|
|
|
|
| 278 |
return context_text
|
| 279 |
|
| 280 |
def refresh_topic_choices():
|
|
|
|
| 12 |
# In-memory chat history storage
|
| 13 |
chat_history: List[Dict[str, str]] = []
|
| 14 |
|
| 15 |
+
def count_tokens_rough(text: str) -> int:
    """Return a rough token estimate for *text* (1 token ≈ 4 characters).

    The estimate is the character count floor-divided by 4, so any string
    shorter than four characters (including the empty string) counts as
    zero tokens.
    """
    return len(text) // 4
|
| 18 |
+
|
| 19 |
def groq_with_memory(message: str, topic: str = "general") -> tuple:
|
| 20 |
+
"""Groq API call with smart context management"""
|
| 21 |
|
| 22 |
if not API_KEY:
|
| 23 |
return "❌ No API Key found", ""
|
|
|
|
| 39 |
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 40 |
})
|
| 41 |
|
| 42 |
+
# Smart context management - keep within ~6000 tokens (24k chars)
|
| 43 |
+
messages = []
|
| 44 |
+
total_chars = 0
|
| 45 |
+
max_chars = 24000 # Safe limit for gemma2-9b-it
|
| 46 |
+
|
| 47 |
+
# Add messages from most recent, working backwards
|
| 48 |
+
for msg in reversed(chat_history):
|
| 49 |
+
msg_content = f"{msg['content']}"
|
| 50 |
+
msg_chars = len(msg_content)
|
| 51 |
+
|
| 52 |
+
if total_chars + msg_chars < max_chars:
|
| 53 |
+
messages.insert(0, {"role": msg["role"], "content": msg["content"]})
|
| 54 |
+
total_chars += msg_chars
|
| 55 |
+
else:
|
| 56 |
+
break
|
| 57 |
+
|
| 58 |
+
# Ensure we have at least the current message
|
| 59 |
+
if not messages:
|
| 60 |
+
messages = [{"role": "user", "content": message.strip()}]
|
| 61 |
|
| 62 |
payload = {
|
| 63 |
"model": "gemma2-9b-it",
|
| 64 |
"messages": messages,
|
| 65 |
+
"max_tokens": 2000, # Conservative for safety
|
| 66 |
"temperature": 0.7
|
| 67 |
}
|
| 68 |
|
|
|
|
| 286 |
if not chat_history:
|
| 287 |
return "❌ No conversation context yet"
|
| 288 |
|
| 289 |
+
# Show actual context that will be sent to AI
|
| 290 |
+
messages = []
|
| 291 |
+
total_chars = 0
|
| 292 |
+
max_chars = 24000
|
| 293 |
+
|
| 294 |
+
for msg in reversed(chat_history):
|
| 295 |
+
msg_chars = len(msg['content'])
|
| 296 |
+
if total_chars + msg_chars < max_chars:
|
| 297 |
+
messages.insert(0, msg)
|
| 298 |
+
total_chars += msg_chars
|
| 299 |
+
else:
|
| 300 |
+
break
|
| 301 |
+
|
| 302 |
+
context_text = f"🧠 **Current AI Context** ({len(messages)} messages, ~{total_chars:,} chars)\n\n"
|
| 303 |
|
| 304 |
+
for msg in messages:
|
| 305 |
role_icon = "👤" if msg["role"] == "user" else "🤖"
|
| 306 |
context_text += f"{role_icon} **{msg['timestamp']}** [{msg.get('topic', 'general')}]\n"
|
| 307 |
+
context_text += f"{msg['content'][:150]}{'...' if len(msg['content']) > 150 else ''}\n\n"
|
| 308 |
|
| 309 |
+
context_text += f"\n💡 *AI remembers {len(messages)} messages (~{total_chars:,} characters)*"
|
| 310 |
+
context_text += f"\n🔢 *Context limit: {max_chars:,} characters*"
|
| 311 |
return context_text
|
| 312 |
|
| 313 |
def refresh_topic_choices():
|