pvanand committed on
Commit
be1349b
·
verified ·
1 Parent(s): 71e7504

Update llm_utils.py

Browse files
Files changed (1) hide show
  1. llm_utils.py +31 -1
llm_utils.py CHANGED
@@ -1,12 +1,42 @@
1
  from openai import OpenAI
2
  from config import get_api_keys
3
  import logging
 
 
 
4
 
5
  logger = logging.getLogger(__name__)
6
-
7
  api_keys = get_api_keys()
8
  or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
11
  logger.info(f"Starting chat with model: {model}")
12
  while calculate_tokens(messages) > (8000 - max_output_tokens):
 
1
  from openai import OpenAI
2
  from config import get_api_keys
3
  import logging
4
+ import tiktoken
5
+ import time
6
+ import asyncio
7
 
8
  logger = logging.getLogger(__name__)
 
9
  api_keys = get_api_keys()
10
  or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
11
 
12
+
13
+ # Token encoding
14
+ encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
15
+
16
+ def limit_tokens(input_string, token_limit=6000):
17
+ return encoding.decode(encoding.encode(input_string)[:token_limit])
18
+
19
+ def calculate_tokens(msgs):
20
+ return sum(len(encoding.encode(str(m))) for m in msgs)
21
+
22
+ # In-memory storage for conversations
23
+ conversations = {}
24
+ last_activity = {}
25
+
26
+ async def clear_inactive_conversations():
27
+ while True:
28
+ current_time = time.time()
29
+ inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
30
+ if current_time - last_time > 3600*24] # 24 hours
31
+ for conv_id in inactive_convos:
32
+ if conv_id in conversations:
33
+ del conversations[conv_id]
34
+ if conv_id in last_activity:
35
+ del last_activity[conv_id]
36
+ logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
37
+ await asyncio.sleep(600) # Check every 10 minutes
38
+
39
+
40
  def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
41
  logger.info(f"Starting chat with model: {model}")
42
  while calculate_tokens(messages) > (8000 - max_output_tokens):