specialized-agents-api

Runtime error

pvanand committed on Sep 13, 2024

Commit

be1349b

verified ·

1 Parent(s): 71e7504

Update llm_utils.py

Files changed (1) hide show

llm_utils.py CHANGED Viewed

@@ -1,12 +1,42 @@
 from openai import OpenAI
 from config import get_api_keys
 import logging
 # Module-level logger named after this module.
 logger = logging.getLogger(__name__)
 # API keys are loaded once at import time via the project's config helper.
 api_keys = get_api_keys()
 # OpenAI SDK client pointed at the OpenRouter base URL; the lookup requires an
 # "OPENROUTER_API_KEY" entry in api_keys.
 or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
 def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
     logger.info(f"Starting chat with model: {model}")
     while calculate_tokens(messages) > (8000 - max_output_tokens):

 from openai import OpenAI
 from config import get_api_keys
 import logging
+import tiktoken
+import time
+import asyncio
 # Module-level logger named after this module.
 logger = logging.getLogger(__name__)
 # API keys are loaded once at import time via the project's config helper.
 api_keys = get_api_keys()
 # OpenAI SDK client pointed at the OpenRouter base URL; the lookup requires an
 # "OPENROUTER_API_KEY" entry in api_keys.
 or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
+# Token encoding: the tiktoken encoding registered for "gpt-3.5-turbo", shared
+# by limit_tokens() and calculate_tokens().
+# NOTE(review): chat_with_llama_stream defaults to "openai/gpt-4o-mini", so
+# counts from this encoding may only approximate that model's tokenization —
+# confirm whether an approximation is acceptable.
+encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
+def limit_tokens(input_string, token_limit=6000):
+    """Return ``input_string`` cut down to its first ``token_limit`` tokens.
+
+    The text is encoded with the module-level ``encoding``, the token list is
+    sliced to at most ``token_limit`` entries, and the kept tokens are decoded
+    back into a string.
+    """
+    token_ids = encoding.encode(input_string)
+    kept_ids = token_ids[:token_limit]
+    return encoding.decode(kept_ids)
+def calculate_tokens(msgs):
+    """Return the total token count over every item in ``msgs``.
+
+    Each item is converted with ``str()`` before being encoded with the
+    module-level ``encoding``, so the count covers the item's full string
+    form. An empty ``msgs`` yields 0.
+    """
+    total = 0
+    for msg in msgs:
+        total += len(encoding.encode(str(msg)))
+    return total
+# In-memory storage for conversations, keyed by conversation id.
+# NOTE(review): nothing in the visible code populates this dict — presumably
+# callers elsewhere do; confirm the value schema there.
+conversations = {}
+# Conversation id -> last-activity timestamp. clear_inactive_conversations()
+# compares these values against time.time() to find idle conversations.
+last_activity = {}
+async def clear_inactive_conversations():
+    """Periodically evict conversations that have been idle for over 24 hours.
+
+    Loops forever. On each pass it collects every conversation id in
+    ``last_activity`` whose timestamp is more than 24 hours older than
+    ``time.time()``, removes that id from both ``conversations`` and
+    ``last_activity``, logs how many ids were evicted, and then sleeps for
+    600 seconds (10 minutes) before the next pass.
+    """
+    max_idle_seconds = 3600 * 24  # 24 hours
+    sweep_interval_seconds = 600  # 10 minutes between sweeps
+    while True:
+        current_time = time.time()
+        # Snapshot the stale ids first so the dicts are not mutated while
+        # they are being iterated.
+        inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
+                           if current_time - last_time > max_idle_seconds]
+        for conv_id in inactive_convos:
+            # pop() with a default tolerates ids that are already gone.
+            conversations.pop(conv_id, None)
+            last_activity.pop(conv_id, None)
+        logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
+        await asyncio.sleep(sweep_interval_seconds)
 def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
     logger.info(f"Starting chat with model: {model}")
     while calculate_tokens(messages) > (8000 - max_output_tokens):