Spaces:
Runtime error
Runtime error
Update llm_utils.py
Browse files- llm_utils.py +31 -1
llm_utils.py
CHANGED
|
@@ -1,12 +1,42 @@
|
|
| 1 |
from openai import OpenAI
|
| 2 |
from config import get_api_keys
|
| 3 |
import logging
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
logger = logging.getLogger(__name__)
|
| 6 |
-
|
| 7 |
api_keys = get_api_keys()
|
| 8 |
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
|
| 11 |
logger.info(f"Starting chat with model: {model}")
|
| 12 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|
|
|
|
| 1 |
from openai import OpenAI
|
| 2 |
from config import get_api_keys
|
| 3 |
import logging
|
| 4 |
+
import tiktoken
|
| 5 |
+
import time
|
| 6 |
+
import asyncio
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
|
|
|
| 9 |
api_keys = get_api_keys()
|
| 10 |
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
|
| 11 |
|
| 12 |
+
|
| 13 |
+
# Token encoding: tiktoken encoder for gpt-3.5-turbo, shared by limit_tokens and calculate_tokens
|
| 14 |
+
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
| 15 |
+
|
| 16 |
+
def limit_tokens(input_string, token_limit=6000):
    """Truncate ``input_string`` to its first ``token_limit`` tokens.

    The text is tokenized with the module-level ``encoding``, the token
    list is sliced to ``token_limit`` entries, and the kept tokens are
    decoded back into a string.

    Args:
        input_string: Text to truncate.
        token_limit: Number of leading tokens to keep.

    Returns:
        The decoded text of the kept tokens.
    """
    token_ids = encoding.encode(input_string)
    kept_ids = token_ids[:token_limit]
    return encoding.decode(kept_ids)
|
| 18 |
+
|
| 19 |
+
def calculate_tokens(msgs):
    """Return the total token count over all entries of ``msgs``.

    Each entry is converted with ``str()`` and tokenized with the
    module-level ``encoding``; the per-entry token counts are added up.

    Args:
        msgs: Iterable of messages (any objects; they are stringified).

    Returns:
        Sum of the token counts, ``0`` when ``msgs`` is empty.
    """
    total = 0
    for message in msgs:
        total += len(encoding.encode(str(message)))
    return total
|
| 21 |
+
|
| 22 |
+
# In-memory storage for conversations
|
| 23 |
+
conversations = {}
|
| 24 |
+
last_activity = {}
|
| 25 |
+
|
| 26 |
+
async def clear_inactive_conversations(max_idle_seconds=3600 * 24, check_interval=600):
    """Periodically evict conversations that have been idle for too long.

    Runs forever. On each pass, every conversation id whose timestamp in
    ``last_activity`` is more than ``max_idle_seconds`` older than the
    current time is removed from both ``conversations`` and
    ``last_activity``. The number of evicted ids is logged, then the
    coroutine sleeps for ``check_interval`` seconds before the next pass.

    Args:
        max_idle_seconds: Idle time in seconds after which a conversation
            is dropped. Defaults to 24 hours.
        check_interval: Seconds to sleep between passes. Defaults to 600
            (10 minutes).
    """
    while True:
        now = time.time()
        # Collect the ids first so the dicts are not mutated while iterating.
        stale_ids = [
            conv_id
            for conv_id, last_time in last_activity.items()
            if now - last_time > max_idle_seconds
        ]
        for conv_id in stale_ids:
            # pop() with a default tolerates ids missing from either dict.
            conversations.pop(conv_id, None)
            last_activity.pop(conv_id, None)
        logger.info(f"Cleared {len(stale_ids)} inactive conversations")
        await asyncio.sleep(check_interval)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
|
| 41 |
logger.info(f"Starting chat with model: {model}")
|
| 42 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|