Spaces:
Runtime error
Runtime error
| from openai import OpenAI | |
| from config import get_api_keys | |
| import logging | |
| import tiktoken | |
| import time | |
| import asyncio | |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# OpenAI-compatible client pointed at the OpenRouter endpoint; the key is
# looked up from the mapping returned by the project's get_api_keys().
api_keys = get_api_keys()
or_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_keys["OPENROUTER_API_KEY"],
)

# Token encoding: the gpt-3.5-turbo tokenizer is used for every token count
# and truncation in this module, whichever model actually serves the request.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
def limit_tokens(input_string, token_limit=6000):
    """Truncate ``input_string`` to at most ``token_limit`` tokens.

    The text is tokenized with the module-level ``encoding``, cut to the
    first ``token_limit`` tokens, and decoded back to a string.

    Args:
        input_string: Text to truncate.
        token_limit: Maximum number of tokens to keep.

    Returns:
        The decoded text of the first ``token_limit`` tokens.
    """
    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" encode as ordinary text; with tiktoken's default they
    # raise ValueError, which would crash on arbitrary user input.
    tokens = encoding.encode(input_string, disallowed_special=())
    return encoding.decode(tokens[:token_limit])
def calculate_tokens(msgs):
    """Return the total token count of ``msgs``.

    Each item is converted with ``str()`` before being tokenized with the
    module-level ``encoding``, so for non-string items the count covers their
    whole string representation, not just any text they contain.

    Args:
        msgs: Iterable of messages (any objects with a useful ``str()``).

    Returns:
        Sum of the per-item token counts (0 for an empty iterable).
    """
    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" count as ordinary text; with tiktoken's default they
    # raise ValueError, which would crash on arbitrary message content.
    return sum(
        len(encoding.encode(str(m), disallowed_special=())) for m in msgs
    )
# In-memory storage for conversations.
# Both dicts are keyed by conversation id; last_activity values are compared
# against time.time() by the cleanup coroutine in this module.
conversations: dict = {}
last_activity: dict = {}
async def clear_inactive_conversations(max_idle_seconds=3600 * 24, check_interval_seconds=600):
    """Evict idle conversations from the in-memory stores, forever.

    On every pass, each conversation id whose ``last_activity`` timestamp is
    more than ``max_idle_seconds`` old is removed from both ``conversations``
    and ``last_activity``. The coroutine then sleeps for
    ``check_interval_seconds`` and repeats; it never returns on its own.

    Args:
        max_idle_seconds: Idle time after which a conversation is dropped
            (default: 24 hours).
        check_interval_seconds: Pause between cleanup passes
            (default: 600 seconds, i.e. every 10 minutes).
    """
    while True:
        now = time.time()
        stale_ids = [
            conv_id
            for conv_id, last_seen in last_activity.items()
            if now - last_seen > max_idle_seconds
        ]
        for conv_id in stale_ids:
            # pop(..., None) tolerates ids that are missing from either dict.
            conversations.pop(conv_id, None)
            last_activity.pop(conv_id, None)
        logger.info(f"Cleared {len(stale_ids)} inactive conversations")
        await asyncio.sleep(check_interval_seconds)
def chat_with_llama_stream(
    messages,
    model="openai/gpt-4o-mini",
    max_llm_history=4,
    max_output_tokens=2500,
    context_token_limit=8000,
):
    """Stream a chat completion, yielding text fragments as they arrive.

    Before the request is sent, the prompt is trimmed until its token count
    (per ``calculate_tokens``) fits in
    ``context_token_limit - max_output_tokens``. Trimming keeps the first
    message plus the most recent ``max_llm_history`` messages, and shrinks
    that window one message at a time if the prompt is still too large.
    Only the copy sent to the model is trimmed; the caller's ``messages``
    list is never shortened.

    After the stream finishes, the full assistant reply is appended to the
    caller's ``messages`` list as ``{"role": "assistant", "content": ...}``.

    Because this is a generator, nothing runs (and nothing is raised) until
    the caller starts iterating.

    Args:
        messages: Mutable list of chat message dicts; mutated in place by
            appending the assistant reply.
        model: Model identifier passed to the completion endpoint.
        max_llm_history: Initial number of most-recent messages to keep
            (in addition to the first message) when trimming.
        max_output_tokens: ``max_tokens`` for the completion; also reserved
            out of the context budget.
        context_token_limit: Total token budget for prompt plus completion.

    Yields:
        Each non-empty content fragment of the streamed reply.

    Raises:
        Exception: If the prompt cannot be trimmed to fit the budget, or if
            the completion request or stream fails (the original error is
            chained as ``__cause__``).
    """
    logger.info(f"Starting chat with model: {model}")

    prompt_budget = context_token_limit - max_output_tokens
    # Trim a separate reference so the final append below always reaches the
    # caller's list, even when the prompt had to be shortened.
    prompt_messages = messages
    history = max_llm_history
    while calculate_tokens(prompt_messages) > prompt_budget:
        # Trimming to first + last `history` yields history + 1 messages, so
        # it only makes progress when the list is longer than that; otherwise
        # the window itself has to shrink. (Comparing against `history` alone
        # would re-trim to the same list forever.)
        if history >= 1 and len(prompt_messages) > history + 1:
            prompt_messages = [prompt_messages[0]] + prompt_messages[-history:]
        else:
            history -= 1
            if history < 2:
                error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
                logger.error(error_message)
                raise Exception(error_message)

    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=prompt_messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        parts = []
        for chunk in response:
            # Some stream chunks carry no choices at all; skip them instead
            # of failing with IndexError.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                parts.append(content)
                yield content
        # After streaming, add the full response to the conversation history.
        messages.append({"role": "assistant", "content": "".join(parts)})
        logger.info("Chat completed successfully")
    except Exception as e:
        logger.error(f"Error in model response: {str(e)}")
        raise Exception(f"Error in model response: {str(e)}") from e