import asyncio
import sys
import textwrap

import openai
from halo import Halo

# USD cost per 1,000 completion (output) tokens, keyed by model name.
gpt_costs_per_thousand_out = {
    "gpt-3.5-turbo-16k": 0.004,
    "gpt-4-32k": 0.12,
}

# USD cost per 1,000 prompt (input) tokens, keyed by model name.
gpt_costs_per_thousand_in = {
    "gpt-3.5-turbo-16k": 0.003,
    "gpt-4-32k": 0.06,
}


def estimate_costs(prompt_tokens, model: str):
    """Return the estimated USD cost of ``prompt_tokens`` input tokens for ``model``.

    Uses the input-token rate table; raises KeyError if ``model`` has no
    known pricing.
    """
    return (prompt_tokens / 1000) * gpt_costs_per_thousand_in[model]


async def chatbot(conversation, model, temperature=0):
    """Send ``conversation`` to the OpenAI chat API and return ``(text, usage)``.

    Transient API errors are retried with exponential backoff
    (5s, 10s, 20s, ...) up to 7 attempts, after which the process exits.
    A "maximum context length" error instead drops the oldest message
    from ``conversation`` (mutating the caller's list) and retries
    immediately without consuming a retry attempt.
    """
    max_retry = 7
    retry = 0
    while True:
        try:
            response = await openai.ChatCompletion.acreate(
                model=model, messages=conversation, temperature=temperature
            )
            text = response["choices"][0]["message"]["content"]
            return text, response["usage"]
        except Exception as oops:
            print(f'\n\nError communicating with OpenAI: "{oops}"')
            if "maximum context length" in str(oops):
                # Context-length errors are deterministic: backing off would
                # not help, so trim the oldest message and retry at once.
                conversation.pop(0)
                print("\n\n DEBUG: Trimming oldest message")
                continue
            retry += 1
            if retry >= max_retry:
                print(f"\n\nExiting due to excessive errors in API: {oops}")
                # sys.exit is reliable even where the site-injected exit()
                # builtin is unavailable (frozen apps, embedded interpreters).
                sys.exit(1)
            # Compute the backoff once so the message and the sleep agree.
            wait = 2 ** (retry - 1) * 5
            print(f"\n\nRetrying in {wait} seconds...")
            await asyncio.sleep(wait)


def chat_print(text):
    """Pretty-print ``text`` as an indented, 120-column-wrapped chatbot reply."""
    formatted_lines = [
        textwrap.fill(line, width=120, initial_indent=" ", subsequent_indent=" ")
        for line in text.split("\n")
    ]
    formatted_text = "\n".join(formatted_lines)
    print("\n\n\nCHATBOT:\n\n%s" % formatted_text)