# bldemo / components / LLM.py
# Druso's picture
# Update components/LLM.py
# 7b14ec6
import openai
import time
import tiktoken
from components.configs import LLMtemp, gptmodel
# Call the OpenAI chat model once and return the reply with its token usage.
def chatopenai(finalprompt):
    """Send ``finalprompt`` to the OpenAI chat model and return its reply.

    The model name and temperature come from the module-level ``gptmodel``
    and ``LLMtemp`` settings.

    Args:
        finalprompt: Value passed as ``messages`` to
            ``openai.ChatCompletion.create``.

    Returns:
        A ``(assistant_response, usedtokens)`` tuple: the content of the first
        choice's message and the ``total_tokens`` figure reported in the
        response's ``usage`` section.

    Any exception raised by the API call propagates to the caller.
    """
    request_args = {
        "model": gptmodel,
        "messages": finalprompt,
        "temperature": LLMtemp,
    }
    completion = openai.ChatCompletion.create(**request_args)
    # `completion` holds the full API payload, should it ever need storing.
    reply_text = completion.choices[0].message['content']
    total_tokens = completion["usage"]["total_tokens"]
    return reply_text, total_tokens
# Call the OpenAI chat model up to max_retries times (3 by default); if every attempt fails, return a default message and a token count of 0.
def chatopenairetry(finalprompt, max_retries=3, retry_delay=1):
    """Call the OpenAI chat model, retrying on failure.

    Args:
        finalprompt: Value passed as ``messages`` to
            ``openai.ChatCompletion.create``.
        max_retries: Maximum number of attempts. With ``0`` no call is made
            and the default reply is returned immediately.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        A ``(assistant_response, used_tokens)`` tuple. On success these are
        the first choice's message content and the reported
        ``total_tokens``. If every attempt fails, a fixed apology message
        and ``0`` are returned instead of raising.
    """
    default_assistant_response = "Sorry, I'm having issue with OpenAI API, it happens... could you retry?"
    default_used_tokens = 0

    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model=gptmodel,
                messages=finalprompt,
                temperature=LLMtemp,
            )
            assistant_response = response.choices[0].message['content']
            used_tokens = response["usage"]["total_tokens"]
            return assistant_response, used_tokens
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary that must
            # always hand something back to the caller.
            print(f"Encountered an error: {e}")
            # Only announce a retry and wait when another attempt follows;
            # sleeping after the final failure just delays the fallback.
            if attempt < max_retries:
                print("Retrying the API call...")
                time.sleep(retry_delay)

    # If all retries fail, return dummy values
    print("Maximum retries reached. Unable to get a response from OpenAI.")
    return default_assistant_response, default_used_tokens
# Call the OpenAI chat model with streaming enabled and forward the content as it arrives.
def chatopenaistream(finalprompt, callback, LLMtemp=LLMtemp):
    """Stream a chat completion, yielding ``callback(content)`` per piece.

    This is a generator: the API request is only issued once iteration
    starts.

    Args:
        finalprompt: Value passed as ``messages`` to
            ``openai.ChatCompletion.create``.
        callback: Callable invoked with each piece of streamed content; its
            return value is what the generator yields.
        LLMtemp: Sampling temperature; defaults to the module-level setting.

    Yields:
        The result of ``callback(content)`` for every chunk whose delta
        carries content. Chunks without content are skipped.

    Returns:
        The concatenation of all streamed content pieces, exposed as the
        generator's return value (``StopIteration.value`` / ``yield from``).
        Callers that simply iterate are unaffected.
    """
    # Collect pieces in a list and join once at the end rather than
    # growing a string with += on every chunk.
    pieces = []
    stream = openai.ChatCompletion.create(
        model=gptmodel,
        messages=finalprompt,
        temperature=LLMtemp,
        stream=True,
    )
    for chunk in stream:
        content = chunk["choices"][0].get("delta", {}).get("content")
        if content is not None:
            pieces.append(content)
            yield callback(content)  # Yield the result of the callback function
    return "".join(pieces)
# Token calculator for the streaming path: computes an estimate of the tokens used once the stream has ended.
def calculatetokens(finalprompt, assistant_response):
    """Estimate the tokens used by a prompt and its response.

    The estimate is the number of tokens in ``assistant_response``
    concatenated with the string form of ``finalprompt``. Because the prompt
    is stringified as a whole, the figure is an approximation rather than the
    usage the API would report.

    Args:
        finalprompt: The prompt sent to the model; formatted with ``f"{...}"``
            before encoding.
        assistant_response: The full text of the model's reply.

    Returns:
        The estimated token count as an int.
    """
    try:
        encoding = tiktoken.encoding_for_model(gptmodel)
    except KeyError:
        # tiktoken raises KeyError for model names it has no mapping for;
        # fall back to the cl100k_base encoding so the estimate still works.
        encoding = tiktoken.get_encoding("cl100k_base")
    # The previous version reset the count to 0 right after computing it,
    # so callers always received 0; return the computed estimate instead.
    usedtokens = len(encoding.encode(assistant_response + f"{finalprompt}"))
    return usedtokens