from typing import Literal
from groq import Groq, InternalServerError
from openai import OpenAI, InternalServerError as OpenaiInternalServerError
from src.settings import settings
from dotenv import load_dotenv
import os


load_dotenv()


CHAT_MODEL = Literal["llama3-8b-8192", "llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"]
groq_api_key = os.getenv("GROQ_API_KEY")
openrouter_api_key = settings.openrouter_api_key


client = Groq(
    api_key=groq_api_key,
)


openrouter_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
)


def groq_chat(
    message: str,
    preamble: str,
    model: CHAT_MODEL = "mixtral-8x7b-32768",
    temperature: float = 0.5,
    max_tokens: int = 1024,
    top_p: float = 1,
    stop: str | None = None,
    stream: bool = False,
    chat_history: list[dict] | None = None,
) -> dict:
    """
    Sends a chat message to the Groq LLM and returns the response.

    Args:
        message (str): The user message to be sent to the LLM.
        preamble (str): The system message that sets the behavior of the assistant.
        model (str, optional): The language model that will generate the completion. Defaults to "mixtral-8x7b-32768".
        temperature (float, optional): Controls randomness. Defaults to 0.5.
        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1024.
        top_p (float, optional): Controls diversity via nucleus sampling. Defaults to 1.
        stop (str | None, optional): A stop sequence that signals the LLM to stop generating content. Defaults to None.
        stream (bool, optional): If set, partial message deltas are sent as they are generated. Defaults to False.
        chat_history (list[dict] | None, optional): Prior messages to include in the conversation. Defaults to None.

    Returns:
        dict: The chat completion response, or an error dict if the Groq server is unavailable.
    """

    messages = [{"role": "system", "content": preamble}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stop=stop,
            stream=stream,
        )
        return chat_completion
    except InternalServerError:
        return {"error": "Groq server is currently unavailable. Please try again later."}
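

# A minimal usage sketch for groq_chat (illustration only, not part of the
# module API): it assumes GROQ_API_KEY is set in the environment. With
# stream=False the reply text lives at .choices[0].message.content, while
# the error path returns a plain dict.
#
#     reply = groq_chat(
#         message="In one sentence, what is nucleus sampling?",
#         preamble="You are a concise technical assistant.",
#     )
#     if isinstance(reply, dict):
#         print(reply["error"])
#     else:
#         print(reply.choices[0].message.content)

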
def openrouter_chat(
    message: str,
    preamble: str,
    model: str = "mistralai/mixtral-8x7b-instruct",
    temperature: float = 0.5,
    max_tokens: int = 1024,
    top_p: float = 1,
    stop: str | None = None,
    stream: bool = False,
    chat_history: list[dict] | None = None,
) -> dict:
    """
    Sends a chat message to an LLM via OpenRouter and returns the response.

    Arguments mirror groq_chat, except that model takes an OpenRouter model id
    (e.g. "mistralai/mixtral-8x7b-instruct") rather than a Groq CHAT_MODEL.

    Returns:
        dict: The chat completion response, or an error dict if the OpenRouter server is unavailable.
    """
    messages = [{"role": "system", "content": preamble}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    try:
        chat_completion = openrouter_client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stop=stop,
            stream=stream,
        )
        return chat_completion
    except OpenaiInternalServerError:
        return {"error": "OpenRouter server is currently unavailable. Please try again later."}
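

if __name__ == "__main__":
    # A smoke-test sketch, not a committed CLI: it assumes GROQ_API_KEY and
    # the OpenRouter key in src.settings are configured, and uses each
    # function's default model. Both wrappers share the same response shape,
    # so one loop covers both.
    for chat_fn in (groq_chat, openrouter_chat):
        response = chat_fn(
            message="In one sentence, what is nucleus sampling?",
            preamble="You are a concise technical assistant.",
        )
        if isinstance(response, dict):  # error path returns a plain dict
            print(response["error"])
        else:  # non-streaming completion object
            print(response.choices[0].message.content)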