from config import *


@st.cache_data(ttl=3600)
def make_request(messages, max_tokens=1000, stream=False) -> str:
    """Send a chat-completion request to the Hugging Face Inference API.

    Results are cached by Streamlit for one hour (ttl=3600) keyed on the
    arguments, so identical prompts do not re-hit the API.

    Args:
        messages (list[dict]): Chat messages in the OpenAI-style
            ``[{"role": ..., "content": ...}]`` format.
        max_tokens (int): Maximum number of tokens to generate.
            Defaults to 1000.
        stream (bool): Whether to request a streamed response.
            Defaults to False.
            NOTE(review): when ``stream=True`` the client returns an
            iterator of chunks, so ``response.choices[0]`` below would
            fail — confirm callers only ever pass ``stream=False``.

    Returns:
        str: The assistant message content of the first completion choice.

    Side effects:
        On any error, writes the error to the Streamlit page and stops
        script execution via ``st.stop()`` (which raises internally, so
        this function never returns in the error case).
    """
    # MODEL_PATH and HUGGING_FACE_API_KEY come from `config` (star import).
    client = InferenceClient(
        MODEL_PATH,
        token=HUGGING_FACE_API_KEY,
    )
    try:
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=stream,
        )
        return response.choices[0].message['content']
    except Exception as e:
        # Surface the failure in the UI and halt the Streamlit run rather
        # than letting a raw traceback propagate to the user.
        st.write(f"An error occurred: {e}")
        st.stop()


def get_token_amount(text, model_name="gpt-4") -> int:
    """Count tokens in *text* using the tiktoken library.

    Even though we are using a Llama model, we use the GPT-4 tokenizer
    as an approximation.

    Args:
        text (str): The text to tokenize.
        model_name (str): The model whose tokenizer to use.
            Defaults to "gpt-4".

    Returns:
        int: The number of tokens in the text.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    return len(encoding.encode(text))


@st.cache_data
def annotate(topic, value):
    """Build an (label, value) annotation pair for display.

    Strips the SentencePiece word-boundary marker '▁' from the topic and
    stringifies the value.

    Args:
        topic (str): Raw topic/token text, possibly containing '▁'.
        value: Associated score or value; converted with ``str()``.

    Returns:
        tuple[str, str]: ``(cleaned_topic, str(value))``.
    """
    return (topic.replace('▁', ''), str(value))