# hf_llm.py
import os

from huggingface_hub import InferenceClient

# You can change the default model here:
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the token from an environment variable rather than hard-coding it
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

# Client setup
client = InferenceClient(
    model=DEFAULT_MODEL,
    token=HF_API_TOKEN,
)


def generate_with_hf(prompt: str, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """
    Generate chat-style responses using Hugging Face text generation models.

    Args:
        prompt (str): The instruction or user query.
        max_new_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature; higher values produce
            more varied output.

    Returns:
        str: The generated text.
    """
    response = client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    # text_generation returns the generated string directly
    return response
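
# A minimal usage sketch for trying the helper from the command line.
# It assumes HF_API_TOKEN is set in the environment and that the default
# model is reachable through the Hugging Face Inference API; the example
# prompt is purely illustrative.
if __name__ == "__main__":
    answer = generate_with_hf(
        "Explain what an inference endpoint is in one sentence.",
        max_new_tokens=64,
    )
    print(answer)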