# NOTE(review): removed stray "Spaces: / Sleeping / Sleeping" text — this was
# Hugging Face Spaces UI status output accidentally pasted into the file, not code.
# hf_llm.py
"""Shared Hugging Face Inference API client for text generation."""

import os

from huggingface_hub import InferenceClient

# You can change the default model here:
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"

# Load token from environment variable for security; None falls back to
# unauthenticated (rate-limited) access.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

# Module-level client reused by generate_with_hf() for every call.
client = InferenceClient(
    model=DEFAULT_MODEL,
    token=HF_API_TOKEN,
)
def generate_with_hf(prompt: str, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """
    Generate chat-style responses using Hugging Face text generation models.

    Args:
        prompt (str): The instruction or user query.
        max_new_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature; higher values increase
            creativity/randomness of the output.

    Returns:
        str: The model's generated text.
    """
    # NOTE(review): uses the module-level `client`; network errors from the
    # Inference API propagate to the caller.
    response = client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    # client.text_generation returns the generated string directly.
    return response