# MindSync_AI / hf_llm.py
# hf_llm.py
from huggingface_hub import InferenceClient
import os
# Default model for all requests; edit this constant to switch models.
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"

# Read the API token from the environment so it never lands in source control.
# None (unset) falls back to anonymous / locally-cached credentials.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

# Shared Inference API client, configured once at import time.
client = InferenceClient(model=DEFAULT_MODEL, token=HF_API_TOKEN)
def generate_with_hf(prompt: str, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Run a text-generation request against the configured HF model.

    Args:
        prompt: Instruction or user query sent to the model.
        max_new_tokens: Upper bound on the number of tokens generated.
        temperature: Sampling temperature; higher values yield more varied output.

    Returns:
        The raw generated text returned by the Inference API.
    """
    # The client returns plain text for text_generation, so no unwrapping needed.
    return client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )