# File size: 1,754 Bytes
# Revision: 5637ddb
import warnings
warnings.filterwarnings(action='ignore')
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from dotenv import load_dotenv
load_dotenv()
def load_huggingface_model(
    *,
    repo_id: str = "Qwen/Qwen2.5-7B-Instruct",
    temperature: float = 0.65,
    max_new_tokens: int = 1024,
    top_p: float = 0.92,
    repetition_penalty: float = 1.05,
) -> "ChatHuggingFace":
    """
    Build and return a LangChain chat model backed by a Hugging Face endpoint.

    Creates a ``HuggingFaceEndpoint`` for the given model repository using the
    ``text-generation`` task, then wraps it in ``ChatHuggingFace`` so it can be
    used as a chat model in LangChain chains. Calling the function with no
    arguments reproduces the original fixed configuration.

    Parameters
    ----------
    repo_id
        Hugging Face Hub model identifier. Defaults to
        ``"Qwen/Qwen2.5-7B-Instruct"``.
    temperature
        Sampling temperature forwarded to the endpoint. Defaults to 0.65.
    max_new_tokens
        Upper bound on generated tokens per response. Defaults to 1024.
    top_p
        Nucleus-sampling threshold forwarded to the endpoint. Defaults to 0.92.
    repetition_penalty
        Repetition penalty forwarded to the endpoint. Defaults to 1.05.

    Returns
    -------
    ChatHuggingFace
        Chat-model wrapper around the configured endpoint.

    Notes
    -----
    - All parameters are keyword-only so that call sites stay readable and
      the argument order can never be confused.
    - Every call constructs a new endpoint client and a new wrapper; nothing
      is cached here. Callers that need a single shared instance should keep
      a reference to the returned object.
    - Inference is expected to run remotely through the Hugging Face
      endpoint rather than on local hardware.
    - This function performs no credential check of its own. The endpoint
      client presumably reads the API token from the environment (e.g.
      ``HUGGINGFACEHUB_API_TOKEN``, which the module loads from ``.env`` via
      ``load_dotenv()``) -- TODO confirm against the installed
      ``langchain_huggingface`` version, including whether a missing or
      invalid token fails at construction time or only on the first request.
    """
    endpoint = HuggingFaceEndpoint(
        repo_id=repo_id,
        task="text-generation",
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )
    return ChatHuggingFace(llm=endpoint)