# Source: HuggingFace Spaces page capture (Space status: Sleeping).
from transformers import pipeline
from config import LLM_MODEL, HUGGINGFACE_API_KEY
# HuggingFace text2text pipeline, shared by all callers in this module.
# NOTE(review): the model is downloaded/loaded at import time — if import
# cost matters, consider lazy initialization. TODO confirm with deployment.
generator = pipeline(
    task="text2text-generation",
    model=LLM_MODEL,
    token=HUGGINGFACE_API_KEY,
)
def refine_answer(prompt: str) -> str:
    """Generate a deterministic refinement of *prompt*.

    Uses greedy decoding (``do_sample=False``) so SOP / RAG answers are
    reproducible run-to-run, with repetition penalties to prevent loops
    such as the "OPTIONS OPTIONS" bug.

    Args:
        prompt: Fully formed prompt text for the text2text model.

    Returns:
        The model's generated text with surrounding whitespace stripped.
    """
    outputs = generator(
        prompt,
        max_new_tokens=200,
        do_sample=False,          # greedy decoding -> deterministic output
        # `temperature=0.0` removed: with do_sample=False the temperature is
        # ignored, and recent transformers versions warn (and GenerationConfig
        # validation rejects 0.0) when sampling flags accompany greedy search.
        repetition_penalty=1.2,   # penalize token repetition
        no_repeat_ngram_size=3,   # block repeated 3-gram phrases
    )
    return outputs[0]["generated_text"].strip()