File size: 772 Bytes
a9601e9
 
 
c086a33
a9601e9
 
 
c086a33
a9601e9
362f479
 
c086a33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from transformers import pipeline
from config import LLM_MODEL, HUGGINGFACE_API_KEY

# Shared Hugging Face text2text-generation pipeline, built once at import
# time from the model name and access token supplied by the config module.
generator = pipeline(
    task="text2text-generation",
    model=LLM_MODEL,
    token=HUGGINGFACE_API_KEY,
)

def refine_answer(prompt: str) -> str:
    """Generate an answer for *prompt* using greedy (non-sampling) decoding.

    The module-level ``generator`` pipeline is called with sampling disabled
    so the same prompt yields the same text (intended for SOP / RAG answers),
    and with repetition controls to avoid degenerate loops such as
    "OPTIONS OPTIONS ...".

    Args:
        prompt: The full prompt text passed to the pipeline.

    Returns:
        The ``generated_text`` of the first pipeline result, with leading
        and trailing whitespace removed.
    """
    # NOTE: no ``temperature`` is passed here. Temperature only applies when
    # sampling is enabled; combining it with ``do_sample=False`` has no effect
    # on the output and makes recent transformers versions emit a
    # generation-config warning (and 0.0 is not a valid sampling temperature).
    outputs = generator(
        prompt,
        max_new_tokens=200,        # cap on the number of generated tokens
        do_sample=False,           # greedy decoding -> deterministic output
        repetition_penalty=1.2,    # discourage re-using already generated tokens
        no_repeat_ngram_size=3,    # forbid repeating any 3-token sequence
    )

    # Only the first returned candidate is used.
    return outputs[0]["generated_text"].strip()