File size: 2,330 Bytes
b5e0c74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from dataclasses import dataclass

from openai import OpenAI

from app.core.config import settings
from app.core.models import SearchResult


@dataclass(frozen=True)
class ChatAnswer:
    """Immutable result of a context-grounded chat completion."""

    # Reply text; NvidiaChatClient stores "" when the model message has no content.
    answer: str
    # Value of the message's `reasoning_content` attribute, or None when absent.
    reasoning: str | None
    # The search results that were supplied to the model as context.
    context: list[SearchResult]


class NvidiaChatClient:
    """Chat client that answers questions from retrieved context.

    Wraps an ``OpenAI`` client pointed at ``settings.NVIDIA_API_URL`` and
    issues non-streaming chat completions with ``settings.NVIDIA_CHAT_MODEL``.
    """

    def __init__(self):
        """Create the underlying ``OpenAI`` client.

        Raises:
            ValueError: If ``settings.NVIDIA_API_KEY`` is falsy.
        """
        api_key = settings.NVIDIA_API_KEY
        if not api_key:
            raise ValueError("NVIDIA_API_KEY is required for NVIDIA chat completions.")

        self.client = OpenAI(base_url=settings.NVIDIA_API_URL, api_key=api_key)

    @staticmethod
    def _render_snippet(position: int, hit: SearchResult) -> str:
        """Format one context item as a numbered, four-line text block."""
        return (
            f"[{position}] title={hit.title}\n"
            f"source={hit.source}\n"
            f"score={hit.score:.4f}\n"
            f"text={hit.text}"
        )

    def answer_with_context(self, question: str, context: list[SearchResult]) -> ChatAnswer:
        """Ask the chat model to answer ``question`` using only ``context``.

        Context items are numbered from 1 in the order given, which is the
        numbering the system prompt asks the model to cite as ``[1]``, ``[2]``.

        Args:
            question: The user's question, inserted verbatim into the prompt.
            context: Search results to present to the model.

        Returns:
            A ``ChatAnswer`` holding the reply text (``""`` when the message
            content is falsy), the message's ``reasoning_content`` attribute if
            it has one (otherwise ``None``), and the ``context`` list passed in.
        """
        # Items are separated by a blank line in the rendered prompt.
        context_text = "\n\n".join(
            self._render_snippet(position, hit)
            for position, hit in enumerate(context, start=1)
        )

        system_message = {
            "role": "system",
            "content": (
                "You are KnowledgeHub's retrieval assistant. Answer only from the "
                "provided context. If the context is insufficient, say what is missing. "
                "Cite sources using bracket numbers like [1], [2]."
            ),
        }
        user_message = {
            "role": "user",
            "content": f"Question:\n{question}\n\nRetrieved context:\n{context_text}",
        }

        completion = self.client.chat.completions.create(
            model=settings.NVIDIA_CHAT_MODEL,
            messages=[system_message, user_message],
            temperature=settings.CHAT_TEMPERATURE,
            top_p=settings.CHAT_TOP_P,
            max_tokens=settings.CHAT_MAX_TOKENS,
            frequency_penalty=0,
            presence_penalty=0,
            stream=False,
            # Thinking-token bounds are sent as extra request-body fields.
            extra_body={
                "min_thinking_tokens": settings.MIN_THINKING_TOKENS,
                "max_thinking_tokens": settings.MAX_THINKING_TOKENS,
            },
        )

        reply = completion.choices[0].message
        # Not every message object carries `reasoning_content`; default to None.
        reasoning = getattr(reply, "reasoning_content", None)
        return ChatAnswer(
            answer=reply.content or "",
            reasoning=reasoning,
            context=context,
        )