Spaces:
Running on Zero
Running on Zero
File size: 2,330 Bytes
b5e0c74 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | from dataclasses import dataclass
from openai import OpenAI
from app.core.config import settings
from app.core.models import SearchResult
@dataclass(frozen=True)
class ChatAnswer:
    """Result of one chat completion, kept together with the context used.

    The dataclass is frozen, so its fields cannot be reassigned after
    construction (the ``context`` list object itself is not copied or
    protected).
    """

    # Text of the model's reply.
    answer: str
    # Reasoning text that accompanied the reply, or None when there is none.
    reasoning: str | None
    # Search results that were supplied as context for the question.
    context: list[SearchResult]
class NvidiaChatClient:
    """Answers questions from retrieved context through an ``OpenAI`` client.

    The base URL, API key, model name, sampling parameters and thinking-token
    bounds are all read from ``settings``.
    """

    def __init__(self):
        """Build the ``OpenAI`` client from ``settings``.

        Raises:
            ValueError: If ``settings.NVIDIA_API_KEY`` is falsy.
        """
        api_key = settings.NVIDIA_API_KEY
        if not api_key:
            raise ValueError("NVIDIA_API_KEY is required for NVIDIA chat completions.")
        self.client = OpenAI(base_url=settings.NVIDIA_API_URL, api_key=api_key)

    def answer_with_context(self, question: str, context: list[SearchResult]) -> ChatAnswer:
        """Ask the chat model ``question`` with ``context`` rendered into the prompt.

        Args:
            question: The question text, inserted verbatim into the user message.
            context: Retrieved results. Each one is rendered as a numbered entry
                (starting at 1) showing its title, source, score and text.

        Returns:
            A ``ChatAnswer`` holding the first choice's message content (``""``
            when that content is falsy), the message's ``reasoning_content``
            attribute when it exists (otherwise ``None``), and the very same
            ``context`` list that was passed in.
        """
        # One entry per result; the bracketed number is what the system prompt
        # asks the model to cite.
        entries = (
            "\n".join(
                (
                    f"[{rank}] title={hit.title}",
                    f"source={hit.source}",
                    f"score={hit.score:.4f}",
                    f"text={hit.text}",
                )
            )
            for rank, hit in enumerate(context, start=1)
        )
        retrieved = "\n\n".join(entries)

        system_turn = {
            "role": "system",
            "content": (
                "You are KnowledgeHub's retrieval assistant. "
                "Answer only from the provided context. "
                "If the context is insufficient, say what is missing. "
                "Cite sources using bracket numbers like [1], [2]."
            ),
        }
        user_turn = {
            "role": "user",
            "content": f"Question:\n{question}\n\nRetrieved context:\n{retrieved}",
        }

        # Single non-streaming request; the thinking-token bounds are not
        # regular keyword arguments of ``create`` and therefore travel in
        # ``extra_body``.
        completion = self.client.chat.completions.create(
            model=settings.NVIDIA_CHAT_MODEL,
            messages=[system_turn, user_turn],
            temperature=settings.CHAT_TEMPERATURE,
            top_p=settings.CHAT_TOP_P,
            max_tokens=settings.CHAT_MAX_TOKENS,
            frequency_penalty=0,
            presence_penalty=0,
            stream=False,
            extra_body={
                "min_thinking_tokens": settings.MIN_THINKING_TOKENS,
                "max_thinking_tokens": settings.MAX_THINKING_TOKENS,
            },
        )

        reply = completion.choices[0].message
        # ``reasoning_content`` is looked up defensively because the message
        # object may not carry that attribute.
        thoughts = getattr(reply, "reasoning_content", None)
        return ChatAnswer(
            answer=reply.content or "",
            reasoning=thoughts,
            context=context,
        )
|