# Author: MohitGupta41 — final commit e4544a7
# app/tools/llm_answer.py
from __future__ import annotations
from typing import Optional, Dict, Any, List
import requests, json
# Hugging Face router endpoint (OpenAI-compatible chat-completions API).
HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"

# System instructions for the answer model: summarize-only, never fabricate.
SYSTEM_PROMPT = """You are a BI copilot.
- NEVER invent numbers; only summarize from provided table rows.
- Use 3-letter region codes (NCR, BLR, MUM, HYD, CHN, PUN).
- Write one concise paragraph and up to 2 brief bullets with clear takeaways.
- If the provided rows are empty or unusable, say so plainly; do not invent anything.
"""
class AnswerLLM:
    """Turns a SQL result set into a short natural-language answer.

    Calls a Hugging Face router chat-completions endpoint when a token and
    model id are configured; otherwise falls back to a deterministic
    one-line summary so the caller always gets a string back.
    """

    def __init__(self, model_id: str, token: Optional[str], temperature: float = 0.2, max_tokens: int = 300, timeout: int = 60):
        # Remote-call settings; `enabled` is derived, never set directly.
        self.model_id = model_id
        self.token = token
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout = timeout
        self.enabled = bool(token and model_id)

    def set_token(self, token: Optional[str]) -> None:
        """Swap the API token at runtime and recompute `enabled`."""
        self.token = token
        self.enabled = bool(token and self.model_id)

    def generate(self, question: str, sql: str, columns: List[str], rows: List[list]) -> str:
        """Return an LLM-written summary of `rows`, or a deterministic stub.

        The question, SQL text, and a JSON dump of the (possibly truncated)
        table are packed into a single user message. Raises
        `requests.HTTPError` if the endpoint responds with an error status.
        """
        if not self.enabled:
            # deterministic fallback
            return f"Rows: {len(rows)} | Columns: {columns[:4]}..."

        # Cap the prompt size: never ship more than the first 50 rows.
        shown = rows[:50] if len(rows) > 50 else rows
        serialized = json.dumps({"columns": columns, "rows": shown}, ensure_ascii=False)

        user_text = f"Question: {question}\nSQL used:\n{sql}\n\nHere are the rows (JSON):\n{serialized}\n\nAnswer:"
        body = {
            "model": self.model_id,
            "stream": False,
            "messages": [
                {"role":"system", "content":[{"type":"text","text":SYSTEM_PROMPT}]},
                {"role":"user", "content":[{"type":"text","text": user_text}]},
            ],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        request_headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/json",
            "Accept-Encoding": "identity",
        }

        response = requests.post(HF_CHAT_URL, headers=request_headers, json=body, timeout=self.timeout)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]