MohitGupta41 committed on
Commit
e4544a7
·
1 Parent(s): 95bb394

Final Commit

Browse files
Dockerfile CHANGED
@@ -1,7 +1,3 @@
1
- # --------------------------
2
- # Hugging Face Space (Docker)
3
- # CPU-friendly default base
4
- # --------------------------
5
  FROM python:3.12-slim
6
 
7
  # System deps (add build-essential + common runtime libs for OpenCV/ONNX)
 
 
 
 
 
1
  FROM python:3.12-slim
2
 
3
  # System deps (add build-essential + common runtime libs for OpenCV/ONNX)
app/deps.py CHANGED
@@ -40,14 +40,9 @@ def get_hf_token(
40
  return None
41
 
42
  def build_agent_with_token(hf_token: Optional[str]) -> SQLAgent:
43
- """
44
- Returns the shared SQLAgent but (temporarily) sets the token when present.
45
- Avoids re-instantiating clients for every request.
46
- """
47
- if hf_token:
48
- _sqlgen.set_token(hf_token)
49
- _ansllm.set_token(hf_token)
50
- else:
51
- _sqlgen.set_token(settings.HF_TOKEN)
52
- _ansllm.set_token(settings.HF_TOKEN)
53
  return _sql_agent
 
40
  return None
41
 
42
  def build_agent_with_token(hf_token: Optional[str]) -> SQLAgent:
43
+ tok = hf_token or settings.HF_TOKEN
44
+ if not tok:
45
+ raise RuntimeError("HF token missing: pass X-HF-Token or set HF_TOKEN")
46
+ _sqlgen.set_token(tok)
47
+ _ansllm.set_token(tok)
 
 
 
 
 
48
  return _sql_agent
app/main.py CHANGED
@@ -199,4 +199,38 @@ async def query(req: QueryReq, hf_token: str | None = Depends(get_hf_token)):
199
  # uncertainty=0.15
200
  )
201
  except Exception as e:
202
- raise HTTPException(status_code=400, detail=f"Query failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  # uncertainty=0.15
200
  )
201
  except Exception as e:
202
+ raise HTTPException(status_code=400, detail=f"Query failed: {e}")
203
+
204
+ import requests
205
+
206
+ @app.get("/llm/health")
207
+ def llm_health(hf_token: str | None = Depends(get_hf_token)):
208
+ tok = hf_token or settings.HF_TOKEN
209
+ if not tok:
210
+ return {"status": 400, "ok": False, "body": "HF token missing"}
211
+
212
+ try:
213
+ r = requests.post(
214
+ "https://router.huggingface.co/v1/chat/completions",
215
+ headers={
216
+ "Authorization": f"Bearer {tok}",
217
+ "Accept": "application/json",
218
+ "Accept-Encoding": "identity", # <---
219
+ },
220
+ json={
221
+ "model": settings.LLM_MODEL_ID,
222
+ "messages": [{"role":"user","content":[{"type":"text","text":"ping"}]}],
223
+ "max_tokens": 1,
224
+ "stream": False
225
+ },
226
+ timeout=20
227
+ )
228
+ ok = r.ok
229
+ # If OK, don't read text (keeps it light)
230
+ body = "ok" if ok else (r.text[:200] if r.text else str(r.content[:200]))
231
+ return {"status": r.status_code, "ok": ok, "body": body, "ce": r.headers.get("content-encoding")}
232
+ except requests.exceptions.ContentDecodingError as e:
233
+ # Rare gzip mismatch – report clearly
234
+ return {"status": 502, "ok": False, "body": f"gzip decode error: {e.__class__.__name__}"}
235
+ except Exception as e:
236
+ return {"status": 500, "ok": False, "body": f"{type(e).__name__}: {e}"}
app/tools/llm_answer.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
  from typing import Optional, Dict, Any, List
4
  import requests, json
5
 
6
- HF_CHAT_URL = "https://router.huggingface.co/featherless-ai/v1/chat/completions"
7
 
8
  SYSTEM_PROMPT = """You are a BI copilot.
9
  - NEVER invent numbers; only summarize from provided table rows.
@@ -44,7 +44,10 @@ class AnswerLLM:
44
  "temperature": self.temperature,
45
  "max_tokens": self.max_tokens,
46
  }
47
- headers = {"Authorization": f"Bearer {self.token}"}
 
 
 
48
  r = requests.post(HF_CHAT_URL, headers=headers, json=payload, timeout=self.timeout)
49
  r.raise_for_status()
50
  return r.json()["choices"][0]["message"]["content"]
 
3
  from typing import Optional, Dict, Any, List
4
  import requests, json
5
 
6
+ HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"
7
 
8
  SYSTEM_PROMPT = """You are a BI copilot.
9
  - NEVER invent numbers; only summarize from provided table rows.
 
44
  "temperature": self.temperature,
45
  "max_tokens": self.max_tokens,
46
  }
47
+ headers = {"Authorization": f"Bearer {self.token}",
48
+ "Accept": "application/json",
49
+ "Accept-Encoding": "identity",
50
+ }
51
  r = requests.post(HF_CHAT_URL, headers=headers, json=payload, timeout=self.timeout)
52
  r.raise_for_status()
53
  return r.json()["choices"][0]["message"]["content"]
app/tools/llm_sqlgen.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
  from typing import Optional, Dict, Any
4
  import requests, json
5
 
6
- HF_CHAT_URL = "https://router.huggingface.co/featherless-ai/v1/chat/completions"
7
 
8
  SCHEMA_SPEC = """
9
  Tables and columns (SQLite):
@@ -24,6 +24,10 @@ Rules:
24
  - For monthly rollups use strftime('%Y-%m', day).
25
  - Join to dim_product when you need category/name/price.
26
  - For per-employee metrics use fact_sales_detail (employee_id may be NULL for Online).
 
 
 
 
27
  """
28
 
29
  FEW_SHOTS = [
@@ -93,7 +97,10 @@ class SQLGenTool:
93
  "temperature": self.temperature,
94
  "max_tokens": self.max_tokens,
95
  }
96
- headers = {"Authorization": f"Bearer {self.token}"}
 
 
 
97
  r = requests.post(HF_CHAT_URL, headers=headers, json=payload, timeout=self.timeout)
98
  r.raise_for_status()
99
  content = r.json()["choices"][0]["message"]["content"].strip()
 
3
  from typing import Optional, Dict, Any
4
  import requests, json
5
 
6
+ HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"
7
 
8
  SCHEMA_SPEC = """
9
  Tables and columns (SQLite):
 
24
  - For monthly rollups use strftime('%Y-%m', day).
25
  - Join to dim_product when you need category/name/price.
26
  - For per-employee metrics use fact_sales_detail (employee_id may be NULL for Online).
27
+ - Always generate the SQL Queries in English
28
+ for example.
29
+ "q": रमेश का टोटल जेनरेटेड रेवेन्यू बताओ
30
+ "sql": SELECT SUM(d.revenue) AS total_revenue FROM fact_sales_detail d JOIN dim_employee e ON e.emp_id = d.employee_id WHERE e.name LIKE 'Ramesh %'
31
  """
32
 
33
  FEW_SHOTS = [
 
97
  "temperature": self.temperature,
98
  "max_tokens": self.max_tokens,
99
  }
100
+ headers = {"Authorization": f"Bearer {self.token}",
101
+ "Accept": "application/json",
102
+ "Accept-Encoding": "identity"
103
+ }
104
  r = requests.post(HF_CHAT_URL, headers=headers, json=payload, timeout=self.timeout)
105
  r.raise_for_status()
106
  content = r.json()["choices"][0]["message"]["content"].strip()
requirements.txt CHANGED
@@ -5,7 +5,6 @@ pydantic-settings
5
  numpy==1.26.4
6
  faiss-cpu
7
  insightface==0.7.3
8
- # onnxruntime
9
  onnxruntime==1.17.3
10
  opencv-python==4.10.0.84
11
  python-multipart
 
5
  numpy==1.26.4
6
  faiss-cpu
7
  insightface==0.7.3
 
8
  onnxruntime==1.17.3
9
  opencv-python==4.10.0.84
10
  python-multipart