Oleksii Obolonskyi committed on
Commit
52f5ee4
·
1 Parent(s): cb42fd4

Add LLM router and surface model errors

Browse files
Files changed (1) hide show
  1. app.py +57 -7
app.py CHANGED
@@ -41,6 +41,9 @@ HF_API_URL = os.environ.get("RAG_HF_API_URL", "").strip()
41
  if not HF_API_URL:
42
  HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
43
 
 
 
 
44
  REPO_OWNER = "16bitSega"
45
  REPO_NAME = "RAG_project"
46
 
@@ -497,9 +500,13 @@ def answer_question(
497
  + format_rules
498
  + f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
499
  )
500
- answer, err = hf_chat(prompt)
501
- if err or not answer:
502
- return "Not found in dataset.", citations, False
 
 
 
 
503
  return sanitize_answer(answer), citations, True
504
 
505
  def build_hf_prompt(user_prompt: str, model_id: str) -> str:
@@ -544,6 +551,42 @@ def hf_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Opt
544
  except Exception as e:
545
  return "", str(e)
546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
548
  if not GITHUB_TOKEN:
549
  return None, "Missing GITHUB_TOKEN"
@@ -747,8 +790,10 @@ def run_enhance(question: str, enhanced_key: str):
747
  if ok:
748
  st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
749
  else:
750
- st.session_state[enhanced_key] = {"answer": "Not found in dataset.", "citations": [], "not_found": True}
751
- st.session_state["ticket_prefill"] = {"question": question, "citations": citations}
 
 
752
  st.session_state["enhancing_key"] = None
753
 
754
  def run_regen():
@@ -756,10 +801,15 @@ def run_regen():
756
  "Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
757
  "Return each question on its own line without extra text."
758
  )
759
- text, err = hf_chat(gen_prompt)
760
- if err or not text:
 
761
  st.warning(f"LLM request failed: {err}")
762
  return
 
 
 
 
763
  qs = parse_generated_questions(text)
764
  if len(qs) == 3:
765
  st.session_state["article_questions"] = qs
 
41
  if not HF_API_URL:
42
  HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
43
 
44
+ OLLAMA_BASE_URL = os.environ.get("RAG_OLLAMA_URL", "http://localhost:11434").rstrip("/")
45
+ OLLAMA_MODEL = os.environ.get("RAG_OLLAMA_MODEL", "llama3.2:1b")
46
+
47
  REPO_OWNER = "16bitSega"
48
  REPO_NAME = "RAG_project"
49
 
 
500
  + format_rules
501
  + f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
502
  )
503
+ answer, err = llm_chat(prompt)
504
+ if err:
505
+ st.error(err)
506
+ return f"Model error: {err}", citations, False
507
+ if not answer:
508
+ st.error("Empty response from model")
509
+ return "Model error: Empty response from model", citations, False
510
  return sanitize_answer(answer), citations, True
511
 
512
  def build_hf_prompt(user_prompt: str, model_id: str) -> str:
 
551
  except Exception as e:
552
  return "", str(e)
553
 
554
def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Query the configured Ollama server and return ``(answer, error)``.

    On success ``answer`` is the stripped assistant message and ``error`` is
    ``None``; on any failure ``answer`` is ``""`` and ``error`` holds the
    exception text, matching the ``hf_chat`` error contract.
    """
    system_message = {
        "role": "system",
        "content": f"You are an assistant for {COMPANY_NAME}. Contact: {COMPANY_EMAIL}, {COMPANY_PHONE}. {COMPANY_ABOUT}. Answer only from the provided context. Keep answers concise. Cite sources using the provided citation tags exactly.",
    }
    request_body = {
        "model": OLLAMA_MODEL,
        "messages": [system_message, {"role": "user", "content": prompt}],
        # Non-streaming call; low temperature keeps answers grounded in context.
        "stream": False,
        "options": {"temperature": 0.2},
    }
    try:
        response = requests.post(f"{OLLAMA_BASE_URL}/api/chat", json=request_body, timeout=timeout)
        response.raise_for_status()
        reply = (response.json().get("message") or {}).get("content") or ""
        return reply.strip(), None
    except Exception as exc:  # network/HTTP/JSON errors are surfaced to the caller
        return "", str(exc)
573
+
574
def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Dispatch *prompt* to a chat backend and return ``(answer, error)``.

    Selection order:
      1. ``RAG_LLM_BACKEND`` env var wins when set to ``hf`` or ``ollama``.
      2. Otherwise the HF Inference API is used when an HF token is present.
      3. Otherwise the local Ollama server is the fallback.
    """
    choice = (os.environ.get("RAG_LLM_BACKEND", "") or "").strip().lower()
    if choice == "hf":
        backend = hf_chat
    elif choice == "ollama":
        backend = ollama_chat
    elif (HF_TOKEN or "").strip():
        # No explicit override: a configured token implies the hosted API.
        backend = hf_chat
    else:
        backend = ollama_chat
    return backend(prompt, timeout=timeout)
589
+
590
  def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
591
  if not GITHUB_TOKEN:
592
  return None, "Missing GITHUB_TOKEN"
 
790
  if ok:
791
  st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
792
  else:
793
+ not_found = answer.strip() == "Not found in dataset."
794
+ st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": not_found}
795
+ if not_found:
796
+ st.session_state["ticket_prefill"] = {"question": question, "citations": citations}
797
  st.session_state["enhancing_key"] = None
798
 
799
  def run_regen():
 
801
  "Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
802
  "Return each question on its own line without extra text."
803
  )
804
+ text, err = llm_chat(gen_prompt)
805
+ if err:
806
+ st.error(err)
807
  st.warning(f"LLM request failed: {err}")
808
  return
809
+ if not text:
810
+ st.error("Empty response from model")
811
+ st.warning("LLM request failed: empty response")
812
+ return
813
  qs = parse_generated_questions(text)
814
  if len(qs) == 3:
815
  st.session_state["article_questions"] = qs