Spaces:
Sleeping
Sleeping
Oleksii Obolonskyi committed on
Commit ·
52f5ee4
1
Parent(s): cb42fd4
Add LLM router and surface model errors
Browse files
app.py
CHANGED
|
@@ -41,6 +41,9 @@ HF_API_URL = os.environ.get("RAG_HF_API_URL", "").strip()
|
|
| 41 |
if not HF_API_URL:
|
| 42 |
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
REPO_OWNER = "16bitSega"
|
| 45 |
REPO_NAME = "RAG_project"
|
| 46 |
|
|
@@ -497,9 +500,13 @@ def answer_question(
|
|
| 497 |
+ format_rules
|
| 498 |
+ f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
|
| 499 |
)
|
| 500 |
-
answer, err =
|
| 501 |
-
if err
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
return sanitize_answer(answer), citations, True
|
| 504 |
|
| 505 |
def build_hf_prompt(user_prompt: str, model_id: str) -> str:
|
|
@@ -544,6 +551,42 @@ def hf_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Opt
|
|
| 544 |
except Exception as e:
|
| 545 |
return "", str(e)
|
| 546 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 548 |
if not GITHUB_TOKEN:
|
| 549 |
return None, "Missing GITHUB_TOKEN"
|
|
@@ -747,8 +790,10 @@ def run_enhance(question: str, enhanced_key: str):
|
|
| 747 |
if ok:
|
| 748 |
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
|
| 749 |
else:
|
| 750 |
-
|
| 751 |
-
st.session_state[
|
|
|
|
|
|
|
| 752 |
st.session_state["enhancing_key"] = None
|
| 753 |
|
| 754 |
def run_regen():
|
|
@@ -756,10 +801,15 @@ def run_regen():
|
|
| 756 |
"Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
|
| 757 |
"Return each question on its own line without extra text."
|
| 758 |
)
|
| 759 |
-
text, err =
|
| 760 |
-
if err
|
|
|
|
| 761 |
st.warning(f"LLM request failed: {err}")
|
| 762 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
qs = parse_generated_questions(text)
|
| 764 |
if len(qs) == 3:
|
| 765 |
st.session_state["article_questions"] = qs
|
|
|
|
| 41 |
# Fall back to the public HF Inference API endpoint when no explicit URL was
# provided via RAG_HF_API_URL.
if not HF_API_URL:
    HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

# Local Ollama server used as the fallback LLM backend; the trailing slash is
# stripped so URLs built as f"{OLLAMA_BASE_URL}/api/..." stay well-formed.
OLLAMA_BASE_URL = os.environ.get("RAG_OLLAMA_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("RAG_OLLAMA_MODEL", "llama3.2:1b")

# Target GitHub repository for issue creation.
REPO_OWNER = "16bitSega"
REPO_NAME = "RAG_project"
|
| 49 |
|
|
|
|
| 500 |
+ format_rules
|
| 501 |
+ f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
|
| 502 |
)
|
| 503 |
+
answer, err = llm_chat(prompt)
|
| 504 |
+
if err:
|
| 505 |
+
st.error(err)
|
| 506 |
+
return f"Model error: {err}", citations, False
|
| 507 |
+
if not answer:
|
| 508 |
+
st.error("Empty response from model")
|
| 509 |
+
return "Model error: Empty response from model", citations, False
|
| 510 |
return sanitize_answer(answer), citations, True
|
| 511 |
|
| 512 |
def build_hf_prompt(user_prompt: str, model_id: str) -> str:
|
|
|
|
| 551 |
except Exception as e:
|
| 552 |
return "", str(e)
|
| 553 |
|
| 554 |
+
def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Query the local Ollama chat endpoint with *prompt*.

    Returns ``(answer, None)`` on success or ``("", error_message)`` on any
    transport/HTTP/JSON failure, mirroring the error contract of ``hf_chat``
    so callers can branch on the second element.
    """
    system_prompt = (
        f"You are an assistant for {COMPANY_NAME}. "
        f"Contact: {COMPANY_EMAIL}, {COMPANY_PHONE}. {COMPANY_ABOUT}. "
        "Answer only from the provided context. Keep answers concise. "
        "Cite sources using the provided citation tags exactly."
    )
    request_body = {
        "model": OLLAMA_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        # Request a single JSON response instead of a token stream.
        "stream": False,
        "options": {"temperature": 0.2},
    }
    try:
        resp = requests.post(f"{OLLAMA_BASE_URL}/api/chat", json=request_body, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()
        # Defensive chaining: "message" may be absent/None in error payloads.
        message = payload.get("message") or {}
        text = message.get("content") or ""
        return text.strip(), None
    except Exception as e:  # surface any failure as a plain string for the UI
        return "", str(e)
|
| 573 |
+
|
| 574 |
+
def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Dispatch a chat request to the configured LLM backend.

    The ``RAG_LLM_BACKEND`` env var ("hf" or "ollama") forces a backend
    explicitly. When it is unset or unrecognized, HF is chosen if a token is
    configured, otherwise the local Ollama fallback is used.
    """
    choice = os.environ.get("RAG_LLM_BACKEND", "").strip().lower()

    if choice == "hf":
        backend_fn = hf_chat
    elif choice == "ollama":
        backend_fn = ollama_chat
    elif (HF_TOKEN or "").strip():
        # No explicit override: prefer HF whenever a token is available.
        backend_fn = hf_chat
    else:
        backend_fn = ollama_chat
    return backend_fn(prompt, timeout=timeout)
|
| 589 |
+
|
| 590 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 591 |
if not GITHUB_TOKEN:
|
| 592 |
return None, "Missing GITHUB_TOKEN"
|
|
|
|
| 790 |
if ok:
|
| 791 |
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
|
| 792 |
else:
|
| 793 |
+
not_found = answer.strip() == "Not found in dataset."
|
| 794 |
+
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": not_found}
|
| 795 |
+
if not_found:
|
| 796 |
+
st.session_state["ticket_prefill"] = {"question": question, "citations": citations}
|
| 797 |
st.session_state["enhancing_key"] = None
|
| 798 |
|
| 799 |
def run_regen():
|
|
|
|
| 801 |
"Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
|
| 802 |
"Return each question on its own line without extra text."
|
| 803 |
)
|
| 804 |
+
text, err = llm_chat(gen_prompt)
|
| 805 |
+
if err:
|
| 806 |
+
st.error(err)
|
| 807 |
st.warning(f"LLM request failed: {err}")
|
| 808 |
return
|
| 809 |
+
if not text:
|
| 810 |
+
st.error("Empty response from model")
|
| 811 |
+
st.warning("LLM request failed: empty response")
|
| 812 |
+
return
|
| 813 |
qs = parse_generated_questions(text)
|
| 814 |
if len(qs) == 3:
|
| 815 |
st.session_state["article_questions"] = qs
|