Spaces:
Sleeping
Sleeping
Oleksii Obolonskyi committed on
Commit ·
52f5ee4
1
Parent(s): cb42fd4
Add LLM router and surface model errors
Browse files
app.py
CHANGED
|
@@ -41,6 +41,9 @@ HF_API_URL = os.environ.get("RAG_HF_API_URL", "").strip()
|
|
| 41 |
if not HF_API_URL:
|
| 42 |
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
REPO_OWNER = "16bitSega"
|
| 45 |
REPO_NAME = "RAG_project"
|
| 46 |
|
|
@@ -497,9 +500,13 @@ def answer_question(
|
|
| 497 |
+ format_rules
|
| 498 |
+ f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
|
| 499 |
)
|
| 500 |
-
answer, err =
|
| 501 |
-
if err
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
return sanitize_answer(answer), citations, True
|
| 504 |
|
| 505 |
def build_hf_prompt(user_prompt: str, model_id: str) -> str:
|
|
@@ -544,6 +551,42 @@ def hf_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Opt
|
|
| 544 |
except Exception as e:
|
| 545 |
return "", str(e)
|
| 546 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 548 |
if not GITHUB_TOKEN:
|
| 549 |
return None, "Missing GITHUB_TOKEN"
|
|
@@ -747,8 +790,10 @@ def run_enhance(question: str, enhanced_key: str):
|
|
| 747 |
if ok:
|
| 748 |
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
|
| 749 |
else:
|
| 750 |
-
|
| 751 |
-
st.session_state[
|
|
|
|
|
|
|
| 752 |
st.session_state["enhancing_key"] = None
|
| 753 |
|
| 754 |
def run_regen():
|
|
@@ -756,10 +801,15 @@ def run_regen():
|
|
| 756 |
"Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
|
| 757 |
"Return each question on its own line without extra text."
|
| 758 |
)
|
| 759 |
-
text, err =
|
| 760 |
-
if err
|
|
|
|
| 761 |
st.warning(f"LLM request failed: {err}")
|
| 762 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
qs = parse_generated_questions(text)
|
| 764 |
if len(qs) == 3:
|
| 765 |
st.session_state["article_questions"] = qs
|
|
|
|
| 41 |
# Fall back to the public HF Inference API endpoint when no explicit URL was
# provided via RAG_HF_API_URL.
if not HF_API_URL:
    HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

# Local Ollama server used as the fallback LLM backend; the trailing slash is
# stripped so URLs built as f"{OLLAMA_BASE_URL}/api/..." stay well-formed.
OLLAMA_BASE_URL = os.environ.get("RAG_OLLAMA_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("RAG_OLLAMA_MODEL", "llama3.2:1b")

# Target GitHub repository for issue creation.
REPO_OWNER = "16bitSega"
REPO_NAME = "RAG_project"
|
| 49 |
|
|
|
|
| 500 |
+ format_rules
|
| 501 |
+ f"\nQuestion:\n{question}\n\nContext:\n{context}\n\nAnswer:"
|
| 502 |
)
|
| 503 |
+
answer, err = llm_chat(prompt)
|
| 504 |
+
if err:
|
| 505 |
+
st.error(err)
|
| 506 |
+
return f"Model error: {err}", citations, False
|
| 507 |
+
if not answer:
|
| 508 |
+
st.error("Empty response from model")
|
| 509 |
+
return "Model error: Empty response from model", citations, False
|
| 510 |
return sanitize_answer(answer), citations, True
|
| 511 |
|
| 512 |
def build_hf_prompt(user_prompt: str, model_id: str) -> str:
|
|
|
|
| 551 |
except Exception as e:
|
| 552 |
return "", str(e)
|
| 553 |
|
| 554 |
+
def ollama_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Query the local Ollama chat endpoint with *prompt*.

    Returns ``(answer, None)`` on success or ``("", error_message)`` on any
    transport/HTTP/JSON failure, mirroring the error contract of ``hf_chat``
    so callers can branch on the second element.
    """
    system_prompt = (
        f"You are an assistant for {COMPANY_NAME}. "
        f"Contact: {COMPANY_EMAIL}, {COMPANY_PHONE}. {COMPANY_ABOUT}. "
        "Answer only from the provided context. Keep answers concise. "
        "Cite sources using the provided citation tags exactly."
    )
    request_body = {
        "model": OLLAMA_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        # Request a single JSON response instead of a token stream.
        "stream": False,
        "options": {"temperature": 0.2},
    }
    try:
        resp = requests.post(f"{OLLAMA_BASE_URL}/api/chat", json=request_body, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()
        # Defensive chaining: "message" may be absent/None in error payloads.
        message = payload.get("message") or {}
        text = message.get("content") or ""
        return text.strip(), None
    except Exception as e:  # surface any failure as a plain string for the UI
        return "", str(e)
|
| 573 |
+
|
| 574 |
+
def llm_chat(prompt: str, timeout: Tuple[int, int] = (10, 600)) -> Tuple[str, Optional[str]]:
    """Dispatch a chat request to the configured LLM backend.

    The ``RAG_LLM_BACKEND`` env var ("hf" or "ollama") forces a backend
    explicitly. When it is unset or unrecognized, HF is chosen if a token is
    configured, otherwise the local Ollama fallback is used.
    """
    choice = os.environ.get("RAG_LLM_BACKEND", "").strip().lower()

    if choice == "hf":
        backend_fn = hf_chat
    elif choice == "ollama":
        backend_fn = ollama_chat
    elif (HF_TOKEN or "").strip():
        # No explicit override: prefer HF whenever a token is available.
        backend_fn = hf_chat
    else:
        backend_fn = ollama_chat
    return backend_fn(prompt, timeout=timeout)
|
| 589 |
+
|
| 590 |
def github_create_issue(title: str, body: str, labels: Optional[List[str]] = None) -> Tuple[Optional[int], Optional[str]]:
|
| 591 |
if not GITHUB_TOKEN:
|
| 592 |
return None, "Missing GITHUB_TOKEN"
|
|
|
|
| 790 |
if ok:
|
| 791 |
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": False}
|
| 792 |
else:
|
| 793 |
+
not_found = answer.strip() == "Not found in dataset."
|
| 794 |
+
st.session_state[enhanced_key] = {"answer": answer, "citations": citations, "not_found": not_found}
|
| 795 |
+
if not_found:
|
| 796 |
+
st.session_state["ticket_prefill"] = {"question": question, "citations": citations}
|
| 797 |
st.session_state["enhancing_key"] = None
|
| 798 |
|
| 799 |
def run_regen():
|
|
|
|
| 801 |
"Generate exactly 3 concise user questions about MCP and AI agents orchestration. "
|
| 802 |
"Return each question on its own line without extra text."
|
| 803 |
)
|
| 804 |
+
text, err = llm_chat(gen_prompt)
|
| 805 |
+
if err:
|
| 806 |
+
st.error(err)
|
| 807 |
st.warning(f"LLM request failed: {err}")
|
| 808 |
return
|
| 809 |
+
if not text:
|
| 810 |
+
st.error("Empty response from model")
|
| 811 |
+
st.warning("LLM request failed: empty response")
|
| 812 |
+
return
|
| 813 |
qs = parse_generated_questions(text)
|
| 814 |
if len(qs) == 3:
|
| 815 |
st.session_state["article_questions"] = qs
|