Spaces:

Inframat-x
/

ML-Chatbot

Sleeping

App Files Files Community

Inframat-x committed on Nov 2, 2025

Commit

65df9cc

verified ·

1 Parent(s): 2287ebf

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -26

app.py CHANGED Viewed

@@ -5,6 +5,7 @@
 # - Predictor: safe model caching + safe feature alignment
 # - Stable categoricals ("NA"); no over-strict completeness gate
 # - Fixed [[PAGE=...]] regex
 # ================================================================
 # ---------------------- Runtime flags (HF-safe) ----------------------
@@ -14,7 +15,7 @@ os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ------------------------------- Imports ------------------------------
-import re, joblib, warnings, json, traceback
 from pathlib import Path
 from typing import List, Dict, Any
@@ -548,9 +549,27 @@ def compose_extractive(selected: List[Dict[str, Any]]) -> str:
         return ""
     return " ".join(f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected)
-def synthesize_with_llm(question: str, sentence_lines: List[str], model: str = None, temperature: float = 0.2) -> str:
-    if not LLM_AVAILABLE:
         return None
     client = OpenAI(api_key=OPENAI_API_KEY)
     model = model or OPENAI_MODEL
     SYSTEM_PROMPT = (
@@ -573,9 +592,22 @@ def synthesize_with_llm(question: str, sentence_lines: List[str], model: str = N
             ],
             temperature=temperature,
         )
-        return getattr(resp, "output_text", None) or str(resp)
     except Exception:
-        return None
 def rag_reply(
     question: str,
@@ -590,41 +622,141 @@ def rag_reply(
     w_bm25: float  = W_BM25_DEFAULT,
     w_emb: float   = W_EMB_DEFAULT
 ) -> str:
     hits = hybrid_search(question, k=k, w_tfidf=w_tfidf, w_bm25=w_bm25, w_emb=w_emb)
-    if hits is None or hits.empty:
-        return "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
     selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
     header_cites = "; ".join(f"{Path(r['doc_path']).name} (p.{_extract_page(r['text'])})" for _, r in hits.head(6).iterrows())
     srcs = {Path(r['doc_path']).name for _, r in hits.iterrows()}
     coverage_note = "" if len(srcs) >= 3 else f"\n\n> Note: Only {len(srcs)} unique source(s) contributed. Add more PDFs or increase Top-K."
     if strict_quotes_only:
         if not selected:
-            return f"**Quoted Passages:**\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2]) + f"\n\n**Citations:** {header_cites}{coverage_note}"
-        msg = "**Quoted Passages:**\n- " + "\n- ".join(f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected)
-        msg += f"\n\n**Citations:** {header_cites}{coverage_note}"
-        if include_passages:
-            msg += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-        return msg
     extractive = compose_extractive(selected)
     if use_llm and selected:
         lines = [f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected]
-        llm_text = synthesize_with_llm(question, lines, model=model, temperature=temperature)
         if llm_text:
-            msg = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
             if include_passages:
-                msg += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-            return msg
-    if not extractive:
-        return f"**Answer:** Here are relevant passages.\n\n**Citations:** {header_cites}{coverage_note}\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-    msg = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
-    if include_passages:
-        msg += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-    return msg
 def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
                 use_llm, model_name, temperature, strict_quotes_only,

 # - Predictor: safe model caching + safe feature alignment
 # - Stable categoricals ("NA"); no over-strict completeness gate
 # - Fixed [[PAGE=...]] regex
+# - NEW: Lightweight instrumentation (JSONL logs per RAG turn)
 # ================================================================
 # ---------------------- Runtime flags (HF-safe) ----------------------
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ------------------------------- Imports ------------------------------
+import re, joblib, warnings, json, traceback, time, uuid
 from pathlib import Path
 from typing import List, Dict, Any
         return ""
     return " ".join(f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected)
+# ========================= NEW: Instrumentation helpers =========================
+# Destination of the JSONL instrumentation log; records are appended by
+# _safe_write_jsonl.
+LOG_PATH = ARTIFACT_DIR / "rag_logs.jsonl"
+# Per-1K-token rates consumed by _calc_cost_usd. Read once at import time
+# from the environment; both default to 0 when the variable is unset.
+OPENAI_IN_COST_PER_1K  = float(os.getenv("OPENAI_COST_IN_PER_1K", "0"))
+OPENAI_OUT_COST_PER_1K = float(os.getenv("OPENAI_COST_OUT_PER_1K", "0"))
+def _safe_write_jsonl(path: Path, record: dict):
+    # Append `record` to `path` as a single JSON line (UTF-8, non-ASCII
+    # characters written as-is rather than escaped).
+    # Best-effort: any exception raised while opening, serializing, or
+    # writing is printed and swallowed, so a logging failure never
+    # propagates to the caller.
+    try:
+        with open(path, "a", encoding="utf-8") as f:
+            f.write(json.dumps(record, ensure_ascii=False) + "\n")
+    except Exception as e:
+        print("[Log] write failed:", e)
+def _calc_cost_usd(prompt_toks, completion_toks):
+    # Estimate the cost of one LLM call from its token counts:
+    #   prompt_toks / 1000 * OPENAI_IN_COST_PER_1K
+    #   + completion_toks / 1000 * OPENAI_OUT_COST_PER_1K
+    # Returns None when either count is missing, so callers can tell
+    # "unknown cost" apart from a computed cost of 0.
+    if prompt_toks is None or completion_toks is None:
         return None
+    return (prompt_toks / 1000.0) * OPENAI_IN_COST_PER_1K + (completion_toks / 1000.0) * OPENAI_OUT_COST_PER_1K
+# ----------------- Modified to return (text, usage_dict) -----------------
+def synthesize_with_llm(question: str, sentence_lines: List[str], model: str = None, temperature: float = 0.2):
+    if not LLM_AVAILABLE:
+        return None, None
     client = OpenAI(api_key=OPENAI_API_KEY)
     model = model or OPENAI_MODEL
     SYSTEM_PROMPT = (
             ],
             temperature=temperature,
         )
+        # Try to extract text
+        out_text = getattr(resp, "output_text", None) or str(resp)
+        # Try to extract usage (prompt_tokens, completion_tokens)
+        usage = None
+        try:
+            u = getattr(resp, "usage", None)
+            if u:
+                # Newer SDKs: resp.usage has attributes or dict-like
+                pt = getattr(u, "prompt_tokens", None) if hasattr(u, "prompt_tokens") else u.get("prompt_tokens", None)
+                ct = getattr(u, "completion_tokens", None) if hasattr(u, "completion_tokens") else u.get("completion_tokens", None)
+                usage = {"prompt_tokens": pt, "completion_tokens": ct}
+        except Exception:
+            usage = None
+        return out_text, usage
     except Exception:
+        return None, None
 def rag_reply(
     question: str,
     w_bm25: float  = W_BM25_DEFAULT,
     w_emb: float   = W_EMB_DEFAULT
 ) -> str:
+    run_id = str(uuid.uuid4())
+    t0_total = time.time()
+    t0_retr  = time.time()
+    # --- Retrieval ---
     hits = hybrid_search(question, k=k, w_tfidf=w_tfidf, w_bm25=w_bm25, w_emb=w_emb)
+    t1_retr = time.time()
+    latency_ms_retriever = int((t1_retr - t0_retr) * 1000)
+    if hits is None or hits.empty:
+        final = "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
+        # Minimal log on miss
+        record = {
+            "run_id": run_id,
+            "ts": int(time.time()*1000),
+            "inputs": {
+                "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
+                "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
+                "use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
+            },
+            "retrieval": {"hits": [], "latency_ms_retriever": latency_ms_retriever},
+            "output": {"final_answer": final, "used_sentences": []},
+            "latency_ms_total": int((time.time()-t0_total)*1000),
+            "openai": None
+        }
+        _safe_write_jsonl(LOG_PATH, record)
+        return final
+    # Select sentences
     selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
     header_cites = "; ".join(f"{Path(r['doc_path']).name} (p.{_extract_page(r['text'])})" for _, r in hits.head(6).iterrows())
     srcs = {Path(r['doc_path']).name for _, r in hits.iterrows()}
     coverage_note = "" if len(srcs) >= 3 else f"\n\n> Note: Only {len(srcs)} unique source(s) contributed. Add more PDFs or increase Top-K."
+    # Prepare retrieval list for logging
+    retr_list = []
+    for _, r in hits.iterrows():
+        retr_list.append({
+            "doc": Path(r["doc_path"]).name,
+            "page": _extract_page(r["text"]),
+            "score_tfidf": float(r.get("score_tfidf", 0.0)),
+            "score_bm25": float(r.get("score_bm25", 0.0)),
+            "score_dense": float(r.get("score_dense", 0.0)),
+            "combo_score": float(r.get("score", 0.0)),
+        })
+    # Strict quotes only (no LLM)
     if strict_quotes_only:
         if not selected:
+            final = f"**Quoted Passages:**\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2]) + f"\n\n**Citations:** {header_cites}{coverage_note}"
+        else:
+            final = "**Quoted Passages:**\n- " + "\n- ".join(f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected)
+            final += f"\n\n**Citations:** {header_cites}{coverage_note}"
+            if include_passages:
+                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+        record = {
+            "run_id": run_id,
+            "ts": int(time.time()*1000),
+            "inputs": {
+                "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
+                "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
+                "use_llm": False, "model": None, "temperature": float(temperature)
+            },
+            "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
+            "output": {
+                "final_answer": final,
+                "used_sentences": [{"sent": s["sent"], "doc": s["doc"], "page": s["page"]} for s in selected]
+            },
+            "latency_ms_total": int((time.time()-t0_total)*1000),
+            "openai": None
+        }
+        _safe_write_jsonl(LOG_PATH, record)
+        return final
+    # Extractive or LLM synthesis
     extractive = compose_extractive(selected)
+    llm_usage = None
+    llm_latency_ms = None
     if use_llm and selected:
         lines = [f"{s['sent']} ({s['doc']}, p.{s['page']})" for s in selected]
+        t0_llm = time.time()
+        llm_text, llm_usage = synthesize_with_llm(question, lines, model=model, temperature=temperature)
+        t1_llm = time.time()
+        llm_latency_ms = int((t1_llm - t0_llm) * 1000)
         if llm_text:
+            final = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
             if include_passages:
+                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+        else:
+            # fall back to extractive
+            if not extractive:
+                final = f"**Answer:** Here are relevant passages.\n\n**Citations:** {header_cites}{coverage_note}\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+            else:
+                final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
+                if include_passages:
+                    final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+    else:
+        if not extractive:
+            final = f"**Answer:** Here are relevant passages.\n\n**Citations:** {header_cites}{coverage_note}\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+        else:
+            final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
+            if include_passages:
+                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
+    # --------- Log full run ---------
+    prompt_toks = llm_usage.get("prompt_tokens") if llm_usage else None
+    completion_toks = llm_usage.get("completion_tokens") if llm_usage else None
+    cost_usd = _calc_cost_usd(prompt_toks, completion_toks)
+    total_ms = int((time.time() - t0_total) * 1000)
+    record = {
+        "run_id": run_id,
+        "ts": int(time.time()*1000),
+        "inputs": {
+            "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
+            "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
+            "use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
+        },
+        "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
+        "output": {
+            "final_answer": final,
+            "used_sentences": [{"sent": s["sent"], "doc": s["doc"], "page": s["page"]} for s in selected]
+        },
+        "latency_ms_total": total_ms,
+        "latency_ms_llm": llm_latency_ms,
+        "openai": {
+            "prompt_tokens": prompt_toks,
+            "completion_tokens": completion_toks,
+            "cost_usd": cost_usd
+        } if use_llm else None
+    }
+    _safe_write_jsonl(LOG_PATH, record)
+    return final
 def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
                 use_llm, model_name, temperature, strict_quotes_only,