amirhossein mohammadpour committed
Commit 6ca41c8 · 1 Parent(s): 3f6908f

change interface

Files changed (1): app.py (+78 -10)
app.py CHANGED
@@ -335,8 +335,14 @@ def llm_generate(prompt: str,
 
 # ---- MCQ helpers ----
 def build_mcq_prompt(question: str, options: List[str], contexts: List[Dict[str, Any]], lang="fa", max_chars=5000) -> str:
-    sys_fa = "تو یک دستیار پاسخ‌گو هستی که فقط بر اساس متن‌های داده‌شده پاسخ می‌دهی."
-    sys_en = "You are a helpful assistant. Answer only using the retrieved passages."
+    sys_fa = (
+        "تو یک دستیار پاسخ‌گو هستی که فقط بر اساس متن‌های داده‌شده پاسخ می‌دهی. "
+        "باید دقیقاً فقط یک شیء JSON برگردانی و هیچ متن دیگری ننویسی."
+    )
+    sys_en = (
+        "You are a helpful assistant. Answer ONLY using the retrieved passages. "
+        "You MUST return a single JSON object and nothing else."
+    )
     system_text = sys_fa if lang == "fa" else sys_en
 
     parts = []
@@ -352,17 +358,76 @@ def build_mcq_prompt(question: str, options: List[str], contexts: List[Dict[str,
     if lang == "fa":
         user = (
             f"سؤال: {question}\n\nگزینه‌ها:\n{opts_str}\n\nمتون بازیابی‌شده:\n{joined}\n\n"
-            'فقط براساس متون بالا پاسخ بده. دقیقاً در این قالب برگردان:\n{"answer_index": X, "reason": "…"}'
+            "دقیقاً و فقط یک JSON برگردان. فرمت اجباری: "
+            '{"answer_index": X, "reason": "…"} '
+            "که در آن X اندیس گزینه (۰-بِیس) است. هیچ متن دیگری ننویس."
         )
     else:
         user = (
             f"Question: {question}\n\nOptions:\n{opts_str}\n\nRetrieved:\n{joined}\n\n"
-            'Answer strictly based on passages. Return exactly:\n{"answer_index": X, "reason": "..."}'
+            'Return EXACTLY one JSON: {"answer_index": X, "reason": "..."} '
+            "where X is the 0-based option index. Do not write anything else."
         )
+
     msgs = [{"role": "system", "content": system_text},
             {"role": "user", "content": user}]
     return tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
 
+import json as _json
+import re as _re
+import numpy as _np
+
+def _strict_json_from_text(text: str):
+    # Take only the first {...} block and JSON-parse it
+    m = _re.search(r'\{.*\}', text, _re.S)
+    if not m:
+        return None
+    frag = m.group(0)
+    try:
+        obj = _json.loads(frag)
+        return obj
+    except Exception:
+        return None
+
+def score_options_by_context(options: List[str], contexts: List[Dict[str, Any]]) -> int:
+    """
+    Fallback:
+    1) If an option appears verbatim (substring) in the passages -> very high score
+    2) Otherwise, mE5 embedding similarity between each option and the concatenated contexts
+    """
+    text_blob = "\n".join([c.get("bio", "") for c in contexts]).lower()
+    # 1) substring hit
+    hits = []
+    for i, opt in enumerate(options):
+        o = normalize_digits_months(str(opt).strip().lower())
+        score = 0
+        if o and (o in text_blob):
+            score += 10_000
+        hits.append((score, i))
+    hits.sort(reverse=True)
+    if hits and hits[0][0] > 0:
+        return hits[0][1]
+
+    # 2) embedding similarity (mE5)
+    try:
+        q_vecs = [_encode_query_e5(opt) for opt in options]  # (n, dim)
+        ctx_vec = _encode_query_e5(text_blob)                # (dim,)
+        sims = [float(_np.dot(qv, ctx_vec)) for qv in q_vecs]
+        return int(_np.argmax(sims))
+    except Exception:
+        return 0  # conservative default
+
+def parse_mcq_output_strict(text: str, options: List[str], contexts: List[Dict[str, Any]]) -> Dict[str, Any]:
+    obj = _strict_json_from_text(text)
+    if obj and "answer_index" in obj:
+        idx = obj["answer_index"]
+        if isinstance(idx, int) and 0 <= idx < len(options):
+            reason = str(obj.get("reason", "")).strip() or "—"
+            return {"answer_index": idx, "reason": reason}
+    # Malformed or missing JSON -> fall back to context matching
+    idx = score_options_by_context(options, contexts)
+    return {"answer_index": idx, "reason": "fallback_by_context_matching"}
+
 def parse_mcq_output(text: str, n: int) -> Dict[str, Any]:
     m = re.search(r'{"\s*answer_index"\s*:\s*([0-9]+)\s*,\s*"reason"\s*:\s*"(.*?)"}', text, re.S)
     if m:
@@ -409,17 +474,20 @@ def ui_mcq(question, options_txt, image, topk, max_tokens, temperature, top_p, t
     prompt = build_mcq_prompt(question, opts, ret["contexts"], lang="fa", max_chars=5000)
     out = llm_generate(prompt, max_new_tokens=int(max_tokens),
                        temperature=float(temperature), top_p=float(top_p),
-                       top_k=int(top_k), do_sample=False)
-    parsed = parse_mcq_output(out, len(opts))
+                       top_k=int(top_k), do_sample=False)  # deterministic on CPU
+    parsed = parse_mcq_output_strict(out, opts, ret["contexts"])
     pred = parsed["answer_index"]
     pred_text = (opts[pred] if (pred is not None and 0 <= pred < len(opts)) else "N/A")
+
     rows = []
     for i, c in enumerate(ret["contexts"], 1):
         snip = c["bio"][:180] + ("…" if len(c["bio"]) > 180 else "")
         rows.append([i, c["id"], round(c["score"], 4), snip])
+
     result = f"Pred: index={pred} text={pred_text}\nReason: {parsed['reason']}"
     return result, out, rows, ret["route"]
 
+
 with gr.Blocks(title="Multimodal RAG (CPU) • E5 + CLIP Fusion + Qwen 0.5B") as demo:
     gr.Markdown("### Free-tier CPU demo: text RAG (E5) + optional fusion (CLIP) → Qwen 0.5B")
     with gr.Tab("Ask"):
@@ -438,11 +506,10 @@ with gr.Blocks(title="Multimodal RAG (CPU) • E5 + CLIP Fusion + Qwen 0.5B") as
         route = gr.Textbox(label="Route used (text_e5 or fusion)")
         table = gr.Dataframe(headers=["#", "id", "score", "snippet"], interactive=False)
         btn.click(ui_answer, [q, img, topk, max_tokens, temperature, top_p, top_k], [ans, table, route])
-
     with gr.Tab("MCQ"):
         with gr.Row():
             q_mcq = gr.Textbox(label="Question", lines=3)
-            opts_mcq = gr.Textbox(label="Options (one per line)", lines=6)
+            opts_mcq = gr.Textbox(label="Options (one per line)", lines=8)
             img_mcq = gr.Image(type="pil", label="Optional image (fusion if provided)")
         with gr.Row():
             topk2 = gr.Slider(1, 20, value=5, step=1, label="Top-K retrieve")
@@ -452,8 +519,9 @@ with gr.Blocks(title="Multimodal RAG (CPU) • E5 + CLIP Fusion + Qwen 0.5B") as
             top_p2 = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
             top_k2 = gr.Slider(1, 100, value=TOP_K_DEFAULT, step=1, label="Top-k")
         btn2 = gr.Button("Answer MCQ")
-        result = gr.Textbox(label="Prediction")
-        raw = gr.Textbox(label="Raw LLM output", lines=6)
+        # larger output boxes
+        result = gr.Textbox(label="Prediction", lines=12, max_lines=20)
+        raw = gr.Textbox(label="Raw LLM output", lines=12, max_lines=20)
         route2 = gr.Textbox(label="Route used")
         table2 = gr.Dataframe(headers=["#", "id", "score", "snippet"], interactive=False)
         btn2.click(ui_mcq, [q_mcq, opts_mcq, img_mcq, topk2, max_tokens2, temperature2, top_p2, top_k2],
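
The core of this change is the parsing strategy: prompt the model for exactly one JSON object, strictly parse the first `{...}` block, and fall back to retrieval-based matching only when that fails. A minimal standalone sketch of that behavior (the function body is copied from the diff above; the sample model outputs are illustrative, not taken from the app):

```python
import json
import re

def _strict_json_from_text(text: str):
    # Take only the first {...} block and JSON-parse it
    m = re.search(r'\{.*\}', text, re.S)
    if not m:
        return None
    try:
        return json.loads(m.group(0))
    except Exception:
        return None

# Well-formed output: parsed directly, no fallback needed.
print(_strict_json_from_text('Sure! {"answer_index": 2, "reason": "stated in passage 1"}'))
# {'answer_index': 2, 'reason': 'stated in passage 1'}

# No JSON at all: returns None, which routes parse_mcq_output_strict
# into score_options_by_context (substring hit first, then mE5 similarity).
print(_strict_json_from_text("The answer is option 2 because ..."))
# None
```

One caveat worth noting: `\{.*\}` with `re.S` is greedy, so it spans from the first `{` to the last `}` in the output. A model that emits two JSON objects, or a stray `}` after the object, makes that span invalid JSON; the parse then fails and the code drops into `score_options_by_context`, which appears to be the intended safety net.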