Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -69,12 +69,46 @@ class Pair:
|
|
| 69 |
title: Optional[str]
|
| 70 |
score: float
|
| 71 |
image_path: Optional[str]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
def _pair_from_idx(idx: int, score: float, rank: int) -> Pair:
|
| 74 |
m = TEXT_META[idx]
|
| 75 |
img_path = IMAGE_META[idx].get("image_path")
|
| 76 |
-
return Pair(
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
def search_text(q: str, topk: int = 10) -> List[Pair]:
|
| 79 |
qv = text_enc.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
|
| 80 |
D, I = T_INDEX.search(qv, topk)
|
|
@@ -97,12 +131,18 @@ def search_image(img: Image.Image, topk: int = 10) -> List[Pair]:
|
|
| 97 |
return out
|
| 98 |
|
| 99 |
def build_prompt(question: str, ctx: List[Pair]) -> str:
|
| 100 |
-
lines = [
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
| 103 |
for p in ctx:
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
return "\n".join(lines)
|
| 107 |
|
| 108 |
def call_llm(prompt: str) -> str:
|
|
|
|
| 69 |
title: Optional[str]
|
| 70 |
score: float
|
| 71 |
image_path: Optional[str]
|
| 72 |
+
text: Optional[str] = None # <-- NEW
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _get_meta_text(m: dict) -> Optional[str]:
|
| 76 |
+
# Try common keys first
|
| 77 |
+
for k in ("text", "content", "passage", "body", "chunk", "article"):
|
| 78 |
+
if m.get(k):
|
| 79 |
+
return m[k]
|
| 80 |
+
# If you stored a local file path for the text, read it
|
| 81 |
+
p = m.get("path") or m.get("filepath")
|
| 82 |
+
if p:
|
| 83 |
+
import os
|
| 84 |
+
fp = p if os.path.isabs(p) else os.path.join(DATA_DIR, p)
|
| 85 |
+
if os.path.exists(fp):
|
| 86 |
+
try:
|
| 87 |
+
with open(fp, "r", encoding="utf-8") as f:
|
| 88 |
+
return f.read()
|
| 89 |
+
except:
|
| 90 |
+
pass
|
| 91 |
+
return None
|
| 92 |
|
| 93 |
def _pair_from_idx(idx: int, score: float, rank: int) -> Pair:
    """Build the ``Pair`` search result for the record at position ``idx``.

    Reads the text metadata from ``TEXT_META[idx]`` and the image path from
    ``IMAGE_META[idx]``.
    NOTE(review): this presumes both metadata lists are aligned by index;
    confirm against the code that builds them.

    Args:
        idx: Position of the hit in the metadata lists.
        score: Similarity score of the hit; stored as a plain ``float``.
        rank: Rank of the hit within the result list.

    Returns:
        A ``Pair`` carrying id, title, score, image path and passage text.
    """
    text_meta = TEXT_META[idx]
    image_path = IMAGE_META[idx].get("image_path")
    fields = {
        "rank": rank,
        "idx": idx,
        "doc_id": text_meta.get("id"),
        "title": text_meta.get("title"),
        "score": float(score),
        "image_path": image_path,
        "text": _get_meta_text(text_meta),
    }
    return Pair(**fields)
|
| 105 |
+
|
| 106 |
+
def _truncate(s: str, max_chars: int = 1200) -> str:
|
| 107 |
+
if not s: return ""
|
| 108 |
+
s = s.strip().replace("\r", " ")
|
| 109 |
+
return s[:max_chars]
|
| 110 |
+
|
| 111 |
+
|
| 112 |
def search_text(q: str, topk: int = 10) -> List[Pair]:
|
| 113 |
qv = text_enc.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
|
| 114 |
D, I = T_INDEX.search(qv, topk)
|
|
|
|
| 131 |
return out
|
| 132 |
|
| 133 |
def build_prompt(question: str, ctx: List[Pair]) -> str:
    """Assemble the LLM prompt from the question and the retrieved context.

    The prompt consists of a fixed instruction header, one two-line entry per
    context item (title, id and score, then a truncated text snippet), and a
    closing section holding the question and an empty answer slot.

    Args:
        question: The user's question, inserted verbatim.
        ctx: Retrieved results to list as context, in the given order.

    Returns:
        The prompt as a single newline-joined string.
    """
    header = [
        "از زمینهٔ زیر استفاده کن و به فارسی پاسخ بده. اگر پاسخ در زمینه نبود، بگو «نمیدانم».",
        "",
        "### زمینه:",
    ]

    entries = []
    for pair in ctx:
        # Missing text is rendered as a dash placeholder.
        excerpt = _truncate(pair.text or "")
        entries.append(
            f"- عنوان: {pair.title or '—'} (id={pair.doc_id}, score={pair.score:.3f})\n"
            f" متن: {excerpt or '—'}"
        )

    footer = f"\n### پرسش: {question}\n### پاسخ:"
    return "\n".join([*header, *entries, footer])
|
| 147 |
|
| 148 |
def call_llm(prompt: str) -> str:
|