mamathew committed on
Commit
70c1c99
·
verified ·
1 Parent(s): d9c8d4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -7
app.py CHANGED
@@ -69,12 +69,46 @@ class Pair:
69
  title: Optional[str]
70
  score: float
71
  image_path: Optional[str]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def _pair_from_idx(idx: int, score: float, rank: int) -> Pair:
74
  m = TEXT_META[idx]
75
  img_path = IMAGE_META[idx].get("image_path")
76
- return Pair(rank=rank, idx=idx, doc_id=m.get("id"), title=m.get("title"), score=float(score), image_path=img_path)
77
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def search_text(q: str, topk: int = 10) -> List[Pair]:
79
  qv = text_enc.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
80
  D, I = T_INDEX.search(qv, topk)
@@ -97,12 +131,18 @@ def search_image(img: Image.Image, topk: int = 10) -> List[Pair]:
97
  return out
98
 
99
  def build_prompt(question: str, ctx: List[Pair]) -> str:
100
- lines = ["You are a helpful assistant. Answer the user's question using the given context.",
101
- "If the answer is not contained in the context, say you don't know.\n",
102
- "Context:"]
 
 
103
  for p in ctx:
104
- lines.append(f"- {p.title or ''} (id={p.doc_id}) [score={p.score:.3f}]")
105
- lines.append(f"\nQuestion: {question}\nAnswer:")
 
 
 
 
106
  return "\n".join(lines)
107
 
108
  def call_llm(prompt: str) -> str:
 
69
  title: Optional[str]
70
  score: float
71
  image_path: Optional[str]
72
+ text: Optional[str] = None # <-- NEW
73
+
74
+
75
def _get_meta_text(m: dict) -> Optional[str]:
    """Return the passage text for a metadata record, or ``None``.

    Lookup order:

    1. The first truthy value among the inline keys ``text``, ``content``,
       ``passage``, ``body``, ``chunk`` and ``article``.
    2. The UTF-8 contents of the file named by ``path`` (or ``filepath``);
       a relative path is resolved against ``DATA_DIR``.

    Reading the file is best-effort: a missing, unreadable, or undecodable
    file yields ``None`` rather than an exception.

    Args:
        m: Metadata record for one document.

    Returns:
        The text if one could be found, otherwise ``None``.
    """
    for key in ("text", "content", "passage", "body", "chunk", "article"):
        value = m.get(key)
        if value:
            return value

    location = m.get("path") or m.get("filepath")
    if not location:
        return None

    import os  # kept local so this helper does not rely on a file-level import

    full_path = location if os.path.isabs(location) else os.path.join(DATA_DIR, location)
    try:
        with open(full_path, "r", encoding="utf-8") as fh:
            return fh.read()
    except (OSError, ValueError):
        # OSError: missing file, directory, or permission problem.
        # ValueError: undecodable bytes (UnicodeDecodeError) or an invalid path.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return None
92
 
93
  def _pair_from_idx(idx: int, score: float, rank: int) -> Pair:
94
  m = TEXT_META[idx]
95
  img_path = IMAGE_META[idx].get("image_path")
96
+ return Pair(
97
+ rank=rank,
98
+ idx=idx,
99
+ doc_id=m.get("id"),
100
+ title=m.get("title"),
101
+ score=float(score),
102
+ image_path=img_path,
103
+ text=_get_meta_text(m), # <-- NEW
104
+ )
105
+
106
def _truncate(s: Optional[str], max_chars: int = 1200) -> str:
    """Return *s* trimmed of surrounding whitespace and capped in length.

    Carriage returns are replaced with spaces after trimming, then the
    result is cut to at most *max_chars* characters.

    Args:
        s: Text to shorten; ``None`` or an empty string yields ``""``.
        max_chars: Maximum number of characters to keep. Values below zero
            are treated as zero.

    Returns:
        The cleaned, possibly shortened text.
    """
    if not s:
        return ""
    cleaned = s.strip().replace("\r", " ")
    # A negative limit would otherwise slice from the end (s[:-n]) and keep
    # almost the whole string instead of capping it.
    limit = max(max_chars, 0)
    return cleaned[:limit]
110
+
111
+
112
  def search_text(q: str, topk: int = 10) -> List[Pair]:
113
  qv = text_enc.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
114
  D, I = T_INDEX.search(qv, topk)
 
131
  return out
132
 
133
  def build_prompt(question: str, ctx: List[Pair]) -> str:
134
+ lines = [
135
+ "از زمینهٔ زیر استفاده کن و به فارسی پاسخ بده. اگر پاسخ در زمینه نبود، بگو «نمی‌دانم».",
136
+ "",
137
+ "### زمینه:",
138
+ ]
139
  for p in ctx:
140
+ snippet = _truncate(p.text or "")
141
+ lines.append(
142
+ f"- عنوان: {p.title or '—'} (id={p.doc_id}, score={p.score:.3f})\n"
143
+ f" متن: {snippet if snippet else '—'}"
144
+ )
145
+ lines.append(f"\n### پرسش: {question}\n### پاسخ:")
146
  return "\n".join(lines)
147
 
148
  def call_llm(prompt: str) -> str: