sofzcc committed on
Commit
ff5c1a5
·
verified ·
1 Parent(s): 5de21d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -14
app.py CHANGED
@@ -362,11 +362,31 @@ class RAGIndex:
362
  print(f"Retrieval error: {e}")
363
  return []
364
 
365
- def _generate_from_context(self, prompt: str, max_new_tokens: int = 128) -> str:
366
- """Run Flan-T5 on the given prompt and return the decoded answer."""
 
 
 
 
 
 
 
 
367
  if self.qa_model is None or self.qa_tokenizer is None:
368
  raise RuntimeError("QA model not loaded.")
369
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  inputs = self.qa_tokenizer(
371
  prompt,
372
  return_tensors="pt",
@@ -377,6 +397,7 @@ class RAGIndex:
377
  outputs = self.qa_model.generate(
378
  **inputs,
379
  max_new_tokens=max_new_tokens,
 
380
  do_sample=False,
381
  )
382
 
@@ -414,6 +435,7 @@ class RAGIndex:
414
  used_sources = set()
415
  context_texts = []
416
 
 
417
  for ctx, source, score in contexts:
418
  used_sources.add(source)
419
  cleaned_ctx = clean_context_text(ctx)
@@ -434,19 +456,13 @@ class RAGIndex:
434
  if len(combined_context) > max_context_chars:
435
  combined_context = combined_context[:max_context_chars]
436
 
437
- # 3) Build a prompt that works for both BART (summarization-style)
438
- # and instruction-tuned models like Flan-T5.
439
- prompt = (
440
- "You are an assistant that answers questions about a knowledge base.\n"
441
- "Using only the information in the passages below, answer the question in 2–4 sentences.\n"
442
- "Explain in clear, natural language. Do NOT copy section numbers, markdown headings, or bullet symbols.\n\n"
443
- f"Passages:\n{combined_context}\n\n"
444
- f"Question: {question}\n\n"
445
- "Answer:"
446
- )
447
-
448
  try:
449
- answer_text = self._generate_from_context(prompt, max_new_tokens=180).strip()
 
 
 
 
450
  except Exception as e:
451
  print(f"Generation error: {e}")
452
  return (
 
362
  print(f"Retrieval error: {e}")
363
  return []
364
 
365
+ def _generate_from_context(
366
+ self,
367
+ question: str,
368
+ context: str,
369
+ max_new_tokens: int = 180,
370
+ ) -> str:
371
+ """
372
+ Generate a grounded answer from the retrieved context using a seq2seq model
373
+ (FLAN-T5, BART, etc.). The prompt forces the model to only use the context.
374
+ """
375
  if self.qa_model is None or self.qa_tokenizer is None:
376
  raise RuntimeError("QA model not loaded.")
377
 
378
+ prompt = (
379
+ "You are a knowledge base assistant. Answer the question ONLY using the information "
380
+ "in the context below.\n"
381
+ "If the context does not contain the answer, say exactly: "
382
+ "\"The documents do not contain enough information to answer this.\"\n\n"
383
+ f"Question: {question}\n\n"
384
+ "Context:\n"
385
+ f"{context}\n\n"
386
+ "Write a helpful answer in 2–4 sentences. Keep it factual and concise. "
387
+ "Do NOT repeat the question. Do NOT include section titles or headings."
388
+ )
389
+
390
  inputs = self.qa_tokenizer(
391
  prompt,
392
  return_tensors="pt",
 
397
  outputs = self.qa_model.generate(
398
  **inputs,
399
  max_new_tokens=max_new_tokens,
400
+ temperature=0.0, # deterministic
401
  do_sample=False,
402
  )
403
 
 
435
  used_sources = set()
436
  context_texts = []
437
 
438
+ # Clean and collect the retrieved chunks
439
  for ctx, source, score in contexts:
440
  used_sources.add(source)
441
  cleaned_ctx = clean_context_text(ctx)
 
456
  if len(combined_context) > max_context_chars:
457
  combined_context = combined_context[:max_context_chars]
458
 
459
+ # 3) Generate grounded answer from context
 
 
 
 
 
 
 
 
 
 
460
  try:
461
+ answer_text = self._generate_from_context(
462
+ question=question,
463
+ context=combined_context,
464
+ max_new_tokens=180,
465
+ ).strip()
466
  except Exception as e:
467
  print(f"Generation error: {e}")
468
  return (