rohannsinghal committed on
Commit
9c2df30
·
1 Parent(s): 43a0745

updated main_api.py

Browse files
Files changed (1) hide show
  1. app/main_api.py +50 -6
app/main_api.py CHANGED
@@ -419,6 +419,39 @@ class OptimizedSemanticRAGPipeline:
419
  self.qa_chain = None
420
  logger.info(f"βœ… Optimized semantic RAG pipeline initialized: {collection_name}")
421
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  def add_documents(self, chunks: List[Dict[str, Any]]):
423
  if not chunks:
424
  logger.error("❌ No chunks provided!")
@@ -488,7 +521,7 @@ class OptimizedSemanticRAGPipeline:
488
  }
489
  )
490
 
491
- # Enhanced semantic prompt template
492
  prompt_template = PromptTemplate(
493
  input_variables=["context", "question"],
494
  template="""You are an expert insurance policy analyst with semantic understanding capabilities. Analyze the policy document context to provide accurate, detailed answers.
@@ -502,14 +535,22 @@ SEMANTIC ANALYSIS INSTRUCTIONS:
502
  - Carefully analyze the semantic meaning and relationships in the policy context
503
  - Extract specific facts: numbers, percentages, time periods, conditions, and requirements
504
  - Understand implicit connections between different policy sections
505
- - Quote exact policy language when providing specific details
506
  - Synthesize information from multiple context sections when relevant
507
  - Distinguish between explicit statements and reasonable inferences
508
  - If information is partial, provide what's available and note limitations
509
  - Be precise about conditions, exceptions, and qualifying circumstances
510
 
 
 
 
 
 
 
 
 
511
  ANSWER FORMAT:
512
- Provide a comprehensive answer based on your semantic analysis of the policy document context.
513
 
514
  ANSWER:"""
515
  )
@@ -533,10 +574,13 @@ ANSWER:"""
533
  try:
534
  # Retrieve with semantic understanding
535
  result = await asyncio.to_thread(self.qa_chain, {"query": question})
536
- answer = result.get("result", "Failed to generate semantic answer.")
537
 
538
- logger.info(f"βœ… Semantic answer generated: {len(answer)} characters")
539
- return answer
 
 
 
540
 
541
  except Exception as e:
542
  logger.error(f"❌ Error during semantic QA: {e}")
 
419
  self.qa_chain = None
420
  logger.info(f"βœ… Optimized semantic RAG pipeline initialized: {collection_name}")
421
 
422
def clean_response(self, answer: str) -> str:
    """Clean up the response formatting for better readability.

    Strips decorative quotation marks around single words, short all-caps
    phrases, rupee amounts, percentages and time periods; rewrites a few
    "<lead-in>: "<quote>"" constructions into flowing prose; and normalises
    whitespace.

    Whitespace handling keeps structure intact: runs of spaces/tabs become
    one space, blank-line runs become a single paragraph break, and a lone
    newline that splits a sentence (lowercase letter or comma before it,
    lowercase letter after it) is replaced by a space. Other single
    newlines, such as those between list items, are left alone.

    Args:
        answer: Raw answer text. May be empty or ``None``.

    Returns:
        The cleaned text. Falsy input is returned unchanged.
    """
    if not answer:
        return answer

    # Remove quotes around single words and short phrases.
    answer = re.sub(r'"([A-Z\s]{2,20})"', r'\1', answer)  # short caps phrases
    answer = re.sub(r'"(\w+)"', r'\1', answer)  # single words
    answer = re.sub(r'"(Rs\. [\d,]+[/-]*)"', r'\1', answer)  # amounts
    answer = re.sub(r'"(\d+%)"', r'\1', answer)  # percentages
    answer = re.sub(r'"(\d+ (?:days?|months?|years?))"', r'\1', answer)  # time periods

    # Clean up policy references so the quoted text flows as part of the sentence.
    answer = re.sub(r'As stated in the policy: "([^"]+)"', r'The policy states that \1', answer)
    answer = re.sub(r'According to the policy document: "([^"]+)"', r'According to the policy document, \1', answer)
    answer = re.sub(r'The policy states: "([^"]+)"', r'The policy states that \1', answer)
    answer = re.sub(r'As per the policy: "([^"]+)"', r'As per the policy, \1', answer)

    # Collapse horizontal whitespace only. Matching newlines here (plain \s+)
    # would flatten every paragraph and list into a single line.
    answer = re.sub(r'[^\S\n]+', ' ', answer)
    # Drop the (now at most single) space hugging either side of a newline.
    answer = re.sub(r' ?\n ?', '\n', answer)
    # Any run of blank lines becomes exactly one paragraph break.
    answer = re.sub(r'\n{2,}', '\n\n', answer)
    # Re-join sentences broken by a single newline. Lookarounds keep the
    # neighbouring characters unconsumed so consecutive wrapped lines are
    # all joined, and a paragraph break ("\n\n") can never match.
    answer = re.sub(r'(?<=[a-z,])\n(?=[a-z])', ' ', answer)

    # Fix spacing before punctuation, then trim the ends.
    answer = answer.replace(' ,', ',')
    answer = answer.replace(' .', '.')
    return answer.strip()
454
+
455
  def add_documents(self, chunks: List[Dict[str, Any]]):
456
  if not chunks:
457
  logger.error("❌ No chunks provided!")
 
521
  }
522
  )
523
 
524
+ # Enhanced semantic prompt template with better formatting
525
  prompt_template = PromptTemplate(
526
  input_variables=["context", "question"],
527
  template="""You are an expert insurance policy analyst with semantic understanding capabilities. Analyze the policy document context to provide accurate, detailed answers.
 
535
  - Carefully analyze the semantic meaning and relationships in the policy context
536
  - Extract specific facts: numbers, percentages, time periods, conditions, and requirements
537
  - Understand implicit connections between different policy sections
538
+ - Quote exact policy language when providing specific details, but format quotes naturally
539
  - Synthesize information from multiple context sections when relevant
540
  - Distinguish between explicit statements and reasonable inferences
541
  - If information is partial, provide what's available and note limitations
542
  - Be precise about conditions, exceptions, and qualifying circumstances
543
 
544
+ FORMATTING GUIDELINES:
545
+ - Write in clear, professional paragraphs without unnecessary line breaks
546
+ - When quoting policy text, integrate quotes smoothly into sentences
547
+ - Use bullet points or numbered lists only when listing multiple related items
548
+ - Avoid excessive quotation marks around single words or short phrases
549
+ - Write numbers and percentages directly (e.g., 30 days, 5%, Rs. 10,000) without quotes
550
+ - Make the response flow naturally and be easy to read
551
+
552
  ANSWER FORMAT:
553
+ Provide a comprehensive, well-formatted answer based on your semantic analysis of the policy document context.
554
 
555
  ANSWER:"""
556
  )
 
574
  try:
575
  # Retrieve with semantic understanding
576
  result = await asyncio.to_thread(self.qa_chain, {"query": question})
577
+ raw_answer = result.get("result", "Failed to generate semantic answer.")
578
 
579
+ # Clean up the response formatting
580
+ clean_answer = self.clean_response(raw_answer)
581
+
582
+ logger.info(f"βœ… Semantic answer generated: {len(clean_answer)} characters")
583
+ return clean_answer
584
 
585
  except Exception as e:
586
  logger.error(f"❌ Error during semantic QA: {e}")