ChAbhishek28 committed on
Commit
a1986d7
·
1 Parent(s): 548a99d

Improve response formatting for better readability with proper spacing and bullet points

Browse files
Files changed (1) hide show
  1. groq_websocket_handler.py +60 -52
groq_websocket_handler.py CHANGED
@@ -10,6 +10,7 @@ import asyncio
10
  import tempfile
11
  import os
12
  import time
 
13
  from typing import Dict, Any, Optional
14
  from pathlib import Path
15
  import uuid
@@ -798,63 +799,70 @@ class GroqWebSocketHandler:
798
  return response_text[:200] + "..." if len(response_text) > 200 else response_text
799
 
800
  def _apply_rajasthan_formatting(self, query: str, response_text: str) -> str:
801
- """Apply Rajasthan government-specific formatting to responses"""
802
  try:
803
- # Detect query language to avoid unnecessary Hindi formatting
804
- query_language = self.hybrid_llm.detect_language(query)
805
- query_lower = query.lower()
806
-
807
- # For English queries, apply minimal formatting
808
- if query_language == "english":
809
- # Just highlight important terms in English only
810
- highlighted = self.rajasthan_formatter.highlight_important_terms(response_text, language="english")
811
- return highlighted
812
-
813
- # Check if it's a procedure-related query
814
- if any(keyword in query_lower for keyword in ['procedure', 'process', 'steps', 'how to', 'apply', 'प्रक्रिया', 'कैसे']):
815
- # Extract procedure information from response
816
- procedure_info = {
817
- 'name': self._extract_procedure_name(query, response_text),
818
- 'steps': self._extract_steps(response_text),
819
- 'eligibility': self._extract_eligibility(response_text),
820
- 'fees': self._extract_fees(response_text),
821
- 'processing_time': self._extract_processing_time(response_text),
822
- 'office': self._extract_office_info(response_text)
823
- }
824
-
825
- # Apply procedure formatting if we have enough info
826
- if len(procedure_info['steps']) > 0:
827
- formatted = self.rajasthan_formatter.format_procedure_response(procedure_info)
828
- return self.rajasthan_formatter.add_rajasthan_context(formatted)
829
-
830
- # Check if it's a circular/policy query
831
- elif any(keyword in query_lower for keyword in ['circular', 'policy', 'rule', 'परिपत्र', 'नीति', 'नियम']):
832
- # Extract circular information
833
- circular_info = {
834
- 'department': self._detect_department(response_text),
835
- 'subject': query,
836
- 'date': time.strftime('%d/%m/%Y'),
837
- 'circular_no': self._extract_circular_number(response_text)
838
- }
839
-
840
- formatted = self.rajasthan_formatter.format_circular_response(response_text, circular_info)
841
- return self.rajasthan_formatter.add_rajasthan_context(formatted)
842
-
843
- # For calculation queries, format calculation steps
844
- elif any(keyword in query_lower for keyword in ['calculate', 'calculation', 'amount', 'गणना', 'राशि']):
845
- formatted = self.rajasthan_formatter.format_calculation_steps(response_text)
846
- return self.rajasthan_formatter.add_rajasthan_context(formatted)
847
-
848
- # General formatting for all responses
849
- else:
850
- # Apply highlighting and context
851
- highlighted = self.rajasthan_formatter.highlight_important_terms(response_text)
852
- return self.rajasthan_formatter.add_rajasthan_context(highlighted)
853
 
854
  except Exception as e:
855
  logger.error(f"❌ Error applying Rajasthan formatting: {e}")
856
  # Fallback to basic context addition
857
- return self.rajasthan_formatter.add_rajasthan_context(response_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
 
859
  def _extract_procedure_name(self, query: str, response: str) -> str:
860
  """Extract procedure name from query or response"""
 
10
  import tempfile
11
  import os
12
  import time
13
+ import re
14
  from typing import Dict, Any, Optional
15
  from pathlib import Path
16
  import uuid
 
799
  return response_text[:200] + "..." if len(response_text) > 200 else response_text
800
 
801
  def _apply_rajasthan_formatting(self, query: str, response_text: str) -> str:
802
+ """Apply clean, readable Rajasthan government-specific formatting to responses"""
803
  try:
804
+ # Simple, readable formatting approach
805
+ return self._format_for_readability(response_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806
 
807
  except Exception as e:
808
  logger.error(f"❌ Error applying Rajasthan formatting: {e}")
809
  # Fallback to basic context addition
810
+ return response_text
811
+
812
+ def _format_for_readability(self, text: str) -> str:
813
+ """Format text for better readability with proper spacing and structure"""
814
+ try:
815
+ # Clean up the text first
816
+ text = text.strip()
817
+
818
+ # Split into sentences and clean up
819
+ sentences = text.split('. ')
820
+ formatted_sentences = []
821
+
822
+ current_section = ""
823
+
824
+ for sentence in sentences:
825
+ sentence = sentence.strip()
826
+ if not sentence:
827
+ continue
828
+
829
+ # Add period if missing
830
+ if not sentence.endswith(('.', '!', '?', ':', '।')):
831
+ sentence += '.'
832
+
833
+ # Check if this looks like a section header or important point
834
+ if any(marker in sentence.lower() for marker in [
835
+ 'eligibility criteria', 'minimum service', 'voluntary retirement',
836
+ 'family pension', 'gratuity', 'commutation', 'basic pay',
837
+ 'service pension', 'medical benefits', 'pension limitations'
838
+ ]):
839
+ # This is an important point - format as bullet
840
+ formatted_sentences.append(f"\n• **{sentence}**")
841
+ elif sentence.startswith(('The ', 'This ', 'It ', 'These ', 'Those ')):
842
+ # Main explanation sentence
843
+ formatted_sentences.append(f"\n{sentence}")
844
+ elif any(char.isdigit() for char in sentence[:10]):
845
+ # Might contain numbers/dates - format as bullet
846
+ formatted_sentences.append(f"\n• {sentence}")
847
+ else:
848
+ # Regular sentence
849
+ formatted_sentences.append(sentence)
850
+
851
+ # Join all sentences
852
+ formatted_text = ' '.join(formatted_sentences)
853
+
854
+ # Add proper spacing after bullets and sections
855
+ formatted_text = re.sub(r'\n•', '\n\n•', formatted_text)
856
+ formatted_text = re.sub(r'\*\*([^*]+)\*\*', r'**\1**\n', formatted_text)
857
+
858
+ # Clean up multiple newlines
859
+ formatted_text = re.sub(r'\n{3,}', '\n\n', formatted_text)
860
+
861
+ return formatted_text.strip()
862
+
863
+ except Exception as e:
864
+ logger.error(f"❌ Error in readability formatting: {e}")
865
+ return text
866
 
867
  def _extract_procedure_name(self, query: str, response: str) -> str:
868
  """Extract procedure name from query or response"""