Spaces:
Sleeping
Sleeping
File size: 7,197 Bytes
15a08d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
"""
Utility functions for query classification and response generation
"""
import re
from typing import Tuple
class QueryClassifier:
    """Decide whether a chat query needs document retrieval.

    Classification is a small ordered set of heuristics (see
    ``needs_retrieval``). Queries that do not need retrieval can be answered
    with a canned reply from ``get_simple_response``.
    """

    # Small-talk patterns (no retrieval needed). Order matters:
    # get_simple_response() pairs each pattern with a canned reply by position
    # (greeting, thanks, goodbye, acknowledgment, yes/no).
    SIMPLE_PATTERNS = [
        r"\b(hi|hello|hey|greetings|good morning|good evening|good afternoon)\b",
        r"\b(thank you|thanks|thx|appreciate it)\b",
        r"\b(bye|goodbye|see you|take care)\b",
        r"\b(ok|okay|got it|understood|alright|sure)\b",
        r"\b(yes|yeah|yep|no|nope)\b",
    ]

    # Medical keywords (definitely needs retrieval). Matched as plain
    # substrings of the lower-cased query, so derived forms such as
    # "symptoms" or "chemotherapy" hit; the trade-off is that unrelated words
    # containing a keyword (e.g. "latest" contains "test") hit as well.
    MEDICAL_KEYWORDS = [
        "symptom",
        "treatment",
        "disease",
        "diagnosis",
        "medicine",
        "medication",
        "cure",
        "pain",
        "fever",
        "infection",
        "doctor",
        "hospital",
        "prescription",
        "side effect",
        "dosage",
        "therapy",
        "vaccine",
        "surgery",
        "condition",
        "blood",
        "pressure",
        "diabetes",
        "cancer",
        "heart",
        "lung",
        "kidney",
        "test",
        "scan",
        "mri",
        "x-ray",
        "injury",
        "allergy",
        "chronic",
        "acute",
        "disorder",
        "illness",
        "sick",
        "health",
    ]

    # Words that mark a query as a question when one of them is among the
    # first three words.
    QUESTION_WORDS = frozenset(
        {
            "what",
            "how",
            "why",
            "when",
            "where",
            "which",
            "who",
            "can",
            "should",
            "is",
            "are",
            "does",
            "do",
            "could",
            "would",
            "will",
        }
    )

    # Leading alphabetic run of a token, ignoring any punctuation before it:
    # "what's" -> "what", "how?" -> "how", "(is" -> "is".
    _LEADING_WORD = re.compile(r"\W*([a-z]+)")

    # Canned replies, aligned position-for-position with SIMPLE_PATTERNS.
    _SIMPLE_RESPONSES = (
        # Greetings
        "Hello! I'm your medical assistant. I can help answer questions about "
        "symptoms, treatments, medications, and general health information. "
        "How can I assist you today?",
        # Thanks
        "You're very welcome! If you have any other health-related questions, "
        "feel free to ask. I'm here to help!",
        # Goodbye
        "Goodbye! Take care of your health. Feel free to return anytime you "
        "have questions. Stay well!",
        # Acknowledgments
        "Is there anything else you'd like to know about your health or medical concerns?",
        # Yes/No
        "Could you please provide more details about your question? "
        "I'm here to help with any health-related information you need.",
    )

    # Reply used when none of the small-talk patterns match.
    _DEFAULT_RESPONSE = (
        "I'm here to help with medical and health-related questions. "
        "Could you please elaborate on what you'd like to know?"
    )

    @classmethod
    def needs_retrieval(cls, query: str) -> Tuple[bool, str]:
        """
        Determine if query needs document retrieval

        Rules are evaluated in order; the first one that applies wins:

        1. Three words or fewer, matches a small-talk pattern and contains
           no medical keyword -> ``(False, "simple_greeting")``
        2. Contains a medical keyword -> ``(True, "medical_keyword_detected")``
        3. Three or more words and a question word among the first three
           -> ``(True, "question_detected")``
        4. Exactly one word -> ``(False, "single_word")``
        5. Four or more words -> ``(True, "substantial_query")``
        6. Anything else (including an empty query)
           -> ``(False, "default_no_retrieval")``

        Args:
            query: User's input message
        Returns:
            Tuple[bool, str]: (needs_retrieval, reason)
        """
        query_lower = query.lower().strip()
        words = query_lower.split()
        word_count = len(words)

        has_medical_keyword = any(
            keyword in query_lower for keyword in cls.MEDICAL_KEYWORDS
        )

        # Rule 1: Very short small talk (no retrieval). A medical keyword
        # takes priority so that replies such as "no pain" or "yes, fever"
        # are not dismissed as a bare yes/no.
        if word_count <= 3 and not has_medical_keyword:
            if any(re.search(pattern, query_lower) for pattern in cls.SIMPLE_PATTERNS):
                return False, "simple_greeting"

        # Rule 2: Contains medical keywords (needs retrieval)
        if has_medical_keyword:
            return True, "medical_keyword_detected"

        # Rule 3: Question words in longer queries (likely needs retrieval).
        # Compare the leading alphabetic part of each of the first three
        # tokens so punctuation and contractions still count.
        if word_count >= 3:
            for token in words[:3]:
                leading = cls._LEADING_WORD.match(token)
                if leading and leading.group(1) in cls.QUESTION_WORDS:
                    return True, "question_detected"

        # Rule 4: Single word queries (context-dependent, default to no retrieval)
        if word_count == 1:
            return False, "single_word"

        # Default: If uncertain and query is substantial, use retrieval
        if word_count >= 4:
            return True, "substantial_query"

        return False, "default_no_retrieval"

    @classmethod
    def get_simple_response(cls, query: str) -> str:
        """
        Generate appropriate response for non-retrieval queries

        The query is lower-cased and tested against the first five
        ``SIMPLE_PATTERNS`` in order; the reply paired with the first
        matching pattern is returned, or a generic prompt to elaborate when
        nothing matches.

        Args:
            query: User's input message
        Returns:
            str: Appropriate response without retrieval
        """
        query_lower = query.lower().strip()

        for pattern, reply in zip(cls.SIMPLE_PATTERNS, cls._SIMPLE_RESPONSES):
            if re.search(pattern, query_lower):
                return reply

        return cls._DEFAULT_RESPONSE
class StreamingHandler:
    """Turn chat responses into a stream of ``data: <json>`` text events."""

    @staticmethod
    async def stream_rag_response(rag_chain, input_data: dict):
        """
        Relay answer tokens from a retrieval chain as they arrive

        Each chunk produced by ``rag_chain.astream(input_data)`` that has an
        "answer" entry is forwarded as one event; chunks without it are
        skipped. A final event carries the concatenated answer. Any exception
        raised while streaming is reported as a single error event instead of
        propagating.

        Args:
            rag_chain: Object exposing an async-iterable ``astream(input_data)``
            input_data: Dict with 'input' and 'chat_history' keys
        Yields:
            str: JSON formatted chunks with token data
        """
        import json

        def as_event(payload: dict) -> str:
            # One event: "data: <json>" terminated by a blank line.
            return f"data: {json.dumps(payload)}\n\n"

        answer_so_far = ""
        try:
            async for piece in rag_chain.astream(input_data):
                # Only chunks carrying part of the answer are forwarded.
                if "answer" not in piece:
                    continue
                text = piece["answer"]
                answer_so_far = answer_so_far + text
                yield as_event({"token": text, "done": False})

            # Completion signal with the whole answer attached.
            yield as_event({"token": "", "done": True, "full_answer": answer_so_far})
        except Exception as exc:
            failure = f"Streaming error: {str(exc)}"
            yield as_event({"error": failure, "done": True})

    @staticmethod
    async def stream_simple_response(response: str):
        """
        Emit a ready-made reply one character at a time

        A 0.01 second pause follows every character so the client sees a
        typing effect; the last event repeats the full text.

        Args:
            response: The complete response text
        Yields:
            str: JSON formatted chunks with token data
        """
        import asyncio
        import json

        def as_event(payload: dict) -> str:
            # One event: "data: <json>" terminated by a blank line.
            return f"data: {json.dumps(payload)}\n\n"

        position = 0
        while position < len(response):
            yield as_event({"token": response[position], "done": False})
            await asyncio.sleep(0.01)  # Small delay for smooth streaming
            position += 1

        # Completion signal
        yield as_event({"token": "", "done": True, "full_answer": response})
|