Spaces:
Sleeping
Sleeping
Enhance the automatic validation process in agent.py. Update response handling to provide context and next steps when guidelines are not found, keeping responses professional. Modify the validation function to run silently in the background, logging results for backend analysis without displaying them to the user. Improve error handling and logging for validation failures.
Browse files- core/agent.py +36 -57
core/agent.py
CHANGED
|
@@ -119,7 +119,14 @@ You will be responding to practicing medical professionals so adjust your answer
|
|
| 119 |
- When citing text:
|
| 120 |
* Specify the section or subsection heading
|
| 121 |
* Indicate if it's from a bullet point, paragraph, or other format
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
- Never speculate or provide information not present in the guidelines.
|
| 124 |
- Always respond in English.
|
| 125 |
|
|
@@ -235,6 +242,8 @@ def _should_validate_response(user_input: str, response: str) -> bool:
|
|
| 235 |
"sorry,",
|
| 236 |
"i don't know",
|
| 237 |
"i do not know",
|
|
|
|
|
|
|
| 238 |
"validation report",
|
| 239 |
"evaluation scores"
|
| 240 |
]
|
|
@@ -271,16 +280,17 @@ def _should_validate_response(user_input: str, response: str) -> bool:
|
|
| 271 |
return any(indicator in response_lower for indicator in medical_indicators)
|
| 272 |
|
| 273 |
|
| 274 |
-
def _perform_automatic_validation(user_input: str, response: str) ->
|
| 275 |
"""
|
| 276 |
-
Perform automatic validation
|
|
|
|
| 277 |
|
| 278 |
Args:
|
| 279 |
user_input: The user's input
|
| 280 |
response: The agent's response
|
| 281 |
|
| 282 |
Returns:
|
| 283 |
-
|
| 284 |
"""
|
| 285 |
try:
|
| 286 |
# Import here to avoid circular imports
|
|
@@ -289,52 +299,24 @@ def _perform_automatic_validation(user_input: str, response: str) -> str:
|
|
| 289 |
# Check if we have the necessary context for validation
|
| 290 |
if not _last_question or not _last_documents:
|
| 291 |
logger.info("Skipping validation: insufficient context")
|
| 292 |
-
return
|
| 293 |
|
| 294 |
# Perform validation using the original user input instead of tool query
|
| 295 |
evaluation = validate_medical_answer(user_input, _last_documents, response)
|
| 296 |
|
| 297 |
-
#
|
| 298 |
report = evaluation.get("validation_report", {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
---
|
| 303 |
-
|
| 304 |
-
## 🔍 **AUTOMATIC VALIDATION REPORT**
|
| 305 |
-
|
| 306 |
-
**Overall Score:** {report.get('Overall_Rating', 'N/A')}/100
|
| 307 |
-
|
| 308 |
-
**Key Metrics:**
|
| 309 |
-
|
| 310 |
-
**Accuracy:** {report.get('Accuracy_Rating', 'N/A')}/100
|
| 311 |
-
{report.get('Accuracy_Comment', 'No comment available')}
|
| 312 |
-
|
| 313 |
-
**Coherence:** {report.get('Coherence_Rating', 'N/A')}/100
|
| 314 |
-
{report.get('Coherence_Comment', 'No comment available')}
|
| 315 |
-
|
| 316 |
-
**Relevance:** {report.get('Relevance_Rating', 'N/A')}/100
|
| 317 |
-
{report.get('Relevance_Comment', 'No comment available')}
|
| 318 |
-
|
| 319 |
-
**Completeness:** {report.get('Completeness_Rating', 'N/A')}/100
|
| 320 |
-
{report.get('Completeness_Comment', 'No comment available')}
|
| 321 |
-
|
| 322 |
-
**Citations:** {report.get('Citations_Attribution_Rating', 'N/A')}/100
|
| 323 |
-
{report.get('Citations_Attribution_Comment', 'No comment available')}
|
| 324 |
-
|
| 325 |
-
**Length:** {report.get('Length_Rating', 'N/A')}/100
|
| 326 |
-
{report.get('Length_Comment', 'No comment available')}
|
| 327 |
-
|
| 328 |
-
**Assessment:** {report.get('Final_Summary_and_Improvement_Plan', 'No assessment available')}
|
| 329 |
-
|
| 330 |
-
*Validation ID: {evaluation.get('interaction_id', 'N/A')} | Saved to evaluation_results.json*
|
| 331 |
-
"""
|
| 332 |
-
|
| 333 |
-
return response + validation_summary
|
| 334 |
|
| 335 |
except Exception as e:
|
| 336 |
-
logger.error(f"
|
| 337 |
-
return response
|
| 338 |
|
| 339 |
|
| 340 |
# ============================================================================
|
|
@@ -447,22 +429,15 @@ async def run_agent_streaming(user_input: str, session_id: str = "default", max_
|
|
| 447 |
if not response["output"] or not response["output"].strip():
|
| 448 |
raise ValidationError("Empty response from agent")
|
| 449 |
|
| 450 |
-
# Perform automatic validation
|
| 451 |
base_response = response["output"]
|
| 452 |
if _should_validate_response(user_input, base_response):
|
| 453 |
-
logger.info("Performing
|
| 454 |
try:
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
if len(validation_content) > len(base_response):
|
| 458 |
-
validation_part = validation_content[len(base_response):]
|
| 459 |
-
# Stream the validation part
|
| 460 |
-
validation_words = validation_part.split(' ')
|
| 461 |
-
for word in validation_words:
|
| 462 |
-
yield word + ' '
|
| 463 |
-
await asyncio.sleep(0.02)
|
| 464 |
except Exception as e:
|
| 465 |
-
logger.error(f"
|
| 466 |
|
| 467 |
# Save conversation context to memory
|
| 468 |
memory.save_context(
|
|
@@ -710,11 +685,15 @@ async def run_agent(user_input: str, session_id: str = "default", max_retries: i
|
|
| 710 |
|
| 711 |
logger.info(f"Successfully processed user input: {user_input[:50]}...")
|
| 712 |
|
| 713 |
-
# Perform automatic validation
|
| 714 |
final_response = response["output"]
|
| 715 |
if _should_validate_response(user_input, final_response):
|
| 716 |
-
logger.info("Performing
|
| 717 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 718 |
|
| 719 |
return final_response
|
| 720 |
|
|
|
|
| 119 |
- When citing text:
|
| 120 |
* Specify the section or subsection heading
|
| 121 |
* Indicate if it's from a bullet point, paragraph, or other format
|
| 122 |
+
|
| 123 |
+
- If the answer is not found in the retrieved guidelines, provide a helpful response that:
|
| 124 |
+
* Acknowledges the limitation: "Based on the available medical guidelines in my knowledge base, I could not find specific information about [topic]."
|
| 125 |
+
* Suggests alternatives: "You may want to:
|
| 126 |
+
- Rephrase your question with more specific clinical details
|
| 127 |
+
- Specify a particular guideline provider (NCCN, ASCO, ESMO, NICE)
|
| 128 |
+
- Consult the latest published guidelines directly for emerging topics"
|
| 129 |
+
* Maintains professionalism: Never simply say "I don't know" - always provide context and next steps
|
| 130 |
- Never speculate or provide information not present in the guidelines.
|
| 131 |
- Always respond in English.
|
| 132 |
|
|
|
|
| 242 |
"sorry,",
|
| 243 |
"i don't know",
|
| 244 |
"i do not know",
|
| 245 |
+
"could not find specific information",
|
| 246 |
+
"not found in the retrieved guidelines",
|
| 247 |
"validation report",
|
| 248 |
"evaluation scores"
|
| 249 |
]
|
|
|
|
| 280 |
return any(indicator in response_lower for indicator in medical_indicators)
|
| 281 |
|
| 282 |
|
| 283 |
+
def _perform_automatic_validation(user_input: str, response: str) -> None:
|
| 284 |
"""
|
| 285 |
+
Perform automatic validation in the background without displaying results to user.
|
| 286 |
+
Validation results are logged and saved to GitHub repository for backend analysis.
|
| 287 |
|
| 288 |
Args:
|
| 289 |
user_input: The user's input
|
| 290 |
response: The agent's response
|
| 291 |
|
| 292 |
Returns:
|
| 293 |
+
None: Validation runs silently in background
|
| 294 |
"""
|
| 295 |
try:
|
| 296 |
# Import here to avoid circular imports
|
|
|
|
| 299 |
# Check if we have the necessary context for validation
|
| 300 |
if not _last_question or not _last_documents:
|
| 301 |
logger.info("Skipping validation: insufficient context")
|
| 302 |
+
return
|
| 303 |
|
| 304 |
# Perform validation using the original user input instead of tool query
|
| 305 |
evaluation = validate_medical_answer(user_input, _last_documents, response)
|
| 306 |
|
| 307 |
+
# Log validation results to backend only (not shown to user)
|
| 308 |
report = evaluation.get("validation_report", {})
|
| 309 |
+
logger.info(f"Background validation completed - Interaction ID: {evaluation.get('interaction_id', 'N/A')}")
|
| 310 |
+
logger.info(f"Validation scores - Overall: {report.get('Overall_Rating', 'N/A')}/100, "
|
| 311 |
+
f"Accuracy: {report.get('Accuracy_Rating', 'N/A')}/100, "
|
| 312 |
+
f"Coherence: {report.get('Coherence_Rating', 'N/A')}/100, "
|
| 313 |
+
f"Relevance: {report.get('Relevance_Rating', 'N/A')}/100")
|
| 314 |
|
| 315 |
+
# Validation is automatically saved to GitHub by validate_medical_answer function
|
| 316 |
+
# No need to return anything - results are stored in backend only
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
except Exception as e:
|
| 319 |
+
logger.error(f"Background validation failed: {e}")
|
|
|
|
| 320 |
|
| 321 |
|
| 322 |
# ============================================================================
|
|
|
|
| 429 |
if not response["output"] or not response["output"].strip():
|
| 430 |
raise ValidationError("Empty response from agent")
|
| 431 |
|
| 432 |
+
# Perform automatic validation in background (hidden from user)
|
| 433 |
base_response = response["output"]
|
| 434 |
if _should_validate_response(user_input, base_response):
|
| 435 |
+
logger.info("Performing background validation for streaming response...")
|
| 436 |
try:
|
| 437 |
+
# Run validation silently - results saved to backend/GitHub only
|
| 438 |
+
_perform_automatic_validation(user_input, base_response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
except Exception as e:
|
| 440 |
+
logger.error(f"Background validation failed: {e}")
|
| 441 |
|
| 442 |
# Save conversation context to memory
|
| 443 |
memory.save_context(
|
|
|
|
| 685 |
|
| 686 |
logger.info(f"Successfully processed user input: {user_input[:50]}...")
|
| 687 |
|
| 688 |
+
# Perform automatic validation in background (hidden from user)
|
| 689 |
final_response = response["output"]
|
| 690 |
if _should_validate_response(user_input, final_response):
|
| 691 |
+
logger.info("Performing background validation...")
|
| 692 |
+
try:
|
| 693 |
+
# Run validation silently - results saved to backend/GitHub only
|
| 694 |
+
_perform_automatic_validation(user_input, final_response)
|
| 695 |
+
except Exception as e:
|
| 696 |
+
logger.error(f"Background validation failed: {e}")
|
| 697 |
|
| 698 |
return final_response
|
| 699 |
|