Spaces:
Sleeping
Sleeping
dhruv575 committed on
Commit ·
d62475b
1
Parent(s): 87aa5b7
Debug issue
Browse files
- controllers/log_controller.py +48 -45
controllers/log_controller.py
CHANGED
|
@@ -355,7 +355,7 @@ def classify_log_activities(current_user):
|
|
| 355 |
Extract and classify activities from a log file without creating incidents
|
| 356 |
Returns classified activities for the frontend to handle
|
| 357 |
"""
|
| 358 |
-
logger.info(f"classify_log_activities
|
| 359 |
|
| 360 |
if 'file' not in request.files:
|
| 361 |
logger.error("No file part in the request")
|
|
@@ -372,50 +372,42 @@ def classify_log_activities(current_user):
|
|
| 372 |
return jsonify({'message': 'Only PDF files are allowed'}), 400
|
| 373 |
|
| 374 |
try:
|
| 375 |
-
|
| 376 |
api_key = os.environ.get('OPENAI_API_KEY')
|
| 377 |
if not api_key:
|
| 378 |
logger.error("OPENAI_API_KEY environment variable is not set")
|
| 379 |
return jsonify({'message': 'OpenAI API key not configured'}), 500
|
| 380 |
|
| 381 |
-
|
| 382 |
-
client = openai.OpenAI(api_key=api_key)
|
| 383 |
-
|
| 384 |
-
# Read file content
|
| 385 |
file_content = file.read()
|
| 386 |
logger.info(f"Read {len(file_content)} bytes from file {file.filename}")
|
| 387 |
|
| 388 |
-
|
| 389 |
-
logger.info(f"Starting OCR for uploaded log")
|
| 390 |
extracted_text = pdf_to_text(file_content, is_bytes=True)
|
| 391 |
-
logger.info(f"Extracted {len(extracted_text)} characters
|
| 392 |
|
| 393 |
-
|
| 394 |
-
logger.info(f"Extracting activities from log")
|
| 395 |
activities_json = extract_activities(extracted_text)
|
|
|
|
| 396 |
|
| 397 |
-
|
| 398 |
activities_data = json.loads(activities_json)
|
| 399 |
activities = activities_data.get('activities', [])
|
| 400 |
-
logger.info(f"
|
| 401 |
|
| 402 |
-
|
| 403 |
-
logger.info(f"Classifying activities")
|
| 404 |
-
|
| 405 |
-
# Get all workflows for this department
|
| 406 |
workflows = Workflow.find_by_department(current_user.department_id)
|
|
|
|
| 407 |
|
| 408 |
-
# Skip if no workflows defined
|
| 409 |
if not workflows:
|
| 410 |
-
logger.warning(f"No workflows defined for department {current_user.department_id}")
|
| 411 |
return jsonify({
|
| 412 |
'message': 'No workflows to classify against',
|
| 413 |
'activities': activities,
|
| 414 |
'classified_activities': [],
|
| 415 |
-
'extracted_text': extracted_text
|
| 416 |
}), 200
|
| 417 |
|
| 418 |
-
# Prepare workflow information for classification
|
| 419 |
workflow_info = []
|
| 420 |
for workflow in workflows:
|
| 421 |
workflow_info.append({
|
|
@@ -423,43 +415,54 @@ def classify_log_activities(current_user):
|
|
| 423 |
"title": workflow.title,
|
| 424 |
"description": workflow.description
|
| 425 |
})
|
|
|
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
|
| 430 |
-
for activity in activities:
|
| 431 |
-
|
|
|
|
| 432 |
workflow_id = classify_activity(activity, workflow_info)
|
| 433 |
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
if workflow_id:
|
| 436 |
workflow = next((w for w in workflow_info if w["id"] == workflow_id), None)
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
"
|
| 441 |
-
"
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
else:
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
})
|
| 450 |
|
| 451 |
-
logger.info(f"Classification
|
| 452 |
|
|
|
|
|
|
|
| 453 |
return jsonify({
|
| 454 |
'message': 'Log activities extracted and classified',
|
| 455 |
-
'activities': activities,
|
| 456 |
-
'classified_activities':
|
| 457 |
'workflows': workflow_info,
|
| 458 |
-
'extracted_text': extracted_text
|
| 459 |
}), 200
|
| 460 |
|
| 461 |
except Exception as e:
|
| 462 |
-
|
|
|
|
| 463 |
import traceback
|
| 464 |
-
logger.error(traceback.format_exc())
|
| 465 |
-
|
|
|
|
|
|
| 355 |
Extract and classify activities from a log file without creating incidents
|
| 356 |
Returns classified activities for the frontend to handle
|
| 357 |
"""
|
| 358 |
+
logger.info(f"Entering classify_log_activities for user {current_user.email}")
|
| 359 |
|
| 360 |
if 'file' not in request.files:
|
| 361 |
logger.error("No file part in the request")
|
|
|
|
| 372 |
return jsonify({'message': 'Only PDF files are allowed'}), 400
|
| 373 |
|
| 374 |
try:
|
| 375 |
+
logger.info("Checking for OpenAI API key...")
|
| 376 |
api_key = os.environ.get('OPENAI_API_KEY')
|
| 377 |
if not api_key:
|
| 378 |
logger.error("OPENAI_API_KEY environment variable is not set")
|
| 379 |
return jsonify({'message': 'OpenAI API key not configured'}), 500
|
| 380 |
|
| 381 |
+
logger.info("Reading file content...")
|
|
|
|
|
|
|
|
|
|
| 382 |
file_content = file.read()
|
| 383 |
logger.info(f"Read {len(file_content)} bytes from file {file.filename}")
|
| 384 |
|
| 385 |
+
logger.info(f"Starting OCR...")
|
|
|
|
| 386 |
extracted_text = pdf_to_text(file_content, is_bytes=True)
|
| 387 |
+
logger.info(f"OCR finished. Extracted {len(extracted_text)} characters.")
|
| 388 |
|
| 389 |
+
logger.info(f"Extracting activities with LLM...")
|
|
|
|
| 390 |
activities_json = extract_activities(extracted_text)
|
| 391 |
+
logger.info(f"Activity extraction finished. Received JSON string of length {len(activities_json)}.")
|
| 392 |
|
| 393 |
+
logger.info("Parsing activities JSON...")
|
| 394 |
activities_data = json.loads(activities_json)
|
| 395 |
activities = activities_data.get('activities', [])
|
| 396 |
+
logger.info(f"Parsed activities JSON. Found {len(activities)} activities.")
|
| 397 |
|
| 398 |
+
logger.info(f"Fetching workflows for department {current_user.department_id}...")
|
|
|
|
|
|
|
|
|
|
| 399 |
workflows = Workflow.find_by_department(current_user.department_id)
|
| 400 |
+
logger.info(f"Fetched {len(workflows)} workflows.")
|
| 401 |
|
|
|
|
| 402 |
if not workflows:
|
| 403 |
+
logger.warning(f"No workflows defined for department {current_user.department_id}. Returning early.")
|
| 404 |
return jsonify({
|
| 405 |
'message': 'No workflows to classify against',
|
| 406 |
'activities': activities,
|
| 407 |
'classified_activities': [],
|
| 408 |
+
'extracted_text': extracted_text
|
| 409 |
}), 200
|
| 410 |
|
|
|
|
| 411 |
workflow_info = []
|
| 412 |
for workflow in workflows:
|
| 413 |
workflow_info.append({
|
|
|
|
| 415 |
"title": workflow.title,
|
| 416 |
"description": workflow.description
|
| 417 |
})
|
| 418 |
+
logger.info(f"Prepared workflow info for classification: {workflow_info}")
|
| 419 |
|
| 420 |
+
classified_activities_output = []
|
| 421 |
+
logger.info(f"Starting classification loop for {len(activities)} activities...")
|
| 422 |
|
| 423 |
+
for index, activity in enumerate(activities):
|
| 424 |
+
logger.info(f"Classifying activity {index + 1}/{len(activities)}: '{activity.get('activity', 'N/A')}'")
|
| 425 |
+
# classify_activity function now handles its own detailed logging
|
| 426 |
workflow_id = classify_activity(activity, workflow_info)
|
| 427 |
|
| 428 |
+
activity_result = {
|
| 429 |
+
"activity": activity,
|
| 430 |
+
"classified": False, # Default to false
|
| 431 |
+
"workflow_id": None,
|
| 432 |
+
"workflow_title": None
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
if workflow_id:
|
| 436 |
workflow = next((w for w in workflow_info if w["id"] == workflow_id), None)
|
| 437 |
+
if workflow:
|
| 438 |
+
activity_result["classified"] = True
|
| 439 |
+
activity_result["workflow_id"] = workflow_id
|
| 440 |
+
activity_result["workflow_title"] = workflow["title"]
|
| 441 |
+
logger.info(f"Activity {index + 1} classified as Workflow: {workflow['title']} ({workflow_id})")
|
| 442 |
+
else:
|
| 443 |
+
# This case should be rare now due to checks in classify_activity
|
| 444 |
+
logger.warning(f"Activity {index + 1} returned workflow ID {workflow_id} but no matching workflow found in info list.")
|
| 445 |
else:
|
| 446 |
+
logger.info(f"Activity {index + 1} classified as mundane.")
|
| 447 |
+
|
| 448 |
+
classified_activities_output.append(activity_result)
|
|
|
|
| 449 |
|
| 450 |
+
logger.info(f"Classification loop finished. Successfully processed {len(classified_activities_output)} activities.")
|
| 451 |
|
| 452 |
+
# If we reached here, the try block succeeded.
|
| 453 |
+
logger.info("Successfully processed classification request. Returning 200 OK.")
|
| 454 |
return jsonify({
|
| 455 |
'message': 'Log activities extracted and classified',
|
| 456 |
+
'activities': activities, # Original activities
|
| 457 |
+
'classified_activities': classified_activities_output, # Activities with classification results
|
| 458 |
'workflows': workflow_info,
|
| 459 |
+
'extracted_text': extracted_text
|
| 460 |
}), 200
|
| 461 |
|
| 462 |
except Exception as e:
|
| 463 |
+
# Log the exception *before* returning the 500 response
|
| 464 |
+
logger.error(f"!!! Unhandled exception in classify_log_activities: {str(e)}")
|
| 465 |
import traceback
|
| 466 |
+
logger.error(traceback.format_exc()) # Log the full traceback
|
| 467 |
+
# Return a generic error message, the detailed error is in the logs
|
| 468 |
+
return jsonify({'message': 'An internal server error occurred during log classification.'}), 500
|