dhruv575 committed on
Commit
d62475b
·
1 Parent(s): 87aa5b7

Debug issue

Browse files
Files changed (1) hide show
  1. controllers/log_controller.py +48 -45
controllers/log_controller.py CHANGED
@@ -355,7 +355,7 @@ def classify_log_activities(current_user):
355
  Extract and classify activities from a log file without creating incidents
356
  Returns classified activities for the frontend to handle
357
  """
358
- logger.info(f"classify_log_activities called. Files in request: {request.files.keys()}")
359
 
360
  if 'file' not in request.files:
361
  logger.error("No file part in the request")
@@ -372,50 +372,42 @@ def classify_log_activities(current_user):
372
  return jsonify({'message': 'Only PDF files are allowed'}), 400
373
 
374
  try:
375
- # Check if OpenAI API key is set
376
  api_key = os.environ.get('OPENAI_API_KEY')
377
  if not api_key:
378
  logger.error("OPENAI_API_KEY environment variable is not set")
379
  return jsonify({'message': 'OpenAI API key not configured'}), 500
380
 
381
- # Create OpenAI client - removed any proxies parameter
382
- client = openai.OpenAI(api_key=api_key)
383
-
384
- # Read file content
385
  file_content = file.read()
386
  logger.info(f"Read {len(file_content)} bytes from file {file.filename}")
387
 
388
- # Extract text from PDF using OCR
389
- logger.info(f"Starting OCR for uploaded log")
390
  extracted_text = pdf_to_text(file_content, is_bytes=True)
391
- logger.info(f"Extracted {len(extracted_text)} characters of text")
392
 
393
- # Extract activities using LLM
394
- logger.info(f"Extracting activities from log")
395
  activities_json = extract_activities(extracted_text)
 
396
 
397
- # Parse the activities JSON
398
  activities_data = json.loads(activities_json)
399
  activities = activities_data.get('activities', [])
400
- logger.info(f"Extracted {len(activities)} activities")
401
 
402
- # Classify each activity against workflows
403
- logger.info(f"Classifying activities")
404
-
405
- # Get all workflows for this department
406
  workflows = Workflow.find_by_department(current_user.department_id)
 
407
 
408
- # Skip if no workflows defined
409
  if not workflows:
410
- logger.warning(f"No workflows defined for department {current_user.department_id}")
411
  return jsonify({
412
  'message': 'No workflows to classify against',
413
  'activities': activities,
414
  'classified_activities': [],
415
- 'extracted_text': extracted_text # Include the extracted text in the response
416
  }), 200
417
 
418
- # Prepare workflow information for classification
419
  workflow_info = []
420
  for workflow in workflows:
421
  workflow_info.append({
@@ -423,43 +415,54 @@ def classify_log_activities(current_user):
423
  "title": workflow.title,
424
  "description": workflow.description
425
  })
 
426
 
427
- # Classify each activity
428
- classified_activities = []
429
 
430
- for activity in activities:
431
- # Classify activity against workflows
 
432
  workflow_id = classify_activity(activity, workflow_info)
433
 
434
- # Add classification result
 
 
 
 
 
 
435
  if workflow_id:
436
  workflow = next((w for w in workflow_info if w["id"] == workflow_id), None)
437
- workflow_title = workflow["title"] if workflow else "Unknown workflow"
438
-
439
- classified_activities.append({
440
- "activity": activity,
441
- "workflow_id": workflow_id,
442
- "workflow_title": workflow_title,
443
- "classified": True
444
- })
445
  else:
446
- classified_activities.append({
447
- "activity": activity,
448
- "classified": False
449
- })
450
 
451
- logger.info(f"Classification complete. {len(classified_activities)} activities classified.")
452
 
 
 
453
  return jsonify({
454
  'message': 'Log activities extracted and classified',
455
- 'activities': activities,
456
- 'classified_activities': classified_activities,
457
  'workflows': workflow_info,
458
- 'extracted_text': extracted_text # Include the extracted text in the response
459
  }), 200
460
 
461
  except Exception as e:
462
- logger.error(f"Error classifying log activities: {str(e)}")
 
463
  import traceback
464
- logger.error(traceback.format_exc())
465
- return jsonify({'message': f'Error classifying log activities: {str(e)}'}), 500
 
 
355
  Extract and classify activities from a log file without creating incidents
356
  Returns classified activities for the frontend to handle
357
  """
358
+ logger.info(f"Entering classify_log_activities for user {current_user.email}")
359
 
360
  if 'file' not in request.files:
361
  logger.error("No file part in the request")
 
372
  return jsonify({'message': 'Only PDF files are allowed'}), 400
373
 
374
  try:
375
+ logger.info("Checking for OpenAI API key...")
376
  api_key = os.environ.get('OPENAI_API_KEY')
377
  if not api_key:
378
  logger.error("OPENAI_API_KEY environment variable is not set")
379
  return jsonify({'message': 'OpenAI API key not configured'}), 500
380
 
381
+ logger.info("Reading file content...")
 
 
 
382
  file_content = file.read()
383
  logger.info(f"Read {len(file_content)} bytes from file {file.filename}")
384
 
385
+ logger.info(f"Starting OCR...")
 
386
  extracted_text = pdf_to_text(file_content, is_bytes=True)
387
+ logger.info(f"OCR finished. Extracted {len(extracted_text)} characters.")
388
 
389
+ logger.info(f"Extracting activities with LLM...")
 
390
  activities_json = extract_activities(extracted_text)
391
+ logger.info(f"Activity extraction finished. Received JSON string of length {len(activities_json)}.")
392
 
393
+ logger.info("Parsing activities JSON...")
394
  activities_data = json.loads(activities_json)
395
  activities = activities_data.get('activities', [])
396
+ logger.info(f"Parsed activities JSON. Found {len(activities)} activities.")
397
 
398
+ logger.info(f"Fetching workflows for department {current_user.department_id}...")
 
 
 
399
  workflows = Workflow.find_by_department(current_user.department_id)
400
+ logger.info(f"Fetched {len(workflows)} workflows.")
401
 
 
402
  if not workflows:
403
+ logger.warning(f"No workflows defined for department {current_user.department_id}. Returning early.")
404
  return jsonify({
405
  'message': 'No workflows to classify against',
406
  'activities': activities,
407
  'classified_activities': [],
408
+ 'extracted_text': extracted_text
409
  }), 200
410
 
 
411
  workflow_info = []
412
  for workflow in workflows:
413
  workflow_info.append({
 
415
  "title": workflow.title,
416
  "description": workflow.description
417
  })
418
+ logger.info(f"Prepared workflow info for classification: {workflow_info}")
419
 
420
+ classified_activities_output = []
421
+ logger.info(f"Starting classification loop for {len(activities)} activities...")
422
 
423
+ for index, activity in enumerate(activities):
424
+ logger.info(f"Classifying activity {index + 1}/{len(activities)}: '{activity.get('activity', 'N/A')}'")
425
+ # classify_activity function now handles its own detailed logging
426
  workflow_id = classify_activity(activity, workflow_info)
427
 
428
+ activity_result = {
429
+ "activity": activity,
430
+ "classified": False, # Default to false
431
+ "workflow_id": None,
432
+ "workflow_title": None
433
+ }
434
+
435
  if workflow_id:
436
  workflow = next((w for w in workflow_info if w["id"] == workflow_id), None)
437
+ if workflow:
438
+ activity_result["classified"] = True
439
+ activity_result["workflow_id"] = workflow_id
440
+ activity_result["workflow_title"] = workflow["title"]
441
+ logger.info(f"Activity {index + 1} classified as Workflow: {workflow['title']} ({workflow_id})")
442
+ else:
443
+ # This case should be rare now due to checks in classify_activity
444
+ logger.warning(f"Activity {index + 1} returned workflow ID {workflow_id} but no matching workflow found in info list.")
445
  else:
446
+ logger.info(f"Activity {index + 1} classified as mundane.")
447
+
448
+ classified_activities_output.append(activity_result)
 
449
 
450
+ logger.info(f"Classification loop finished. Successfully processed {len(classified_activities_output)} activities.")
451
 
452
+ # If we reached here, the try block succeeded.
453
+ logger.info("Successfully processed classification request. Returning 200 OK.")
454
  return jsonify({
455
  'message': 'Log activities extracted and classified',
456
+ 'activities': activities, # Original activities
457
+ 'classified_activities': classified_activities_output, # Activities with classification results
458
  'workflows': workflow_info,
459
+ 'extracted_text': extracted_text
460
  }), 200
461
 
462
  except Exception as e:
463
+ # Log the exception *before* returning the 500 response
464
+ logger.error(f"!!! Unhandled exception in classify_log_activities: {str(e)}")
465
  import traceback
466
+ logger.error(traceback.format_exc()) # Log the full traceback
467
+ # Return a generic error message, the detailed error is in the logs
468
+ return jsonify({'message': 'An internal server error occurred during log classification.'}), 500