iyadsultan committed on
Commit
aeba5f6
·
1 Parent(s): 4a7a70e

Refactor data directory management and session handling

Browse files

Read DATA_DIR from the environment, falling back to a default temporary directory (/tmp/human_notes_evaluator), and configure filesystem sessions stored in a sessions subdirectory of DATA_DIR that is created on demand. Simplify logging by removing unnecessary debug statements, and trim the document navigation logging for better readability and maintainability.

Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +14 -51
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
app.py CHANGED
@@ -13,20 +13,15 @@ import chardet
13
  import uuid
14
 
15
  # Define DATA_DIR first before using it
16
- # Force persistent data directory - never use /tmp on any platform
17
- DATA_DIR = os.path.join(os.getcwd(), 'data')
18
- print(f"DATA_DIR forced to persistent location: {DATA_DIR}")
19
-
20
- # Override any environment variable that might force /tmp usage
21
- if 'DATA_DIR' in os.environ:
22
- print(f"WARNING: Ignoring environment DATA_DIR={os.environ['DATA_DIR']} - using persistent location instead")
23
 
24
  # Then configure the app
25
  app = Flask(__name__)
26
  app.secret_key = os.environ.get('SECRET_KEY', 'your-secret-key-here') # Gets from env or uses default
27
 
28
- # Configure session - use simple in-memory sessions for HF Spaces compatibility
29
- # Filesystem sessions cause permission issues on HF Spaces
 
30
  app.config['SESSION_PERMANENT'] = False
31
 
32
  # Constants
@@ -128,12 +123,8 @@ def save_evaluation(data):
128
 
129
  # Log the data being saved
130
  log_error(f"Attempting to save evaluation for {data.get('document_title')} by {data.get('investigator_name')}")
131
- log_error(f"DATA_DIR: {DATA_DIR}")
132
- log_error(f"DATA_DIR exists: {os.path.exists(DATA_DIR)}")
133
- log_error(f"DATA_DIR is writable: {os.access(DATA_DIR, os.W_OK) if os.path.exists(DATA_DIR) else 'Directory does not exist'}")
134
 
135
  eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
136
- log_error(f"Evaluation file path: {eval_path}")
137
 
138
  # Add timestamp to the data
139
  data['timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -427,7 +418,8 @@ def evaluate():
427
  # Make sure data directory exists
428
  ensure_data_directory()
429
 
430
- # Using in-memory sessions, no need to create sessions directory
 
431
 
432
  # Initialize current document index if not set (start fresh each session, 1-based)
433
  if 'current_document_index' not in session:
@@ -440,14 +432,10 @@ def evaluate():
440
  documents = load_documents()
441
  if jump_to >= 1 and jump_to <= len(documents):
442
  session['current_document_index'] = jump_to # Store as 1-based index
443
- log_error(f"JUMP: Setting document index to {jump_to} (document #{jump_to})")
444
  else:
445
  flash(f"Invalid document number. Please enter a number between 1 and {len(documents)}.")
446
  log_error(f"Invalid jump_to value: {jump_to}")
447
- else:
448
- # Log current position when not jumping
449
- current_idx = session.get('current_document_index', 1)
450
- log_error(f"CONTINUE: Current document index is {current_idx} (document #{current_idx})")
451
 
452
  # Generate session ID if not set
453
  if 'session_id' not in session:
@@ -514,11 +502,9 @@ def evaluate():
514
 
515
  # Handle "Skip" action
516
  if action == 'skip':
517
- old_index = session.get('current_document_index', 1)
518
- session['current_document_index'] = old_index + 1
519
- log_error(f"SKIP: Moving from document {old_index} to document {session['current_document_index']}")
520
  flash("Document skipped.")
521
- return redirect(url_for('evaluate')) # No jump_to parameter, continue from current position
522
 
523
  # Handle regular evaluation submission
524
  # Get all documents
@@ -565,14 +551,13 @@ def evaluate():
565
 
566
  # Move to next document
567
  session['current_document_index'] = current_index + 1
568
- log_error(f"SUBMIT: Moving from document {current_index} to document {session['current_document_index']}")
569
 
570
  # Check if all documents have been processed
571
  if session['current_document_index'] > len(all_documents):
572
  flash("All documents have been evaluated. Thank you!")
573
  return redirect(url_for('results'))
574
 
575
- # Redirect to next document (no jump_to parameter, continue from current position)
576
  return redirect(url_for('evaluate'))
577
  else:
578
  flash("Error saving evaluation. Please try again.")
@@ -661,14 +646,6 @@ def jump_to_document():
661
  def results():
662
  """Results page showing evaluations from current session only."""
663
  session_id = session.get('session_id', '')
664
- log_error(f"Results page accessed. Session ID: {session_id}")
665
- log_error(f"DATA_DIR in results: {DATA_DIR}")
666
- eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
667
- log_error(f"Checking for evaluations file at: {eval_path}")
668
- log_error(f"File exists: {os.path.exists(eval_path)}")
669
- if os.path.exists(eval_path):
670
- log_error(f"File size: {os.path.getsize(eval_path)} bytes")
671
-
672
  eval_df, filename_to_desc, filename_to_mrn = get_session_results(session_id)
673
 
674
  # Check if this is coming from "stop and save" action
@@ -921,22 +898,12 @@ def error_page():
921
  error_details=error_details)
922
 
923
  def ensure_data_directory():
924
- """Ensure data directory exists with proper permissions"""
925
  try:
926
  os.makedirs(DATA_DIR, exist_ok=True)
927
- # Set permissions to be world-writable for HF Spaces compatibility
928
- try:
929
- os.chmod(DATA_DIR, 0o777)
930
- print(f"Created/verified data directory at {DATA_DIR} with write permissions")
931
- except Exception as perm_error:
932
- print(f"Created directory but couldn't set permissions: {perm_error}")
933
-
934
- log_error(f"Created/verified data directory at {DATA_DIR}")
935
- log_error(f"Directory writable: {os.access(DATA_DIR, os.W_OK)}")
936
  except Exception as e:
937
- error_msg = f"Error creating data directory: {str(e)}"
938
- print(error_msg)
939
- log_error(error_msg)
940
 
941
  def copy_template_if_needed():
942
  """Copy template file to documents.csv if it doesn't exist"""
@@ -978,11 +945,7 @@ def get_stored_evaluator_name():
978
  return None
979
 
980
  if __name__ == '__main__':
981
- print("\n===== Application Startup at", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "=====")
982
- print(f"IMPORTANT: Using DATA_DIR = {DATA_DIR}")
983
- print(f"Current working directory: {os.getcwd()}")
984
- print("Session storage: In-memory (HF Spaces compatible)")
985
- print("=====\n")
986
 
987
  # Create data directory
988
  ensure_data_directory()
 
13
  import uuid
14
 
15
  # Define DATA_DIR first before using it
16
+ DATA_DIR = os.environ.get('DATA_DIR', '/tmp/human_notes_evaluator')
 
 
 
 
 
 
17
 
18
  # Then configure the app
19
  app = Flask(__name__)
20
  app.secret_key = os.environ.get('SECRET_KEY', 'your-secret-key-here') # Gets from env or uses default
21
 
22
+ # Now configure session after DATA_DIR is defined
23
+ app.config['SESSION_TYPE'] = 'filesystem'
24
+ app.config['SESSION_FILE_DIR'] = os.path.join(DATA_DIR, 'sessions')
25
  app.config['SESSION_PERMANENT'] = False
26
 
27
  # Constants
 
123
 
124
  # Log the data being saved
125
  log_error(f"Attempting to save evaluation for {data.get('document_title')} by {data.get('investigator_name')}")
 
 
 
126
 
127
  eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
 
128
 
129
  # Add timestamp to the data
130
  data['timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
418
  # Make sure data directory exists
419
  ensure_data_directory()
420
 
421
+ # Create sessions directory if it doesn't exist
422
+ os.makedirs(os.path.join(DATA_DIR, 'sessions'), exist_ok=True)
423
 
424
  # Initialize current document index if not set (start fresh each session, 1-based)
425
  if 'current_document_index' not in session:
 
432
  documents = load_documents()
433
  if jump_to >= 1 and jump_to <= len(documents):
434
  session['current_document_index'] = jump_to # Store as 1-based index
435
+ log_error(f"Jumping to document number: {jump_to} (index: {jump_to})")
436
  else:
437
  flash(f"Invalid document number. Please enter a number between 1 and {len(documents)}.")
438
  log_error(f"Invalid jump_to value: {jump_to}")
 
 
 
 
439
 
440
  # Generate session ID if not set
441
  if 'session_id' not in session:
 
502
 
503
  # Handle "Skip" action
504
  if action == 'skip':
505
+ session['current_document_index'] = session.get('current_document_index', 1) + 1
 
 
506
  flash("Document skipped.")
507
+ return redirect(url_for('evaluate'))
508
 
509
  # Handle regular evaluation submission
510
  # Get all documents
 
551
 
552
  # Move to next document
553
  session['current_document_index'] = current_index + 1
 
554
 
555
  # Check if all documents have been processed
556
  if session['current_document_index'] > len(all_documents):
557
  flash("All documents have been evaluated. Thank you!")
558
  return redirect(url_for('results'))
559
 
560
+ # Redirect to next document
561
  return redirect(url_for('evaluate'))
562
  else:
563
  flash("Error saving evaluation. Please try again.")
 
646
  def results():
647
  """Results page showing evaluations from current session only."""
648
  session_id = session.get('session_id', '')
 
 
 
 
 
 
 
 
649
  eval_df, filename_to_desc, filename_to_mrn = get_session_results(session_id)
650
 
651
  # Check if this is coming from "stop and save" action
 
898
  error_details=error_details)
899
 
900
  def ensure_data_directory():
901
+ """Ensure data directory exists"""
902
  try:
903
  os.makedirs(DATA_DIR, exist_ok=True)
904
+ print(f"Created/verified data directory at {DATA_DIR}")
 
 
 
 
 
 
 
 
905
  except Exception as e:
906
+ print(f"Error creating data directory: {str(e)}")
 
 
907
 
908
  def copy_template_if_needed():
909
  """Copy template file to documents.csv if it doesn't exist"""
 
945
  return None
946
 
947
  if __name__ == '__main__':
948
+ print("\n===== Application Startup at", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "=====\n")
 
 
 
 
949
 
950
  # Create data directory
951
  ensure_data_directory()