iyadsultan committed on
Commit
59cfd68
·
1 Parent(s): 8b48b6d

Refactor application structure and enhance functionality

Browse files

Update the application to improve compatibility with HF Spaces by configuring the data directory based on the environment. Introduce a new function to ensure the data directory exists and create template files if they are missing. Streamline the evaluation process by consolidating form submissions and enhancing user feedback. Update the Dockerfile to use Python 3.9 and adjust directory permissions for better security. Improve logging throughout the application for better traceability and debugging.

Files changed (3) hide show
  1. Dockerfile +16 -10
  2. app.py +372 -480
  3. templates/evaluate.html +11 -14
Dockerfile CHANGED
@@ -1,19 +1,25 @@
1
- FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- # Create writable directories
6
- RUN mkdir -p /tmp/human_notes_evaluator && \
7
- chmod -R 777 /tmp/human_notes_evaluator
8
-
9
  COPY requirements.txt .
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
- COPY . .
 
 
 
 
 
 
13
 
14
- # Make sure environment variables are set
15
- ENV DATA_DIR=/tmp/human_notes_evaluator
16
  ENV PYTHONUNBUFFERED=1
17
 
18
- # Start the application
19
- CMD ["python", "app.py"]
 
 
 
 
1
+ FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
5
+ # Install dependencies
 
 
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
9
+ # Copy application files
10
+ COPY app.py .
11
+ COPY templates/ templates/
12
+ COPY static/ static/
13
+
14
+ # Create data directory with proper permissions
15
+ RUN mkdir -p /app/data && chmod 777 /app/data
16
 
17
+ # Set environment variables
18
+ ENV DATA_DIR=/app/data
19
  ENV PYTHONUNBUFFERED=1
20
 
21
+ # Expose port
22
+ EXPOSE 7860
23
+
24
+ # Run the application
25
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -11,20 +11,26 @@ import shutil
11
  import traceback
12
  import chardet
13
 
14
- # Define DATA_DIR first before using it
15
- DATA_DIR = os.environ.get('DATA_DIR', '/tmp/human_notes_evaluator')
16
-
17
- # Then configure the app
 
 
 
 
 
18
  app = Flask(__name__)
19
- app.secret_key = os.environ.get('SECRET_KEY', 'your-secret-key-here') # Gets from env or uses default
20
 
21
  # Configure session
22
  app.config['SESSION_PERMANENT'] = False
 
23
 
24
  # Constants
25
  CRITERIA = [
26
  "Up-to-date",
27
- "Accurate",
28
  "Thorough",
29
  "Relevant",
30
  "Well-organized",
@@ -61,106 +67,132 @@ def log_error(error_msg):
61
  # Keep only the most recent 10 errors
62
  while len(ERROR_LOG) > 10:
63
  ERROR_LOG.pop(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- def detect_encoding(file_path):
66
- """Detect the encoding of a file to handle different character encodings."""
67
- with open(file_path, 'rb') as f:
68
- result = chardet.detect(f.read())
69
- return result['encoding']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def load_documents():
72
- """Load all documents from CSV file (fresh start each session)."""
73
  try:
74
  file_path = os.path.join(DATA_DIR, 'documents.csv')
75
 
76
- # Check if file exists
77
  if not os.path.exists(file_path):
78
  log_error(f"Documents file not found at {file_path}")
79
  return []
80
 
81
- try:
82
- # Try to detect encoding
83
- encoding = detect_encoding(file_path)
84
- log_error(f"Detected encoding: {encoding}")
85
-
86
- # Try to read with pandas using the detected encoding
87
- df = pd.read_csv(file_path, encoding=encoding)
88
- log_error("Successfully parsed CSV with standard settings")
89
- except Exception as e:
90
- log_error(f"Error parsing CSV: {str(e)}")
91
- return []
92
 
93
  # Convert columns to string to ensure compatibility
94
  for col in df.columns:
95
- df[col] = df[col].astype(str)
96
 
97
- # Log some stats
98
  log_error(f"DataFrame columns: {list(df.columns)}")
99
  log_error(f"DataFrame shape: {df.shape}")
100
 
101
- if not df.empty:
102
- log_error(f"First row: {df.iloc[0].to_dict()}")
103
-
104
- # Convert to list of dictionaries (return all documents)
105
  documents = df.to_dict('records')
106
- log_error(f"Returning {len(documents)} documents for evaluation")
107
  return documents
108
 
109
  except Exception as e:
110
  log_error(f"Error in load_documents: {str(e)}")
111
  return []
112
 
113
-
114
-
115
  def save_evaluation(data):
116
  """Save evaluation data to CSV file."""
117
  try:
118
- # Ensure the data directory exists
119
  ensure_data_directory()
120
 
121
- # Log the data being saved
122
- log_error(f"Attempting to save evaluation for {data.get('document_title')} by {data.get('investigator_name')}")
123
 
124
  eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
125
 
126
- # Add timestamp to the data
127
  data['timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
128
 
129
- # Check if file exists and if it's corrupted
130
  file_exists = os.path.exists(eval_path)
131
- file_corrupted = False
132
-
133
- if file_exists:
134
- # Try to read the existing file to check for corruption
135
- try:
136
- pd.read_csv(eval_path)
137
- log_error("Existing CSV file is valid")
138
- except pd.errors.ParserError as e:
139
- log_error(f"Existing CSV file is corrupted: {str(e)}")
140
- file_corrupted = True
141
- # Create backup of corrupted file
142
- backup_path = eval_path + f'.corrupted.{datetime.now().strftime("%Y%m%d_%H%M%S")}'
143
- try:
144
- shutil.copy(eval_path, backup_path)
145
- log_error(f"Backed up corrupted file to {backup_path}")
146
- except Exception as backup_error:
147
- log_error(f"Could not backup corrupted file: {str(backup_error)}")
148
-
149
- # Remove corrupted file
150
- os.remove(eval_path)
151
- file_exists = False
152
- log_error("Removed corrupted CSV file")
153
 
154
- # Write the data
 
 
 
 
 
 
 
 
 
155
  with open(eval_path, 'a', newline='', encoding='utf-8') as f:
156
- writer = csv.DictWriter(f, fieldnames=data.keys())
157
 
158
- # Write header if it's a new file or was corrupted
159
- if not file_exists or file_corrupted:
160
  writer.writeheader()
161
- log_error("Wrote CSV header")
162
 
163
- # Write the data row
164
  writer.writerow(data)
165
 
166
  log_error(f"Successfully saved evaluation to {eval_path}")
@@ -168,149 +200,95 @@ def save_evaluation(data):
168
 
169
  except Exception as e:
170
  log_error(f"Error saving evaluation: {str(e)}")
171
- # Attempt to save to a backup location as fallback
172
- try:
173
- backup_path = os.path.join('/tmp', 'evaluation_backup.csv')
174
- log_error(f"Trying backup location: {backup_path}")
175
-
176
- with open(backup_path, 'a', newline='', encoding='utf-8') as f:
177
- writer = csv.DictWriter(f, fieldnames=data.keys())
178
- if not os.path.exists(backup_path) or os.path.getsize(backup_path) == 0:
179
- writer.writeheader()
180
- writer.writerow(data)
181
-
182
- log_error(f"Saved to backup location: {backup_path}")
183
- return True
184
- except Exception as backup_error:
185
- log_error(f"Backup save also failed: {str(backup_error)}")
186
- return False
187
 
188
  def get_results():
189
- """Get evaluation results for display."""
190
  try:
191
- # Load evaluations with error handling
192
- try:
193
- eval_df = pd.read_csv(os.path.join(DATA_DIR, 'evaluations.csv'))
194
- except pd.errors.ParserError as e:
195
- log_error(f"CSV parsing error in get_results: {str(e)}")
196
- return pd.DataFrame(), {}, {}
197
-
198
- # Load all documents to get descriptions and MRN
199
- try:
200
- docs_df = pd.read_csv(os.path.join(DATA_DIR, 'documents.csv'))
201
- # Create a mapping of filename to description and MRN
202
- filename_to_desc = dict(zip(docs_df['filename'], docs_df['description']))
203
- filename_to_mrn = dict(zip(docs_df['filename'], docs_df['mrn']))
204
- except FileNotFoundError:
205
- filename_to_desc = {}
206
- filename_to_mrn = {}
207
 
208
- return eval_df, filename_to_desc, filename_to_mrn
209
- except FileNotFoundError:
210
- return pd.DataFrame(), {}, {}
211
-
212
- def get_session_results(session_id):
213
- """Get evaluation results for current session only."""
214
- try:
215
- # Load evaluations with error handling for column mismatches
216
- try:
217
- eval_df = pd.read_csv(os.path.join(DATA_DIR, 'evaluations.csv'))
218
- except pd.errors.ParserError as e:
219
- log_error(f"CSV parsing error: {str(e)}")
220
- # Try to read with different options or recreate the file
221
- eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
222
- log_error(f"Attempting to backup and recreate corrupted evaluations file")
223
- # Backup the corrupted file
224
- if os.path.exists(eval_path):
225
- backup_path = eval_path + '.backup'
226
- shutil.copy(eval_path, backup_path)
227
- log_error(f"Backed up corrupted file to {backup_path}")
228
- # Return empty DataFrame
229
  return pd.DataFrame(), {}, {}
230
 
231
- # Filter by session_id if column exists
232
- if 'session_id' in eval_df.columns and session_id:
233
- eval_df = eval_df[eval_df['session_id'] == session_id]
234
- else:
235
- # If no session_id column exists or no session_id provided, return empty for fresh sessions
236
- log_error(f"No session_id column found or no session_id provided. Returning empty results for fresh session.")
237
- eval_df = pd.DataFrame()
238
 
239
- # Load all documents to get descriptions and MRN
240
  try:
241
  docs_df = pd.read_csv(os.path.join(DATA_DIR, 'documents.csv'))
242
- # Create a mapping of filename to description and MRN
243
  filename_to_desc = dict(zip(docs_df['filename'], docs_df['description']))
244
  filename_to_mrn = dict(zip(docs_df['filename'], docs_df['mrn']))
245
- except FileNotFoundError:
246
  filename_to_desc = {}
247
  filename_to_mrn = {}
248
 
249
  return eval_df, filename_to_desc, filename_to_mrn
250
- except FileNotFoundError:
 
 
251
  return pd.DataFrame(), {}, {}
252
 
253
- def get_total_document_count():
254
- """Get the total number of documents."""
 
 
 
 
 
 
255
  try:
256
- df = pd.read_csv(os.path.join(DATA_DIR, 'documents.csv'))
257
- return len(df)
258
- except Exception:
259
- return 0
 
 
 
260
 
261
- def get_evaluated_document_count():
262
- """Get the count of evaluated documents."""
263
  try:
264
- eval_df = pd.read_csv(os.path.join(DATA_DIR, 'evaluations.csv'))
265
- return len(eval_df['document_title'].unique())
266
- except FileNotFoundError:
267
- return 0
268
- except Exception:
269
- return 0
 
 
270
 
271
- def load_and_validate_csv(file_path):
272
- """Load and validate a CSV file to ensure it has the required format"""
273
  try:
274
- # Try to detect encoding
275
- encoding = detect_encoding(file_path)
276
- log_error(f"Detected encoding: {encoding}")
277
-
278
- # Try to read with pandas using the detected encoding
279
- df = pd.read_csv(file_path, encoding=encoding)
280
- log_error("Successfully parsed CSV with standard settings")
281
-
282
- # Check for required columns
283
- required_columns = ['filename', 'description', 'mrn', 'note']
284
- missing_columns = [col for col in required_columns if col not in df.columns]
285
-
286
- if missing_columns:
287
- log_error(f"Missing required columns: {missing_columns}")
288
- raise ValueError(f"Missing required columns: {missing_columns}")
289
-
290
- # Log success information
291
- log_error(f"DataFrame columns: {list(df.columns)}")
292
- log_error(f"DataFrame shape: {df.shape}")
293
-
294
- if not df.empty:
295
- log_error(f"First row: {df.iloc[0].to_dict()}")
296
-
297
- return df
298
-
299
  except Exception as e:
300
- log_error(f"Error validating CSV file: {str(e)}")
301
- raise
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
  @app.route('/', methods=['GET', 'POST'])
304
  def index():
305
- # Clear any existing session data for fresh start
306
- session.clear()
307
-
308
  if request.method == 'POST':
309
- # Ensure data directory exists
310
  ensure_data_directory()
311
 
312
  # Get evaluator name
313
- evaluator_name = request.form.get('evaluator_name', '')
314
  if not evaluator_name:
315
  flash("Please enter your name as the evaluator.")
316
  return render_template('index.html')
@@ -323,172 +301,179 @@ def index():
323
  flash("No file selected.")
324
  return render_template('index.html')
325
 
326
- if file and '.' in file.filename and file.filename.rsplit('.', 1)[1].lower() == 'csv':
327
  try:
328
- # Read file directly from memory instead of saving to disk first
329
  file_content = file.read()
330
 
331
- # Parse CSV from memory
332
- try:
333
- # Try to detect encoding
334
- encoding = chardet.detect(file_content)['encoding']
335
- log_error(f"Detected encoding: {encoding}")
336
-
337
- # Load CSV from in-memory content
338
- csv_buffer = io.StringIO(file_content.decode(encoding))
339
- df = pd.read_csv(csv_buffer)
340
-
341
- # Validate the dataframe
342
- required_columns = ['filename', 'description', 'mrn', 'note']
343
- missing_columns = [col for col in required_columns if col not in df.columns]
344
-
345
- if missing_columns:
346
- raise ValueError(f"Missing required columns: {missing_columns}")
347
-
348
- # Write to documents.csv only after validation succeeded
349
- documents_path = os.path.join(DATA_DIR, 'documents.csv')
350
- df.to_csv(documents_path, index=False)
351
-
352
- # Set session cookie
353
- session['evaluator_name'] = evaluator_name
354
-
355
- flash("File uploaded successfully!")
356
-
357
- # After uploading documents.csv successfully, copy template_documents if it doesn't exist
358
- documents_path = os.path.join(DATA_DIR, 'documents.csv')
359
-
360
- # If user uploaded file, ensure we save the evaluator name persistently
361
- if evaluator_name:
362
- store_evaluator_name(evaluator_name)
363
-
364
- # Also create a direct access link they can bookmark
365
- direct_link = url_for('evaluate', evaluator=evaluator_name, _external=True)
366
- flash(f"Bookmark this link for direct access to your evaluation: {direct_link}")
367
-
368
- return redirect(url_for('evaluate'))
369
-
370
- except Exception as inner_e:
371
- log_error(f"Error parsing CSV data: {str(inner_e)}")
372
- raise ValueError(f"Error parsing CSV data: {str(inner_e)}")
373
-
374
  except Exception as e:
375
- log_error(f"Error during file upload: {str(e)}")
376
- flash(f"Error during file upload: {str(e)}. Please try again.")
377
  else:
378
  flash("Please upload a CSV file.")
379
 
380
  return render_template('index.html')
381
 
382
- # Handle GET request
383
- return render_template('index.html')
 
384
 
385
  @app.route('/evaluate', methods=['GET', 'POST'])
386
  def evaluate():
387
- """Display a document for evaluation or process evaluation form."""
388
- log_error(f"Starting /evaluate route, session: {session}")
389
-
390
- # Try multiple methods to get evaluator name
391
- evaluator_name = session.get('evaluator_name', '')
392
-
393
- # If not in session, check query parameter
394
- if not evaluator_name and request.args.get('evaluator'):
395
- evaluator_name = request.args.get('evaluator')
396
- session['evaluator_name'] = evaluator_name
397
- store_evaluator_name(evaluator_name)
398
- log_error(f"Got evaluator name from query param: {evaluator_name}")
399
-
400
- # If still not found, try file-based storage
401
- if not evaluator_name:
402
- evaluator_name = get_stored_evaluator_name()
403
- if evaluator_name:
404
- session['evaluator_name'] = evaluator_name
405
- log_error(f"Got evaluator name from file: {evaluator_name}")
406
-
407
- log_error(f"Final evaluator name: {evaluator_name}")
408
 
409
- # Still no evaluator name, redirect to index with a message
410
  if not evaluator_name:
411
  flash("Please enter your name before evaluating documents.")
412
- log_error("No evaluator name found, redirecting to index")
413
  return redirect(url_for('index'))
414
 
415
- # Make sure data directory exists
 
 
 
 
 
416
  ensure_data_directory()
417
 
418
- # Create sessions directory if it doesn't exist
419
- os.makedirs(os.path.join(DATA_DIR, 'sessions'), exist_ok=True)
 
 
420
 
421
- # Initialize current document index from file
422
  current_index = load_current_index(evaluator_name)
423
 
424
  # Handle jump requests
425
  jump_to = request.args.get('jump_to', type=int)
426
- if jump_to is not None:
427
- documents = load_documents()
428
- if 1 <= jump_to <= len(documents):
429
- current_index = jump_to
430
- save_current_index(evaluator_name, current_index)
431
 
432
- # Handle form submissions
433
  if request.method == 'POST':
434
  action = request.form.get('action', 'submit')
435
 
436
  if action == 'skip':
437
- current_index += 1
438
  save_current_index(evaluator_name, current_index)
439
  flash("Document skipped.")
440
  return redirect(url_for('evaluate'))
441
 
442
  elif action == 'stop_save':
443
- # Handle save logic
444
- return redirect(url_for('results'))
445
 
446
  elif action == 'submit':
447
- # Save evaluation logic
448
- current_index += 1
449
- save_current_index(evaluator_name, current_index)
450
- flash("Evaluation saved successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
- # Load current document
453
- documents = load_documents()
454
  if current_index > len(documents):
455
- flash("All documents have been evaluated.")
456
  return redirect(url_for('results'))
457
 
 
458
  document = documents[current_index - 1]
459
 
460
- # Render template with current_index
461
- return render_template('evaluate.html',
462
- current_note_number=current_index,
463
- evaluator_name=evaluator_name,
464
- note=document.get('note', ''),
465
- description=document.get('description', ''),
466
- mrn=document.get('mrn', ''),
467
- criteria=CRITERIA,
468
- descriptions=CRITERIA_DESCRIPTIONS,
469
- score_range=range(1, 6),
470
- note_origins=NOTE_ORIGINS,
471
- total_docs=len(documents),
472
- evaluated_docs=current_index - 1,
473
- progress=int((current_index - 1) / len(documents) * 100) if len(documents) > 0 else 0)
 
 
 
 
474
 
475
  @app.route('/jump', methods=['POST'])
476
  def jump_to_document():
477
  """Jump to a specific document number."""
478
  try:
479
  document_number = int(request.form.get('document_number', 1))
480
-
481
- # Validate document number range
482
  documents = load_documents()
483
- total_docs = len(documents)
484
 
485
  if document_number < 1:
486
  flash("Document number must be 1 or greater.")
487
- elif document_number > total_docs:
488
- flash(f"Document number cannot be greater than {total_docs}.")
489
  else:
490
- # Redirect to evaluate with jump_to parameter
491
- log_error(f"Jump requested to document {document_number}")
492
  return redirect(url_for('evaluate', jump_to=document_number))
493
  except ValueError:
494
  flash("Please enter a valid document number.")
@@ -498,29 +483,41 @@ def jump_to_document():
498
  @app.route('/results')
499
  def results():
500
  """Results page showing all evaluations."""
501
- eval_df, filename_to_desc, filename_to_mrn = get_results()
502
-
503
- # Always show results page if coming from stop_save
504
- if request.referrer and url_for('evaluate') in request.referrer:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  return render_template('results.html',
506
- evaluations=eval_df.to_dict('records'),
507
- criteria=CRITERIA,
508
- descriptions=CRITERIA_DESCRIPTIONS,
509
- show_empty_message=True)
510
-
511
- # Only redirect if completely empty
512
- if eval_df.empty:
513
- flash('No evaluations available.')
514
- return redirect(url_for('index'))
515
 
516
- return render_template('results.html',
517
- evaluations=eval_df.to_dict('records'),
518
- criteria=CRITERIA,
519
- descriptions=CRITERIA_DESCRIPTIONS)
520
 
521
  @app.route('/export-csv')
522
  def export_csv():
523
- """Export all evaluations to CSV file."""
524
  try:
525
  eval_df, _, _ = get_results()
526
 
@@ -528,12 +525,12 @@ def export_csv():
528
  flash('No evaluations available to export.')
529
  return redirect(url_for('results'))
530
 
531
- # Create in-memory CSV
532
  output = io.StringIO()
533
  eval_df.to_csv(output, index=False, quoting=csv.QUOTE_ALL)
534
  output.seek(0)
535
 
536
- # Convert to BytesIO for send_file
537
  mem = io.BytesIO()
538
  mem.write(output.getvalue().encode('utf-8'))
539
  mem.seek(0)
@@ -545,14 +542,13 @@ def export_csv():
545
  download_name=f'evaluations_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
546
  )
547
  except Exception as e:
548
- error_msg = f'Error exporting CSV: {str(e)}'
549
- log_error(error_msg)
550
- flash(error_msg)
551
  return redirect(url_for('results'))
552
 
553
  @app.route('/upload-documents', methods=['GET', 'POST'])
554
  def upload_documents():
555
- """Alternative method to upload documents.csv via web interface."""
556
  if request.method == 'POST':
557
  try:
558
  if 'file' not in request.files:
@@ -565,98 +561,54 @@ def upload_documents():
565
  return redirect(request.url)
566
 
567
  if file and file.filename.endswith('.csv'):
568
- # Save the file
569
- file_path = os.path.join(DATA_DIR, 'documents.csv')
570
-
571
- # Ensure directory exists with proper permissions
572
  ensure_data_directory()
573
 
574
- # Try to save the file
 
575
  file.save(file_path)
576
 
577
- # Verify the file was saved and is readable
578
- if os.path.exists(file_path):
579
- try:
580
- # Test if we can read it
581
- df = pd.read_csv(file_path)
582
- flash(f'Documents file uploaded successfully! Found {len(df)} documents.')
583
- log_error(f"Documents file uploaded via web interface: {len(df)} documents")
584
- return redirect(url_for('index'))
585
- except Exception as read_error:
586
- flash(f'File uploaded but could not be parsed: {str(read_error)}')
587
- log_error(f"Error parsing uploaded file: {str(read_error)}")
588
- else:
589
- flash('File upload failed - file not found after save')
590
  else:
591
  flash('Please upload a CSV file')
592
 
593
  except Exception as e:
594
- error_msg = f'Error uploading file: {str(e)}'
595
- flash(error_msg)
596
- log_error(error_msg)
597
 
598
  return render_template('upload_documents.html')
599
 
600
- @app.route('/hf-debug')
601
- def hf_debug():
602
- """Special debug route for HF Spaces troubleshooting."""
603
- import platform
604
- debug_info = {
605
- 'platform': platform.system(),
606
- 'python_version': platform.python_version(),
607
- 'current_working_directory': os.getcwd(),
608
- 'data_dir_setting': DATA_DIR,
609
- 'data_dir_exists': os.path.exists(DATA_DIR),
610
- 'data_dir_writable': os.access(DATA_DIR, os.W_OK) if os.path.exists(DATA_DIR) else 'Directory does not exist',
611
- 'data_dir_permissions': oct(os.stat(DATA_DIR).st_mode)[-3:] if os.path.exists(DATA_DIR) else 'N/A',
612
- 'relevant_env_vars': {k: v for k, v in os.environ.items() if k in ['DATA_DIR', 'SPACE_ID', 'SPACE_AUTHOR_NAME', 'SPACE_REPO_NAME']},
613
- 'directory_contents': os.listdir('.') if os.path.exists('.') else [],
614
- 'data_directory_contents': os.listdir(DATA_DIR) if os.path.exists(DATA_DIR) else 'Directory does not exist'
615
- }
616
-
617
- return f"<html><body><h1>HF Debug Info</h1><pre>{str(debug_info)}</pre></body></html>"
618
-
619
  @app.route('/debug')
620
  def debug():
621
  """Debug page showing application state."""
622
- # Check for data directory
623
- if not os.path.exists(DATA_DIR):
624
- try:
625
- ensure_data_directory()
626
- except Exception as e:
627
- log_error(f"Failed to create data directory in debug route: {str(e)}")
628
 
629
- # Get documents
630
  documents = load_documents()
 
 
631
 
632
- # Get evaluations - properly handle DataFrame
633
- eval_df, _, _ = get_results() # Show all evaluations in debug, not just session
634
- # Convert DataFrame to list of dictionaries if not empty
635
- evaluations = [] if eval_df is None or eval_df.empty else eval_df.to_dict('records')
636
-
637
- # Get file paths
638
- docs_path = os.path.join(DATA_DIR, 'documents.csv')
639
- evals_path = os.path.join(DATA_DIR, 'evaluations.csv')
640
-
641
- # Additional debug info
642
  debug_info = {
643
  'data_dir': DATA_DIR,
644
  'data_dir_exists': os.path.exists(DATA_DIR),
645
  'data_dir_writable': os.access(DATA_DIR, os.W_OK) if os.path.exists(DATA_DIR) else False,
646
  'current_working_dir': os.getcwd(),
647
  'session_id': session.get('session_id', 'None'),
648
- 'current_document_index': session.get('current_document_index', 'None'),
649
  'evaluator_name': session.get('evaluator_name', 'None'),
650
  'documents_count': len(documents),
651
  'evaluations_count': len(evaluations),
652
- 'eval_file_size': os.path.getsize(evals_path) if os.path.exists(evals_path) else 0
653
  }
654
 
655
  return render_template('debug.html',
656
  documents=documents,
657
  evaluations=evaluations,
658
- documents_exists=os.path.exists(docs_path),
659
- evaluations_exists=os.path.exists(evals_path),
660
  errors=ERROR_LOG,
661
  debug_info=debug_info
662
  )
@@ -665,44 +617,47 @@ def debug():
665
  def view_instructions():
666
  """Display instructions page."""
667
  return render_template('instructions.html',
668
- criteria=CRITERIA,
669
- descriptions=CRITERIA_DESCRIPTIONS)
 
670
 
671
  @app.route('/download/instructions')
672
  def download_instructions():
673
- """Download instructions as markdown file."""
674
  try:
675
  instructions_path = os.path.join(DATA_DIR, 'instructions.md')
676
  return send_file(instructions_path,
677
- mimetype='text/markdown',
678
- download_name='instructions.md',
679
- as_attachment=True)
 
680
  except FileNotFoundError:
681
  flash('Instructions file not found.')
682
  return redirect(url_for('index'))
683
 
684
  @app.route('/download/template')
685
  def download_template():
686
- """Download sample template CSV file."""
687
  try:
688
  template_path = os.path.join(DATA_DIR, 'sample_documents_template.csv')
689
  return send_file(template_path,
690
- mimetype='text/csv',
691
- download_name='sample_documents_template.csv',
692
- as_attachment=True)
 
693
  except FileNotFoundError:
694
  flash('Template file not found.')
695
  return redirect(url_for('index'))
696
 
697
  @app.route('/reset', methods=['POST'])
698
  def reset():
699
- """Reset the session and return to the landing page."""
700
  session.clear()
701
- # Remove evaluations.csv if it exists
 
702
  evaluations_path = os.path.join(DATA_DIR, 'evaluations.csv')
703
  if os.path.exists(evaluations_path):
704
- # Create backup before removing
705
- backup_path = evaluations_path + f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}'
706
  try:
707
  shutil.copy(evaluations_path, backup_path)
708
  log_error(f"Created backup at {backup_path}")
@@ -710,26 +665,25 @@ def reset():
710
  log_error(f"Could not create backup: {str(e)}")
711
 
712
  os.remove(evaluations_path)
713
- log_error("Removed evaluations.csv file")
 
714
  flash('Session reset. All evaluation data cleared.')
715
  return redirect(url_for('index'))
716
 
717
  @app.route('/clear-corrupted-data', methods=['POST'])
718
  def clear_corrupted_data():
719
- """Clear corrupted evaluations file to start fresh."""
720
  evaluations_path = os.path.join(DATA_DIR, 'evaluations.csv')
721
  if os.path.exists(evaluations_path):
722
- # Create backup before removing
723
- backup_path = evaluations_path + f'.corrupted_backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}'
724
  try:
725
  shutil.copy(evaluations_path, backup_path)
726
- log_error(f"Created corrupted file backup at {backup_path}")
727
  except Exception as e:
728
- log_error(f"Could not create backup: {str(e)}")
729
 
730
  os.remove(evaluations_path)
731
- log_error("Removed corrupted evaluations.csv file")
732
- flash('Corrupted evaluation data cleared. You can now save evaluations properly.')
733
  else:
734
  flash('No evaluation data file found.')
735
 
@@ -741,91 +695,29 @@ def error_page():
741
  error_message = request.args.get('message', 'An unknown error occurred')
742
  error_details = request.args.get('details', '')
743
  return render_template('error.html',
744
- error_message=error_message,
745
- error_details=error_details)
746
-
747
- def ensure_data_directory():
748
- """Ensure data directory exists"""
749
- try:
750
- os.makedirs(DATA_DIR, exist_ok=True)
751
- print(f"Created/verified data directory at {DATA_DIR}")
752
- except Exception as e:
753
- print(f"Error creating data directory: {str(e)}")
754
-
755
- def copy_template_if_needed():
756
- """Copy template file to documents.csv if it doesn't exist"""
757
- documents_path = os.path.join(DATA_DIR, 'documents.csv')
758
- if not os.path.exists(documents_path):
759
- try:
760
- # Copy from template
761
- template_path = 'sample_documents_template.csv'
762
- if os.path.exists(template_path):
763
- shutil.copy(template_path, documents_path)
764
- print(f"Copied template to {documents_path}")
765
- except Exception as e:
766
- print(f"Error copying template: {str(e)}")
767
-
768
- def store_evaluator_name(name):
769
- """Store evaluator name in a file for persistence"""
770
- try:
771
- ensure_data_directory()
772
- with open(os.path.join(DATA_DIR, 'current_evaluator.txt'), 'w') as f:
773
- f.write(name)
774
- log_error(f"Stored evaluator name in file: {name}")
775
- return True
776
- except Exception as e:
777
- log_error(f"Error storing evaluator name: {str(e)}")
778
- return False
779
-
780
- def get_stored_evaluator_name():
781
- """Get stored evaluator name from file"""
782
- try:
783
- file_path = os.path.join(DATA_DIR, 'current_evaluator.txt')
784
- if os.path.exists(file_path):
785
- with open(file_path, 'r') as f:
786
- name = f.read().strip()
787
- log_error(f"Retrieved evaluator name from file: {name}")
788
- return name
789
- return None
790
- except Exception as e:
791
- log_error(f"Error retrieving evaluator name: {str(e)}")
792
- return None
793
-
794
- # Add these new functions for progress tracking
795
- def get_progress_file(evaluator_name):
796
- """Get path to progress file for an evaluator"""
797
- return os.path.join(DATA_DIR, f'{evaluator_name}_progress.txt')
798
-
799
- def save_current_index(evaluator_name, index):
800
- """Save current document index to file"""
801
- try:
802
- with open(get_progress_file(evaluator_name), 'w') as f:
803
- f.write(str(index))
804
- return True
805
- except Exception as e:
806
- log_error(f"Error saving progress: {str(e)}")
807
- return False
808
-
809
- def load_current_index(evaluator_name):
810
- """Load current document index from file"""
811
- try:
812
- if os.path.exists(get_progress_file(evaluator_name)):
813
- with open(get_progress_file(evaluator_name), 'r') as f:
814
- return int(f.read().strip())
815
- return 1 # Start at 1 if no progress file
816
- except Exception as e:
817
- log_error(f"Error loading progress: {str(e)}")
818
- return 1
819
 
820
  if __name__ == '__main__':
821
- print("\n===== Application Startup at", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "=====\n")
 
 
822
 
823
- # Create data directory
824
  ensure_data_directory()
825
 
826
- # Copy template files if needed
827
- copy_template_if_needed()
 
 
 
 
 
 
 
 
 
828
 
829
  # Run the app
830
- app.config['DEBUG'] = True
831
- app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
 
11
  import traceback
12
  import chardet
13
 
14
# Configure data directory for HF Spaces compatibility
if 'SPACE_ID' in os.environ:
    # Running on HF Spaces - keep data in a 'data' folder under the working directory
    DATA_DIR = os.path.join(os.getcwd(), 'data')
else:
    # Local development
    DATA_DIR = os.environ.get('DATA_DIR', '/tmp/human_notes_evaluator')

# Configure the Flask app
app = Flask(__name__)
# The secret key signs the session cookie. Falling back to a fixed, publicly
# known string would let anyone forge sessions, so when SECRET_KEY is not
# provided a random key is generated for this process instead. Sessions then
# do not survive a restart; set SECRET_KEY in the environment to keep them.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32).hex()

# Configure session
app.config['SESSION_PERMANENT'] = False
app.config['SESSION_TYPE'] = 'filesystem'
29
 
30
  # Constants
31
  CRITERIA = [
32
  "Up-to-date",
33
+ "Accurate",
34
  "Thorough",
35
  "Relevant",
36
  "Well-organized",
 
67
  # Keep only the most recent 10 errors
68
  while len(ERROR_LOG) > 10:
69
  ERROR_LOG.pop(0)
70
+ print(f"[LOG] {error_msg}") # Also print to console
71
+
72
def ensure_data_directory():
    """Create the data directory tree and seed it with template files.

    When the ``SPACE_ID`` environment variable is set, the module-level
    ``DATA_DIR`` is first rebound to ``<cwd>/data``. The function then creates
    ``DATA_DIR`` and its ``sessions`` subdirectory (both tolerate already
    existing) and calls ``create_template_files()``.

    Raises:
        Exception: any error raised while creating the directories or the
            template files is logged through ``log_error`` and re-raised.
    """
    global DATA_DIR

    # On HF Spaces the data folder lives under the current working directory.
    if 'SPACE_ID' in os.environ:
        DATA_DIR = os.path.join(os.getcwd(), 'data')
        log_error(f"Running on HF Spaces, using data directory: {DATA_DIR}")

    try:
        for directory in (DATA_DIR, os.path.join(DATA_DIR, 'sessions')):
            os.makedirs(directory, exist_ok=True)
        log_error(f"Created/verified data directory at {DATA_DIR}")

        # Seed the template CSV and instructions file if they are missing.
        create_template_files()
    except Exception as exc:
        log_error(f"Error creating data directory: {str(exc)}")
        raise
92
 
93
def create_template_files():
    """Write the sample CSV template and ``instructions.md`` into DATA_DIR.

    Each file is only written when it does not already exist, so existing
    files are never overwritten. Every file that is created is reported
    through ``log_error``.
    """
    # Sample documents template: a header row followed by two example rows.
    template_path = os.path.join(DATA_DIR, 'sample_documents_template.csv')
    if not os.path.exists(template_path):
        header = ['filename', 'description', 'mrn', 'note']
        sample_rows = [
            ['sample1.txt', 'Example Clinical Note', 'MRN12345', 'This is a sample clinical note for evaluation. Patient presents with...'],
            ['sample2.txt', 'Example Progress Note', 'MRN67890', 'Patient returns for follow-up visit. Current medications include...'],
        ]
        with open(template_path, 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(header)
            csv_writer.writerows(sample_rows)
        log_error(f"Created template file at {template_path}")

    # Short markdown instructions offered for download.
    instructions_path = os.path.join(DATA_DIR, 'instructions.md')
    if not os.path.exists(instructions_path):
        instructions_text = (
            "# Instructions for Human Notes Evaluator\n\n"
            "## How to Use This Application\n\n"
            "1. Upload a CSV file with your documents\n"
            "2. Enter your name as the evaluator\n"
            "3. Rate each document on the 9 criteria\n"
            "4. Export results when complete\n"
        )
        with open(instructions_path, 'w', encoding='utf-8') as md_file:
            md_file.write(instructions_text)
        log_error(f"Created instructions at {instructions_path}")
119
+
120
def detect_encoding(file_content):
    """Guess the character encoding of *file_content* using ``chardet``.

    Args:
        file_content: raw bytes, or a ``str`` (which is encoded to bytes with
            the default codec before detection).

    Returns:
        The encoding name reported by ``chardet.detect``, or ``'utf-8'`` when
        no encoding is reported.
    """
    raw = file_content.encode() if isinstance(file_content, str) else file_content
    guessed = chardet.detect(raw)['encoding']
    return guessed if guessed else 'utf-8'
126
 
127
def load_documents():
    """Load all documents from ``documents.csv`` in DATA_DIR.

    The file is read as bytes, its encoding is guessed with
    ``detect_encoding`` and it is parsed with pandas. Every column is read as
    text so identifiers keep their exact spelling (for example an MRN of
    ``00123`` is not turned into ``123``, and an integer column with blank
    cells is not turned into ``123.0``). Missing cells become empty strings.

    Returns:
        A list with one ``dict`` per CSV row, keyed by column name. An empty
        list is returned when the file does not exist or cannot be read or
        parsed; the reason is reported through ``log_error``.
    """
    try:
        file_path = os.path.join(DATA_DIR, 'documents.csv')

        if not os.path.exists(file_path):
            log_error(f"Documents file not found at {file_path}")
            return []

        # Read file and detect encoding
        with open(file_path, 'rb') as f:
            content = f.read()

        encoding = detect_encoding(content)
        log_error(f"Detected encoding: {encoding}")

        # Parse CSV. dtype=str disables numeric type inference so values are
        # kept verbatim; blank/NA cells are still recognised as missing.
        df = pd.read_csv(io.BytesIO(content), encoding=encoding, dtype=str)
        log_error("Successfully parsed CSV")

        # Missing cells are exposed as empty strings
        df = df.fillna('')

        # Log stats
        log_error(f"DataFrame columns: {list(df.columns)}")
        log_error(f"DataFrame shape: {df.shape}")

        # Convert to list of dictionaries
        documents = df.to_dict('records')
        log_error(f"Loaded {len(documents)} documents for evaluation")
        return documents

    except Exception as e:
        log_error(f"Error in load_documents: {str(e)}")
        return []
163
 
 
 
164
  def save_evaluation(data):
165
  """Save evaluation data to CSV file."""
166
  try:
 
167
  ensure_data_directory()
168
 
169
+ log_error(f"Saving evaluation for {data.get('document_title')} by {data.get('investigator_name')}")
 
170
 
171
  eval_path = os.path.join(DATA_DIR, 'evaluations.csv')
172
 
173
+ # Add timestamp
174
  data['timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
175
 
176
+ # Check if file exists
177
  file_exists = os.path.exists(eval_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ # Define column order
180
+ columns = ['timestamp', 'document_title', 'description', 'mrn', 'investigator_name',
181
+ 'session_id'] + CRITERIA + ['note_origin']
182
+
183
+ # Ensure all columns exist in data
184
+ for col in columns:
185
+ if col not in data:
186
+ data[col] = ''
187
+
188
+ # Write to CSV
189
  with open(eval_path, 'a', newline='', encoding='utf-8') as f:
190
+ writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')
191
 
192
+ if not file_exists:
 
193
  writer.writeheader()
194
+ log_error("Created new evaluations.csv with header")
195
 
 
196
  writer.writerow(data)
197
 
198
  log_error(f"Successfully saved evaluation to {eval_path}")
 
200
 
201
  except Exception as e:
202
  log_error(f"Error saving evaluation: {str(e)}")
203
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
def get_results():
    """Load saved evaluations plus lookup tables built from the documents file.

    Returns:
        A 3-tuple ``(eval_df, filename_to_desc, filename_to_mrn)``:
        ``eval_df`` is the DataFrame read from ``evaluations.csv``; the two
        dicts map each ``filename`` in ``documents.csv`` to its
        ``description`` and ``mrn``. The dicts are empty when the documents
        file cannot be read or lacks those columns. When ``evaluations.csv``
        is missing, or reading it fails, ``(pd.DataFrame(), {}, {})`` is
        returned.
    """
    try:
        eval_path = os.path.join(DATA_DIR, 'evaluations.csv')

        if not os.path.exists(eval_path):
            return pd.DataFrame(), {}, {}

        # Read evaluations
        eval_df = pd.read_csv(eval_path)

        # Load documents for descriptions and MRNs
        try:
            docs_df = pd.read_csv(os.path.join(DATA_DIR, 'documents.csv'))
            filename_to_desc = dict(zip(docs_df['filename'], docs_df['description']))
            filename_to_mrn = dict(zip(docs_df['filename'], docs_df['mrn']))
        except Exception as e:
            # The lookups are best-effort, but a bare ``except:`` would also
            # swallow KeyboardInterrupt/SystemExit and hide the cause, so only
            # ordinary errors are caught and the reason is logged.
            log_error(f"Could not load documents for results lookup: {str(e)}")
            filename_to_desc = {}
            filename_to_mrn = {}

        return eval_df, filename_to_desc, filename_to_mrn

    except Exception as e:
        log_error(f"Error in get_results: {str(e)}")
        return pd.DataFrame(), {}, {}
230
 
231
+ # Progress tracking functions
232
def get_progress_file(evaluator_name):
    """Return the path of the progress file for *evaluator_name*.

    The name is reduced to alphanumerics, spaces, hyphens and underscores
    (trailing whitespace removed) so it cannot contain path separators. If
    nothing is left after that (for example the name is only punctuation), a
    short digest of the original name is used instead, so that such names do
    not all share one ``_progress.txt`` file.

    Args:
        evaluator_name: the evaluator's name as a string.

    Returns:
        ``<DATA_DIR>/sessions/<safe name>_progress.txt``.
    """
    import hashlib  # local import: only needed for the fallback stem below

    safe_name = "".join(
        c for c in evaluator_name if c.isalnum() or c in (' ', '-', '_')
    ).rstrip()
    if not safe_name:
        digest = hashlib.sha256(
            evaluator_name.encode('utf-8', 'backslashreplace')
        ).hexdigest()[:12]
        safe_name = f'evaluator-{digest}'
    return os.path.join(DATA_DIR, 'sessions', f'{safe_name}_progress.txt')
236
+
237
def save_current_index(evaluator_name, index):
    """Persist the evaluator's current document index.

    Makes sure the ``sessions`` directory exists, then overwrites the
    evaluator's progress file with ``str(index)``.

    Returns:
        ``True`` on success; ``False`` if anything fails (the error is
        reported through ``log_error``).
    """
    try:
        sessions_dir = os.path.join(DATA_DIR, 'sessions')
        os.makedirs(sessions_dir, exist_ok=True)
        with open(get_progress_file(evaluator_name), 'w') as progress:
            progress.write(str(index))
    except Exception as exc:
        log_error(f"Error saving progress: {str(exc)}")
        return False
    else:
        return True
247
 
248
def load_current_index(evaluator_name):
    """Read the evaluator's saved document index.

    Returns:
        The integer stored in the evaluator's progress file, or ``1`` when
        the file does not exist or cannot be read/parsed (failures are
        reported through ``log_error``).
    """
    try:
        path = get_progress_file(evaluator_name)
        if not os.path.exists(path):
            return 1
        with open(path, 'r') as progress:
            stored = progress.read()
        return int(stored.strip())
    except Exception as exc:
        log_error(f"Error loading progress: {str(exc)}")
        return 1
259
 
260
def store_evaluator_name(name):
    """Write *name* to ``current_evaluator.txt`` in DATA_DIR.

    The data directory is ensured first via ``ensure_data_directory()``.

    Returns:
        ``True`` when the name was written; ``False`` if anything fails (the
        error is reported through ``log_error``).
    """
    try:
        ensure_data_directory()
        # Build the path only after the call above, which may rebind DATA_DIR.
        target = os.path.join(DATA_DIR, 'current_evaluator.txt')
        with open(target, 'w') as handle:
            handle.write(name)
        log_error(f"Stored evaluator name: {name}")
    except Exception as exc:
        log_error(f"Error storing evaluator name: {str(exc)}")
        return False
    return True
271
+
272
def get_stored_evaluator_name():
    """Read the evaluator name saved in ``current_evaluator.txt``.

    Returns:
        The file's content with surrounding whitespace stripped, or ``None``
        when the file does not exist or cannot be read (failures are reported
        through ``log_error``).
    """
    try:
        source = os.path.join(DATA_DIR, 'current_evaluator.txt')
        if not os.path.exists(source):
            return None
        with open(source, 'r') as handle:
            return handle.read().strip()
    except Exception as exc:
        log_error(f"Error retrieving evaluator name: {str(exc)}")
        return None
283
 
284
  @app.route('/', methods=['GET', 'POST'])
285
  def index():
286
+ """Home page with file upload and evaluator name."""
 
 
287
  if request.method == 'POST':
 
288
  ensure_data_directory()
289
 
290
  # Get evaluator name
291
+ evaluator_name = request.form.get('evaluator_name', '').strip()
292
  if not evaluator_name:
293
  flash("Please enter your name as the evaluator.")
294
  return render_template('index.html')
 
301
  flash("No file selected.")
302
  return render_template('index.html')
303
 
304
+ if file and file.filename.endswith('.csv'):
305
  try:
306
+ # Read file content
307
  file_content = file.read()
308
 
309
+ # Detect encoding and parse CSV
310
+ encoding = detect_encoding(file_content)
311
+ csv_text = file_content.decode(encoding)
312
+ df = pd.read_csv(io.StringIO(csv_text))
313
+
314
+ # Validate columns
315
+ required_columns = ['filename', 'description', 'mrn', 'note']
316
+ missing_columns = [col for col in required_columns if col not in df.columns]
317
+
318
+ if missing_columns:
319
+ flash(f"Missing required columns: {', '.join(missing_columns)}")
320
+ return render_template('index.html')
321
+
322
+ # Save documents
323
+ documents_path = os.path.join(DATA_DIR, 'documents.csv')
324
+ df.to_csv(documents_path, index=False)
325
+
326
+ # Set session
327
+ session['evaluator_name'] = evaluator_name
328
+ session['session_id'] = f"{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
329
+
330
+ # Store evaluator name
331
+ store_evaluator_name(evaluator_name)
332
+
333
+ flash(f"File uploaded successfully! Found {len(df)} documents.")
334
+ return redirect(url_for('evaluate'))
335
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  except Exception as e:
337
+ flash(f"Error processing file: {str(e)}")
338
+ log_error(f"File upload error: {str(e)}")
339
  else:
340
  flash("Please upload a CSV file.")
341
 
342
  return render_template('index.html')
343
 
344
+ # GET request
345
+ evaluator_name = session.get('evaluator_name', '') or get_stored_evaluator_name() or ''
346
+ return render_template('index.html', evaluator_name=evaluator_name)
347
 
348
@app.route('/evaluate', methods=['GET', 'POST'])
def evaluate():
    """Document evaluation page.

    GET renders the document at the evaluator's current index (optionally
    moved first by the ``jump_to`` query parameter). POST handles the form
    actions ``skip``, ``stop_save`` and ``submit`` and then redirects.
    Indices are 1-based; an index of ``len(documents) + 1`` means every
    document has been handled.
    """
    # Get evaluator name from multiple sources
    # (session first, then the 'evaluator' query parameter, then the name file)
    evaluator_name = (
        session.get('evaluator_name') or
        request.args.get('evaluator') or
        get_stored_evaluator_name()
    )

    if not evaluator_name:
        flash("Please enter your name before evaluating documents.")
        return redirect(url_for('index'))

    # Update session
    session['evaluator_name'] = evaluator_name
    if 'session_id' not in session:
        session['session_id'] = f"{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Ensure directories exist
    ensure_data_directory()

    # Load documents
    documents = load_documents()
    if not documents:
        return render_template('no_documents.html')

    # Get current index
    current_index = load_current_index(evaluator_name)

    # Handle jump requests
    # (only in-range targets are honoured; the new position is persisted)
    jump_to = request.args.get('jump_to', type=int)
    if jump_to and 1 <= jump_to <= len(documents):
        current_index = jump_to
        save_current_index(evaluator_name, current_index)

    # Handle POST requests
    if request.method == 'POST':
        action = request.form.get('action', 'submit')

        if action == 'skip':
            # Advance without saving an evaluation; capped at len(documents) + 1
            current_index = min(current_index + 1, len(documents) + 1)
            save_current_index(evaluator_name, current_index)
            flash("Document skipped.")
            return redirect(url_for('evaluate'))

        elif action == 'stop_save':
            # Nothing is written here; the index was persisted by earlier actions
            flash("Progress saved. You can resume later.")
            return redirect(url_for('results', session_saved=True))

        elif action == 'submit':
            if current_index <= len(documents):
                current_doc = documents[current_index - 1]

                # Prepare evaluation data
                eval_data = {
                    'document_title': current_doc.get('filename', ''),
                    'description': current_doc.get('description', ''),
                    'mrn': current_doc.get('mrn', ''),
                    'investigator_name': evaluator_name,
                    'session_id': session.get('session_id', ''),
                    'note_origin': request.form.get('note_origin', '')
                }

                # Add criteria scores
                # (form fields are named criteria_0, criteria_1, ... in CRITERIA order)
                all_scores_present = True
                for i, criterion in enumerate(CRITERIA):
                    score = request.form.get(f'criteria_{i}')
                    if score:
                        eval_data[criterion] = score
                    else:
                        all_scores_present = False
                        flash(f"Please rate: {criterion}")

                # Check note origin
                if not eval_data['note_origin']:
                    all_scores_present = False
                    flash("Please select a note origin assessment.")

                # Save if all data present
                # (the index only advances when the save succeeded)
                if all_scores_present:
                    if save_evaluation(eval_data):
                        current_index = min(current_index + 1, len(documents) + 1)
                        save_current_index(evaluator_name, current_index)
                        flash("Evaluation saved successfully!")
                    else:
                        flash("Error saving evaluation. Please try again.")

            # NOTE(review): the nesting level of this redirect (submit branch
            # vs. the whole POST handler) should be confirmed against the file.
            return redirect(url_for('evaluate'))

    # Check if all documents evaluated
    if current_index > len(documents):
        flash("All documents have been evaluated. Thank you!")
        return redirect(url_for('results'))

    # Get current document
    document = documents[current_index - 1]

    # Calculate progress
    # (documents before the current one count as done; shown as a whole percent)
    evaluated_docs = current_index - 1
    progress = int((evaluated_docs / len(documents)) * 100) if documents else 0

    return render_template('evaluate.html',
        current_note_number=current_index,
        evaluator_name=evaluator_name,
        note=document.get('note', ''),
        description=document.get('description', ''),
        mrn=document.get('mrn', ''),
        criteria=CRITERIA,
        descriptions=CRITERIA_DESCRIPTIONS,
        score_range=range(1, 6),
        note_origins=NOTE_ORIGINS,
        total_docs=len(documents),
        evaluated_docs=evaluated_docs,
        progress=progress
    )
464
 
465
  @app.route('/jump', methods=['POST'])
466
  def jump_to_document():
467
  """Jump to a specific document number."""
468
  try:
469
  document_number = int(request.form.get('document_number', 1))
 
 
470
  documents = load_documents()
 
471
 
472
  if document_number < 1:
473
  flash("Document number must be 1 or greater.")
474
+ elif document_number > len(documents):
475
+ flash(f"Document number cannot be greater than {len(documents)}.")
476
  else:
 
 
477
  return redirect(url_for('evaluate', jump_to=document_number))
478
  except ValueError:
479
  flash("Please enter a valid document number.")
 
483
@app.route('/results')
def results():
    """Render the results page listing every saved evaluation.

    Each evaluation row is turned into a dict; a missing or NaN
    ``description`` / ``mrn`` is filled from the lookups returned by
    ``get_results()``, keyed by the row's ``document_title``. On any error the
    user is sent back to the index page with a flash message.
    """
    try:
        eval_df, filename_to_desc, filename_to_mrn = get_results()

        # Field name -> lookup table used to back-fill that field.
        fallbacks = (
            ('description', filename_to_desc),
            ('mrn', filename_to_mrn),
        )

        evaluations = []
        if not eval_df.empty:
            for _, row in eval_df.iterrows():
                record = row.to_dict()
                title = record.get('document_title', '')
                for field, lookup in fallbacks:
                    if field not in record or pd.isna(record[field]):
                        record[field] = lookup.get(title, '')
                evaluations.append(record)

        context = {
            'evaluations': evaluations,
            'criteria': CRITERIA,
            'descriptions': CRITERIA_DESCRIPTIONS,
            'session_saved': request.args.get('session_saved', False),
        }
        return render_template('results.html', **context)

    except Exception as exc:
        log_error(f"Error in results route: {str(exc)}")
        flash(f"Error loading results: {str(exc)}")
        return redirect(url_for('index'))
517
 
518
  @app.route('/export-csv')
519
  def export_csv():
520
+ """Export evaluations to CSV."""
521
  try:
522
  eval_df, _, _ = get_results()
523
 
 
525
  flash('No evaluations available to export.')
526
  return redirect(url_for('results'))
527
 
528
+ # Create CSV in memory
529
  output = io.StringIO()
530
  eval_df.to_csv(output, index=False, quoting=csv.QUOTE_ALL)
531
  output.seek(0)
532
 
533
+ # Convert to bytes
534
  mem = io.BytesIO()
535
  mem.write(output.getvalue().encode('utf-8'))
536
  mem.seek(0)
 
542
  download_name=f'evaluations_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
543
  )
544
  except Exception as e:
545
+ flash(f'Error exporting CSV: {str(e)}')
546
+ log_error(f"Export error: {str(e)}")
 
547
  return redirect(url_for('results'))
548
 
549
  @app.route('/upload-documents', methods=['GET', 'POST'])
550
  def upload_documents():
551
+ """Alternative document upload page."""
552
  if request.method == 'POST':
553
  try:
554
  if 'file' not in request.files:
 
561
  return redirect(request.url)
562
 
563
  if file and file.filename.endswith('.csv'):
 
 
 
 
564
  ensure_data_directory()
565
 
566
+ # Save file
567
+ file_path = os.path.join(DATA_DIR, 'documents.csv')
568
  file.save(file_path)
569
 
570
+ # Verify file
571
+ try:
572
+ df = pd.read_csv(file_path)
573
+ flash(f'Documents uploaded successfully! Found {len(df)} documents.')
574
+ return redirect(url_for('index'))
575
+ except Exception as e:
576
+ flash(f'File uploaded but could not be parsed: {str(e)}')
 
 
 
 
 
 
577
  else:
578
  flash('Please upload a CSV file')
579
 
580
  except Exception as e:
581
+ flash(f'Error uploading file: {str(e)}')
582
+ log_error(f"Upload error: {str(e)}")
 
583
 
584
  return render_template('upload_documents.html')
585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
@app.route('/debug')
def debug():
    """Render the debug page with a snapshot of application state.

    Shows the loaded documents and evaluations, whether their CSV files
    exist, the in-memory error log, and basic facts about the data directory,
    session and runtime environment.
    """
    ensure_data_directory()

    documents = load_documents()
    eval_df, _, _ = get_results()
    evaluations = eval_df.to_dict('records') if not eval_df.empty else []

    data_dir_exists = os.path.exists(DATA_DIR)
    on_spaces = 'SPACE_ID' in os.environ

    debug_info = {
        'data_dir': DATA_DIR,
        'data_dir_exists': data_dir_exists,
        # Writability is only probed when the directory exists.
        'data_dir_writable': os.access(DATA_DIR, os.W_OK) if os.path.exists(DATA_DIR) else False,
        'current_working_dir': os.getcwd(),
        'session_id': session.get('session_id', 'None'),
        'evaluator_name': session.get('evaluator_name', 'None'),
        'documents_count': len(documents),
        'evaluations_count': len(evaluations),
        'environment': 'HF Spaces' if on_spaces else 'Local',
    }

    documents_csv = os.path.join(DATA_DIR, 'documents.csv')
    evaluations_csv = os.path.join(DATA_DIR, 'evaluations.csv')

    return render_template(
        'debug.html',
        documents=documents,
        evaluations=evaluations,
        documents_exists=os.path.exists(documents_csv),
        evaluations_exists=os.path.exists(evaluations_csv),
        errors=ERROR_LOG,
        debug_info=debug_info,
    )
 
617
def view_instructions():
    """Render the instructions page with the criteria and their descriptions."""
    context = {
        'criteria': CRITERIA,
        'descriptions': CRITERIA_DESCRIPTIONS,
    }
    return render_template('instructions.html', **context)
623
 
624
@app.route('/download/instructions')
def download_instructions():
    """Send ``instructions.md`` from DATA_DIR as a file download.

    If the file is missing, a message is flashed and the user is redirected
    to the index page.
    """
    instructions_path = os.path.join(DATA_DIR, 'instructions.md')
    send_options = {
        'mimetype': 'text/markdown',
        'download_name': 'instructions.md',
        'as_attachment': True,
    }
    try:
        return send_file(instructions_path, **send_options)
    except FileNotFoundError:
        flash('Instructions file not found.')
        return redirect(url_for('index'))
637
 
638
@app.route('/download/template')
def download_template():
    """Send ``sample_documents_template.csv`` from DATA_DIR as a download.

    If the file is missing, a message is flashed and the user is redirected
    to the index page.
    """
    template_path = os.path.join(DATA_DIR, 'sample_documents_template.csv')
    send_options = {
        'mimetype': 'text/csv',
        'download_name': 'sample_documents_template.csv',
        'as_attachment': True,
    }
    try:
        return send_file(template_path, **send_options)
    except FileNotFoundError:
        flash('Template file not found.')
        return redirect(url_for('index'))
651
 
652
  @app.route('/reset', methods=['POST'])
653
  def reset():
654
+ """Reset session and clear evaluations."""
655
  session.clear()
656
+
657
+ # Backup and remove evaluations
658
  evaluations_path = os.path.join(DATA_DIR, 'evaluations.csv')
659
  if os.path.exists(evaluations_path):
660
+ backup_path = f"{evaluations_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
 
661
  try:
662
  shutil.copy(evaluations_path, backup_path)
663
  log_error(f"Created backup at {backup_path}")
 
665
  log_error(f"Could not create backup: {str(e)}")
666
 
667
  os.remove(evaluations_path)
668
+ log_error("Removed evaluations.csv")
669
+
670
  flash('Session reset. All evaluation data cleared.')
671
  return redirect(url_for('index'))
672
 
673
  @app.route('/clear-corrupted-data', methods=['POST'])
674
  def clear_corrupted_data():
675
+ """Clear corrupted evaluations file."""
676
  evaluations_path = os.path.join(DATA_DIR, 'evaluations.csv')
677
  if os.path.exists(evaluations_path):
678
+ backup_path = f"{evaluations_path}.corrupted.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
 
679
  try:
680
  shutil.copy(evaluations_path, backup_path)
681
+ log_error(f"Backed up corrupted file to {backup_path}")
682
  except Exception as e:
683
+ log_error(f"Could not backup: {str(e)}")
684
 
685
  os.remove(evaluations_path)
686
+ flash('Corrupted evaluation data cleared.')
 
687
  else:
688
  flash('No evaluation data file found.')
689
 
 
695
  error_message = request.args.get('message', 'An unknown error occurred')
696
  error_details = request.args.get('details', '')
697
  return render_template('error.html',
698
+ error_message=error_message,
699
+ error_details=error_details
700
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
701
 
702
if __name__ == '__main__':
    separator = "=" * 60
    print("\n" + separator)
    print(f"Application Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(separator + "\n")

    # Initialize
    ensure_data_directory()

    # Log startup info
    print(f"Data directory: {DATA_DIR}")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Environment: {'HF Spaces' if 'SPACE_ID' in os.environ else 'Local'}")

    if 'SPACE_ID' in os.environ:
        print(f"Space ID: {os.environ.get('SPACE_ID')}")
        print(f"Space Author: {os.environ.get('SPACE_AUTHOR_NAME')}")

    print(f"Data directory contents: {os.listdir(DATA_DIR) if os.path.exists(DATA_DIR) else 'N/A'}")
    print("\n" + separator + "\n")

    # Run the app.
    # The server listens on all interfaces, and Flask's debug mode enables the
    # interactive debugger, which allows arbitrary code execution. Debug mode
    # is therefore off unless FLASK_DEBUG is explicitly set to a true value.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)), debug=debug_enabled)
 
templates/evaluate.html CHANGED
@@ -71,13 +71,19 @@
71
  <strong>MRN:</strong> {{ mrn }}
72
  </div>
73
  {% endif %}
 
 
 
 
 
74
  </div>
75
  <div class="note-content">
76
  {{ note }}
77
  </div>
78
  </div>
79
 
80
- <form method="POST" action="{{ url_for('evaluate') }}">
 
81
  <div class="criteria-container">
82
  {% for i in range(criteria|length) %}
83
  <div class="criteria-group">
@@ -112,19 +118,10 @@
112
  </div>
113
 
114
  <div class="form-buttons">
115
- <form method="POST" action="{{ url_for('evaluate') }}">
116
- <button type="submit" name="action" value="submit" class="submit-btn">Submit Evaluation</button>
117
- </form>
118
-
119
- <div class="action-buttons">
120
- <form method="POST" action="{{ url_for('evaluate') }}" onsubmit="return confirm('Are you sure you want to skip this document?');">
121
- <button type="submit" name="action" value="skip" class="skip-btn">Skip Document</button>
122
- </form>
123
-
124
- <form method="POST" action="{{ url_for('evaluate') }}">
125
- <button type="submit" name="action" value="stop_save" class="stop-save-btn">Stop and Save Progress</button>
126
- </form>
127
- </div>
128
  </div>
129
  </form>
130
 
 
71
  <strong>MRN:</strong> {{ mrn }}
72
  </div>
73
  {% endif %}
74
+ {% if description %}
75
+ <div class="info-item">
76
+ <strong>Description:</strong> {{ description }}
77
+ </div>
78
+ {% endif %}
79
  </div>
80
  <div class="note-content">
81
  {{ note }}
82
  </div>
83
  </div>
84
 
85
+ <!-- Single form for all evaluation data -->
86
+ <form method="POST" action="{{ url_for('evaluate') }}" id="evaluation-form">
87
  <div class="criteria-container">
88
  {% for i in range(criteria|length) %}
89
  <div class="criteria-group">
 
118
  </div>
119
 
120
  <div class="form-buttons">
121
+ <button type="submit" name="action" value="submit" class="submit-btn">Submit Evaluation</button>
122
+ <button type="submit" name="action" value="skip" class="skip-btn"
123
+ onclick="return confirm('Are you sure you want to skip this document?');">Skip Document</button>
124
+ <button type="submit" name="action" value="stop_save" class="stop-save-btn">Stop and Save Progress</button>
 
 
 
 
 
 
 
 
 
125
  </div>
126
  </form>
127