Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,9 +36,9 @@ SESSION_TIMEOUT = 3600 # 1 hour session timeout
|
|
| 36 |
|
| 37 |
# Initialize logging
|
| 38 |
logging.basicConfig(
|
| 39 |
-
filename='transcript_parser.log',
|
| 40 |
level=logging.DEBUG,
|
| 41 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
|
|
| 42 |
)
|
| 43 |
|
| 44 |
# Model configuration - Only DeepSeek
|
|
@@ -318,7 +318,7 @@ class TranscriptParser:
|
|
| 318 |
logging.error(f"Error parsing transcript: {str(e)}")
|
| 319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
| 320 |
|
| 321 |
-
def _parse_miami_dade_format(self, text: str) -> Dict:
|
| 322 |
"""Parse Miami-Dade County Public Schools transcripts."""
|
| 323 |
# Initialize PDF reader from text (simulating the PDF structure)
|
| 324 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
|
@@ -328,39 +328,62 @@ class TranscriptParser:
|
|
| 328 |
'student_info': {},
|
| 329 |
'graduation_requirements': [],
|
| 330 |
'course_history': [],
|
| 331 |
-
'summary': {}
|
|
|
|
| 332 |
}
|
| 333 |
|
| 334 |
-
# Parse student information
|
| 335 |
student_info_lines = []
|
| 336 |
-
for line in lines:
|
|
|
|
| 337 |
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
| 338 |
-
|
|
|
|
| 339 |
break
|
| 340 |
|
| 341 |
if student_info_lines:
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
# Parse graduation requirements
|
| 361 |
requirements_start = None
|
| 362 |
requirements_end = None
|
| 363 |
for i, line in enumerate(lines):
|
|
|
|
| 364 |
if "Code" in line and "Description" in line and "Required" in line:
|
| 365 |
requirements_start = i + 1
|
| 366 |
if requirements_start and "Total" in line:
|
|
@@ -369,31 +392,42 @@ class TranscriptParser:
|
|
| 369 |
|
| 370 |
if requirements_start and requirements_end:
|
| 371 |
for line in lines[requirements_start:requirements_end]:
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
# Parse total line
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
# Parse course history
|
| 395 |
course_history_start = None
|
| 396 |
for i, line in enumerate(lines):
|
|
|
|
| 397 |
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
| 398 |
course_history_start = i + 1
|
| 399 |
break
|
|
@@ -401,38 +435,49 @@ class TranscriptParser:
|
|
| 401 |
if course_history_start:
|
| 402 |
current_requirement = None
|
| 403 |
for line in lines[course_history_start:]:
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
# Calculate graduation status
|
| 428 |
-
|
| 429 |
-
'
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
|
| 437 |
return data
|
| 438 |
|
|
|
|
| 36 |
|
| 37 |
# Initialize logging
|
| 38 |
logging.basicConfig(
|
|
|
|
| 39 |
level=logging.DEBUG,
|
| 40 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 41 |
+
filename='transcript_parser.log'
|
| 42 |
)
|
| 43 |
|
| 44 |
# Model configuration - Only DeepSeek
|
|
|
|
| 318 |
logging.error(f"Error parsing transcript: {str(e)}")
|
| 319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
| 320 |
|
| 321 |
+
def _parse_miami_dade_format(self, text: str, strict_mode: bool = False) -> Dict:
|
| 322 |
"""Parse Miami-Dade County Public Schools transcripts."""
|
| 323 |
# Initialize PDF reader from text (simulating the PDF structure)
|
| 324 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
|
|
|
| 328 |
'student_info': {},
|
| 329 |
'graduation_requirements': [],
|
| 330 |
'course_history': [],
|
| 331 |
+
'summary': {},
|
| 332 |
+
'format': 'miami_dade' # Add format identifier
|
| 333 |
}
|
| 334 |
|
| 335 |
+
# Parse student information with more robust checks
|
| 336 |
student_info_lines = []
|
| 337 |
+
for i, line in enumerate(lines):
|
| 338 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
| 339 |
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
| 340 |
+
# Take the matched line plus the next 4 lines (or fewer if we're near the end)
|
| 341 |
+
student_info_lines = lines[i:i+5]
|
| 342 |
break
|
| 343 |
|
| 344 |
if student_info_lines:
|
| 345 |
+
try:
|
| 346 |
+
# Parse school and district info - more defensive
|
| 347 |
+
school_info_parts = student_info_lines[0].split('|')
|
| 348 |
+
if len(school_info_parts) > 2:
|
| 349 |
+
data['student_info']['school'] = school_info_parts[1].strip() if len(school_info_parts) > 1 else ''
|
| 350 |
+
data['student_info']['district'] = school_info_parts[2].strip() if len(school_info_parts) > 2 else ''
|
| 351 |
+
|
| 352 |
+
# Parse student name and ID - more defensive
|
| 353 |
+
if len(student_info_lines) > 1:
|
| 354 |
+
name_id_line = student_info_lines[1].split('-')
|
| 355 |
+
if len(name_id_line) > 1:
|
| 356 |
+
name_parts = name_id_line[1].split(',')
|
| 357 |
+
if len(name_parts) > 1:
|
| 358 |
+
data['student_info']['student_id'] = name_id_line[0].strip()
|
| 359 |
+
data['student_info']['student_name'] = name_parts[1].strip() + " " + name_parts[0].strip()
|
| 360 |
+
|
| 361 |
+
# Parse academic info - more defensive
|
| 362 |
+
if len(student_info_lines) > 2:
|
| 363 |
+
academic_info = student_info_lines[2].split('|')
|
| 364 |
+
if len(academic_info) > 5:
|
| 365 |
+
data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip() if ':' in academic_info[1] else ''
|
| 366 |
+
data['student_info']['graduation_year'] = academic_info[2].strip()
|
| 367 |
+
data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip() if ':' in academic_info[3] else ''
|
| 368 |
+
data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip() if ':' in academic_info[4] else ''
|
| 369 |
+
data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip() if ':' in academic_info[5] else ''
|
| 370 |
+
|
| 371 |
+
# Validate we got the essential student info
|
| 372 |
+
if not data['student_info'].get('student_name'):
|
| 373 |
+
logging.warning("Failed to parse student name")
|
| 374 |
+
if strict_mode:
|
| 375 |
+
raise ValueError("Could not parse student name from transcript")
|
| 376 |
+
|
| 377 |
+
except Exception as e:
|
| 378 |
+
logging.warning(f"Error parsing student info: {str(e)}")
|
| 379 |
+
if strict_mode:
|
| 380 |
+
raise
|
| 381 |
|
| 382 |
# Parse graduation requirements
|
| 383 |
requirements_start = None
|
| 384 |
requirements_end = None
|
| 385 |
for i, line in enumerate(lines):
|
| 386 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
| 387 |
if "Code" in line and "Description" in line and "Required" in line:
|
| 388 |
requirements_start = i + 1
|
| 389 |
if requirements_start and "Total" in line:
|
|
|
|
| 392 |
|
| 393 |
if requirements_start and requirements_end:
|
| 394 |
for line in lines[requirements_start:requirements_end]:
|
| 395 |
+
try:
|
| 396 |
+
if '|' in line:
|
| 397 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
| 398 |
+
if len(parts) >= 6:
|
| 399 |
+
req = {
|
| 400 |
+
'code': parts[0],
|
| 401 |
+
'description': parts[1],
|
| 402 |
+
'required': parts[2],
|
| 403 |
+
'waived': parts[3],
|
| 404 |
+
'completed': parts[4],
|
| 405 |
+
'status': parts[5]
|
| 406 |
+
}
|
| 407 |
+
data['graduation_requirements'].append(req)
|
| 408 |
+
except Exception as e:
|
| 409 |
+
logging.warning(f"Error parsing requirement line: {line} - {str(e)}")
|
| 410 |
+
if strict_mode:
|
| 411 |
+
raise
|
| 412 |
|
| 413 |
# Parse total line
|
| 414 |
+
try:
|
| 415 |
+
total_line = lines[requirements_end]
|
| 416 |
+
total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
|
| 417 |
+
if len(total_parts) >= 5:
|
| 418 |
+
data['summary']['total_required'] = total_parts[1]
|
| 419 |
+
data['summary']['total_waived'] = total_parts[2]
|
| 420 |
+
data['summary']['total_completed'] = total_parts[3]
|
| 421 |
+
data['summary']['completion_percentage'] = total_parts[4]
|
| 422 |
+
except Exception as e:
|
| 423 |
+
logging.warning(f"Error parsing requirements summary: {str(e)}")
|
| 424 |
+
if strict_mode:
|
| 425 |
+
raise
|
| 426 |
|
| 427 |
# Parse course history
|
| 428 |
course_history_start = None
|
| 429 |
for i, line in enumerate(lines):
|
| 430 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
| 431 |
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
| 432 |
course_history_start = i + 1
|
| 433 |
break
|
|
|
|
| 435 |
if course_history_start:
|
| 436 |
current_requirement = None
|
| 437 |
for line in lines[course_history_start:]:
|
| 438 |
+
try:
|
| 439 |
+
if '|' in line:
|
| 440 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
| 441 |
+
|
| 442 |
+
# Check if this is a new requirement line
|
| 443 |
+
if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
|
| 444 |
+
current_requirement = parts[0]
|
| 445 |
+
parts = parts[1:] # Remove the requirement code
|
| 446 |
+
|
| 447 |
+
if len(parts) >= 9:
|
| 448 |
+
course = {
|
| 449 |
+
'requirement': current_requirement,
|
| 450 |
+
'school_year': parts[0],
|
| 451 |
+
'grade_level': parts[1],
|
| 452 |
+
'course_number': parts[2],
|
| 453 |
+
'description': parts[3],
|
| 454 |
+
'term': parts[4],
|
| 455 |
+
'district_number': parts[5],
|
| 456 |
+
'fg': parts[6],
|
| 457 |
+
'included': parts[7],
|
| 458 |
+
'credits': parts[8]
|
| 459 |
+
}
|
| 460 |
+
data['course_history'].append(course)
|
| 461 |
+
except Exception as e:
|
| 462 |
+
logging.warning(f"Error parsing course line: {line} - {str(e)}")
|
| 463 |
+
if strict_mode:
|
| 464 |
+
raise
|
| 465 |
|
| 466 |
# Calculate graduation status
|
| 467 |
+
try:
|
| 468 |
+
if data['summary'].get('total_required') and data['summary'].get('total_completed'):
|
| 469 |
+
graduation_status = {
|
| 470 |
+
'total_required_credits': float(data['summary']['total_required']),
|
| 471 |
+
'total_completed_credits': float(data['summary']['total_completed']),
|
| 472 |
+
'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
|
| 473 |
+
'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
|
| 474 |
+
'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
|
| 475 |
+
}
|
| 476 |
+
data['graduation_status'] = graduation_status
|
| 477 |
+
except Exception as e:
|
| 478 |
+
logging.warning(f"Error calculating graduation status: {str(e)}")
|
| 479 |
+
if strict_mode:
|
| 480 |
+
raise
|
| 481 |
|
| 482 |
return data
|
| 483 |
|