Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -338,6 +338,23 @@ class GraduationProgress(BaseModel):
|
|
| 338 |
courses: List[Course]
|
| 339 |
assessments: Dict[str, str]
|
| 340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
class TranscriptParser:
|
| 342 |
def __init__(self):
|
| 343 |
self.student_data = {}
|
|
@@ -351,20 +368,25 @@ class TranscriptParser:
|
|
| 351 |
try:
|
| 352 |
text = preprocess_text(text)
|
| 353 |
|
| 354 |
-
# First try the
|
| 355 |
-
parsed_data = self.
|
| 356 |
if parsed_data:
|
|
|
|
| 357 |
return parsed_data
|
| 358 |
-
|
| 359 |
# Fall back to simplified parser if detailed parsing fails
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
except Exception as e:
|
| 363 |
logging.error(f"Error parsing transcript: {str(e)}")
|
| 364 |
raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
|
| 365 |
|
| 366 |
-
def
|
| 367 |
-
"""
|
| 368 |
try:
|
| 369 |
parsed_data = {
|
| 370 |
'student_info': {},
|
|
@@ -385,7 +407,7 @@ class TranscriptParser:
|
|
| 385 |
parsed_data['student_info']['grade'] = student_info_match.group(3)
|
| 386 |
parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
|
| 387 |
|
| 388 |
-
#
|
| 389 |
gpa_matches = re.findall(
|
| 390 |
r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
|
| 391 |
text,
|
|
@@ -415,46 +437,56 @@ class TranscriptParser:
|
|
| 415 |
if virtual_grade_match:
|
| 416 |
parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
|
| 417 |
|
| 418 |
-
# Extract requirements
|
| 419 |
-
req_section = re.search(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
if req_section:
|
| 421 |
req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
|
| 422 |
for line in req_lines:
|
| 423 |
if '|' in line: # Table format
|
| 424 |
-
parts = [part.strip() for part in line.split('|')]
|
| 425 |
-
if len(parts) >=
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
|
|
|
|
|
|
| 438 |
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
-
# Extract assessments
|
| 449 |
-
assess_section = re.search(r"Z-Assessment.*?\n(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
|
| 450 |
if assess_section:
|
| 451 |
assess_lines = [line.strip() for line in assess_section.group(1).split('\n') if line.strip()]
|
| 452 |
for line in assess_lines:
|
| 453 |
if '|' in line:
|
| 454 |
-
parts = [part.strip() for part in line.split('|')]
|
| 455 |
if len(parts) >= 5 and parts[0].startswith('Z-'):
|
| 456 |
name = parts[0].replace('Z-', '').strip()
|
| 457 |
-
status = parts[4]
|
| 458 |
parsed_data['assessments'][name] = status
|
| 459 |
|
| 460 |
# Extract course history with more fault-tolerant parsing
|
|
@@ -471,10 +503,10 @@ class TranscriptParser:
|
|
| 471 |
]
|
| 472 |
|
| 473 |
for line in course_lines:
|
| 474 |
-
parts = [part.strip() for part in line.split('|')]
|
| 475 |
|
| 476 |
-
#
|
| 477 |
-
|
| 478 |
course = {
|
| 479 |
'requirement': parts[0] if len(parts) > 0 else "",
|
| 480 |
'school_year': parts[1] if len(parts) > 1 else "",
|
|
@@ -489,17 +521,20 @@ class TranscriptParser:
|
|
| 489 |
}
|
| 490 |
|
| 491 |
# Handle "inProgress" and empty credits
|
| 492 |
-
if "
|
| 493 |
course['credits'] = "0"
|
| 494 |
elif not course['credits'].replace('.','').isdigit():
|
| 495 |
course['credits'] = "0"
|
| 496 |
|
| 497 |
parsed_data['course_history'].append(course)
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
return parsed_data
|
| 500 |
|
| 501 |
except Exception as e:
|
| 502 |
-
logging.warning(f"
|
| 503 |
return None
|
| 504 |
|
| 505 |
def _parse_simplified_transcript(self, text: str) -> Dict:
|
|
@@ -544,7 +579,7 @@ class TranscriptParser:
|
|
| 544 |
logging.warning(f"Pattern {pattern} failed: {str(e)}")
|
| 545 |
continue
|
| 546 |
|
| 547 |
-
|
| 548 |
|
| 549 |
# ========== ENHANCED ANALYSIS FUNCTIONS ==========
|
| 550 |
def analyze_gpa(parsed_data: Dict) -> str:
|
|
@@ -955,11 +990,11 @@ class LearningStyleQuiz:
|
|
| 955 |
result += "You may benefit from combining different learning approaches:\n"
|
| 956 |
for style in primary_styles:
|
| 957 |
result += f"\n**{style}** techniques:\n"
|
| 958 |
-
for tip in
|
| 959 |
result += f"- {tip}\n"
|
| 960 |
|
| 961 |
result += f"\n**{style}** career suggestions:\n"
|
| 962 |
-
for career in
|
| 963 |
result += f"- {career}\n"
|
| 964 |
|
| 965 |
return result
|
|
|
|
| 338 |
courses: List[Course]
|
| 339 |
assessments: Dict[str, str]
|
| 340 |
|
| 341 |
+
def validate_parsed_data(parsed_data: Dict) -> bool:
    """Ensure all critical fields exist in a parsed transcript.

    Each required path is walked one step at a time by subscripting, so
    string components address mapping keys and integer components address
    sequence positions (``('course_history', 0)`` means "the course list
    has a first element").

    Args:
        parsed_data: Nested structure produced by a transcript parser.

    Returns:
        True when every required path can be resolved.

    Raises:
        ValueError: If any required path is missing; the message names the
            full dotted path, e.g. ``student_info.name``.
    """
    required_fields = (
        ('student_info', 'name'),
        ('student_info', 'weighted_gpa'),
        ('requirements', 'A-English'),  # Sample requirement
        ('course_history', 0),  # At least one course
    )

    for path in required_fields:
        current = parsed_data
        for key in path:
            # Subscript instead of using ``in``: on a list, ``0 in current``
            # tests whether the *value* 0 is an element, not whether index 0
            # exists, which rejected every non-empty course list.
            try:
                current = current[key]
            except (KeyError, IndexError, TypeError) as err:
                # Path components may be ints, so stringify before joining;
                # a bare '.'.join(path) would raise TypeError here.
                dotted_path = '.'.join(str(part) for part in path)
                raise ValueError(
                    f"Missing critical field: {dotted_path}"
                ) from err
    return True
|
| 357 |
+
|
| 358 |
class TranscriptParser:
|
| 359 |
def __init__(self):
|
| 360 |
self.student_data = {}
|
|
|
|
| 368 |
try:
|
| 369 |
text = preprocess_text(text)
|
| 370 |
|
| 371 |
+
# First try the specialized Miami-Dade parser
|
| 372 |
+
parsed_data = self._parse_miami_dade_transcript(text)
|
| 373 |
if parsed_data:
|
| 374 |
+
validate_parsed_data(parsed_data)
|
| 375 |
return parsed_data
|
| 376 |
+
|
| 377 |
# Fall back to simplified parser if detailed parsing fails
|
| 378 |
+
parsed_data = self._parse_simplified_transcript(text)
|
| 379 |
+
if parsed_data:
|
| 380 |
+
return parsed_data
|
| 381 |
+
|
| 382 |
+
raise ValueError("No data could be parsed from the transcript")
|
| 383 |
|
| 384 |
except Exception as e:
|
| 385 |
logging.error(f"Error parsing transcript: {str(e)}")
|
| 386 |
raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
|
| 387 |
|
| 388 |
+
def _parse_miami_dade_transcript(self, text: str) -> Optional[Dict]:
|
| 389 |
+
"""Specialized parser for Miami-Dade County Public Schools transcripts"""
|
| 390 |
try:
|
| 391 |
parsed_data = {
|
| 392 |
'student_info': {},
|
|
|
|
| 407 |
parsed_data['student_info']['grade'] = student_info_match.group(3)
|
| 408 |
parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
|
| 409 |
|
| 410 |
+
# Extract GPA information
|
| 411 |
gpa_matches = re.findall(
|
| 412 |
r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
|
| 413 |
text,
|
|
|
|
| 437 |
if virtual_grade_match:
|
| 438 |
parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
|
| 439 |
|
| 440 |
+
# Extract requirements section - more robust table parsing
|
| 441 |
+
req_section = re.search(
|
| 442 |
+
r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)",
|
| 443 |
+
text,
|
| 444 |
+
re.DOTALL | re.IGNORECASE
|
| 445 |
+
)
|
| 446 |
+
|
| 447 |
if req_section:
|
| 448 |
req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
|
| 449 |
for line in req_lines:
|
| 450 |
if '|' in line: # Table format
|
| 451 |
+
parts = [part.strip() for part in line.split('|') if part.strip()]
|
| 452 |
+
if len(parts) >= 5: # More lenient check for number of columns
|
| 453 |
+
try:
|
| 454 |
+
code = parts[0] if len(parts) > 0 else ""
|
| 455 |
+
description = parts[1] if len(parts) > 1 else ""
|
| 456 |
+
required = float(parts[2]) if len(parts) > 2 and parts[2].replace('.','').isdigit() else 0.0
|
| 457 |
+
waived = float(parts[3]) if len(parts) > 3 and parts[3].replace('.','').isdigit() else 0.0
|
| 458 |
+
completed = float(parts[4]) if len(parts) > 4 and parts[4].replace('.','').isdigit() else 0.0
|
| 459 |
+
status = parts[5] if len(parts) > 5 else ""
|
| 460 |
+
|
| 461 |
+
# Extract percentage if available
|
| 462 |
+
percent = 0.0
|
| 463 |
+
if status:
|
| 464 |
+
percent_match = re.search(r"(\d+)%", status)
|
| 465 |
+
if percent_match:
|
| 466 |
+
percent = float(percent_match.group(1))
|
| 467 |
|
| 468 |
+
parsed_data['requirements'][code] = {
|
| 469 |
+
"description": description,
|
| 470 |
+
"required": required,
|
| 471 |
+
"waived": waived,
|
| 472 |
+
"completed": completed,
|
| 473 |
+
"percent_complete": percent,
|
| 474 |
+
"status": status
|
| 475 |
+
}
|
| 476 |
+
except (IndexError, ValueError) as e:
|
| 477 |
+
logging.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}")
|
| 478 |
+
continue
|
| 479 |
|
| 480 |
+
# Extract assessments section
|
| 481 |
+
assess_section = re.search(r"Z-Assessment.*?\n(.*?)(?:\n\s*\n|$)", text, re.DOTALL | re.IGNORECASE)
|
| 482 |
if assess_section:
|
| 483 |
assess_lines = [line.strip() for line in assess_section.group(1).split('\n') if line.strip()]
|
| 484 |
for line in assess_lines:
|
| 485 |
if '|' in line:
|
| 486 |
+
parts = [part.strip() for part in line.split('|') if part.strip()]
|
| 487 |
if len(parts) >= 5 and parts[0].startswith('Z-'):
|
| 488 |
name = parts[0].replace('Z-', '').strip()
|
| 489 |
+
status = parts[4] if len(parts) > 4 else ""
|
| 490 |
parsed_data['assessments'][name] = status
|
| 491 |
|
| 492 |
# Extract course history with more fault-tolerant parsing
|
|
|
|
| 503 |
]
|
| 504 |
|
| 505 |
for line in course_lines:
|
| 506 |
+
parts = [part.strip() for part in line.split('|') if part.strip()]
|
| 507 |
|
| 508 |
+
# More robust handling of course data
|
| 509 |
+
try:
|
| 510 |
course = {
|
| 511 |
'requirement': parts[0] if len(parts) > 0 else "",
|
| 512 |
'school_year': parts[1] if len(parts) > 1 else "",
|
|
|
|
| 521 |
}
|
| 522 |
|
| 523 |
# Handle "inProgress" and empty credits
|
| 524 |
+
if "inprogress" in course['credits'].lower() or not course['credits']:
|
| 525 |
course['credits'] = "0"
|
| 526 |
elif not course['credits'].replace('.','').isdigit():
|
| 527 |
course['credits'] = "0"
|
| 528 |
|
| 529 |
parsed_data['course_history'].append(course)
|
| 530 |
+
except (IndexError, ValueError) as e:
|
| 531 |
+
logging.warning(f"Skipping malformed course line: {line}. Error: {str(e)}")
|
| 532 |
+
continue
|
| 533 |
|
| 534 |
return parsed_data
|
| 535 |
|
| 536 |
except Exception as e:
|
| 537 |
+
logging.warning(f"Miami-Dade transcript parsing failed: {str(e)}")
|
| 538 |
return None
|
| 539 |
|
| 540 |
def _parse_simplified_transcript(self, text: str) -> Dict:
|
|
|
|
| 579 |
logging.warning(f"Pattern {pattern} failed: {str(e)}")
|
| 580 |
continue
|
| 581 |
|
| 582 |
+
return None
|
| 583 |
|
| 584 |
# ========== ENHANCED ANALYSIS FUNCTIONS ==========
|
| 585 |
def analyze_gpa(parsed_data: Dict) -> str:
|
|
|
|
| 990 |
result += "You may benefit from combining different learning approaches:\n"
|
| 991 |
for style in primary_styles:
|
| 992 |
result += f"\n**{style}** techniques:\n"
|
| 993 |
+
for tip in style_info['tips'][:2]:
|
| 994 |
result += f"- {tip}\n"
|
| 995 |
|
| 996 |
result += f"\n**{style}** career suggestions:\n"
|
| 997 |
+
for career in style_info['careers'][:3]:
|
| 998 |
result += f"- {career}\n"
|
| 999 |
|
| 1000 |
return result
|